Skip to content

File Format: EBNF syntax

The grammar below uses the EBNF notation defined in the W3C Extensible Markup Language (XML) recommendation, not the ISO/IEC 14977:1996 EBNF standard. This notation allows a compact specification of the file format. The same EBNF source is also used to generate the graphical representation.

/* top level: an iCSV file is a first line, a header section and a data section, in that order */
iCSV                  ::= firstline header_section data_section
/* first line: '#', the literal 'iCSV', a version number and the file format, each separated by exactly one space */
firstline             ::= '#' ' ' 'iCSV' ' ' version_number ' ' file_format newline
/* header: the [METADATA] block followed by the [FIELDS] block */
header_section        ::= metadata_header metadata_section fields_header fields_section

/* metadata */
/* section marker line: "# [METADATA]" */
metadata_header       ::= '#' ' ' '[METADATA]' newline
/* one or more lines; each line is either a bare newline or a '#' optionally
   followed by whitespace and a single key/value entry, then the line end */
metadata_section      ::= (('#' ((whitespace (required_metadata |
                                              recommended_metadata |
                                              other_metadata))? )? lineend ) | newline)+
required_metadata     ::= ('field_delimiter' assignment field_delimiter) |
                          ('geometry' assignment geometry) |
                          ('srid' assignment EPSG_code)
/* All recommended keys are alternatives of a single production; a symbol
   must not be defined by more than one rule.
   NOTE(review): tz_string is referenced here but has no production in the
   grammar shown - confirm where it is defined. */
recommended_metadata  ::= ('station_id' assignment alphanumeric) |
                          ('timestamp_meaning' assignment timestamp_meanings) |
                          ('nodata' assignment (integer | float)) |
                          ('timezone' assignment (integer | float | tz_string)) |
                          (('doi' | 'reference') assignment value)
/* any other key with a free-form value */
other_metadata        ::= key assignment value

/* fields */
/* section marker line: "# [FIELDS]" */
fields_header         ::= '#' ' ' '[FIELDS]' newline
/* one or more lines; each line is either a bare newline or a '#' optionally
   followed by whitespace and a single key/values entry, then the line end */
fields_section        ::= (('#' ((whitespace ( required_fields |
                                               recommended_fields |
                                               other_fields ))? )? lineend ) | newline)+
required_fields       ::= ('fields' assignment values)
/* All recommended keys are alternatives of a single production; a symbol
   must not be defined by more than one rule. */
recommended_fields    ::= (('units_multiplier' |
                            'units_offset' |
                            'units' |
                            'long_name' |
                            'standard_name') assignment values) |
                          ('timestamp_meaning' assignment timestamp_meanings)
/* any other key with a delimiter-separated list of values */
other_fields          ::= key assignment values

/* data */
/* section marker line "# [DATA]" followed by one or more data lines */
data_section          ::= '#' ' ' '[DATA]' newline dataline+
/* one record: values separated by the field delimiter, terminated by a newline */
dataline              ::= (value ( field_delimiter value )* newline)
/* list form used by the [FIELDS] entries: values separated by the field delimiter followed by whitespace */
values                ::= value ( field_delimiter whitespace value )*

/*
  NOTE:
  Every "values" list must contain the same number of entries
  ("length" here means the entry count, not the string length).
  This means everything in the "[FIELDS]" section maps 1:1 to the columns in the "[DATA]" section.
*/


/* other */
/* a key starts with a letter, optionally followed by letters and digits */
key             ::= char (alphanumeric*)?
value           ::= unicode-char*
/* '=' with optional whitespace on either side */
assignment      ::= whitespace? '=' whitespace?
field_delimiter ::= [,|\/:;]
/* at least one leading digit is required (an empty version number is not
   meaningful), followed by up to two optional dot-separated parts */
version_number  ::= digit+ ('.' (alphanumeric)+)? ('.' (alphanumeric)+)?
file_format     ::= 'UTF-8'
/* NOTE(review): application_profile is not referenced by any production in
   the grammar shown - confirm where it is meant to appear. */
application_profile ::= unicode-char*
/* EPSG codes have four or five digits (e.g. EPSG:4326, EPSG:32632) */
EPSG_code       ::= 'EPSG' ':' digit digit digit digit digit?
/* coordinates inside POINT/POINTZ are separated by whitespace, as in WKT.
   NOTE(review): WKT_string and column_name have no production in the
   grammar shown - confirm where they are defined. */
geometry        ::= 'POINT(' float whitespace float ')' |
                    'POINTZ(' float whitespace float whitespace float ')' |
                    WKT_string |
                    column_name
/* NOTE(review): ISO-8601 has no production in the grammar shown, and
   timestamp itself is not referenced by any production shown. */
timestamp       ::= ISO-8601
timestamp_meanings ::= 'beginning' | 'end' | 'middle' | 'instantaneous' | 'other' | 'undefined'

/* generic */
whitespace      ::= (tab | space)+
/* a comment runs from '#' up to, but not including, the end of the line;
   the newline is excluded so that it can terminate the enclosing lineend */
comment         ::= '#' whitespace? (unicode-char - newline)*
/* optional trailing whitespace or comment, then the line break */
lineend         ::= (whitespace | comment)? newline
newline         ::= #x0A
tab             ::= #x9
space           ::= #x20
char            ::= [a-zA-Z]
digit           ::= [0-9]
/* optionally signed sequence of digits */
integer         ::= [+-]? digit+
alphanumeric    ::= (digit | char)+
hex             ::= (digit | [a-fA-F])+
/* an integer followed by a mandatory '.' and optional fractional digits */
float           ::= integer '.' ((digit)+)?

/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
unicode-char    ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]