// Entry point: the document.

// This grammar is aligned to the doctree names when possible.
// It will however contain blocks, as we can’t parse sections:
// Section headers define the hierarchy by their delimiters,
// and pest only has one stack that we need for indentation.

// Stack primitives used throughout this file:
//   PUSH(e)     matches e and pushes the matched text,
//   PEEK[..]    matches every stack entry from bottom to top (the current indentation),
//   PEEK[..-1]  does the same but leaves out the topmost entry,
//   POP         matches the topmost entry and removes it,
//   DROP        removes the topmost entry without matching anything.

// Any number of blank lines may precede the first block or follow the last one;
// successive blank lines are equivalent to a single one in reStructuredText.
document = _{ SOI ~ blank_line* ~ blocks ~ EOI }
blocks   = _{ block ~ (blank_line* ~ block)* ~ blank_line* }
// A block is a hanging block preceded by the complete current indentation.
block    = _{ PEEK[..] ~ hanging_block }

// This is the list of all block-level elements
// They’re defined hanging, i.e. without the first PEEK[..]
// Alternatives are tried in order; paragraph comes last as the fallback.
hanging_block = _{
    substitution_def
    | image_directive
    | code_directive
    | admonition
    | admonition_gen
    | target
    | title
    | bullet_list
    | paragraph
// TODO: implement all those things:
// | block_quote
// | verbatim
// | doctest_block
// | horizontal_rule
// | table
// | ordered_list
// | plain
}

// Substitution definition. A block type
// The spaces after ".." are pushed for the duration of the definition and dropped at its end.
substitution_def  =  { ".." ~ PUSH(" "+) ~ "|" ~ substitution_name ~ "|" ~ " "+ ~ inline_dirblock ~ DROP }
// A name neither starts with a space nor contains "|"; inner words are separated by spaces.
substitution_name =  { !" " ~ (!(" "|"|") ~ ANY)+ ~ (" "+ ~ (!(" "|"|") ~ ANY)+)* }
inline_dirblock   = _{ replace | image }  // TODO: implement others

// Target. A block type
// The backquoted form is tried first, then the unquoted one.
target         =  { target_qu | target_uq }
target_uq      = _{ ".. _"  ~         target_name_uq ~           ":" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE }
target_qu      = _{ ".. _`" ~ !"``" ~ target_name_qu ~ !"``:" ~ "`:" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE }
target_name_uq =  { ( !("_"|":"|"`") ~ !NEWLINE ~ ANY )* }
target_name_qu =  { ( !(":"|"`"|"_>") ~ ANY )* }
link_target    =  { nonspacechar+ }

// Title. A block type
title = { title_double | title_single }
// Overline + text + underline: the overline is pushed, and POP requires the underline to repeat it exactly.
// PEEK[..-1] matches the indentation below the pushed overline.
title_double = { PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP }
// Text followed by an underline at the current indentation.
title_single = { line ~ PEEK[..] ~ adornments ~ NEWLINE }

// Bullet list. A block type.
bullet_list =  { bullet_item ~ (PEEK[..] ~ bullet_item)* }
// The spaces after the marker are pushed while the item is parsed and dropped afterwards.
bullet_item =  { bullet_marker ~ PUSH(" "+) ~ line ~ blank_line* ~ blist_body? ~ DROP }
// Body blocks: match the outer indentation, then replace the top stack entry
// with a space followed by the previous top entry.
blist_body  = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* }

// paragraph. A block type.
paragraph = { inlines }


/* Directives: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives
 * .. name:: arguments ~ :options: ~ blank_line+ ~ content
 * Everything except for the first argument has to be indented
 */


// Directives with options can have these or specific ones:

common_opt_name = { "class" | "name" }

// Replace. A directive only usable in substitutions.

replace = { ^"replace::" ~ " "* ~ paragraph }

// Image. A directive.

image_directive = _{ ".." ~ PUSH(" "+) ~ image ~ DROP }
image           =  { ^"image::" ~ line ~ image_opt_block? }
image_opt_block = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ image_option } //TODO: merge with other directives?
image_option    =  { ":" ~ image_opt_name ~ ":" ~ line }
image_opt_name  =  { common_opt_name | "alt" | "height" | "width" | "scale" | "align" | "target" }

// Code block. A directive

code_directive = {
    ".." ~ PUSH(" "+) ~ "code" ~ "-block"? ~ "::" ~ (" "+ ~ source)? ~ NEWLINE ~
    blank_line+ ~ PEEK[..-1] ~ PUSH(" " ~ POP) ~ code_block ~ DROP
}
source          = { (!NEWLINE ~ ANY)+ }
// Code lines at the current indentation, optionally separated by whitespace-only lines.
code_block      = { code_line ~ (code_line_blank* ~ PEEK[..] ~ code_line)* }
code_line_blank = { " "* ~ NEWLINE }
code_line       = { (!NEWLINE ~ ANY)+ ~ NEWLINE }

// Admonition. A directive. The generic one has a title

admonition         =  { ".." ~ PUSH(" "+) ~ ^"admonition::" ~ line ~ blank_line* ~ admonition_content? ~ DROP }
admonition_gen     =  { ".." ~ PUSH(" "+) ~ admonition_type ~ "::" ~ (blank_line | line) ~ blank_line* ~ admonition_content? ~ DROP }
admonition_type    =  { ^"attention" | ^"caution" | ^"danger" | ^"error" | ^"hint" | ^"important" | ^"note" | ^"tip" | ^"warning" }
admonition_content = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } //TODO: merge with other directives?



/*
 * inlines
 */


// A single line of inline content, including its terminating newline.
line       =  { !marker ~ inline+ ~ NEWLINE }
blank_line = _{ !marker ~ !inline ~ " "* ~ NEWLINE }

// One or more lines of inline content. Continuation lines must start with the
// current indentation; the final NEWLINE is only consumed for multi-line content.
inlines = _{ !marker ~ inline+ ~ ( ( ws_newline ~ PEEK[..] ~ !marker ~ inline+ )+ ~ NEWLINE )? }
ws_newline = { NEWLINE }
inline = _{ inline_special | str }
inline_special = _{
    reference
    | substitution_ref
    | emph_outer
    | strong_outer
    | literal_outer
//     | ul_or_star_line
//     | space
//     | note_reference
//     | footnote
//     //| citation
//     | code
//     | application_depent
//     | entity
//     | escaped_char
//     | smart
//     | symbol
}

// Plain text: everything up to the next newline or the next special inline.
str = { (!(NEWLINE | inline_special) ~ ANY)+ }

// simple formatting
inline_nested = _{ inline_special | str_nested }
str_nested    =  { word_nested ~ ( " "+ ~ word_nested)* }
// TODO: allow ` in emph and * in literal
word_nested   = _{ (!(NEWLINE | " " | inline_special | "*" | "`") ~ ANY)+ }

emph_outer    = _{ "*" ~ emph ~ "*" }
emph          =  { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* }
strong_outer  = _{ "**" ~ strong ~ "**" }
strong        =  { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* }
literal_outer = _{ "``" ~ literal ~ "``" }
literal       =  { (!("`"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("`"|" ") ~ inline_nested)+)* }

// inline links
reference = { reference_target | reference_explicit | reference_auto }

reference_target    = { (reference_target_uq ~ "_" | reference_target_qu) ~ !(LETTER|NUMBER) }
reference_target_uq = { (!("_"|":"|"`") ~ nonspacechar)+ }
reference_target_qu = { ( !("`"? ~ "`_") ~ "`" ~ !"``" ) ~ reference_text? ~ ("<" ~ reference_bracketed ~ ">")? ~ ( "`" ~ !"``" ) ~ "_" }
reference_text      = { !"<" ~ ( !("`"|"<") ~ ANY )+ }
reference_bracketed = { url | (target_name_qu ~ "_") | relative_reference }
relative_reference  = { (!("`"|">") ~ ANY)+ }

reference_explicit        =  { reference_label ~ "(" ~ " "* ~ reference_source ~ " "* ~ (NEWLINE ~ PEEK[..])? ~ reference_title ~ " "* ~ ")" }
reference_label           =  { "[" ~ !"^" ~ (!"]" ~ inline)* ~ "]" }
reference_source          =  { reference_source_contents }
reference_source_contents = _{ ( (!("("|")"|">") ~ nonspacechar)+ | "(" ~ reference_source_contents ~ ")" )* }
reference_title           =  { ( reference_title_single | reference_title_double | "" ) }
// A title ends at a quote followed by optional spaces and then ")" or a newline.
// The spaces must be optional (" "*): otherwise a closing quote that directly
// precedes ")" is not recognised and the title runs on to the end of input.
reference_title_single    =  { "'"  ~ ( !("'"  ~ " "* ~ (")" | NEWLINE)) ~ ANY )* ~ "'"  }
reference_title_double    =  { "\"" ~ ( !("\"" ~ " "* ~ (")" | NEWLINE)) ~ ANY )* ~ "\"" }

// Emails can't end with punctuation, but URLs must use a separate rule.
reference_auto = { url_auto | email }
//reference_embedded = { "`" ~ reference_embedded_source ~ "<" ~ absolute_url_with_fragment ~ ">`_" ~ "_"? }
//reference_embedded_source = { ( !("<"|":"|"`") ~ ( " " | nonspacechar | blank_line ) )* }

substitution_ref = _{ "|" ~ substitution_name ~ "|" }

/* URLs as defined by the WHATWG URL standard. */
url = { absolute_url_no_query ~ ("?" ~ url_unit*)? ~ ("#" ~ url_unit*)? }
absolute_url_no_query = {
    ( special_url_scheme ~ ":" ~ scheme_relative_special_url ) |
    ( ^"file:" ~ scheme_relative_file_url ) |
    ( arbitrary_scheme ~ ":" ~ relative_url )
}
scheme_relative_special_url = { "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? }
path_absolute_url = { "/" ~ path_relative_url }
path_relative_url = { ( url_path_segment_unit* ~ "/" )* ~ url_path_segment_unit* }
url_path_segment_unit = { !("/"|"?") ~ url_unit }
url_port = { ASCII_DIGIT* }
scheme_relative_file_url = { "//" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url }
relative_url = { ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? ) | path_absolute_url | (!(arbitrary_scheme ~ ":") ~ path_relative_url) }
/* this is approximately a superset of valid hosts and opaque hosts */
host = { ( !(":"|"/"|"?"|"#") ~ url_unit)+ | ("["~(ASCII_HEX_DIGIT|"."|":")+~"]") }
special_url_scheme = { ^"ftp" | (^"http" | ^"ws") ~ ^"s"? } /* doesn't include "file" */
arbitrary_scheme = { ASCII_ALPHA ~ ASCII_ALPHANUMERIC* }
url_unit = {
    ASCII_ALPHANUMERIC |
    "!"|"$"|"&"|"'"|"("|")"|"*"|"+"|","|"-"|"."|"/"|":"|";"|"="|"?"|"@"|"_"|"~" |
    (!(SURROGATE|NONCHARACTER_CODE_POINT) ~ '\u{A0}'..'\u{10FFFD}') |
    ("%" ~ ASCII_HEX_DIGIT{2})
}

/*
 * Rules for URLs that don't end in punctuation.
 * This is a modification of the rules above to incorporate the docutils rules
 * for the final character in an auto URL and for the character after it.
 * The patterns used here to emulate the behavior of docutils' regex are taken
 * from an external source whose citation is missing here (TODO: restore the link).
 */
url_auto = {
    ( absolute_url_no_query ~ ("?" ~ url_unit*)? ~ "#" ~ url_units_auto ) |
    ( absolute_url_no_query ~ "?" ~ url_units_auto ) |
    ( special_url_scheme ~ "://" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) |
    ( special_url_scheme ~ "://" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) |
    ( special_url_scheme ~ "://" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) |
    ( ^"file://" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url_auto ) |
    ( arbitrary_scheme ~ ":" ~ relative_url_auto )
}
domain_host_auto = {
    ( !(":"|"/"|"?"|"#") ~ url_unit ~ url_units_auto ) |
    ( !(":"|"/"|"?"|"#") ~ url_unit ~ &">" ) |
    ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url )
}
path_absolute_url_auto = { "/" ~ path_relative_url_auto }
path_relative_url_auto = { prua1 | prua2 | &follows_auto_url }
prua1 = { ( url_path_segment_unit ~ prua1 ) | ( "/" ~ path_relative_url_auto ) }
prua2 = { ( url_path_segment_unit ~ prua2 ) | ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"="|"+") ~ &follows_auto_url ) }
relative_url_auto = {
    ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) |
    ( "//" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) |
    ( "//" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) |
    path_absolute_url_auto |
    // (prua1|prua2) is path_relative_url_auto minus the &follows_auto_url case
    (!(arbitrary_scheme ~ ":") ~ (prua1 | prua2))
}
url_units_auto = {
    ( url_unit ~ url_units_auto ) |
    ( url_unit ~ &">" ~ &follows_auto_url ) |
    ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url )
}
follows_auto_url = {
    EOI|"\x00"|WHITE_SPACE|">"|"\u{201A}"|"\u{201E}"|
    (!(CONNECTOR_PUNCTUATION|OPEN_PUNCTUATION|"#"|"%"|"&"|"*"|"@") ~ PUNCTUATION)
}

/* Rules for emails as defined by the HTML standard */
email = { ( email_atext | "." )+ ~ "@" ~ email_label ~ ( "." ~ email_label )* }
email_atext = { ASCII_ALPHANUMERIC|"!"|"#"|"$"|"%"|"&"|"'"|"/"|"="|"?"|"^"|"_"|"`"|"{"|"|"|"}"|"~" }
// A label starts with an alphanumeric and must not end in a run of hyphens.
email_label = { ASCII_ALPHANUMERIC ~ ( !("-"+ ~ !ASCII_ALPHANUMERIC) ~ (ASCII_ALPHANUMERIC|"-") ){0,62} }

/*
 * character classes
 */


bullet_marker = _{ "+" | "*" | "-" }
adornments = {
    // recommended
    "="+ | "-"+ | "`"+ | ":"+ | "."+ | "'"+ | "\""+ | "~"+ | "^"+ | "_"+ | "*"+ | "+"+ | "#"+ |
    // parentheses
    "("+ | ")"+ | "["+ | "]"+ | "{"+ | "}"+ |
    // punctuation
    ","+ | ";"+ | "!"+ | "?"+ |
    // operators
    "&"+ | "|"+ | "/"+ | "%"+ | "<"+ | ">"+ |
    // misc
    "$"+ | "@"+ | "\\"+
}
nonspacechar = _{ !(" " | NEWLINE) ~ ANY }


/*
 * lookaheads. do not use in another position
 */


marker = _{ (bullet_marker | "..") ~ " " }



//#################################################################################

// Everything below this point is commented out and is not part of the active grammar.


// doctest_block = { (doctest_line+ ~ (!(">" | blank_line) ~ line)*)+ }

// block_quote_raw = { ":" ~ blank_line ~ NEWLINE ~ nonblank_indented_line+ }

// block_quote_chunk = {
//     !"::" ~ ":" ~ blank_line ~
//     NEWLINE ~
//     blank_line* ~
//     nonblank_indented_line+
// }

// block_quote = { block_quote_chunk+ }

// horizontal_rule = {
//     ( "=" ~ sp ~ "=" ~ sp ~ "=" ~ (sp ~ "=")*
//     | "-" ~ sp ~ "-" ~ sp ~ "-" ~ (sp ~ "-")*
//     | "*" ~ sp ~ "*" ~ sp ~ "*" ~ (sp ~ "*")*
//     | "^" ~ sp ~ "^" ~ sp ~ "^" ~ (sp ~ "^")*
//     | "~" ~ sp ~ "~" ~ sp ~ "~" ~ (sp ~ "~")*
//     | "_" ~ sp ~ "_" ~ sp ~ "_" ~ (sp ~ "_")*
//     ) ~
//     sp ~ NEWLINE ~ blank_line+
// }

// table = { grid_table | header_less_grid_table | simple_table }

// simple_table = { "NotImplemented" ~ "simple_table" }

// grid_table = { grid_table_header ~ grid_table_header_sep ~ grid_table_body+ }
// header_less_grid_table = { grid_table_sep ~ grid_table_body+ }
// grid_table_header = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line ~ grid_table_row+ }
// grid_table_body = { ( grid_table_row ~ grid_table_sep )+ }
// grid_table_row = { sp ~ "|" ~ sp ~ ( table_cell ~ sp ~ "|" )+ ~ blank_line }
// table_cell = { ( ":" | ">" | "<" | "/" | "-" | spacechar | escaped_char | alphanumeric )+ }
// grid_table_header_sep = { sp ~ "+" ~ ( "="+ ~ "+" )+ ~ blank_line }
// grid_table_sep = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line }

// bullet = { !horizontal_rule ~ ("+" | "*" | "-") ~ spacechar+ }

// bullet_list = { &bullet ~ (list_tight | list_loose) }

// list_tight = { list_item_tight+ ~ blank_line* ~ !(bullet | enumerator | def_marker) }
// list_loose = { ( list_item ~ blank_line* )+ }

// list_item = { (bullet | enumerator | def_marker) ~ list_block ~ list_continuation_block* }
// list_item_tight = {
//     (bullet | enumerator | def_marker) ~
//     list_block ~
//     (!blank_line ~ list_continuation_block)* ~
//     !list_continuation_block
// }

// list_block = { !blank_line ~ line ~ list_block_line* }

// list_continuation_block = { blank_line* ~ ( indent ~ list_block )+ }

// enumerator = { (ASCII_DIGIT+ | "#"+) ~ "." ~ spacechar+ }

// ordered_list = { &enumerator ~ (list_tight | list_loose) }

// list_block_line = {
//     !blank_line ~
//     !( (indent? ~ (bullet | enumerator)) | def_marker ) ~
//     !horizontal_rule ~
//     optionally_indented_line
// }



// space = _{ spacechar+ }

// str = { normal_char+ ~ str_chunk* }
// str_chunk = _{ (normal_char | "_"+ ~ &alphanumeric)+ }

// escaped_char = { "\\" ~ !NEWLINE ~ ("-" | "\\" | "`" | "|" | "*" | "_" | "{" | "}" | "[" | "]" | "(" | ")" | "#" | "+" | "." | "!" | ">" | "<") }

// entity = { hex_entity | dec_entity | char_entity }

// endline = _{ line_break | terminal_endline | normal_endline }
// normal_endline = _{ sp ~ NEWLINE ~ !(blank_line | ">" | line ~ ("="+ | "-"+) ~ NEWLINE) }
// terminal_endline = _{ sp ~ NEWLINE ~ EOI }
// line_break = _{ " " ~ normal_endline }

// symbol = { special_char }

// application_depent = { !("`_" | "``_") ~ "`" ~ !"``" ~ target_name_qu ~ "`" ~ !("``" | "_") }

// // This keeps the parser from getting bogged down on long strings of "*" or "_",
// // or strings of "*" or "_" with space on each side:
// ul_or_star_line = { ul_line | star_line }
// star_line = { "****" ~ "*"* | spacechar ~ "*"+ ~ &spacechar }
// ul_line = { "____" ~ "_"* | spacechar ~ "_"+ ~ &spacechar }


// empty_title = { "" }

// ticks_2 = { "``" ~ !"`" }

// code = { ticks_2 ~ ( (!"`" ~ nonspacechar)+ | "_" | !ticks_2 ~ "`" | !(sp ~ ticks_2) ~ (spacechar | NEWLINE ~ !blank_line) )+ ~ ticks_2 }


// quoted = {
//     "\"" ~ (!"\"" ~ ANY)* ~ "\"" |
//     "'" ~ (!"'" ~ ANY)* ~ "'"
// }
// spacechar = _{ " " | "\t" }
// sp = _{ spacechar* }
// spnl = _{ sp ~ (NEWLINE ~ sp)? }
// special_char = _{ "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "\"" | "'" | extended_special_char }
// normal_char = _{ !( special_char | spacechar | NEWLINE ) ~ ANY }
// alphanumeric = {
//     ASCII_ALPHANUMERIC |
//     "\u{200}" | "\u{201}" | "\u{202}" | "\u{203}" | "\u{204}" | "\u{205}" | "\u{206}" | "\u{207}" |
//     "\u{210}" | "\u{211}" | "\u{212}" | "\u{213}" | "\u{214}" | "\u{215}" | "\u{216}" | "\u{217}" |
//     "\u{220}" | "\u{221}" | "\u{222}" | "\u{223}" | "\u{224}" | "\u{225}" | "\u{226}" | "\u{227}" |
//     "\u{230}" | "\u{231}" | "\u{232}" | "\u{233}" | "\u{234}" | "\u{235}" | "\u{236}" | "\u{237}" |
//     "\u{240}" | "\u{241}" | "\u{242}" | "\u{243}" | "\u{244}" | "\u{245}" | "\u{246}" | "\u{247}" |
//     "\u{250}" | "\u{251}" | "\u{252}" | "\u{253}" | "\u{254}" | "\u{255}" | "\u{256}" | "\u{257}" |
//     "\u{260}" | "\u{261}" | "\u{262}" | "\u{263}" | "\u{264}" | "\u{265}" | "\u{266}" | "\u{267}" |
//     "\u{270}" | "\u{271}" | "\u{272}" | "\u{273}" | "\u{274}" | "\u{275}" | "\u{276}" | "\u{277}" |
//     "\u{300}" | "\u{301}" | "\u{302}" | "\u{303}" | "\u{304}" | "\u{305}" | "\u{306}" | "\u{307}" |
//     "\u{310}" | "\u{311}" | "\u{312}" | "\u{313}" | "\u{314}" | "\u{315}" | "\u{316}" | "\u{317}" |
//     "\u{320}" | "\u{321}" | "\u{322}" | "\u{323}" | "\u{324}" | "\u{325}" | "\u{326}" | "\u{327}" |
//     "\u{330}" | "\u{331}" | "\u{332}" | "\u{333}" | "\u{334}" | "\u{335}" | "\u{336}" | "\u{337}" |
//     "\u{340}" | "\u{341}" | "\u{342}" | "\u{343}" | "\u{344}" | "\u{345}" | "\u{346}" | "\u{347}" |
//     "\u{350}" | "\u{351}" | "\u{352}" | "\u{353}" | "\u{354}" | "\u{355}" | "\u{356}" | "\u{357}" |
//     "\u{360}" | "\u{361}" | "\u{362}" | "\u{363}" | "\u{364}" | "\u{365}" | "\u{366}" | "\u{367}" |
//     "\u{370}" | "\u{371}" | "\u{372}" | "\u{373}" | "\u{374}" | "\u{375}" | "\u{376}" | "\u{377}"
// }

// hex_entity = { "&#" ~ ("X"|"x") ~ ('0'..'9' | 'a'..'f' | 'A'..'F')+ ~ ";" }
// dec_entity = { "&#" ~ ASCII_DIGIT+ ~ ";" }
// char_entity = { "&" ~ ASCII_ALPHANUMERIC+ ~ ";" }

// indent = _{ "\t" | " " }
// indented_line = { indent ~ line }
// optionally_indented_line = { indent? ~ line }

// doctest_line = { ">>> " ~ raw_line }

// line = _{ raw_line }

// raw_line = _{ (!NEWLINE ~ ANY)* ~ NEWLINE | (!EOI ~ ANY)+ ~ EOI }

// // Syntax extensions

// extended_special_char = {
//     //&{ extension(EXT_SMART) } ~
//     ("." | "-" | "\"" | "'") |
//     //&{ extension(EXT_NOTES) } ~
//     "^"
// }

// smart = {
//     //&{ extension(EXT_SMART) } ~
//     ( ellipsis | dash | single_quoted | double_quoted | apostrophe )
// }

// apostrophe = { "'" }

// ellipsis = { "..." | ". . ." }

// dash = { em_dash | en_dash }
// en_dash = { "-" ~ &ASCII_DIGIT }
// em_dash = { "---" | "--" }

// single_quote_start = { "'" ~ !(spacechar | NEWLINE) }
// single_quote_end = { "'" ~ !alphanumeric }
// single_quoted = { single_quote_start ~ ( !single_quote_end ~ inline )+ ~ single_quote_end }

// double_quote_start = { "\"" }
// double_quote_end = { "\"" }
// double_quoted = { double_quote_start ~ ( !double_quote_end ~ inline )+ ~ double_quote_end }

// footnote = { "[#" ~ (!"]" ~ inline)+ ~ "]_" }

// definition = {
//     &( (!defmark ~ nonspacechar ~ raw_line) ~ blank_line? ~ defmark) ~
//     d_list_title+ ~
//     (def_tight | def_loose)
// }
// d_list_title = { !defmark ~ &nonspacechar ~ (!endline ~ inline)+ ~ sp ~ NEWLINE }
// def_tight = { &defmark ~ list_tight }
// def_loose = { blank_line ~ &defmark ~ list_loose }
// defmark = { (":" | "~") ~ spacechar+ }
// def_marker = {
//     //&{ extension(EXT_DLISTS) } ~
//     defmark
// }