diff options
| author | Philipp A | 2018-10-29 11:19:47 +0100 |
|---|---|---|
| committer | Philipp A | 2018-10-29 11:19:47 +0100 |
| commit | 08c373c48fed56d99759d54588081aefa297af3c (patch) | |
| tree | 0c1e80c86ee2f8b8b69d6be26e2a8a65a1420088 | |
| parent | 11050075b4a97a51714c61f4e174d6ea85b793e5 (diff) | |
| download | rust-rst-08c373c48fed56d99759d54588081aefa297af3c.tar.bz2 | |
added parser module
| -rw-r--r-- | Cargo.toml | 8 | ||||
| -rw-r--r-- | src/document_tree/element_categories.rs | 2 | ||||
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/parser/mod.rs | 34 | ||||
| -rw-r--r-- | src/rst.pest | 52 |
5 files changed, 57 insertions, 41 deletions
@@ -11,6 +11,8 @@ homepage = 'https://github.com/flying-sheep/rust-rst' repository = 'https://github.com/flying-sheep/rust-rst.git' [dependencies] -url = '0.5' -bitflags = '0.5' -unicode_categories = '0.1.0' +url = '1.7.1' +bitflags = '1.0.4' +unicode_categories = '0.1.1' +pest = '2.0.2' +pest_derive = '2.0.1' diff --git a/src/document_tree/element_categories.rs b/src/document_tree/element_categories.rs index e3a2425..7fcce82 100644 --- a/src/document_tree/element_categories.rs +++ b/src/document_tree/element_categories.rs @@ -10,7 +10,7 @@ pub trait HasChildren<C> { self.children_mut().push(child.into()); } fn append_children<R: Into<C> + Clone>(&mut self, more: &[R]) { - let mut children = self.children_mut(); + let children = self.children_mut(); children.reserve(more.len()); for child in more { children.push(child.clone().into()); @@ -1,5 +1,7 @@ extern crate url; +extern crate pest; #[macro_use] +extern crate pest_derive; extern crate bitflags; extern crate unicode_categories; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 79c66ba..07a2278 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1 +1,35 @@ pub mod token; + +#[derive(Parser)] +#[grammar = "rst.pest"] +pub struct RstParser; + + + +#[test] +fn line() { + use pest::Parser; + let result = RstParser::parse(Rule::plain, &"line\n").expect("unsuccessful parse").next().unwrap(); + eprintln!("{}", result); +} + +#[test] +fn title() { + use pest::Parser; + let result = RstParser::parse(Rule::heading, &"\ +Title +===== +").expect("unsuccessful parse").next().unwrap(); + eprintln!("{}", result); +} + +#[test] +fn heading_title() { + use pest::Parser; + let result = RstParser::parse(Rule::heading_title, &"\ +----- +Title +----- +").expect("unsuccessful parse").next().unwrap(); + eprintln!("{}", result); +} diff --git a/src/rst.pest b/src/rst.pest index cd19eed..465af27 100644 --- a/src/rst.pest +++ b/src/rst.pest @@ -105,19 +105,12 @@ table = { grid_table | header_less_grid_table | simple_table } simple_table = { "NotImplemented" ~ "simple_table" } grid_table = { grid_table_header ~ grid_table_header_sep ~ grid_table_body+ } - header_less_grid_table = { grid_table_sep ~ grid_table_body+ } - grid_table_header = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line ~ grid_table_row+ } - grid_table_body = { ( grid_table_row ~ grid_table_sep )+ } - grid_table_row = { sp ~ "|" ~ sp ~ ( table_cell ~ sp ~ "|" )+ ~ blank_line } - table_cell = { ( ":" | ">" | "<" | "/" | "-" | spacechar | escaped_char | alphanumeric )+ } - grid_table_header_sep = { sp ~ "+" ~ ( "="+ ~ "+" )+ ~ blank_line } - grid_table_sep = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line } bullet = { !horizontal_rule ~ nonindent_space ~ ("+" | "*" | "-") ~ spacechar+ } @@ -125,11 +118,9 @@ bullet = { !horizontal_rule ~ nonindent_space ~ ("+" | "*" | "-") ~ spacechar+ } bullet_list = { &bullet ~ (list_tight | list_loose) } list_tight = { list_item_tight+ ~ blank_line* ~ !(bullet | enumerator | def_marker) } - list_loose = { ( list_item ~ blank_line* )+ } list_item = { (bullet | enumerator | def_marker) ~ list_block ~ list_continuation_block* } - list_item_tight = { (bullet | enumerator | def_marker) ~ list_block ~ @@ -329,9 +320,7 @@ html_block_in_tags = { } html_block = { &"<" ~ ( html_block_in_tags | html_comment | html_block_self_closing ) ~ blank_line+ } - html_block_self_closing = { "<" ~ spnl ~ html_block_type ~ spnl ~ html_attribute* ~ "/" ~ spnl ~ ">" } - html_block_type = { "address" | "block_quote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" | @@ -369,12 +358,10 @@ inline = { | symbol } -space = { spacechar+ } +space = _{ spacechar+ } str = { normal_char+ ~ str_chunk* } - -str_chunk = { (normal_char | "_"+ ~ &alphanumeric)+ | apos_chunk } - +str_chunk = _{ (normal_char | "_"+ ~ &alphanumeric)+ | apos_chunk } apos_chunk = { // &{ extension(EXT_SMART) } ~ "'" ~ &alphanumeric @@ -384,13 +371,10 @@ escaped_char = { "\\" ~ !NEWLINE ~ ("-" | "\\" | "`" | "|" | "*" | "_" | "{" | " entity = { hex_entity | dec_entity | char_entity } -endline = { line_break | terminal_endline | normal_endline } - -normal_endline = { sp ~ NEWLINE ~ !blank_line ~ !">" ~ !(line ~ ("="+ | "-"+) ~ NEWLINE) } - -terminal_endline = { sp ~ NEWLINE ~ EOI } - -line_break = { " " ~ normal_endline } +endline = _{ line_break | terminal_endline | normal_endline } +normal_endline = _{ sp ~ NEWLINE ~ !blank_line ~ !">" ~ !(line ~ ("="+ | "-"+) ~ NEWLINE) } +terminal_endline = _{ sp ~ NEWLINE ~ EOI } +line_break = _{ " " ~ normal_endline } symbol = { special_char } @@ -405,9 +389,7 @@ ul_line = { "____" ~ "_"* | spacechar ~ "_"+ ~ &spacechar } whitespace = { spacechar | NEWLINE } emph = { "*" ~ !whitespace ~ ( !"*" ~ inline )+ ~ "*" } - strong = { "**" ~ !whitespace ~ ( !"**" ~ inline )+ ~ "**" } - strike = { //&{ extension(EXT_STRIKE) } ~ "~~" ~ !whitespace ~ ( !"~~" ~ inline )+ ~ "~~" @@ -471,12 +453,12 @@ quoted = { html_attribute = { (ASCII_ALPHANUMERIC | "-")+ ~ spnl ~ ("=" ~ spnl ~ (quoted | (!">" ~ nonspacechar)+))? ~ spnl } html_comment = { "<!--" ~ (!"-->" ~ ANY)* ~ "-->" } html_tag = { "<" ~ spnl ~ "/"? ~ ASCII_ALPHANUMERIC+ ~ spnl ~ html_attribute* ~ "/"? ~ spnl ~ ">" } -spacechar = { " " | "\t" } -nonspacechar = { !spacechar ~ !NEWLINE ~ ANY } -sp = { spacechar* } -spnl = { sp ~ (NEWLINE ~ sp)? } -special_char = { "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "\"" | "'" | extended_special_char } -normal_char = { !( special_char | spacechar | NEWLINE ) ~ ANY } +spacechar = _{ " " | "\t" } +nonspacechar = _{ !spacechar ~ !NEWLINE ~ ANY } +sp = _{ spacechar* } +spnl = _{ sp ~ (NEWLINE ~ sp)? } +special_char = _{ "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "\"" | "'" | extended_special_char } +normal_char = _{ !( special_char | spacechar | NEWLINE ) ~ ANY } alphanumeric = { ASCII_ALPHANUMERIC | "\u{200}" | "\u{201}" | "\u{202}" | "\u{203}" | "\u{204}" | "\u{205}" | "\u{206}" | "\u{207}" | @@ -501,8 +483,8 @@ hex_entity = { "&#" ~ ("X"|"x") ~ ('0'..'9' | 'a'..'f' | 'A'..'F')+ ~ ";" } dec_entity = { "&#" ~ ASCII_DIGIT+ ~ ";" } char_entity = { "&" ~ ASCII_ALPHANUMERIC+ ~ ";" } -nonindent_space = { " " | " " | " " | "" } -indent = { "\t" | " " } +nonindent_space = { " " | " " | "" } +indent = { "\t" | " " } indented_line = { indent ~ line } optionally_indented_line = { indent? ~ line } @@ -510,7 +492,7 @@ doctest_line = { ">>> " ~ raw_line } line = { raw_line } -raw_line = { ( (!NEWLINE ~ ANY)* ~ NEWLINE | ANY+ ~ EOI ) } +raw_line = { (!NEWLINE ~ ANY)* ~ NEWLINE | (!EOI ~ ANY)+ ~ EOI } skip_block = { html_block | @@ -569,18 +551,14 @@ notes = { (note | skip_block)* } raw_note_block = { ( !blank_line ~ optionally_indented_line )+ ~ blank_line* } - definition = { &( (nonindent_space ~ !defmark ~ nonspacechar ~ raw_line) ~ blank_line? ~ defmark) ~ d_list_title+ ~ (def_tight | def_loose) } - d_list_title = { nonindent_space ~ !defmark ~ &nonspacechar ~ (!endline ~ inline)+ ~ sp ~ NEWLINE } - def_tight = { &defmark ~ list_tight } def_loose = { blank_line ~ &defmark ~ list_loose } - defmark = { nonindent_space ~ (":" | "~") ~ spacechar+ } def_marker = { //&{ extension(EXT_DLISTS) } ~ |
