aboutsummaryrefslogtreecommitdiffstats
path: root/parser/src/conversion
diff options
context:
space:
mode:
Diffstat (limited to 'parser/src/conversion')
-rw-r--r-- parser/src/conversion/block.rs 202
-rw-r--r-- parser/src/conversion/inline.rs 160
-rw-r--r-- parser/src/conversion/tests.rs 65
3 files changed, 427 insertions, 0 deletions
diff --git a/parser/src/conversion/block.rs b/parser/src/conversion/block.rs
new file mode 100644
index 0000000..ab18c48
--- /dev/null
+++ b/parser/src/conversion/block.rs
@@ -0,0 +1,202 @@
+use failure::{Error,bail};
+use pest::iterators::Pair;
+
+use document_tree::{
+ Element,HasChildren,ExtraAttributes,
+ elements as e,
+ element_categories as c,
+ extra_attributes as a,
+ attribute_types as at
+};
+
+use crate::{
+ pest_rst::Rule,
+ pair_ext_parse::PairExt,
+};
+use super::{whitespace_normalize_name, inline::convert_inlines};
+
+
+/// Adornment style of a section title, together with the adornment character.
+#[derive(PartialEq)]
+pub(super) enum TitleKind {
+    /// Title parsed by the `title_double` rule.
+    Double(char),
+    /// Title parsed by the `title_single` rule.
+    Single(char),
+}
+
+/// Output of `convert_ssubel`: either a title (with the kind of adornment it
+/// was written with) or an already converted structural sub-element.
+pub(super) enum TitleOrSsubel {
+ /// A title element and its adornment kind.
+ Title(e::Title, TitleKind),
+ /// Any other structural sub-element.
+ Ssubel(c::StructuralSubElement),
+}
+
+
+/// Converts one pair into either a title or a structural sub-element.
+///
+/// Returns `Ok(None)` for the end-of-input marker, which has no
+/// representation in the document tree.
+pub(super) fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<TitleOrSsubel>, Error> {
+    let converted = match pair.as_rule() {
+        Rule::EOI => return Ok(None),
+        Rule::title => {
+            let (title, kind) = convert_title(pair)?;
+            TitleOrSsubel::Title(title, kind)
+        },
+        //TODO: subtitle, decoration, docinfo
+        _ => TitleOrSsubel::Ssubel(convert_substructure(pair)?.into()),
+    };
+    Ok(Some(converted))
+}
+
+
+/// Converts a pair into a `c::SubStructure`.
+///
+/// Every rule is currently forwarded to `convert_body_elem`.
+fn convert_substructure(pair: Pair<Rule>) -> Result<c::SubStructure, Error> {
+    // todo: Topic, Sidebar, Transition
+    // no section here, as it’s constructed from titles
+    let body_elem = convert_body_elem(pair)?;
+    Ok(body_elem.into())
+}
+
+
+/// Converts a pair for a body-level construct into a `c::BodyElement`.
+///
+/// # Errors
+/// Returns an error if the nested conversion fails, or if the pair's rule has
+/// no conversion here yet. Unsupported input is reported to the caller instead
+/// of aborting the process with a panic.
+fn convert_body_elem(pair: Pair<Rule>) -> Result<c::BodyElement, Error> {
+    Ok(match pair.as_rule() {
+        Rule::paragraph => convert_paragraph(pair)?.into(),
+        Rule::target => convert_target(pair)?.into(),
+        Rule::substitution_def => convert_substitution_def(pair)?.into(),
+        Rule::admonition_gen => convert_admonition_gen(pair)?.into(),
+        Rule::image => convert_image::<e::Image>(pair)?.into(),
+        Rule::bullet_list => convert_bullet_list(pair)?.into(),
+        // The function already returns `Result`, so an unsupported rule is
+        // surfaced as an error rather than a panic.
+        rule => bail!("unhandled rule {:?}", rule),
+    })
+}
+
+
+/// Converts a `title` pair into a `Title` element plus its adornment kind.
+///
+/// The raw title text is lowercased, stripped of newlines and has its spaces
+/// replaced by dashes; the result is added to the element's names.
+///
+/// # Panics
+/// Panics if the pair has no inner pair, if an unexpected rule shows up inside
+/// the title, if the adornment line is empty, or if the text line or the
+/// adornment is missing.
+fn convert_title(pair: Pair<Rule>) -> Result<(e::Title, TitleKind), Error> {
+    // title_double or title_single. Remember which one before consuming it.
+    let variant = pair.into_inner().next().unwrap();
+    let variant_rule = variant.as_rule();
+
+    let mut raw_text: Option<String> = None;
+    let mut inlines: Option<Vec<c::TextOrInlineElement>> = None;
+    let mut adornment: Option<char> = None;
+    for part in variant.into_inner() {
+        match part.as_rule() {
+            Rule::adornments => {
+                adornment = Some(part.as_str().chars().next().expect("Empty adornment?"));
+            },
+            Rule::line => {
+                raw_text = Some(part.as_str().to_owned());
+                inlines = Some(convert_inlines(part)?);
+            },
+            rule => unimplemented!("Unexpected rule in title: {:?}", rule),
+        }
+    }
+
+    // now we encountered one line of text and one of adornments
+    // TODO: emit error if the adornment line is too short (has to match title length)
+    let mut elem = e::Title::with_children(inlines.expect("No text in title"));
+    if let Some(text) = raw_text {
+        //TODO: slugify properly
+        let slug = text.to_lowercase().replace("\n", "").replace(" ", "-");
+        elem.names_mut().push(at::NameToken(slug));
+    }
+
+    let title_kind = match variant_rule {
+        Rule::title_single => TitleKind::Single(adornment.unwrap()),
+        Rule::title_double => TitleKind::Double(adornment.unwrap()),
+        _ => unreachable!(),
+    };
+    Ok((elem, title_kind))
+}
+
+
+/// Converts a `paragraph` pair into a `Paragraph` holding its inline children.
+fn convert_paragraph(pair: Pair<Rule>) -> Result<e::Paragraph, Error> {
+    let inlines = convert_inlines(pair)?;
+    Ok(e::Paragraph::with_children(inlines))
+}
+
+
+/// Converts a `target` pair into a non-anonymous `Target` element.
+///
+/// A target name is recorded both as an id and as a name; a link target is
+/// parsed and stored as the element's `refuri`.
+///
+/// # Panics
+/// Panics if the pair contains any other rule.
+fn convert_target(pair: Pair<Rule>) -> Result<e::Target, Error> {
+    let mut target = e::Target::default();
+    target.extra_mut().anonymous = false;
+    for part in pair.into_inner() {
+        match part.as_rule() {
+            // TODO: also handle non-urls
+            Rule::link_target => target.extra_mut().refuri = Some(part.parse()?),
+            Rule::target_name_uq | Rule::target_name_qu => {
+                let name = part.as_str();
+                target.ids_mut().push(name.into());
+                target.names_mut().push(name.into());
+            },
+            rule => panic!("Unexpected rule in target: {:?}", rule),
+        }
+    }
+    Ok(target)
+}
+
+/// Converts a `substitution_def` pair into a `SubstitutionDefinition`.
+///
+/// The first inner pair supplies the whitespace-normalized name; the second
+/// one is either a `replace` or an `image` and supplies the children.
+///
+/// # Panics
+/// Panics if either inner pair is missing or the second one has another rule.
+fn convert_substitution_def(pair: Pair<Rule>) -> Result<e::SubstitutionDefinition, Error> {
+    let mut parts = pair.into_inner();
+    let name_pair = parts.next().unwrap(); // Rule::substitution_name
+    let name = whitespace_normalize_name(name_pair.as_str());
+    let body = parts.next().unwrap();
+    let children: Vec<c::TextOrInlineElement> = match body.as_rule() {
+        Rule::image => vec![convert_image::<e::ImageInline>(body)?.into()],
+        Rule::replace => convert_replace(body)?,
+        rule => panic!("Unknown substitution rule {:?}", rule),
+    };
+    let mut definition = e::SubstitutionDefinition::with_children(children);
+    definition.names_mut().push(at::NameToken(name));
+    Ok(definition)
+}
+
+/// Converts the first inner pair of a `replace` pair into inline elements.
+///
+/// # Panics
+/// Panics if the pair has no inner pair.
+fn convert_replace(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> {
+    let first_inner = pair.into_inner().next().unwrap();
+    convert_inlines(first_inner)
+}
+
+/// Converts an `image` pair into an element carrying `a::Image` attributes.
+///
+/// The first inner pair is trimmed, parsed and handed to `a::Image::new`.
+/// Every following pair is an option made of a name and a value.
+///
+/// # Errors
+/// Returns an error if a value fails to parse or an option name is unknown.
+///
+/// # Panics
+/// Panics if the first inner pair, an option name or an option value is missing.
+fn convert_image<I>(pair: Pair<Rule>) -> Result<I, Error> where I: Element + ExtraAttributes<a::Image> {
+    let mut parts = pair.into_inner();
+    let first_line = parts.next().unwrap(); // line
+    let mut image = I::with_extra(a::Image::new(first_line.as_str().trim().parse()?));
+    for option in parts {
+        let mut fields = option.into_inner();
+        let key = fields.next().unwrap();
+        let value = fields.next().unwrap();
+        match key.as_str() {
+            "class" => image.classes_mut().push(value.as_str().to_owned()),
+            "name" => image.names_mut().push(value.as_str().into()),
+            "alt" => image.extra_mut().alt = Some(value.as_str().to_owned()),
+            "height" => image.extra_mut().height = Some(value.parse()?),
+            "width" => image.extra_mut().width = Some(value.parse()?),
+            "scale" => image.extra_mut().scale = Some(parse_scale(&value)?),
+            "align" => image.extra_mut().align = Some(value.parse()?),
+            "target" => image.extra_mut().target = Some(value.parse()?),
+            unknown => bail!("Unknown Image option {}", unknown),
+        }
+    }
+    Ok(image)
+}
+
+/// Parses a scale value, with or without one trailing `%`, into a `u8`.
+///
+/// # Errors
+/// If the number does not parse, returns a pest custom error that carries the
+/// parse error's message and points at the span of `pair`.
+fn parse_scale(pair: &Pair<Rule>) -> Result<u8, Error> {
+    use pest::error::{Error as PestError, ErrorVariant};
+    let text = pair.as_str();
+    // Drop exactly one trailing percent sign, if there is one.
+    let digits = if text.ends_with('%') { &text[..text.len() - 1] } else { text };
+    match digits.parse::<u8>() {
+        Ok(scale) => Ok(scale),
+        Err(err) => {
+            let variant: ErrorVariant<Rule> = ErrorVariant::CustomError { message: err.to_string() };
+            Err(PestError::new_from_span(variant, pair.as_span()).into())
+        },
+    }
+}
+
+/// Converts an `admonition_gen` pair into the matching admonition element.
+///
+/// The first inner pair names the admonition type. Each remaining pair is
+/// wrapped, as plain text, into a paragraph that becomes a child of the
+/// admonition.
+///
+/// # Panics
+/// Panics if the type pair is missing or names an unknown admonition type.
+fn convert_admonition_gen(pair: Pair<Rule>) -> Result<c::BodyElement, Error> {
+    let mut iter = pair.into_inner();
+    let typ = iter.next().unwrap().as_str();
+    // TODO: in reality it contains body elements.
+    let children: Vec<c::BodyElement> = iter.map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()).collect();
+    Ok(match typ {
+        "attention" => e::Attention::with_children(children).into(),
+        "hint" => e::Hint::with_children(children).into(),
+        "note" => e::Note::with_children(children).into(),
+        "caution" => e::Caution::with_children(children).into(),
+        "danger" => e::Danger::with_children(children).into(),
+        "error" => e::Error::with_children(children).into(),
+        "important" => e::Important::with_children(children).into(),
+        "tip" => e::Tip::with_children(children).into(),
+        "warning" => e::Warning::with_children(children).into(),
+        typ => panic!("Unknown admonition type {}!", typ),
+    })
+}
+
+/// Converts a `bullet_list` pair into a `BulletList`, one item per inner pair.
+///
+/// # Errors
+/// Returns the first error produced while converting an item.
+fn convert_bullet_list(pair: Pair<Rule>) -> Result<e::BulletList, Error> {
+    let items: Result<Vec<_>, Error> = pair.into_inner().map(convert_bullet_item).collect();
+    Ok(e::BulletList::with_children(items?))
+}
+
+/// Converts a bullet item pair into a `ListItem`.
+///
+/// The first inner pair is converted as a paragraph; every following pair is
+/// converted as a body element.
+///
+/// # Panics
+/// Panics if the pair has no inner pair.
+fn convert_bullet_item(pair: Pair<Rule>) -> Result<e::ListItem, Error> {
+    let mut parts = pair.into_inner();
+    let first = convert_paragraph(parts.next().unwrap())?;
+    let mut children: Vec<c::BodyElement> = vec![first.into()];
+    let rest = parts.map(convert_body_elem).collect::<Result<Vec<_>, Error>>()?;
+    children.extend(rest);
+    Ok(e::ListItem::with_children(children))
+}
diff --git a/parser/src/conversion/inline.rs b/parser/src/conversion/inline.rs
new file mode 100644
index 0000000..6094714
--- /dev/null
+++ b/parser/src/conversion/inline.rs
@@ -0,0 +1,160 @@
+use failure::Error;
+use pest::iterators::Pair;
+
+use document_tree::{
+ HasChildren,
+ elements as e,
+ url::Url,
+ element_categories as c,
+ extra_attributes as a,
+ attribute_types as at,
+};
+
+use crate::{
+ pest_rst::Rule,
+// pair_ext_parse::PairExt,
+};
+use super::whitespace_normalize_name;
+
+
+/// Converts a single inline-level pair into text or an inline element.
+///
+/// # Panics
+/// Panics on rules that have no conversion here.
+pub fn convert_inline(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> {
+    let converted: c::TextOrInlineElement = match pair.as_rule() {
+        // Text rules are taken over verbatim.
+        Rule::str | Rule::str_nested => pair.as_str().into(),
+        // A `ws_newline` becomes a single space.
+        Rule::ws_newline => " ".to_owned().into(),
+        // Markup that wraps further inline content.
+        Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(),
+        Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(),
+        Rule::literal => e::Literal::with_children(convert_inlines(pair)?).into(),
+        Rule::reference => convert_reference(pair)?,
+        Rule::substitution_name => convert_substitution_ref(pair)?.into(),
+        rule => unimplemented!("unknown rule {:?}", rule),
+    };
+    Ok(converted)
+}
+
+/// Converts every inner pair of `pair` with `convert_inline`, in order.
+///
+/// # Errors
+/// Stops at, and returns, the first conversion error.
+pub fn convert_inlines(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> {
+    let mut converted = Vec::new();
+    for child in pair.into_inner() {
+        converted.push(convert_inline(child)?);
+    }
+    Ok(converted)
+}
+
+/// Converts a `reference` pair into a `Reference` element.
+///
+/// Three concrete forms are distinguished by the first inner pair:
+/// `reference_target` (named references, optionally with a bracketed target),
+/// `reference_explicit` (not implemented, panics) and `reference_auto`
+/// (standalone URLs and email addresses).
+///
+/// For `reference_auto`, text that does not parse as an absolute URL is
+/// returned as plain text instead of a reference.
+///
+/// # Errors
+/// Returns an error if a relative URL fails to parse.
+///
+/// # Panics
+/// Panics on `reference_explicit`, on missing inner pairs, and on rules the
+/// code marks as unreachable.
+fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> {
+ // `name`, `refuri` and `refid` are assigned exactly once in each branch
+ // below that reaches the final `Reference::new` call.
+ let name;
+ let refuri;
+ let refid;
+ let mut refname = vec![];
+ let mut children: Vec<c::TextOrInlineElement> = vec![];
+ let concrete = pair.into_inner().next().unwrap();
+ match concrete.as_rule() {
+ Rule::reference_target => {
+ let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu
+ match rt_inner.as_rule() {
+ // Unquoted form: the pair's text serves as name, refname and child text.
+ Rule::reference_target_uq => {
+ refid = None;
+ name = Some(rt_inner.as_str().into());
+ refuri = None;
+ refname.push(rt_inner.as_str().into());
+ children.push(rt_inner.as_str().into());
+ },
+ // Quoted form: optional text plus an optional bracketed target.
+ Rule::reference_target_qu => {
+ let (text, reference) = {
+ let mut text = None;
+ let mut reference = None;
+ for inner in rt_inner.clone().into_inner() {
+ match inner.as_rule() {
+ Rule::reference_text => text = Some(inner),
+ Rule::reference_bracketed => reference = Some(inner),
+ _ => unreachable!()
+ }
+ }
+ (text, reference)
+ };
+ // Without a bracketed part the text is used as is. With one, trailing
+ // spaces and line breaks are trimmed from the text, and if nothing is
+ // left the content of the bracketed part is used instead.
+ let trimmed_text = match (&text, &reference) {
+ (Some(text), None) => text.as_str(),
+ (_, Some(reference)) => {
+ text
+ .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch)))
+ .filter(|text| !text.is_empty())
+ .unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str())
+ }
+ (None, None) => unreachable!()
+ };
+ refid = None;
+ name = Some(trimmed_text.into());
+ refuri = if let Some(reference) = reference {
+ let inner = reference.into_inner().next().unwrap();
+ match inner.as_rule() {
+ // The URL rules in our parser accept a narrow superset of
+ // valid URLs, so we need to handle false positives.
+ Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) {
+ Some(target)
+ } else if inner.as_str().ends_with('_') {
+ // like target_name_qu (minus the final underscore)
+ let full_str = inner.as_str();
+ refname.push(full_str[0..full_str.len() - 1].into());
+ None
+ } else {
+ // like relative_reference
+ Some(Url::parse_relative(inner.as_str())?)
+ },
+ Rule::target_name_qu => {
+ refname.push(inner.as_str().into());
+ None
+ },
+ Rule::relative_reference => {
+ Some(Url::parse_relative(inner.as_str())?)
+ },
+ _ => unreachable!()
+ }
+ } else {
+ // No bracketed part: the reference points at its own text by name.
+ refname.push(trimmed_text.into());
+ None
+ };
+ children.push(trimmed_text.into());
+ },
+ _ => unreachable!()
+ }
+ },
+ Rule::reference_explicit => unimplemented!("explicit reference"),
+ Rule::reference_auto => {
+ let rt_inner = concrete.into_inner().next().unwrap();
+ match rt_inner.as_rule() {
+ Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) {
+ Ok(target) => {
+ refuri = Some(target);
+ name = None;
+ refid = None;
+ children.push(rt_inner.as_str().into());
+ },
+ // if our parser got a URL wrong, return it as a string
+ Err(_) => return Ok(rt_inner.as_str().into())
+ },
+ Rule::email => {
+ // The address becomes a `mailto:` URL; the visible text stays the bare address.
+ let mailto_url = String::from("mailto:") + rt_inner.as_str();
+ match Url::parse_absolute(&mailto_url) {
+ Ok(target) => {
+ refuri = Some(target);
+ name = None;
+ refid = None;
+ children.push(rt_inner.as_str().into());
+ },
+ // if our parser got a URL wrong, return it as a string
+ Err(_) => return Ok(rt_inner.as_str().into())
+ }
+ },
+ _ => unreachable!()
+ }
+ },
+ _ => unreachable!(),
+ };
+ Ok(e::Reference::new(
+ Default::default(),
+ a::Reference { name, refuri, refid, refname },
+ children
+ ).into())
+}
+
+/// Builds a `SubstitutionReference` whose single refname is the
+/// whitespace-normalized text of `pair`.
+fn convert_substitution_ref(pair: Pair<Rule>) -> Result<e::SubstitutionReference, Error> {
+    let refname = vec![at::NameToken(whitespace_normalize_name(pair.as_str()))];
+    let reference: e::SubstitutionReference =
+        a::ExtraAttributes::with_extra(a::SubstitutionReference { refname });
+    Ok(reference)
+}
diff --git a/parser/src/conversion/tests.rs b/parser/src/conversion/tests.rs
new file mode 100644
index 0000000..89b0a1c
--- /dev/null
+++ b/parser/src/conversion/tests.rs
@@ -0,0 +1,65 @@
+use document_tree::{
+ elements as e,
+ element_categories as c,
+ HasChildren,
+};
+
+use crate::parse;
+
+/// Test helper: unwraps a structural sub-element down to the section inside.
+///
+/// # Panics
+/// Panics if the element is not a sub-structure, or not a section.
+fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section {
+    let sub_structure = match ssubel {
+        c::StructuralSubElement::SubStructure(ref boxed) => &**boxed,
+        ref other => panic!("Expected SubStructure, not {:?}", other),
+    };
+    match *sub_structure {
+        c::SubStructure::Section(ref section) => section,
+        ref other => panic!("Expected section, not {:?}", other),
+    }
+}
+
+// reStructuredText fixture for `convert_skipped_section`: an intro paragraph
+// followed by titles adorned with `*` (underline), `-` (over- and underline)
+// and `=` (underline). The last title uses `=` directly below a `*` title.
+const SECTIONS: &str = "\
+Intro before first section title
+
+Level 1
+*******
+
+-------
+Level 2
+-------
+
+Level 3
+=======
+
+L1 again
+********
+
+L3 again, skipping L2
+=====================
+";
+
+/// Checks how sections nest when a title level is skipped: parsing `SECTIONS`
+/// must yield an intro paragraph followed by two top-level sections, with the
+/// deeper titles nested inside them.
+#[test]
+fn convert_skipped_section() {
+    let doctree = parse(SECTIONS).unwrap();
+    let lvl0 = doctree.children();
+    assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0);
+
+    assert_eq!(lvl0[0], e::Paragraph::with_children(vec![
+        "Intro before first section title".to_owned().into()
+    ]).into(), "The intro text should fit");
+
+    let lvl1a = ssubel_to_section(&lvl0[1]).children();
+    assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a);
+    //TODO: test title lvl1a[0]
+    let lvl2 = ssubel_to_section(&lvl1a[1]).children();
+    assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2);
+    //TODO: test title lvl2[0]
+    let lvl3a = ssubel_to_section(&lvl2[1]).children();
+    assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should have just a title: {:?}", lvl3a);
+    //TODO: test title lvl3a[0]
+
+    let lvl1b = ssubel_to_section(&lvl0[2]).children();
+    assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b);
+    //TODO: test title lvl1b[0]
+    let lvl3b = ssubel_to_section(&lvl1b[1]).children();
+    assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b);
+    //TODO: test title lvl3b[0]
+}