aboutsummaryrefslogtreecommitdiffstats
path: root/parser/src/conversion
diff options
context:
space:
mode:
Diffstat (limited to 'parser/src/conversion')
-rw-r--r--parser/src/conversion/block.rs415
-rw-r--r--parser/src/conversion/inline.rs278
-rw-r--r--parser/src/conversion/tests.rs95
3 files changed, 421 insertions, 367 deletions
diff --git a/parser/src/conversion/block.rs b/parser/src/conversion/block.rs
index a68dd17..97f0e23 100644
--- a/parser/src/conversion/block.rs
+++ b/parser/src/conversion/block.rs
@@ -1,257 +1,286 @@
-use failure::{Error,bail};
+use failure::{bail, Error};
use pest::iterators::Pair;
use document_tree::{
- Element,HasChildren,ExtraAttributes,
- elements as e,
- element_categories as c,
- extra_attributes as a,
- attribute_types as at
+ attribute_types as at, element_categories as c, elements as e, extra_attributes as a, Element,
+ ExtraAttributes, HasChildren,
};
-use crate::{
- pest_rst::Rule,
- pair_ext_parse::PairExt,
-};
-use super::{whitespace_normalize_name, inline::convert_inlines};
-
+use super::{inline::convert_inlines, whitespace_normalize_name};
+use crate::{pair_ext_parse::PairExt, pest_rst::Rule};
#[derive(PartialEq)]
-pub(super) enum TitleKind { Double(char), Single(char) }
+pub(super) enum TitleKind {
+ Double(char),
+ Single(char),
+}
pub(super) enum TitleOrSsubel {
- Title(e::Title, TitleKind),
- Ssubel(c::StructuralSubElement),
+ Title(e::Title, TitleKind),
+ Ssubel(c::StructuralSubElement),
}
-
pub(super) fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<TitleOrSsubel>, Error> {
- use self::TitleOrSsubel::*;
- Ok(Some(match pair.as_rule() {
- Rule::title => { let (t, k) = convert_title(pair)?; Title(t, k) },
- //TODO: subtitle, decoration, docinfo
- Rule::EOI => return Ok(None),
- _ => Ssubel(convert_substructure(pair)?.into()),
- }))
+ use self::TitleOrSsubel::*;
+ Ok(Some(match pair.as_rule() {
+ Rule::title => {
+ let (t, k) = convert_title(pair)?;
+ Title(t, k)
+ }
+ //TODO: subtitle, decoration, docinfo
+ Rule::EOI => return Ok(None),
+ _ => Ssubel(convert_substructure(pair)?.into()),
+ }))
}
-
fn convert_substructure(pair: Pair<Rule>) -> Result<c::SubStructure, Error> {
- #[allow(clippy::match_single_binding)]
- Ok(match pair.as_rule() {
- // TODO: Topic, Sidebar, Transition
- // no section here, as it’s constructed from titles
- _ => convert_body_elem(pair)?.into(),
- })
+ #[allow(clippy::match_single_binding)]
+ Ok(match pair.as_rule() {
+ // TODO: Topic, Sidebar, Transition
+ // no section here, as it’s constructed from titles
+ _ => convert_body_elem(pair)?.into(),
+ })
}
-
fn convert_body_elem(pair: Pair<Rule>) -> Result<c::BodyElement, Error> {
- Ok(match pair.as_rule() {
- Rule::paragraph => convert_paragraph(pair)?.into(),
- Rule::target => convert_target(pair)?.into(),
- Rule::substitution_def => convert_substitution_def(pair)?.into(),
- Rule::admonition_gen => convert_admonition_gen(pair)?,
- Rule::image => convert_image::<e::Image>(pair)?.into(),
- Rule::bullet_list => convert_bullet_list(pair)?.into(),
- Rule::literal_block => convert_literal_block(pair).into(),
- Rule::code_directive => convert_code_directive(pair).into(),
- Rule::raw_directive => convert_raw_directive(pair).into(),
- Rule::block_comment => convert_comment(pair).into(),
- rule => unimplemented!("unhandled rule {:?}", rule),
- })
+ Ok(match pair.as_rule() {
+ Rule::paragraph => convert_paragraph(pair)?.into(),
+ Rule::target => convert_target(pair)?.into(),
+ Rule::substitution_def => convert_substitution_def(pair)?.into(),
+ Rule::admonition_gen => convert_admonition_gen(pair)?,
+ Rule::image => convert_image::<e::Image>(pair)?.into(),
+ Rule::bullet_list => convert_bullet_list(pair)?.into(),
+ Rule::literal_block => convert_literal_block(pair).into(),
+ Rule::code_directive => convert_code_directive(pair).into(),
+ Rule::raw_directive => convert_raw_directive(pair).into(),
+ Rule::block_comment => convert_comment(pair).into(),
+ rule => unimplemented!("unhandled rule {:?}", rule),
+ })
}
-
fn convert_title(pair: Pair<Rule>) -> Result<(e::Title, TitleKind), Error> {
- let mut title: Option<String> = None;
- let mut title_inlines: Option<Vec<c::TextOrInlineElement>> = None;
- let mut adornment_char: Option<char> = None;
- // title_double or title_single. Extract kind before consuming
- let inner_pair = pair.into_inner().next().unwrap();
- let kind = inner_pair.as_rule();
- for p in inner_pair.into_inner() {
- match p.as_rule() {
- Rule::line => {
- title = Some(p.as_str().to_owned());
- title_inlines = Some(convert_inlines(p)?);
- },
- Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")),
- rule => unimplemented!("Unexpected rule in title: {:?}", rule),
- };
- }
- // now we encountered one line of text and one of adornments
- // TODO: emit error if the adornment line is too short (has to match title length)
- let mut elem = e::Title::with_children(title_inlines.expect("No text in title"));
- if let Some(title) = title {
- //TODO: slugify properly
- let slug = title.to_lowercase().replace('\n', "").replace(' ', "-");
- elem.names_mut().push(at::NameToken(slug));
- }
- let title_kind = match kind {
- Rule::title_double => TitleKind::Double(adornment_char.unwrap()),
- Rule::title_single => TitleKind::Single(adornment_char.unwrap()),
- _ => unreachable!(),
- };
- Ok((elem, title_kind))
+ let mut title: Option<String> = None;
+ let mut title_inlines: Option<Vec<c::TextOrInlineElement>> = None;
+ let mut adornment_char: Option<char> = None;
+ // title_double or title_single. Extract kind before consuming
+ let inner_pair = pair.into_inner().next().unwrap();
+ let kind = inner_pair.as_rule();
+ for p in inner_pair.into_inner() {
+ match p.as_rule() {
+ Rule::line => {
+ title = Some(p.as_str().to_owned());
+ title_inlines = Some(convert_inlines(p)?);
+ }
+ Rule::adornments => {
+ adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?"))
+ }
+ rule => unimplemented!("Unexpected rule in title: {:?}", rule),
+ };
+ }
+ // now we encountered one line of text and one of adornments
+ // TODO: emit error if the adornment line is too short (has to match title length)
+ let mut elem = e::Title::with_children(title_inlines.expect("No text in title"));
+ if let Some(title) = title {
+ //TODO: slugify properly
+ let slug = title.to_lowercase().replace('\n', "").replace(' ', "-");
+ elem.names_mut().push(at::NameToken(slug));
+ }
+ let title_kind = match kind {
+ Rule::title_double => TitleKind::Double(adornment_char.unwrap()),
+ Rule::title_single => TitleKind::Single(adornment_char.unwrap()),
+ _ => unreachable!(),
+ };
+ Ok((elem, title_kind))
}
-
fn convert_paragraph(pair: Pair<Rule>) -> Result<e::Paragraph, Error> {
- Ok(e::Paragraph::with_children(convert_inlines(pair)?))
+ Ok(e::Paragraph::with_children(convert_inlines(pair)?))
}
-
fn convert_target(pair: Pair<Rule>) -> Result<e::Target, Error> {
- let mut elem: e::Target = Default::default();
- elem.extra_mut().anonymous = false;
- for p in pair.into_inner() {
- match p.as_rule() {
- Rule::target_name_uq | Rule::target_name_qu => {
- elem.ids_mut().push(p.as_str().into());
- elem.names_mut().push(p.as_str().into());
- },
- // TODO: also handle non-urls
- Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?),
- rule => panic!("Unexpected rule in target: {:?}", rule),
- }
- }
- Ok(elem)
+ let mut elem: e::Target = Default::default();
+ elem.extra_mut().anonymous = false;
+ for p in pair.into_inner() {
+ match p.as_rule() {
+ Rule::target_name_uq | Rule::target_name_qu => {
+ elem.ids_mut().push(p.as_str().into());
+ elem.names_mut().push(p.as_str().into());
+ }
+ // TODO: also handle non-urls
+ Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?),
+ rule => panic!("Unexpected rule in target: {:?}", rule),
+ }
+ }
+ Ok(elem)
}
fn convert_substitution_def(pair: Pair<Rule>) -> Result<e::SubstitutionDefinition, Error> {
- let mut pairs = pair.into_inner();
- let name = whitespace_normalize_name(pairs.next().unwrap().as_str()); // Rule::substitution_name
- let inner_pair = pairs.next().unwrap();
- let inner: Vec<c::TextOrInlineElement> = match inner_pair.as_rule() {
- Rule::replace => convert_replace(inner_pair)?,
- Rule::image => vec![convert_image::<e::ImageInline>(inner_pair)?.into()],
- rule => panic!("Unknown substitution rule {:?}", rule),
- };
- let mut subst_def = e::SubstitutionDefinition::with_children(inner);
- subst_def.names_mut().push(at::NameToken(name));
- Ok(subst_def)
+ let mut pairs = pair.into_inner();
+ let name = whitespace_normalize_name(pairs.next().unwrap().as_str()); // Rule::substitution_name
+ let inner_pair = pairs.next().unwrap();
+ let inner: Vec<c::TextOrInlineElement> = match inner_pair.as_rule() {
+ Rule::replace => convert_replace(inner_pair)?,
+ Rule::image => vec![convert_image::<e::ImageInline>(inner_pair)?.into()],
+ rule => panic!("Unknown substitution rule {:?}", rule),
+ };
+ let mut subst_def = e::SubstitutionDefinition::with_children(inner);
+ subst_def.names_mut().push(at::NameToken(name));
+ Ok(subst_def)
}
fn convert_replace(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> {
- let mut pairs = pair.into_inner();
- let paragraph = pairs.next().unwrap();
- convert_inlines(paragraph)
+ let mut pairs = pair.into_inner();
+ let paragraph = pairs.next().unwrap();
+ convert_inlines(paragraph)
}
-fn convert_image<I>(pair: Pair<Rule>) -> Result<I, Error> where I: Element + ExtraAttributes<a::Image> {
- let mut pairs = pair.into_inner();
- let mut image = I::with_extra(a::Image::new(
- pairs.next().unwrap().as_str().trim().parse()?, // line
- ));
- for opt in pairs {
- let mut opt_iter = opt.into_inner();
- let opt_name = opt_iter.next().unwrap();
- let opt_val = opt_iter.next().unwrap();
- match opt_name.as_str() {
- "class" => image.classes_mut().push(opt_val.as_str().to_owned()),
- "name" => image.names_mut().push(opt_val.as_str().into()),
- "alt" => image.extra_mut().alt = Some(opt_val.as_str().to_owned()),
- "height" => image.extra_mut().height = Some(opt_val.parse()?),
- "width" => image.extra_mut().width = Some(opt_val.parse()?),
- "scale" => image.extra_mut().scale = Some(parse_scale(&opt_val)?),
- "align" => image.extra_mut().align = Some(opt_val.parse()?),
- "target" => image.extra_mut().target = Some(opt_val.parse()?),
- name => bail!("Unknown Image option {}", name),
- }
- }
- Ok(image)
+fn convert_image<I>(pair: Pair<Rule>) -> Result<I, Error>
+where
+ I: Element + ExtraAttributes<a::Image>,
+{
+ let mut pairs = pair.into_inner();
+ let mut image = I::with_extra(a::Image::new(
+ pairs.next().unwrap().as_str().trim().parse()?, // line
+ ));
+ for opt in pairs {
+ let mut opt_iter = opt.into_inner();
+ let opt_name = opt_iter.next().unwrap();
+ let opt_val = opt_iter.next().unwrap();
+ match opt_name.as_str() {
+ "class" => image.classes_mut().push(opt_val.as_str().to_owned()),
+ "name" => image.names_mut().push(opt_val.as_str().into()),
+ "alt" => image.extra_mut().alt = Some(opt_val.as_str().to_owned()),
+ "height" => image.extra_mut().height = Some(opt_val.parse()?),
+ "width" => image.extra_mut().width = Some(opt_val.parse()?),
+ "scale" => image.extra_mut().scale = Some(parse_scale(&opt_val)?),
+ "align" => image.extra_mut().align = Some(opt_val.parse()?),
+ "target" => image.extra_mut().target = Some(opt_val.parse()?),
+ name => bail!("Unknown Image option {}", name),
+ }
+ }
+ Ok(image)
}
fn parse_scale(pair: &Pair<Rule>) -> Result<u8, Error> {
- let input = if pair.as_str().ends_with('%') { &pair.as_str()[..pair.as_str().len()-1] } else { pair.as_str() };
- use pest::error::{Error,ErrorVariant};
- Ok(input.parse().map_err(|e: std::num::ParseIntError| {
- let var: ErrorVariant<Rule> = ErrorVariant::CustomError { message: e.to_string() };
- Error::new_from_span(var, pair.as_span())
- })?)
+ let input = if pair.as_str().ends_with('%') {
+ &pair.as_str()[..pair.as_str().len() - 1]
+ } else {
+ pair.as_str()
+ };
+ use pest::error::{Error, ErrorVariant};
+ Ok(input.parse().map_err(|e: std::num::ParseIntError| {
+ let var: ErrorVariant<Rule> = ErrorVariant::CustomError {
+ message: e.to_string(),
+ };
+ Error::new_from_span(var, pair.as_span())
+ })?)
}
fn convert_admonition_gen(pair: Pair<Rule>) -> Result<c::BodyElement, Error> {
- let mut iter = pair.into_inner();
- let typ = iter.next().unwrap().as_str();
- // TODO: in reality it contains body elements.
- let children: Vec<c::BodyElement> = iter.map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()).collect();
- Ok(match typ {
- "attention" => e::Attention::with_children(children).into(),
- "hint" => e::Hint::with_children(children).into(),
- "note" => e::Note::with_children(children).into(),
- "caution" => e::Caution::with_children(children).into(),
- "danger" => e::Danger::with_children(children).into(),
- "error" => e::Error::with_children(children).into(),
- "important" => e::Important::with_children(children).into(),
- "tip" => e::Tip::with_children(children).into(),
- "warning" => e::Warning::with_children(children).into(),
- typ => panic!("Unknown admontion type {}!", typ),
- })
+ let mut iter = pair.into_inner();
+ let typ = iter.next().unwrap().as_str();
+ // TODO: in reality it contains body elements.
+ let children: Vec<c::BodyElement> = iter
+ .map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into())
+ .collect();
+ Ok(match typ {
+ "attention" => e::Attention::with_children(children).into(),
+ "hint" => e::Hint::with_children(children).into(),
+ "note" => e::Note::with_children(children).into(),
+ "caution" => e::Caution::with_children(children).into(),
+ "danger" => e::Danger::with_children(children).into(),
+ "error" => e::Error::with_children(children).into(),
+ "important" => e::Important::with_children(children).into(),
+ "tip" => e::Tip::with_children(children).into(),
+ "warning" => e::Warning::with_children(children).into(),
+ typ => panic!("Unknown admontion type {}!", typ),
+ })
}
fn convert_bullet_list(pair: Pair<Rule>) -> Result<e::BulletList, Error> {
- Ok(e::BulletList::with_children(pair.into_inner().map(convert_bullet_item).collect::<Result<_, _>>()?))
+ Ok(e::BulletList::with_children(
+ pair.into_inner()
+ .map(convert_bullet_item)
+ .collect::<Result<_, _>>()?,
+ ))
}
fn convert_bullet_item(pair: Pair<Rule>) -> Result<e::ListItem, Error> {
- let mut iter = pair.into_inner();
- let mut children: Vec<c::BodyElement> = vec![
- convert_paragraph(iter.next().unwrap())?.into()
- ];
- for p in iter {
- children.push(convert_body_elem(p)?);
- }
- Ok(e::ListItem::with_children(children))
+ let mut iter = pair.into_inner();
+ let mut children: Vec<c::BodyElement> = vec![convert_paragraph(iter.next().unwrap())?.into()];
+ for p in iter {
+ children.push(convert_body_elem(p)?);
+ }
+ Ok(e::ListItem::with_children(children))
}
fn convert_literal_block(pair: Pair<Rule>) -> e::LiteralBlock {
- convert_literal_lines(pair.into_inner().next().unwrap())
+ convert_literal_lines(pair.into_inner().next().unwrap())
}
fn convert_literal_lines(pair: Pair<Rule>) -> e::LiteralBlock {
- let children = pair.into_inner().map(|l| match l.as_rule() {
- Rule::literal_line => l.as_str(),
- Rule::literal_line_blank => "\n",
- _ => unreachable!(),
- }.into()).collect();
- e::LiteralBlock::with_children(children)
+ let children = pair
+ .into_inner()
+ .map(|l| {
+ match l.as_rule() {
+ Rule::literal_line => l.as_str(),
+ Rule::literal_line_blank => "\n",
+ _ => unreachable!(),
+ }
+ .into()
+ })
+ .collect();
+ e::LiteralBlock::with_children(children)
}
fn convert_code_directive(pair: Pair<Rule>) -> e::LiteralBlock {
- let mut iter = pair.into_inner();
- let (lang, code) = match (iter.next().unwrap(), iter.next()) {
- (lang, Some(code)) => (Some(lang), code),
- (code, None) => (None, code),
- };
- let mut code_block = convert_literal_lines(code);
- code_block.classes_mut().push("code".to_owned());
- if let Some(lang) = lang {
- code_block.classes_mut().push(lang.as_str().to_owned());
- };
- code_block
+ let mut iter = pair.into_inner();
+ let (lang, code) = match (iter.next().unwrap(), iter.next()) {
+ (lang, Some(code)) => (Some(lang), code),
+ (code, None) => (None, code),
+ };
+ let mut code_block = convert_literal_lines(code);
+ code_block.classes_mut().push("code".to_owned());
+ if let Some(lang) = lang {
+ code_block.classes_mut().push(lang.as_str().to_owned());
+ };
+ code_block
}
fn convert_raw_directive(pair: Pair<Rule>) -> e::Raw {
- let mut iter = pair.into_inner();
- let format = iter.next().unwrap();
+ let mut iter = pair.into_inner();
+ let format = iter.next().unwrap();
let block = iter.next().unwrap();
- let children = block.into_inner().map(|l| match l.as_rule() {
- Rule::raw_line => l.as_str(),
- Rule::raw_line_blank => "\n",
- _ => unreachable!(),
- }.into()).collect();
- let mut raw_block = e::Raw::with_children(children);
- raw_block.extra_mut().format.push(at::NameToken(format.as_str().to_owned()));
- raw_block
+ let children = block
+ .into_inner()
+ .map(|l| {
+ match l.as_rule() {
+ Rule::raw_line => l.as_str(),
+ Rule::raw_line_blank => "\n",
+ _ => unreachable!(),
+ }
+ .into()
+ })
+ .collect();
+ let mut raw_block = e::Raw::with_children(children);
+ raw_block
+ .extra_mut()
+ .format
+ .push(at::NameToken(format.as_str().to_owned()));
+ raw_block
}
fn convert_comment(pair: Pair<Rule>) -> e::Comment {
- let lines = pair.into_inner().map(|l| match l.as_rule() {
- Rule::comment_line_blank => "\n",
- Rule::comment_line => l.as_str(),
- _ => unreachable!(),
- }.into()).collect();
- e::Comment::with_children(lines)
+ let lines = pair
+ .into_inner()
+ .map(|l| {
+ match l.as_rule() {
+ Rule::comment_line_blank => "\n",
+ Rule::comment_line => l.as_str(),
+ _ => unreachable!(),
+ }
+ .into()
+ })
+ .collect();
+ e::Comment::with_children(lines)
}
diff --git a/parser/src/conversion/inline.rs b/parser/src/conversion/inline.rs
index 82a74e7..a0dcb88 100644
--- a/parser/src/conversion/inline.rs
+++ b/parser/src/conversion/inline.rs
@@ -2,159 +2,155 @@ use failure::Error;
use pest::iterators::Pair;
use document_tree::{
- HasChildren,
- elements as e,
- url::Url,
- element_categories as c,
- extra_attributes as a,
- attribute_types as at,
+ attribute_types as at, element_categories as c, elements as e, extra_attributes as a, url::Url,
+ HasChildren,
};
-use crate::{
- pest_rst::Rule,
-// pair_ext_parse::PairExt,
-};
use super::whitespace_normalize_name;
-
+use crate::pest_rst::Rule;
pub fn convert_inline(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> {
- Ok(match pair.as_rule() {
- Rule::str | Rule::str_nested => pair.as_str().into(),
- Rule::ws_newline => " ".to_owned().into(),
- Rule::reference => convert_reference(pair)?,
- Rule::substitution_name => convert_substitution_ref(pair)?.into(),
- Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(),
- Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(),
- Rule::literal => e::Literal::with_children(vec![pair.as_str().to_owned()]).into(),
- rule => unimplemented!("unknown rule {:?}", rule),
- })
+ Ok(match pair.as_rule() {
+ Rule::str | Rule::str_nested => pair.as_str().into(),
+ Rule::ws_newline => " ".to_owned().into(),
+ Rule::reference => convert_reference(pair)?,
+ Rule::substitution_name => convert_substitution_ref(pair)?.into(),
+ Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(),
+ Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(),
+ Rule::literal => e::Literal::with_children(vec![pair.as_str().to_owned()]).into(),
+ rule => unimplemented!("unknown rule {:?}", rule),
+ })
}
pub fn convert_inlines(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> {
- pair.into_inner().map(convert_inline).collect()
+ pair.into_inner().map(convert_inline).collect()
}
fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> {
- let name;
- let refuri;
- let refid;
- let mut refname = vec![];
- let mut children: Vec<c::TextOrInlineElement> = vec![];
- let concrete = pair.into_inner().next().unwrap();
- match concrete.as_rule() {
- Rule::reference_target => {
- let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu
- match rt_inner.as_rule() {
- Rule::reference_target_uq => {
- refid = None;
- name = Some(rt_inner.as_str().into());
- refuri = None;
- refname.push(rt_inner.as_str().into());
- children.push(rt_inner.as_str().into());
- },
- Rule::reference_target_qu => {
- let (text, reference) = {
- let mut text = None;
- let mut reference = None;
- for inner in rt_inner.clone().into_inner() {
- match inner.as_rule() {
- Rule::reference_text => text = Some(inner),
- Rule::reference_bracketed => reference = Some(inner),
- _ => unreachable!()
- }
- }
- (text, reference)
- };
- let trimmed_text = match (&text, &reference) {
- (Some(text), None) => text.as_str(),
- (_, Some(reference)) => {
- text
- .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch)))
- .filter(|text| !text.is_empty())
- .unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str())
- }
- (None, None) => unreachable!()
- };
- refid = None;
- name = Some(trimmed_text.into());
- refuri = if let Some(reference) = reference {
- let inner = reference.into_inner().next().unwrap();
- match inner.as_rule() {
- // The URL rules in our parser accept a narrow superset of
- // valid URLs, so we need to handle false positives.
- Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) {
- Some(target)
- } else if inner.as_str().ends_with('_') {
- // like target_name_qu (minus the final underscore)
- let full_str = inner.as_str();
- refname.push(full_str[0..full_str.len() - 1].into());
- None
- } else {
- // like relative_reference
- Some(Url::parse_relative(inner.as_str())?)
- },
- Rule::target_name_qu => {
- refname.push(inner.as_str().into());
- None
- },
- Rule::relative_reference => {
- Some(Url::parse_relative(inner.as_str())?)
- },
- _ => unreachable!()
- }
- } else {
- refname.push(trimmed_text.into());
- None
- };
- children.push(trimmed_text.into());
- },
- _ => unreachable!()
- }
- },
- Rule::reference_explicit => unimplemented!("explicit reference"),
- Rule::reference_auto => {
- let rt_inner = concrete.into_inner().next().unwrap();
- match rt_inner.as_rule() {
- Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) {
- Ok(target) => {
- refuri = Some(target);
- name = None;
- refid = None;
- children.push(rt_inner.as_str().into());
- },
- // if our parser got a URL wrong, return it as a string
- Err(_) => return Ok(rt_inner.as_str().into())
- },
- Rule::email => {
- let mailto_url = String::from("mailto:") + rt_inner.as_str();
- match Url::parse_absolute(&mailto_url) {
- Ok(target) => {
- refuri = Some(target);
- name = None;
- refid = None;
- children.push(rt_inner.as_str().into());
- },
- // if our parser got a URL wrong, return it as a string
- Err(_) => return Ok(rt_inner.as_str().into())
- }
- },
- _ => unreachable!()
- }
- },
- _ => unreachable!(),
- };
- Ok(e::Reference::new(
- Default::default(),
- a::Reference { name, refuri, refid, refname },
- children
- ).into())
+ let name;
+ let refuri;
+ let refid;
+ let mut refname = vec![];
+ let mut children: Vec<c::TextOrInlineElement> = vec![];
+ let concrete = pair.into_inner().next().unwrap();
+ match concrete.as_rule() {
+ Rule::reference_target => {
+ let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu
+ match rt_inner.as_rule() {
+ Rule::reference_target_uq => {
+ refid = None;
+ name = Some(rt_inner.as_str().into());
+ refuri = None;
+ refname.push(rt_inner.as_str().into());
+ children.push(rt_inner.as_str().into());
+ }
+ Rule::reference_target_qu => {
+ let (text, reference) = {
+ let mut text = None;
+ let mut reference = None;
+ for inner in rt_inner.clone().into_inner() {
+ match inner.as_rule() {
+ Rule::reference_text => text = Some(inner),
+ Rule::reference_bracketed => reference = Some(inner),
+ _ => unreachable!(),
+ }
+ }
+ (text, reference)
+ };
+ let trimmed_text = match (&text, &reference) {
+ (Some(text), None) => text.as_str(),
+ (_, Some(reference)) => text
+ .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch)))
+ .filter(|text| !text.is_empty())
+ .unwrap_or_else(|| {
+ reference.clone().into_inner().next().unwrap().as_str()
+ }),
+ (None, None) => unreachable!(),
+ };
+ refid = None;
+ name = Some(trimmed_text.into());
+ refuri = if let Some(reference) = reference {
+ let inner = reference.into_inner().next().unwrap();
+ match inner.as_rule() {
+ // The URL rules in our parser accept a narrow superset of
+ // valid URLs, so we need to handle false positives.
+ Rule::url => {
+ if let Ok(target) = Url::parse_absolute(inner.as_str()) {
+ Some(target)
+ } else if inner.as_str().ends_with('_') {
+ // like target_name_qu (minus the final underscore)
+ let full_str = inner.as_str();
+ refname.push(full_str[0..full_str.len() - 1].into());
+ None
+ } else {
+ // like relative_reference
+ Some(Url::parse_relative(inner.as_str())?)
+ }
+ }
+ Rule::target_name_qu => {
+ refname.push(inner.as_str().into());
+ None
+ }
+ Rule::relative_reference => Some(Url::parse_relative(inner.as_str())?),
+ _ => unreachable!(),
+ }
+ } else {
+ refname.push(trimmed_text.into());
+ None
+ };
+ children.push(trimmed_text.into());
+ }
+ _ => unreachable!(),
+ }
+ }
+ Rule::reference_explicit => unimplemented!("explicit reference"),
+ Rule::reference_auto => {
+ let rt_inner = concrete.into_inner().next().unwrap();
+ match rt_inner.as_rule() {
+ Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) {
+ Ok(target) => {
+ refuri = Some(target);
+ name = None;
+ refid = None;
+ children.push(rt_inner.as_str().into());
+ }
+ // if our parser got a URL wrong, return it as a string
+ Err(_) => return Ok(rt_inner.as_str().into()),
+ },
+ Rule::email => {
+ let mailto_url = String::from("mailto:") + rt_inner.as_str();
+ match Url::parse_absolute(&mailto_url) {
+ Ok(target) => {
+ refuri = Some(target);
+ name = None;
+ refid = None;
+ children.push(rt_inner.as_str().into());
+ }
+ // if our parser got a URL wrong, return it as a string
+ Err(_) => return Ok(rt_inner.as_str().into()),
+ }
+ }
+ _ => unreachable!(),
+ }
+ }
+ _ => unreachable!(),
+ };
+ Ok(e::Reference::new(
+ Default::default(),
+ a::Reference {
+ name,
+ refuri,
+ refid,
+ refname,
+ },
+ children,
+ )
+ .into())
}
fn convert_substitution_ref(pair: Pair<Rule>) -> Result<e::SubstitutionReference, Error> {
- let name = whitespace_normalize_name(pair.as_str());
- Ok(a::ExtraAttributes::with_extra(
- a::SubstitutionReference {
- refname: vec![at::NameToken(name)]
- }
- ))
+ let name = whitespace_normalize_name(pair.as_str());
+ Ok(a::ExtraAttributes::with_extra(a::SubstitutionReference {
+ refname: vec![at::NameToken(name)],
+ }))
}
diff --git a/parser/src/conversion/tests.rs b/parser/src/conversion/tests.rs
index e042d01..8fcb408 100644
--- a/parser/src/conversion/tests.rs
+++ b/parser/src/conversion/tests.rs
@@ -1,19 +1,15 @@
-use document_tree::{
- elements as e,
- element_categories as c,
- HasChildren,
-};
+use document_tree::{element_categories as c, elements as e, HasChildren};
use crate::parse;
fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section {
- match ssubel {
- c::StructuralSubElement::SubStructure(ref b) => match **b {
- c::SubStructure::Section(ref s) => s,
- ref c => panic!("Expected section, not {:?}", c),
- },
- ref c => panic!("Expected SubStructure, not {:?}", c),
- }
+ match ssubel {
+ c::StructuralSubElement::SubStructure(ref b) => match **b {
+ c::SubStructure::Section(ref s) => s,
+ ref c => panic!("Expected section, not {:?}", c),
+ },
+ ref c => panic!("Expected SubStructure, not {:?}", c),
+ }
}
const SECTIONS: &str = "\
@@ -38,28 +34,61 @@ L3 again, skipping L2
#[test]
fn convert_skipped_section() {
- let doctree = parse(SECTIONS).unwrap();
- let lvl0 = doctree.children();
- assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0);
+ let doctree = parse(SECTIONS).unwrap();
+ let lvl0 = doctree.children();
+ assert_eq!(
+ lvl0.len(),
+ 3,
+ "Should be a paragraph and 2 sections: {:?}",
+ lvl0
+ );
- assert_eq!(lvl0[0], e::Paragraph::with_children(vec![
- "Intro before first section title".to_owned().into()
- ]).into(), "The intro text should fit");
+ assert_eq!(
+ lvl0[0],
+ e::Paragraph::with_children(vec!["Intro before first section title".to_owned().into()])
+ .into(),
+ "The intro text should fit"
+ );
- let lvl1a = ssubel_to_section(&lvl0[1]).children();
- assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a);
- //TODO: test title lvl1a[0]
- let lvl2 = ssubel_to_section(&lvl1a[1]).children();
- assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2);
- //TODO: test title lvl2[0]
- let lvl3a = ssubel_to_section(&lvl2[1]).children();
- assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a);
- //TODO: test title lvl3a[0]
+ let lvl1a = ssubel_to_section(&lvl0[1]).children();
+ assert_eq!(
+ lvl1a.len(),
+ 2,
+ "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}",
+ lvl1a
+ );
+ //TODO: test title lvl1a[0]
+ let lvl2 = ssubel_to_section(&lvl1a[1]).children();
+ assert_eq!(
+ lvl2.len(),
+ 2,
+ "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}",
+ lvl2
+ );
+ //TODO: test title lvl2[0]
+ let lvl3a = ssubel_to_section(&lvl2[1]).children();
+ assert_eq!(
+ lvl3a.len(),
+ 1,
+ "The 1st lvl3 section should just a title: {:?}",
+ lvl3a
+ );
+ //TODO: test title lvl3a[0]
- let lvl1b = ssubel_to_section(&lvl0[2]).children();
- assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b);
- //TODO: test title lvl1b[0]
- let lvl3b = ssubel_to_section(&lvl1b[1]).children();
- assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b);
- //TODO: test title lvl3b[0]
+ let lvl1b = ssubel_to_section(&lvl0[2]).children();
+ assert_eq!(
+ lvl1b.len(),
+ 2,
+ "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}",
+ lvl1b
+ );
+ //TODO: test title lvl1b[0]
+ let lvl3b = ssubel_to_section(&lvl1b[1]).children();
+ assert_eq!(
+ lvl3b.len(),
+ 1,
+ "The 2nd lvl3 section should have just a title: {:?}",
+ lvl3b
+ );
+ //TODO: test title lvl3b[0]
}