diff options
Diffstat (limited to 'parser/src/conversion')
| -rw-r--r-- | parser/src/conversion/block.rs | 415 | ||||
| -rw-r--r-- | parser/src/conversion/inline.rs | 278 | ||||
| -rw-r--r-- | parser/src/conversion/tests.rs | 95 |
3 files changed, 421 insertions, 367 deletions
diff --git a/parser/src/conversion/block.rs b/parser/src/conversion/block.rs index a68dd17..97f0e23 100644 --- a/parser/src/conversion/block.rs +++ b/parser/src/conversion/block.rs @@ -1,257 +1,286 @@ -use failure::{Error,bail}; +use failure::{bail, Error}; use pest::iterators::Pair; use document_tree::{ - Element,HasChildren,ExtraAttributes, - elements as e, - element_categories as c, - extra_attributes as a, - attribute_types as at + attribute_types as at, element_categories as c, elements as e, extra_attributes as a, Element, + ExtraAttributes, HasChildren, }; -use crate::{ - pest_rst::Rule, - pair_ext_parse::PairExt, -}; -use super::{whitespace_normalize_name, inline::convert_inlines}; - +use super::{inline::convert_inlines, whitespace_normalize_name}; +use crate::{pair_ext_parse::PairExt, pest_rst::Rule}; #[derive(PartialEq)] -pub(super) enum TitleKind { Double(char), Single(char) } +pub(super) enum TitleKind { + Double(char), + Single(char), +} pub(super) enum TitleOrSsubel { - Title(e::Title, TitleKind), - Ssubel(c::StructuralSubElement), + Title(e::Title, TitleKind), + Ssubel(c::StructuralSubElement), } - pub(super) fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<TitleOrSsubel>, Error> { - use self::TitleOrSsubel::*; - Ok(Some(match pair.as_rule() { - Rule::title => { let (t, k) = convert_title(pair)?; Title(t, k) }, - //TODO: subtitle, decoration, docinfo - Rule::EOI => return Ok(None), - _ => Ssubel(convert_substructure(pair)?.into()), - })) + use self::TitleOrSsubel::*; + Ok(Some(match pair.as_rule() { + Rule::title => { + let (t, k) = convert_title(pair)?; + Title(t, k) + } + //TODO: subtitle, decoration, docinfo + Rule::EOI => return Ok(None), + _ => Ssubel(convert_substructure(pair)?.into()), + })) } - fn convert_substructure(pair: Pair<Rule>) -> Result<c::SubStructure, Error> { - #[allow(clippy::match_single_binding)] - Ok(match pair.as_rule() { - // TODO: Topic, Sidebar, Transition - // no section here, as it’s constructed from titles - _ => convert_body_elem(pair)?.into(), - }) + #[allow(clippy::match_single_binding)] + Ok(match pair.as_rule() { + // TODO: Topic, Sidebar, Transition + // no section here, as it’s constructed from titles + _ => convert_body_elem(pair)?.into(), + }) } - fn convert_body_elem(pair: Pair<Rule>) -> Result<c::BodyElement, Error> { - Ok(match pair.as_rule() { - Rule::paragraph => convert_paragraph(pair)?.into(), - Rule::target => convert_target(pair)?.into(), - Rule::substitution_def => convert_substitution_def(pair)?.into(), - Rule::admonition_gen => convert_admonition_gen(pair)?, - Rule::image => convert_image::<e::Image>(pair)?.into(), - Rule::bullet_list => convert_bullet_list(pair)?.into(), - Rule::literal_block => convert_literal_block(pair).into(), - Rule::code_directive => convert_code_directive(pair).into(), - Rule::raw_directive => convert_raw_directive(pair).into(), - Rule::block_comment => convert_comment(pair).into(), - rule => unimplemented!("unhandled rule {:?}", rule), - }) + Ok(match pair.as_rule() { + Rule::paragraph => convert_paragraph(pair)?.into(), + Rule::target => convert_target(pair)?.into(), + Rule::substitution_def => convert_substitution_def(pair)?.into(), + Rule::admonition_gen => convert_admonition_gen(pair)?, + Rule::image => convert_image::<e::Image>(pair)?.into(), + Rule::bullet_list => convert_bullet_list(pair)?.into(), + Rule::literal_block => convert_literal_block(pair).into(), + Rule::code_directive => convert_code_directive(pair).into(), + Rule::raw_directive => convert_raw_directive(pair).into(), + Rule::block_comment => convert_comment(pair).into(), + rule => unimplemented!("unhandled rule {:?}", rule), + }) } - fn convert_title(pair: Pair<Rule>) -> Result<(e::Title, TitleKind), Error> { - let mut title: Option<String> = None; - let mut title_inlines: Option<Vec<c::TextOrInlineElement>> = None; - let mut adornment_char: Option<char> = None; - // title_double or title_single. Extract kind before consuming - let inner_pair = pair.into_inner().next().unwrap(); - let kind = inner_pair.as_rule(); - for p in inner_pair.into_inner() { - match p.as_rule() { - Rule::line => { - title = Some(p.as_str().to_owned()); - title_inlines = Some(convert_inlines(p)?); - }, - Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")), - rule => unimplemented!("Unexpected rule in title: {:?}", rule), - }; - } - // now we encountered one line of text and one of adornments - // TODO: emit error if the adornment line is too short (has to match title length) - let mut elem = e::Title::with_children(title_inlines.expect("No text in title")); - if let Some(title) = title { - //TODO: slugify properly - let slug = title.to_lowercase().replace('\n', "").replace(' ', "-"); - elem.names_mut().push(at::NameToken(slug)); - } - let title_kind = match kind { - Rule::title_double => TitleKind::Double(adornment_char.unwrap()), - Rule::title_single => TitleKind::Single(adornment_char.unwrap()), - _ => unreachable!(), - }; - Ok((elem, title_kind)) + let mut title: Option<String> = None; + let mut title_inlines: Option<Vec<c::TextOrInlineElement>> = None; + let mut adornment_char: Option<char> = None; + // title_double or title_single. Extract kind before consuming + let inner_pair = pair.into_inner().next().unwrap(); + let kind = inner_pair.as_rule(); + for p in inner_pair.into_inner() { + match p.as_rule() { + Rule::line => { + title = Some(p.as_str().to_owned()); + title_inlines = Some(convert_inlines(p)?); + } + Rule::adornments => { + adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")) + } + rule => unimplemented!("Unexpected rule in title: {:?}", rule), + }; + } + // now we encountered one line of text and one of adornments + // TODO: emit error if the adornment line is too short (has to match title length) + let mut elem = e::Title::with_children(title_inlines.expect("No text in title")); + if let Some(title) = title { + //TODO: slugify properly + let slug = title.to_lowercase().replace('\n', "").replace(' ', "-"); + elem.names_mut().push(at::NameToken(slug)); + } + let title_kind = match kind { + Rule::title_double => TitleKind::Double(adornment_char.unwrap()), + Rule::title_single => TitleKind::Single(adornment_char.unwrap()), + _ => unreachable!(), + }; + Ok((elem, title_kind)) } - fn convert_paragraph(pair: Pair<Rule>) -> Result<e::Paragraph, Error> { - Ok(e::Paragraph::with_children(convert_inlines(pair)?)) + Ok(e::Paragraph::with_children(convert_inlines(pair)?)) } - fn convert_target(pair: Pair<Rule>) -> Result<e::Target, Error> { - let mut elem: e::Target = Default::default(); - elem.extra_mut().anonymous = false; - for p in pair.into_inner() { - match p.as_rule() { - Rule::target_name_uq | Rule::target_name_qu => { - elem.ids_mut().push(p.as_str().into()); - elem.names_mut().push(p.as_str().into()); - }, - // TODO: also handle non-urls - Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?), - rule => panic!("Unexpected rule in target: {:?}", rule), - } - } - Ok(elem) + let mut elem: e::Target = Default::default(); + elem.extra_mut().anonymous = false; + for p in pair.into_inner() { + match p.as_rule() { + Rule::target_name_uq | Rule::target_name_qu => { + elem.ids_mut().push(p.as_str().into()); + elem.names_mut().push(p.as_str().into()); + } + // TODO: also handle non-urls + Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?), + rule => panic!("Unexpected rule in target: {:?}", rule), + } + } + Ok(elem) } fn convert_substitution_def(pair: Pair<Rule>) -> Result<e::SubstitutionDefinition, Error> { - let mut pairs = pair.into_inner(); - let name = whitespace_normalize_name(pairs.next().unwrap().as_str()); // Rule::substitution_name - let inner_pair = pairs.next().unwrap(); - let inner: Vec<c::TextOrInlineElement> = match inner_pair.as_rule() { - Rule::replace => convert_replace(inner_pair)?, - Rule::image => vec![convert_image::<e::ImageInline>(inner_pair)?.into()], - rule => panic!("Unknown substitution rule {:?}", rule), - }; - let mut subst_def = e::SubstitutionDefinition::with_children(inner); - subst_def.names_mut().push(at::NameToken(name)); - Ok(subst_def) + let mut pairs = pair.into_inner(); + let name = whitespace_normalize_name(pairs.next().unwrap().as_str()); // Rule::substitution_name + let inner_pair = pairs.next().unwrap(); + let inner: Vec<c::TextOrInlineElement> = match inner_pair.as_rule() { + Rule::replace => convert_replace(inner_pair)?, + Rule::image => vec![convert_image::<e::ImageInline>(inner_pair)?.into()], + rule => panic!("Unknown substitution rule {:?}", rule), + }; + let mut subst_def = e::SubstitutionDefinition::with_children(inner); + subst_def.names_mut().push(at::NameToken(name)); + Ok(subst_def) } fn convert_replace(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> { - let mut pairs = pair.into_inner(); - let paragraph = pairs.next().unwrap(); - convert_inlines(paragraph) + let mut pairs = pair.into_inner(); + let paragraph = pairs.next().unwrap(); + convert_inlines(paragraph) } -fn convert_image<I>(pair: Pair<Rule>) -> Result<I, Error> where I: Element + ExtraAttributes<a::Image> { - let mut pairs = pair.into_inner(); - let mut image = I::with_extra(a::Image::new( - pairs.next().unwrap().as_str().trim().parse()?, // line - )); - for opt in pairs { - let mut opt_iter = opt.into_inner(); - let opt_name = opt_iter.next().unwrap(); - let opt_val = opt_iter.next().unwrap(); - match opt_name.as_str() { - "class" => image.classes_mut().push(opt_val.as_str().to_owned()), - "name" => image.names_mut().push(opt_val.as_str().into()), - "alt" => image.extra_mut().alt = Some(opt_val.as_str().to_owned()), - "height" => image.extra_mut().height = Some(opt_val.parse()?), - "width" => image.extra_mut().width = Some(opt_val.parse()?), - "scale" => image.extra_mut().scale = Some(parse_scale(&opt_val)?), - "align" => image.extra_mut().align = Some(opt_val.parse()?), - "target" => image.extra_mut().target = Some(opt_val.parse()?), - name => bail!("Unknown Image option {}", name), - } - } - Ok(image) +fn convert_image<I>(pair: Pair<Rule>) -> Result<I, Error> +where + I: Element + ExtraAttributes<a::Image>, +{ + let mut pairs = pair.into_inner(); + let mut image = I::with_extra(a::Image::new( + pairs.next().unwrap().as_str().trim().parse()?, // line + )); + for opt in pairs { + let mut opt_iter = opt.into_inner(); + let opt_name = opt_iter.next().unwrap(); + let opt_val = opt_iter.next().unwrap(); + match opt_name.as_str() { + "class" => image.classes_mut().push(opt_val.as_str().to_owned()), + "name" => image.names_mut().push(opt_val.as_str().into()), + "alt" => image.extra_mut().alt = Some(opt_val.as_str().to_owned()), + "height" => image.extra_mut().height = Some(opt_val.parse()?), + "width" => image.extra_mut().width = Some(opt_val.parse()?), + "scale" => image.extra_mut().scale = Some(parse_scale(&opt_val)?), + "align" => image.extra_mut().align = Some(opt_val.parse()?), + "target" => image.extra_mut().target = Some(opt_val.parse()?), + name => bail!("Unknown Image option {}", name), + } + } + Ok(image) } fn parse_scale(pair: &Pair<Rule>) -> Result<u8, Error> { - let input = if pair.as_str().ends_with('%') { &pair.as_str()[..pair.as_str().len()-1] } else { pair.as_str() }; - use pest::error::{Error,ErrorVariant}; - Ok(input.parse().map_err(|e: std::num::ParseIntError| { - let var: ErrorVariant<Rule> = ErrorVariant::CustomError { message: e.to_string() }; - Error::new_from_span(var, pair.as_span()) - })?) + let input = if pair.as_str().ends_with('%') { + &pair.as_str()[..pair.as_str().len() - 1] + } else { + pair.as_str() + }; + use pest::error::{Error, ErrorVariant}; + Ok(input.parse().map_err(|e: std::num::ParseIntError| { + let var: ErrorVariant<Rule> = ErrorVariant::CustomError { + message: e.to_string(), + }; + Error::new_from_span(var, pair.as_span()) + })?) } fn convert_admonition_gen(pair: Pair<Rule>) -> Result<c::BodyElement, Error> { - let mut iter = pair.into_inner(); - let typ = iter.next().unwrap().as_str(); - // TODO: in reality it contains body elements. - let children: Vec<c::BodyElement> = iter.map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()).collect(); - Ok(match typ { - "attention" => e::Attention::with_children(children).into(), - "hint" => e::Hint::with_children(children).into(), - "note" => e::Note::with_children(children).into(), - "caution" => e::Caution::with_children(children).into(), - "danger" => e::Danger::with_children(children).into(), - "error" => e::Error::with_children(children).into(), - "important" => e::Important::with_children(children).into(), - "tip" => e::Tip::with_children(children).into(), - "warning" => e::Warning::with_children(children).into(), - typ => panic!("Unknown admontion type {}!", typ), - }) + let mut iter = pair.into_inner(); + let typ = iter.next().unwrap().as_str(); + // TODO: in reality it contains body elements. + let children: Vec<c::BodyElement> = iter + .map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()) + .collect(); + Ok(match typ { + "attention" => e::Attention::with_children(children).into(), + "hint" => e::Hint::with_children(children).into(), + "note" => e::Note::with_children(children).into(), + "caution" => e::Caution::with_children(children).into(), + "danger" => e::Danger::with_children(children).into(), + "error" => e::Error::with_children(children).into(), + "important" => e::Important::with_children(children).into(), + "tip" => e::Tip::with_children(children).into(), + "warning" => e::Warning::with_children(children).into(), + typ => panic!("Unknown admontion type {}!", typ), + }) } fn convert_bullet_list(pair: Pair<Rule>) -> Result<e::BulletList, Error> { - Ok(e::BulletList::with_children(pair.into_inner().map(convert_bullet_item).collect::<Result<_, _>>()?)) + Ok(e::BulletList::with_children( + pair.into_inner() + .map(convert_bullet_item) + .collect::<Result<_, _>>()?, + )) } fn convert_bullet_item(pair: Pair<Rule>) -> Result<e::ListItem, Error> { - let mut iter = pair.into_inner(); - let mut children: Vec<c::BodyElement> = vec![ - convert_paragraph(iter.next().unwrap())?.into() - ]; - for p in iter { - children.push(convert_body_elem(p)?); - } - Ok(e::ListItem::with_children(children)) + let mut iter = pair.into_inner(); + let mut children: Vec<c::BodyElement> = vec![convert_paragraph(iter.next().unwrap())?.into()]; + for p in iter { + children.push(convert_body_elem(p)?); + } + Ok(e::ListItem::with_children(children)) } fn convert_literal_block(pair: Pair<Rule>) -> e::LiteralBlock { - convert_literal_lines(pair.into_inner().next().unwrap()) + convert_literal_lines(pair.into_inner().next().unwrap()) } fn convert_literal_lines(pair: Pair<Rule>) -> e::LiteralBlock { - let children = pair.into_inner().map(|l| match l.as_rule() { - Rule::literal_line => l.as_str(), - Rule::literal_line_blank => "\n", - _ => unreachable!(), - }.into()).collect(); - e::LiteralBlock::with_children(children) + let children = pair + .into_inner() + .map(|l| { + match l.as_rule() { + Rule::literal_line => l.as_str(), + Rule::literal_line_blank => "\n", + _ => unreachable!(), + } + .into() + }) + .collect(); + e::LiteralBlock::with_children(children) } fn convert_code_directive(pair: Pair<Rule>) -> e::LiteralBlock { - let mut iter = pair.into_inner(); - let (lang, code) = match (iter.next().unwrap(), iter.next()) { - (lang, Some(code)) => (Some(lang), code), - (code, None) => (None, code), - }; - let mut code_block = convert_literal_lines(code); - code_block.classes_mut().push("code".to_owned()); - if let Some(lang) = lang { - code_block.classes_mut().push(lang.as_str().to_owned()); - }; - code_block + let mut iter = pair.into_inner(); + let (lang, code) = match (iter.next().unwrap(), iter.next()) { + (lang, Some(code)) => (Some(lang), code), + (code, None) => (None, code), + }; + let mut code_block = convert_literal_lines(code); + code_block.classes_mut().push("code".to_owned()); + if let Some(lang) = lang { + code_block.classes_mut().push(lang.as_str().to_owned()); + }; + code_block } fn convert_raw_directive(pair: Pair<Rule>) -> e::Raw { - let mut iter = pair.into_inner(); - let format = iter.next().unwrap(); + let mut iter = pair.into_inner(); + let format = iter.next().unwrap(); let block = iter.next().unwrap(); - let children = block.into_inner().map(|l| match l.as_rule() { - Rule::raw_line => l.as_str(), - Rule::raw_line_blank => "\n", - _ => unreachable!(), - }.into()).collect(); - let mut raw_block = e::Raw::with_children(children); - raw_block.extra_mut().format.push(at::NameToken(format.as_str().to_owned())); - raw_block + let children = block + .into_inner() + .map(|l| { + match l.as_rule() { + Rule::raw_line => l.as_str(), + Rule::raw_line_blank => "\n", + _ => unreachable!(), + } + .into() + }) + .collect(); + let mut raw_block = e::Raw::with_children(children); + raw_block + .extra_mut() + .format + .push(at::NameToken(format.as_str().to_owned())); + raw_block } fn convert_comment(pair: Pair<Rule>) -> e::Comment { - let lines = pair.into_inner().map(|l| match l.as_rule() { - Rule::comment_line_blank => "\n", - Rule::comment_line => l.as_str(), - _ => unreachable!(), - }.into()).collect(); - e::Comment::with_children(lines) + let lines = pair + .into_inner() + .map(|l| { + match l.as_rule() { + Rule::comment_line_blank => "\n", + Rule::comment_line => l.as_str(), + _ => unreachable!(), + } + .into() + }) + .collect(); + e::Comment::with_children(lines) } diff --git a/parser/src/conversion/inline.rs b/parser/src/conversion/inline.rs index 82a74e7..a0dcb88 100644 --- a/parser/src/conversion/inline.rs +++ b/parser/src/conversion/inline.rs @@ -2,159 +2,155 @@ use failure::Error; use pest::iterators::Pair; use document_tree::{ - HasChildren, - elements as e, - url::Url, - element_categories as c, - extra_attributes as a, - attribute_types as at, + attribute_types as at, element_categories as c, elements as e, extra_attributes as a, url::Url, + HasChildren, }; -use crate::{ - pest_rst::Rule, -// pair_ext_parse::PairExt, -}; use super::whitespace_normalize_name; - +use crate::pest_rst::Rule; pub fn convert_inline(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> { - Ok(match pair.as_rule() { - Rule::str | Rule::str_nested => pair.as_str().into(), - Rule::ws_newline => " ".to_owned().into(), - Rule::reference => convert_reference(pair)?, - Rule::substitution_name => convert_substitution_ref(pair)?.into(), - Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(), - Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(), - Rule::literal => e::Literal::with_children(vec![pair.as_str().to_owned()]).into(), - rule => unimplemented!("unknown rule {:?}", rule), - }) + Ok(match pair.as_rule() { + Rule::str | Rule::str_nested => pair.as_str().into(), + Rule::ws_newline => " ".to_owned().into(), + Rule::reference => convert_reference(pair)?, + Rule::substitution_name => convert_substitution_ref(pair)?.into(), + Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(), + Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(), + Rule::literal => e::Literal::with_children(vec![pair.as_str().to_owned()]).into(), + rule => unimplemented!("unknown rule {:?}", rule), + }) } pub fn convert_inlines(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> { - pair.into_inner().map(convert_inline).collect() + pair.into_inner().map(convert_inline).collect() } fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> { - let name; - let refuri; - let refid; - let mut refname = vec![]; - let mut children: Vec<c::TextOrInlineElement> = vec![]; - let concrete = pair.into_inner().next().unwrap(); - match concrete.as_rule() { - Rule::reference_target => { - let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu - match rt_inner.as_rule() { - Rule::reference_target_uq => { - refid = None; - name = Some(rt_inner.as_str().into()); - refuri = None; - refname.push(rt_inner.as_str().into()); - children.push(rt_inner.as_str().into()); - }, - Rule::reference_target_qu => { - let (text, reference) = { - let mut text = None; - let mut reference = None; - for inner in rt_inner.clone().into_inner() { - match inner.as_rule() { - Rule::reference_text => text = Some(inner), - Rule::reference_bracketed => reference = Some(inner), - _ => unreachable!() - } - } - (text, reference) - }; - let trimmed_text = match (&text, &reference) { - (Some(text), None) => text.as_str(), - (_, Some(reference)) => { - text - .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch))) - .filter(|text| !text.is_empty()) - .unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str()) - } - (None, None) => unreachable!() - }; - refid = None; - name = Some(trimmed_text.into()); - refuri = if let Some(reference) = reference { - let inner = reference.into_inner().next().unwrap(); - match inner.as_rule() { - // The URL rules in our parser accept a narrow superset of - // valid URLs, so we need to handle false positives. - Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { - Some(target) - } else if inner.as_str().ends_with('_') { - // like target_name_qu (minus the final underscore) - let full_str = inner.as_str(); - refname.push(full_str[0..full_str.len() - 1].into()); - None - } else { - // like relative_reference - Some(Url::parse_relative(inner.as_str())?) - }, - Rule::target_name_qu => { - refname.push(inner.as_str().into()); - None - }, - Rule::relative_reference => { - Some(Url::parse_relative(inner.as_str())?) - }, - _ => unreachable!() - } - } else { - refname.push(trimmed_text.into()); - None - }; - children.push(trimmed_text.into()); - }, - _ => unreachable!() - } - }, - Rule::reference_explicit => unimplemented!("explicit reference"), - Rule::reference_auto => { - let rt_inner = concrete.into_inner().next().unwrap(); - match rt_inner.as_rule() { - Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { - Ok(target) => { - refuri = Some(target); - name = None; - refid = None; - children.push(rt_inner.as_str().into()); - }, - // if our parser got a URL wrong, return it as a string - Err(_) => return Ok(rt_inner.as_str().into()) - }, - Rule::email => { - let mailto_url = String::from("mailto:") + rt_inner.as_str(); - match Url::parse_absolute(&mailto_url) { - Ok(target) => { - refuri = Some(target); - name = None; - refid = None; - children.push(rt_inner.as_str().into()); - }, - // if our parser got a URL wrong, return it as a string - Err(_) => return Ok(rt_inner.as_str().into()) - } - }, - _ => unreachable!() - } - }, - _ => unreachable!(), - }; - Ok(e::Reference::new( - Default::default(), - a::Reference { name, refuri, refid, refname }, - children - ).into()) + let name; + let refuri; + let refid; + let mut refname = vec![]; + let mut children: Vec<c::TextOrInlineElement> = vec![]; + let concrete = pair.into_inner().next().unwrap(); + match concrete.as_rule() { + Rule::reference_target => { + let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu + match rt_inner.as_rule() { + Rule::reference_target_uq => { + refid = None; + name = Some(rt_inner.as_str().into()); + refuri = None; + refname.push(rt_inner.as_str().into()); + children.push(rt_inner.as_str().into()); + } + Rule::reference_target_qu => { + let (text, reference) = { + let mut text = None; + let mut reference = None; + for inner in rt_inner.clone().into_inner() { + match inner.as_rule() { + Rule::reference_text => text = Some(inner), + Rule::reference_bracketed => reference = Some(inner), + _ => unreachable!(), + } + } + (text, reference) + }; + let trimmed_text = match (&text, &reference) { + (Some(text), None) => text.as_str(), + (_, Some(reference)) => text + .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch))) + .filter(|text| !text.is_empty()) + .unwrap_or_else(|| { + reference.clone().into_inner().next().unwrap().as_str() + }), + (None, None) => unreachable!(), + }; + refid = None; + name = Some(trimmed_text.into()); + refuri = if let Some(reference) = reference { + let inner = reference.into_inner().next().unwrap(); + match inner.as_rule() { + // The URL rules in our parser accept a narrow superset of + // valid URLs, so we need to handle false positives. + Rule::url => { + if let Ok(target) = Url::parse_absolute(inner.as_str()) { + Some(target) + } else if inner.as_str().ends_with('_') { + // like target_name_qu (minus the final underscore) + let full_str = inner.as_str(); + refname.push(full_str[0..full_str.len() - 1].into()); + None + } else { + // like relative_reference + Some(Url::parse_relative(inner.as_str())?) + } + } + Rule::target_name_qu => { + refname.push(inner.as_str().into()); + None + } + Rule::relative_reference => Some(Url::parse_relative(inner.as_str())?), + _ => unreachable!(), + } + } else { + refname.push(trimmed_text.into()); + None + }; + children.push(trimmed_text.into()); + } + _ => unreachable!(), + } + } + Rule::reference_explicit => unimplemented!("explicit reference"), + Rule::reference_auto => { + let rt_inner = concrete.into_inner().next().unwrap(); + match rt_inner.as_rule() { + Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { + Ok(target) => { + refuri = Some(target); + name = None; + refid = None; + children.push(rt_inner.as_str().into()); + } + // if our parser got a URL wrong, return it as a string + Err(_) => return Ok(rt_inner.as_str().into()), + }, + Rule::email => { + let mailto_url = String::from("mailto:") + rt_inner.as_str(); + match Url::parse_absolute(&mailto_url) { + Ok(target) => { + refuri = Some(target); + name = None; + refid = None; + children.push(rt_inner.as_str().into()); + } + // if our parser got a URL wrong, return it as a string + Err(_) => return Ok(rt_inner.as_str().into()), + } + } + _ => unreachable!(), + } + } + _ => unreachable!(), + }; + Ok(e::Reference::new( + Default::default(), + a::Reference { + name, + refuri, + refid, + refname, + }, + children, + ) + .into()) } fn convert_substitution_ref(pair: Pair<Rule>) -> Result<e::SubstitutionReference, Error> { - let name = whitespace_normalize_name(pair.as_str()); - Ok(a::ExtraAttributes::with_extra( - a::SubstitutionReference { - refname: vec![at::NameToken(name)] - } - )) + let name = whitespace_normalize_name(pair.as_str()); + Ok(a::ExtraAttributes::with_extra(a::SubstitutionReference { + refname: vec![at::NameToken(name)], + })) } diff --git a/parser/src/conversion/tests.rs b/parser/src/conversion/tests.rs index e042d01..8fcb408 100644 --- a/parser/src/conversion/tests.rs +++ b/parser/src/conversion/tests.rs @@ -1,19 +1,15 @@ -use document_tree::{ - elements as e, - element_categories as c, - HasChildren, -}; +use document_tree::{element_categories as c, elements as e, HasChildren}; use crate::parse; fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section { - match ssubel { - c::StructuralSubElement::SubStructure(ref b) => match **b { - c::SubStructure::Section(ref s) => s, - ref c => panic!("Expected section, not {:?}", c), - }, - ref c => panic!("Expected SubStructure, not {:?}", c), - } + match ssubel { + c::StructuralSubElement::SubStructure(ref b) => match **b { + c::SubStructure::Section(ref s) => s, + ref c => panic!("Expected section, not {:?}", c), + }, + ref c => panic!("Expected SubStructure, not {:?}", c), + } } const SECTIONS: &str = "\ @@ -38,28 +34,61 @@ L3 again, skipping L2 #[test] fn convert_skipped_section() { - let doctree = parse(SECTIONS).unwrap(); - let lvl0 = doctree.children(); - assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0); + let doctree = parse(SECTIONS).unwrap(); + let lvl0 = doctree.children(); + assert_eq!( + lvl0.len(), + 3, + "Should be a paragraph and 2 sections: {:?}", + lvl0 + ); - assert_eq!(lvl0[0], e::Paragraph::with_children(vec![ - "Intro before first section title".to_owned().into() - ]).into(), "The intro text should fit"); + assert_eq!( + lvl0[0], + e::Paragraph::with_children(vec!["Intro before first section title".to_owned().into()]) + .into(), + "The intro text should fit" + ); - let lvl1a = ssubel_to_section(&lvl0[1]).children(); - assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a); - //TODO: test title lvl1a[0] - let lvl2 = ssubel_to_section(&lvl1a[1]).children(); - assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2); - //TODO: test title lvl2[0] - let lvl3a = ssubel_to_section(&lvl2[1]).children(); - assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a); - //TODO: test title lvl3a[0] + let lvl1a = ssubel_to_section(&lvl0[1]).children(); + assert_eq!( + lvl1a.len(), + 2, + "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", + lvl1a + ); + //TODO: test title lvl1a[0] + let lvl2 = ssubel_to_section(&lvl1a[1]).children(); + assert_eq!( + lvl2.len(), + 2, + "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", + lvl2 + ); + //TODO: test title lvl2[0] + let lvl3a = ssubel_to_section(&lvl2[1]).children(); + assert_eq!( + lvl3a.len(), + 1, + "The 1st lvl3 section should just a title: {:?}", + lvl3a + ); + //TODO: test title lvl3a[0] - let lvl1b = ssubel_to_section(&lvl0[2]).children(); - assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b); - //TODO: test title lvl1b[0] - let lvl3b = ssubel_to_section(&lvl1b[1]).children(); - assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b); - //TODO: test title lvl3b[0] + let lvl1b = ssubel_to_section(&lvl0[2]).children(); + assert_eq!( + lvl1b.len(), + 2, + "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", + lvl1b + ); + //TODO: test title lvl1b[0] + let lvl3b = ssubel_to_section(&lvl1b[1]).children(); + assert_eq!( + lvl3b.len(), + 1, + "The 2nd lvl3 section should have just a title: {:?}", + lvl3b + ); + //TODO: test title lvl3b[0] } |
