diff options
Diffstat (limited to 'parser/src/conversion')
| -rw-r--r-- | parser/src/conversion/block.rs | 202 | ||||
| -rw-r--r-- | parser/src/conversion/inline.rs | 160 | ||||
| -rw-r--r-- | parser/src/conversion/tests.rs | 65 | 
3 files changed, 427 insertions, 0 deletions
| diff --git a/parser/src/conversion/block.rs b/parser/src/conversion/block.rs new file mode 100644 index 0000000..ab18c48 --- /dev/null +++ b/parser/src/conversion/block.rs @@ -0,0 +1,202 @@ +use failure::{Error,bail}; +use pest::iterators::Pair; + +use document_tree::{ +	Element,HasChildren,ExtraAttributes, +	elements as e, +	element_categories as c, +	extra_attributes as a, +	attribute_types as at +}; + +use crate::{ +	pest_rst::Rule, +	pair_ext_parse::PairExt, +}; +use super::{whitespace_normalize_name, inline::convert_inlines}; + + +#[derive(PartialEq)] +pub(super) enum TitleKind { Double(char), Single(char) } + +pub(super) enum TitleOrSsubel { +	Title(e::Title, TitleKind), +	Ssubel(c::StructuralSubElement), +} + + +pub(super) fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<TitleOrSsubel>, Error> { +	use self::TitleOrSsubel::*; +	Ok(Some(match pair.as_rule() { +		Rule::title => { let (t, k) = convert_title(pair)?; Title(t, k) }, +		//TODO: subtitle, decoration, docinfo +		Rule::EOI   => return Ok(None), +		_           => Ssubel(convert_substructure(pair)?.into()), +	})) +} + + +fn convert_substructure(pair: Pair<Rule>) -> Result<c::SubStructure, Error> { +	Ok(match pair.as_rule() { +		// todo: Topic, Sidebar, Transition +		// no section here, as it’s constructed from titles +		_ => convert_body_elem(pair)?.into(), +	}) +} + + +fn convert_body_elem(pair: Pair<Rule>) -> Result<c::BodyElement, Error> { +	Ok(match pair.as_rule() { +		Rule::paragraph        => convert_paragraph(pair)?.into(), +		Rule::target           => convert_target(pair)?.into(), +		Rule::substitution_def => convert_substitution_def(pair)?.into(), +		Rule::admonition_gen   => convert_admonition_gen(pair)?.into(), +		Rule::image            => convert_image::<e::Image>(pair)?.into(), +		Rule::bullet_list      => convert_bullet_list(pair)?.into(), +		rule => unimplemented!("unhandled rule {:?}", rule), +	}) +} + + +fn convert_title(pair: Pair<Rule>) -> Result<(e::Title, TitleKind), Error> { +	let mut title: Option<String> = None; +	let mut title_inlines: Option<Vec<c::TextOrInlineElement>> = None; +	let mut adornment_char: Option<char> = None; +	// title_double or title_single. Extract kind before consuming +	let inner_pair = pair.into_inner().next().unwrap(); +	let kind = inner_pair.as_rule(); +	for p in inner_pair.into_inner() { +		match p.as_rule() { +			Rule::line => { +				title = Some(p.as_str().to_owned()); +				title_inlines = Some(convert_inlines(p)?); +			}, +			Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")), +			rule => unimplemented!("Unexpected rule in title: {:?}", rule), +		}; +	} +	// now we encountered one line of text and one of adornments +	// TODO: emit error if the adornment line is too short (has to match title length) +	let mut elem = e::Title::with_children(title_inlines.expect("No text in title")); +	if let Some(title) = title { +		//TODO: slugify properly +		let slug =  title.to_lowercase().replace("\n", "").replace(" ", "-"); +		elem.names_mut().push(at::NameToken(slug)); +	} +	let title_kind = match kind { +		Rule::title_double => TitleKind::Double(adornment_char.unwrap()), +		Rule::title_single => TitleKind::Single(adornment_char.unwrap()), +		_ => unreachable!(), +	}; +	Ok((elem, title_kind)) +} + + +fn convert_paragraph(pair: Pair<Rule>) -> Result<e::Paragraph, Error> { +	Ok(e::Paragraph::with_children(convert_inlines(pair)?)) +} + + +fn convert_target(pair: Pair<Rule>) -> Result<e::Target, Error> { +	let mut elem: e::Target = Default::default(); +	elem.extra_mut().anonymous = false; +	for p in pair.into_inner() { +		match p.as_rule() { +			Rule::target_name_uq | Rule::target_name_qu => { +				elem.ids_mut().push(p.as_str().into()); +				elem.names_mut().push(p.as_str().into()); +			}, +			// TODO: also handle non-urls +			Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?), +			rule => panic!("Unexpected rule in target: {:?}", rule), +		} +	} +	Ok(elem) +} + +fn convert_substitution_def(pair: Pair<Rule>) -> Result<e::SubstitutionDefinition, Error> { +	let mut pairs = pair.into_inner(); +	let name = whitespace_normalize_name(pairs.next().unwrap().as_str());  // Rule::substitution_name +	let inner_pair = pairs.next().unwrap(); +	let inner: Vec<c::TextOrInlineElement> = match inner_pair.as_rule() { +		Rule::replace => convert_replace(inner_pair)?, +		Rule::image   => vec![convert_image::<e::ImageInline>(inner_pair)?.into()], +		rule => panic!("Unknown substitution rule {:?}", rule), +	}; +	let mut subst_def = e::SubstitutionDefinition::with_children(inner); +	subst_def.names_mut().push(at::NameToken(name)); +	Ok(subst_def) +} + +fn convert_replace(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> { +	let mut pairs = pair.into_inner(); +	let paragraph = pairs.next().unwrap(); +	convert_inlines(paragraph) +}  + +fn convert_image<I>(pair: Pair<Rule>) -> Result<I, Error> where I: Element + ExtraAttributes<a::Image> { +	let mut pairs = pair.into_inner(); +	let mut image = I::with_extra(a::Image::new( +		pairs.next().unwrap().as_str().trim().parse()?,  // line +	)); +	for opt in pairs { +		let mut opt_iter = opt.into_inner(); +		let opt_name = opt_iter.next().unwrap(); +		let opt_val = opt_iter.next().unwrap(); +		match opt_name.as_str() { +			"class"  => image.classes_mut().push(opt_val.as_str().to_owned()), +			"name"   => image.names_mut().push(opt_val.as_str().into()), +			"alt"    => image.extra_mut().alt    = Some(opt_val.as_str().to_owned()), +			"height" => image.extra_mut().height = Some(opt_val.parse()?), +			"width"  => image.extra_mut().width  = Some(opt_val.parse()?), +			"scale"  => image.extra_mut().scale  = Some(parse_scale(&opt_val)?), +			"align"  => image.extra_mut().align  = Some(opt_val.parse()?), +			"target" => image.extra_mut().target = Some(opt_val.parse()?), +			name => bail!("Unknown Image option {}", name), +		} +	} +	Ok(image) +} + +fn parse_scale(pair: &Pair<Rule>) -> Result<u8, Error> { +	let input = if pair.as_str().chars().rev().next() == Some('%') { &pair.as_str()[..pair.as_str().len()-1] } else { pair.as_str() }; +	use pest::error::{Error,ErrorVariant}; +	Ok(input.parse().map_err(|e: std::num::ParseIntError| { +		let var: ErrorVariant<Rule> = ErrorVariant::CustomError { message: e.to_string() }; +		Error::new_from_span(var, pair.as_span()) +	})?) +} + +fn convert_admonition_gen(pair: Pair<Rule>) -> Result<c::BodyElement, Error> { +	let mut iter = pair.into_inner(); +	let typ = iter.next().unwrap().as_str(); +	// TODO: in reality it contains body elements. +	let children: Vec<c::BodyElement> = iter.map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()).collect(); +	Ok(match typ { +		"attention" => e::Attention::with_children(children).into(), +		"hint"      =>      e::Hint::with_children(children).into(), +		"note"      =>      e::Note::with_children(children).into(), +		"caution"   =>   e::Caution::with_children(children).into(), +		"danger"    =>    e::Danger::with_children(children).into(), +		"error"     =>     e::Error::with_children(children).into(), +		"important" => e::Important::with_children(children).into(), +		"tip"       =>       e::Tip::with_children(children).into(), +		"warning"   =>   e::Warning::with_children(children).into(), +		typ         => panic!("Unknown admontion type {}!", typ), +	}) +} + +fn convert_bullet_list(pair: Pair<Rule>) -> Result<e::BulletList, Error> { +	Ok(e::BulletList::with_children(pair.into_inner().map(convert_bullet_item).collect::<Result<_, _>>()?)) +} + +fn convert_bullet_item(pair: Pair<Rule>) -> Result<e::ListItem, Error> { +	let mut iter = pair.into_inner(); +	let mut children: Vec<c::BodyElement> = vec![ +		convert_paragraph(iter.next().unwrap())?.into() +	]; +	for p in iter { +		children.push(convert_body_elem(p)?); +	} +	Ok(e::ListItem::with_children(children)) +} diff --git a/parser/src/conversion/inline.rs b/parser/src/conversion/inline.rs new file mode 100644 index 0000000..6094714 --- /dev/null +++ b/parser/src/conversion/inline.rs @@ -0,0 +1,160 @@ +use failure::Error; +use pest::iterators::Pair; + +use document_tree::{ +	HasChildren, +	elements as e, +	url::Url, +	element_categories as c, +	extra_attributes as a, +	attribute_types as at, +}; + +use crate::{ +	pest_rst::Rule, +//    pair_ext_parse::PairExt, +}; +use super::whitespace_normalize_name; + + +pub fn convert_inline(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> { +	Ok(match pair.as_rule() { +		Rule::str | Rule::str_nested => pair.as_str().into(), +		Rule::ws_newline        => " ".to_owned().into(), +		Rule::reference         => convert_reference(pair)?, +		Rule::substitution_name => convert_substitution_ref(pair)?.into(), +		Rule::emph              => e::Emphasis::with_children(convert_inlines(pair)?).into(), +		Rule::strong            => e::Strong::with_children(convert_inlines(pair)?).into(), +		Rule::literal           => e::Literal::with_children(convert_inlines(pair)?).into(), +		rule => unimplemented!("unknown rule {:?}", rule), +	}) +} + +pub fn convert_inlines(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> { +	pair.into_inner().map(convert_inline).collect() +} + +fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> { +	let name; +	let refuri; +	let refid; +	let mut refname = vec![]; +	let mut children: Vec<c::TextOrInlineElement> = vec![]; +	let concrete = pair.into_inner().next().unwrap(); +	match concrete.as_rule() { +		Rule::reference_target => { +			let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu +			match rt_inner.as_rule() { +				Rule::reference_target_uq => { +					refid  = None; +					name   = Some(rt_inner.as_str().into()); +					refuri = None; +					refname.push(rt_inner.as_str().into()); +					children.push(rt_inner.as_str().into()); +				}, +				Rule::reference_target_qu => { +					let (text, reference) = { +						let mut text = None; +						let mut reference = None; +						for inner in rt_inner.clone().into_inner() { +							match inner.as_rule() { +								Rule::reference_text => text = Some(inner), +								Rule::reference_bracketed => reference = Some(inner), +								_ => unreachable!() +							} +						} +						(text, reference) +					}; +					let trimmed_text = match (&text, &reference) { +						(Some(text), None) => text.as_str(), +						(_, Some(reference)) => { +							text +								.map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch))) +								.filter(|text| !text.is_empty()) +								.unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str()) +						} +						(None, None) => unreachable!() +					}; +					refid = None; +					name = Some(trimmed_text.into()); +					refuri = if let Some(reference) = reference { +						let inner = reference.into_inner().next().unwrap(); +						match inner.as_rule() { +							// The URL rules in our parser accept a narrow superset of +							// valid URLs, so we need to handle false positives. +							Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { +								Some(target) +							} else if inner.as_str().ends_with('_') { +								// like target_name_qu (minus the final underscore) +								let full_str = inner.as_str(); +								refname.push(full_str[0..full_str.len() - 1].into()); +								None +							} else { +								// like relative_reference +								Some(Url::parse_relative(inner.as_str())?) +							}, +							Rule::target_name_qu => { +								refname.push(inner.as_str().into()); +								None +							}, +							Rule::relative_reference => { +								Some(Url::parse_relative(inner.as_str())?) +							}, +							_ => unreachable!() +						} +					} else { +						refname.push(trimmed_text.into()); +						None +					}; +					children.push(trimmed_text.into()); +				}, +				_ => unreachable!() +			} +		}, +		Rule::reference_explicit => unimplemented!("explicit reference"), +		Rule::reference_auto => { +			let rt_inner = concrete.into_inner().next().unwrap(); +			match rt_inner.as_rule() { +				Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { +					Ok(target) => { +						refuri = Some(target); +						name   = None; +						refid  = None; +						children.push(rt_inner.as_str().into()); +					}, +					// if our parser got a URL wrong, return it as a string +					Err(_) => return Ok(rt_inner.as_str().into()) +				}, +				Rule::email => { +					let mailto_url = String::from("mailto:") + rt_inner.as_str(); +					match Url::parse_absolute(&mailto_url) { +						Ok(target) => { +							refuri = Some(target); +							name   = None; +							refid  = None; +							children.push(rt_inner.as_str().into()); +						}, +						// if our parser got a URL wrong, return it as a string +						Err(_) => return Ok(rt_inner.as_str().into()) +					} +				}, +				_ => unreachable!() +			} +		}, +		_ => unreachable!(), +	}; +	Ok(e::Reference::new( +		Default::default(), +		a::Reference { name, refuri, refid, refname }, +		children +	).into()) +} + +fn convert_substitution_ref(pair: Pair<Rule>) -> Result<e::SubstitutionReference, Error> { +	let name = whitespace_normalize_name(pair.as_str()); +	Ok(a::ExtraAttributes::with_extra( +		a::SubstitutionReference { +			refname: vec![at::NameToken(name)] +		} +	)) +} diff --git a/parser/src/conversion/tests.rs b/parser/src/conversion/tests.rs new file mode 100644 index 0000000..89b0a1c --- /dev/null +++ b/parser/src/conversion/tests.rs @@ -0,0 +1,65 @@ +use document_tree::{ +	elements as e, +	element_categories as c, +	HasChildren, +}; + +use crate::parse; + +fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section { +	match ssubel { +		c::StructuralSubElement::SubStructure(ref b) => match **b { +			c::SubStructure::Section(ref s) => s, +			ref c => panic!("Expected section, not {:?}", c), +		}, +		ref c => panic!("Expected SubStructure, not {:?}", c), +	} +} + +const SECTIONS: &str = "\ +Intro before first section title + +Level 1 +******* + +------- +Level 2 +------- + +Level 3 +======= + +L1 again +******** + +L3 again, skipping L2 +===================== +"; + +#[test] +fn convert_skipped_section() { +	let doctree = parse(SECTIONS).unwrap(); +	let lvl0 = doctree.children(); +	assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0); +	 +	assert_eq!(lvl0[0], e::Paragraph::with_children(vec![ +		"Intro before first section title".to_owned().into() +	]).into(), "The intro text should fit"); +	 +	let lvl1a = ssubel_to_section(&lvl0[1]).children(); +	assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a); +	//TODO: test title lvl1a[0] +	let lvl2  = ssubel_to_section(&lvl1a[1]).children(); +	assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2); +	//TODO: test title lvl2[0] +	let lvl3a = ssubel_to_section(&lvl2[1]).children(); +	assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a); +	//TODO: test title lvl3a[0] +	 +	let lvl1b = ssubel_to_section(&lvl0[2]).children(); +	assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b); +	//TODO: test title lvl1b[0] +	let lvl3b = ssubel_to_section(&lvl1b[1]).children(); +	assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b); +	//TODO: test title lvl3b[0] +} | 
