diff options
| author | Philipp A | 2019-12-26 23:01:00 +0100 | 
|---|---|---|
| committer | Philipp A | 2019-12-26 23:36:48 +0100 | 
| commit | a0e3c53758d526bb418c068bce1c99fa5a597ed3 (patch) | |
| tree | e640238b011a9ea7806ccccaf1a435e4b371a376 /parser/src/conversion/inline.rs | |
| parent | 7018f5d3c42f18b6c83f398db9f1915361a7c679 (diff) | |
| download | rust-rst-a0e3c53758d526bb418c068bce1c99fa5a597ed3.tar.bz2 | |
Split into smaller crates
Diffstat (limited to 'parser/src/conversion/inline.rs')
| -rw-r--r-- | parser/src/conversion/inline.rs | 160 | 
1 files changed, 160 insertions, 0 deletions
| diff --git a/parser/src/conversion/inline.rs b/parser/src/conversion/inline.rs new file mode 100644 index 0000000..6094714 --- /dev/null +++ b/parser/src/conversion/inline.rs @@ -0,0 +1,160 @@ +use failure::Error; +use pest::iterators::Pair; + +use document_tree::{ +	HasChildren, +	elements as e, +	url::Url, +	element_categories as c, +	extra_attributes as a, +	attribute_types as at, +}; + +use crate::{ +	pest_rst::Rule, +//    pair_ext_parse::PairExt, +}; +use super::whitespace_normalize_name; + + +pub fn convert_inline(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> { +	Ok(match pair.as_rule() { +		Rule::str | Rule::str_nested => pair.as_str().into(), +		Rule::ws_newline        => " ".to_owned().into(), +		Rule::reference         => convert_reference(pair)?, +		Rule::substitution_name => convert_substitution_ref(pair)?.into(), +		Rule::emph              => e::Emphasis::with_children(convert_inlines(pair)?).into(), +		Rule::strong            => e::Strong::with_children(convert_inlines(pair)?).into(), +		Rule::literal           => e::Literal::with_children(convert_inlines(pair)?).into(), +		rule => unimplemented!("unknown rule {:?}", rule), +	}) +} + +pub fn convert_inlines(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> { +	pair.into_inner().map(convert_inline).collect() +} + +fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> { +	let name; +	let refuri; +	let refid; +	let mut refname = vec![]; +	let mut children: Vec<c::TextOrInlineElement> = vec![]; +	let concrete = pair.into_inner().next().unwrap(); +	match concrete.as_rule() { +		Rule::reference_target => { +			let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu +			match rt_inner.as_rule() { +				Rule::reference_target_uq => { +					refid  = None; +					name   = Some(rt_inner.as_str().into()); +					refuri = None; +					refname.push(rt_inner.as_str().into()); +					children.push(rt_inner.as_str().into()); +				}, +				Rule::reference_target_qu => { +					let (text, reference) = { +						let mut text = None; +						let mut reference = None; +						for inner in rt_inner.clone().into_inner() { +							match inner.as_rule() { +								Rule::reference_text => text = Some(inner), +								Rule::reference_bracketed => reference = Some(inner), +								_ => unreachable!() +							} +						} +						(text, reference) +					}; +					let trimmed_text = match (&text, &reference) { +						(Some(text), None) => text.as_str(), +						(_, Some(reference)) => { +							text +								.map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch))) +								.filter(|text| !text.is_empty()) +								.unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str()) +						} +						(None, None) => unreachable!() +					}; +					refid = None; +					name = Some(trimmed_text.into()); +					refuri = if let Some(reference) = reference { +						let inner = reference.into_inner().next().unwrap(); +						match inner.as_rule() { +							// The URL rules in our parser accept a narrow superset of +							// valid URLs, so we need to handle false positives. +							Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { +								Some(target) +							} else if inner.as_str().ends_with('_') { +								// like target_name_qu (minus the final underscore) +								let full_str = inner.as_str(); +								refname.push(full_str[0..full_str.len() - 1].into()); +								None +							} else { +								// like relative_reference +								Some(Url::parse_relative(inner.as_str())?) +							}, +							Rule::target_name_qu => { +								refname.push(inner.as_str().into()); +								None +							}, +							Rule::relative_reference => { +								Some(Url::parse_relative(inner.as_str())?) +							}, +							_ => unreachable!() +						} +					} else { +						refname.push(trimmed_text.into()); +						None +					}; +					children.push(trimmed_text.into()); +				}, +				_ => unreachable!() +			} +		}, +		Rule::reference_explicit => unimplemented!("explicit reference"), +		Rule::reference_auto => { +			let rt_inner = concrete.into_inner().next().unwrap(); +			match rt_inner.as_rule() { +				Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { +					Ok(target) => { +						refuri = Some(target); +						name   = None; +						refid  = None; +						children.push(rt_inner.as_str().into()); +					}, +					// if our parser got a URL wrong, return it as a string +					Err(_) => return Ok(rt_inner.as_str().into()) +				}, +				Rule::email => { +					let mailto_url = String::from("mailto:") + rt_inner.as_str(); +					match Url::parse_absolute(&mailto_url) { +						Ok(target) => { +							refuri = Some(target); +							name   = None; +							refid  = None; +							children.push(rt_inner.as_str().into()); +						}, +						// if our parser got a URL wrong, return it as a string +						Err(_) => return Ok(rt_inner.as_str().into()) +					} +				}, +				_ => unreachable!() +			} +		}, +		_ => unreachable!(), +	}; +	Ok(e::Reference::new( +		Default::default(), +		a::Reference { name, refuri, refid, refname }, +		children +	).into()) +} + +fn convert_substitution_ref(pair: Pair<Rule>) -> Result<e::SubstitutionReference, Error> { +	let name = whitespace_normalize_name(pair.as_str()); +	Ok(a::ExtraAttributes::with_extra( +		a::SubstitutionReference { +			refname: vec![at::NameToken(name)] +		} +	)) +} | 
