diff options
Diffstat (limited to 'parser/src/conversion.rs')
| -rw-r--r-- | parser/src/conversion.rs | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/parser/src/conversion.rs b/parser/src/conversion.rs new file mode 100644 index 0000000..de5f091 --- /dev/null +++ b/parser/src/conversion.rs @@ -0,0 +1,96 @@ +mod block; +mod inline; +#[cfg(test)] +mod tests; + +use failure::Error; +use pest::iterators::Pairs; + +use document_tree::{ + Element,HasChildren, + elements as e, + element_categories as c, + attribute_types as at, +}; + +use crate::pest_rst::Rule; + + +fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section { + match ssubel { + c::StructuralSubElement::SubStructure(ref mut b) => match **b { + c::SubStructure::Section(ref mut s) => s, + _ => unreachable!(), + }, + _ => unreachable!(), + } +} + + +fn get_level<'tl>(toplevel: &'tl mut Vec<c::StructuralSubElement>, section_idxs: &[Option<usize>]) -> &'tl mut Vec<c::StructuralSubElement> { + let mut level = toplevel; + for maybe_i in section_idxs { + if let Some(i) = *maybe_i { + level = ssubel_to_section_unchecked_mut(&mut level[i]).children_mut(); + } + } + level +} + + +pub fn convert_document(pairs: Pairs<Rule>) -> Result<e::Document, Error> { + use self::block::TitleOrSsubel::*; + + let mut toplevel: Vec<c::StructuralSubElement> = vec![]; + // The kinds of section titles encountered. + // `section_idx[x]` has the kind `kinds[x]`, but `kinds` can be longer + let mut kinds: Vec<block::TitleKind> = vec![]; + // Recursive indices into the tree, pointing at the active sections. + // `None`s indicate skipped section levels: + // toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]... + let mut section_idxs: Vec<Option<usize>> = vec![]; + + for pair in pairs { + if let Some(ssubel) = block::convert_ssubel(pair)? { match ssubel { + Title(title, kind) => { + match kinds.iter().position(|k| k == &kind) { + // Idx points to the level we want to add, + // so idx-1 needs to be the last valid index. + Some(idx) => { + // If idx < len: Remove found section and all below + section_idxs.truncate(idx); + // If idx > len: Add None for skipped levels + // TODO: test skipped levels + while section_idxs.len() < idx { section_idxs.push(None) } + }, + None => kinds.push(kind), + } + let super_level = get_level(&mut toplevel, §ion_idxs); + let slug = title.names().iter().next().map(|at::NameToken(name)| at::ID(name.to_owned())); + let mut section = e::Section::with_children(vec![title.into()]); + section.ids_mut().extend(slug.into_iter()); + super_level.push(section.into()); + section_idxs.push(Some(super_level.len() - 1)); + }, + Ssubel(elem) => get_level(&mut toplevel, §ion_idxs).push(elem), + }} + } + Ok(e::Document::with_children(toplevel)) +} + +/// Normalizes a name in terms of whitespace. Equivalent to docutils's +/// `docutils.nodes.whitespace_normalize_name`. +pub fn whitespace_normalize_name(name: &str) -> String { + // Python's string.split() defines whitespace differently than Rust does. + let split_iter = name.split( + |ch: char| ch.is_whitespace() || (ch >= '\x1C' && ch <= '\x1F') + ).filter(|split| !split.is_empty()); + let mut ret = String::new(); + for split in split_iter { + if !ret.is_empty() { + ret.push(' '); + } + ret.push_str(split); + } + ret +} |
