parser/src/conversion.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

mod block;
mod inline;
#[cfg(test)]
mod tests;

use failure::Error;
use pest::iterators::Pairs;

use document_tree::{
	Element,HasChildren,
	elements as e,
	element_categories as c,
	attribute_types as at,
};

use crate::pest_rst::Rule;


/// Narrows a structural sub-element down to the section it wraps.
///
/// # Panics
///
/// Hits `unreachable!()` when `ssubel` is not a `SubStructure`, or when
/// the wrapped sub-structure is not a `Section`. Callers must only pass
/// elements they know to be sections.
fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section {
	// First peel off the outer category ...
	let sub_structure = match ssubel {
		c::StructuralSubElement::SubStructure(boxed) => boxed,
		_ => unreachable!(),
	};
	// ... then the inner one.
	match **sub_structure {
		c::SubStructure::Section(ref mut section) => section,
		_ => unreachable!(),
	}
}


/// Follows `section_idxs` down from `toplevel` and returns the child list
/// of the innermost section reached.
///
/// Every `Some(i)` descends into the children of the section stored at
/// index `i` of the current level; `None` entries are skipped without
/// descending. With no `Some` entries at all, `toplevel` itself is returned.
///
/// # Panics
///
/// Panics if an index is out of bounds for its level, or if the element
/// at that index is not a section.
fn get_level<'tl>(toplevel: &'tl mut Vec<c::StructuralSubElement>, section_idxs: &[Option<usize>]) -> &'tl mut Vec<c::StructuralSubElement> {
	section_idxs.iter().flatten().fold(toplevel, |level, &i| {
		ssubel_to_section_unchecked_mut(&mut level[i]).children_mut()
	})
}


/// Builds a document from the top-level parser pairs, nesting sections
/// according to the kind of their titles.
///
/// Title kinds are ranked in order of first appearance: the first kind
/// seen opens sections at depth 0, the second at depth 1, and so on.
/// A title whose kind sits deeper than the currently open nesting is
/// attached to the innermost open section, and the levels in between are
/// recorded as skipped. Every non-title element is appended to the
/// innermost open section (or to the document while no section is open).
///
/// # Errors
///
/// Propagates any error returned by `block::convert_ssubel`.
pub fn convert_document(pairs: Pairs<Rule>) -> Result<e::Document, Error> {
	use self::block::TitleOrSsubel::*;
	
	let mut toplevel: Vec<c::StructuralSubElement> = vec![];
	// The kinds of section titles encountered, in order of first appearance.
	// `section_idxs[x]` has the kind `kinds[x]`, but `kinds` can be longer
	let mut kinds: Vec<block::TitleKind> = vec![];
	// Recursive indices into the tree, pointing at the active sections.
	// `None`s indicate skipped section levels:
	// toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]...
	let mut section_idxs: Vec<Option<usize>> = vec![];
	
	for pair in pairs {
		if let Some(ssubel) = block::convert_ssubel(pair)? { match ssubel {
			Title(title, kind) => {
				// The depth of the new section is the position of its kind
				// in `kinds`; a kind not seen before gets the next free depth.
				let depth = match kinds.iter().position(|k| k == &kind) {
					Some(idx) => idx,
					None => {
						kinds.push(kind);
						kinds.len() - 1
					},
				};
				// `depth` is the slot the new section will occupy,
				// so exactly `depth` entries must precede it:
				// If depth < len: Remove the section at that depth and all below
				// If depth > len: Add None for skipped levels
				// This has to happen for new kinds too, otherwise the section
				// ends up in a slot that disagrees with `kinds`, and the next
				// title of the same kind is nested inside it instead of next to it.
				// TODO: test skipped levels
				section_idxs.resize(depth, None);
				
				let super_level = get_level(&mut toplevel, &section_idxs);
				// The first name of the title (if any) becomes the section's ID.
				let slug = title.names().iter().next().map(|at::NameToken(name)| at::ID(name.to_owned()));
				let mut section = e::Section::with_children(vec![title.into()]);
				section.ids_mut().extend(slug);
				super_level.push(section.into());
				// The section just pushed is now the active one at `depth`.
				section_idxs.push(Some(super_level.len() - 1));
			},
			Ssubel(elem) => get_level(&mut toplevel, &section_idxs).push(elem),
		}}
	}
	Ok(e::Document::with_children(toplevel))
}

/// Collapses every run of whitespace in `name` into a single space and
/// strips leading and trailing whitespace. Equivalent to docutils's
/// `docutils.nodes.whitespace_normalize_name`.
pub fn whitespace_normalize_name(name: &str) -> String {
	// Python's `str.split()` treats the separators U+001C..=U+001F as
	// whitespace as well, which Rust's `char::is_whitespace` does not.
	let is_python_whitespace = |ch: char| ch.is_whitespace() || ('\x1C'..='\x1F').contains(&ch);
	name.split(is_python_whitespace)
		.filter(|word| !word.is_empty())
		.collect::<Vec<_>>()
		.join(" ")
}