src/parser/conversion.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165

mod block;
mod inline;

use failure::Error;
use pest::iterators::Pairs;

use crate::document_tree::{
	Element,HasChildren,
	elements as e,
	element_categories as c,
	attribute_types as at,
};

use super::pest_rst::Rule;


/// Returns a mutable reference to the section wrapped by `ssubel`.
///
/// The caller must already know that `ssubel` is a
/// `StructuralSubElement::SubStructure` whose payload is a
/// `SubStructure::Section`; no error is reported otherwise.
///
/// # Panics
///
/// Hits `unreachable!()` if `ssubel` holds anything other than a section.
fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section {
	// First peel off the outer category wrapper ...
	let sub = match *ssubel {
		c::StructuralSubElement::SubStructure(ref mut wrapped) => wrapped,
		_ => unreachable!(),
	};
	// ... then the sub-structure variant itself.
	match **sub {
		c::SubStructure::Section(ref mut section) => section,
		_ => unreachable!(),
	}
}


/// Follows `section_idxs` down from `toplevel` and returns the child list of
/// the innermost section reached.
///
/// Every `Some(i)` entry descends into the `i`-th element of the current
/// child list, which has to be a section. `None` entries are skipped and
/// leave the current list unchanged. If there are no `Some` entries at all,
/// `toplevel` itself is returned.
///
/// # Panics
///
/// Panics if an index is out of bounds for its child list, or if the element
/// at that index is not a section.
fn get_level<'tl>(toplevel: &'tl mut Vec<c::StructuralSubElement>, section_idxs: &[Option<usize>]) -> &'tl mut Vec<c::StructuralSubElement> {
	let mut current = toplevel;
	for idx in section_idxs.iter().filter_map(|maybe_idx| *maybe_idx) {
		let section = ssubel_to_section_unchecked_mut(&mut current[idx]);
		current = section.children_mut();
	}
	current
}


/// Builds a document tree from the top-level parse `pairs`.
///
/// Each pair is converted with `block::convert_ssubel`. A title opens a new
/// section, and every other element is appended to the children of the
/// innermost section that is currently open (or to the document itself if no
/// section is open).
///
/// Section nesting is derived from the title kinds: the position at which a
/// kind was first seen is its depth. A title whose kind is already known
/// closes all open sections at that depth and below before its own section is
/// opened. A title with a kind not seen before is nested below whatever
/// section is currently innermost.
///
/// # Errors
///
/// Returns the first error produced by `block::convert_ssubel`.
pub fn convert_document(pairs: Pairs<Rule>) -> Result<e::Document, Error> {
	use self::block::TitleOrSsubel::*;
	
	let mut toplevel: Vec<c::StructuralSubElement> = Vec::new();
	// Title kinds in order of first appearance.
	// `section_idxs[x]` belongs to a section of kind `kinds[x]`;
	// `kinds` may be longer than `section_idxs`.
	let mut kinds: Vec<block::TitleKind> = Vec::new();
	// Path of child indices leading to the currently open sections:
	// toplevel[a].children[b].children[c]... for the `Some` entries a, b, c.
	// A `None` entry marks a depth that was skipped and has no section.
	let mut section_idxs: Vec<Option<usize>> = Vec::new();
	
	for pair in pairs {
		match block::convert_ssubel(pair)? {
			Some(Title(title, kind)) => {
				let known_depth = kinds.iter().position(|k| k == &kind);
				match known_depth {
					// `depth` is where the new section goes, so exactly `depth`
					// path entries must remain in front of it: `resize` drops the
					// sections at that depth and deeper when the path is longer,
					// and pads with `None` for skipped depths when it is shorter.
					Some(depth) => section_idxs.resize(depth, None),
					None => kinds.push(kind),
				}
				// The first name of the title (if any) becomes the section's ID.
				let slug = title.names().iter().next().map(|at::NameToken(name)| at::ID(name.to_owned()));
				let mut section = e::Section::with_children(vec![title.into()]);
				section.ids_mut().extend(slug);
				
				let parent = get_level(&mut toplevel, &section_idxs);
				let new_idx = parent.len();
				parent.push(section.into());
				section_idxs.push(Some(new_idx));
			},
			Some(Ssubel(elem)) => get_level(&mut toplevel, &section_idxs).push(elem),
			// The pair produced no element; nothing to add.
			None => {},
		}
	}
	Ok(e::Document::with_children(toplevel))
}

/// Collapses each run of whitespace in `name` into a single space and drops
/// leading and trailing whitespace. Equivalent to docutils's
/// `docutils.nodes.whitespace_normalize_name`.
pub fn whitespace_normalize_name(name: &str) -> String {
	// Python's string.split() defines whitespace differently than Rust does:
	// the characters U+001C..=U+001F are treated as separators here in
	// addition to everything `char::is_whitespace` accepts.
	fn is_separator(ch: char) -> bool {
		match ch {
			'\x1C'..='\x1F' => true,
			_ => ch.is_whitespace(),
		}
	}
	
	name.split(is_separator)
		.filter(|word| !word.is_empty())
		.collect::<Vec<_>>()
		.join(" ")
}


#[cfg(test)]
mod tests {
	use crate::{
		parser::parse,
		document_tree::{
			elements as e,
			element_categories as c,
			HasChildren,
		}
	};
	
	/// Returns the section wrapped by `ssubel`.
	///
	/// Panics with a message showing the actual element if `ssubel`
	/// is not a section.
	fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section {
		match ssubel {
			c::StructuralSubElement::SubStructure(ref b) => match **b {
				c::SubStructure::Section(ref s) => s,
				ref other => panic!("Expected section, not {:?}", other),
			},
			ref other => panic!("Expected SubStructure, not {:?}", other),
		}
	}
	
	/// Test document: an intro paragraph, a level 1 section containing
	/// nested level 2 and level 3 sections, and a second level 1 section
	/// whose only subsection uses the level 3 title style, skipping level 2.
	const SECTIONS: &str = "\
Intro before first section title

Level 1
*******

-------
Level 2
-------

Level 3
=======

L1 again
********

L3 again, skipping L2
=====================
";
	
	/// Checks the section nesting produced for `SECTIONS`, including the
	/// level 3 section that sits directly below a level 1 section.
	#[test]
	fn convert_skipped_section() {
		let doctree = parse(SECTIONS).unwrap();
		let lvl0 = doctree.children();
		assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0);
		
		assert_eq!(lvl0[0], e::Paragraph::with_children(vec![
			"Intro before first section title".to_owned().into()
		]).into(), "The intro text should fit");
		
		let lvl1a = ssubel_to_section(&lvl0[1]).children();
		assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a);
		//TODO: test title lvl1a[0]
		let lvl2  = ssubel_to_section(&lvl1a[1]).children();
		assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2);
		//TODO: test title lvl2[0]
		let lvl3a = ssubel_to_section(&lvl2[1]).children();
		assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should have just a title: {:?}", lvl3a);
		//TODO: test title lvl3a[0]
		
		let lvl1b = ssubel_to_section(&lvl0[2]).children();
		// The second child is the lvl3 section; lvl2 is skipped here.
		assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl3 section as child: {:?}", lvl1b);
		//TODO: test title lvl1b[0]
		let lvl3b = ssubel_to_section(&lvl1b[1]).children();
		assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b);
		//TODO: test title lvl3b[0]
	}
}