about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Philipp A 2018-12-10 09:38:31 +0100
committer Philipp A 2018-12-30 17:29:00 +0100
commit 6d995f698f580aba9e67b847432899ce841e6e7d (patch)
tree e2388a9967c230934e8d83731605afb57867e841
parent d019d0bff16e7277533a89399a57f290439bc2f5 (diff)
download rust-rst-6d995f698f580aba9e67b847432899ce841e6e7d.tar.bz2
document conversion
-rw-r--r-- src/document_tree/attribute_types.rs 14
-rw-r--r-- src/document_tree/element_categories.rs 3
-rw-r--r-- src/document_tree/elements.rs 6
-rw-r--r-- src/document_tree/extra_attributes.rs 4
-rw-r--r-- src/parser/conversion.rs 132
-rw-r--r-- src/parser/conversion/block.rs 51
-rw-r--r-- src/parser/tests.rs 8
-rw-r--r-- src/rst.pest 7
-rw-r--r-- src/target.rs 2
9 files changed, 184 insertions, 43 deletions
diff --git a/src/document_tree/attribute_types.rs b/src/document_tree/attribute_types.rs
index ba631d9..400c59e 100644
--- a/src/document_tree/attribute_types.rs
+++ b/src/document_tree/attribute_types.rs
@@ -4,7 +4,7 @@ use failure::{Error,bail,format_err};
use serde_derive::Serialize;
use regex::Regex;
-#[derive(Debug,Serialize)]
+#[derive(Debug,PartialEq,Serialize)]
pub enum EnumeratedListType {
Arabic,
LowerAlpha,
@@ -13,17 +13,17 @@ pub enum EnumeratedListType {
UpperRoman,
}
-#[derive(Debug,Serialize)]
+#[derive(Debug,PartialEq,Serialize)]
pub enum FixedSpace { Default, Preserve } // yes, default really is not “Default”
impl Default for FixedSpace { fn default() -> FixedSpace { FixedSpace::Preserve } }
-#[derive(Debug,Serialize)] pub enum AlignH { Left, Center, Right}
-#[derive(Debug,Serialize)] pub enum AlignHV { Top, Middle, Bottom, Left, Center, Right }
+#[derive(Debug,PartialEq,Serialize)] pub enum AlignH { Left, Center, Right}
+#[derive(Debug,PartialEq,Serialize)] pub enum AlignHV { Top, Middle, Bottom, Left, Center, Right }
-#[derive(Debug,Serialize)] pub struct ID(pub String);
-#[derive(Debug,Serialize)] pub struct NameToken(pub String);
+#[derive(Debug,PartialEq,Serialize)] pub struct ID(pub String);
+#[derive(Debug,PartialEq,Serialize)] pub struct NameToken(pub String);
-#[derive(Debug,Serialize)]
+#[derive(Debug,PartialEq,Serialize)]
pub enum Measure { // http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#length-units
Em(f64),
Ex(f64),
diff --git a/src/document_tree/element_categories.rs b/src/document_tree/element_categories.rs
index ec53f09..f3ac884 100644
--- a/src/document_tree/element_categories.rs
+++ b/src/document_tree/element_categories.rs
@@ -43,7 +43,7 @@ macro_rules! synonymous_enum {
cartesian!(impl_into, [ $( ($subcat::$entry) ),+ ], [ $($supcat),+ ]);
};
( $name:ident { $( $entry:ident ),+ $(,)* } ) => {
- #[derive(Serialize)]
+ #[derive(PartialEq,Serialize)]
pub enum $name { $(
$entry(Box<$entry>),
)* }
@@ -87,7 +87,6 @@ synonymous_enum!(TextOrInlineElement {
//Content Models\\
//--------------\\
-synonymous_enum!(SubSection { Title, Subtitle, Docinfo, Decoration, SubStructure });
synonymous_enum!(AuthorInfo { Author, Organization, Address, Contact });
synonymous_enum!(DecorationElement { Header, Footer });
synonymous_enum!(SubTopic { Title, BodyElement });
diff --git a/src/document_tree/elements.rs b/src/document_tree/elements.rs
index 72f10e8..52b1f5b 100644
--- a/src/document_tree/elements.rs
+++ b/src/document_tree/elements.rs
@@ -26,7 +26,7 @@ pub trait Element {
fn classes_mut(&mut self) -> &mut Vec<String>;
}
-#[derive(Debug,Default,Serialize)]
+#[derive(Debug,Default,PartialEq,Serialize)]
pub struct CommonAttributes {
ids: Vec<ID>,
names: Vec<NameToken>,
@@ -78,7 +78,7 @@ macro_rules! impl_new {(
),* $(,)* }
) => (
$(#[$attr])*
- #[derive(Debug,Serialize)]
+ #[derive(Debug,PartialEq,Serialize)]
pub struct $name { $(
$(#[$fattr])* $field: $typ,
)* }
@@ -136,7 +136,7 @@ impl_children!(Document, StructuralSubElement);
impl_elems!(
//structual elements
- (Section, SubSection)
+ (Section, StructuralSubElement)
(Topic, SubTopic)
(Sidebar, SubSidebar)
diff --git a/src/document_tree/extra_attributes.rs b/src/document_tree/extra_attributes.rs
index 58a1e94..0708080 100644
--- a/src/document_tree/extra_attributes.rs
+++ b/src/document_tree/extra_attributes.rs
@@ -20,7 +20,7 @@ macro_rules! skip {
macro_rules! impl_extra {
( $name:ident { $( $(#[$pattr:meta])* $param:ident : $type:ty ),* $(,)* } ) => (
impl_extra!(
- #[derive(Default,Debug,Serialize)]
+ #[derive(Default,Debug,PartialEq,Serialize)]
$name { $( $(#[$pattr])* $param : $type, )* }
);
);
@@ -49,7 +49,7 @@ impl_extra!(Target {
anonymous: bool,
});
impl_extra!(Raw { space: FixedSpace, format: Vec<NameToken> });
-impl_extra!(#[derive(Debug,Serialize)] Image {
+impl_extra!(#[derive(Debug,PartialEq,Serialize)] Image {
uri: target::Target,
align: Option<AlignHV>,
alt: Option<String>,
diff --git a/src/parser/conversion.rs b/src/parser/conversion.rs
index ad214d5..50a7fb2 100644
--- a/src/parser/conversion.rs
+++ b/src/parser/conversion.rs
@@ -7,14 +7,138 @@ use pest::iterators::Pairs;
use crate::document_tree::{
HasChildren,
elements as e,
+ element_categories as c,
};
use super::pest_rst::Rule;
+fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section {
+ match ssubel {
+ c::StructuralSubElement::SubStructure(ref mut b) => match **b {
+ c::SubStructure::Section(ref mut s) => s,
+ _ => unreachable!(),
+ },
+ _ => unreachable!(),
+ }
+}
+
+
+fn get_level<'tl>(toplevel: &'tl mut Vec<c::StructuralSubElement>, section_idxs: &[Option<usize>]) -> &'tl mut Vec<c::StructuralSubElement> {
+ let mut level = toplevel;
+ for maybe_i in section_idxs {
+ if let Some(i) = *maybe_i {
+ level = ssubel_to_section_unchecked_mut(&mut level[i]).children_mut();
+ }
+ }
+ level
+}
+
+
pub fn convert_document(pairs: Pairs<Rule>) -> Result<e::Document, Error> {
- let structural_elems = pairs.map(block::convert_ssubel)
- .filter_map(|elem| match elem { Ok(Some(e)) => Some(Ok(e)), Err(e) => Some(Err(e)), Ok(None) => None })
- .collect::<Result<_,_>>()?;
- Ok(e::Document::with_children(structural_elems))
+ use self::block::TitleOrSsubel::*;
+
+ let mut toplevel: Vec<c::StructuralSubElement> = vec![];
+ // The kinds of section titles encountered.
+ // `section_idxs[x]` has the kind `kinds[x]`, but `kinds` can be longer
+ let mut kinds: Vec<block::TitleKind> = vec![];
+ // Recursive indices into the tree, pointing at the active sections.
+ // `None`s indicate skipped section levels:
+ // toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]...
+ let mut section_idxs: Vec<Option<usize>> = vec![];
+
+ for pair in pairs {
+ if let Some(ssubel) = block::convert_ssubel(pair)? { match ssubel {
+ Title(title, kind) => {
+ match kinds.iter().position(|k| k == &kind) {
+ // Idx points to the level we want to add,
+ // so idx-1 needs to be the last valid index.
+ Some(idx) => {
+ // If idx < len: Remove found section and all below
+ section_idxs.truncate(idx);
+ // If idx > len: Add None for skipped levels
+ // TODO: test skipped levels
+ while section_idxs.len() < idx { section_idxs.push(None) }
+ },
+ None => kinds.push(kind),
+ }
+ let super_level = get_level(&mut toplevel, &section_idxs);
+ super_level.push(e::Section::with_children(vec![title.into()]).into());
+ section_idxs.push(Some(super_level.len() - 1));
+ },
+ Ssubel(elem) => get_level(&mut toplevel, &section_idxs).push(elem),
+ }}
+ }
+ Ok(e::Document::with_children(toplevel))
+}
+
+
+#[cfg(test)]
+mod tests {
+ use crate::{
+ parser::parse,
+ document_tree::{
+ elements as e,
+ element_categories as c,
+ HasChildren,
+ }
+ };
+
+ fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section {
+ match ssubel {
+ c::StructuralSubElement::SubStructure(ref b) => match **b {
+ c::SubStructure::Section(ref s) => s,
+ ref c => panic!("Expected section, not {:?}", c),
+ },
+ ref c => panic!("Expected SubStructure, not {:?}", c),
+ }
+ }
+
+ const SECTIONS: &str = "\
+Intro before first section title
+
+Level 1
+*******
+
+-------
+Level 2
+-------
+
+Level 3
+=======
+
+L1 again
+********
+
+L3 again, skipping L2
+=====================
+";
+
+ #[test]
+ fn convert_skipped_section() {
+ let doctree = parse(SECTIONS).unwrap();
+ let lvl0 = doctree.children();
+ assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0);
+
+ assert_eq!(lvl0[0], e::Paragraph::with_children(vec![
+ "Intro before first section title".to_owned().into()
+ ]).into(), "The intro text should fit");
+
+ let lvl1a = ssubel_to_section(&lvl0[1]).children();
+ assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a);
+ //TODO: test title lvl1a[0]
+ let lvl2 = ssubel_to_section(&lvl1a[1]).children();
+ assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2);
+ //TODO: test title lvl2[0]
+ let lvl3a = ssubel_to_section(&lvl2[1]).children();
+ assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a);
+ //TODO: test title lvl3a[0]
+
+ let lvl1b = ssubel_to_section(&lvl0[2]).children();
+ assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b);
+ //TODO: test title lvl1b[0]
+ let lvl3b = ssubel_to_section(&lvl1b[1]).children();
+ assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b);
+ //TODO: test title lvl3b[0]
+ }
}
diff --git a/src/parser/conversion/block.rs b/src/parser/conversion/block.rs
index f3ef923..9abd1e4 100644
--- a/src/parser/conversion/block.rs
+++ b/src/parser/conversion/block.rs
@@ -15,35 +15,54 @@ use crate::parser::{
use super::inline::convert_inline;
-pub fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<c::StructuralSubElement>, Error> {
- // TODO: This is just a proof of concept. Keep closely to DTD in final version!
+#[derive(PartialEq)]
+pub(super) enum TitleKind { Double(char), Single(char) }
+
+pub(super) enum TitleOrSsubel {
+ Title(e::Title, TitleKind),
+ Ssubel(c::StructuralSubElement),
+}
+
+
+pub(super) fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<TitleOrSsubel>, Error> {
+ use self::TitleOrSsubel::*;
Ok(Some(match pair.as_rule() {
- Rule::title => convert_title(pair).into(),
- Rule::paragraph => convert_paragraph(pair)?.into(),
- Rule::target => convert_target(pair)?.into(),
- Rule::substitution_def => convert_substitution_def(pair)?.into(),
- Rule::admonition_gen => convert_admonition_gen(pair)?.into(),
- Rule::image => convert_image::<e::Image>(pair)?.into(),
+ Rule::title => { let (t, k) = convert_title(pair); Title(t, k) },
+ Rule::paragraph => Ssubel(convert_paragraph(pair)?.into()),
+ Rule::target => Ssubel(convert_target(pair)?.into()),
+ Rule::substitution_def => Ssubel(convert_substitution_def(pair)?.into()),
+ Rule::admonition_gen => Ssubel(convert_admonition_gen(pair)?.into()),
+ Rule::image => Ssubel(convert_image::<e::Image>(pair)?.into()),
Rule::EOI => return Ok(None),
rule => panic!("unknown rule {:?}", rule),
}))
}
-fn convert_title(pair: Pair<Rule>) -> e::Title {
+fn convert_title(pair: Pair<Rule>) -> (e::Title, TitleKind) {
let mut title: Option<&str> = None;
- let mut _adornment_char: Option<char> = None;
- for p in pair.into_inner() {
+ let mut adornment_char: Option<char> = None;
+ // title_double or title_single. Extract kind before consuming
+ let inner_pair = pair.into_inner().next().unwrap();
+ let kind = inner_pair.as_rule();
+ for p in inner_pair.into_inner() {
match p.as_rule() {
- Rule::line => title = Some(p.as_str()),
- Rule::adornments => _adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")),
+ Rule::line => title = Some(p.as_str()), // TODO: can contain other stuff?
+ Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")),
rule => unimplemented!("Unexpected rule in title: {:?}", rule),
};
}
- // TODO adornment char
- e::Title::with_children(vec![
+ // now we encountered one line of text and one of adornments
+ // TODO: emit error if the adornment line is too short (it has to be at least as long as the title text)
+ let elem = e::Title::with_children(vec![
title.expect("No text in title").into()
- ])
+ ]);
+ let title_kind = match kind {
+ Rule::title_double => TitleKind::Double(adornment_char.unwrap()),
+ Rule::title_single => TitleKind::Single(adornment_char.unwrap()),
+ _ => unreachable!(),
+ };
+ (elem, title_kind)
}
diff --git a/src/parser/tests.rs b/src/parser/tests.rs
index 73fbe85..76af915 100644
--- a/src/parser/tests.rs
+++ b/src/parser/tests.rs
@@ -26,10 +26,10 @@ Title
",
rule: Rule::title,
tokens: [
- title(0, 12, [
+ title(0, 12, [ title_single(0, 12, [
line(0, 6, [ str(0, 5) ]),
adornments(6, 11),
- ])
+ ]) ])
]
};
}
@@ -45,10 +45,10 @@ Title
",
rule: Rule::title,
tokens: [
- title(0, 17, [
+ title(0, 17, [ title_double(0, 17, [
adornments(0, 5),
line(6, 12, [ str(6, 11) ]),
- ])
+ ]) ])
]
};
}
diff --git a/src/rst.pest b/src/rst.pest
index 595fd76..9934f44 100644
--- a/src/rst.pest
+++ b/src/rst.pest
@@ -52,10 +52,9 @@ target_name_qu = { ( !( ":"|"`") ~ !NEWLINE ~ ANY )* }
link_target = { nonspacechar+ }
// Title. A block type
-title = {
- PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP
- | line ~ PEEK[..] ~ adornments ~ NEWLINE
-}
+title = { title_double | title_single }
+title_double = { PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP }
+title_single = { line ~ PEEK[..] ~ adornments ~ NEWLINE }
// Bullet list. A block type.
bullet_list = { bullet_item ~ (PEEK[..] ~ bullet_item)* }
diff --git a/src/target.rs b/src/target.rs
index 9bfd216..81ff925 100644
--- a/src/target.rs
+++ b/src/target.rs
@@ -6,7 +6,7 @@ use url::{self,Url};
use serde_derive::Serialize;
-#[derive(Debug, Serialize)]
+#[derive(Debug,PartialEq,Serialize)]
#[serde(untagged)]
pub enum Target {
#[serde(serialize_with = "serialize_url")]