From a0e3c53758d526bb418c068bce1c99fa5a597ed3 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Thu, 26 Dec 2019 23:01:00 +0100 Subject: Split into smaller crates --- .gitignore | 2 +- Cargo.toml | 49 +-- document_tree/Cargo.toml | 19 + document_tree/src/attribute_types.rs | 155 ++++++++ document_tree/src/element_categories.rs | 130 +++++++ document_tree/src/element_types.rs | 96 +++++ document_tree/src/elements.rs | 288 ++++++++++++++ document_tree/src/extra_attributes.rs | 120 ++++++ document_tree/src/lib.rs | 43 +++ document_tree/src/macro_util.rs | 42 ++ document_tree/src/url.rs | 78 ++++ parser/Cargo.toml | 18 + parser/src/conversion.rs | 96 +++++ parser/src/conversion/block.rs | 202 ++++++++++ parser/src/conversion/inline.rs | 160 ++++++++ parser/src/conversion/tests.rs | 65 ++++ parser/src/lib.rs | 28 ++ parser/src/pair_ext_parse.rs | 21 + parser/src/pest_rst.rs | 7 + parser/src/rst.pest | 474 +++++++++++++++++++++++ parser/src/simplify.rs | 662 ++++++++++++++++++++++++++++++++ parser/src/tests.rs | 242 ++++++++++++ parser/src/token.rs | 16 + renderer/Cargo.toml | 23 ++ renderer/src/html.rs | 393 +++++++++++++++++++ renderer/src/html/tests.rs | 275 +++++++++++++ renderer/src/lib.rs | 21 + rst/Cargo.toml | 19 + rst/src/main.rs | 47 +++ src/bin.rs | 55 --- src/document_tree.rs | 35 -- src/document_tree/attribute_types.rs | 155 -------- src/document_tree/element_categories.rs | 130 ------- src/document_tree/element_types.rs | 96 ----- src/document_tree/elements.rs | 288 -------------- src/document_tree/extra_attributes.rs | 112 ------ src/document_tree/macro_util.rs | 42 -- src/lib.rs | 6 - src/parser.rs | 28 -- src/parser/conversion.rs | 165 -------- src/parser/conversion/block.rs | 202 ---------- src/parser/conversion/inline.rs | 161 -------- src/parser/pair_ext_parse.rs | 21 - src/parser/pest_rst.rs | 7 - src/parser/simplify.rs | 662 -------------------------------- src/parser/tests.rs | 241 ------------ src/parser/token.rs | 16 - src/renderer.rs | 24 -- src/renderer/html.rs | 388 ------------------- src/renderer/html_tests.rs | 274 ------------- src/rst.pest | 474 ----------------------- src/url.rs | 78 ---- 52 files changed, 3748 insertions(+), 3703 deletions(-) create mode 100644 document_tree/Cargo.toml create mode 100644 document_tree/src/attribute_types.rs create mode 100644 document_tree/src/element_categories.rs create mode 100644 document_tree/src/element_types.rs create mode 100644 document_tree/src/elements.rs create mode 100644 document_tree/src/extra_attributes.rs create mode 100644 document_tree/src/lib.rs create mode 100644 document_tree/src/macro_util.rs create mode 100644 document_tree/src/url.rs create mode 100644 parser/Cargo.toml create mode 100644 parser/src/conversion.rs create mode 100644 parser/src/conversion/block.rs create mode 100644 parser/src/conversion/inline.rs create mode 100644 parser/src/conversion/tests.rs create mode 100644 parser/src/lib.rs create mode 100644 parser/src/pair_ext_parse.rs create mode 100644 parser/src/pest_rst.rs create mode 100644 parser/src/rst.pest create mode 100644 parser/src/simplify.rs create mode 100644 parser/src/tests.rs create mode 100644 parser/src/token.rs create mode 100644 renderer/Cargo.toml create mode 100644 renderer/src/html.rs create mode 100644 renderer/src/html/tests.rs create mode 100644 renderer/src/lib.rs create mode 100644 rst/Cargo.toml create mode 100644 rst/src/main.rs delete mode 100644 src/bin.rs delete mode 100644 src/document_tree.rs delete mode 100644 src/document_tree/attribute_types.rs delete mode 100644 src/document_tree/element_categories.rs delete mode 100644 src/document_tree/element_types.rs delete mode 100644 src/document_tree/elements.rs delete mode 100644 src/document_tree/extra_attributes.rs delete mode 100644 src/document_tree/macro_util.rs delete mode 100644 src/lib.rs delete mode 100644 src/parser.rs delete mode 100644 src/parser/conversion.rs delete mode 100644 src/parser/conversion/block.rs delete mode 100644 src/parser/conversion/inline.rs delete mode 100644 src/parser/pair_ext_parse.rs delete mode 100644 src/parser/pest_rst.rs delete mode 100644 src/parser/simplify.rs delete mode 100644 src/parser/tests.rs delete mode 100644 src/parser/token.rs delete mode 100644 src/renderer.rs delete mode 100644 src/renderer/html.rs delete mode 100644 src/renderer/html_tests.rs delete mode 100644 src/rst.pest delete mode 100644 src/url.rs diff --git a/.gitignore b/.gitignore index e9e2199..ca98cd9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ /target/ -/Cargo.lock +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml index 273a4b0..e06882d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,42 +1,7 @@ -[package] -name = 'rst' -version = '0.2.0' -authors = [ 'Phil Schaf ' ] - -description = 'a reStructuredText parser and renderer' -license = 'MIT OR Apache-2.0' - -documentation = 'https://flying-sheep.github.io/rust-rst' -homepage = 'https://github.com/flying-sheep/rust-rst' -repository = 'https://github.com/flying-sheep/rust-rst.git' - -edition = '2018' - -[lib] -name = 'rst' -path = 'src/lib.rs' - -[[bin]] -name = 'rst' -path = 'src/bin.rs' - -[dependencies] -failure = '0.1.5' -failure_derive = '0.1.5' -url = '1.7.2' -regex = '1.1.3' -bitflags = '1.0.4' -unicode_categories = '0.1.1' -pest = '2.1.0' -pest_derive = '2.1.0' -serde = '1.0.89' -serde_derive = '1.0.89' -serde_json = '1.0.39' -serde-xml-rs = '0.3.1' - -quicli = '0.4.0' -structopt = '0.2.15' -clap = '2.32.0' - -[dev-dependencies] -pretty_assertions = '0.6.1' +[workspace] +members = [ + 'document_tree', + 'parser', + 'renderer', + 'rst', +] diff --git a/document_tree/Cargo.toml b/document_tree/Cargo.toml new file mode 100644 index 0000000..09e827e --- /dev/null +++ b/document_tree/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = 'document_tree' +version = '0.2.0' +authors = ['Philipp A. '] +description = 'reStructuredText’s DocumenTree representation' +license = 'MIT OR Apache-2.0' + +documentation = 'https://flying-sheep.github.io/rust-rst' +homepage = 'https://github.com/flying-sheep/rust-rst' +repository = 'https://github.com/flying-sheep/rust-rst.git' + +edition = '2018' + +[dependencies] +failure = '0.1.6' +regex = '1.3.1' +url = '2.1.0' +serde = '1.0.104' +serde_derive = '1.0.104' diff --git a/document_tree/src/attribute_types.rs b/document_tree/src/attribute_types.rs new file mode 100644 index 0000000..411b24d --- /dev/null +++ b/document_tree/src/attribute_types.rs @@ -0,0 +1,155 @@ +use std::str::FromStr; + +use failure::{Error,bail,format_err}; +use serde_derive::Serialize; +use regex::Regex; + +use crate::url::Url; + +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] +pub enum EnumeratedListType { + Arabic, + LowerAlpha, + UpperAlpha, + LowerRoman, + UpperRoman, +} + +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] +pub enum FixedSpace { Default, Preserve } // yes, default really is not “Default” +impl Default for FixedSpace { fn default() -> FixedSpace { FixedSpace::Preserve } } + +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum AlignH { Left, Center, Right} +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum AlignHV { Top, Middle, Bottom, Left, Center, Right } +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum AlignV { Top, Middle, Bottom } + +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum TableAlignH { Left, Right, Center, Justify, Char } +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum TableBorder { Top, Bottom, TopBottom, All, Sides, None } + +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub struct ID(pub String); +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub struct NameToken(pub String); + +// The table DTD has the cols attribute of tgroup as required, but having +// TableGroupCols not implement Default would leave no possible implementation +// for TableGroup::with_children. +#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub struct TableGroupCols(pub usize); +impl Default for TableGroupCols { + fn default() -> Self { + TableGroupCols(0) + } +} + +// no eq for f64 +#[derive(Debug,PartialEq,Serialize,Clone)] +pub enum Measure { // http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#length-units + Em(f64), + Ex(f64), + Mm(f64), + Cm(f64), + In(f64), + Px(f64), + Pt(f64), + Pc(f64), +} + +impl FromStr for AlignHV { + type Err = Error; + fn from_str(s: &str) -> Result { + use self::AlignHV::*; + Ok(match s { + "top" => Top, + "middle" => Middle, + "bottom" => Bottom, + "left" => Left, + "center" => Center, + "right" => Right, + s => bail!("Invalid Alignment {}", s), + }) + } +} + +impl From<&str> for ID { + fn from(s: &str) -> Self { + ID(s.to_owned().replace(' ', "-")) + } +} + +impl From<&str> for NameToken { + fn from(s: &str) -> Self { + NameToken(s.to_owned()) + } +} + +impl FromStr for Measure { + type Err = Error; + fn from_str(s: &str) -> Result { + use self::Measure::*; + let re = Regex::new(r"(?P\d+\.\d*|\.?\d+)\s*(?Pem|ex|mm|cm|in|px|pt|pc)").unwrap(); + let caps: regex::Captures = re.captures(s).ok_or_else(|| format_err!("Invalid measure"))?; + let value: f64 = caps["float"].parse()?; + Ok(match &caps["unit"] { + "em" => Em(value), + "ex" => Ex(value), + "mm" => Mm(value), + "cm" => Cm(value), + "in" => In(value), + "px" => Px(value), + "pt" => Pt(value), + "pc" => Pc(value), + _ => unreachable!(), + }) + } +} + +#[cfg(test)] +mod parse_tests { + use super::*; + + #[test] + fn measure() { + let _a: Measure = "1.5em".parse().unwrap(); + let _b: Measure = "20 mm".parse().unwrap(); + let _c: Measure = ".5in".parse().unwrap(); + let _d: Measure = "1.pc".parse().unwrap(); + } +} + +pub(crate) trait CanBeEmpty { + fn is_empty(&self) -> bool; +} + +/* Specialization necessary +impl CanBeEmpty for T { + fn is_empty(&self) -> bool { false } +} +*/ +macro_rules! impl_cannot_be_empty { + ($t:ty) => { + impl CanBeEmpty for $t { + fn is_empty(&self) -> bool { false } + } + }; + ($t:ty, $($ts:ty),*) => { + impl_cannot_be_empty!($t); + impl_cannot_be_empty!($($ts),*); + }; +} +impl_cannot_be_empty!(Url); +impl_cannot_be_empty!(TableGroupCols); + +impl CanBeEmpty for Option { + fn is_empty(&self) -> bool { self.is_none() } +} + +impl CanBeEmpty for Vec { + fn is_empty(&self) -> bool { self.is_empty() } +} + +impl CanBeEmpty for bool { + fn is_empty(&self) -> bool { !self } +} + +impl CanBeEmpty for FixedSpace { + fn is_empty(&self) -> bool { self == &FixedSpace::default() } +} + diff --git a/document_tree/src/element_categories.rs b/document_tree/src/element_categories.rs new file mode 100644 index 0000000..24a0798 --- /dev/null +++ b/document_tree/src/element_categories.rs @@ -0,0 +1,130 @@ +use std::fmt::{self,Debug,Formatter}; + +use serde_derive::Serialize; + +use crate::elements::*; + +pub trait HasChildren { + fn with_children(children: Vec) -> Self; + fn children(&self) -> &Vec; + fn children_mut(&mut self) -> &mut Vec; + fn append_child>(&mut self, child: R) { + self.children_mut().push(child.into()); + } + fn append_children + Clone>(&mut self, more: &[R]) { + let children = self.children_mut(); + children.reserve(more.len()); + for child in more { + children.push(child.clone().into()); + } + } +} + +macro_rules! impl_into { + ([ $( (($subcat:ident :: $entry:ident), $supcat:ident), )+ ]) => { + $( impl_into!($subcat::$entry => $supcat); )+ + }; + ($subcat:ident :: $entry:ident => $supcat:ident ) => { + impl Into<$supcat> for $entry { + fn into(self) -> $supcat { + $supcat::$subcat(Box::new(self.into())) + } + } + }; +} + +macro_rules! synonymous_enum { + ( $subcat:ident : $($supcat:ident),+ ; $midcat:ident : $supsupcat:ident { $($entry:ident),+ $(,)* } ) => { + synonymous_enum!($subcat : $( $supcat ),+ , $midcat { $($entry,)* }); + $( impl_into!($midcat::$entry => $supsupcat); )+ + }; + ( $subcat:ident : $($supcat:ident),+ { $($entry:ident),+ $(,)* } ) => { + synonymous_enum!($subcat { $( $entry, )* }); + cartesian!(impl_into, [ $( ($subcat::$entry) ),+ ], [ $($supcat),+ ]); + }; + ( $name:ident { $( $entry:ident ),+ $(,)* } ) => { + #[derive(PartialEq,Serialize,Clone)] + pub enum $name { $( + $entry(Box<$entry>), + )* } + + impl Debug for $name { + fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> { + match *self { + $( $name::$entry(ref inner) => inner.fmt(fmt), )* + } + } + } + + $( impl Into<$name> for $entry { + fn into(self) -> $name { + $name::$entry(Box::new(self)) + } + } )* + }; +} + +synonymous_enum!(StructuralSubElement { Title, Subtitle, Decoration, Docinfo, SubStructure }); +synonymous_enum!(SubStructure: StructuralSubElement { Topic, Sidebar, Transition, Section, BodyElement }); +synonymous_enum!(BodyElement: SubTopic, SubSidebar, SubBlockQuote, SubFootnote, SubFigure; SubStructure: StructuralSubElement { + //Simple + Paragraph, LiteralBlock, DoctestBlock, MathBlock, Rubric, SubstitutionDefinition, Comment, Pending, Target, Raw, Image, + //Compound + Compound, Container, + BulletList, EnumeratedList, DefinitionList, FieldList, OptionList, + LineBlock, BlockQuote, Admonition, Attention, Hint, Note, Caution, Danger, Error, Important, Tip, Warning, Footnote, Citation, SystemMessage, Figure, Table +}); + +synonymous_enum!(BibliographicElement { Author, Authors, Organization, Address, Contact, Version, Revision, Status, Date, Copyright, Field }); + +synonymous_enum!(TextOrInlineElement { + String, Emphasis, Strong, Literal, Reference, FootnoteReference, CitationReference, SubstitutionReference, TitleReference, Abbreviation, Acronym, Superscript, Subscript, Inline, Problematic, Generated, Math, + //also have non-inline versions. Inline image is no figure child, inline target has content + TargetInline, RawInline, ImageInline +}); + +//--------------\\ +//Content Models\\ +//--------------\\ + +synonymous_enum!(AuthorInfo { Author, Organization, Address, Contact }); +synonymous_enum!(DecorationElement { Header, Footer }); +synonymous_enum!(SubTopic { Title, BodyElement }); +synonymous_enum!(SubSidebar { Topic, Title, Subtitle, BodyElement }); +synonymous_enum!(SubDLItem { Term, Classifier, Definition }); +synonymous_enum!(SubField { FieldName, FieldBody }); +synonymous_enum!(SubOptionListItem { OptionGroup, Description }); +synonymous_enum!(SubOption { OptionString, OptionArgument }); +synonymous_enum!(SubLineBlock { LineBlock, Line }); +synonymous_enum!(SubBlockQuote { Attribution, BodyElement }); +synonymous_enum!(SubFootnote { Label, BodyElement }); +synonymous_enum!(SubFigure { Caption, Legend, BodyElement }); +synonymous_enum!(SubTable { Title, TableGroup }); +synonymous_enum!(SubTableGroup { TableColspec, TableHead, TableBody }); + +#[cfg(test)] +mod conversion_tests { + use std::default::Default; + use super::*; + + #[test] + fn basic() { + let _: BodyElement = Paragraph::default().into(); + } + + #[test] + fn more() { + let _: SubStructure = Paragraph::default().into(); + } + + #[test] + fn even_more() { + let _: StructuralSubElement = Paragraph::default().into(); + } + + #[test] + fn super_() { + let be: BodyElement = Paragraph::default().into(); + let _: StructuralSubElement = be.into(); + } +} diff --git a/document_tree/src/element_types.rs b/document_tree/src/element_types.rs new file mode 100644 index 0000000..429573e --- /dev/null +++ b/document_tree/src/element_types.rs @@ -0,0 +1,96 @@ + +// enum ElementType { +// //structual elements +// Section, Topic, Sidebar, +// +// //structural subelements +// Title, Subtitle, Decoration, Docinfo, Transition, +// +// //bibliographic elements +// Author, Authors, Organization, +// Address { space: FixedSpace }, +// Contact, Version, Revision, Status, +// Date, Copyright, Field, +// +// //decoration elements +// Header, Footer, +// +// //simple body elements +// Paragraph, +// LiteralBlock { space: FixedSpace }, +// DoctestBlock { space: FixedSpace }, +// MathBlock, Rubric, +// SubstitutionDefinition { ltrim: bool, rtrim: bool }, +// Comment { space: FixedSpace }, +// Pending, +// Target { refuri: Url, refid: ID, refname: Vec, anonymous: bool }, +// Raw { space: FixedSpace, format: Vec }, +// Image { +// align: AlignHV, +// uri: Url, +// alt: String, +// height: Measure, +// width: Measure, +// scale: f64, +// }, +// +// //compound body elements +// Compound, Container, +// +// BulletList { bullet: String }, +// EnumeratedList { enumtype: EnumeratedListType, prefix: String, suffix: String }, +// DefinitionList, FieldList, OptionList, +// +// LineBlock, BlockQuote, +// Admonition, Attention, Hint, Note, +// Caution, Danger, Error, Important, +// Tip, Warning, +// Footnote { backrefs: Vec, auto: bool }, +// Citation { backrefs: Vec }, +// SystemMessage { backrefs: Vec, level: usize, line: usize, type_: NameToken }, +// Figure { align: AlignH, width: usize }, +// Table, //TODO: Table +// +// //body sub elements +// ListItem, +// +// DefinitionListItem, Term, +// Classifier, Definition, +// +// FieldName, FieldBody, +// +// OptionListItem, OptionGroup, Description, Option_, OptionString, +// OptionArgument { delimiter: String }, +// +// Line, Attribution, Label, +// +// Caption, Legend, +// +// //inline elements +// Emphasis, Strong, Literal, +// Reference { name: String, refuri: Url, refid: ID, refname: Vec }, +// FootnoteReference { refid: ID, refname: Vec, auto: bool }, +// CitationReference { refid: ID, refname: Vec }, +// SubstitutionReference { refname: Vec }, +// TitleReference, +// Abbreviation, Acronym, +// Superscript, Subscript, +// Inline, +// Problematic { refid: ID }, +// Generated, Math, +// +// //also have non-inline versions. Inline image is no figure child, inline target has content +// TargetInline { refuri: Url, refid: ID, refname: Vec, anonymous: bool }, +// RawInline { space: FixedSpace, format: Vec }, +// ImageInline { +// align: AlignHV, +// uri: Url, +// alt: String, +// height: Measure, +// width: Measure, +// scale: f64, +// }, +// +// //text element +// TextElement, +// } diff --git a/document_tree/src/elements.rs b/document_tree/src/elements.rs new file mode 100644 index 0000000..26bccf6 --- /dev/null +++ b/document_tree/src/elements.rs @@ -0,0 +1,288 @@ +use std::path::PathBuf; +use serde_derive::Serialize; + +use crate::attribute_types::{CanBeEmpty,ID,NameToken}; +use crate::extra_attributes::{self,ExtraAttributes}; +use crate::element_categories::*; + + +//-----------------\\ +//Element hierarchy\\ +//-----------------\\ + +pub trait Element { + /// A list containing one or more unique identifier keys + fn ids (& self) -> & Vec; + fn ids_mut(&mut self) -> &mut Vec; + /// a list containing the names of an element, typically originating from the element's title or content. + /// Each name in names must be unique; if there are name conflicts (two or more elements want to the same name), + /// the contents will be transferred to the dupnames attribute on the duplicate elements. + /// An element may have at most one of the names or dupnames attributes, but not both. + fn names (& self) -> & Vec; + fn names_mut(&mut self) -> &mut Vec; + fn source (& self) -> & Option; + fn source_mut(&mut self) -> &mut Option; + fn classes (& self) -> & Vec; + fn classes_mut(&mut self) -> &mut Vec; +} + +#[derive(Debug,Default,PartialEq,Serialize,Clone)] +pub struct CommonAttributes { + #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] + ids: Vec, + #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] + names: Vec, + #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] + source: Option, + #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] + classes: Vec, + //TODO: dupnames +} + +//----\\ +//impl\\ +//----\\ + +macro_rules! impl_element { ($name:ident) => ( + impl Element for $name { + fn ids (& self) -> & Vec { & self.common.ids } + fn ids_mut(&mut self) -> &mut Vec { &mut self.common.ids } + fn names (& self) -> & Vec { & self.common.names } + fn names_mut(&mut self) -> &mut Vec { &mut self.common.names } + fn source (& self) -> & Option { & self.common.source } + fn source_mut(&mut self) -> &mut Option { &mut self.common.source } + fn classes (& self) -> & Vec { & self.common.classes } + fn classes_mut(&mut self) -> &mut Vec { &mut self.common.classes } + } +)} + +macro_rules! impl_children { ($name:ident, $childtype:ident) => ( + impl HasChildren<$childtype> for $name { + #[allow(clippy::needless_update)] + fn with_children(children: Vec<$childtype>) -> $name { $name { children: children, ..Default::default() } } + fn children (& self) -> & Vec<$childtype> { & self.children } + fn children_mut(&mut self) -> &mut Vec<$childtype> { &mut self.children } + } +)} + +macro_rules! impl_extra { ($name:ident $($more:tt)*) => ( + impl ExtraAttributes for $name { + #[allow(clippy::needless_update)] + fn with_extra(extra: extra_attributes::$name) -> $name { $name { common: Default::default(), extra: extra $($more)* } } + fn extra (& self) -> & extra_attributes::$name { & self.extra } + fn extra_mut(&mut self) -> &mut extra_attributes::$name { &mut self.extra } + } +)} + +trait HasExtraAndChildren { + fn with_extra_and_children(extra: A, children: Vec) -> Self; +} + +impl HasExtraAndChildren for T where T: HasChildren + ExtraAttributes { + #[allow(clippy::needless_update)] + fn with_extra_and_children(extra: A, mut children: Vec) -> Self { + let mut r = Self::with_extra(extra); + r.children_mut().extend(children.drain(..)); + r + } +} + +macro_rules! impl_new {( + $(#[$attr:meta])* + pub struct $name:ident { $( + $(#[$fattr:meta])* + $field:ident : $typ:path + ),* $(,)* } +) => ( + $(#[$attr])* + #[derive(Debug,PartialEq,Serialize,Clone)] + pub struct $name { $( + $(#[$fattr])* $field: $typ, + )* } + impl $name { + pub fn new( $( $field: $typ, )* ) -> $name { $name { $( $field: $field, )* } } + } +)} + +macro_rules! impl_elem { + ($name:ident) => { + impl_new!(#[derive(Default)] pub struct $name { + #[serde(flatten)] common: CommonAttributes, + }); + impl_element!($name); + }; + ($name:ident; +) => { + impl_new!(#[derive(Default)] pub struct $name { + #[serde(flatten)] common: CommonAttributes, + #[serde(flatten)] extra: extra_attributes::$name, + }); + impl_element!($name); impl_extra!($name, ..Default::default()); + }; + ($name:ident; *) => { //same as above with no default + impl_new!(pub struct $name { + #[serde(flatten)] common: CommonAttributes, + #[serde(flatten)] extra: extra_attributes::$name + }); + impl_element!($name); impl_extra!($name); + }; + ($name:ident, $childtype:ident) => { + impl_new!(#[derive(Default)] pub struct $name { + #[serde(flatten)] common: CommonAttributes, + children: Vec<$childtype>, + }); + impl_element!($name); impl_children!($name, $childtype); + }; + ($name:ident, $childtype:ident; +) => { + impl_new!(#[derive(Default)] pub struct $name { + #[serde(flatten)] common: CommonAttributes, + #[serde(flatten)] extra: extra_attributes::$name, + children: Vec<$childtype>, + }); + impl_element!($name); impl_extra!($name, ..Default::default()); impl_children!($name, $childtype); + }; +} + +macro_rules! impl_elems { ( $( ($($args:tt)*) )* ) => ( + $( impl_elem!($($args)*); )* +)} + + +#[derive(Default,Debug,Serialize)] +pub struct Document { children: Vec } +impl_children!(Document, StructuralSubElement); + +impl_elems!( + //structual elements + (Section, StructuralSubElement) + (Topic, SubTopic) + (Sidebar, SubSidebar) + + //structural subelements + (Title, TextOrInlineElement) + (Subtitle, TextOrInlineElement) + (Decoration, DecorationElement) + (Docinfo, BibliographicElement) + (Transition) + + //bibliographic elements + (Author, TextOrInlineElement) + (Authors, AuthorInfo) + (Organization, TextOrInlineElement) + (Address, TextOrInlineElement; +) + (Contact, TextOrInlineElement) + (Version, TextOrInlineElement) + (Revision, TextOrInlineElement) + (Status, TextOrInlineElement) + (Date, TextOrInlineElement) + (Copyright, TextOrInlineElement) + (Field, SubField) + + //decoration elements + (Header, BodyElement) + (Footer, BodyElement) + + //simple body elements + (Paragraph, TextOrInlineElement) + (LiteralBlock, TextOrInlineElement; +) + (DoctestBlock, TextOrInlineElement; +) + (MathBlock, String) + (Rubric, TextOrInlineElement) + (SubstitutionDefinition, TextOrInlineElement; +) + (Comment, TextOrInlineElement; +) + (Pending) + (Target; +) + (Raw, String; +) + (Image; *) + + //compound body elements + (Compound, BodyElement) + (Container, BodyElement) + + (BulletList, ListItem; +) + (EnumeratedList, ListItem; +) + (DefinitionList, DefinitionListItem) + (FieldList, Field) + (OptionList, OptionListItem) + + (LineBlock, SubLineBlock) + (BlockQuote, SubBlockQuote) + (Admonition, SubTopic) + (Attention, BodyElement) + (Hint, BodyElement) + (Note, BodyElement) + (Caution, BodyElement) + (Danger, BodyElement) + (Error, BodyElement) + (Important, BodyElement) + (Tip, BodyElement) + (Warning, BodyElement) + (Footnote, SubFootnote; +) + (Citation, SubFootnote; +) + (SystemMessage, BodyElement; +) + (Figure, SubFigure; +) + (Table, SubTable; +) + + //table elements + (TableGroup, SubTableGroup; +) + (TableHead, TableRow; +) + (TableBody, TableRow; +) + (TableRow, TableEntry; +) + (TableEntry, BodyElement; +) + (TableColspec; +) + + //body sub elements + (ListItem, BodyElement) + + (DefinitionListItem, SubDLItem) + (Term, TextOrInlineElement) + (Classifier, TextOrInlineElement) + (Definition, BodyElement) + + (FieldName, TextOrInlineElement) + (FieldBody, BodyElement) + + (OptionListItem, SubOptionListItem) + (OptionGroup, Option_) + (Description, BodyElement) + (Option_, SubOption) + (OptionString, String) + (OptionArgument, String; +) + + (Line, TextOrInlineElement) + (Attribution, TextOrInlineElement) + (Label, TextOrInlineElement) + + (Caption, TextOrInlineElement) + (Legend, BodyElement) + + //inline elements + (Emphasis, TextOrInlineElement) + (Literal, TextOrInlineElement) + (Reference, TextOrInlineElement; +) + (Strong, TextOrInlineElement) + (FootnoteReference, TextOrInlineElement; +) + (CitationReference, TextOrInlineElement; +) + (SubstitutionReference, TextOrInlineElement; +) + (TitleReference, TextOrInlineElement) + (Abbreviation, TextOrInlineElement) + (Acronym, TextOrInlineElement) + (Superscript, TextOrInlineElement) + (Subscript, TextOrInlineElement) + (Inline, TextOrInlineElement) + (Problematic, TextOrInlineElement; +) + (Generated, TextOrInlineElement) + (Math, String) + + //also have non-inline versions. Inline image is no figure child, inline target has content + (TargetInline, String; +) + (RawInline, String; +) + (ImageInline; *) + + //text element = String +); + +impl<'a> From<&'a str> for TextOrInlineElement { + fn from(s: &'a str) -> Self { + s.to_owned().into() + } +} diff --git a/document_tree/src/extra_attributes.rs b/document_tree/src/extra_attributes.rs new file mode 100644 index 0000000..45fcf32 --- /dev/null +++ b/document_tree/src/extra_attributes.rs @@ -0,0 +1,120 @@ +use serde_derive::Serialize; + +use crate::url::Url; +use crate::attribute_types::{ + CanBeEmpty, + FixedSpace, + ID,NameToken, + AlignHV,AlignH,AlignV, + TableAlignH,TableBorder,TableGroupCols, + Measure, + EnumeratedListType, +}; + +pub trait ExtraAttributes { + fn with_extra(extra: A) -> Self; + fn extra (& self) -> & A; + fn extra_mut(&mut self) -> &mut A; +} + +macro_rules! impl_extra { + ( $name:ident { $( $(#[$pattr:meta])* $param:ident : $type:ty ),* $(,)* } ) => ( + impl_extra!( + #[derive(Default,Debug,PartialEq,Serialize,Clone)] + $name { $( $(#[$pattr])* $param : $type, )* } + ); + ); + ( $(#[$attr:meta])+ $name:ident { $( $(#[$pattr:meta])* $param:ident : $type:ty ),* $(,)* } ) => ( + $(#[$attr])+ + pub struct $name { $( + $(#[$pattr])* + #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] + pub $param : $type, + )* } + ); +} + +impl_extra!(Address { space: FixedSpace }); +impl_extra!(LiteralBlock { space: FixedSpace }); +impl_extra!(DoctestBlock { space: FixedSpace }); +impl_extra!(SubstitutionDefinition { ltrim: bool, rtrim: bool }); +impl_extra!(Comment { space: FixedSpace }); +impl_extra!(Target { + /// External reference to a URI/URL + refuri: Option, + /// References to ids attributes in other elements + refid: Option, + /// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. + refname: Vec, + anonymous: bool, +}); +impl_extra!(Raw { space: FixedSpace, format: Vec }); +impl_extra!(#[derive(Debug,PartialEq,Serialize,Clone)] Image { + uri: Url, + align: Option, + alt: Option, + height: Option, + width: Option, + scale: Option, + target: Option, // Not part of the DTD but a valid argument +}); + +//bools usually are XML yesorno. “auto” however either exists and is set to something random like “1” or doesn’t exist +//does auto actually mean the numbering prefix? + +impl_extra!(BulletList { bullet: Option }); +impl_extra!(EnumeratedList { enumtype: Option, prefix: Option, suffix: Option }); + +impl_extra!(Footnote { backrefs: Vec, auto: bool }); +impl_extra!(Citation { backrefs: Vec }); +impl_extra!(SystemMessage { backrefs: Vec, level: Option, line: Option, type_: Option }); +impl_extra!(Figure { align: Option, width: Option }); +impl_extra!(Table { frame: Option, colsep: Option, rowsep: Option, pgwide: Option }); + +impl_extra!(TableGroup { cols: TableGroupCols, colsep: Option, rowsep: Option, align: Option }); +impl_extra!(TableHead { valign: Option }); +impl_extra!(TableBody { valign: Option }); +impl_extra!(TableRow { rowsep: Option, valign: Option }); +impl_extra!(TableEntry { colname: Option, namest: Option, nameend: Option, morerows: Option, colsep: Option, rowsep: Option, align: Option, r#char: Option, charoff: Option, valign: Option, morecols: Option }); +impl_extra!(TableColspec { colnum: Option, colname: Option, colwidth: Option, colsep: Option, rowsep: Option, align: Option, r#char: Option, charoff: Option, stub: Option }); + +impl_extra!(OptionArgument { delimiter: Option }); + +impl_extra!(Reference { + name: Option, //TODO: is CDATA in the DTD, so maybe no nametoken? + /// External reference to a URI/URL + refuri: Option, + /// References to ids attributes in other elements + refid: Option, + /// Internal reference to the names attribute of another element + refname: Vec, +}); +impl_extra!(FootnoteReference { refid: Option, refname: Vec, auto: bool }); +impl_extra!(CitationReference { refid: Option, refname: Vec }); +impl_extra!(SubstitutionReference { refname: Vec }); +impl_extra!(Problematic { refid: Option }); + +//also have non-inline versions. Inline image is no figure child, inline target has content +impl_extra!(TargetInline { + /// External reference to a URI/URL + refuri: Option, + /// References to ids attributes in other elements + refid: Option, + /// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. + refname: Vec, + anonymous: bool, +}); +impl_extra!(RawInline { space: FixedSpace, format: Vec }); +pub type ImageInline = Image; + +impl Image { + pub fn new(uri: Url) -> Image { Image { + uri, + align: None, + alt: None, + height: None, + width: None, + scale: None, + target: None, + } } +} diff --git a/document_tree/src/lib.rs b/document_tree/src/lib.rs new file mode 100644 index 0000000..324fc44 --- /dev/null +++ b/document_tree/src/lib.rs @@ -0,0 +1,43 @@ +#![recursion_limit="256"] + +///http://docutils.sourceforge.net/docs/ref/doctree.html +///serves as AST + +#[macro_use] +mod macro_util; + +pub mod url; +pub mod elements; +pub mod element_categories; +pub mod extra_attributes; +pub mod attribute_types; + +pub use self::elements::*; //Element,CommonAttributes,HasExtraAndChildren +pub use self::extra_attributes::ExtraAttributes; +pub use self::element_categories::HasChildren; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn imperative() { + let mut doc = Document::default(); + let mut title = Title::default(); + title.append_child("Hi"); + doc.append_child(title); + + println!("{:?}", doc); + } + + #[test] + fn descriptive() { + let doc = Document::with_children(vec![ + Title::with_children(vec![ + "Hi".into() + ]).into() + ]); + + println!("{:?}", doc); + } +} diff --git a/document_tree/src/macro_util.rs b/document_tree/src/macro_util.rs new file mode 100644 index 0000000..dcf3725 --- /dev/null +++ b/document_tree/src/macro_util.rs @@ -0,0 +1,42 @@ +macro_rules! cartesian_impl { + ($out:tt [] $b:tt $init_b:tt $submacro:tt) => { + $submacro!{$out} + }; + ($out:tt [$a:tt, $($at:tt)*] [] $init_b:tt $submacro:tt) => { + cartesian_impl!{$out [$($at)*] $init_b $init_b $submacro} + }; + ([$($out:tt)*] [$a:tt, $($at:tt)*] [$b:tt, $($bt:tt)*] $init_b:tt $submacro:tt) => { + cartesian_impl!{[$($out)* ($a, $b),] [$a, $($at)*] [$($bt)*] $init_b $submacro} + }; +} + +macro_rules! cartesian { + ( $submacro:tt, [$($a:tt)*], [$($b:tt)*]) => { + cartesian_impl!{[] [$($a)*,] [$($b)*,] [$($b)*,] $submacro} + }; +} + + +#[cfg(test)] +mod tests { + macro_rules! print_cartesian { + ( [ $(($a1:tt, $a2:tt)),* , ] ) => { + fn test_f(x:i64, y:i64) -> Result<(i64, i64), ()> { + match (x, y) { + $( + ($a1, $a2) => { Ok(($a1, $a2)) } + )* + _ => { Err(()) } + } + } + }; + } + + #[test] + fn print_cartesian() { + cartesian!(print_cartesian, [1, 2, 3], [4, 5, 6]); + assert_eq!(test_f(1, 4), Ok((1, 4))); + assert_eq!(test_f(1, 3), Err(())); + assert_eq!(test_f(3, 5), Ok((3, 5))); + } +} diff --git a/document_tree/src/url.rs b/document_tree/src/url.rs new file mode 100644 index 0000000..31a0536 --- /dev/null +++ b/document_tree/src/url.rs @@ -0,0 +1,78 @@ +use std::fmt; +use std::str::FromStr; + +use url::{self,ParseError}; +use serde_derive::Serialize; + + +fn starts_with_scheme(input: &str) -> bool { + let scheme = input.split(':').next().unwrap(); + if scheme == input || scheme.is_empty() { + return false; + } + let mut chars = input.chars(); + // First character. + if !chars.next().unwrap().is_ascii_alphabetic() { + return false; + } + for ch in chars { + if !ch.is_ascii_alphanumeric() && ch != '+' && ch != '-' && ch != '.' { + return false; + } + } + true +} + +/// The string representation of a URL, either absolute or relative, that has +/// been verified as a valid URL on construction. +#[derive(Debug,PartialEq,Serialize,Clone)] +#[serde(transparent)] +pub struct Url(String); + +impl Url { + pub fn parse_absolute(input: &str) -> Result { + Ok(url::Url::parse(input)?.into()) + } + pub fn parse_relative(input: &str) -> Result { + // We're assuming that any scheme through which RsT documents are being + // accessed is a hierarchical scheme, and so we can parse relative to a + // random hierarchical URL. + if input.starts_with('/') || !starts_with_scheme(input) { + // Continue only if the parse succeeded, disregarding its result. + let random_base_url = url::Url::parse("https://a/b").unwrap(); + url::Url::options() + .base_url(Some(&random_base_url)) + .parse(input)?; + Ok(Url(input.into())) + } else { + // If this is a URL at all, it's an absolute one. + // There's no appropriate variant of url::ParseError really. + Err(ParseError::SetHostOnCannotBeABaseUrl) + } + } + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From for Url { + fn from(url: url::Url) -> Self { + Url(url.into_string()) + } +} + + +impl fmt::Display for Url { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + + +impl FromStr for Url { + type Err = ParseError; + fn from_str(input: &str) -> Result { + Url::parse_absolute(input) + .or_else(|_| Url::parse_relative(input)) + } +} diff --git a/parser/Cargo.toml b/parser/Cargo.toml new file mode 100644 index 0000000..22f2490 --- /dev/null +++ b/parser/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = 'rst_parser' +version = '0.2.0' +authors = ['Philipp A. '] +edition = '2018' +description = 'a reStructuredText parser' +license = 'MIT OR Apache-2.0' + +documentation = 'https://flying-sheep.github.io/rust-rst' +homepage = 'https://github.com/flying-sheep/rust-rst' +repository = 'https://github.com/flying-sheep/rust-rst.git' + +[dependencies] +document_tree = { path = '../document_tree' } + +pest = '2.1.2' +pest_derive = '2.1.0' +failure = '0.1.6' diff --git a/parser/src/conversion.rs b/parser/src/conversion.rs new file mode 100644 index 0000000..de5f091 --- /dev/null +++ b/parser/src/conversion.rs @@ -0,0 +1,96 @@ +mod block; +mod inline; +#[cfg(test)] +mod tests; + +use failure::Error; +use pest::iterators::Pairs; + +use document_tree::{ + Element,HasChildren, + elements as e, + element_categories as c, + attribute_types as at, +}; + +use crate::pest_rst::Rule; + + +fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section { + match ssubel { + c::StructuralSubElement::SubStructure(ref mut b) => match **b { + c::SubStructure::Section(ref mut s) => s, + _ => unreachable!(), + }, + _ => unreachable!(), + } +} + + +fn get_level<'tl>(toplevel: &'tl mut Vec, section_idxs: &[Option]) -> &'tl mut Vec { + let mut level = toplevel; + for maybe_i in section_idxs { + if let Some(i) = *maybe_i { + level = ssubel_to_section_unchecked_mut(&mut level[i]).children_mut(); + } + } + level +} + + +pub fn convert_document(pairs: Pairs) -> Result { + use self::block::TitleOrSsubel::*; + + let mut toplevel: Vec = vec![]; + // The kinds of section titles encountered. + // `section_idx[x]` has the kind `kinds[x]`, but `kinds` can be longer + let mut kinds: Vec = vec![]; + // Recursive indices into the tree, pointing at the active sections. + // `None`s indicate skipped section levels: + // toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]... + let mut section_idxs: Vec> = vec![]; + + for pair in pairs { + if let Some(ssubel) = block::convert_ssubel(pair)? { match ssubel { + Title(title, kind) => { + match kinds.iter().position(|k| k == &kind) { + // Idx points to the level we want to add, + // so idx-1 needs to be the last valid index. + Some(idx) => { + // If idx < len: Remove found section and all below + section_idxs.truncate(idx); + // If idx > len: Add None for skipped levels + // TODO: test skipped levels + while section_idxs.len() < idx { section_idxs.push(None) } + }, + None => kinds.push(kind), + } + let super_level = get_level(&mut toplevel, §ion_idxs); + let slug = title.names().iter().next().map(|at::NameToken(name)| at::ID(name.to_owned())); + let mut section = e::Section::with_children(vec![title.into()]); + section.ids_mut().extend(slug.into_iter()); + super_level.push(section.into()); + section_idxs.push(Some(super_level.len() - 1)); + }, + Ssubel(elem) => get_level(&mut toplevel, §ion_idxs).push(elem), + }} + } + Ok(e::Document::with_children(toplevel)) +} + +/// Normalizes a name in terms of whitespace. Equivalent to docutils's +/// `docutils.nodes.whitespace_normalize_name`. +pub fn whitespace_normalize_name(name: &str) -> String { + // Python's string.split() defines whitespace differently than Rust does. + let split_iter = name.split( + |ch: char| ch.is_whitespace() || (ch >= '\x1C' && ch <= '\x1F') + ).filter(|split| !split.is_empty()); + let mut ret = String::new(); + for split in split_iter { + if !ret.is_empty() { + ret.push(' '); + } + ret.push_str(split); + } + ret +} diff --git a/parser/src/conversion/block.rs b/parser/src/conversion/block.rs new file mode 100644 index 0000000..ab18c48 --- /dev/null +++ b/parser/src/conversion/block.rs @@ -0,0 +1,202 @@ +use failure::{Error,bail}; +use pest::iterators::Pair; + +use document_tree::{ + Element,HasChildren,ExtraAttributes, + elements as e, + element_categories as c, + extra_attributes as a, + attribute_types as at +}; + +use crate::{ + pest_rst::Rule, + pair_ext_parse::PairExt, +}; +use super::{whitespace_normalize_name, inline::convert_inlines}; + + +#[derive(PartialEq)] +pub(super) enum TitleKind { Double(char), Single(char) } + +pub(super) enum TitleOrSsubel { + Title(e::Title, TitleKind), + Ssubel(c::StructuralSubElement), +} + + +pub(super) fn convert_ssubel(pair: Pair) -> Result, Error> { + use self::TitleOrSsubel::*; + Ok(Some(match pair.as_rule() { + Rule::title => { let (t, k) = convert_title(pair)?; Title(t, k) }, + //TODO: subtitle, decoration, docinfo + Rule::EOI => return Ok(None), + _ => Ssubel(convert_substructure(pair)?.into()), + })) +} + + +fn convert_substructure(pair: Pair) -> Result { + Ok(match pair.as_rule() { + // todo: Topic, Sidebar, Transition + // no section here, as it’s constructed from titles + _ => convert_body_elem(pair)?.into(), + }) +} + + +fn convert_body_elem(pair: Pair) -> Result { + Ok(match pair.as_rule() { + Rule::paragraph => convert_paragraph(pair)?.into(), + Rule::target => convert_target(pair)?.into(), + Rule::substitution_def => convert_substitution_def(pair)?.into(), + Rule::admonition_gen => convert_admonition_gen(pair)?.into(), + Rule::image => convert_image::(pair)?.into(), + Rule::bullet_list => convert_bullet_list(pair)?.into(), + rule => unimplemented!("unhandled rule {:?}", rule), + }) +} + + +fn convert_title(pair: Pair) -> Result<(e::Title, TitleKind), Error> { + let mut title: Option = None; + let mut title_inlines: Option> = None; + let mut adornment_char: Option = None; + // title_double or title_single. Extract kind before consuming + let inner_pair = pair.into_inner().next().unwrap(); + let kind = inner_pair.as_rule(); + for p in inner_pair.into_inner() { + match p.as_rule() { + Rule::line => { + title = Some(p.as_str().to_owned()); + title_inlines = Some(convert_inlines(p)?); + }, + Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")), + rule => unimplemented!("Unexpected rule in title: {:?}", rule), + }; + } + // now we encountered one line of text and one of adornments + // TODO: emit error if the adornment line is too short (has to match title length) + let mut elem = e::Title::with_children(title_inlines.expect("No text in title")); + if let Some(title) = title { + //TODO: slugify properly + let slug = title.to_lowercase().replace("\n", "").replace(" ", "-"); + elem.names_mut().push(at::NameToken(slug)); + } + let title_kind = match kind { + Rule::title_double => TitleKind::Double(adornment_char.unwrap()), + Rule::title_single => TitleKind::Single(adornment_char.unwrap()), + _ => unreachable!(), + }; + Ok((elem, title_kind)) +} + + +fn convert_paragraph(pair: Pair) -> Result { + Ok(e::Paragraph::with_children(convert_inlines(pair)?)) +} + + +fn convert_target(pair: Pair) -> Result { + let mut elem: e::Target = Default::default(); + elem.extra_mut().anonymous = false; + for p in pair.into_inner() { + match p.as_rule() { + Rule::target_name_uq | Rule::target_name_qu => { + elem.ids_mut().push(p.as_str().into()); + elem.names_mut().push(p.as_str().into()); + }, + // TODO: also handle non-urls + Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?), + rule => panic!("Unexpected rule in target: {:?}", rule), + } + } + Ok(elem) +} + +fn convert_substitution_def(pair: Pair) -> Result { + let mut pairs = pair.into_inner(); + let name = whitespace_normalize_name(pairs.next().unwrap().as_str()); // Rule::substitution_name + let inner_pair = pairs.next().unwrap(); + let inner: Vec = match inner_pair.as_rule() { + Rule::replace => convert_replace(inner_pair)?, + Rule::image => vec![convert_image::(inner_pair)?.into()], + rule => panic!("Unknown substitution rule {:?}", rule), + }; + let mut subst_def = e::SubstitutionDefinition::with_children(inner); + subst_def.names_mut().push(at::NameToken(name)); + Ok(subst_def) +} + +fn convert_replace(pair: Pair) -> Result, Error> { + let mut pairs = pair.into_inner(); + let paragraph = pairs.next().unwrap(); + convert_inlines(paragraph) +} + +fn convert_image(pair: Pair) -> Result where I: Element + ExtraAttributes { + let mut pairs = pair.into_inner(); + let mut image = I::with_extra(a::Image::new( + pairs.next().unwrap().as_str().trim().parse()?, // line + )); + for opt in pairs { + let mut opt_iter = opt.into_inner(); + let opt_name = opt_iter.next().unwrap(); + let opt_val = opt_iter.next().unwrap(); + match opt_name.as_str() { + "class" => image.classes_mut().push(opt_val.as_str().to_owned()), + "name" => image.names_mut().push(opt_val.as_str().into()), + "alt" => image.extra_mut().alt = Some(opt_val.as_str().to_owned()), + "height" => image.extra_mut().height = Some(opt_val.parse()?), + "width" => image.extra_mut().width = Some(opt_val.parse()?), + "scale" => image.extra_mut().scale = Some(parse_scale(&opt_val)?), + "align" => image.extra_mut().align = Some(opt_val.parse()?), + "target" => image.extra_mut().target = Some(opt_val.parse()?), + name => bail!("Unknown Image option {}", name), + } + } + Ok(image) +} + +fn parse_scale(pair: &Pair) -> Result { + let input = if pair.as_str().chars().rev().next() == Some('%') { &pair.as_str()[..pair.as_str().len()-1] } else { pair.as_str() }; + use pest::error::{Error,ErrorVariant}; + Ok(input.parse().map_err(|e: std::num::ParseIntError| { + let var: ErrorVariant = ErrorVariant::CustomError { message: e.to_string() }; + Error::new_from_span(var, pair.as_span()) + })?) +} + +fn convert_admonition_gen(pair: Pair) -> Result { + let mut iter = pair.into_inner(); + let typ = iter.next().unwrap().as_str(); + // TODO: in reality it contains body elements. + let children: Vec = iter.map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()).collect(); + Ok(match typ { + "attention" => e::Attention::with_children(children).into(), + "hint" => e::Hint::with_children(children).into(), + "note" => e::Note::with_children(children).into(), + "caution" => e::Caution::with_children(children).into(), + "danger" => e::Danger::with_children(children).into(), + "error" => e::Error::with_children(children).into(), + "important" => e::Important::with_children(children).into(), + "tip" => e::Tip::with_children(children).into(), + "warning" => e::Warning::with_children(children).into(), + typ => panic!("Unknown admontion type {}!", typ), + }) +} + +fn convert_bullet_list(pair: Pair) -> Result { + Ok(e::BulletList::with_children(pair.into_inner().map(convert_bullet_item).collect::>()?)) +} + +fn convert_bullet_item(pair: Pair) -> Result { + let mut iter = pair.into_inner(); + let mut children: Vec = vec![ + convert_paragraph(iter.next().unwrap())?.into() + ]; + for p in iter { + children.push(convert_body_elem(p)?); + } + Ok(e::ListItem::with_children(children)) +} diff --git a/parser/src/conversion/inline.rs b/parser/src/conversion/inline.rs new file mode 100644 index 0000000..6094714 --- /dev/null +++ b/parser/src/conversion/inline.rs @@ -0,0 +1,160 @@ +use failure::Error; +use pest::iterators::Pair; + +use document_tree::{ + HasChildren, + elements as e, + url::Url, + element_categories as c, + extra_attributes as a, + attribute_types as at, +}; + +use crate::{ + pest_rst::Rule, +// pair_ext_parse::PairExt, +}; +use super::whitespace_normalize_name; + + +pub fn convert_inline(pair: Pair) -> Result { + Ok(match pair.as_rule() { + Rule::str | Rule::str_nested => pair.as_str().into(), + Rule::ws_newline => " ".to_owned().into(), + Rule::reference => convert_reference(pair)?, + Rule::substitution_name => convert_substitution_ref(pair)?.into(), + Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(), + Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(), + Rule::literal => e::Literal::with_children(convert_inlines(pair)?).into(), + rule => unimplemented!("unknown rule {:?}", rule), + }) +} + +pub fn convert_inlines(pair: Pair) -> Result, Error> { + pair.into_inner().map(convert_inline).collect() +} + +fn convert_reference(pair: Pair) -> Result { + let name; + let refuri; + let refid; + let mut refname = vec![]; + let mut children: Vec = vec![]; + let concrete = pair.into_inner().next().unwrap(); + match concrete.as_rule() { + Rule::reference_target => { + let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu + match rt_inner.as_rule() { + Rule::reference_target_uq => { + refid = None; + name = Some(rt_inner.as_str().into()); + refuri = None; + refname.push(rt_inner.as_str().into()); + children.push(rt_inner.as_str().into()); + }, + Rule::reference_target_qu => { + let (text, reference) = { + let mut text = None; + let mut reference = None; + for inner in rt_inner.clone().into_inner() { + match inner.as_rule() { + Rule::reference_text => text = Some(inner), + Rule::reference_bracketed => reference = Some(inner), + _ => unreachable!() + } + } + (text, reference) + }; + let trimmed_text = match (&text, &reference) { + (Some(text), None) => text.as_str(), + (_, Some(reference)) => { + text + .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch))) + .filter(|text| !text.is_empty()) + .unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str()) + } + (None, None) => unreachable!() + }; + refid = None; + name = Some(trimmed_text.into()); + refuri = if let Some(reference) = reference { + let inner = reference.into_inner().next().unwrap(); + match inner.as_rule() { + // The URL rules in our parser accept a narrow superset of + // valid URLs, so we need to handle false positives. + Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { + Some(target) + } else if inner.as_str().ends_with('_') { + // like target_name_qu (minus the final underscore) + let full_str = inner.as_str(); + refname.push(full_str[0..full_str.len() - 1].into()); + None + } else { + // like relative_reference + Some(Url::parse_relative(inner.as_str())?) + }, + Rule::target_name_qu => { + refname.push(inner.as_str().into()); + None + }, + Rule::relative_reference => { + Some(Url::parse_relative(inner.as_str())?) + }, + _ => unreachable!() + } + } else { + refname.push(trimmed_text.into()); + None + }; + children.push(trimmed_text.into()); + }, + _ => unreachable!() + } + }, + Rule::reference_explicit => unimplemented!("explicit reference"), + Rule::reference_auto => { + let rt_inner = concrete.into_inner().next().unwrap(); + match rt_inner.as_rule() { + Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { + Ok(target) => { + refuri = Some(target); + name = None; + refid = None; + children.push(rt_inner.as_str().into()); + }, + // if our parser got a URL wrong, return it as a string + Err(_) => return Ok(rt_inner.as_str().into()) + }, + Rule::email => { + let mailto_url = String::from("mailto:") + rt_inner.as_str(); + match Url::parse_absolute(&mailto_url) { + Ok(target) => { + refuri = Some(target); + name = None; + refid = None; + children.push(rt_inner.as_str().into()); + }, + // if our parser got a URL wrong, return it as a string + Err(_) => return Ok(rt_inner.as_str().into()) + } + }, + _ => unreachable!() + } + }, + _ => unreachable!(), + }; + Ok(e::Reference::new( + Default::default(), + a::Reference { name, refuri, refid, refname }, + children + ).into()) +} + +fn convert_substitution_ref(pair: Pair) -> Result { + let name = whitespace_normalize_name(pair.as_str()); + Ok(a::ExtraAttributes::with_extra( + a::SubstitutionReference { + refname: vec![at::NameToken(name)] + } + )) +} diff --git a/parser/src/conversion/tests.rs b/parser/src/conversion/tests.rs new file mode 100644 index 0000000..89b0a1c --- /dev/null +++ b/parser/src/conversion/tests.rs @@ -0,0 +1,65 @@ +use document_tree::{ + elements as e, + element_categories as c, + HasChildren, +}; + +use crate::parse; + +fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section { + match ssubel { + c::StructuralSubElement::SubStructure(ref b) => match **b { + c::SubStructure::Section(ref s) => s, + ref c => panic!("Expected section, not {:?}", c), + }, + ref c => panic!("Expected SubStructure, not {:?}", c), + } +} + +const SECTIONS: &str = "\ +Intro before first section title + +Level 1 +******* + +------- +Level 2 +------- + +Level 3 +======= + +L1 again +******** + +L3 again, skipping L2 +===================== +"; + +#[test] +fn convert_skipped_section() { + let doctree = parse(SECTIONS).unwrap(); + let lvl0 = doctree.children(); + assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0); + + assert_eq!(lvl0[0], e::Paragraph::with_children(vec![ + "Intro before first section title".to_owned().into() + ]).into(), "The intro text should fit"); + + let lvl1a = ssubel_to_section(&lvl0[1]).children(); + assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a); + //TODO: test title lvl1a[0] + let lvl2 = ssubel_to_section(&lvl1a[1]).children(); + assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2); + //TODO: test title lvl2[0] + let lvl3a = ssubel_to_section(&lvl2[1]).children(); + assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a); + //TODO: test title lvl3a[0] + + let lvl1b = ssubel_to_section(&lvl0[2]).children(); + assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b); + //TODO: test title lvl1b[0] + let lvl3b = ssubel_to_section(&lvl1b[1]).children(); + assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b); + //TODO: test title lvl3b[0] +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs new file mode 100644 index 0000000..23e97c7 --- /dev/null +++ b/parser/src/lib.rs @@ -0,0 +1,28 @@ +pub mod token; +mod conversion; +mod simplify; +mod pest_rst; +mod pair_ext_parse; +#[cfg(test)] +pub mod tests; + +use failure::Error; +use pest::Parser; + +use document_tree::Document; + +use self::pest_rst::{RstParser,Rule}; +use self::conversion::convert_document; +use self::simplify::resolve_references; + + +/// Parse into a document tree and resolve sections, but not references. +pub fn parse_only(source: &str) -> Result { + let pairs = RstParser::parse(Rule::document, source)?; + convert_document(pairs) +} + +/// Parse into a document tree and resolve sections and references. +pub fn parse(source: &str) -> Result { + parse_only(source).map(resolve_references) +} diff --git a/parser/src/pair_ext_parse.rs b/parser/src/pair_ext_parse.rs new file mode 100644 index 0000000..a04b3dd --- /dev/null +++ b/parser/src/pair_ext_parse.rs @@ -0,0 +1,21 @@ +use std::str::FromStr; + +use pest::Span; +use pest::iterators::Pair; +use pest::error::{Error,ErrorVariant}; + + +pub trait PairExt where R: pest::RuleType { + fn parse(&self) -> Result> where T: FromStr, E: ToString; +} + +impl<'l, R> PairExt for Pair<'l, R> where R: pest::RuleType { + fn parse(&self) -> Result> where T: FromStr, E: ToString { + self.as_str().parse().map_err(|e| to_parse_error(self.as_span(), &e)) + } +} + +pub(crate) fn to_parse_error(span: Span, e: &E) -> Error where E: ToString, R: pest::RuleType { + let var: ErrorVariant = ErrorVariant::CustomError { message: e.to_string() }; + Error::new_from_span(var, span) +} diff --git a/parser/src/pest_rst.rs b/parser/src/pest_rst.rs new file mode 100644 index 0000000..74199a8 --- /dev/null +++ b/parser/src/pest_rst.rs @@ -0,0 +1,7 @@ +#![allow(clippy::redundant_closure)] + +use pest_derive::Parser; + +#[derive(Parser)] +#[grammar = "rst.pest"] +pub struct RstParser; diff --git a/parser/src/rst.pest b/parser/src/rst.pest new file mode 100644 index 0000000..f3a1516 --- /dev/null +++ b/parser/src/rst.pest @@ -0,0 +1,474 @@ +// Entry point: the document. + +// This grammar is aligned to the doctree names when possible. +// It will however contain blocks, as we can’t parse sections: +// Section headers define the hierarchy by their delimiters, +// and pest only has one stack that we need for indentation. + +document = _{ SOI ~ blocks ~ EOI } +blocks = _{ block ~ (blank_line* ~ block)* ~ blank_line? } +block = _{ PEEK[..] ~ hanging_block } + +// This is the list of all block-level elements +// They’re defined hanging, i.e. without the first PEEK[..] +// This is d +hanging_block = _{ + substitution_def + | image_directive + | admonition + | admonition_gen + | target + | title + | bullet_list + | paragraph +// TODO: implement all those things: +// | block_quote +// | verbatim +// | image ✓ +// | code_block +// | doctest_block +// | admonition ✓ +// | target ✓ +// | horizontal_rule +// | title ✓ +// | table +// | ordered_list +// | bullet_list ✓ +// | paragraph ✓ +// | plain +} + +// Substitution definition. A block type +substitution_def = { ".." ~ PUSH(" "+) ~ "|" ~ substitution_name ~ "|" ~ " "+ ~ inline_dirblock ~ DROP } +substitution_name = { !" " ~ (!(" "|"|") ~ ANY)+ ~ (" "+ ~ (!(" "|"|") ~ ANY)+)* } +inline_dirblock = _{ replace | image } // TODO: implement others + +// Target. A block type +target = { target_qu | target_uq } +target_uq = _{ ".. _" ~ target_name_uq ~ ":" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE } +target_qu = _{ ".. _`" ~ !"``" ~ target_name_qu ~ !"``:" ~ "`:" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE } +target_name_uq = { ( !("_"|":"|"`") ~ !NEWLINE ~ ANY )* } +target_name_qu = { ( !(":"|"`"|"_>") ~ ANY )* } +link_target = { nonspacechar+ } + +// Title. A block type +title = { title_double | title_single } +title_double = { PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP } +title_single = { line ~ PEEK[..] ~ adornments ~ NEWLINE } + +// Bullet list. A block type. +bullet_list = { bullet_item ~ (PEEK[..] ~ bullet_item)* } +bullet_item = { bullet_marker ~ PUSH(" "+) ~ line ~ blank_line* ~ blist_body? ~ DROP } +blist_body = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } + +// paragraph. A block type. +paragraph = { inlines } + + +/* Directives: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives + * .. name:: arguments ~ :options: ~ blank_line+ ~ content + * Everything except for the first argument has to be indented + */ + + +// Directives with options can have these or specific ones: +common_opt_name = { "class" | "name" } + +// Replace. A directive only usable in substitutions. + +replace = { ^"replace::" ~ " "* ~ paragraph } + +// Image. A directive. + +image_directive = _{ ".." ~ PUSH(" "+) ~ image ~ DROP } +image = { ^"image::" ~ line ~ image_opt_block? } +image_opt_block = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ image_option } //TODO: merge with other directives? +image_option = { ":" ~ image_opt_name ~ ":" ~ line } +image_opt_name = { common_opt_name | "alt" | "height" | "width" | "scale" | "align" | "target" } + +// Admonition. A directive. The generic one has a title + +admonition = { ".." ~ PUSH(" "+) ~ ^"admonition::" ~ line ~ blank_line* ~ admonition_content? ~ DROP } +admonition_gen = { ".." ~ PUSH(" "+) ~ admonition_type ~ "::" ~ (blank_line | line) ~ blank_line* ~ admonition_content? ~ DROP } +admonition_type = { ^"attention" | ^"caution" | ^"danger" | ^"error" | ^"hint" | ^"important" | ^"note" | ^"tip" | ^"warning" } +admonition_content = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } //TODO: merge with other directives? + + + +/* + * inlines + */ + + +line = { !marker ~ inline+ ~ NEWLINE } +blank_line = _{ !marker ~ !inline ~ " "* ~ NEWLINE } + +inlines = _{ !marker ~ inline+ ~ ( ( ws_newline ~ PEEK[..] ~ !marker ~ inline+ )+ ~ NEWLINE )? } +ws_newline = { NEWLINE } +inline = _{ inline_special | str } +inline_special = _{ + reference + | substitution_ref + | emph_outer + | strong_outer + | literal_outer +// | ul_or_star_line +// | space +// | note_reference +// | footnote +// //| citation +// | code +// | application_depent +// | entity +// | escaped_char +// | smart +// | symbol +} + +str = { (!(NEWLINE | inline_special) ~ ANY)+ } + +// simple formatting +inline_nested = _{ inline_special | str_nested } +str_nested = { word_nested ~ ( " "+ ~ word_nested)* } +// TODO: allow ` in emph and * in literal +word_nested = _{ (!(NEWLINE | " " | inline_special | "*" | "`") ~ ANY)+ } + +emph_outer = _{ "*" ~ emph ~ "*" } +emph = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* } +strong_outer = _{ "**" ~ strong ~ "**" } +strong = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* } +literal_outer = _{ "``" ~ literal ~ "``" } +literal = { (!("`"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("`"|" ") ~ inline_nested)+)* } + +// inline links +reference = { reference_target | reference_explicit | reference_auto } + +reference_target = { reference_target_uq ~ "_" | reference_target_qu } +reference_target_uq = { (!("_"|":"|"`") ~ nonspacechar)+ } +reference_target_qu = { ( !("`"? ~ "`_") ~ "`" ~ !"``" ) ~ reference_text? ~ ("<" ~ reference_bracketed ~ ">")? ~ ( "`" ~ !"``" ) ~ "_" } +reference_text = { !"<" ~ ( !("`"|"<") ~ ANY )+ } +reference_bracketed = { url | (target_name_qu ~ "_") | relative_reference } +relative_reference = { (!("`"|">") ~ ANY)+ } + +reference_explicit = { reference_label ~ "(" ~ " "* ~ reference_source ~ " "* ~ (NEWLINE ~ PEEK[..])? ~ reference_title ~ " "* ~ ")" } +reference_label = { "[" ~ !"^" ~ (!"]" ~ inline)* ~ "]" } +reference_source = { reference_source_contents } +reference_source_contents = _{ ( (!("("|")"|">") ~ nonspacechar)+ | "(" ~ reference_source_contents ~ ")" )* } +reference_title = { ( reference_title_single | reference_title_double | "" ) } +reference_title_single = { "'" ~ ( !("'" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "'" } +reference_title_double = { "\"" ~ ( !("\"" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "\"" } + +// Emails can't end with punctuation, but URLs must use a separate rule. +reference_auto = { url_auto | email } +//reference_embedded = { "`" ~ reference_embedded_source ~ "<" ~ absolute_url_with_fragment ~ ">`_" ~ "_"? } +//reference_embedded_source = { ( !("<"|":"|"`") ~ ( " " | nonspacechar | blank_line ) )* } + +substitution_ref = _{ "|" ~ substitution_name ~ "|" } + +/* URLs as defined by the WHATWG URL standard. */ +url = { absolute_url_no_query ~ ("?" ~ url_unit*)? ~ ("#" ~ url_unit*)? } +absolute_url_no_query = { + ( special_url_scheme ~ ":" ~ scheme_relative_special_url ) | + ( ^"file:" ~ scheme_relative_file_url ) | + ( arbitrary_scheme ~ ":" ~ relative_url ) +} +scheme_relative_special_url = { "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? } +path_absolute_url = { "/" ~ path_relative_url } +path_relative_url = { ( url_path_segment_unit* ~ "/" )* ~ url_path_segment_unit* } +url_path_segment_unit = { !("/"|"?") ~ url_unit } +url_port = { ASCII_DIGIT* } +scheme_relative_file_url = { "//" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url } +relative_url = { ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? ) | path_absolute_url | (!(arbitrary_scheme ~ ":") ~ path_relative_url) } +/* this is approximately a superset of valid hosts and opaque hosts */ +host = { ( !(":"|"/"|"?"|"#") ~ url_unit)+ | ("["~(ASCII_HEX_DIGIT|"."|":")+~"]") } +special_url_scheme = { ^"ftp" | (^"http" | ^"ws") ~ ^"s"? } /* doesn't include "file" */ +arbitrary_scheme = { ASCII_ALPHA ~ ASCII_ALPHANUMERIC* } +url_unit = { + ASCII_ALPHANUMERIC | + "!"|"$"|"&"|"'"|"("|")"|"*"|"+"|","|"-"|"."|"/"|":"|";"|"="|"?"|"@"|"_"|"~" | + (!(SURROGATE|NONCHARACTER_CODE_POINT) ~ '\u{A0}'..'\u{10FFFD}') | + ("%" ~ ASCII_HEX_DIGIT{2}) +} + +/* + * Rules for URLs that don't end in punctuation. + * This is a modification of the rules above to incorporate the docutils rules + * for the final character in an auto URL and for the character after it. + * The patterns used here to emulate the behavior of docutils' regex are taken + * from . + */ +url_auto = { + ( absolute_url_no_query ~ ("?" ~ url_unit*)? ~ "#" ~ url_units_auto ) | + ( absolute_url_no_query ~ "?" ~ url_units_auto ) | + ( special_url_scheme ~ "://" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) | + ( special_url_scheme ~ "://" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) | + ( special_url_scheme ~ "://" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) | + ( ^"file://" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url_auto ) | + ( arbitrary_scheme ~ ":" ~ relative_url_auto ) +} +domain_host_auto = { + ( !(":"|"/"|"?"|"#") ~ url_unit ~ url_units_auto ) | + ( !(":"|"/"|"?"|"#") ~ url_unit ~ &">" ) | + ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url ) +} +path_absolute_url_auto = { "/" ~ path_relative_url_auto } +path_relative_url_auto = { prua1 | prua2 | &follows_auto_url } +prua1 = { ( url_path_segment_unit ~ prua1 ) | ( "/" ~ path_relative_url_auto ) } +prua2 = { ( url_path_segment_unit ~ prua2 ) | ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"="|"+") ~ &follows_auto_url ) } +relative_url_auto = { + ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) | + ( "//" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) | + ( "//" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) | + path_absolute_url_auto | + // (prua1|prua2) is path_relative_url_auto minus the &follows_auto_url case + (!(arbitrary_scheme ~ ":") ~ (prua1 | prua2)) +} +url_units_auto = { + ( url_unit ~ url_units_auto ) | + ( url_unit ~ &">" ~ &follows_auto_url ) | + ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url ) +} +follows_auto_url = { + EOI|"\x00"|WHITE_SPACE|">"|"\u{201A}"|"\u{201E}"| + (!(CONNECTOR_PUNCTUATION|OPEN_PUNCTUATION|"#"|"%"|"&"|"*"|"@") ~ PUNCTUATION) +} + +/* Rules for emails as defined by the HTML standard */ +email = { ( email_atext | "." )+ ~ "@" ~ email_label ~ ( "." ~ email_label )* } +email_atext = { ASCII_ALPHANUMERIC|"!"|"#"|"$"|"%"|"&"|"'"|"/"|"="|"?"|"^"|"_"|"`"|"{"|"|"|"}"|"~" } +email_label = { ASCII_ALPHANUMERIC ~ ( !("-"+ ~ !ASCII_ALPHANUMERIC) ~ (ASCII_ALPHANUMERIC|"-") ){0,62} } + +/* + * character classes + */ + + +bullet_marker = _{ "+" | "*" | "-" } +adornments = { + // recommended + "="+ | "-"+ | "`"+ | ":"+ | "."+ | "'"+ | "\""+ | "~"+ | "^"+ | "_"+ | "*"+ | "+"+ | "#"+ | + // parentheses + "("+ | ")"+ | "["+ | "]"+ | "{"+ | "}"+ | + // punctuation + ","+ | ";"+ | "!"+ | "?"+ | + // operators + "&"+ | "|"+ | "/"+ | "%"+ | "<"+ | ">"+ | + // misc + "$"+ | "@"+ | "\\"+ +} +nonspacechar = _{ !(" " | NEWLINE) ~ ANY } + + +/* + * lookaheads. do not use in another position + */ + + +marker = _{ (bullet_marker | "..") ~ " " } + + + +//################################################################################# + + + +// code_block = { +// ".. code" ~ "-block"? ~ ":: " ~ source ~ blank_line ~ +// NEWLINE ~ verbatim_chunk+ +// } + +// doctest_block = { (doctest_line+ ~ (!(">" | blank_line) ~ line)*)+ } + +// block_quote_raw = { ":" ~ blank_line ~ NEWLINE ~ nonblank_indented_line+ } + +// block_quote_chunk = { +// !"::" ~ ":" ~ blank_line ~ +// NEWLINE ~ +// blank_line* ~ +// nonblank_indented_line+ +// } + +// block_quote = { block_quote_chunk+ } + +// nonblank_indented_line = { !blank_line ~ indented_line } + +// verbatim_chunk = { blank_line* ~ nonblank_indented_line+ } + +// verbatim = { verbatim_chunk+ } + +// horizontal_rule = { +// ( "=" ~ sp ~ "=" ~ sp ~ "=" ~ (sp ~ "=")* +// | "-" ~ sp ~ "-" ~ sp ~ "-" ~ (sp ~ "-")* +// | "*" ~ sp ~ "*" ~ sp ~ "*" ~ (sp ~ "*")* +// | "^" ~ sp ~ "^" ~ sp ~ "^" ~ (sp ~ "^")* +// | "~" ~ sp ~ "~" ~ sp ~ "~" ~ (sp ~ "~")* +// | "_" ~ sp ~ "_" ~ sp ~ "_" ~ (sp ~ "_")* +// ) ~ +// sp ~ NEWLINE ~ blank_line+ +// } + +// table = { grid_table | header_less_grid_table | simple_table } + +// simple_table = { "NotImplemented" ~ "simple_table" } + +// grid_table = { grid_table_header ~ grid_table_header_sep ~ grid_table_body+ } +// header_less_grid_table = { grid_table_sep ~ grid_table_body+ } +// grid_table_header = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line ~ grid_table_row+ } +// grid_table_body = { ( grid_table_row ~ grid_table_sep )+ } +// grid_table_row = { sp ~ "|" ~ sp ~ ( table_cell ~ sp ~ "|" )+ ~ blank_line } +// table_cell = { ( ":" | ">" | "<" | "/" | "-" | spacechar | escaped_char | alphanumeric )+ } +// grid_table_header_sep = { sp ~ "+" ~ ( "="+ ~ "+" )+ ~ blank_line } +// grid_table_sep = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line } + +// bullet = { !horizontal_rule ~ ("+" | "*" | "-") ~ spacechar+ } + +// bullet_list = { &bullet ~ (list_tight | list_loose) } + +// list_tight = { list_item_tight+ ~ blank_line* ~ !(bullet | enumerator | def_marker) } +// list_loose = { ( list_item ~ blank_line* )+ } + +// list_item = { (bullet | enumerator | def_marker) ~ list_block ~ list_continuation_block* } +// list_item_tight = { +// (bullet | enumerator | def_marker) ~ +// list_block ~ +// (!blank_line ~ list_continuation_block)* ~ +// !list_continuation_block +// } + +// list_block = { !blank_line ~ line ~ list_block_line* } + +// list_continuation_block = { blank_line* ~ ( indent ~ list_block )+ } + +// enumerator = { (ASCII_DIGIT+ | "#"+) ~ "." ~ spacechar+ } + +// ordered_list = { &enumerator ~ (list_tight | list_loose) } + +// list_block_line = { +// !blank_line ~ +// !( (indent? ~ (bullet | enumerator)) | def_marker ) ~ +// !horizontal_rule ~ +// optionally_indented_line +// } + + + +// space = _{ spacechar+ } + +// str = { normal_char+ ~ str_chunk* } +// str_chunk = _{ (normal_char | "_"+ ~ &alphanumeric)+ } + +// escaped_char = { "\\" ~ !NEWLINE ~ ("-" | "\\" | "`" | "|" | "*" | "_" | "{" | "}" | "[" | "]" | "(" | ")" | "#" | "+" | "." | "!" | ">" | "<") } + +// entity = { hex_entity | dec_entity | char_entity } + +// endline = _{ line_break | terminal_endline | normal_endline } +// normal_endline = _{ sp ~ NEWLINE ~ !(blank_line | ">" | line ~ ("="+ | "-"+) ~ NEWLINE) } +// terminal_endline = _{ sp ~ NEWLINE ~ EOI } +// line_break = _{ " " ~ normal_endline } + +// symbol = { special_char } + +// application_depent = { !("`_" | "``_") ~ "`" ~ !"``" ~ target_name_qu ~ "`" ~ !("``" | "_") } + +// // This keeps the parser from getting bogged down on long strings of "*" or "_", +// // or strings of "*" or "_" with space on each side: +// ul_or_star_line = { ul_line | star_line } +// star_line = { "****" ~ "*"* | spacechar ~ "*"+ ~ &spacechar } +// ul_line = { "____" ~ "_"* | spacechar ~ "_"+ ~ &spacechar } + + +// empty_title = { "" } + +// ticks_2 = { "``" ~ !"`" } + +// code = { ticks_2 ~ ( (!"`" ~ nonspacechar)+ | "_" | !ticks_2 ~ "`" | !(sp ~ ticks_2) ~ (spacechar | NEWLINE ~ !blank_line) )+ ~ ticks_2 } + + +// quoted = { +// "\"" ~ (!"\"" ~ ANY)* ~ "\"" | +// "'" ~ (!"'" ~ ANY)* ~ "'" +// } +// spacechar = _{ " " | "\t" } +// sp = _{ spacechar* } +// spnl = _{ sp ~ (NEWLINE ~ sp)? } +// special_char = _{ "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "\"" | "'" | extended_special_char } +// normal_char = _{ !( special_char | spacechar | NEWLINE ) ~ ANY } +// alphanumeric = { +// ASCII_ALPHANUMERIC | +// "\u{200}" | "\u{201}" | "\u{202}" | "\u{203}" | "\u{204}" | "\u{205}" | "\u{206}" | "\u{207}" | +// "\u{210}" | "\u{211}" | "\u{212}" | "\u{213}" | "\u{214}" | "\u{215}" | "\u{216}" | "\u{217}" | +// "\u{220}" | "\u{221}" | "\u{222}" | "\u{223}" | "\u{224}" | "\u{225}" | "\u{226}" | "\u{227}" | +// "\u{230}" | "\u{231}" | "\u{232}" | "\u{233}" | "\u{234}" | "\u{235}" | "\u{236}" | "\u{237}" | +// "\u{240}" | "\u{241}" | "\u{242}" | "\u{243}" | "\u{244}" | "\u{245}" | "\u{246}" | "\u{247}" | +// "\u{250}" | "\u{251}" | "\u{252}" | "\u{253}" | "\u{254}" | "\u{255}" | "\u{256}" | "\u{257}" | +// "\u{260}" | "\u{261}" | "\u{262}" | "\u{263}" | "\u{264}" | "\u{265}" | "\u{266}" | "\u{267}" | +// "\u{270}" | "\u{271}" | "\u{272}" | "\u{273}" | "\u{274}" | "\u{275}" | "\u{276}" | "\u{277}" | +// "\u{300}" | "\u{301}" | "\u{302}" | "\u{303}" | "\u{304}" | "\u{305}" | "\u{306}" | "\u{307}" | +// "\u{310}" | "\u{311}" | "\u{312}" | "\u{313}" | "\u{314}" | "\u{315}" | "\u{316}" | "\u{317}" | +// "\u{320}" | "\u{321}" | "\u{322}" | "\u{323}" | "\u{324}" | "\u{325}" | "\u{326}" | "\u{327}" | +// "\u{330}" | "\u{331}" | "\u{332}" | "\u{333}" | "\u{334}" | "\u{335}" | "\u{336}" | "\u{337}" | +// "\u{340}" | "\u{341}" | "\u{342}" | "\u{343}" | "\u{344}" | "\u{345}" | "\u{346}" | "\u{347}" | +// "\u{350}" | "\u{351}" | "\u{352}" | "\u{353}" | "\u{354}" | "\u{355}" | "\u{356}" | "\u{357}" | +// "\u{360}" | "\u{361}" | "\u{362}" | "\u{363}" | "\u{364}" | "\u{365}" | "\u{366}" | "\u{367}" | +// "\u{370}" | "\u{371}" | "\u{372}" | "\u{373}" | "\u{374}" | "\u{375}" | "\u{376}" | "\u{377}" +// } + +// hex_entity = { "&#" ~ ("X"|"x") ~ ('0'..'9' | 'a'..'f' | 'A'..'F')+ ~ ";" } +// dec_entity = { "&#" ~ ASCII_DIGIT+ ~ ";" } +// char_entity = { "&" ~ ASCII_ALPHANUMERIC+ ~ ";" } + +// indent = _{ "\t" | " " } +// indented_line = { indent ~ line } +// optionally_indented_line = { indent? ~ line } + +// doctest_line = { ">>> " ~ raw_line } + +// line = _{ raw_line } + +// raw_line = _{ (!NEWLINE ~ ANY)* ~ NEWLINE | (!EOI ~ ANY)+ ~ EOI } + +// // Syntax extensions + +// extended_special_char = { +// //&{ extension(EXT_SMART) } ~ +// ("." | "-" | "\"" | "'") | +// //&{ extension(EXT_NOTES) } ~ +// "^" +// } + +// smart = { +// //&{ extension(EXT_SMART) } ~ +// ( ellipsis | dash | single_quoted | double_quoted | apostrophe ) +// } + +// apostrophe = { "'" } + +// ellipsis = { "..." | ". . ." } + +// dash = { em_dash | en_dash } +// en_dash = { "-" ~ &ASCII_DIGIT } +// em_dash = { "---" | "--" } + +// single_quote_start = { "'" ~ !(spacechar | NEWLINE) } +// single_quote_end = { "'" ~ !alphanumeric } +// single_quoted = { single_quote_start ~ ( !single_quote_end ~ inline )+ ~ single_quote_end } + +// double_quote_start = { "\"" } +// double_quote_end = { "\"" } +// double_quoted = { double_quote_start ~ ( !double_quote_end ~ inline )+ ~ double_quote_end } + +// footnote = { "[#" ~ (!"]" ~ inline)+ ~ "]_" } + +// definition = { +// &( (!defmark ~ nonspacechar ~ raw_line) ~ blank_line? ~ defmark) ~ +// d_list_title+ ~ +// (def_tight | def_loose) +// } +// d_list_title = { !defmark ~ &nonspacechar ~ (!endline ~ inline)+ ~ sp ~ NEWLINE } +// def_tight = { &defmark ~ list_tight } +// def_loose = { blank_line ~ &defmark ~ list_loose } +// defmark = { (":" | "~") ~ spacechar+ } +// def_marker = { +// //&{ extension(EXT_DLISTS) } ~ +// defmark +// } diff --git a/parser/src/simplify.rs b/parser/src/simplify.rs new file mode 100644 index 0000000..7974991 --- /dev/null +++ b/parser/src/simplify.rs @@ -0,0 +1,662 @@ +/* +http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets + +Links can have internal or external targets. +In the source, targets look like: + + .. targetname1: + .. targetname2: + + some paragraph or list item or so + +or: + + .. targetname1: + .. targetname2: https://link + +There’s also anonymous links and targets without names. + +TODO: continue documenting how it’s done via https://repo.or.cz/docutils.git/blob/HEAD:/docutils/docutils/transforms/references.py +*/ + +use std::collections::HashMap; + +use document_tree::{ + url::Url, + Document, + HasChildren, + attribute_types::NameToken, + elements::{self as e, Element}, + element_categories as c, + extra_attributes::ExtraAttributes, +}; + + +#[derive(Debug)] +enum NamedTargetType { + NumberedFootnote(usize), + LabeledFootnote(usize), + Citation, + InternalLink, + ExternalLink(Url), + IndirectLink(NameToken), + SectionTitle, +} +impl NamedTargetType { + fn is_implicit_target(&self) -> bool { + match self { + NamedTargetType::SectionTitle => true, + _ => false, + } + } +} + +#[derive(Clone, Debug)] +struct Substitution { + content: Vec, + /// If true and the sibling before the reference is a text node, + /// the text node gets right-trimmed. + ltrim: bool, + /// Same as `ltrim` with the sibling after the reference. + rtrim: bool, +} + +#[derive(Default, Debug)] +struct TargetsCollected { + named_targets: HashMap, + substitutions: HashMap, + normalized_substitutions: HashMap, +} +impl TargetsCollected { + fn target_url<'t>(self: &'t TargetsCollected, refname: &[NameToken]) -> Option<&'t Url> { + // TODO: Check if the target would expand circularly + if refname.len() != 1 { + panic!("Expected exactly one name in a reference."); + } + let name = refname[0].clone(); + match self.named_targets.get(&name)? { + NamedTargetType::ExternalLink(url) => Some(url), + _ => unimplemented!(), + } + } + + fn substitution<'t>(self: &'t TargetsCollected, refname: &[NameToken]) -> Option<&'t Substitution> { + // TODO: Check if the substitution would expand circularly + if refname.len() != 1 { + panic!("Expected exactly one name in a substitution reference."); + } + let name = refname[0].clone(); + self.substitutions.get(&name).or_else(|| { + self.normalized_substitutions.get(&name.0.to_lowercase()) + }) + } +} + +trait ResolvableRefs { + fn populate_targets(&self, refs: &mut TargetsCollected); + fn resolve_refs(self, refs: &TargetsCollected) -> Vec where Self: Sized; +} + +pub fn resolve_references(mut doc: Document) -> Document { + let mut references: TargetsCollected = Default::default(); + for c in doc.children() { + c.populate_targets(&mut references); + } + let new: Vec<_> = doc.children_mut().drain(..).flat_map(|c| c.resolve_refs(&references)).collect(); + Document::with_children(new) +} + +fn sub_pop(parent: &P, refs: &mut TargetsCollected) where P: HasChildren, C: ResolvableRefs { + for c in parent.children() { + c.populate_targets(refs); + } +} + +fn sub_res(mut parent: P, refs: &TargetsCollected) -> P where P: e::Element + HasChildren, C: ResolvableRefs { + let new: Vec<_> = parent.children_mut().drain(..).flat_map(|c| c.resolve_refs(refs)).collect(); + parent.children_mut().extend(new); + parent +} + +fn sub_sub_pop(parent: &P, refs: &mut TargetsCollected) where P: HasChildren, C1: HasChildren, C2: ResolvableRefs { + for c in parent.children() { + sub_pop(c, refs); + } +} + +fn sub_sub_res(mut parent: P, refs: &TargetsCollected) -> P where P: e::Element + HasChildren, C1: e::Element + HasChildren, C2: ResolvableRefs { + let new: Vec<_> = parent.children_mut().drain(..).map(|c| sub_res(c, refs)).collect(); + parent.children_mut().extend(new); + parent +} + +impl ResolvableRefs for c::StructuralSubElement { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::StructuralSubElement::*; + match self { + Title(e) => sub_pop(&**e, refs), + Subtitle(e) => sub_pop(&**e, refs), + Decoration(e) => sub_pop(&**e, refs), + Docinfo(e) => sub_pop(&**e, refs), + SubStructure(e) => e.populate_targets(refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::StructuralSubElement::*; + vec![match self { + Title(e) => sub_res(*e, refs).into(), + Subtitle(e) => sub_res(*e, refs).into(), + Decoration(e) => sub_res(*e, refs).into(), + Docinfo(e) => sub_res(*e, refs).into(), + SubStructure(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), + }] + } +} + +impl ResolvableRefs for c::SubStructure { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubStructure::*; + match self { + Topic(e) => sub_pop(&**e, refs), + Sidebar(e) => sub_pop(&**e, refs), + Transition(_) => {}, + Section(e) => sub_pop(&**e, refs), + BodyElement(e) => e.populate_targets(refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubStructure::*; + vec![match self { + Topic(e) => sub_res(*e, refs).into(), + Sidebar(e) => sub_res(*e, refs).into(), + Transition(e) => Transition(e), + Section(e) => sub_res(*e, refs).into(), + BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), + }] + } +} + +impl ResolvableRefs for c::BodyElement { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::BodyElement::*; + match self { + Paragraph(e) => sub_pop(&**e, refs), + LiteralBlock(e) => sub_pop(&**e, refs), + DoctestBlock(e) => sub_pop(&**e, refs), + MathBlock(_) => {}, + Rubric(e) => sub_pop(&**e, refs), + SubstitutionDefinition(e) => { + let subst = Substitution { + content: e.children().clone(), + ltrim: e.extra().ltrim, + rtrim: e.extra().rtrim + }; + for name in e.names() { + if refs.substitutions.contains_key(name) { + // TODO: Duplicate substitution name (level 3 system message). + } + // Intentionally overriding any previous values. + refs.substitutions.insert(name.clone(), subst.clone()); + refs.normalized_substitutions.insert(name.0.to_lowercase(), subst.clone()); + } + }, + Comment(_) => {}, + Pending(_) => { + unimplemented!(); + }, + Target(e) => { + if let Some(uri) = &e.extra().refuri { + for name in e.names() { + refs.named_targets.insert(name.clone(), NamedTargetType::ExternalLink(uri.clone())); + } + } + // TODO: as is, people can only refer to the target directly containing the URL. + // add refid and refnames to some HashMap and follow those later. + }, + Raw(_) => {}, + Image(_) => {}, + Compound(e) => sub_pop(&**e, refs), + Container(e) => sub_pop(&**e, refs), + BulletList(e) => sub_sub_pop(&**e, refs), + EnumeratedList(e) => sub_sub_pop(&**e, refs), + DefinitionList(e) => sub_sub_pop(&**e, refs), + FieldList(e) => sub_sub_pop(&**e, refs), + OptionList(e) => sub_sub_pop(&**e, refs), + LineBlock(e) => sub_pop(&**e, refs), + BlockQuote(e) => sub_pop(&**e, refs), + Admonition(e) => sub_pop(&**e, refs), + Attention(e) => sub_pop(&**e, refs), + Hint(e) => sub_pop(&**e, refs), + Note(e) => sub_pop(&**e, refs), + Caution(e) => sub_pop(&**e, refs), + Danger(e) => sub_pop(&**e, refs), + Error(e) => sub_pop(&**e, refs), + Important(e) => sub_pop(&**e, refs), + Tip(e) => sub_pop(&**e, refs), + Warning(e) => sub_pop(&**e, refs), + Footnote(e) => sub_pop(&**e, refs), + Citation(e) => sub_pop(&**e, refs), + SystemMessage(e) => sub_pop(&**e, refs), + Figure(e) => sub_pop(&**e, refs), + Table(e) => sub_pop(&**e, refs) + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::BodyElement::*; + vec![match self { + Paragraph(e) => sub_res(*e, refs).into(), + LiteralBlock(e) => sub_res(*e, refs).into(), + DoctestBlock(e) => sub_res(*e, refs).into(), + MathBlock(e) => MathBlock(e), + Rubric(e) => sub_res(*e, refs).into(), + SubstitutionDefinition(_) => return vec![], + Comment(e) => Comment(e), + Pending(e) => Pending(e), + Target(e) => Target(e), + Raw(e) => Raw(e), + Image(e) => Image(e), + Compound(e) => sub_res(*e, refs).into(), + Container(e) => sub_res(*e, refs).into(), + BulletList(e) => sub_sub_res(*e, refs).into(), + EnumeratedList(e) => sub_sub_res(*e, refs).into(), + DefinitionList(e) => sub_sub_res(*e, refs).into(), + FieldList(e) => sub_sub_res(*e, refs).into(), + OptionList(e) => sub_sub_res(*e, refs).into(), + LineBlock(e) => sub_res(*e, refs).into(), + BlockQuote(e) => sub_res(*e, refs).into(), + Admonition(e) => sub_res(*e, refs).into(), + Attention(e) => sub_res(*e, refs).into(), + Hint(e) => sub_res(*e, refs).into(), + Note(e) => sub_res(*e, refs).into(), + Caution(e) => sub_res(*e, refs).into(), + Danger(e) => sub_res(*e, refs).into(), + Error(e) => sub_res(*e, refs).into(), + Important(e) => sub_res(*e, refs).into(), + Tip(e) => sub_res(*e, refs).into(), + Warning(e) => sub_res(*e, refs).into(), + Footnote(e) => sub_res(*e, refs).into(), + Citation(e) => sub_res(*e, refs).into(), + SystemMessage(e) => sub_res(*e, refs).into(), + Figure(e) => sub_res(*e, refs).into(), + Table(e) => sub_res(*e, refs).into() + }] + } +} + +impl ResolvableRefs for c::BibliographicElement { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::BibliographicElement::*; + match self { + Author(e) => sub_pop(&**e, refs), + Authors(e) => sub_pop(&**e, refs), + Organization(e) => sub_pop(&**e, refs), + Address(e) => sub_pop(&**e, refs), + Contact(e) => sub_pop(&**e, refs), + Version(e) => sub_pop(&**e, refs), + Revision(e) => sub_pop(&**e, refs), + Status(e) => sub_pop(&**e, refs), + Date(e) => sub_pop(&**e, refs), + Copyright(e) => sub_pop(&**e, refs), + Field(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::BibliographicElement::*; + vec![match self { + Author(e) => sub_res(*e, refs).into(), + Authors(e) => sub_res(*e, refs).into(), + Organization(e) => sub_res(*e, refs).into(), + Address(e) => sub_res(*e, refs).into(), + Contact(e) => sub_res(*e, refs).into(), + Version(e) => sub_res(*e, refs).into(), + Revision(e) => sub_res(*e, refs).into(), + Status(e) => sub_res(*e, refs).into(), + Date(e) => sub_res(*e, refs).into(), + Copyright(e) => sub_res(*e, refs).into(), + Field(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::TextOrInlineElement { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::TextOrInlineElement::*; + match self { + String(_) => {}, + Emphasis(e) => sub_pop(&**e, refs), + Strong(e) => sub_pop(&**e, refs), + Literal(e) => sub_pop(&**e, refs), + Reference(e) => sub_pop(&**e, refs), + FootnoteReference(e) => sub_pop(&**e, refs), + CitationReference(e) => sub_pop(&**e, refs), + SubstitutionReference(e) => sub_pop(&**e, refs), + TitleReference(e) => sub_pop(&**e, refs), + Abbreviation(e) => sub_pop(&**e, refs), + Acronym(e) => sub_pop(&**e, refs), + Superscript(e) => sub_pop(&**e, refs), + Subscript(e) => sub_pop(&**e, refs), + Inline(e) => sub_pop(&**e, refs), + Problematic(e) => sub_pop(&**e, refs), + Generated(e) => sub_pop(&**e, refs), + Math(_) => {}, + TargetInline(_) => { + unimplemented!(); + }, + RawInline(_) => {}, + ImageInline(_) => {} + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::TextOrInlineElement::*; + vec![match self { + String(e) => String(e), + Emphasis(e) => sub_res(*e, refs).into(), + Strong(e) => sub_res(*e, refs).into(), + Literal(e) => sub_res(*e, refs).into(), + Reference(mut e) => { + if e.extra().refuri.is_none() { + if let Some(uri) = refs.target_url(&e.extra().refname) { + e.extra_mut().refuri = Some(uri.clone()); + } + } + (*e).into() + }, + FootnoteReference(e) => sub_res(*e, refs).into(), + CitationReference(e) => sub_res(*e, refs).into(), + SubstitutionReference(e) => match refs.substitution(&e.extra().refname) { + Some(Substitution {content, ltrim, rtrim}) => { + // (level 3 system message). + // TODO: ltrim and rtrim. + if *ltrim || *rtrim { + dbg!(content, ltrim, rtrim); + } + return content.clone() + }, + None => { + // Undefined substitution name (level 3 system message). + // TODO: This replaces the reference by a Problematic node. + // The corresponding SystemMessage node should go in a generated + // section with class "system-messages" at the end of the document. + use document_tree::Problematic; + let mut replacement: Box = Box::new(Default::default()); + replacement.children_mut().push( + c::TextOrInlineElement::String(Box::new(format!("|{}|", e.extra().refname[0].0))) + ); + // TODO: Create an ID for replacement for the system_message to reference. + // TODO: replacement.refid pointing to the system_message. + Problematic(replacement) + } + }, + TitleReference(e) => sub_res(*e, refs).into(), + Abbreviation(e) => sub_res(*e, refs).into(), + Acronym(e) => sub_res(*e, refs).into(), + Superscript(e) => sub_res(*e, refs).into(), + Subscript(e) => sub_res(*e, refs).into(), + Inline(e) => sub_res(*e, refs).into(), + Problematic(e) => sub_res(*e, refs).into(), + Generated(e) => sub_res(*e, refs).into(), + Math(e) => Math(e), + TargetInline(e) => TargetInline(e), + RawInline(e) => RawInline(e), + ImageInline(e) => ImageInline(e) + }] + } +} + +impl ResolvableRefs for c::AuthorInfo { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::AuthorInfo::*; + match self { + Author(e) => sub_pop(&**e, refs), + Organization(e) => sub_pop(&**e, refs), + Address(e) => sub_pop(&**e, refs), + Contact(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::AuthorInfo::*; + vec![match self { + Author(e) => sub_res(*e, refs).into(), + Organization(e) => sub_res(*e, refs).into(), + Address(e) => sub_res(*e, refs).into(), + Contact(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::DecorationElement { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::DecorationElement::*; + match self { + Header(e) => sub_pop(&**e, refs), + Footer(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::DecorationElement::*; + vec![match self { + Header(e) => sub_res(*e, refs).into(), + Footer(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::SubTopic { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubTopic::*; + match self { + Title(e) => sub_pop(&**e, refs), + BodyElement(e) => e.populate_targets(refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubTopic::*; + match self { + Title(e) => vec![sub_res(*e, refs).into()], + BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(), + } + } +} + +impl ResolvableRefs for c::SubSidebar { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubSidebar::*; + match self { + Topic(e) => sub_pop(&**e, refs), + Title(e) => sub_pop(&**e, refs), + Subtitle(e) => sub_pop(&**e, refs), + BodyElement(e) => e.populate_targets(refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubSidebar::*; + vec![match self { + Topic(e) => sub_res(*e, refs).into(), + Title(e) => sub_res(*e, refs).into(), + Subtitle(e) => sub_res(*e, refs).into(), + BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), + }] + } +} + +impl ResolvableRefs for c::SubDLItem { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubDLItem::*; + match self { + Term(e) => sub_pop(&**e, refs), + Classifier(e) => sub_pop(&**e, refs), + Definition(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubDLItem::*; + vec![match self { + Term(e) => sub_res(*e, refs).into(), + Classifier(e) => sub_res(*e, refs).into(), + Definition(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::SubField { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubField::*; + match self { + FieldName(e) => sub_pop(&**e, refs), + FieldBody(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubField::*; + vec![match self { + FieldName(e) => sub_res(*e, refs).into(), + FieldBody(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::SubOptionListItem { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubOptionListItem::*; + match self { + OptionGroup(e) => sub_sub_pop(&**e, refs), + Description(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubOptionListItem::*; + vec![match self { + OptionGroup(e) => sub_sub_res(*e, refs).into(), + Description(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::SubOption { + fn populate_targets(&self, _: &mut TargetsCollected) {} + fn resolve_refs(self, _: &TargetsCollected) -> Vec { vec![self] } +} + +impl ResolvableRefs for c::SubLineBlock { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubLineBlock::*; + match self { + LineBlock(e) => sub_pop(&**e, refs), + Line(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubLineBlock::*; + vec![match self { + LineBlock(e) => sub_res(*e, refs).into(), + Line(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::SubBlockQuote { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubBlockQuote::*; + match self { + Attribution(e) => sub_pop(&**e, refs), + BodyElement(e) => e.populate_targets(refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubBlockQuote::*; + match self { + Attribution(e) => vec![sub_res(*e, refs).into()], + BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(), + } + } +} + +impl ResolvableRefs for c::SubFootnote { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubFootnote::*; + match self { + Label(e) => sub_pop(&**e, refs), + BodyElement(e) => e.populate_targets(refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubFootnote::*; + match self { + Label(e) => vec![sub_res(*e, refs).into()], + BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(), + } + } +} + +impl ResolvableRefs for c::SubFigure { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubFigure::*; + match self { + Caption(e) => sub_pop(&**e, refs), + Legend(e) => sub_pop(&**e, refs), + BodyElement(e) => e.populate_targets(refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubFigure::*; + vec![match self { + Caption(e) => sub_res(*e, refs).into(), + Legend(e) => sub_res(*e, refs).into(), + BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), + }] + } +} + +impl ResolvableRefs for c::SubTable { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubTable::*; + match self { + Title(e) => sub_pop(&**e, refs), + TableGroup(e) => sub_pop(&**e, refs), + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubTable::*; + vec![match self { + Title(e) => sub_res(*e, refs).into(), + TableGroup(e) => sub_res(*e, refs).into(), + }] + } +} + +impl ResolvableRefs for c::SubTableGroup { + fn populate_targets(&self, refs: &mut TargetsCollected) { + use c::SubTableGroup::*; + match self { + TableColspec(_) => { + unimplemented!(); + }, + TableHead(e) => { + for c in e.children() { + sub_sub_pop(c, refs); + } + }, + TableBody(e) => { + for c in e.children() { + sub_sub_pop(c, refs); + } + }, + } + } + fn resolve_refs(self, refs: &TargetsCollected) -> Vec { + use c::SubTableGroup::*; + vec![match self { + TableColspec(e) => TableColspec(e), + TableHead(mut e) => { + let new: Vec<_> = e.children_mut().drain(..).map(|c| sub_sub_res(c, refs)).collect(); + e.children_mut().extend(new); + TableHead(e) + }, + TableBody(mut e) => { + let new: Vec<_> = e.children_mut().drain(..).map(|c| sub_sub_res(c, refs)).collect(); + e.children_mut().extend(new); + TableBody(e) + }, + }] + } +} diff --git a/parser/src/tests.rs b/parser/src/tests.rs new file mode 100644 index 0000000..1ef965a --- /dev/null +++ b/parser/src/tests.rs @@ -0,0 +1,242 @@ +use pest::consumes_to; +use pest::parses_to; + +use crate::pest_rst::{RstParser, Rule}; + +#[test] +fn plain() { + parses_to! { + parser: RstParser, + input: "line\n", + rule: Rule::paragraph, + tokens: [ + paragraph(0, 4, [ + str(0, 4) + ]) + ] + }; +} + +#[test] +fn emph_only() { + parses_to! { + parser: RstParser, + input: "*emphasis*", + rule: Rule::emph_outer, + tokens: [ + emph(1, 9, [str_nested(1, 9)]) + ] + }; +} + +#[test] +fn emph() { + parses_to! { + parser: RstParser, + input: "line *with markup*\n", + rule: Rule::paragraph, + tokens: [ + paragraph(0, 18, [ + str(0, 5), + emph(6, 17, [str_nested(6, 17)]), + ]) + ] + }; +} + +#[test] +fn title() { + parses_to! { + parser: RstParser, + input: "\ +Title +===== +", + rule: Rule::title, + tokens: [ + title(0, 12, [ title_single(0, 12, [ + line(0, 6, [ str(0, 5) ]), + adornments(6, 11), + ]) ]) + ] + }; +} + +#[test] +fn title_overline() { + parses_to! { + parser: RstParser, + input: "\ +----- +Title +----- +", + rule: Rule::title, + tokens: [ + title(0, 17, [ title_double(0, 17, [ + adornments(0, 5), + line(6, 12, [ str(6, 11) ]), + ]) ]) + ] + }; +} + +#[allow(clippy::cognitive_complexity)] +#[test] +fn two_targets() { + parses_to! { + parser: RstParser, + input: "\ +.. _a: http://example.com +.. _`b_`: https://example.org +", + rule: Rule::document, + tokens: [ + target(0, 26, [ + target_name_uq(4, 5), + link_target(7, 25), + ]), + target(26, 56, [ + target_name_qu(31, 33), + link_target(36, 55), + ]), + ] + }; +} + +#[allow(clippy::cognitive_complexity)] +#[test] +fn admonitions() { + parses_to! { + parser: RstParser, + input: "\ +.. note:: + Just next line +.. admonition:: In line title + + Next line + +.. danger:: Just this line +", + rule: Rule::document, + tokens: [ + admonition_gen(0, 27, [ + admonition_type(3, 7), + paragraph(13, 27, [ str(13, 27) ]), + ]), + admonition(28, 71, [ + line(43, 58, [ str(43, 57) ]), + paragraph(62, 71, [ str(62, 71) ]), + ]), + admonition_gen(73, 100, [ + admonition_type(76, 82), + line(84, 100, [ str(84, 99) ]), + ]), + ] + }; +} + + +#[allow(clippy::cognitive_complexity)] +#[test] +fn substitutions() { + parses_to! { + parser: RstParser, + input: "\ +A |subst| in-line + +.. |subst| replace:: substitution +.. |subst2| replace:: it can also + be hanging +", + rule: Rule::document, + tokens: [ + paragraph(0, 17, [ + str(0, 2), + substitution_name(3, 8), + str(9, 17), + ]), + substitution_def(19, 52, [ + substitution_name(23, 28), + replace(30, 52, [ paragraph(40, 52, [str(40, 52)]) ]), + ]), + substitution_def(53, 101, [ + substitution_name(57, 63), + replace(65, 101, [ paragraph(75, 101, [ + str(75, 86), ws_newline(86, 87), + str(88, 100), + ]) ]), + ]), + ] + }; +} + + +#[allow(clippy::cognitive_complexity)] +#[test] +fn substitution_image() { + parses_to! { + parser: RstParser, + input: "\ +.. |subst| image:: thing.png + :target: foo.html +", + rule: Rule::document, + tokens: [ + substitution_def(0, 50, [ + substitution_name(4, 9), + image(11, 50, [ + line(18, 29, [ str(18, 28) ]), + image_option(32, 50, [ + image_opt_name(33, 39), + line(40, 50, [ str(40, 49) ]), + ]), + ]), + ]), + ] + }; +} + +// TODO: test images + +#[allow(clippy::cognitive_complexity)] +#[test] +fn nested_lists() { + parses_to! { + parser: RstParser, + input: "\ +paragraph + +- item 1 +- item 2 + more text + more text 2 + more text 3 + - nested item 1 + - nested item 2 + - nested item 3 +", + rule: Rule::document, + tokens: [ + paragraph(0, 9, [ str(0, 9) ]), + bullet_list(11, 131, [ + bullet_item(11, 21, [ + line(14, 21, [ str(14, 20) ]), + ]), + bullet_item(21, 131, [ + line(24, 31, [ str(24, 30) ]), + paragraph(34, 74, [ + str(34, 43), ws_newline(43, 44), + str(47, 58), ws_newline(58, 59), + str(62, 73), + ]), + bullet_list(77, 131, [ + bullet_item( 77, 93, [ line( 79, 93, [str( 79, 92)]) ]), + bullet_item( 96, 112, [ line( 98, 112, [str( 98, 111)]) ]), + bullet_item(115, 131, [ line(117, 131, [str(117, 130)]) ]), + ]), + ]), + ]), + ] + } +} diff --git a/parser/src/token.rs b/parser/src/token.rs new file mode 100644 index 0000000..b3b7bac --- /dev/null +++ b/parser/src/token.rs @@ -0,0 +1,16 @@ +//http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#bullet-lists + +// *, +, -, •, ‣, ⁃ +pub enum BulletListType { Ast, Plus, Minus, Bullet, TriBullet, HyphenBullet } +// 1, A, a, I, i +pub enum EnumListChar { Arabic, AlphaUpper, AlphaLower, RomanUpper, RomanLower, Auto } +// 1., (1), 1) +pub enum EnumListType { Period, ParenEnclosed, Paren } +// ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ +pub enum AdornmentChar { + Bang, DQuote, Hash, Dollar, Percent, Amp, SQuote, LParen, RParen, Ast, Plus, Comma, + Minus, Period, Slash, Colon, Semicolon, Less, Eq, More, Question, At, LBrack, + Backslash, RBrack, Caret, Underscore, Backtick, LBrace, Pipe, RBrace, Tilde, +} +// [1], [#], [*], [#foo] +pub enum FootnoteType { Numbered(usize), AutoNumber, AutoSymbol, AutoNamed(String) } diff --git a/renderer/Cargo.toml b/renderer/Cargo.toml new file mode 100644 index 0000000..bc80adc --- /dev/null +++ b/renderer/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = 'rst_renderer' +version = '0.2.0' +authors = ['Philipp A. '] +edition = '2018' +description = 'a reStructuredText renderer' +license = 'MIT OR Apache-2.0' + +documentation = 'https://flying-sheep.github.io/rust-rst' +homepage = 'https://github.com/flying-sheep/rust-rst' +repository = 'https://github.com/flying-sheep/rust-rst.git' + +[dependencies] +document_tree = { path = '../document_tree' } + +failure = '0.1.6' +serde_json = '1.0.44' +serde-xml-rs = '0.3.1' + +[dev-dependencies] +rst_parser = { path = '../parser' } + +pretty_assertions = '0.6.1' diff --git a/renderer/src/html.rs b/renderer/src/html.rs new file mode 100644 index 0000000..73b994d --- /dev/null +++ b/renderer/src/html.rs @@ -0,0 +1,393 @@ +#[cfg(test)] +pub mod tests; + +use std::io::Write; + +use failure::Error; + +// use crate::url::Url; +use document_tree::{ + Document,Element,HasChildren,ExtraAttributes, + elements as e, + element_categories as c, +}; + + +// static FOOTNOTE_SYMBOLS: [char; 10] = ['*', '†', '‡', '§', '¶', '#', '♠', '♥', '♦', '♣']; + +pub fn render_html(document: &Document, stream: W, standalone: bool) -> Result<(), Error> where W: Write { + let mut renderer = HTMLRenderer { stream, level: 0 }; + if standalone { + document.render_html(&mut renderer) + } else { + for c in document.children() { + (*c).render_html(&mut renderer)?; + writeln!(renderer.stream)?; + } + Ok(()) + } +} + +fn escape_html(text: &str) -> String { + text.replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) +} + +struct HTMLRenderer where W: Write { + stream: W, + level: u8, +} + +trait HTMLRender { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write; +} + +macro_rules! impl_html_render_cat {($cat:ident { $($member:ident),+ }) => { + impl HTMLRender for c::$cat { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + match self {$( + c::$cat::$member(elem) => (**elem).render_html(renderer), + )+} + } + } +}} + +macro_rules! impl_html_render_simple { + ( + $type1:ident => $tag1:ident $( [$($post1:tt)+] )?, + $( $type:ident => $tag:ident $( [$($post:tt)+] )? ),+ + ) => { + impl_html_render_simple!($type1 => $tag1 $([$($post1)+])?); + $( impl_html_render_simple!($type => $tag $([$($post)+])?); )+ + }; + ( $type:ident => $tag:ident ) => { + impl_html_render_simple!($type => $tag[""]); + }; + ( $type:ident => $tag:ident [ $post:expr ] ) => { + impl HTMLRender for e::$type { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + let multiple_children = self.children().len() > 1; + write!(renderer.stream, "<{}>", stringify!($tag))?; + if multiple_children { write!(renderer.stream, $post)?; } + for c in self.children() { + (*c).render_html(renderer)?; + if multiple_children { write!(renderer.stream, $post)?; } + } + write!(renderer.stream, "", stringify!($tag))?; + Ok(()) + } + } + }; +} + +macro_rules! impl_html_render_simple_nochildren {( $($type:ident => $tag:ident),+ ) => { $( + impl HTMLRender for e::$type { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + write!(renderer.stream, "<{0}>", stringify!($tag))?; + Ok(()) + } + } +)+ }} + +// Impl + +impl HTMLRender for Document { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + writeln!(renderer.stream, "")?; + for c in self.children() { + (*c).render_html(renderer)?; + writeln!(renderer.stream)?; + } + writeln!(renderer.stream, "")?; + Ok(()) + } +} + +impl_html_render_cat!(StructuralSubElement { Title, Subtitle, Decoration, Docinfo, SubStructure }); +impl_html_render_simple!(Subtitle => h2); + +impl HTMLRender for e::Title { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + let level = if renderer.level > 6 { 6 } else { renderer.level }; + write!(renderer.stream, "", level)?; + for c in self.children() { + (*c).render_html(renderer)?; + } + write!(renderer.stream, "", level)?; + Ok(()) + } +} + +impl HTMLRender for e::Docinfo { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Like “YAML frontmatter” in Markdown + unimplemented!(); + } +} + +impl HTMLRender for e::Decoration { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Header or footer + unimplemented!(); + } +} + +impl_html_render_cat!(SubStructure { Topic, Sidebar, Transition, Section, BodyElement }); +impl_html_render_simple!(Sidebar => aside); + +impl HTMLRender for e::Section { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + renderer.level += 1; + writeln!(renderer.stream, "
", self.ids()[0].0)?; + for c in self.children() { + (*c).render_html(renderer)?; + writeln!(renderer.stream)?; + } + write!(renderer.stream, "
")?; + Ok(()) + } +} + +impl HTMLRender for e::Transition { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + write!(renderer.stream, "
")?; + Ok(()) + } +} + +impl HTMLRender for e::Topic { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // A mini section with title + unimplemented!(); + } +} + +impl_html_render_cat!(BodyElement { Paragraph, LiteralBlock, DoctestBlock, MathBlock, Rubric, SubstitutionDefinition, Comment, Pending, Target, Raw, Image, Compound, Container, BulletList, EnumeratedList, DefinitionList, FieldList, OptionList, LineBlock, BlockQuote, Admonition, Attention, Hint, Note, Caution, Danger, Error, Important, Tip, Warning, Footnote, Citation, SystemMessage, Figure, Table }); +impl_html_render_simple!(Paragraph => p, LiteralBlock => pre, MathBlock => math, Rubric => a, Compound => p, Container => div, BulletList => ul["\n"], EnumeratedList => ol["\n"], DefinitionList => dl["\n"], FieldList => dl["\n"], OptionList => pre, LineBlock => div["\n"], BlockQuote => blockquote, Admonition => aside, Attention => aside, Hint => aside, Note => aside, Caution => aside, Danger => aside, Error => aside, Important => aside, Tip => aside, Warning => aside, Figure => figure); +impl_html_render_simple_nochildren!(Table => table); //TODO: after implementing the table, move it to elems with children + +//impl HTMLRender for I where I: e::Element + a::ExtraAttributes +macro_rules! impl_render_html_image { ($t:ty) => { impl HTMLRender for $t { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + let extra = self.extra(); + if let Some(ref target) = extra.target { + write!(renderer.stream, "
", escape_html(target.as_str()))?; + } + write!(renderer.stream, " + // TODO: height: Option + // TODO: width: Option + // TODO: scale: Option + write!(renderer.stream, " src=\"{}\" />", escape_html(extra.uri.as_str()))?; + if extra.target.is_some() { + write!(renderer.stream, "")?; + } + Ok(()) + } +}}} +impl_render_html_image!(e::Image); +impl_render_html_image!(e::ImageInline); + +impl HTMLRender for e::DoctestBlock { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // TODO + unimplemented!(); + } +} + +impl HTMLRender for e::SubstitutionDefinition { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // TODO: Should those be removed after resolving them + Ok(()) + } +} + +impl HTMLRender for e::Comment { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + write!(renderer.stream, "")?; + Ok(()) + } +} + +impl HTMLRender for e::Pending { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Will those be resolved by the time we get here? + unimplemented!(); + } +} + +impl HTMLRender for e::Target { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Should be resolved by now + Ok(()) + } +} + +impl HTMLRender for e::Raw { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + for c in self.children() { + write!(renderer.stream, "{}", c)?; + } + Ok(()) + } +} + +impl HTMLRender for e::Footnote { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + unimplemented!(); + } +} + +impl HTMLRender for e::Citation { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + unimplemented!(); + } +} + +impl HTMLRender for e::SystemMessage { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + write!(renderer.stream, "
System Message")?; + for c in self.children() { + (*c).render_html(renderer)?; + } + write!(renderer.stream, "
")?; + Ok(()) + } +} + +impl_html_render_cat!(TextOrInlineElement { String, Emphasis, Strong, Literal, Reference, FootnoteReference, CitationReference, SubstitutionReference, TitleReference, Abbreviation, Acronym, Superscript, Subscript, Inline, Problematic, Generated, Math, TargetInline, RawInline, ImageInline }); +impl_html_render_simple!(Emphasis => em, Strong => strong, Literal => code, FootnoteReference => a, CitationReference => a, TitleReference => a, Abbreviation => abbr, Acronym => acronym, Superscript => sup, Subscript => sub, Inline => span, Math => math, TargetInline => a); + +impl HTMLRender for String { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + write!(renderer.stream, "{}", escape_html(self))?; + Ok(()) + } +} + +impl HTMLRender for e::Reference { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + let extra = self.extra(); + write!(renderer.stream, "")?; + for c in self.children() { + (*c).render_html(renderer)?; + } + write!(renderer.stream, "")?; + Ok(()) + } +} + +impl HTMLRender for e::SubstitutionReference { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Will those be resolved by the time we get here? + unimplemented!(); + } +} + +impl HTMLRender for e::Problematic { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Broken inline markup leads to insertion of this in docutils + unimplemented!(); + } +} + +impl HTMLRender for e::Generated { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Section numbers and so on + unimplemented!(); + } +} + +impl HTMLRender for e::RawInline { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + for c in self.children() { + write!(renderer.stream, "{}", c)?; + } + Ok(()) + } +} + + +//--------------\\ +//Content Models\\ +//--------------\\ + +impl_html_render_cat!(SubTopic { Title, BodyElement }); +impl_html_render_cat!(SubSidebar { Topic, Title, Subtitle, BodyElement }); +impl_html_render_simple!(ListItem => li); + +impl HTMLRender for e::DefinitionListItem { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // Term→dt, Definition→dd, Classifier→??? + unimplemented!(); + } +} + +impl HTMLRender for e::Field { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // FieldName→dt, FieldBody→dd + unimplemented!(); + } +} + +impl HTMLRender for e::OptionListItem { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + // OptionGroup→dt(s), Description→dd + unimplemented!(); + } +} + +impl_html_render_cat!(SubLineBlock { LineBlock, Line }); + +impl HTMLRender for e::Line { + fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + for c in self.children() { + (*c).render_html(renderer)?; + } + write!(renderer.stream, "
")?; + Ok(()) + } +} + +impl_html_render_cat!(SubBlockQuote { Attribution, BodyElement }); +impl_html_render_simple!(Attribution => cite); //TODO: correct? + +impl_html_render_cat!(SubFigure { Caption, Legend, BodyElement }); +impl_html_render_simple!(Caption => caption); + +impl HTMLRender for e::Legend { + fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { + unimplemented!(); + } +} + +//------------\\ +//Things to do\\ +//------------\\ + +//TODO: prettyprint option list +//TODO: render admonitions: Admonition, Attention, Hint, Note, Caution, Danger, Error, Important, Tip, Warning +//TODO: properly render tables + +//TODO: add reference target: FootnoteReference, CitationReference, TitleReference +//TODO: add title: Abbr, Acronym +//TODO: convert math, set display attr +//TODO: add id: Rubric, Target, TargetInline diff --git a/renderer/src/html/tests.rs b/renderer/src/html/tests.rs new file mode 100644 index 0000000..8477699 --- /dev/null +++ b/renderer/src/html/tests.rs @@ -0,0 +1,275 @@ +use pretty_assertions::assert_eq; + +use rst_parser::parse; + +use crate::html::render_html; + +fn check_renders_to(rst: &str, expected: &str) { + println!("Rendering:\n{}\n---", rst); + let doc = parse(rst).expect("Cannot parse"); + let mut result_data: Vec = vec![]; + render_html(&doc, &mut result_data, false).expect("Render error"); + let result = String::from_utf8(result_data).expect("Could not decode"); + assert_eq!(result.as_str().trim(), expected); +} + +#[test] +fn simple_string() { + check_renders_to( + "Simple String", + "

Simple String

", + ); +} + +#[test] +fn simple_string_with_markup() { + check_renders_to( + "Simple String with *emph* and **strong**", + "

Simple String with emph and strong

", + ); +} + +#[test] +fn inline_literal() { + check_renders_to( + "Simple String with an even simpler ``inline literal``", + "

Simple String with an even simpler inline literal

", + ); +} + +/* +#[test] +fn test_reference_anonymous() { + check_renders_to("\ +A simple `anonymous reference`__ + +__ http://www.test.com/test_url +", "\ +

A simple anonymous reference

\ +"); +} +*/ + +#[test] +fn two_paragraphs() { + check_renders_to( + "One paragraph.\n\nTwo paragraphs.", + "

One paragraph.

\n

Two paragraphs.

", + ); +} + +#[test] +fn named_reference() { + check_renders_to("\ +A simple `named reference`_ with stuff in between the +reference and the target. + +.. _`named reference`: http://www.test.com/test_url +", "\ +

A simple named reference with stuff in between the \ +reference and the target.

\ +"); +} + +#[test] +fn substitution() { + check_renders_to("\ +A |subst|. + +.. |subst| replace:: text substitution +", "

A text substitution.

"); +} + +/* +#[test] +fn test_section_hierarchy() { + check_renders_to("\ ++++++ +Title ++++++ + +Subtitle +======== + +Some stuff + +Section +------- + +Some more stuff + +Another Section +............... + +And even more stuff +", "\ +

Some stuff

+
+

Section

+

Some more stuff

+
+

Another Section

+

And even more stuff

+
+
\ +"); +} + +#[test] +fn test_docinfo_title() { + check_renders_to("\ ++++++ +Title ++++++ + +:author: me + +Some stuff +", "\ +
+

Title

+
+
Author
+

me

+
+

Some stuff

+
\ +"); +} +*/ + +#[test] +fn section_hierarchy() { + check_renders_to("\ ++++++ +Title ++++++ + +Not A Subtitle +============== + +Some stuff + +Section +------- + +Some more stuff + +Another Section +............... + +And even more stuff +", "\ +
+

Title

+
+

Not A Subtitle

+

Some stuff

+
+

Section

+

Some more stuff

+
+

Another Section

+

And even more stuff

+
+
+
+
\ +"); +} + +#[test] +fn bullet_list() { + check_renders_to("\ +* bullet +* list +", "\ +
    +
  • bullet

  • +
  • list

  • +
\ +"); +} + +/* +#[test] +fn test_table() { + check_renders_to("\ +.. table:: + :align: right + + +-----+-----+ + | 1 | 2 | + +-----+-----+ + | 3 | 4 | + +-----+-----+ +", "\ + ++++ + + + + + + + + +

1

2

3

4

\ +"); +} +*/ + +/* +#[test] +fn test_field_list() { + check_renders_to("\ +Not a docinfo. + +:This: .. _target: + + is +:a: +:simple: +:field: list +", "\ +

Not a docinfo.

+
+
This
+

is

+
+
a
+

+
simple
+

+
field
+

list

+
+
\ +"); +} +*/ + +/* +#[test] +fn test_field_list_long() { + check_renders_to("\ +Not a docinfo. + +:This is: a +:simple field list with loooong field: names +", "\ +

Not a docinfo.

+
+
This is
+

a

+
+
simple field list with loooong field
+

names

+
+
\ +"); +} +*/ diff --git a/renderer/src/lib.rs b/renderer/src/lib.rs new file mode 100644 index 0000000..4d6bfdb --- /dev/null +++ b/renderer/src/lib.rs @@ -0,0 +1,21 @@ +mod html; + + +use std::io::Write; + +use failure::Error; + +use document_tree::Document; + + +pub fn render_json(document: &Document, stream: W) -> Result<(), Error> where W: Write { + serde_json::to_writer(stream, &document)?; + Ok(()) +} + +pub fn render_xml(document: &Document, stream: W) -> Result<(), Error> where W: Write { + serde_xml_rs::to_writer(stream, &document).map_err(failure::SyncFailure::new)?; + Ok(()) +} + +pub use html::render_html; diff --git a/rst/Cargo.toml b/rst/Cargo.toml new file mode 100644 index 0000000..3d1d6f2 --- /dev/null +++ b/rst/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = 'rst' +version = '0.2.0' +authors = ['Philipp A. '] +edition = '2018' +description = 'a reStructuredText parser and renderer for the command line' +license = 'MIT OR Apache-2.0' + +documentation = 'https://flying-sheep.github.io/rust-rst' +homepage = 'https://github.com/flying-sheep/rust-rst' +repository = 'https://github.com/flying-sheep/rust-rst.git' + +[dependencies] +rst_renderer = { path = '../renderer' } +rst_parser = { path = '../parser' } + +quicli = '0.4.0' +structopt = '0.2.15' +clap = '2.32.0' diff --git a/rst/src/main.rs b/rst/src/main.rs new file mode 100644 index 0000000..3c0b8e5 --- /dev/null +++ b/rst/src/main.rs @@ -0,0 +1,47 @@ +use structopt::StructOpt; +use clap::arg_enum; +use quicli::{ + fs::read_file, + prelude::{CliResult,Verbosity}, +}; + +use rst_parser::parse; +use rst_renderer::{ + render_json, + render_xml, + render_html, +}; + +arg_enum! { + #[derive(Debug)] + #[allow(non_camel_case_types)] + enum Format { json, xml, html } +} + +#[derive(Debug, StructOpt)] +#[structopt(raw(setting = "structopt::clap::AppSettings::ColoredHelp"))] +struct Cli { + #[structopt( + long = "format", short = "f", default_value = "html", // xml is pretty defunct… + raw(possible_values = "&Format::variants()", case_insensitive = "true"), + )] + format: Format, + file: String, + #[structopt(flatten)] + verbosity: Verbosity, +} + +fn main() -> CliResult { + let args = Cli::from_args(); + args.verbosity.setup_env_logger("rst")?; + + let content = read_file(args.file)?; + let document = parse(&content)?; + let stdout = std::io::stdout(); + match args.format { + Format::json => render_json(&document, stdout)?, + Format::xml => render_xml (&document, stdout)?, + Format::html => render_html(&document, stdout, true)?, + } + Ok(()) +} diff --git a/src/bin.rs b/src/bin.rs deleted file mode 100644 index 394b416..0000000 --- a/src/bin.rs +++ /dev/null @@ -1,55 +0,0 @@ -#![recursion_limit="256"] - -pub mod document_tree; -pub mod parser; -pub mod renderer; -pub mod url; - - -use structopt::StructOpt; -use clap::{_clap_count_exprs, arg_enum}; -use quicli::{ - fs::read_file, - prelude::{CliResult,Verbosity}, -}; - -use self::parser::parse; -use self::renderer::{ - render_json, - render_xml, - render_html, -}; - -arg_enum! { - #[derive(Debug)] - #[allow(non_camel_case_types)] - enum Format { json, xml, html } -} - -#[derive(Debug, StructOpt)] -#[structopt(raw(setting = "structopt::clap::AppSettings::ColoredHelp"))] -struct Cli { - #[structopt( - long = "format", short = "f", default_value = "html", // xml is pretty defunct… - raw(possible_values = "&Format::variants()", case_insensitive = "true"), - )] - format: Format, - file: String, - #[structopt(flatten)] - verbosity: Verbosity, -} - -fn main() -> CliResult { - let args = Cli::from_args(); - args.verbosity.setup_env_logger("rst")?; - - let content = read_file(args.file)?; - let document = parse(&content)?; - let stdout = std::io::stdout(); - match args.format { - Format::json => render_json(&document, stdout)?, - Format::xml => render_xml (&document, stdout)?, - Format::html => render_html(&document, stdout, true)?, - } - Ok(()) -} diff --git a/src/document_tree.rs b/src/document_tree.rs deleted file mode 100644 index 0af47ba..0000000 --- a/src/document_tree.rs +++ /dev/null @@ -1,35 +0,0 @@ -///http://docutils.sourceforge.net/docs/ref/doctree.html -///serves as AST - -#[macro_use] -mod macro_util; - -pub mod elements; -pub mod element_categories; -pub mod extra_attributes; -pub mod attribute_types; - -pub use self::elements::*; //Element,CommonAttributes,HasExtraAndChildren -pub use self::extra_attributes::ExtraAttributes; -pub use self::element_categories::HasChildren; - -#[test] -fn test_imperative() { - let mut doc = Document::default(); - let mut title = Title::default(); - title.append_child("Hi"); - doc.append_child(title); - - println!("{:?}", doc); -} - -#[test] -fn test_descriptive() { - let doc = Document::with_children(vec![ - Title::with_children(vec![ - "Hi".into() - ]).into() - ]); - - println!("{:?}", doc); -} diff --git a/src/document_tree/attribute_types.rs b/src/document_tree/attribute_types.rs deleted file mode 100644 index 30f3767..0000000 --- a/src/document_tree/attribute_types.rs +++ /dev/null @@ -1,155 +0,0 @@ -use std::str::FromStr; - -use failure::{Error,bail,format_err}; -use serde_derive::Serialize; -use regex::Regex; - -use crate::url::Url; - -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] -pub enum EnumeratedListType { - Arabic, - LowerAlpha, - UpperAlpha, - LowerRoman, - UpperRoman, -} - -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] -pub enum FixedSpace { Default, Preserve } // yes, default really is not “Default” -impl Default for FixedSpace { fn default() -> FixedSpace { FixedSpace::Preserve } } - -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum AlignH { Left, Center, Right} -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum AlignHV { Top, Middle, Bottom, Left, Center, Right } -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum AlignV { Top, Middle, Bottom } - -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum TableAlignH { Left, Right, Center, Justify, Char } -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum TableBorder { Top, Bottom, TopBottom, All, Sides, None } - -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub struct ID(pub String); -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub struct NameToken(pub String); - -// The table DTD has the cols attribute of tgroup as required, but having -// TableGroupCols not implement Default would leave no possible implementation -// for TableGroup::with_children. -#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub struct TableGroupCols(pub usize); -impl Default for TableGroupCols { - fn default() -> Self { - TableGroupCols(0) - } -} - -// no eq for f64 -#[derive(Debug,PartialEq,Serialize,Clone)] -pub enum Measure { // http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#length-units - Em(f64), - Ex(f64), - Mm(f64), - Cm(f64), - In(f64), - Px(f64), - Pt(f64), - Pc(f64), -} - -impl FromStr for AlignHV { - type Err = Error; - fn from_str(s: &str) -> Result { - use self::AlignHV::*; - Ok(match s { - "top" => Top, - "middle" => Middle, - "bottom" => Bottom, - "left" => Left, - "center" => Center, - "right" => Right, - s => bail!("Invalid Alignment {}", s), - }) - } -} - -impl From<&str> for ID { - fn from(s: &str) -> Self { - ID(s.to_owned().replace(' ', "-")) - } -} - -impl From<&str> for NameToken { - fn from(s: &str) -> Self { - NameToken(s.to_owned()) - } -} - -impl FromStr for Measure { - type Err = Error; - fn from_str(s: &str) -> Result { - use self::Measure::*; - let re = Regex::new(r"(?P\d+\.\d*|\.?\d+)\s*(?Pem|ex|mm|cm|in|px|pt|pc)").unwrap(); - let caps: regex::Captures = re.captures(s).ok_or_else(|| format_err!("Invalid measure"))?; - let value: f64 = caps["float"].parse()?; - Ok(match &caps["unit"] { - "em" => Em(value), - "ex" => Ex(value), - "mm" => Mm(value), - "cm" => Cm(value), - "in" => In(value), - "px" => Px(value), - "pt" => Pt(value), - "pc" => Pc(value), - _ => unreachable!(), - }) - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_parse_measure() { - let _a: Measure = "1.5em".parse().unwrap(); - let _b: Measure = "20 mm".parse().unwrap(); - let _c: Measure = ".5in".parse().unwrap(); - let _d: Measure = "1.pc".parse().unwrap(); - } -} - -pub(crate) trait CanBeEmpty { - fn is_empty(&self) -> bool; -} - -/* Specialization necessary -impl CanBeEmpty for T { - fn is_empty(&self) -> bool { false } -} -*/ -macro_rules! impl_cannot_be_empty { - ($t:ty) => { - impl CanBeEmpty for $t { - fn is_empty(&self) -> bool { false } - } - }; - ($t:ty, $($ts:ty),*) => { - impl_cannot_be_empty!($t); - impl_cannot_be_empty!($($ts),*); - }; -} -impl_cannot_be_empty!(Url); -impl_cannot_be_empty!(TableGroupCols); - -impl CanBeEmpty for Option { - fn is_empty(&self) -> bool { self.is_none() } -} - -impl CanBeEmpty for Vec { - fn is_empty(&self) -> bool { self.is_empty() } -} - -impl CanBeEmpty for bool { - fn is_empty(&self) -> bool { !self } -} - -impl CanBeEmpty for FixedSpace { - fn is_empty(&self) -> bool { self == &FixedSpace::default() } -} - diff --git a/src/document_tree/element_categories.rs b/src/document_tree/element_categories.rs deleted file mode 100644 index db3f420..0000000 --- a/src/document_tree/element_categories.rs +++ /dev/null @@ -1,130 +0,0 @@ -use std::fmt::{self,Debug,Formatter}; - -use serde_derive::Serialize; - -use super::elements::*; - -pub trait HasChildren { - fn with_children(children: Vec) -> Self; - fn children(&self) -> &Vec; - fn children_mut(&mut self) -> &mut Vec; - fn append_child>(&mut self, child: R) { - self.children_mut().push(child.into()); - } - fn append_children + Clone>(&mut self, more: &[R]) { - let children = self.children_mut(); - children.reserve(more.len()); - for child in more { - children.push(child.clone().into()); - } - } -} - -macro_rules! impl_into { - ([ $( (($subcat:ident :: $entry:ident), $supcat:ident), )+ ]) => { - $( impl_into!($subcat::$entry => $supcat); )+ - }; - ($subcat:ident :: $entry:ident => $supcat:ident ) => { - impl Into<$supcat> for $entry { - fn into(self) -> $supcat { - $supcat::$subcat(Box::new(self.into())) - } - } - }; -} - -macro_rules! synonymous_enum { - ( $subcat:ident : $($supcat:ident),+ ; $midcat:ident : $supsupcat:ident { $($entry:ident),+ $(,)* } ) => { - synonymous_enum!($subcat : $( $supcat ),+ , $midcat { $($entry,)* }); - $( impl_into!($midcat::$entry => $supsupcat); )+ - }; - ( $subcat:ident : $($supcat:ident),+ { $($entry:ident),+ $(,)* } ) => { - synonymous_enum!($subcat { $( $entry, )* }); - cartesian!(impl_into, [ $( ($subcat::$entry) ),+ ], [ $($supcat),+ ]); - }; - ( $name:ident { $( $entry:ident ),+ $(,)* } ) => { - #[derive(PartialEq,Serialize,Clone)] - pub enum $name { $( - $entry(Box<$entry>), - )* } - - impl Debug for $name { - fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> { - match *self { - $( $name::$entry(ref inner) => inner.fmt(fmt), )* - } - } - } - - $( impl Into<$name> for $entry { - fn into(self) -> $name { - $name::$entry(Box::new(self)) - } - } )* - }; -} - -synonymous_enum!(StructuralSubElement { Title, Subtitle, Decoration, Docinfo, SubStructure }); -synonymous_enum!(SubStructure: StructuralSubElement { Topic, Sidebar, Transition, Section, BodyElement }); -synonymous_enum!(BodyElement: SubTopic, SubSidebar, SubBlockQuote, SubFootnote, SubFigure; SubStructure: StructuralSubElement { - //Simple - Paragraph, LiteralBlock, DoctestBlock, MathBlock, Rubric, SubstitutionDefinition, Comment, Pending, Target, Raw, Image, - //Compound - Compound, Container, - BulletList, EnumeratedList, DefinitionList, FieldList, OptionList, - LineBlock, BlockQuote, Admonition, Attention, Hint, Note, Caution, Danger, Error, Important, Tip, Warning, Footnote, Citation, SystemMessage, Figure, Table -}); - -synonymous_enum!(BibliographicElement { Author, Authors, Organization, Address, Contact, Version, Revision, Status, Date, Copyright, Field }); - -synonymous_enum!(TextOrInlineElement { - String, Emphasis, Strong, Literal, Reference, FootnoteReference, CitationReference, SubstitutionReference, TitleReference, Abbreviation, Acronym, Superscript, Subscript, Inline, Problematic, Generated, Math, - //also have non-inline versions. Inline image is no figure child, inline target has content - TargetInline, RawInline, ImageInline -}); - -//--------------\\ -//Content Models\\ -//--------------\\ - -synonymous_enum!(AuthorInfo { Author, Organization, Address, Contact }); -synonymous_enum!(DecorationElement { Header, Footer }); -synonymous_enum!(SubTopic { Title, BodyElement }); -synonymous_enum!(SubSidebar { Topic, Title, Subtitle, BodyElement }); -synonymous_enum!(SubDLItem { Term, Classifier, Definition }); -synonymous_enum!(SubField { FieldName, FieldBody }); -synonymous_enum!(SubOptionListItem { OptionGroup, Description }); -synonymous_enum!(SubOption { OptionString, OptionArgument }); -synonymous_enum!(SubLineBlock { LineBlock, Line }); -synonymous_enum!(SubBlockQuote { Attribution, BodyElement }); -synonymous_enum!(SubFootnote { Label, BodyElement }); -synonymous_enum!(SubFigure { Caption, Legend, BodyElement }); -synonymous_enum!(SubTable { Title, TableGroup }); -synonymous_enum!(SubTableGroup { TableColspec, TableHead, TableBody }); - -#[cfg(test)] -mod test { - use std::default::Default; - use super::*; - - #[test] - fn test_convert_basic() { - let _: BodyElement = Paragraph::default().into(); - } - - #[test] - fn test_convert_more() { - let _: SubStructure = Paragraph::default().into(); - } - - #[test] - fn test_convert_even_more() { - let _: StructuralSubElement = Paragraph::default().into(); - } - - #[test] - fn test_convert_super() { - let be: BodyElement = Paragraph::default().into(); - let _: StructuralSubElement = be.into(); - } -} diff --git a/src/document_tree/element_types.rs b/src/document_tree/element_types.rs deleted file mode 100644 index 429573e..0000000 --- a/src/document_tree/element_types.rs +++ /dev/null @@ -1,96 +0,0 @@ - -// enum ElementType { -// //structual elements -// Section, Topic, Sidebar, -// -// //structural subelements -// Title, Subtitle, Decoration, Docinfo, Transition, -// -// //bibliographic elements -// Author, Authors, Organization, -// Address { space: FixedSpace }, -// Contact, Version, Revision, Status, -// Date, Copyright, Field, -// -// //decoration elements -// Header, Footer, -// -// //simple body elements -// Paragraph, -// LiteralBlock { space: FixedSpace }, -// DoctestBlock { space: FixedSpace }, -// MathBlock, Rubric, -// SubstitutionDefinition { ltrim: bool, rtrim: bool }, -// Comment { space: FixedSpace }, -// Pending, -// Target { refuri: Url, refid: ID, refname: Vec, anonymous: bool }, -// Raw { space: FixedSpace, format: Vec }, -// Image { -// align: AlignHV, -// uri: Url, -// alt: String, -// height: Measure, -// width: Measure, -// scale: f64, -// }, -// -// //compound body elements -// Compound, Container, -// -// BulletList { bullet: String }, -// EnumeratedList { enumtype: EnumeratedListType, prefix: String, suffix: String }, -// DefinitionList, FieldList, OptionList, -// -// LineBlock, BlockQuote, -// Admonition, Attention, Hint, Note, -// Caution, Danger, Error, Important, -// Tip, Warning, -// Footnote { backrefs: Vec, auto: bool }, -// Citation { backrefs: Vec }, -// SystemMessage { backrefs: Vec, level: usize, line: usize, type_: NameToken }, -// Figure { align: AlignH, width: usize }, -// Table, //TODO: Table -// -// //body sub elements -// ListItem, -// -// DefinitionListItem, Term, -// Classifier, Definition, -// -// FieldName, FieldBody, -// -// OptionListItem, OptionGroup, Description, Option_, OptionString, -// OptionArgument { delimiter: String }, -// -// Line, Attribution, Label, -// -// Caption, Legend, -// -// //inline elements -// Emphasis, Strong, Literal, -// Reference { name: String, refuri: Url, refid: ID, refname: Vec }, -// FootnoteReference { refid: ID, refname: Vec, auto: bool }, -// CitationReference { refid: ID, refname: Vec }, -// SubstitutionReference { refname: Vec }, -// TitleReference, -// Abbreviation, Acronym, -// Superscript, Subscript, -// Inline, -// Problematic { refid: ID }, -// Generated, Math, -// -// //also have non-inline versions. Inline image is no figure child, inline target has content -// TargetInline { refuri: Url, refid: ID, refname: Vec, anonymous: bool }, -// RawInline { space: FixedSpace, format: Vec }, -// ImageInline { -// align: AlignHV, -// uri: Url, -// alt: String, -// height: Measure, -// width: Measure, -// scale: f64, -// }, -// -// //text element -// TextElement, -// } diff --git a/src/document_tree/elements.rs b/src/document_tree/elements.rs deleted file mode 100644 index 7406cd7..0000000 --- a/src/document_tree/elements.rs +++ /dev/null @@ -1,288 +0,0 @@ -use std::path::PathBuf; -use serde_derive::Serialize; - -use super::attribute_types::{CanBeEmpty,ID,NameToken}; -use super::extra_attributes::{self,ExtraAttributes}; -use super::element_categories::*; - - -//-----------------\\ -//Element hierarchy\\ -//-----------------\\ - -pub trait Element { - /// A list containing one or more unique identifier keys - fn ids (& self) -> & Vec; - fn ids_mut(&mut self) -> &mut Vec; - /// a list containing the names of an element, typically originating from the element's title or content. - /// Each name in names must be unique; if there are name conflicts (two or more elements want to the same name), - /// the contents will be transferred to the dupnames attribute on the duplicate elements. - /// An element may have at most one of the names or dupnames attributes, but not both. - fn names (& self) -> & Vec; - fn names_mut(&mut self) -> &mut Vec; - fn source (& self) -> & Option; - fn source_mut(&mut self) -> &mut Option; - fn classes (& self) -> & Vec; - fn classes_mut(&mut self) -> &mut Vec; -} - -#[derive(Debug,Default,PartialEq,Serialize,Clone)] -pub struct CommonAttributes { - #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] - ids: Vec, - #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] - names: Vec, - #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] - source: Option, - #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] - classes: Vec, - //TODO: dupnames -} - -//----\\ -//impl\\ -//----\\ - -macro_rules! impl_element { ($name:ident) => ( - impl Element for $name { - fn ids (& self) -> & Vec { & self.common.ids } - fn ids_mut(&mut self) -> &mut Vec { &mut self.common.ids } - fn names (& self) -> & Vec { & self.common.names } - fn names_mut(&mut self) -> &mut Vec { &mut self.common.names } - fn source (& self) -> & Option { & self.common.source } - fn source_mut(&mut self) -> &mut Option { &mut self.common.source } - fn classes (& self) -> & Vec { & self.common.classes } - fn classes_mut(&mut self) -> &mut Vec { &mut self.common.classes } - } -)} - -macro_rules! impl_children { ($name:ident, $childtype:ident) => ( - impl HasChildren<$childtype> for $name { - #[allow(clippy::needless_update)] - fn with_children(children: Vec<$childtype>) -> $name { $name { children: children, ..Default::default() } } - fn children (& self) -> & Vec<$childtype> { & self.children } - fn children_mut(&mut self) -> &mut Vec<$childtype> { &mut self.children } - } -)} - -macro_rules! impl_extra { ($name:ident $($more:tt)*) => ( - impl ExtraAttributes for $name { - #[allow(clippy::needless_update)] - fn with_extra(extra: extra_attributes::$name) -> $name { $name { common: Default::default(), extra: extra $($more)* } } - fn extra (& self) -> & extra_attributes::$name { & self.extra } - fn extra_mut(&mut self) -> &mut extra_attributes::$name { &mut self.extra } - } -)} - -trait HasExtraAndChildren { - fn with_extra_and_children(extra: A, children: Vec) -> Self; -} - -impl HasExtraAndChildren for T where T: HasChildren + ExtraAttributes { - #[allow(clippy::needless_update)] - fn with_extra_and_children(extra: A, mut children: Vec) -> Self { - let mut r = Self::with_extra(extra); - r.children_mut().extend(children.drain(..)); - r - } -} - -macro_rules! impl_new {( - $(#[$attr:meta])* - pub struct $name:ident { $( - $(#[$fattr:meta])* - $field:ident : $typ:path - ),* $(,)* } -) => ( - $(#[$attr])* - #[derive(Debug,PartialEq,Serialize,Clone)] - pub struct $name { $( - $(#[$fattr])* $field: $typ, - )* } - impl $name { - pub fn new( $( $field: $typ, )* ) -> $name { $name { $( $field: $field, )* } } - } -)} - -macro_rules! impl_elem { - ($name:ident) => { - impl_new!(#[derive(Default)] pub struct $name { - #[serde(flatten)] common: CommonAttributes, - }); - impl_element!($name); - }; - ($name:ident; +) => { - impl_new!(#[derive(Default)] pub struct $name { - #[serde(flatten)] common: CommonAttributes, - #[serde(flatten)] extra: extra_attributes::$name, - }); - impl_element!($name); impl_extra!($name, ..Default::default()); - }; - ($name:ident; *) => { //same as above with no default - impl_new!(pub struct $name { - #[serde(flatten)] common: CommonAttributes, - #[serde(flatten)] extra: extra_attributes::$name - }); - impl_element!($name); impl_extra!($name); - }; - ($name:ident, $childtype:ident) => { - impl_new!(#[derive(Default)] pub struct $name { - #[serde(flatten)] common: CommonAttributes, - children: Vec<$childtype>, - }); - impl_element!($name); impl_children!($name, $childtype); - }; - ($name:ident, $childtype:ident; +) => { - impl_new!(#[derive(Default)] pub struct $name { - #[serde(flatten)] common: CommonAttributes, - #[serde(flatten)] extra: extra_attributes::$name, - children: Vec<$childtype>, - }); - impl_element!($name); impl_extra!($name, ..Default::default()); impl_children!($name, $childtype); - }; -} - -macro_rules! impl_elems { ( $( ($($args:tt)*) )* ) => ( - $( impl_elem!($($args)*); )* -)} - - -#[derive(Default,Debug,Serialize)] -pub struct Document { children: Vec } -impl_children!(Document, StructuralSubElement); - -impl_elems!( - //structual elements - (Section, StructuralSubElement) - (Topic, SubTopic) - (Sidebar, SubSidebar) - - //structural subelements - (Title, TextOrInlineElement) - (Subtitle, TextOrInlineElement) - (Decoration, DecorationElement) - (Docinfo, BibliographicElement) - (Transition) - - //bibliographic elements - (Author, TextOrInlineElement) - (Authors, AuthorInfo) - (Organization, TextOrInlineElement) - (Address, TextOrInlineElement; +) - (Contact, TextOrInlineElement) - (Version, TextOrInlineElement) - (Revision, TextOrInlineElement) - (Status, TextOrInlineElement) - (Date, TextOrInlineElement) - (Copyright, TextOrInlineElement) - (Field, SubField) - - //decoration elements - (Header, BodyElement) - (Footer, BodyElement) - - //simple body elements - (Paragraph, TextOrInlineElement) - (LiteralBlock, TextOrInlineElement; +) - (DoctestBlock, TextOrInlineElement; +) - (MathBlock, String) - (Rubric, TextOrInlineElement) - (SubstitutionDefinition, TextOrInlineElement; +) - (Comment, TextOrInlineElement; +) - (Pending) - (Target; +) - (Raw, String; +) - (Image; *) - - //compound body elements - (Compound, BodyElement) - (Container, BodyElement) - - (BulletList, ListItem; +) - (EnumeratedList, ListItem; +) - (DefinitionList, DefinitionListItem) - (FieldList, Field) - (OptionList, OptionListItem) - - (LineBlock, SubLineBlock) - (BlockQuote, SubBlockQuote) - (Admonition, SubTopic) - (Attention, BodyElement) - (Hint, BodyElement) - (Note, BodyElement) - (Caution, BodyElement) - (Danger, BodyElement) - (Error, BodyElement) - (Important, BodyElement) - (Tip, BodyElement) - (Warning, BodyElement) - (Footnote, SubFootnote; +) - (Citation, SubFootnote; +) - (SystemMessage, BodyElement; +) - (Figure, SubFigure; +) - (Table, SubTable; +) - - //table elements - (TableGroup, SubTableGroup; +) - (TableHead, TableRow; +) - (TableBody, TableRow; +) - (TableRow, TableEntry; +) - (TableEntry, BodyElement; +) - (TableColspec; +) - - //body sub elements - (ListItem, BodyElement) - - (DefinitionListItem, SubDLItem) - (Term, TextOrInlineElement) - (Classifier, TextOrInlineElement) - (Definition, BodyElement) - - (FieldName, TextOrInlineElement) - (FieldBody, BodyElement) - - (OptionListItem, SubOptionListItem) - (OptionGroup, Option_) - (Description, BodyElement) - (Option_, SubOption) - (OptionString, String) - (OptionArgument, String; +) - - (Line, TextOrInlineElement) - (Attribution, TextOrInlineElement) - (Label, TextOrInlineElement) - - (Caption, TextOrInlineElement) - (Legend, BodyElement) - - //inline elements - (Emphasis, TextOrInlineElement) - (Literal, TextOrInlineElement) - (Reference, TextOrInlineElement; +) - (Strong, TextOrInlineElement) - (FootnoteReference, TextOrInlineElement; +) - (CitationReference, TextOrInlineElement; +) - (SubstitutionReference, TextOrInlineElement; +) - (TitleReference, TextOrInlineElement) - (Abbreviation, TextOrInlineElement) - (Acronym, TextOrInlineElement) - (Superscript, TextOrInlineElement) - (Subscript, TextOrInlineElement) - (Inline, TextOrInlineElement) - (Problematic, TextOrInlineElement; +) - (Generated, TextOrInlineElement) - (Math, String) - - //also have non-inline versions. Inline image is no figure child, inline target has content - (TargetInline, String; +) - (RawInline, String; +) - (ImageInline; *) - - //text element = String -); - -impl<'a> From<&'a str> for TextOrInlineElement { - fn from(s: &'a str) -> Self { - s.to_owned().into() - } -} diff --git a/src/document_tree/extra_attributes.rs b/src/document_tree/extra_attributes.rs deleted file mode 100644 index 55896ab..0000000 --- a/src/document_tree/extra_attributes.rs +++ /dev/null @@ -1,112 +0,0 @@ -use serde_derive::Serialize; - -use crate::url::Url; -use super::attribute_types::{CanBeEmpty,FixedSpace,ID,NameToken,AlignHV,AlignH,AlignV,TableAlignH,TableBorder,TableGroupCols,Measure,EnumeratedListType}; - -pub trait ExtraAttributes { - fn with_extra(extra: A) -> Self; - fn extra (& self) -> & A; - fn extra_mut(&mut self) -> &mut A; -} - -macro_rules! impl_extra { - ( $name:ident { $( $(#[$pattr:meta])* $param:ident : $type:ty ),* $(,)* } ) => ( - impl_extra!( - #[derive(Default,Debug,PartialEq,Serialize,Clone)] - $name { $( $(#[$pattr])* $param : $type, )* } - ); - ); - ( $(#[$attr:meta])+ $name:ident { $( $(#[$pattr:meta])* $param:ident : $type:ty ),* $(,)* } ) => ( - $(#[$attr])+ - pub struct $name { $( - $(#[$pattr])* - #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] - pub $param : $type, - )* } - ); -} - -impl_extra!(Address { space: FixedSpace }); -impl_extra!(LiteralBlock { space: FixedSpace }); -impl_extra!(DoctestBlock { space: FixedSpace }); -impl_extra!(SubstitutionDefinition { ltrim: bool, rtrim: bool }); -impl_extra!(Comment { space: FixedSpace }); -impl_extra!(Target { - /// External reference to a URI/URL - refuri: Option, - /// References to ids attributes in other elements - refid: Option, - /// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. - refname: Vec, - anonymous: bool, -}); -impl_extra!(Raw { space: FixedSpace, format: Vec }); -impl_extra!(#[derive(Debug,PartialEq,Serialize,Clone)] Image { - uri: Url, - align: Option, - alt: Option, - height: Option, - width: Option, - scale: Option, - target: Option, // Not part of the DTD but a valid argument -}); - -//bools usually are XML yesorno. “auto” however either exists and is set to something random like “1” or doesn’t exist -//does auto actually mean the numbering prefix? - -impl_extra!(BulletList { bullet: Option }); -impl_extra!(EnumeratedList { enumtype: Option, prefix: Option, suffix: Option }); - -impl_extra!(Footnote { backrefs: Vec, auto: bool }); -impl_extra!(Citation { backrefs: Vec }); -impl_extra!(SystemMessage { backrefs: Vec, level: Option, line: Option, type_: Option }); -impl_extra!(Figure { align: Option, width: Option }); -impl_extra!(Table { frame: Option, colsep: Option, rowsep: Option, pgwide: Option }); - -impl_extra!(TableGroup { cols: TableGroupCols, colsep: Option, rowsep: Option, align: Option }); -impl_extra!(TableHead { valign: Option }); -impl_extra!(TableBody { valign: Option }); -impl_extra!(TableRow { rowsep: Option, valign: Option }); -impl_extra!(TableEntry { colname: Option, namest: Option, nameend: Option, morerows: Option, colsep: Option, rowsep: Option, align: Option, r#char: Option, charoff: Option, valign: Option, morecols: Option }); -impl_extra!(TableColspec { colnum: Option, colname: Option, colwidth: Option, colsep: Option, rowsep: Option, align: Option, r#char: Option, charoff: Option, stub: Option }); - -impl_extra!(OptionArgument { delimiter: Option }); - -impl_extra!(Reference { - name: Option, //TODO: is CDATA in the DTD, so maybe no nametoken? - /// External reference to a URI/URL - refuri: Option, - /// References to ids attributes in other elements - refid: Option, - /// Internal reference to the names attribute of another element - refname: Vec, -}); -impl_extra!(FootnoteReference { refid: Option, refname: Vec, auto: bool }); -impl_extra!(CitationReference { refid: Option, refname: Vec }); -impl_extra!(SubstitutionReference { refname: Vec }); -impl_extra!(Problematic { refid: Option }); - -//also have non-inline versions. Inline image is no figure child, inline target has content -impl_extra!(TargetInline { - /// External reference to a URI/URL - refuri: Option, - /// References to ids attributes in other elements - refid: Option, - /// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. - refname: Vec, - anonymous: bool, -}); -impl_extra!(RawInline { space: FixedSpace, format: Vec }); -pub type ImageInline = Image; - -impl Image { - pub fn new(uri: Url) -> Image { Image { - uri, - align: None, - alt: None, - height: None, - width: None, - scale: None, - target: None, - } } -} diff --git a/src/document_tree/macro_util.rs b/src/document_tree/macro_util.rs deleted file mode 100644 index d9b8a3e..0000000 --- a/src/document_tree/macro_util.rs +++ /dev/null @@ -1,42 +0,0 @@ -macro_rules! cartesian_impl { - ($out:tt [] $b:tt $init_b:tt $submacro:tt) => { - $submacro!{$out} - }; - ($out:tt [$a:tt, $($at:tt)*] [] $init_b:tt $submacro:tt) => { - cartesian_impl!{$out [$($at)*] $init_b $init_b $submacro} - }; - ([$($out:tt)*] [$a:tt, $($at:tt)*] [$b:tt, $($bt:tt)*] $init_b:tt $submacro:tt) => { - cartesian_impl!{[$($out)* ($a, $b),] [$a, $($at)*] [$($bt)*] $init_b $submacro} - }; -} - -macro_rules! cartesian { - ( $submacro:tt, [$($a:tt)*], [$($b:tt)*]) => { - cartesian_impl!{[] [$($a)*,] [$($b)*,] [$($b)*,] $submacro} - }; -} - - -#[cfg(test)] -mod test { - macro_rules! print_cartesian { - ( [ $(($a1:tt, $a2:tt)),* , ] ) => { - fn test_f(x:i64, y:i64) -> Result<(i64, i64), ()> { - match (x, y) { - $( - ($a1, $a2) => { Ok(($a1, $a2)) } - )* - _ => { Err(()) } - } - } - }; - } - - #[test] - fn test_print_cartesian() { - cartesian!(print_cartesian, [1, 2, 3], [4, 5, 6]); - assert_eq!(test_f(1, 4), Ok((1, 4))); - assert_eq!(test_f(1, 3), Err(())); - assert_eq!(test_f(3, 5), Ok((3, 5))); - } -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 6e39b1a..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![recursion_limit="256"] - -pub mod document_tree; -pub mod parser; -pub mod renderer; -pub mod url; diff --git a/src/parser.rs b/src/parser.rs deleted file mode 100644 index 00c967d..0000000 --- a/src/parser.rs +++ /dev/null @@ -1,28 +0,0 @@ -pub mod token; -mod conversion; -mod simplify; -mod pest_rst; -mod pair_ext_parse; -#[cfg(test)] -pub mod tests; - -use failure::Error; -use pest::Parser; - -use crate::document_tree::Document; - -use self::pest_rst::{RstParser,Rule}; -use self::conversion::convert_document; -use self::simplify::resolve_references; - - -/// Parse into a document tree and resolve sections, but not references. -pub fn parse_only(source: &str) -> Result { - let pairs = RstParser::parse(Rule::document, source)?; - convert_document(pairs) -} - -/// Parse into a document tree and resolve sections and references. -pub fn parse(source: &str) -> Result { - parse_only(source).map(resolve_references) -} diff --git a/src/parser/conversion.rs b/src/parser/conversion.rs deleted file mode 100644 index f9e2a78..0000000 --- a/src/parser/conversion.rs +++ /dev/null @@ -1,165 +0,0 @@ -mod block; -mod inline; - -use failure::Error; -use pest::iterators::Pairs; - -use crate::document_tree::{ - Element,HasChildren, - elements as e, - element_categories as c, - attribute_types as at, -}; - -use super::pest_rst::Rule; - - -fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section { - match ssubel { - c::StructuralSubElement::SubStructure(ref mut b) => match **b { - c::SubStructure::Section(ref mut s) => s, - _ => unreachable!(), - }, - _ => unreachable!(), - } -} - - -fn get_level<'tl>(toplevel: &'tl mut Vec, section_idxs: &[Option]) -> &'tl mut Vec { - let mut level = toplevel; - for maybe_i in section_idxs { - if let Some(i) = *maybe_i { - level = ssubel_to_section_unchecked_mut(&mut level[i]).children_mut(); - } - } - level -} - - -pub fn convert_document(pairs: Pairs) -> Result { - use self::block::TitleOrSsubel::*; - - let mut toplevel: Vec = vec![]; - // The kinds of section titles encountered. - // `section_idx[x]` has the kind `kinds[x]`, but `kinds` can be longer - let mut kinds: Vec = vec![]; - // Recursive indices into the tree, pointing at the active sections. - // `None`s indicate skipped section levels: - // toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]... - let mut section_idxs: Vec> = vec![]; - - for pair in pairs { - if let Some(ssubel) = block::convert_ssubel(pair)? { match ssubel { - Title(title, kind) => { - match kinds.iter().position(|k| k == &kind) { - // Idx points to the level we want to add, - // so idx-1 needs to be the last valid index. - Some(idx) => { - // If idx < len: Remove found section and all below - section_idxs.truncate(idx); - // If idx > len: Add None for skipped levels - // TODO: test skipped levels - while section_idxs.len() < idx { section_idxs.push(None) } - }, - None => kinds.push(kind), - } - let super_level = get_level(&mut toplevel, §ion_idxs); - let slug = title.names().iter().next().map(|at::NameToken(name)| at::ID(name.to_owned())); - let mut section = e::Section::with_children(vec![title.into()]); - section.ids_mut().extend(slug.into_iter()); - super_level.push(section.into()); - section_idxs.push(Some(super_level.len() - 1)); - }, - Ssubel(elem) => get_level(&mut toplevel, §ion_idxs).push(elem), - }} - } - Ok(e::Document::with_children(toplevel)) -} - -/// Normalizes a name in terms of whitespace. Equivalent to docutils's -/// `docutils.nodes.whitespace_normalize_name`. -pub fn whitespace_normalize_name(name: &str) -> String { - // Python's string.split() defines whitespace differently than Rust does. - let split_iter = name.split( - |ch: char| ch.is_whitespace() || (ch >= '\x1C' && ch <= '\x1F') - ).filter(|split| !split.is_empty()); - let mut ret = String::new(); - for split in split_iter { - if !ret.is_empty() { - ret.push(' '); - } - ret.push_str(split); - } - ret -} - - -#[cfg(test)] -mod tests { - use crate::{ - parser::parse, - document_tree::{ - elements as e, - element_categories as c, - HasChildren, - } - }; - - fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section { - match ssubel { - c::StructuralSubElement::SubStructure(ref b) => match **b { - c::SubStructure::Section(ref s) => s, - ref c => panic!("Expected section, not {:?}", c), - }, - ref c => panic!("Expected SubStructure, not {:?}", c), - } - } - - const SECTIONS: &str = "\ -Intro before first section title - -Level 1 -******* - -------- -Level 2 -------- - -Level 3 -======= - -L1 again -******** - -L3 again, skipping L2 -===================== -"; - - #[test] - fn convert_skipped_section() { - let doctree = parse(SECTIONS).unwrap(); - let lvl0 = doctree.children(); - assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0); - - assert_eq!(lvl0[0], e::Paragraph::with_children(vec![ - "Intro before first section title".to_owned().into() - ]).into(), "The intro text should fit"); - - let lvl1a = ssubel_to_section(&lvl0[1]).children(); - assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a); - //TODO: test title lvl1a[0] - let lvl2 = ssubel_to_section(&lvl1a[1]).children(); - assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2); - //TODO: test title lvl2[0] - let lvl3a = ssubel_to_section(&lvl2[1]).children(); - assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a); - //TODO: test title lvl3a[0] - - let lvl1b = ssubel_to_section(&lvl0[2]).children(); - assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b); - //TODO: test title lvl1b[0] - let lvl3b = ssubel_to_section(&lvl1b[1]).children(); - assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b); - //TODO: test title lvl3b[0] - } -} diff --git a/src/parser/conversion/block.rs b/src/parser/conversion/block.rs deleted file mode 100644 index b14c2b5..0000000 --- a/src/parser/conversion/block.rs +++ /dev/null @@ -1,202 +0,0 @@ -use failure::{Error,bail}; -use pest::iterators::Pair; - -use crate::document_tree::{ - Element,HasChildren,ExtraAttributes, - elements as e, - element_categories as c, - extra_attributes as a, - attribute_types as at -}; - -use crate::parser::{ - pest_rst::Rule, - pair_ext_parse::PairExt, -}; -use super::{whitespace_normalize_name, inline::convert_inlines}; - - -#[derive(PartialEq)] -pub(super) enum TitleKind { Double(char), Single(char) } - -pub(super) enum TitleOrSsubel { - Title(e::Title, TitleKind), - Ssubel(c::StructuralSubElement), -} - - -pub(super) fn convert_ssubel(pair: Pair) -> Result, Error> { - use self::TitleOrSsubel::*; - Ok(Some(match pair.as_rule() { - Rule::title => { let (t, k) = convert_title(pair)?; Title(t, k) }, - //TODO: subtitle, decoration, docinfo - Rule::EOI => return Ok(None), - _ => Ssubel(convert_substructure(pair)?.into()), - })) -} - - -fn convert_substructure(pair: Pair) -> Result { - Ok(match pair.as_rule() { - // todo: Topic, Sidebar, Transition - // no section here, as it’s constructed from titles - _ => convert_body_elem(pair)?.into(), - }) -} - - -fn convert_body_elem(pair: Pair) -> Result { - Ok(match pair.as_rule() { - Rule::paragraph => convert_paragraph(pair)?.into(), - Rule::target => convert_target(pair)?.into(), - Rule::substitution_def => convert_substitution_def(pair)?.into(), - Rule::admonition_gen => convert_admonition_gen(pair)?.into(), - Rule::image => convert_image::(pair)?.into(), - Rule::bullet_list => convert_bullet_list(pair)?.into(), - rule => unimplemented!("unhandled rule {:?}", rule), - }) -} - - -fn convert_title(pair: Pair) -> Result<(e::Title, TitleKind), Error> { - let mut title: Option = None; - let mut title_inlines: Option> = None; - let mut adornment_char: Option = None; - // title_double or title_single. Extract kind before consuming - let inner_pair = pair.into_inner().next().unwrap(); - let kind = inner_pair.as_rule(); - for p in inner_pair.into_inner() { - match p.as_rule() { - Rule::line => { - title = Some(p.as_str().to_owned()); - title_inlines = Some(convert_inlines(p)?); - }, - Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")), - rule => unimplemented!("Unexpected rule in title: {:?}", rule), - }; - } - // now we encountered one line of text and one of adornments - // TODO: emit error if the adornment line is too short (has to match title length) - let mut elem = e::Title::with_children(title_inlines.expect("No text in title")); - if let Some(title) = title { - //TODO: slugify properly - let slug = title.to_lowercase().replace("\n", "").replace(" ", "-"); - elem.names_mut().push(at::NameToken(slug)); - } - let title_kind = match kind { - Rule::title_double => TitleKind::Double(adornment_char.unwrap()), - Rule::title_single => TitleKind::Single(adornment_char.unwrap()), - _ => unreachable!(), - }; - Ok((elem, title_kind)) -} - - -fn convert_paragraph(pair: Pair) -> Result { - Ok(e::Paragraph::with_children(convert_inlines(pair)?)) -} - - -fn convert_target(pair: Pair) -> Result { - let mut elem: e::Target = Default::default(); - elem.extra_mut().anonymous = false; - for p in pair.into_inner() { - match p.as_rule() { - Rule::target_name_uq | Rule::target_name_qu => { - elem.ids_mut().push(p.as_str().into()); - elem.names_mut().push(p.as_str().into()); - }, - // TODO: also handle non-urls - Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?), - rule => panic!("Unexpected rule in target: {:?}", rule), - } - } - Ok(elem) -} - -fn convert_substitution_def(pair: Pair) -> Result { - let mut pairs = pair.into_inner(); - let name = whitespace_normalize_name(pairs.next().unwrap().as_str()); // Rule::substitution_name - let inner_pair = pairs.next().unwrap(); - let inner: Vec = match inner_pair.as_rule() { - Rule::replace => convert_replace(inner_pair)?, - Rule::image => vec![convert_image::(inner_pair)?.into()], - rule => panic!("Unknown substitution rule {:?}", rule), - }; - let mut subst_def = e::SubstitutionDefinition::with_children(inner); - subst_def.names_mut().push(at::NameToken(name)); - Ok(subst_def) -} - -fn convert_replace(pair: Pair) -> Result, Error> { - let mut pairs = pair.into_inner(); - let paragraph = pairs.next().unwrap(); - convert_inlines(paragraph) -} - -fn convert_image(pair: Pair) -> Result where I: Element + ExtraAttributes { - let mut pairs = pair.into_inner(); - let mut image = I::with_extra(a::Image::new( - pairs.next().unwrap().as_str().trim().parse()?, // line - )); - for opt in pairs { - let mut opt_iter = opt.into_inner(); - let opt_name = opt_iter.next().unwrap(); - let opt_val = opt_iter.next().unwrap(); - match opt_name.as_str() { - "class" => image.classes_mut().push(opt_val.as_str().to_owned()), - "name" => image.names_mut().push(opt_val.as_str().into()), - "alt" => image.extra_mut().alt = Some(opt_val.as_str().to_owned()), - "height" => image.extra_mut().height = Some(opt_val.parse()?), - "width" => image.extra_mut().width = Some(opt_val.parse()?), - "scale" => image.extra_mut().scale = Some(parse_scale(&opt_val)?), - "align" => image.extra_mut().align = Some(opt_val.parse()?), - "target" => image.extra_mut().target = Some(opt_val.parse()?), - name => bail!("Unknown Image option {}", name), - } - } - Ok(image) -} - -fn parse_scale(pair: &Pair) -> Result { - let input = if pair.as_str().chars().rev().next() == Some('%') { &pair.as_str()[..pair.as_str().len()-1] } else { pair.as_str() }; - use pest::error::{Error,ErrorVariant}; - Ok(input.parse().map_err(|e: std::num::ParseIntError| { - let var: ErrorVariant = ErrorVariant::CustomError { message: e.to_string() }; - Error::new_from_span(var, pair.as_span()) - })?) -} - -fn convert_admonition_gen(pair: Pair) -> Result { - let mut iter = pair.into_inner(); - let typ = iter.next().unwrap().as_str(); - // TODO: in reality it contains body elements. - let children: Vec = iter.map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()).collect(); - Ok(match typ { - "attention" => e::Attention::with_children(children).into(), - "hint" => e::Hint::with_children(children).into(), - "note" => e::Note::with_children(children).into(), - "caution" => e::Caution::with_children(children).into(), - "danger" => e::Danger::with_children(children).into(), - "error" => e::Error::with_children(children).into(), - "important" => e::Important::with_children(children).into(), - "tip" => e::Tip::with_children(children).into(), - "warning" => e::Warning::with_children(children).into(), - typ => panic!("Unknown admontion type {}!", typ), - }) -} - -fn convert_bullet_list(pair: Pair) -> Result { - Ok(e::BulletList::with_children(pair.into_inner().map(convert_bullet_item).collect::>()?)) -} - -fn convert_bullet_item(pair: Pair) -> Result { - let mut iter = pair.into_inner(); - let mut children: Vec = vec![ - convert_paragraph(iter.next().unwrap())?.into() - ]; - for p in iter { - children.push(convert_body_elem(p)?); - } - Ok(e::ListItem::with_children(children)) -} diff --git a/src/parser/conversion/inline.rs b/src/parser/conversion/inline.rs deleted file mode 100644 index b2fffa5..0000000 --- a/src/parser/conversion/inline.rs +++ /dev/null @@ -1,161 +0,0 @@ -use failure::Error; -use pest::iterators::Pair; - -use crate::document_tree::{ - HasChildren, - elements as e, - element_categories as c, - extra_attributes as a, - attribute_types as at, -}; - -use crate::parser::{ - pest_rst::Rule, -// pair_ext_parse::PairExt, -}; - -use crate::url::Url; -use super::whitespace_normalize_name; - - -pub fn convert_inline(pair: Pair) -> Result { - Ok(match pair.as_rule() { - Rule::str | Rule::str_nested => pair.as_str().into(), - Rule::ws_newline => " ".to_owned().into(), - Rule::reference => convert_reference(pair)?, - Rule::substitution_name => convert_substitution_ref(pair)?.into(), - Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(), - Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(), - Rule::literal => e::Literal::with_children(convert_inlines(pair)?).into(), - rule => unimplemented!("unknown rule {:?}", rule), - }) -} - -pub fn convert_inlines(pair: Pair) -> Result, Error> { - pair.into_inner().map(convert_inline).collect() -} - -fn convert_reference(pair: Pair) -> Result { - let name; - let refuri; - let refid; - let mut refname = vec![]; - let mut children: Vec = vec![]; - let concrete = pair.into_inner().next().unwrap(); - match concrete.as_rule() { - Rule::reference_target => { - let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu - match rt_inner.as_rule() { - Rule::reference_target_uq => { - refid = None; - name = Some(rt_inner.as_str().into()); - refuri = None; - refname.push(rt_inner.as_str().into()); - children.push(rt_inner.as_str().into()); - }, - Rule::reference_target_qu => { - let (text, reference) = { - let mut text = None; - let mut reference = None; - for inner in rt_inner.clone().into_inner() { - match inner.as_rule() { - Rule::reference_text => text = Some(inner), - Rule::reference_bracketed => reference = Some(inner), - _ => unreachable!() - } - } - (text, reference) - }; - let trimmed_text = match (&text, &reference) { - (Some(text), None) => text.as_str(), - (_, Some(reference)) => { - text - .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch))) - .filter(|text| !text.is_empty()) - .unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str()) - } - (None, None) => unreachable!() - }; - refid = None; - name = Some(trimmed_text.into()); - refuri = if let Some(reference) = reference { - let inner = reference.into_inner().next().unwrap(); - match inner.as_rule() { - // The URL rules in our parser accept a narrow superset of - // valid URLs, so we need to handle false positives. - Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { - Some(target) - } else if inner.as_str().ends_with('_') { - // like target_name_qu (minus the final underscore) - let full_str = inner.as_str(); - refname.push(full_str[0..full_str.len() - 1].into()); - None - } else { - // like relative_reference - Some(Url::parse_relative(inner.as_str())?) - }, - Rule::target_name_qu => { - refname.push(inner.as_str().into()); - None - }, - Rule::relative_reference => { - Some(Url::parse_relative(inner.as_str())?) - }, - _ => unreachable!() - } - } else { - refname.push(trimmed_text.into()); - None - }; - children.push(trimmed_text.into()); - }, - _ => unreachable!() - } - }, - Rule::reference_explicit => unimplemented!("explicit reference"), - Rule::reference_auto => { - let rt_inner = concrete.into_inner().next().unwrap(); - match rt_inner.as_rule() { - Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { - Ok(target) => { - refuri = Some(target); - name = None; - refid = None; - children.push(rt_inner.as_str().into()); - }, - // if our parser got a URL wrong, return it as a string - Err(_) => return Ok(rt_inner.as_str().into()) - }, - Rule::email => { - let mailto_url = String::from("mailto:") + rt_inner.as_str(); - match Url::parse_absolute(&mailto_url) { - Ok(target) => { - refuri = Some(target); - name = None; - refid = None; - children.push(rt_inner.as_str().into()); - }, - // if our parser got a URL wrong, return it as a string - Err(_) => return Ok(rt_inner.as_str().into()) - } - }, - _ => unreachable!() - } - }, - _ => unreachable!(), - }; - Ok(e::Reference::new( - Default::default(), - a::Reference { name, refuri, refid, refname }, - children - ).into()) -} - -fn convert_substitution_ref(pair: Pair) -> Result { - let name = whitespace_normalize_name(pair.as_str()); - Ok(a::ExtraAttributes::with_extra( - a::SubstitutionReference { - refname: vec![at::NameToken(name)] - } - )) -} diff --git a/src/parser/pair_ext_parse.rs b/src/parser/pair_ext_parse.rs deleted file mode 100644 index a04b3dd..0000000 --- a/src/parser/pair_ext_parse.rs +++ /dev/null @@ -1,21 +0,0 @@ -use std::str::FromStr; - -use pest::Span; -use pest::iterators::Pair; -use pest::error::{Error,ErrorVariant}; - - -pub trait PairExt where R: pest::RuleType { - fn parse(&self) -> Result> where T: FromStr, E: ToString; -} - -impl<'l, R> PairExt for Pair<'l, R> where R: pest::RuleType { - fn parse(&self) -> Result> where T: FromStr, E: ToString { - self.as_str().parse().map_err(|e| to_parse_error(self.as_span(), &e)) - } -} - -pub(crate) fn to_parse_error(span: Span, e: &E) -> Error where E: ToString, R: pest::RuleType { - let var: ErrorVariant = ErrorVariant::CustomError { message: e.to_string() }; - Error::new_from_span(var, span) -} diff --git a/src/parser/pest_rst.rs b/src/parser/pest_rst.rs deleted file mode 100644 index 74199a8..0000000 --- a/src/parser/pest_rst.rs +++ /dev/null @@ -1,7 +0,0 @@ -#![allow(clippy::redundant_closure)] - -use pest_derive::Parser; - -#[derive(Parser)] -#[grammar = "rst.pest"] -pub struct RstParser; diff --git a/src/parser/simplify.rs b/src/parser/simplify.rs deleted file mode 100644 index cc169ee..0000000 --- a/src/parser/simplify.rs +++ /dev/null @@ -1,662 +0,0 @@ -/* -http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets - -Links can have internal or external targets. -In the source, targets look like: - - .. targetname1: - .. targetname2: - - some paragraph or list item or so - -or: - - .. targetname1: - .. targetname2: https://link - -There’s also anonymous links and targets without names. - -TODO: continue documenting how it’s done via https://repo.or.cz/docutils.git/blob/HEAD:/docutils/docutils/transforms/references.py -*/ - -use std::collections::HashMap; - -use crate::url::Url; -use crate::document_tree::{ - Document, - HasChildren, - attribute_types::NameToken, - elements::{self as e, Element}, - element_categories as c, - extra_attributes::ExtraAttributes, -}; - - -#[derive(Debug)] -enum NamedTargetType { - NumberedFootnote(usize), - LabeledFootnote(usize), - Citation, - InternalLink, - ExternalLink(Url), - IndirectLink(NameToken), - SectionTitle, -} -impl NamedTargetType { - fn is_implicit_target(&self) -> bool { - match self { - NamedTargetType::SectionTitle => true, - _ => false, - } - } -} - -#[derive(Clone, Debug)] -struct Substitution { - content: Vec, - /// If true and the sibling before the reference is a text node, - /// the text node gets right-trimmed. - ltrim: bool, - /// Same as `ltrim` with the sibling after the reference. - rtrim: bool, -} - -#[derive(Default, Debug)] -struct TargetsCollected { - named_targets: HashMap, - substitutions: HashMap, - normalized_substitutions: HashMap, -} -impl TargetsCollected { - fn target_url<'t>(self: &'t TargetsCollected, refname: &[NameToken]) -> Option<&'t Url> { - // TODO: Check if the target would expand circularly - if refname.len() != 1 { - panic!("Expected exactly one name in a reference."); - } - let name = refname[0].clone(); - match self.named_targets.get(&name)? { - NamedTargetType::ExternalLink(url) => Some(url), - _ => unimplemented!(), - } - } - - fn substitution<'t>(self: &'t TargetsCollected, refname: &[NameToken]) -> Option<&'t Substitution> { - // TODO: Check if the substitution would expand circularly - if refname.len() != 1 { - panic!("Expected exactly one name in a substitution reference."); - } - let name = refname[0].clone(); - self.substitutions.get(&name).or_else(|| { - self.normalized_substitutions.get(&name.0.to_lowercase()) - }) - } -} - -trait ResolvableRefs { - fn populate_targets(&self, refs: &mut TargetsCollected); - fn resolve_refs(self, refs: &TargetsCollected) -> Vec where Self: Sized; -} - -pub fn resolve_references(mut doc: Document) -> Document { - let mut references: TargetsCollected = Default::default(); - for c in doc.children() { - c.populate_targets(&mut references); - } - let new: Vec<_> = doc.children_mut().drain(..).flat_map(|c| c.resolve_refs(&references)).collect(); - Document::with_children(new) -} - -fn sub_pop(parent: &P, refs: &mut TargetsCollected) where P: HasChildren, C: ResolvableRefs { - for c in parent.children() { - c.populate_targets(refs); - } -} - -fn sub_res(mut parent: P, refs: &TargetsCollected) -> P where P: e::Element + HasChildren, C: ResolvableRefs { - let new: Vec<_> = parent.children_mut().drain(..).flat_map(|c| c.resolve_refs(refs)).collect(); - parent.children_mut().extend(new); - parent -} - -fn sub_sub_pop(parent: &P, refs: &mut TargetsCollected) where P: HasChildren, C1: HasChildren, C2: ResolvableRefs { - for c in parent.children() { - sub_pop(c, refs); - } -} - -fn sub_sub_res(mut parent: P, refs: &TargetsCollected) -> P where P: e::Element + HasChildren, C1: e::Element + HasChildren, C2: ResolvableRefs { - let new: Vec<_> = parent.children_mut().drain(..).map(|c| sub_res(c, refs)).collect(); - parent.children_mut().extend(new); - parent -} - -impl ResolvableRefs for c::StructuralSubElement { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::StructuralSubElement::*; - match self { - Title(e) => sub_pop(&**e, refs), - Subtitle(e) => sub_pop(&**e, refs), - Decoration(e) => sub_pop(&**e, refs), - Docinfo(e) => sub_pop(&**e, refs), - SubStructure(e) => e.populate_targets(refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::StructuralSubElement::*; - vec![match self { - Title(e) => sub_res(*e, refs).into(), - Subtitle(e) => sub_res(*e, refs).into(), - Decoration(e) => sub_res(*e, refs).into(), - Docinfo(e) => sub_res(*e, refs).into(), - SubStructure(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), - }] - } -} - -impl ResolvableRefs for c::SubStructure { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubStructure::*; - match self { - Topic(e) => sub_pop(&**e, refs), - Sidebar(e) => sub_pop(&**e, refs), - Transition(_) => {}, - Section(e) => sub_pop(&**e, refs), - BodyElement(e) => e.populate_targets(refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubStructure::*; - vec![match self { - Topic(e) => sub_res(*e, refs).into(), - Sidebar(e) => sub_res(*e, refs).into(), - Transition(e) => Transition(e), - Section(e) => sub_res(*e, refs).into(), - BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), - }] - } -} - -impl ResolvableRefs for c::BodyElement { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::BodyElement::*; - match self { - Paragraph(e) => sub_pop(&**e, refs), - LiteralBlock(e) => sub_pop(&**e, refs), - DoctestBlock(e) => sub_pop(&**e, refs), - MathBlock(_) => {}, - Rubric(e) => sub_pop(&**e, refs), - SubstitutionDefinition(e) => { - let subst = Substitution { - content: e.children().clone(), - ltrim: e.extra().ltrim, - rtrim: e.extra().rtrim - }; - for name in e.names() { - if refs.substitutions.contains_key(name) { - // TODO: Duplicate substitution name (level 3 system message). - } - // Intentionally overriding any previous values. - refs.substitutions.insert(name.clone(), subst.clone()); - refs.normalized_substitutions.insert(name.0.to_lowercase(), subst.clone()); - } - }, - Comment(_) => {}, - Pending(_) => { - unimplemented!(); - }, - Target(e) => { - if let Some(uri) = &e.extra().refuri { - for name in e.names() { - refs.named_targets.insert(name.clone(), NamedTargetType::ExternalLink(uri.clone())); - } - } - // TODO: as is, people can only refer to the target directly containing the URL. - // add refid and refnames to some HashMap and follow those later. - }, - Raw(_) => {}, - Image(_) => {}, - Compound(e) => sub_pop(&**e, refs), - Container(e) => sub_pop(&**e, refs), - BulletList(e) => sub_sub_pop(&**e, refs), - EnumeratedList(e) => sub_sub_pop(&**e, refs), - DefinitionList(e) => sub_sub_pop(&**e, refs), - FieldList(e) => sub_sub_pop(&**e, refs), - OptionList(e) => sub_sub_pop(&**e, refs), - LineBlock(e) => sub_pop(&**e, refs), - BlockQuote(e) => sub_pop(&**e, refs), - Admonition(e) => sub_pop(&**e, refs), - Attention(e) => sub_pop(&**e, refs), - Hint(e) => sub_pop(&**e, refs), - Note(e) => sub_pop(&**e, refs), - Caution(e) => sub_pop(&**e, refs), - Danger(e) => sub_pop(&**e, refs), - Error(e) => sub_pop(&**e, refs), - Important(e) => sub_pop(&**e, refs), - Tip(e) => sub_pop(&**e, refs), - Warning(e) => sub_pop(&**e, refs), - Footnote(e) => sub_pop(&**e, refs), - Citation(e) => sub_pop(&**e, refs), - SystemMessage(e) => sub_pop(&**e, refs), - Figure(e) => sub_pop(&**e, refs), - Table(e) => sub_pop(&**e, refs) - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::BodyElement::*; - vec![match self { - Paragraph(e) => sub_res(*e, refs).into(), - LiteralBlock(e) => sub_res(*e, refs).into(), - DoctestBlock(e) => sub_res(*e, refs).into(), - MathBlock(e) => MathBlock(e), - Rubric(e) => sub_res(*e, refs).into(), - SubstitutionDefinition(_) => return vec![], - Comment(e) => Comment(e), - Pending(e) => Pending(e), - Target(e) => Target(e), - Raw(e) => Raw(e), - Image(e) => Image(e), - Compound(e) => sub_res(*e, refs).into(), - Container(e) => sub_res(*e, refs).into(), - BulletList(e) => sub_sub_res(*e, refs).into(), - EnumeratedList(e) => sub_sub_res(*e, refs).into(), - DefinitionList(e) => sub_sub_res(*e, refs).into(), - FieldList(e) => sub_sub_res(*e, refs).into(), - OptionList(e) => sub_sub_res(*e, refs).into(), - LineBlock(e) => sub_res(*e, refs).into(), - BlockQuote(e) => sub_res(*e, refs).into(), - Admonition(e) => sub_res(*e, refs).into(), - Attention(e) => sub_res(*e, refs).into(), - Hint(e) => sub_res(*e, refs).into(), - Note(e) => sub_res(*e, refs).into(), - Caution(e) => sub_res(*e, refs).into(), - Danger(e) => sub_res(*e, refs).into(), - Error(e) => sub_res(*e, refs).into(), - Important(e) => sub_res(*e, refs).into(), - Tip(e) => sub_res(*e, refs).into(), - Warning(e) => sub_res(*e, refs).into(), - Footnote(e) => sub_res(*e, refs).into(), - Citation(e) => sub_res(*e, refs).into(), - SystemMessage(e) => sub_res(*e, refs).into(), - Figure(e) => sub_res(*e, refs).into(), - Table(e) => sub_res(*e, refs).into() - }] - } -} - -impl ResolvableRefs for c::BibliographicElement { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::BibliographicElement::*; - match self { - Author(e) => sub_pop(&**e, refs), - Authors(e) => sub_pop(&**e, refs), - Organization(e) => sub_pop(&**e, refs), - Address(e) => sub_pop(&**e, refs), - Contact(e) => sub_pop(&**e, refs), - Version(e) => sub_pop(&**e, refs), - Revision(e) => sub_pop(&**e, refs), - Status(e) => sub_pop(&**e, refs), - Date(e) => sub_pop(&**e, refs), - Copyright(e) => sub_pop(&**e, refs), - Field(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::BibliographicElement::*; - vec![match self { - Author(e) => sub_res(*e, refs).into(), - Authors(e) => sub_res(*e, refs).into(), - Organization(e) => sub_res(*e, refs).into(), - Address(e) => sub_res(*e, refs).into(), - Contact(e) => sub_res(*e, refs).into(), - Version(e) => sub_res(*e, refs).into(), - Revision(e) => sub_res(*e, refs).into(), - Status(e) => sub_res(*e, refs).into(), - Date(e) => sub_res(*e, refs).into(), - Copyright(e) => sub_res(*e, refs).into(), - Field(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::TextOrInlineElement { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::TextOrInlineElement::*; - match self { - String(_) => {}, - Emphasis(e) => sub_pop(&**e, refs), - Strong(e) => sub_pop(&**e, refs), - Literal(e) => sub_pop(&**e, refs), - Reference(e) => sub_pop(&**e, refs), - FootnoteReference(e) => sub_pop(&**e, refs), - CitationReference(e) => sub_pop(&**e, refs), - SubstitutionReference(e) => sub_pop(&**e, refs), - TitleReference(e) => sub_pop(&**e, refs), - Abbreviation(e) => sub_pop(&**e, refs), - Acronym(e) => sub_pop(&**e, refs), - Superscript(e) => sub_pop(&**e, refs), - Subscript(e) => sub_pop(&**e, refs), - Inline(e) => sub_pop(&**e, refs), - Problematic(e) => sub_pop(&**e, refs), - Generated(e) => sub_pop(&**e, refs), - Math(_) => {}, - TargetInline(_) => { - unimplemented!(); - }, - RawInline(_) => {}, - ImageInline(_) => {} - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::TextOrInlineElement::*; - vec![match self { - String(e) => String(e), - Emphasis(e) => sub_res(*e, refs).into(), - Strong(e) => sub_res(*e, refs).into(), - Literal(e) => sub_res(*e, refs).into(), - Reference(mut e) => { - if e.extra().refuri.is_none() { - if let Some(uri) = refs.target_url(&e.extra().refname) { - e.extra_mut().refuri = Some(uri.clone()); - } - } - (*e).into() - }, - FootnoteReference(e) => sub_res(*e, refs).into(), - CitationReference(e) => sub_res(*e, refs).into(), - SubstitutionReference(e) => match refs.substitution(&e.extra().refname) { - Some(Substitution {content, ltrim, rtrim}) => { - // (level 3 system message). - // TODO: ltrim and rtrim. - if *ltrim || *rtrim { - dbg!(content, ltrim, rtrim); - } - return content.clone() - }, - None => { - // Undefined substitution name (level 3 system message). - // TODO: This replaces the reference by a Problematic node. - // The corresponding SystemMessage node should go in a generated - // section with class "system-messages" at the end of the document. - use crate::document_tree::Problematic; - let mut replacement: Box = Box::new(Default::default()); - replacement.children_mut().push( - c::TextOrInlineElement::String(Box::new(format!("|{}|", e.extra().refname[0].0))) - ); - // TODO: Create an ID for replacement for the system_message to reference. - // TODO: replacement.refid pointing to the system_message. - Problematic(replacement) - } - }, - TitleReference(e) => sub_res(*e, refs).into(), - Abbreviation(e) => sub_res(*e, refs).into(), - Acronym(e) => sub_res(*e, refs).into(), - Superscript(e) => sub_res(*e, refs).into(), - Subscript(e) => sub_res(*e, refs).into(), - Inline(e) => sub_res(*e, refs).into(), - Problematic(e) => sub_res(*e, refs).into(), - Generated(e) => sub_res(*e, refs).into(), - Math(e) => Math(e), - TargetInline(e) => TargetInline(e), - RawInline(e) => RawInline(e), - ImageInline(e) => ImageInline(e) - }] - } -} - -impl ResolvableRefs for c::AuthorInfo { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::AuthorInfo::*; - match self { - Author(e) => sub_pop(&**e, refs), - Organization(e) => sub_pop(&**e, refs), - Address(e) => sub_pop(&**e, refs), - Contact(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::AuthorInfo::*; - vec![match self { - Author(e) => sub_res(*e, refs).into(), - Organization(e) => sub_res(*e, refs).into(), - Address(e) => sub_res(*e, refs).into(), - Contact(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::DecorationElement { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::DecorationElement::*; - match self { - Header(e) => sub_pop(&**e, refs), - Footer(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::DecorationElement::*; - vec![match self { - Header(e) => sub_res(*e, refs).into(), - Footer(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::SubTopic { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubTopic::*; - match self { - Title(e) => sub_pop(&**e, refs), - BodyElement(e) => e.populate_targets(refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubTopic::*; - match self { - Title(e) => vec![sub_res(*e, refs).into()], - BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(), - } - } -} - -impl ResolvableRefs for c::SubSidebar { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubSidebar::*; - match self { - Topic(e) => sub_pop(&**e, refs), - Title(e) => sub_pop(&**e, refs), - Subtitle(e) => sub_pop(&**e, refs), - BodyElement(e) => e.populate_targets(refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubSidebar::*; - vec![match self { - Topic(e) => sub_res(*e, refs).into(), - Title(e) => sub_res(*e, refs).into(), - Subtitle(e) => sub_res(*e, refs).into(), - BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), - }] - } -} - -impl ResolvableRefs for c::SubDLItem { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubDLItem::*; - match self { - Term(e) => sub_pop(&**e, refs), - Classifier(e) => sub_pop(&**e, refs), - Definition(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubDLItem::*; - vec![match self { - Term(e) => sub_res(*e, refs).into(), - Classifier(e) => sub_res(*e, refs).into(), - Definition(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::SubField { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubField::*; - match self { - FieldName(e) => sub_pop(&**e, refs), - FieldBody(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubField::*; - vec![match self { - FieldName(e) => sub_res(*e, refs).into(), - FieldBody(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::SubOptionListItem { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubOptionListItem::*; - match self { - OptionGroup(e) => sub_sub_pop(&**e, refs), - Description(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubOptionListItem::*; - vec![match self { - OptionGroup(e) => sub_sub_res(*e, refs).into(), - Description(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::SubOption { - fn populate_targets(&self, _: &mut TargetsCollected) {} - fn resolve_refs(self, _: &TargetsCollected) -> Vec { vec![self] } -} - -impl ResolvableRefs for c::SubLineBlock { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubLineBlock::*; - match self { - LineBlock(e) => sub_pop(&**e, refs), - Line(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubLineBlock::*; - vec![match self { - LineBlock(e) => sub_res(*e, refs).into(), - Line(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::SubBlockQuote { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubBlockQuote::*; - match self { - Attribution(e) => sub_pop(&**e, refs), - BodyElement(e) => e.populate_targets(refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubBlockQuote::*; - match self { - Attribution(e) => vec![sub_res(*e, refs).into()], - BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(), - } - } -} - -impl ResolvableRefs for c::SubFootnote { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubFootnote::*; - match self { - Label(e) => sub_pop(&**e, refs), - BodyElement(e) => e.populate_targets(refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubFootnote::*; - match self { - Label(e) => vec![sub_res(*e, refs).into()], - BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(), - } - } -} - -impl ResolvableRefs for c::SubFigure { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubFigure::*; - match self { - Caption(e) => sub_pop(&**e, refs), - Legend(e) => sub_pop(&**e, refs), - BodyElement(e) => e.populate_targets(refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubFigure::*; - vec![match self { - Caption(e) => sub_res(*e, refs).into(), - Legend(e) => sub_res(*e, refs).into(), - BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), - }] - } -} - -impl ResolvableRefs for c::SubTable { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubTable::*; - match self { - Title(e) => sub_pop(&**e, refs), - TableGroup(e) => sub_pop(&**e, refs), - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubTable::*; - vec![match self { - Title(e) => sub_res(*e, refs).into(), - TableGroup(e) => sub_res(*e, refs).into(), - }] - } -} - -impl ResolvableRefs for c::SubTableGroup { - fn populate_targets(&self, refs: &mut TargetsCollected) { - use c::SubTableGroup::*; - match self { - TableColspec(_) => { - unimplemented!(); - }, - TableHead(e) => { - for c in e.children() { - sub_sub_pop(c, refs); - } - }, - TableBody(e) => { - for c in e.children() { - sub_sub_pop(c, refs); - } - }, - } - } - fn resolve_refs(self, refs: &TargetsCollected) -> Vec { - use c::SubTableGroup::*; - vec![match self { - TableColspec(e) => TableColspec(e), - TableHead(mut e) => { - let new: Vec<_> = e.children_mut().drain(..).map(|c| sub_sub_res(c, refs)).collect(); - e.children_mut().extend(new); - TableHead(e) - }, - TableBody(mut e) => { - let new: Vec<_> = e.children_mut().drain(..).map(|c| sub_sub_res(c, refs)).collect(); - e.children_mut().extend(new); - TableBody(e) - }, - }] - } -} diff --git a/src/parser/tests.rs b/src/parser/tests.rs deleted file mode 100644 index a034c0e..0000000 --- a/src/parser/tests.rs +++ /dev/null @@ -1,241 +0,0 @@ -use pest::consumes_to; -use pest::parses_to; -use super::pest_rst::{RstParser, Rule}; - -#[test] -fn plain() { - parses_to! { - parser: RstParser, - input: "line\n", - rule: Rule::paragraph, - tokens: [ - paragraph(0, 4, [ - str(0, 4) - ]) - ] - }; -} - -#[test] -fn emph_only() { - parses_to! { - parser: RstParser, - input: "*emphasis*", - rule: Rule::emph_outer, - tokens: [ - emph(1, 9, [str_nested(1, 9)]) - ] - }; -} - -#[test] -fn emph() { - parses_to! { - parser: RstParser, - input: "line *with markup*\n", - rule: Rule::paragraph, - tokens: [ - paragraph(0, 18, [ - str(0, 5), - emph(6, 17, [str_nested(6, 17)]), - ]) - ] - }; -} - -#[test] -fn title() { - parses_to! { - parser: RstParser, - input: "\ -Title -===== -", - rule: Rule::title, - tokens: [ - title(0, 12, [ title_single(0, 12, [ - line(0, 6, [ str(0, 5) ]), - adornments(6, 11), - ]) ]) - ] - }; -} - -#[test] -fn title_overline() { - parses_to! { - parser: RstParser, - input: "\ ------ -Title ------ -", - rule: Rule::title, - tokens: [ - title(0, 17, [ title_double(0, 17, [ - adornments(0, 5), - line(6, 12, [ str(6, 11) ]), - ]) ]) - ] - }; -} - -#[allow(clippy::cognitive_complexity)] -#[test] -fn two_targets() { - parses_to! { - parser: RstParser, - input: "\ -.. _a: http://example.com -.. _`b_`: https://example.org -", - rule: Rule::document, - tokens: [ - target(0, 26, [ - target_name_uq(4, 5), - link_target(7, 25), - ]), - target(26, 56, [ - target_name_qu(31, 33), - link_target(36, 55), - ]), - ] - }; -} - -#[allow(clippy::cognitive_complexity)] -#[test] -fn admonitions() { - parses_to! { - parser: RstParser, - input: "\ -.. note:: - Just next line -.. admonition:: In line title - - Next line - -.. danger:: Just this line -", - rule: Rule::document, - tokens: [ - admonition_gen(0, 27, [ - admonition_type(3, 7), - paragraph(13, 27, [ str(13, 27) ]), - ]), - admonition(28, 71, [ - line(43, 58, [ str(43, 57) ]), - paragraph(62, 71, [ str(62, 71) ]), - ]), - admonition_gen(73, 100, [ - admonition_type(76, 82), - line(84, 100, [ str(84, 99) ]), - ]), - ] - }; -} - - -#[allow(clippy::cognitive_complexity)] -#[test] -fn substitutions() { - parses_to! { - parser: RstParser, - input: "\ -A |subst| in-line - -.. |subst| replace:: substitution -.. |subst2| replace:: it can also - be hanging -", - rule: Rule::document, - tokens: [ - paragraph(0, 17, [ - str(0, 2), - substitution_name(3, 8), - str(9, 17), - ]), - substitution_def(19, 52, [ - substitution_name(23, 28), - replace(30, 52, [ paragraph(40, 52, [str(40, 52)]) ]), - ]), - substitution_def(53, 101, [ - substitution_name(57, 63), - replace(65, 101, [ paragraph(75, 101, [ - str(75, 86), ws_newline(86, 87), - str(88, 100), - ]) ]), - ]), - ] - }; -} - - -#[allow(clippy::cognitive_complexity)] -#[test] -fn substitution_image() { - parses_to! { - parser: RstParser, - input: "\ -.. |subst| image:: thing.png - :target: foo.html -", - rule: Rule::document, - tokens: [ - substitution_def(0, 50, [ - substitution_name(4, 9), - image(11, 50, [ - line(18, 29, [ str(18, 28) ]), - image_option(32, 50, [ - image_opt_name(33, 39), - line(40, 50, [ str(40, 49) ]), - ]), - ]), - ]), - ] - }; -} - -// TODO: test images - -#[allow(clippy::cognitive_complexity)] -#[test] -fn nested_lists() { - parses_to! { - parser: RstParser, - input: "\ -paragraph - -- item 1 -- item 2 - more text - more text 2 - more text 3 - - nested item 1 - - nested item 2 - - nested item 3 -", - rule: Rule::document, - tokens: [ - paragraph(0, 9, [ str(0, 9) ]), - bullet_list(11, 131, [ - bullet_item(11, 21, [ - line(14, 21, [ str(14, 20) ]), - ]), - bullet_item(21, 131, [ - line(24, 31, [ str(24, 30) ]), - paragraph(34, 74, [ - str(34, 43), ws_newline(43, 44), - str(47, 58), ws_newline(58, 59), - str(62, 73), - ]), - bullet_list(77, 131, [ - bullet_item( 77, 93, [ line( 79, 93, [str( 79, 92)]) ]), - bullet_item( 96, 112, [ line( 98, 112, [str( 98, 111)]) ]), - bullet_item(115, 131, [ line(117, 131, [str(117, 130)]) ]), - ]), - ]), - ]), - ] - } -} diff --git a/src/parser/token.rs b/src/parser/token.rs deleted file mode 100644 index b3b7bac..0000000 --- a/src/parser/token.rs +++ /dev/null @@ -1,16 +0,0 @@ -//http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#bullet-lists - -// *, +, -, •, ‣, ⁃ -pub enum BulletListType { Ast, Plus, Minus, Bullet, TriBullet, HyphenBullet } -// 1, A, a, I, i -pub enum EnumListChar { Arabic, AlphaUpper, AlphaLower, RomanUpper, RomanLower, Auto } -// 1., (1), 1) -pub enum EnumListType { Period, ParenEnclosed, Paren } -// ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ -pub enum AdornmentChar { - Bang, DQuote, Hash, Dollar, Percent, Amp, SQuote, LParen, RParen, Ast, Plus, Comma, - Minus, Period, Slash, Colon, Semicolon, Less, Eq, More, Question, At, LBrack, - Backslash, RBrack, Caret, Underscore, Backtick, LBrace, Pipe, RBrace, Tilde, -} -// [1], [#], [*], [#foo] -pub enum FootnoteType { Numbered(usize), AutoNumber, AutoSymbol, AutoNamed(String) } diff --git a/src/renderer.rs b/src/renderer.rs deleted file mode 100644 index 82a5826..0000000 --- a/src/renderer.rs +++ /dev/null @@ -1,24 +0,0 @@ -mod html; -#[cfg(test)] -pub mod html_tests; - - -use std::io::Write; - -use failure::Error; - -use crate::document_tree::Document; - - -pub fn render_json(document: &Document, stream: W) -> Result<(), Error> where W: Write { - serde_json::to_writer(stream, &document)?; - Ok(()) -} - -pub fn render_xml(document: &Document, stream: W) -> Result<(), Error> where W: Write { - serde_xml_rs::to_writer(stream, &document).map_err(failure::SyncFailure::new)?; - Ok(()) -} - -pub use html::render_html; - diff --git a/src/renderer/html.rs b/src/renderer/html.rs deleted file mode 100644 index 6041ec0..0000000 --- a/src/renderer/html.rs +++ /dev/null @@ -1,388 +0,0 @@ -use std::io::Write; - -use failure::Error; - -// use crate::url::Url; -use crate::document_tree::{ - Document,Element,HasChildren,ExtraAttributes, - elements as e, - element_categories as c, - extra_attributes as a, -}; - - -// static FOOTNOTE_SYMBOLS: [char; 10] = ['*', '†', '‡', '§', '¶', '#', '♠', '♥', '♦', '♣']; - -pub fn render_html(document: &Document, stream: W, standalone: bool) -> Result<(), Error> where W: Write { - let mut renderer = HTMLRenderer { stream, level: 0 }; - if standalone { - document.render_html(&mut renderer) - } else { - for c in document.children() { - (*c).render_html(&mut renderer)?; - writeln!(renderer.stream)?; - } - Ok(()) - } -} - -fn escape_html(text: &str) -> String { - text.replace('&', "&") - .replace('<', "<") - .replace('>', ">") - .replace('"', """) -} - -struct HTMLRenderer where W: Write { - stream: W, - level: u8, -} - -trait HTMLRender { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write; -} - -macro_rules! impl_html_render_cat {($cat:ident { $($member:ident),+ }) => { - impl HTMLRender for c::$cat { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - match self {$( - c::$cat::$member(elem) => (**elem).render_html(renderer), - )+} - } - } -}} - -macro_rules! impl_html_render_simple { - ( - $type1:ident => $tag1:ident $( [$($post1:tt)+] )?, - $( $type:ident => $tag:ident $( [$($post:tt)+] )? ),+ - ) => { - impl_html_render_simple!($type1 => $tag1 $([$($post1)+])?); - $( impl_html_render_simple!($type => $tag $([$($post)+])?); )+ - }; - ( $type:ident => $tag:ident ) => { - impl_html_render_simple!($type => $tag[""]); - }; - ( $type:ident => $tag:ident [ $post:expr ] ) => { - impl HTMLRender for e::$type { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - let multiple_children = self.children().len() > 1; - write!(renderer.stream, "<{}>", stringify!($tag))?; - if multiple_children { write!(renderer.stream, $post)?; } - for c in self.children() { - (*c).render_html(renderer)?; - if multiple_children { write!(renderer.stream, $post)?; } - } - write!(renderer.stream, "", stringify!($tag))?; - Ok(()) - } - } - }; -} - -macro_rules! impl_html_render_simple_nochildren {( $($type:ident => $tag:ident),+ ) => { $( - impl HTMLRender for e::$type { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - write!(renderer.stream, "<{0}>", stringify!($tag))?; - Ok(()) - } - } -)+ }} - -// Impl - -impl HTMLRender for Document { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - writeln!(renderer.stream, "")?; - for c in self.children() { - (*c).render_html(renderer)?; - writeln!(renderer.stream)?; - } - writeln!(renderer.stream, "")?; - Ok(()) - } -} - -impl_html_render_cat!(StructuralSubElement { Title, Subtitle, Decoration, Docinfo, SubStructure }); -impl_html_render_simple!(Subtitle => h2); - -impl HTMLRender for e::Title { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - let level = if renderer.level > 6 { 6 } else { renderer.level }; - write!(renderer.stream, "", level)?; - for c in self.children() { - (*c).render_html(renderer)?; - } - write!(renderer.stream, "", level)?; - Ok(()) - } -} - -impl HTMLRender for e::Docinfo { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Like “YAML frontmatter” in Markdown - unimplemented!(); - } -} - -impl HTMLRender for e::Decoration { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Header or footer - unimplemented!(); - } -} - -impl_html_render_cat!(SubStructure { Topic, Sidebar, Transition, Section, BodyElement }); -impl_html_render_simple!(Sidebar => aside); - -impl HTMLRender for e::Section { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - renderer.level += 1; - writeln!(renderer.stream, "
", self.ids()[0].0)?; - for c in self.children() { - (*c).render_html(renderer)?; - writeln!(renderer.stream)?; - } - write!(renderer.stream, "
")?; - Ok(()) - } -} - -impl HTMLRender for e::Transition { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - write!(renderer.stream, "
")?; - Ok(()) - } -} - -impl HTMLRender for e::Topic { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // A mini section with title - unimplemented!(); - } -} - -impl_html_render_cat!(BodyElement { Paragraph, LiteralBlock, DoctestBlock, MathBlock, Rubric, SubstitutionDefinition, Comment, Pending, Target, Raw, Image, Compound, Container, BulletList, EnumeratedList, DefinitionList, FieldList, OptionList, LineBlock, BlockQuote, Admonition, Attention, Hint, Note, Caution, Danger, Error, Important, Tip, Warning, Footnote, Citation, SystemMessage, Figure, Table }); -impl_html_render_simple!(Paragraph => p, LiteralBlock => pre, MathBlock => math, Rubric => a, Compound => p, Container => div, BulletList => ul["\n"], EnumeratedList => ol["\n"], DefinitionList => dl["\n"], FieldList => dl["\n"], OptionList => pre, LineBlock => div["\n"], BlockQuote => blockquote, Admonition => aside, Attention => aside, Hint => aside, Note => aside, Caution => aside, Danger => aside, Error => aside, Important => aside, Tip => aside, Warning => aside, Figure => figure); -impl_html_render_simple_nochildren!(Table => table); //TODO: after implementing the table, move it to elems with children - -impl HTMLRender for I where I: e::Element + a::ExtraAttributes { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - let extra = self.extra(); - if let Some(ref target) = extra.target { - write!(renderer.stream, "
", escape_html(target.as_str()))?; - } - write!(renderer.stream, " - // TODO: height: Option - // TODO: width: Option - // TODO: scale: Option - write!(renderer.stream, " src=\"{}\" />", escape_html(extra.uri.as_str()))?; - if extra.target.is_some() { - write!(renderer.stream, "")?; - } - Ok(()) - } -} - -impl HTMLRender for e::DoctestBlock { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // TODO - unimplemented!(); - } -} - -impl HTMLRender for e::SubstitutionDefinition { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // TODO: Should those be removed after resolving them - Ok(()) - } -} - -impl HTMLRender for e::Comment { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - write!(renderer.stream, "")?; - Ok(()) - } -} - -impl HTMLRender for e::Pending { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Will those be resolved by the time we get here? - unimplemented!(); - } -} - -impl HTMLRender for e::Target { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Should be resolved by now - Ok(()) - } -} - -impl HTMLRender for e::Raw { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - for c in self.children() { - write!(renderer.stream, "{}", c)?; - } - Ok(()) - } -} - -impl HTMLRender for e::Footnote { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - unimplemented!(); - } -} - -impl HTMLRender for e::Citation { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - unimplemented!(); - } -} - -impl HTMLRender for e::SystemMessage { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - write!(renderer.stream, "
System Message")?; - for c in self.children() { - (*c).render_html(renderer)?; - } - write!(renderer.stream, "
")?; - Ok(()) - } -} - -impl_html_render_cat!(TextOrInlineElement { String, Emphasis, Strong, Literal, Reference, FootnoteReference, CitationReference, SubstitutionReference, TitleReference, Abbreviation, Acronym, Superscript, Subscript, Inline, Problematic, Generated, Math, TargetInline, RawInline, ImageInline }); -impl_html_render_simple!(Emphasis => em, Strong => strong, Literal => code, FootnoteReference => a, CitationReference => a, TitleReference => a, Abbreviation => abbr, Acronym => acronym, Superscript => sup, Subscript => sub, Inline => span, Math => math, TargetInline => a); - -impl HTMLRender for String { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - write!(renderer.stream, "{}", escape_html(self))?; - Ok(()) - } -} - -impl HTMLRender for e::Reference { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - let extra = self.extra(); - write!(renderer.stream, "")?; - for c in self.children() { - (*c).render_html(renderer)?; - } - write!(renderer.stream, "")?; - Ok(()) - } -} - -impl HTMLRender for e::SubstitutionReference { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Will those be resolved by the time we get here? - unimplemented!(); - } -} - -impl HTMLRender for e::Problematic { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Broken inline markup leads to insertion of this in docutils - unimplemented!(); - } -} - -impl HTMLRender for e::Generated { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Section numbers and so on - unimplemented!(); - } -} - -impl HTMLRender for e::RawInline { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - for c in self.children() { - write!(renderer.stream, "{}", c)?; - } - Ok(()) - } -} - - -//--------------\\ -//Content Models\\ -//--------------\\ - -impl_html_render_cat!(SubTopic { Title, BodyElement }); -impl_html_render_cat!(SubSidebar { Topic, Title, Subtitle, BodyElement }); -impl_html_render_simple!(ListItem => li); - -impl HTMLRender for e::DefinitionListItem { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // Term→dt, Definition→dd, Classifier→??? - unimplemented!(); - } -} - -impl HTMLRender for e::Field { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // FieldName→dt, FieldBody→dd - unimplemented!(); - } -} - -impl HTMLRender for e::OptionListItem { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - // OptionGroup→dt(s), Description→dd - unimplemented!(); - } -} - -impl_html_render_cat!(SubLineBlock { LineBlock, Line }); - -impl HTMLRender for e::Line { - fn render_html(&self, renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - for c in self.children() { - (*c).render_html(renderer)?; - } - write!(renderer.stream, "
")?; - Ok(()) - } -} - -impl_html_render_cat!(SubBlockQuote { Attribution, BodyElement }); -impl_html_render_simple!(Attribution => cite); //TODO: correct? - -impl_html_render_cat!(SubFigure { Caption, Legend, BodyElement }); -impl_html_render_simple!(Caption => caption); - -impl HTMLRender for e::Legend { - fn render_html(&self, _renderer: &mut HTMLRenderer) -> Result<(), Error> where W: Write { - unimplemented!(); - } -} - -//------------\\ -//Things to do\\ -//------------\\ - -//TODO: prettyprint option list -//TODO: render admonitions: Admonition, Attention, Hint, Note, Caution, Danger, Error, Important, Tip, Warning -//TODO: properly render tables - -//TODO: add reference target: FootnoteReference, CitationReference, TitleReference -//TODO: add title: Abbr, Acronym -//TODO: convert math, set display attr -//TODO: add id: Rubric, Target, TargetInline diff --git a/src/renderer/html_tests.rs b/src/renderer/html_tests.rs deleted file mode 100644 index 117b2d4..0000000 --- a/src/renderer/html_tests.rs +++ /dev/null @@ -1,274 +0,0 @@ -use pretty_assertions::assert_eq; - -use crate::parser::parse; -use super::html::render_html; - -fn check_renders_to(rst: &str, expected: &str) { - println!("Rendering:\n{}\n---", rst); - let doc = parse(rst).expect("Cannot parse"); - let mut result_data: Vec = vec![]; - render_html(&doc, &mut result_data, false).expect("Render error"); - let result = String::from_utf8(result_data).expect("Could not decode"); - assert_eq!(result.as_str().trim(), expected); -} - -#[test] -fn test_simple_string() { - check_renders_to( - "Simple String", - "

Simple String

", - ); -} - -#[test] -fn test_simple_string_with_markup() { - check_renders_to( - "Simple String with *emph* and **strong**", - "

Simple String with emph and strong

", - ); -} - -#[test] -fn test_check_inline_literal() { - check_renders_to( - "Simple String with an even simpler ``inline literal``", - "

Simple String with an even simpler inline literal

", - ); -} - -/* -#[test] -fn test_reference_anonymous() { - check_renders_to("\ -A simple `anonymous reference`__ - -__ http://www.test.com/test_url -", "\ -

A simple anonymous reference

\ -"); -} -*/ - -#[test] -fn test_two_paragraphs() { - check_renders_to( - "One paragraph.\n\nTwo paragraphs.", - "

One paragraph.

\n

Two paragraphs.

", - ); -} - -#[test] -fn test_named_reference() { - check_renders_to("\ -A simple `named reference`_ with stuff in between the -reference and the target. - -.. _`named reference`: http://www.test.com/test_url -", "\ -

A simple named reference with stuff in between the \ -reference and the target.

\ -"); -} - -#[test] -fn test_substitution() { - check_renders_to("\ -A |subst|. - -.. |subst| replace:: text substitution -", "

A text substitution.

"); -} - -/* -#[test] -fn test_section_hierarchy() { - check_renders_to("\ -+++++ -Title -+++++ - -Subtitle -======== - -Some stuff - -Section -------- - -Some more stuff - -Another Section -............... - -And even more stuff -", "\ -

Some stuff

-
-

Section

-

Some more stuff

-
-

Another Section

-

And even more stuff

-
-
\ -"); -} - -#[test] -fn test_docinfo_title() { - check_renders_to("\ -+++++ -Title -+++++ - -:author: me - -Some stuff -", "\ -
-

Title

-
-
Author
-

me

-
-

Some stuff

-
\ -"); -} -*/ - -#[test] -fn test_section_hierarchy() { - check_renders_to("\ -+++++ -Title -+++++ - -Not A Subtitle -============== - -Some stuff - -Section -------- - -Some more stuff - -Another Section -............... - -And even more stuff -", "\ -
-

Title

-
-

Not A Subtitle

-

Some stuff

-
-

Section

-

Some more stuff

-
-

Another Section

-

And even more stuff

-
-
-
-
\ -"); -} - -#[test] -fn test_bullet_list() { - check_renders_to("\ -* bullet -* list -", "\ -
    -
  • bullet

  • -
  • list

  • -
\ -"); -} - -/* -#[test] -fn test_table() { - check_renders_to("\ -.. table:: - :align: right - - +-----+-----+ - | 1 | 2 | - +-----+-----+ - | 3 | 4 | - +-----+-----+ -", "\ - ---- - - - - - - - - -

1

2

3

4

\ -"); -} -*/ - -/* -#[test] -fn test_field_list() { - check_renders_to("\ -Not a docinfo. - -:This: .. _target: - - is -:a: -:simple: -:field: list -", "\ -

Not a docinfo.

-
-
This
-

is

-
-
a
-

-
simple
-

-
field
-

list

-
-
\ -"); -} -*/ - -/* -#[test] -fn test_field_list_long() { - check_renders_to("\ -Not a docinfo. - -:This is: a -:simple field list with loooong field: names -", "\ -

Not a docinfo.

-
-
This is
-

a

-
-
simple field list with loooong field
-

names

-
-
\ -"); -} -*/ diff --git a/src/rst.pest b/src/rst.pest deleted file mode 100644 index f3a1516..0000000 --- a/src/rst.pest +++ /dev/null @@ -1,474 +0,0 @@ -// Entry point: the document. - -// This grammar is aligned to the doctree names when possible. -// It will however contain blocks, as we can’t parse sections: -// Section headers define the hierarchy by their delimiters, -// and pest only has one stack that we need for indentation. - -document = _{ SOI ~ blocks ~ EOI } -blocks = _{ block ~ (blank_line* ~ block)* ~ blank_line? } -block = _{ PEEK[..] ~ hanging_block } - -// This is the list of all block-level elements -// They’re defined hanging, i.e. without the first PEEK[..] -// This is d -hanging_block = _{ - substitution_def - | image_directive - | admonition - | admonition_gen - | target - | title - | bullet_list - | paragraph -// TODO: implement all those things: -// | block_quote -// | verbatim -// | image ✓ -// | code_block -// | doctest_block -// | admonition ✓ -// | target ✓ -// | horizontal_rule -// | title ✓ -// | table -// | ordered_list -// | bullet_list ✓ -// | paragraph ✓ -// | plain -} - -// Substitution definition. A block type -substitution_def = { ".." ~ PUSH(" "+) ~ "|" ~ substitution_name ~ "|" ~ " "+ ~ inline_dirblock ~ DROP } -substitution_name = { !" " ~ (!(" "|"|") ~ ANY)+ ~ (" "+ ~ (!(" "|"|") ~ ANY)+)* } -inline_dirblock = _{ replace | image } // TODO: implement others - -// Target. A block type -target = { target_qu | target_uq } -target_uq = _{ ".. _" ~ target_name_uq ~ ":" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE } -target_qu = _{ ".. _`" ~ !"``" ~ target_name_qu ~ !"``:" ~ "`:" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE } -target_name_uq = { ( !("_"|":"|"`") ~ !NEWLINE ~ ANY )* } -target_name_qu = { ( !(":"|"`"|"_>") ~ ANY )* } -link_target = { nonspacechar+ } - -// Title. A block type -title = { title_double | title_single } -title_double = { PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP } -title_single = { line ~ PEEK[..] ~ adornments ~ NEWLINE } - -// Bullet list. A block type. -bullet_list = { bullet_item ~ (PEEK[..] ~ bullet_item)* } -bullet_item = { bullet_marker ~ PUSH(" "+) ~ line ~ blank_line* ~ blist_body? ~ DROP } -blist_body = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } - -// paragraph. A block type. -paragraph = { inlines } - - -/* Directives: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives - * .. name:: arguments ~ :options: ~ blank_line+ ~ content - * Everything except for the first argument has to be indented - */ - - -// Directives with options can have these or specific ones: -common_opt_name = { "class" | "name" } - -// Replace. A directive only usable in substitutions. - -replace = { ^"replace::" ~ " "* ~ paragraph } - -// Image. A directive. - -image_directive = _{ ".." ~ PUSH(" "+) ~ image ~ DROP } -image = { ^"image::" ~ line ~ image_opt_block? } -image_opt_block = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ image_option } //TODO: merge with other directives? -image_option = { ":" ~ image_opt_name ~ ":" ~ line } -image_opt_name = { common_opt_name | "alt" | "height" | "width" | "scale" | "align" | "target" } - -// Admonition. A directive. The generic one has a title - -admonition = { ".." ~ PUSH(" "+) ~ ^"admonition::" ~ line ~ blank_line* ~ admonition_content? ~ DROP } -admonition_gen = { ".." ~ PUSH(" "+) ~ admonition_type ~ "::" ~ (blank_line | line) ~ blank_line* ~ admonition_content? ~ DROP } -admonition_type = { ^"attention" | ^"caution" | ^"danger" | ^"error" | ^"hint" | ^"important" | ^"note" | ^"tip" | ^"warning" } -admonition_content = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } //TODO: merge with other directives? - - - -/* - * inlines - */ - - -line = { !marker ~ inline+ ~ NEWLINE } -blank_line = _{ !marker ~ !inline ~ " "* ~ NEWLINE } - -inlines = _{ !marker ~ inline+ ~ ( ( ws_newline ~ PEEK[..] ~ !marker ~ inline+ )+ ~ NEWLINE )? } -ws_newline = { NEWLINE } -inline = _{ inline_special | str } -inline_special = _{ - reference - | substitution_ref - | emph_outer - | strong_outer - | literal_outer -// | ul_or_star_line -// | space -// | note_reference -// | footnote -// //| citation -// | code -// | application_depent -// | entity -// | escaped_char -// | smart -// | symbol -} - -str = { (!(NEWLINE | inline_special) ~ ANY)+ } - -// simple formatting -inline_nested = _{ inline_special | str_nested } -str_nested = { word_nested ~ ( " "+ ~ word_nested)* } -// TODO: allow ` in emph and * in literal -word_nested = _{ (!(NEWLINE | " " | inline_special | "*" | "`") ~ ANY)+ } - -emph_outer = _{ "*" ~ emph ~ "*" } -emph = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* } -strong_outer = _{ "**" ~ strong ~ "**" } -strong = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* } -literal_outer = _{ "``" ~ literal ~ "``" } -literal = { (!("`"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("`"|" ") ~ inline_nested)+)* } - -// inline links -reference = { reference_target | reference_explicit | reference_auto } - -reference_target = { reference_target_uq ~ "_" | reference_target_qu } -reference_target_uq = { (!("_"|":"|"`") ~ nonspacechar)+ } -reference_target_qu = { ( !("`"? ~ "`_") ~ "`" ~ !"``" ) ~ reference_text? ~ ("<" ~ reference_bracketed ~ ">")? ~ ( "`" ~ !"``" ) ~ "_" } -reference_text = { !"<" ~ ( !("`"|"<") ~ ANY )+ } -reference_bracketed = { url | (target_name_qu ~ "_") | relative_reference } -relative_reference = { (!("`"|">") ~ ANY)+ } - -reference_explicit = { reference_label ~ "(" ~ " "* ~ reference_source ~ " "* ~ (NEWLINE ~ PEEK[..])? ~ reference_title ~ " "* ~ ")" } -reference_label = { "[" ~ !"^" ~ (!"]" ~ inline)* ~ "]" } -reference_source = { reference_source_contents } -reference_source_contents = _{ ( (!("("|")"|">") ~ nonspacechar)+ | "(" ~ reference_source_contents ~ ")" )* } -reference_title = { ( reference_title_single | reference_title_double | "" ) } -reference_title_single = { "'" ~ ( !("'" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "'" } -reference_title_double = { "\"" ~ ( !("\"" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "\"" } - -// Emails can't end with punctuation, but URLs must use a separate rule. -reference_auto = { url_auto | email } -//reference_embedded = { "`" ~ reference_embedded_source ~ "<" ~ absolute_url_with_fragment ~ ">`_" ~ "_"? } -//reference_embedded_source = { ( !("<"|":"|"`") ~ ( " " | nonspacechar | blank_line ) )* } - -substitution_ref = _{ "|" ~ substitution_name ~ "|" } - -/* URLs as defined by the WHATWG URL standard. */ -url = { absolute_url_no_query ~ ("?" ~ url_unit*)? ~ ("#" ~ url_unit*)? } -absolute_url_no_query = { - ( special_url_scheme ~ ":" ~ scheme_relative_special_url ) | - ( ^"file:" ~ scheme_relative_file_url ) | - ( arbitrary_scheme ~ ":" ~ relative_url ) -} -scheme_relative_special_url = { "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? } -path_absolute_url = { "/" ~ path_relative_url } -path_relative_url = { ( url_path_segment_unit* ~ "/" )* ~ url_path_segment_unit* } -url_path_segment_unit = { !("/"|"?") ~ url_unit } -url_port = { ASCII_DIGIT* } -scheme_relative_file_url = { "//" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url } -relative_url = { ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? ) | path_absolute_url | (!(arbitrary_scheme ~ ":") ~ path_relative_url) } -/* this is approximately a superset of valid hosts and opaque hosts */ -host = { ( !(":"|"/"|"?"|"#") ~ url_unit)+ | ("["~(ASCII_HEX_DIGIT|"."|":")+~"]") } -special_url_scheme = { ^"ftp" | (^"http" | ^"ws") ~ ^"s"? } /* doesn't include "file" */ -arbitrary_scheme = { ASCII_ALPHA ~ ASCII_ALPHANUMERIC* } -url_unit = { - ASCII_ALPHANUMERIC | - "!"|"$"|"&"|"'"|"("|")"|"*"|"+"|","|"-"|"."|"/"|":"|";"|"="|"?"|"@"|"_"|"~" | - (!(SURROGATE|NONCHARACTER_CODE_POINT) ~ '\u{A0}'..'\u{10FFFD}') | - ("%" ~ ASCII_HEX_DIGIT{2}) -} - -/* - * Rules for URLs that don't end in punctuation. - * This is a modification of the rules above to incorporate the docutils rules - * for the final character in an auto URL and for the character after it. - * The patterns used here to emulate the behavior of docutils' regex are taken - * from . - */ -url_auto = { - ( absolute_url_no_query ~ ("?" ~ url_unit*)? ~ "#" ~ url_units_auto ) | - ( absolute_url_no_query ~ "?" ~ url_units_auto ) | - ( special_url_scheme ~ "://" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) | - ( special_url_scheme ~ "://" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) | - ( special_url_scheme ~ "://" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) | - ( ^"file://" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url_auto ) | - ( arbitrary_scheme ~ ":" ~ relative_url_auto ) -} -domain_host_auto = { - ( !(":"|"/"|"?"|"#") ~ url_unit ~ url_units_auto ) | - ( !(":"|"/"|"?"|"#") ~ url_unit ~ &">" ) | - ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url ) -} -path_absolute_url_auto = { "/" ~ path_relative_url_auto } -path_relative_url_auto = { prua1 | prua2 | &follows_auto_url } -prua1 = { ( url_path_segment_unit ~ prua1 ) | ( "/" ~ path_relative_url_auto ) } -prua2 = { ( url_path_segment_unit ~ prua2 ) | ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"="|"+") ~ &follows_auto_url ) } -relative_url_auto = { - ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) | - ( "//" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) | - ( "//" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) | - path_absolute_url_auto | - // (prua1|prua2) is path_relative_url_auto minus the &follows_auto_url case - (!(arbitrary_scheme ~ ":") ~ (prua1 | prua2)) -} -url_units_auto = { - ( url_unit ~ url_units_auto ) | - ( url_unit ~ &">" ~ &follows_auto_url ) | - ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url ) -} -follows_auto_url = { - EOI|"\x00"|WHITE_SPACE|">"|"\u{201A}"|"\u{201E}"| - (!(CONNECTOR_PUNCTUATION|OPEN_PUNCTUATION|"#"|"%"|"&"|"*"|"@") ~ PUNCTUATION) -} - -/* Rules for emails as defined by the HTML standard */ -email = { ( email_atext | "." )+ ~ "@" ~ email_label ~ ( "." ~ email_label )* } -email_atext = { ASCII_ALPHANUMERIC|"!"|"#"|"$"|"%"|"&"|"'"|"/"|"="|"?"|"^"|"_"|"`"|"{"|"|"|"}"|"~" } -email_label = { ASCII_ALPHANUMERIC ~ ( !("-"+ ~ !ASCII_ALPHANUMERIC) ~ (ASCII_ALPHANUMERIC|"-") ){0,62} } - -/* - * character classes - */ - - -bullet_marker = _{ "+" | "*" | "-" } -adornments = { - // recommended - "="+ | "-"+ | "`"+ | ":"+ | "."+ | "'"+ | "\""+ | "~"+ | "^"+ | "_"+ | "*"+ | "+"+ | "#"+ | - // parentheses - "("+ | ")"+ | "["+ | "]"+ | "{"+ | "}"+ | - // punctuation - ","+ | ";"+ | "!"+ | "?"+ | - // operators - "&"+ | "|"+ | "/"+ | "%"+ | "<"+ | ">"+ | - // misc - "$"+ | "@"+ | "\\"+ -} -nonspacechar = _{ !(" " | NEWLINE) ~ ANY } - - -/* - * lookaheads. do not use in another position - */ - - -marker = _{ (bullet_marker | "..") ~ " " } - - - -//################################################################################# - - - -// code_block = { -// ".. code" ~ "-block"? ~ ":: " ~ source ~ blank_line ~ -// NEWLINE ~ verbatim_chunk+ -// } - -// doctest_block = { (doctest_line+ ~ (!(">" | blank_line) ~ line)*)+ } - -// block_quote_raw = { ":" ~ blank_line ~ NEWLINE ~ nonblank_indented_line+ } - -// block_quote_chunk = { -// !"::" ~ ":" ~ blank_line ~ -// NEWLINE ~ -// blank_line* ~ -// nonblank_indented_line+ -// } - -// block_quote = { block_quote_chunk+ } - -// nonblank_indented_line = { !blank_line ~ indented_line } - -// verbatim_chunk = { blank_line* ~ nonblank_indented_line+ } - -// verbatim = { verbatim_chunk+ } - -// horizontal_rule = { -// ( "=" ~ sp ~ "=" ~ sp ~ "=" ~ (sp ~ "=")* -// | "-" ~ sp ~ "-" ~ sp ~ "-" ~ (sp ~ "-")* -// | "*" ~ sp ~ "*" ~ sp ~ "*" ~ (sp ~ "*")* -// | "^" ~ sp ~ "^" ~ sp ~ "^" ~ (sp ~ "^")* -// | "~" ~ sp ~ "~" ~ sp ~ "~" ~ (sp ~ "~")* -// | "_" ~ sp ~ "_" ~ sp ~ "_" ~ (sp ~ "_")* -// ) ~ -// sp ~ NEWLINE ~ blank_line+ -// } - -// table = { grid_table | header_less_grid_table | simple_table } - -// simple_table = { "NotImplemented" ~ "simple_table" } - -// grid_table = { grid_table_header ~ grid_table_header_sep ~ grid_table_body+ } -// header_less_grid_table = { grid_table_sep ~ grid_table_body+ } -// grid_table_header = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line ~ grid_table_row+ } -// grid_table_body = { ( grid_table_row ~ grid_table_sep )+ } -// grid_table_row = { sp ~ "|" ~ sp ~ ( table_cell ~ sp ~ "|" )+ ~ blank_line } -// table_cell = { ( ":" | ">" | "<" | "/" | "-" | spacechar | escaped_char | alphanumeric )+ } -// grid_table_header_sep = { sp ~ "+" ~ ( "="+ ~ "+" )+ ~ blank_line } -// grid_table_sep = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line } - -// bullet = { !horizontal_rule ~ ("+" | "*" | "-") ~ spacechar+ } - -// bullet_list = { &bullet ~ (list_tight | list_loose) } - -// list_tight = { list_item_tight+ ~ blank_line* ~ !(bullet | enumerator | def_marker) } -// list_loose = { ( list_item ~ blank_line* )+ } - -// list_item = { (bullet | enumerator | def_marker) ~ list_block ~ list_continuation_block* } -// list_item_tight = { -// (bullet | enumerator | def_marker) ~ -// list_block ~ -// (!blank_line ~ list_continuation_block)* ~ -// !list_continuation_block -// } - -// list_block = { !blank_line ~ line ~ list_block_line* } - -// list_continuation_block = { blank_line* ~ ( indent ~ list_block )+ } - -// enumerator = { (ASCII_DIGIT+ | "#"+) ~ "." ~ spacechar+ } - -// ordered_list = { &enumerator ~ (list_tight | list_loose) } - -// list_block_line = { -// !blank_line ~ -// !( (indent? ~ (bullet | enumerator)) | def_marker ) ~ -// !horizontal_rule ~ -// optionally_indented_line -// } - - - -// space = _{ spacechar+ } - -// str = { normal_char+ ~ str_chunk* } -// str_chunk = _{ (normal_char | "_"+ ~ &alphanumeric)+ } - -// escaped_char = { "\\" ~ !NEWLINE ~ ("-" | "\\" | "`" | "|" | "*" | "_" | "{" | "}" | "[" | "]" | "(" | ")" | "#" | "+" | "." | "!" | ">" | "<") } - -// entity = { hex_entity | dec_entity | char_entity } - -// endline = _{ line_break | terminal_endline | normal_endline } -// normal_endline = _{ sp ~ NEWLINE ~ !(blank_line | ">" | line ~ ("="+ | "-"+) ~ NEWLINE) } -// terminal_endline = _{ sp ~ NEWLINE ~ EOI } -// line_break = _{ " " ~ normal_endline } - -// symbol = { special_char } - -// application_depent = { !("`_" | "``_") ~ "`" ~ !"``" ~ target_name_qu ~ "`" ~ !("``" | "_") } - -// // This keeps the parser from getting bogged down on long strings of "*" or "_", -// // or strings of "*" or "_" with space on each side: -// ul_or_star_line = { ul_line | star_line } -// star_line = { "****" ~ "*"* | spacechar ~ "*"+ ~ &spacechar } -// ul_line = { "____" ~ "_"* | spacechar ~ "_"+ ~ &spacechar } - - -// empty_title = { "" } - -// ticks_2 = { "``" ~ !"`" } - -// code = { ticks_2 ~ ( (!"`" ~ nonspacechar)+ | "_" | !ticks_2 ~ "`" | !(sp ~ ticks_2) ~ (spacechar | NEWLINE ~ !blank_line) )+ ~ ticks_2 } - - -// quoted = { -// "\"" ~ (!"\"" ~ ANY)* ~ "\"" | -// "'" ~ (!"'" ~ ANY)* ~ "'" -// } -// spacechar = _{ " " | "\t" } -// sp = _{ spacechar* } -// spnl = _{ sp ~ (NEWLINE ~ sp)? } -// special_char = _{ "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "\"" | "'" | extended_special_char } -// normal_char = _{ !( special_char | spacechar | NEWLINE ) ~ ANY } -// alphanumeric = { -// ASCII_ALPHANUMERIC | -// "\u{200}" | "\u{201}" | "\u{202}" | "\u{203}" | "\u{204}" | "\u{205}" | "\u{206}" | "\u{207}" | -// "\u{210}" | "\u{211}" | "\u{212}" | "\u{213}" | "\u{214}" | "\u{215}" | "\u{216}" | "\u{217}" | -// "\u{220}" | "\u{221}" | "\u{222}" | "\u{223}" | "\u{224}" | "\u{225}" | "\u{226}" | "\u{227}" | -// "\u{230}" | "\u{231}" | "\u{232}" | "\u{233}" | "\u{234}" | "\u{235}" | "\u{236}" | "\u{237}" | -// "\u{240}" | "\u{241}" | "\u{242}" | "\u{243}" | "\u{244}" | "\u{245}" | "\u{246}" | "\u{247}" | -// "\u{250}" | "\u{251}" | "\u{252}" | "\u{253}" | "\u{254}" | "\u{255}" | "\u{256}" | "\u{257}" | -// "\u{260}" | "\u{261}" | "\u{262}" | "\u{263}" | "\u{264}" | "\u{265}" | "\u{266}" | "\u{267}" | -// "\u{270}" | "\u{271}" | "\u{272}" | "\u{273}" | "\u{274}" | "\u{275}" | "\u{276}" | "\u{277}" | -// "\u{300}" | "\u{301}" | "\u{302}" | "\u{303}" | "\u{304}" | "\u{305}" | "\u{306}" | "\u{307}" | -// "\u{310}" | "\u{311}" | "\u{312}" | "\u{313}" | "\u{314}" | "\u{315}" | "\u{316}" | "\u{317}" | -// "\u{320}" | "\u{321}" | "\u{322}" | "\u{323}" | "\u{324}" | "\u{325}" | "\u{326}" | "\u{327}" | -// "\u{330}" | "\u{331}" | "\u{332}" | "\u{333}" | "\u{334}" | "\u{335}" | "\u{336}" | "\u{337}" | -// "\u{340}" | "\u{341}" | "\u{342}" | "\u{343}" | "\u{344}" | "\u{345}" | "\u{346}" | "\u{347}" | -// "\u{350}" | "\u{351}" | "\u{352}" | "\u{353}" | "\u{354}" | "\u{355}" | "\u{356}" | "\u{357}" | -// "\u{360}" | "\u{361}" | "\u{362}" | "\u{363}" | "\u{364}" | "\u{365}" | "\u{366}" | "\u{367}" | -// "\u{370}" | "\u{371}" | "\u{372}" | "\u{373}" | "\u{374}" | "\u{375}" | "\u{376}" | "\u{377}" -// } - -// hex_entity = { "&#" ~ ("X"|"x") ~ ('0'..'9' | 'a'..'f' | 'A'..'F')+ ~ ";" } -// dec_entity = { "&#" ~ ASCII_DIGIT+ ~ ";" } -// char_entity = { "&" ~ ASCII_ALPHANUMERIC+ ~ ";" } - -// indent = _{ "\t" | " " } -// indented_line = { indent ~ line } -// optionally_indented_line = { indent? ~ line } - -// doctest_line = { ">>> " ~ raw_line } - -// line = _{ raw_line } - -// raw_line = _{ (!NEWLINE ~ ANY)* ~ NEWLINE | (!EOI ~ ANY)+ ~ EOI } - -// // Syntax extensions - -// extended_special_char = { -// //&{ extension(EXT_SMART) } ~ -// ("." | "-" | "\"" | "'") | -// //&{ extension(EXT_NOTES) } ~ -// "^" -// } - -// smart = { -// //&{ extension(EXT_SMART) } ~ -// ( ellipsis | dash | single_quoted | double_quoted | apostrophe ) -// } - -// apostrophe = { "'" } - -// ellipsis = { "..." | ". . ." } - -// dash = { em_dash | en_dash } -// en_dash = { "-" ~ &ASCII_DIGIT } -// em_dash = { "---" | "--" } - -// single_quote_start = { "'" ~ !(spacechar | NEWLINE) } -// single_quote_end = { "'" ~ !alphanumeric } -// single_quoted = { single_quote_start ~ ( !single_quote_end ~ inline )+ ~ single_quote_end } - -// double_quote_start = { "\"" } -// double_quote_end = { "\"" } -// double_quoted = { double_quote_start ~ ( !double_quote_end ~ inline )+ ~ double_quote_end } - -// footnote = { "[#" ~ (!"]" ~ inline)+ ~ "]_" } - -// definition = { -// &( (!defmark ~ nonspacechar ~ raw_line) ~ blank_line? ~ defmark) ~ -// d_list_title+ ~ -// (def_tight | def_loose) -// } -// d_list_title = { !defmark ~ &nonspacechar ~ (!endline ~ inline)+ ~ sp ~ NEWLINE } -// def_tight = { &defmark ~ list_tight } -// def_loose = { blank_line ~ &defmark ~ list_loose } -// defmark = { (":" | "~") ~ spacechar+ } -// def_marker = { -// //&{ extension(EXT_DLISTS) } ~ -// defmark -// } diff --git a/src/url.rs b/src/url.rs deleted file mode 100644 index 31a0536..0000000 --- a/src/url.rs +++ /dev/null @@ -1,78 +0,0 @@ -use std::fmt; -use std::str::FromStr; - -use url::{self,ParseError}; -use serde_derive::Serialize; - - -fn starts_with_scheme(input: &str) -> bool { - let scheme = input.split(':').next().unwrap(); - if scheme == input || scheme.is_empty() { - return false; - } - let mut chars = input.chars(); - // First character. - if !chars.next().unwrap().is_ascii_alphabetic() { - return false; - } - for ch in chars { - if !ch.is_ascii_alphanumeric() && ch != '+' && ch != '-' && ch != '.' { - return false; - } - } - true -} - -/// The string representation of a URL, either absolute or relative, that has -/// been verified as a valid URL on construction. -#[derive(Debug,PartialEq,Serialize,Clone)] -#[serde(transparent)] -pub struct Url(String); - -impl Url { - pub fn parse_absolute(input: &str) -> Result { - Ok(url::Url::parse(input)?.into()) - } - pub fn parse_relative(input: &str) -> Result { - // We're assuming that any scheme through which RsT documents are being - // accessed is a hierarchical scheme, and so we can parse relative to a - // random hierarchical URL. - if input.starts_with('/') || !starts_with_scheme(input) { - // Continue only if the parse succeeded, disregarding its result. - let random_base_url = url::Url::parse("https://a/b").unwrap(); - url::Url::options() - .base_url(Some(&random_base_url)) - .parse(input)?; - Ok(Url(input.into())) - } else { - // If this is a URL at all, it's an absolute one. - // There's no appropriate variant of url::ParseError really. - Err(ParseError::SetHostOnCannotBeABaseUrl) - } - } - pub fn as_str(&self) -> &str { - self.0.as_str() - } -} - -impl From for Url { - fn from(url: url::Url) -> Self { - Url(url.into_string()) - } -} - - -impl fmt::Display for Url { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - - -impl FromStr for Url { - type Err = ParseError; - fn from_str(input: &str) -> Result { - Url::parse_absolute(input) - .or_else(|_| Url::parse_relative(input)) - } -} -- cgit v1.2.3