diff options
| author | Andreu Botella Botella | 2019-11-06 03:09:22 +0100 |
|---|---|---|
| committer | Philipp A | 2019-11-07 09:27:38 +0100 |
| commit | df6b7645d845a022ca7eeba08b1ecb761a020195 (patch) | |
| tree | b6809aefdafcf9aec6e833e53a8d2304fea17362 | |
| parent | 5387291c1a2d4cfd0e5acdad26dcc7e33329d39a (diff) | |
| download | rust-rst-df6b7645d845a022ca7eeba08b1ecb761a020195.tar.bz2 | |
Handling relative URLs in the conversion code.
Closes #10.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/bin.rs | 2 |
| -rw-r--r-- | src/document_tree/attribute_types.rs | 4 |
| -rw-r--r-- | src/document_tree/elements.rs | 20 |
| -rw-r--r-- | src/document_tree/extra_attributes.rs | 14 |
| -rw-r--r-- | src/lib.rs | 2 |
| -rw-r--r-- | src/parser/conversion/inline.rs | 32 |
| -rw-r--r-- | src/parser/simplify.rs | 4 |
| -rw-r--r-- | src/target.rs | 55 |
| -rw-r--r-- | src/url.rs | 78 |
9 files changed, 122 insertions, 89 deletions
@@ -3,7 +3,7 @@ pub mod document_tree; pub mod parser; pub mod renderer; -pub mod target; +pub mod url; use structopt::StructOpt; diff --git a/src/document_tree/attribute_types.rs b/src/document_tree/attribute_types.rs index b6819c7..30f3767 100644 --- a/src/document_tree/attribute_types.rs +++ b/src/document_tree/attribute_types.rs @@ -4,7 +4,7 @@ use failure::{Error,bail,format_err}; use serde_derive::Serialize; use regex::Regex; -use crate::target; +use crate::url::Url; #[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)] pub enum EnumeratedListType { @@ -134,7 +134,7 @@ macro_rules! impl_cannot_be_empty { impl_cannot_be_empty!($($ts),*); }; } -impl_cannot_be_empty!(target::Target); +impl_cannot_be_empty!(Url); impl_cannot_be_empty!(TableGroupCols); impl<T> CanBeEmpty for Option<T> { diff --git a/src/document_tree/elements.rs b/src/document_tree/elements.rs index cefe044..f717f6a 100644 --- a/src/document_tree/elements.rs +++ b/src/document_tree/elements.rs @@ -1,6 +1,6 @@ +use std::path::PathBuf; use serde_derive::Serialize; -use crate::target; use super::attribute_types::{CanBeEmpty,ID,NameToken}; use super::extra_attributes::{self,ExtraAttributes}; use super::element_categories::*; @@ -20,8 +20,8 @@ pub trait Element { /// An element may have at most one of the names or dupnames attributes, but not both. 
fn names (& self) -> & Vec<NameToken>; fn names_mut(&mut self) -> &mut Vec<NameToken>; - fn source (& self) -> & Option<target::Target>; - fn source_mut(&mut self) -> &mut Option<target::Target>; + fn source (& self) -> & Option<PathBuf>; + fn source_mut(&mut self) -> &mut Option<PathBuf>; fn classes (& self) -> & Vec<String>; fn classes_mut(&mut self) -> &mut Vec<String>; } @@ -33,7 +33,7 @@ pub struct CommonAttributes { #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] names: Vec<NameToken>, #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] - source: Option<target::Target>, + source: Option<PathBuf>, #[serde(skip_serializing_if = "CanBeEmpty::is_empty")] classes: Vec<String>, //TODO: dupnames @@ -45,12 +45,12 @@ pub struct CommonAttributes { macro_rules! impl_element { ($name:ident) => ( impl Element for $name { - fn ids (& self) -> & Vec<ID> { & self.common.ids } - fn ids_mut(&mut self) -> &mut Vec<ID> { &mut self.common.ids } - fn names (& self) -> & Vec<NameToken> { & self.common.names } - fn names_mut(&mut self) -> &mut Vec<NameToken> { &mut self.common.names } - fn source (& self) -> & Option<target::Target> { & self.common.source } - fn source_mut(&mut self) -> &mut Option<target::Target> { &mut self.common.source } + fn ids (& self) -> & Vec<ID> { & self.common.ids } + fn ids_mut(&mut self) -> &mut Vec<ID> { &mut self.common.ids } + fn names (& self) -> & Vec<NameToken> { & self.common.names } + fn names_mut(&mut self) -> &mut Vec<NameToken> { &mut self.common.names } + fn source (& self) -> & Option<PathBuf> { & self.common.source } + fn source_mut(&mut self) -> &mut Option<PathBuf> { &mut self.common.source } fn classes (& self) -> & Vec<String> { & self.common.classes } fn classes_mut(&mut self) -> &mut Vec<String> { &mut self.common.classes } } diff --git a/src/document_tree/extra_attributes.rs b/src/document_tree/extra_attributes.rs index e72b288..55896ab 100644 --- a/src/document_tree/extra_attributes.rs +++ 
b/src/document_tree/extra_attributes.rs @@ -1,6 +1,6 @@ use serde_derive::Serialize; -use crate::target; +use crate::url::Url; use super::attribute_types::{CanBeEmpty,FixedSpace,ID,NameToken,AlignHV,AlignH,AlignV,TableAlignH,TableBorder,TableGroupCols,Measure,EnumeratedListType}; pub trait ExtraAttributes<A> { @@ -33,7 +33,7 @@ impl_extra!(SubstitutionDefinition { ltrim: bool, rtrim: bool }); impl_extra!(Comment { space: FixedSpace }); impl_extra!(Target { /// External reference to a URI/URL - refuri: Option<target::Target>, + refuri: Option<Url>, /// References to ids attributes in other elements refid: Option<ID>, /// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. @@ -42,13 +42,13 @@ impl_extra!(Target { }); impl_extra!(Raw { space: FixedSpace, format: Vec<NameToken> }); impl_extra!(#[derive(Debug,PartialEq,Serialize,Clone)] Image { - uri: target::Target, + uri: Url, align: Option<AlignHV>, alt: Option<String>, height: Option<Measure>, width: Option<Measure>, scale: Option<u8>, - target: Option<target::Target>, // Not part of the DTD but a valid argument + target: Option<Url>, // Not part of the DTD but a valid argument }); //bools usually are XML yesorno. “auto” however either exists and is set to something random like “1” or doesn’t exist @@ -75,7 +75,7 @@ impl_extra!(OptionArgument { delimiter: Option<String> }); impl_extra!(Reference { name: Option<NameToken>, //TODO: is CDATA in the DTD, so maybe no nametoken? /// External reference to a URI/URL - refuri: Option<target::Target>, + refuri: Option<Url>, /// References to ids attributes in other elements refid: Option<ID>, /// Internal reference to the names attribute of another element @@ -89,7 +89,7 @@ impl_extra!(Problematic { refid: Option<ID> }); //also have non-inline versions. 
Inline image is no figure child, inline target has content impl_extra!(TargetInline { /// External reference to a URI/URL - refuri: Option<target::Target>, + refuri: Option<Url>, /// References to ids attributes in other elements refid: Option<ID>, /// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. @@ -100,7 +100,7 @@ impl_extra!(RawInline { space: FixedSpace, format: Vec<NameToken> }); pub type ImageInline = Image; impl Image { - pub fn new(uri: target::Target) -> Image { Image { + pub fn new(uri: Url) -> Image { Image { uri, align: None, alt: None, @@ -3,4 +3,4 @@ pub mod document_tree; pub mod parser; pub mod renderer; -pub mod target; +pub mod url; diff --git a/src/parser/conversion/inline.rs b/src/parser/conversion/inline.rs index c51b2d9..c942732 100644 --- a/src/parser/conversion/inline.rs +++ b/src/parser/conversion/inline.rs @@ -1,6 +1,5 @@ use failure::Error; use pest::iterators::Pair; -use url::Url; use crate::document_tree::{ ExtraAttributes, @@ -15,6 +14,7 @@ use crate::parser::{ // pair_ext_parse::PairExt, }; +use crate::url::Url; use super::whitespace_normalize_name; @@ -73,16 +73,26 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> refuri = if let Some(reference) = reference { let inner = reference.into_inner().next().unwrap(); match inner.as_rule() { - Rule::url => if let Ok(url) = Url::parse(inner.as_str()) { - Some(url.into()) + // The URL rules in our parser accept a narrow superset of + // valid URLs, so we need to handle false positives. 
+ Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { + Some(target) + } else if inner.as_str().ends_with('_') { + // like target_name_qu (minus the final underscore) + let full_str = inner.as_str(); + refname.push(full_str[0..full_str.len() - 1].into()); + None } else { - unimplemented!("reference to a relative URL") + // like relative_reference + Some(Url::parse_relative(inner.as_str())?) }, Rule::target_name_qu => { refname.push(inner.as_str().into()); None }, - Rule::relative_reference => unimplemented!("reference to a relative URL"), + Rule::relative_reference => { + Some(Url::parse_relative(inner.as_str())?) + }, _ => unreachable!() } } else { @@ -98,9 +108,9 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> Rule::reference_auto => { let rt_inner = concrete.into_inner().next().unwrap(); match rt_inner.as_rule() { - Rule::url_auto => match Url::parse(rt_inner.as_str()) { - Ok(url) => { - refuri = Some(url.into()); + Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { + Ok(target) => { + refuri = Some(target); name = None; refid = None; children.push(rt_inner.as_str().into()); @@ -110,9 +120,9 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> }, Rule::email => { let mailto_url = String::from("mailto:") + rt_inner.as_str(); - match Url::parse(&mailto_url) { - Ok(url) => { - refuri = Some(url.into()); + match Url::parse_absolute(&mailto_url) { + Ok(target) => { + refuri = Some(target); name = None; refid = None; children.push(rt_inner.as_str().into()); diff --git a/src/parser/simplify.rs b/src/parser/simplify.rs index bb47760..f6f0e8a 100644 --- a/src/parser/simplify.rs +++ b/src/parser/simplify.rs @@ -21,7 +21,7 @@ TODO: continue documenting how it’s done via http://svn.code.sf.net/p/docutils use std::collections::HashMap; -use crate::target::Target; +use crate::url::Url; use crate::document_tree::{ Document, HasChildren, @@ -37,7 +37,7 @@ enum NamedTargetType { 
LabeledFootnote(usize), Citation, InternalLink, - ExternalLink(Target), + ExternalLink(Url), IndirectLink(NameToken), SectionTitle } diff --git a/src/target.rs b/src/target.rs deleted file mode 100644 index 3ce7d75..0000000 --- a/src/target.rs +++ /dev/null @@ -1,55 +0,0 @@ -use std::path::PathBuf; -use std::fmt; -use std::str::FromStr; -use std::string::ParseError; - -use url::{self,Url}; -use serde_derive::Serialize; - - -#[derive(Debug,PartialEq,Serialize,Clone)] -#[serde(untagged)] -pub enum Target { - #[serde(serialize_with = "serialize_url")] - Url(Url), - Path(PathBuf), -} - -impl From<Url> for Target { - fn from(url: Url) -> Self { - Target::Url(url) - } -} - -impl From<PathBuf> for Target { - fn from(path: PathBuf) -> Self { - Target::Path(path) - } -} - - -impl fmt::Display for Target { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use Target::*; - match *self { - Url (ref url) => write!(f, "{}", url), - Path(ref path) => write!(f, "{}", path.display()), - } - } -} - - -impl FromStr for Target { - type Err = ParseError; - fn from_str(input: &str) -> Result<Self, Self::Err> { - Ok(match Url::parse(input) { - Ok(url) => url.into(), - Err(_) => PathBuf::from(input.trim()).into(), - }) - } -} - - -pub fn serialize_url<S>(url: &Url, serializer: S) -> Result<S::Ok, S::Error> where S: serde::ser::Serializer { - serializer.serialize_str(url.as_str()) -} diff --git a/src/url.rs b/src/url.rs new file mode 100644 index 0000000..31a0536 --- /dev/null +++ b/src/url.rs @@ -0,0 +1,78 @@ +use std::fmt; +use std::str::FromStr; + +use url::{self,ParseError}; +use serde_derive::Serialize; + + +fn starts_with_scheme(input: &str) -> bool { + let scheme = input.split(':').next().unwrap(); + if scheme == input || scheme.is_empty() { + return false; + } + let mut chars = input.chars(); + // First character. 
+ if !chars.next().unwrap().is_ascii_alphabetic() { + return false; + } + for ch in chars { + if !ch.is_ascii_alphanumeric() && ch != '+' && ch != '-' && ch != '.' { + return false; + } + } + true +} + +/// The string representation of a URL, either absolute or relative, that has +/// been verified as a valid URL on construction. +#[derive(Debug,PartialEq,Serialize,Clone)] +#[serde(transparent)] +pub struct Url(String); + +impl Url { + pub fn parse_absolute(input: &str) -> Result<Self, ParseError> { + Ok(url::Url::parse(input)?.into()) + } + pub fn parse_relative(input: &str) -> Result<Self, ParseError> { + // We're assuming that any scheme through which RsT documents are being + // accessed is a hierarchical scheme, and so we can parse relative to a + // random hierarchical URL. + if input.starts_with('/') || !starts_with_scheme(input) { + // Continue only if the parse succeeded, disregarding its result. + let random_base_url = url::Url::parse("https://a/b").unwrap(); + url::Url::options() + .base_url(Some(&random_base_url)) + .parse(input)?; + Ok(Url(input.into())) + } else { + // If this is a URL at all, it's an absolute one. + // There's no appropriate variant of url::ParseError really. + Err(ParseError::SetHostOnCannotBeABaseUrl) + } + } + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From<url::Url> for Url { + fn from(url: url::Url) -> Self { + Url(url.into_string()) + } +} + + +impl fmt::Display for Url { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + + +impl FromStr for Url { + type Err = ParseError; + fn from_str(input: &str) -> Result<Self, Self::Err> { + Url::parse_absolute(input) + .or_else(|_| Url::parse_relative(input)) + } +} |
