diff options
| author | Andreu Botella Botella | 2019-11-06 03:09:22 +0100 | 
|---|---|---|
| committer | Philipp A | 2019-11-07 09:27:38 +0100 | 
| commit | df6b7645d845a022ca7eeba08b1ecb761a020195 (patch) | |
| tree | b6809aefdafcf9aec6e833e53a8d2304fea17362 | |
| parent | 5387291c1a2d4cfd0e5acdad26dcc7e33329d39a (diff) | |
| download | rust-rst-df6b7645d845a022ca7eeba08b1ecb761a020195.tar.bz2 | |
Handling relative URLs in the conversion code.
Closes #10.
| -rw-r--r-- | src/bin.rs | 2 | ||||
| -rw-r--r-- | src/document_tree/attribute_types.rs | 4 | ||||
| -rw-r--r-- | src/document_tree/elements.rs | 20 | ||||
| -rw-r--r-- | src/document_tree/extra_attributes.rs | 14 | ||||
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/parser/conversion/inline.rs | 32 | ||||
| -rw-r--r-- | src/parser/simplify.rs | 4 | ||||
| -rw-r--r-- | src/target.rs | 55 | ||||
| -rw-r--r-- | src/url.rs | 78 | 
9 files changed, 122 insertions, 89 deletions
| @@ -3,7 +3,7 @@  pub mod document_tree;  pub mod parser;  pub mod renderer; -pub mod target; +pub mod url;  use structopt::StructOpt; diff --git a/src/document_tree/attribute_types.rs b/src/document_tree/attribute_types.rs index b6819c7..30f3767 100644 --- a/src/document_tree/attribute_types.rs +++ b/src/document_tree/attribute_types.rs @@ -4,7 +4,7 @@ use failure::{Error,bail,format_err};  use serde_derive::Serialize;  use regex::Regex; -use crate::target; +use crate::url::Url;  #[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)]  pub enum EnumeratedListType { @@ -134,7 +134,7 @@ macro_rules! impl_cannot_be_empty {  		impl_cannot_be_empty!($($ts),*);  	};  } -impl_cannot_be_empty!(target::Target); +impl_cannot_be_empty!(Url);  impl_cannot_be_empty!(TableGroupCols);  impl<T> CanBeEmpty for Option<T> { diff --git a/src/document_tree/elements.rs b/src/document_tree/elements.rs index cefe044..f717f6a 100644 --- a/src/document_tree/elements.rs +++ b/src/document_tree/elements.rs @@ -1,6 +1,6 @@ +use std::path::PathBuf;  use serde_derive::Serialize; -use crate::target;  use super::attribute_types::{CanBeEmpty,ID,NameToken};  use super::extra_attributes::{self,ExtraAttributes};  use super::element_categories::*; @@ -20,8 +20,8 @@ pub trait Element {  	/// An element may have at most one of the names or dupnames attributes, but not both.  	fn   names    (&    self) -> &    Vec<NameToken>;  	fn   names_mut(&mut self) -> &mut Vec<NameToken>; -	fn  source    (&    self) -> &    Option<target::Target>; -	fn  source_mut(&mut self) -> &mut Option<target::Target>; +	fn  source    (&    self) -> &    Option<PathBuf>; +	fn  source_mut(&mut self) -> &mut Option<PathBuf>;  	fn classes    (&    self) -> &    Vec<String>;  	fn classes_mut(&mut self) -> &mut Vec<String>;  } @@ -33,7 +33,7 @@ pub struct CommonAttributes {  	#[serde(skip_serializing_if = "CanBeEmpty::is_empty")]  	names: Vec<NameToken>,  	#[serde(skip_serializing_if = "CanBeEmpty::is_empty")] -	source: Option<target::Target>, +	source: Option<PathBuf>,  	#[serde(skip_serializing_if = "CanBeEmpty::is_empty")]  	classes: Vec<String>,  	//TODO: dupnames @@ -45,12 +45,12 @@ pub struct CommonAttributes {  macro_rules! impl_element { ($name:ident) => (  	impl Element for $name { -		fn     ids    (&    self) -> &    Vec<ID>        { &    self.common.ids     } -		fn     ids_mut(&mut self) -> &mut Vec<ID>        { &mut self.common.ids     } -		fn   names    (&    self) -> &    Vec<NameToken> { &    self.common.names   } -		fn   names_mut(&mut self) -> &mut Vec<NameToken> { &mut self.common.names   } -		fn  source    (&    self) -> &    Option<target::Target> { &    self.common.source  } -		fn  source_mut(&mut self) -> &mut Option<target::Target> { &mut self.common.source  } +		fn     ids    (&    self) -> &    Vec<ID>         { &    self.common.ids     } +		fn     ids_mut(&mut self) -> &mut Vec<ID>         { &mut self.common.ids     } +		fn   names    (&    self) -> &    Vec<NameToken>  { &    self.common.names   } +		fn   names_mut(&mut self) -> &mut Vec<NameToken>  { &mut self.common.names   } +		fn  source    (&    self) -> &    Option<PathBuf> { &    self.common.source  } +		fn  source_mut(&mut self) -> &mut Option<PathBuf> { &mut self.common.source  }  		fn classes    (&    self) -> &    Vec<String> { &    self.common.classes }  		fn classes_mut(&mut self) -> &mut Vec<String> { &mut self.common.classes }  	} diff --git a/src/document_tree/extra_attributes.rs b/src/document_tree/extra_attributes.rs index e72b288..55896ab 100644 --- a/src/document_tree/extra_attributes.rs +++ b/src/document_tree/extra_attributes.rs @@ -1,6 +1,6 @@  use serde_derive::Serialize; -use crate::target; +use crate::url::Url;  use super::attribute_types::{CanBeEmpty,FixedSpace,ID,NameToken,AlignHV,AlignH,AlignV,TableAlignH,TableBorder,TableGroupCols,Measure,EnumeratedListType};  pub trait ExtraAttributes<A> { @@ -33,7 +33,7 @@ impl_extra!(SubstitutionDefinition { ltrim: bool, rtrim: bool });  impl_extra!(Comment { space: FixedSpace });  impl_extra!(Target {  	/// External reference to a URI/URL -	refuri: Option<target::Target>, +	refuri: Option<Url>,  	/// References to ids attributes in other elements  	refid: Option<ID>,  	/// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. @@ -42,13 +42,13 @@ impl_extra!(Target {  });  impl_extra!(Raw { space: FixedSpace, format: Vec<NameToken> });  impl_extra!(#[derive(Debug,PartialEq,Serialize,Clone)] Image { -	uri: target::Target, +	uri: Url,  	align: Option<AlignHV>,  	alt: Option<String>,  	height: Option<Measure>,  	width: Option<Measure>,  	scale: Option<u8>, -	target: Option<target::Target>,  // Not part of the DTD but a valid argument +	target: Option<Url>,  // Not part of the DTD but a valid argument  });  //bools usually are XML yesorno. “auto” however either exists and is set to something random like “1” or doesn’t exist @@ -75,7 +75,7 @@ impl_extra!(OptionArgument { delimiter: Option<String> });  impl_extra!(Reference {  	name: Option<NameToken>,  //TODO: is CDATA in the DTD, so maybe no nametoken?  	/// External reference to a URI/URL -	refuri: Option<target::Target>, +	refuri: Option<Url>,  	/// References to ids attributes in other elements  	refid: Option<ID>,  	/// Internal reference to the names attribute of another element @@ -89,7 +89,7 @@ impl_extra!(Problematic { refid: Option<ID> });  //also have non-inline versions. Inline image is no figure child, inline target has content  impl_extra!(TargetInline {  	/// External reference to a URI/URL -	refuri: Option<target::Target>, +	refuri: Option<Url>,  	/// References to ids attributes in other elements  	refid: Option<ID>,  	/// Internal reference to the names attribute of another element. May resolve to either an internal or external reference. @@ -100,7 +100,7 @@ impl_extra!(RawInline { space: FixedSpace, format: Vec<NameToken> });  pub type ImageInline = Image;  impl Image { -	pub fn new(uri: target::Target) -> Image { Image { +	pub fn new(uri: Url) -> Image { Image {  		uri,  		align: None,  		alt: None, @@ -3,4 +3,4 @@  pub mod document_tree;  pub mod parser;  pub mod renderer; -pub mod target; +pub mod url; diff --git a/src/parser/conversion/inline.rs b/src/parser/conversion/inline.rs index c51b2d9..c942732 100644 --- a/src/parser/conversion/inline.rs +++ b/src/parser/conversion/inline.rs @@ -1,6 +1,5 @@  use failure::Error;  use pest::iterators::Pair; -use url::Url;  use crate::document_tree::{  	ExtraAttributes, @@ -15,6 +14,7 @@ use crate::parser::{  //    pair_ext_parse::PairExt,  }; +use crate::url::Url;  use super::whitespace_normalize_name; @@ -73,16 +73,26 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error>  					refuri = if let Some(reference) = reference {  						let inner = reference.into_inner().next().unwrap();  						match inner.as_rule() { -							Rule::url => if let Ok(url) = Url::parse(inner.as_str()) { -								Some(url.into()) +							// The URL rules in our parser accept a narrow superset of +							// valid URLs, so we need to handle false positives. +							Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { +								Some(target) +							} else if inner.as_str().ends_with('_') { +								// like target_name_qu (minus the final underscore) +								let full_str = inner.as_str(); +								refname.push(full_str[0..full_str.len() - 1].into()); +								None  							} else { -								unimplemented!("reference to a relative URL") +								// like relative_reference +								Some(Url::parse_relative(inner.as_str())?)  							},  							Rule::target_name_qu => {  								refname.push(inner.as_str().into());  								None  							}, -							Rule::relative_reference => unimplemented!("reference to a relative URL"), +							Rule::relative_reference => { +								Some(Url::parse_relative(inner.as_str())?) +							},  							_ => unreachable!()  						}  					} else { @@ -98,9 +108,9 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error>  		Rule::reference_auto => {  			let rt_inner = concrete.into_inner().next().unwrap();  			match rt_inner.as_rule() { -				Rule::url_auto => match Url::parse(rt_inner.as_str()) { -					Ok(url) => { -						refuri = Some(url.into()); +				Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { +					Ok(target) => { +						refuri = Some(target);  						name   = None;  						refid  = None;  						children.push(rt_inner.as_str().into()); @@ -110,9 +120,9 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error>  				},  				Rule::email => {  					let mailto_url = String::from("mailto:") + rt_inner.as_str(); -					match Url::parse(&mailto_url) { -						Ok(url) => { -							refuri = Some(url.into()); +					match Url::parse_absolute(&mailto_url) { +						Ok(target) => { +							refuri = Some(target);  							name   = None;  							refid  = None;  							children.push(rt_inner.as_str().into()); diff --git a/src/parser/simplify.rs b/src/parser/simplify.rs index bb47760..f6f0e8a 100644 --- a/src/parser/simplify.rs +++ b/src/parser/simplify.rs @@ -21,7 +21,7 @@ TODO: continue documenting how it’s done via http://svn.code.sf.net/p/docutils  use std::collections::HashMap; -use crate::target::Target; +use crate::url::Url;  use crate::document_tree::{  	Document,  	HasChildren, @@ -37,7 +37,7 @@ enum NamedTargetType {  	LabeledFootnote(usize),  	Citation,  	InternalLink, -	ExternalLink(Target), +	ExternalLink(Url),  	IndirectLink(NameToken),  	SectionTitle  } diff --git a/src/target.rs b/src/target.rs deleted file mode 100644 index 3ce7d75..0000000 --- a/src/target.rs +++ /dev/null @@ -1,55 +0,0 @@ -use std::path::PathBuf; -use std::fmt; -use std::str::FromStr; -use std::string::ParseError; - -use url::{self,Url}; -use serde_derive::Serialize; - - -#[derive(Debug,PartialEq,Serialize,Clone)] -#[serde(untagged)] -pub enum Target { -	#[serde(serialize_with = "serialize_url")] -	Url(Url), -	Path(PathBuf), -} - -impl From<Url> for Target { -	fn from(url: Url) -> Self { -		Target::Url(url) -	} -} - -impl From<PathBuf> for Target { -	fn from(path: PathBuf) -> Self { -		Target::Path(path) -	} -} - - -impl fmt::Display for Target { -	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -		use Target::*; -		match *self { -			Url (ref url)  => write!(f, "{}", url), -			Path(ref path) => write!(f, "{}", path.display()), -		} -	} -} - - -impl FromStr for Target { -	type Err = ParseError; -	fn from_str(input: &str) -> Result<Self, Self::Err> { -		Ok(match Url::parse(input) { -			Ok(url) => url.into(), -			Err(_) => PathBuf::from(input.trim()).into(), -		}) -	} -} - - -pub fn serialize_url<S>(url: &Url, serializer: S) -> Result<S::Ok, S::Error> where S: serde::ser::Serializer { -	serializer.serialize_str(url.as_str()) -} diff --git a/src/url.rs b/src/url.rs new file mode 100644 index 0000000..31a0536 --- /dev/null +++ b/src/url.rs @@ -0,0 +1,78 @@ +use std::fmt; +use std::str::FromStr; + +use url::{self,ParseError}; +use serde_derive::Serialize; + + +fn starts_with_scheme(input: &str) -> bool { +	let scheme = input.split(':').next().unwrap(); +	if scheme == input || scheme.is_empty() { +		return false; +	} +	let mut chars = input.chars(); +	// First character. +	if !chars.next().unwrap().is_ascii_alphabetic() { +		return false; +	} +	for ch in chars { +		if !ch.is_ascii_alphanumeric() && ch != '+' && ch != '-' && ch != '.' { +			return false; +		} +	} +	true +} + +/// The string representation of a URL, either absolute or relative, that has +/// been verified as a valid URL on construction. +#[derive(Debug,PartialEq,Serialize,Clone)] +#[serde(transparent)] +pub struct Url(String); + +impl Url { +	pub fn parse_absolute(input: &str) -> Result<Self, ParseError> { +		Ok(url::Url::parse(input)?.into()) +	} +	pub fn parse_relative(input: &str) -> Result<Self, ParseError> { +		// We're assuming that any scheme through which RsT documents are being +		// accessed is a hierarchical scheme, and so we can parse relative to a +		// random hierarchical URL. +		if input.starts_with('/') || !starts_with_scheme(input) { +			// Continue only if the parse succeeded, disregarding its result. +			let random_base_url = url::Url::parse("https://a/b").unwrap(); +			url::Url::options() +				.base_url(Some(&random_base_url)) +				.parse(input)?; +			Ok(Url(input.into())) +		} else { +			// If this is a URL at all, it's an absolute one. +			// There's no appropriate variant of url::ParseError really. +			Err(ParseError::SetHostOnCannotBeABaseUrl) +		} +	} +	pub fn as_str(&self) -> &str { +		self.0.as_str() +	} +} + +impl From<url::Url> for Url { +	fn from(url: url::Url) -> Self { +		Url(url.into_string()) +	} +} + + +impl fmt::Display for Url { +	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +		write!(f, "{}", self.as_str()) +	} +} + + +impl FromStr for Url { +	type Err = ParseError; +	fn from_str(input: &str) -> Result<Self, Self::Err> { +		Url::parse_absolute(input) +			.or_else(|_| Url::parse_relative(input)) +	} +} | 
