aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andreu Botella Botella, 2019-11-06 03:09:22 +0100
committer: Philipp A, 2019-11-07 09:27:38 +0100
commit: df6b7645d845a022ca7eeba08b1ecb761a020195 (patch)
tree: b6809aefdafcf9aec6e833e53a8d2304fea17362
parent: 5387291c1a2d4cfd0e5acdad26dcc7e33329d39a (diff)
download: rust-rst-df6b7645d845a022ca7eeba08b1ecb761a020195.tar.bz2
Handling relative URLs in the conversion code.
Closes #10.
-rw-r--r-- src/bin.rs 2
-rw-r--r-- src/document_tree/attribute_types.rs 4
-rw-r--r-- src/document_tree/elements.rs 20
-rw-r--r-- src/document_tree/extra_attributes.rs 14
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/parser/conversion/inline.rs 32
-rw-r--r-- src/parser/simplify.rs 4
-rw-r--r-- src/target.rs 55
-rw-r--r-- src/url.rs 78
9 files changed, 122 insertions, 89 deletions
diff --git a/src/bin.rs b/src/bin.rs
index 7b9cee2..6916af1 100644
--- a/src/bin.rs
+++ b/src/bin.rs
@@ -3,7 +3,7 @@
pub mod document_tree;
pub mod parser;
pub mod renderer;
-pub mod target;
+pub mod url;
use structopt::StructOpt;
diff --git a/src/document_tree/attribute_types.rs b/src/document_tree/attribute_types.rs
index b6819c7..30f3767 100644
--- a/src/document_tree/attribute_types.rs
+++ b/src/document_tree/attribute_types.rs
@@ -4,7 +4,7 @@ use failure::{Error,bail,format_err};
use serde_derive::Serialize;
use regex::Regex;
-use crate::target;
+use crate::url::Url;
#[derive(Debug,PartialEq,Eq,Hash,Serialize,Clone)]
pub enum EnumeratedListType {
@@ -134,7 +134,7 @@ macro_rules! impl_cannot_be_empty {
impl_cannot_be_empty!($($ts),*);
};
}
-impl_cannot_be_empty!(target::Target);
+impl_cannot_be_empty!(Url);
impl_cannot_be_empty!(TableGroupCols);
impl<T> CanBeEmpty for Option<T> {
diff --git a/src/document_tree/elements.rs b/src/document_tree/elements.rs
index cefe044..f717f6a 100644
--- a/src/document_tree/elements.rs
+++ b/src/document_tree/elements.rs
@@ -1,6 +1,6 @@
+use std::path::PathBuf;
use serde_derive::Serialize;
-use crate::target;
use super::attribute_types::{CanBeEmpty,ID,NameToken};
use super::extra_attributes::{self,ExtraAttributes};
use super::element_categories::*;
@@ -20,8 +20,8 @@ pub trait Element {
/// An element may have at most one of the names or dupnames attributes, but not both.
fn names (& self) -> & Vec<NameToken>;
fn names_mut(&mut self) -> &mut Vec<NameToken>;
- fn source (& self) -> & Option<target::Target>;
- fn source_mut(&mut self) -> &mut Option<target::Target>;
+ fn source (& self) -> & Option<PathBuf>;
+ fn source_mut(&mut self) -> &mut Option<PathBuf>;
fn classes (& self) -> & Vec<String>;
fn classes_mut(&mut self) -> &mut Vec<String>;
}
@@ -33,7 +33,7 @@ pub struct CommonAttributes {
#[serde(skip_serializing_if = "CanBeEmpty::is_empty")]
names: Vec<NameToken>,
#[serde(skip_serializing_if = "CanBeEmpty::is_empty")]
- source: Option<target::Target>,
+ source: Option<PathBuf>,
#[serde(skip_serializing_if = "CanBeEmpty::is_empty")]
classes: Vec<String>,
//TODO: dupnames
@@ -45,12 +45,12 @@ pub struct CommonAttributes {
macro_rules! impl_element { ($name:ident) => (
impl Element for $name {
- fn ids (& self) -> & Vec<ID> { & self.common.ids }
- fn ids_mut(&mut self) -> &mut Vec<ID> { &mut self.common.ids }
- fn names (& self) -> & Vec<NameToken> { & self.common.names }
- fn names_mut(&mut self) -> &mut Vec<NameToken> { &mut self.common.names }
- fn source (& self) -> & Option<target::Target> { & self.common.source }
- fn source_mut(&mut self) -> &mut Option<target::Target> { &mut self.common.source }
+ fn ids (& self) -> & Vec<ID> { & self.common.ids }
+ fn ids_mut(&mut self) -> &mut Vec<ID> { &mut self.common.ids }
+ fn names (& self) -> & Vec<NameToken> { & self.common.names }
+ fn names_mut(&mut self) -> &mut Vec<NameToken> { &mut self.common.names }
+ fn source (& self) -> & Option<PathBuf> { & self.common.source }
+ fn source_mut(&mut self) -> &mut Option<PathBuf> { &mut self.common.source }
fn classes (& self) -> & Vec<String> { & self.common.classes }
fn classes_mut(&mut self) -> &mut Vec<String> { &mut self.common.classes }
}
diff --git a/src/document_tree/extra_attributes.rs b/src/document_tree/extra_attributes.rs
index e72b288..55896ab 100644
--- a/src/document_tree/extra_attributes.rs
+++ b/src/document_tree/extra_attributes.rs
@@ -1,6 +1,6 @@
use serde_derive::Serialize;
-use crate::target;
+use crate::url::Url;
use super::attribute_types::{CanBeEmpty,FixedSpace,ID,NameToken,AlignHV,AlignH,AlignV,TableAlignH,TableBorder,TableGroupCols,Measure,EnumeratedListType};
pub trait ExtraAttributes<A> {
@@ -33,7 +33,7 @@ impl_extra!(SubstitutionDefinition { ltrim: bool, rtrim: bool });
impl_extra!(Comment { space: FixedSpace });
impl_extra!(Target {
/// External reference to a URI/URL
- refuri: Option<target::Target>,
+ refuri: Option<Url>,
/// References to ids attributes in other elements
refid: Option<ID>,
/// Internal reference to the names attribute of another element. May resolve to either an internal or external reference.
@@ -42,13 +42,13 @@ impl_extra!(Target {
});
impl_extra!(Raw { space: FixedSpace, format: Vec<NameToken> });
impl_extra!(#[derive(Debug,PartialEq,Serialize,Clone)] Image {
- uri: target::Target,
+ uri: Url,
align: Option<AlignHV>,
alt: Option<String>,
height: Option<Measure>,
width: Option<Measure>,
scale: Option<u8>,
- target: Option<target::Target>, // Not part of the DTD but a valid argument
+ target: Option<Url>, // Not part of the DTD but a valid argument
});
//bools usually are XML yesorno. “auto” however either exists and is set to something random like “1” or doesn’t exist
@@ -75,7 +75,7 @@ impl_extra!(OptionArgument { delimiter: Option<String> });
impl_extra!(Reference {
name: Option<NameToken>, //TODO: is CDATA in the DTD, so maybe no nametoken?
/// External reference to a URI/URL
- refuri: Option<target::Target>,
+ refuri: Option<Url>,
/// References to ids attributes in other elements
refid: Option<ID>,
/// Internal reference to the names attribute of another element
@@ -89,7 +89,7 @@ impl_extra!(Problematic { refid: Option<ID> });
//also have non-inline versions. Inline image is no figure child, inline target has content
impl_extra!(TargetInline {
/// External reference to a URI/URL
- refuri: Option<target::Target>,
+ refuri: Option<Url>,
/// References to ids attributes in other elements
refid: Option<ID>,
/// Internal reference to the names attribute of another element. May resolve to either an internal or external reference.
@@ -100,7 +100,7 @@ impl_extra!(RawInline { space: FixedSpace, format: Vec<NameToken> });
pub type ImageInline = Image;
impl Image {
- pub fn new(uri: target::Target) -> Image { Image {
+ pub fn new(uri: Url) -> Image { Image {
uri,
align: None,
alt: None,
diff --git a/src/lib.rs b/src/lib.rs
index dbabe8d..6e39b1a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,4 +3,4 @@
pub mod document_tree;
pub mod parser;
pub mod renderer;
-pub mod target;
+pub mod url;
diff --git a/src/parser/conversion/inline.rs b/src/parser/conversion/inline.rs
index c51b2d9..c942732 100644
--- a/src/parser/conversion/inline.rs
+++ b/src/parser/conversion/inline.rs
@@ -1,6 +1,5 @@
use failure::Error;
use pest::iterators::Pair;
-use url::Url;
use crate::document_tree::{
ExtraAttributes,
@@ -15,6 +14,7 @@ use crate::parser::{
// pair_ext_parse::PairExt,
};
+use crate::url::Url;
use super::whitespace_normalize_name;
@@ -73,16 +73,26 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error>
refuri = if let Some(reference) = reference {
let inner = reference.into_inner().next().unwrap();
match inner.as_rule() {
- Rule::url => if let Ok(url) = Url::parse(inner.as_str()) {
- Some(url.into())
+ // The URL rules in our parser accept a narrow superset of
+ // valid URLs, so we need to handle false positives.
+ Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) {
+ Some(target)
+ } else if inner.as_str().ends_with('_') {
+ // like target_name_qu (minus the final underscore)
+ let full_str = inner.as_str();
+ refname.push(full_str[0..full_str.len() - 1].into());
+ None
} else {
- unimplemented!("reference to a relative URL")
+ // like relative_reference
+ Some(Url::parse_relative(inner.as_str())?)
},
Rule::target_name_qu => {
refname.push(inner.as_str().into());
None
},
- Rule::relative_reference => unimplemented!("reference to a relative URL"),
+ Rule::relative_reference => {
+ Some(Url::parse_relative(inner.as_str())?)
+ },
_ => unreachable!()
}
} else {
@@ -98,9 +108,9 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error>
Rule::reference_auto => {
let rt_inner = concrete.into_inner().next().unwrap();
match rt_inner.as_rule() {
- Rule::url_auto => match Url::parse(rt_inner.as_str()) {
- Ok(url) => {
- refuri = Some(url.into());
+ Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) {
+ Ok(target) => {
+ refuri = Some(target);
name = None;
refid = None;
children.push(rt_inner.as_str().into());
@@ -110,9 +120,9 @@ fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error>
},
Rule::email => {
let mailto_url = String::from("mailto:") + rt_inner.as_str();
- match Url::parse(&mailto_url) {
- Ok(url) => {
- refuri = Some(url.into());
+ match Url::parse_absolute(&mailto_url) {
+ Ok(target) => {
+ refuri = Some(target);
name = None;
refid = None;
children.push(rt_inner.as_str().into());
diff --git a/src/parser/simplify.rs b/src/parser/simplify.rs
index bb47760..f6f0e8a 100644
--- a/src/parser/simplify.rs
+++ b/src/parser/simplify.rs
@@ -21,7 +21,7 @@ TODO: continue documenting how it’s done via http://svn.code.sf.net/p/docutils
use std::collections::HashMap;
-use crate::target::Target;
+use crate::url::Url;
use crate::document_tree::{
Document,
HasChildren,
@@ -37,7 +37,7 @@ enum NamedTargetType {
LabeledFootnote(usize),
Citation,
InternalLink,
- ExternalLink(Target),
+ ExternalLink(Url),
IndirectLink(NameToken),
SectionTitle
}
diff --git a/src/target.rs b/src/target.rs
deleted file mode 100644
index 3ce7d75..0000000
--- a/src/target.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-use std::path::PathBuf;
-use std::fmt;
-use std::str::FromStr;
-use std::string::ParseError;
-
-use url::{self,Url};
-use serde_derive::Serialize;
-
-
-#[derive(Debug,PartialEq,Serialize,Clone)]
-#[serde(untagged)]
-pub enum Target {
- #[serde(serialize_with = "serialize_url")]
- Url(Url),
- Path(PathBuf),
-}
-
-impl From<Url> for Target {
- fn from(url: Url) -> Self {
- Target::Url(url)
- }
-}
-
-impl From<PathBuf> for Target {
- fn from(path: PathBuf) -> Self {
- Target::Path(path)
- }
-}
-
-
-impl fmt::Display for Target {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- use Target::*;
- match *self {
- Url (ref url) => write!(f, "{}", url),
- Path(ref path) => write!(f, "{}", path.display()),
- }
- }
-}
-
-
-impl FromStr for Target {
- type Err = ParseError;
- fn from_str(input: &str) -> Result<Self, Self::Err> {
- Ok(match Url::parse(input) {
- Ok(url) => url.into(),
- Err(_) => PathBuf::from(input.trim()).into(),
- })
- }
-}
-
-
-pub fn serialize_url<S>(url: &Url, serializer: S) -> Result<S::Ok, S::Error> where S: serde::ser::Serializer {
- serializer.serialize_str(url.as_str())
-}
diff --git a/src/url.rs b/src/url.rs
new file mode 100644
index 0000000..31a0536
--- /dev/null
+++ b/src/url.rs
@@ -0,0 +1,78 @@
+use std::fmt;
+use std::str::FromStr;
+
+use url::{self,ParseError};
+use serde_derive::Serialize;
+
+
+/// Reports whether `input` begins with a syntactically valid URL scheme
+/// followed by a colon.
+///
+/// A scheme is an ASCII letter followed by any number of ASCII letters,
+/// digits, `+`, `-` or `.`. Only the text before the first `:` is inspected;
+/// input without a colon, or with nothing before it, has no scheme.
+fn starts_with_scheme(input: &str) -> bool {
+    let scheme = match input.find(':') {
+        Some(colon) => &input[..colon],
+        None => return false,
+    };
+    // Validate the scheme only: checking the whole input would always trip
+    // over the ':' separator itself and reject every candidate.
+    let mut chars = scheme.chars();
+    match chars.next() {
+        Some(first) if first.is_ascii_alphabetic() => {}
+        // Empty scheme, or one that does not start with a letter.
+        _ => return false,
+    }
+    chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '+' || ch == '-' || ch == '.')
+}
+
+/// The string representation of a URL, either absolute or relative, that has
+/// been verified as a valid URL on construction.
+///
+/// The wrapped `String` field is private; the `serde(transparent)` attribute
+/// makes the value serialize as that inner string.
+#[derive(Debug,PartialEq,Serialize,Clone)]
+#[serde(transparent)]
+pub struct Url(String);
+
+impl Url {
+    /// Parses `input` as an absolute URL with `url::Url::parse` and stores
+    /// the string form of the parsed result.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `ParseError` reported by `url::Url::parse`.
+    pub fn parse_absolute(input: &str) -> Result<Self, ParseError> {
+        url::Url::parse(input).map(Self::from)
+    }
+
+    /// Checks that `input` is a valid relative URL and stores it verbatim.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ParseError::SetHostOnCannotBeABaseUrl` when `input` does not
+    /// start with `/` and `starts_with_scheme` reports a scheme, and
+    /// otherwise any error from resolving `input` against the placeholder
+    /// base URL.
+    pub fn parse_relative(input: &str) -> Result<Self, ParseError> {
+        if !input.starts_with('/') && starts_with_scheme(input) {
+            // If this is a URL at all, it is an absolute one. `url::ParseError`
+            // has no variant that really fits, so this one stands in.
+            return Err(ParseError::SetHostOnCannotBeABaseUrl);
+        }
+        // The scheme through which RsT documents are accessed is assumed to
+        // be hierarchical, so resolving against an arbitrary hierarchical
+        // base is enough to validate the reference.
+        let placeholder_base = url::Url::parse("https://a/b").unwrap();
+        // Only success matters here; the resolved URL itself is discarded.
+        url::Url::options()
+            .base_url(Some(&placeholder_base))
+            .parse(input)?;
+        Ok(Url(input.into()))
+    }
+
+    /// Borrows the stored URL text.
+    pub fn as_str(&self) -> &str {
+        &self.0
+    }
+}
+
+/// Wraps an already-parsed `url::Url` by taking its string form.
+impl From<url::Url> for Url {
+    fn from(url: url::Url) -> Self {
+        let text = url.into_string();
+        Url(text)
+    }
+}
+
+
+/// Displays the URL as its stored text.
+impl fmt::Display for Url {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+
+/// Parses a string as an absolute URL first and, if that fails, as a
+/// relative one; the error returned is the one from the relative attempt.
+impl FromStr for Url {
+    type Err = ParseError;
+    fn from_str(input: &str) -> Result<Self, Self::Err> {
+        match Url::parse_absolute(input) {
+            Ok(absolute) => Ok(absolute),
+            Err(_) => Url::parse_relative(input),
+        }
+    }
+}