about summary refs log tree commit diff stats
path: root/src/events/binary_reader.rs
diff options
context:
space:
mode:
author Edward Barnard 2018-05-17 20:30:41 +0100
committer Edward Barnard 2018-05-17 20:50:21 +0100
commit 2af279c344a6a917ce4102fec82e6cba8cf8b37c (patch)
tree a2d3746d4981e8e22e30b91fd3b4d1821444777a /src/events/binary_reader.rs
parent d8a4f4ef2a0e80f1dc6237678410807cce83cce0 (diff)
download rust-plist-2af279c344a6a917ce4102fec82e6cba8cf8b37c.tar.bz2
Reorganise crate.
Diffstat (limited to 'src/events/binary_reader.rs')
-rw-r--r-- src/events/binary_reader.rs 385
1 files changed, 385 insertions, 0 deletions
diff --git a/src/events/binary_reader.rs b/src/events/binary_reader.rs
new file mode 100644
index 0000000..e077904
--- /dev/null
+++ b/src/events/binary_reader.rs
@@ -0,0 +1,385 @@
+use byteorder::{BigEndian, ReadBytesExt};
+use std::io::{Read, Seek, SeekFrom};
+use std::mem::size_of;
+use std::string::{FromUtf16Error, FromUtf8Error};
+
+use events::Event;
+use {u64_to_usize, Date, Error, Result};
+
+impl From<FromUtf8Error> for Error {
+ fn from(_: FromUtf8Error) -> Error {
+ Error::InvalidData
+ }
+}
+
+impl From<FromUtf16Error> for Error {
+ fn from(_: FromUtf16Error) -> Error {
+ Error::InvalidData
+ }
+}
+
+/// One level of nesting that `read_next` is still walking.
+struct StackItem {
+ // Object references that have not been visited yet. `read_next` pops from
+ // the end, so callers store them in reverse of the order to be emitted.
+ object_refs: Vec<u64>,
+ // Which kind of container this entry stands for.
+ ty: StackType,
+}
+
+/// The kind of container a `StackItem` represents; decides what `read_next`
+/// returns once the item's references are exhausted.
+enum StackType {
+ // Exhausting this item yields `Event::EndArray`.
+ Array,
+ // Exhausting this item yields `Event::EndDictionary`.
+ Dict,
+ // Holds only the top-level object reference; exhausting it ends the stream.
+ Root,
+}
+
+/// Reader that walks a binary property list and yields one `Event` per step
+/// through its `Iterator` implementation.
+///
+/// Reference implementations of the format:
+/// https://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c
+/// https://hg.python.org/cpython/file/3.4/Lib/plistlib.py
+pub struct BinaryReader<R> {
+ // Containers currently being traversed, innermost last.
+ stack: Vec<StackItem>,
+ // Byte offset of each object, indexed by object reference. Loaded from the
+ // offset table by `read_trailer`.
+ object_offsets: Vec<u64>,
+ // Underlying source of plist bytes.
+ reader: R,
+ // Width in bytes of an object reference (1, 2, 4 or 8). Stays 0 until the
+ // trailer has been read, which `read_next` uses as the "not initialised" flag.
+ ref_size: u8,
+ // Set by the iterator after it has returned an error or reached the end.
+ finished: bool,
+ // The largest single allocation allowed for this Plist.
+ // Equal to the number of bytes in the Plist minus the magic and trailer.
+ max_allocation_bytes: usize,
+ // The maximum number of nested arrays and dicts allowed in the plist.
+ max_stack_depth: usize,
+ // The maximum number of objects that can be created. Default 10 * object_offsets.len().
+ // Binary plists can contain circular references.
+ max_objects: usize,
+ // The number of objects created so far.
+ current_objects: usize,
+}
+
+impl<R: Read + Seek> BinaryReader<R> {
+    /// Wraps `reader` without reading from it.
+    ///
+    /// Parsing is deferred: `ref_size`, `max_allocation_bytes` and
+    /// `max_objects` start at zero and are filled in when the trailer is read
+    /// on the first `read_next` call. The nesting limit is fixed at 200.
+    pub fn new(reader: R) -> BinaryReader<R> {
+        let max_stack_depth = 200;
+        BinaryReader {
+            reader,
+            finished: false,
+            ref_size: 0,
+            stack: Vec::new(),
+            object_offsets: Vec::new(),
+            max_stack_depth,
+            max_allocation_bytes: 0,
+            max_objects: 0,
+            current_objects: 0,
+        }
+    }
+
+    /// Reports whether `len` elements of `size` bytes each stay within
+    /// `max_allocation_bytes`.
+    ///
+    /// The byte count is computed with a saturating multiply, so an oversized
+    /// `len` cannot wrap around to a small value.
+    fn can_allocate(&self, len: u64, size: usize) -> bool {
+        let limit = self.max_allocation_bytes as u64;
+        let requested = len.saturating_mul(size as u64);
+        requested <= limit
+    }
+
+    /// Returns an empty vector with room for `len` elements, provided that
+    /// `len * size` bytes passes `can_allocate`.
+    ///
+    /// `size` is the per-element byte count used for the limit check and is
+    /// supplied by the caller independently of `T`.
+    ///
+    /// Returns `Error::InvalidData` when the request exceeds the limit.
+    fn allocate_vec<T>(&self, len: u64, size: usize) -> Result<Vec<T>> {
+        if !self.can_allocate(len, size) {
+            return Err(Error::InvalidData);
+        }
+        Ok(Vec::with_capacity(len as usize))
+    }
+
+    /// Validates the header, parses the trailer and loads the offset table.
+    ///
+    /// On success `max_allocation_bytes`, `object_offsets`, `max_objects` and
+    /// `ref_size` are initialised and a `Root` stack item holding the
+    /// top-level object reference is pushed, so the next `read_next` step
+    /// starts at that object.
+    ///
+    /// Returns `Error::InvalidData` when the magic is not `bplist00` or when
+    /// the offset size or reference size is not 1, 2, 4 or 8. Errors from the
+    /// underlying reader are propagated with `?`.
+    fn read_trailer(&mut self) -> Result<()> {
+        self.reader.seek(SeekFrom::Start(0))?;
+        let mut magic = [0; 8];
+        self.reader.read_exact(&mut magic)?;
+        if &magic != b"bplist00" {
+            return Err(Error::InvalidData);
+        }
+
+        // The trailer is the last 32 bytes of the file and starts with 6
+        // bytes of padding; position the reader just past that padding.
+        let trailer_start = self.reader.seek(SeekFrom::End(-32 + 6))?;
+
+        let offset_size = self.reader.read_u8()?;
+        match offset_size {
+            1 | 2 | 4 | 8 => (),
+            _ => return Err(Error::InvalidData),
+        }
+
+        let ref_size = self.reader.read_u8()?;
+        match ref_size {
+            1 | 2 | 4 | 8 => (),
+            _ => return Err(Error::InvalidData),
+        }
+
+        let num_objects = self.reader.read_u64::<BigEndian>()?;
+        let top_object = self.reader.read_u64::<BigEndian>()?;
+        let offset_table_offset = self.reader.read_u64::<BigEndian>()?;
+
+        // File size minus trailer and header, clamped to the largest `usize`.
+        // A plain `as usize` cast would wrap on targets where `usize` is
+        // narrower than `u64`, producing an arbitrary, too-small limit.
+        let payload_bytes = trailer_start.saturating_sub(8);
+        self.max_allocation_bytes = if payload_bytes > usize::max_value() as u64 {
+            usize::max_value()
+        } else {
+            payload_bytes as usize
+        };
+
+        // Read offset table
+        self.reader.seek(SeekFrom::Start(offset_table_offset))?;
+        self.object_offsets = self.read_ints(num_objects, offset_size)?;
+
+        // Saturate instead of overflowing when the table is very large.
+        self.max_objects = self.object_offsets.len().saturating_mul(10);
+
+        // Queue the top-level object; the seek to it happens in `read_next`.
+        self.stack.push(StackItem {
+            object_refs: vec![top_object],
+            ty: StackType::Root,
+        });
+
+        // `read_next` treats a non-zero `ref_size` as "trailer already read",
+        // so store it only after everything else has succeeded.
+        self.ref_size = ref_size;
+
+        Ok(())
+    }
+
+    /// Reads `len` consecutive big-endian unsigned integers, each `size`
+    /// bytes wide, and widens them to `u64`.
+    ///
+    /// The allocation is checked against the limit first. A `size` other
+    /// than 1, 2, 4 or 8 yields `Error::InvalidData` as soon as the first
+    /// element is attempted.
+    fn read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>> {
+        let mut values = self.allocate_vec(len, size as usize)?;
+        for _ in 0..len {
+            let value = match size {
+                1 => u64::from(self.reader.read_u8()?),
+                2 => u64::from(self.reader.read_u16::<BigEndian>()?),
+                4 => u64::from(self.reader.read_u32::<BigEndian>()?),
+                8 => self.reader.read_u64::<BigEndian>()?,
+                _ => return Err(Error::InvalidData),
+            };
+            values.push(value);
+        }
+        Ok(values)
+    }
+
+    /// Reads `len` object references, each `ref_size` bytes wide, from the
+    /// current reader position.
+    fn read_refs(&mut self, len: u64) -> Result<Vec<u64>> {
+        // Copy the width out first so `self` is free to be borrowed mutably.
+        let width = self.ref_size;
+        self.read_ints(len, width)
+    }
+
+    /// Resolves the length of a variable-sized object.
+    ///
+    /// `len` is the low nibble of the object's marker byte. Any value other
+    /// than `0x0f` is the length itself. `0x0f` means the length follows as
+    /// an integer object: one marker byte whose high nibble is `0x1` and
+    /// whose low nibble is the size exponent (0 to 3, i.e. 1, 2, 4 or 8
+    /// bytes), then the big-endian value.
+    ///
+    /// Returns `Error::InvalidData` when the following byte is not an
+    /// integer marker or names a size exponent above 3. Reader errors are
+    /// propagated with `?`.
+    fn read_object_len(&mut self, len: u8) -> Result<u64> {
+        if (len & 0x0f) != 0x0f {
+            return Ok(u64::from(len));
+        }
+
+        let marker = self.reader.read_u8()?;
+        // The extended length must be encoded as an integer object; anything
+        // else means the stream is corrupt or the offset is wrong.
+        if (marker & 0xf0) != 0x10 {
+            return Err(Error::InvalidData);
+        }
+
+        Ok(match marker & 0x0f {
+            0 => u64::from(self.reader.read_u8()?),
+            1 => u64::from(self.reader.read_u16::<BigEndian>()?),
+            2 => u64::from(self.reader.read_u32::<BigEndian>()?),
+            3 => self.reader.read_u64::<BigEndian>()?,
+            // Lengths wider than 8 bytes cannot be represented.
+            _ => return Err(Error::InvalidData),
+        })
+    }
+
+    /// Reads exactly `len` raw bytes from the current reader position.
+    ///
+    /// The buffer size is checked against the allocation limit before any
+    /// memory is reserved; an oversized `len` yields `Error::InvalidData`.
+    fn read_data(&mut self, len: u64) -> Result<Vec<u8>> {
+        let mut buffer: Vec<u8> = self.allocate_vec(len, size_of::<u8>())?;
+        buffer.resize(len as usize, 0u8);
+        self.reader.read_exact(buffer.as_mut_slice())?;
+        Ok(buffer)
+    }
+
+    /// Moves the reader to the start of the object named by `object_ref`
+    /// and returns the position reported by the seek.
+    ///
+    /// Returns `Error::InvalidData` when the reference has no entry in the
+    /// offset table.
+    fn seek_to_object(&mut self, object_ref: u64) -> Result<u64> {
+        let index = u64_to_usize(object_ref)?;
+        let offset = match self.object_offsets.get(index) {
+            Some(&offset) => offset,
+            None => return Err(Error::InvalidData),
+        };
+        let position = self.reader.seek(SeekFrom::Start(offset))?;
+        Ok(position)
+    }
+
+ // Advances the parser by one step and returns the event it produced.
+ //
+ // Returns `Ok(None)` when the stack is empty or the root item has been
+ // exhausted. Returns `Error::InvalidData` for unsupported or malformed
+ // markers, when the visited-object budget is exceeded, or when nesting
+ // grows beyond `max_stack_depth`. Reader errors are propagated with `?`.
+ fn read_next(&mut self) -> Result<Option<Event>> {
+ // `ref_size` is 0 only until `read_trailer` has stored a valid width.
+ if self.ref_size == 0 {
+ // Initialise here rather than in new
+ self.read_trailer()?;
+ }
+
+ // Take the next pending reference from the innermost open container.
+ let object_ref = match self.stack.last_mut() {
+ Some(stack_item) => stack_item.object_refs.pop(),
+ // Reached the end of the plist
+ None => return Ok(None),
+ };
+
+ match object_ref {
+ Some(object_ref) => {
+ // Budget on visited objects; the struct comment on `max_objects`
+ // notes that binary plists can contain circular references.
+ // NOTE(review): the comparison is `>`, so `max_objects + 1` objects
+ // can be visited before this fails.
+ if self.current_objects > self.max_objects {
+ return Err(Error::InvalidData);
+ }
+ self.current_objects += 1;
+ self.seek_to_object(object_ref)?;
+ }
+ None => {
+ // We're at the end of an array or dict. Pop the top stack item and return
+ let item = self.stack.pop().unwrap();
+ match item.ty {
+ StackType::Array => return Ok(Some(Event::EndArray)),
+ StackType::Dict => return Ok(Some(Event::EndDictionary)),
+ // We're at the end of the plist
+ StackType::Root => return Ok(None),
+ }
+ }
+ }
+
+ // The marker byte: high nibble selects the object type, low nibble is a
+ // size or length field whose meaning depends on the type.
+ let token = self.reader.read_u8()?;
+ let ty = (token & 0xf0) >> 4;
+ let size = token & 0x0f;
+
+ let result = match (ty, size) {
+ (0x0, 0x00) => return Err(Error::InvalidData), // null
+ (0x0, 0x08) => Some(Event::BooleanValue(false)),
+ (0x0, 0x09) => Some(Event::BooleanValue(true)),
+ (0x0, 0x0f) => return Err(Error::InvalidData), // fill
+ // Integers: the 1-, 2- and 4-byte forms are read unsigned and widened;
+ // only the 8-byte form is read as a signed value.
+ (0x1, 0) => Some(Event::IntegerValue(self.reader.read_u8()? as i64)),
+ (0x1, 1) => Some(Event::IntegerValue(
+ self.reader.read_u16::<BigEndian>()? as i64
+ )),
+ (0x1, 2) => Some(Event::IntegerValue(
+ self.reader.read_u32::<BigEndian>()? as i64
+ )),
+ (0x1, 3) => Some(Event::IntegerValue(self.reader.read_i64::<BigEndian>()?)),
+ (0x1, 4) => return Err(Error::InvalidData), // 128 bit int
+ (0x1, _) => return Err(Error::InvalidData), // variable length int
+ // Reals: 4-byte values are widened to `f64`; 8-byte values are used as is.
+ (0x2, 2) => Some(Event::RealValue(self.reader.read_f32::<BigEndian>()? as f64)),
+ (0x2, 3) => Some(Event::RealValue(self.reader.read_f64::<BigEndian>()?)),
+ (0x2, _) => return Err(Error::InvalidData), // odd length float
+ (0x3, 3) => {
+ // Date. Seconds since 1/1/2001 00:00:00.
+ let secs = self.reader.read_f64::<BigEndian>()?;
+ Some(Event::DateValue(Date::from_seconds_since_plist_epoch(
+ secs,
+ )?))
+ }
+ (0x4, n) => {
+ // Data
+ let len = self.read_object_len(n)?;
+ Some(Event::DataValue(self.read_data(len)?))
+ }
+ (0x5, n) => {
+ // ASCII string
+ // The bytes are decoded with `String::from_utf8`; a decode failure
+ // becomes `Error::InvalidData` through the `From` impl in this file.
+ let len = self.read_object_len(n)?;
+ let raw = self.read_data(len)?;
+ let string = String::from_utf8(raw)?;
+ Some(Event::StringValue(string))
+ }
+ (0x6, n) => {
+ // UTF-16 string
+ // The length counts 16-bit units, each read big-endian.
+ let len_utf16_codepoints = self.read_object_len(n)?;
+ let mut raw_utf16 = self.allocate_vec(len_utf16_codepoints, size_of::<u16>())?;
+
+ for _ in 0..len_utf16_codepoints {
+ raw_utf16.push(self.reader.read_u16::<BigEndian>()?);
+ }
+
+ let string = String::from_utf16(&raw_utf16)?;
+ Some(Event::StringValue(string))
+ }
+ (0xa, n) => {
+ // Array
+ let len = self.read_object_len(n)?;
+ let mut object_refs = self.read_refs(len)?;
+ // Reverse so we can pop off the end of the stack in order
+ object_refs.reverse();
+
+ self.stack.push(StackItem {
+ ty: StackType::Array,
+ object_refs: object_refs,
+ });
+
+ Some(Event::StartArray(Some(len)))
+ }
+ (0xd, n) => {
+ // Dict
+ // All key references are stored first, followed by all value references.
+ let len = self.read_object_len(n)?;
+ let key_refs = self.read_refs(len)?;
+ let value_refs = self.read_refs(len)?;
+
+ // NOTE(review): `len * 2` is an unchecked multiplication; by this point
+ // `len` has already passed the allocation check in both `read_refs` calls.
+ let mut object_refs = self.allocate_vec(len * 2, self.ref_size as usize)?;
+ let len = key_refs.len();
+ for i in 1..len + 1 {
+ // Reverse so we can pop off the end of the stack in order
+ // Value is pushed before its key so the key is popped first.
+ object_refs.push(value_refs[len - i]);
+ object_refs.push(key_refs[len - i]);
+ }
+
+ self.stack.push(StackItem {
+ ty: StackType::Dict,
+ object_refs: object_refs,
+ });
+
+ Some(Event::StartDictionary(Some(len as u64)))
+ }
+ // Every other marker is rejected.
+ (_, _) => return Err(Error::InvalidData),
+ };
+
+ // Prevent stack overflows when recursively parsing plist.
+ // Checked after any push above, so the `Root` item counts towards the depth.
+ if self.stack.len() > self.max_stack_depth {
+ return Err(Error::InvalidData);
+ }
+
+ Ok(result)
+ }
+}
+
+/// Exposes the reader as a stream of events.
+///
+/// The first error or end-of-plist marks the reader as finished; every call
+/// after that returns `None` without touching the underlying reader.
+impl<R: Read + Seek> Iterator for BinaryReader<R> {
+    type Item = Result<Event>;
+
+    fn next(&mut self) -> Option<Result<Event>> {
+        if self.finished {
+            return None;
+        }
+
+        // A successfully read event is the only outcome that keeps the
+        // iterator alive; both remaining outcomes fall through and end it.
+        let last_item = match self.read_next() {
+            Ok(Some(event)) => return Some(Ok(event)),
+            Ok(None) => None,
+            Err(error) => Some(Err(error)),
+        };
+        self.finished = true;
+        last_item
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use humantime::parse_rfc3339_weak;
+    use std::fs::File;
+    use std::path::Path;
+
+    use super::*;
+    use events::Event;
+    use events::Event::*;
+
+    /// Parses the fixture at `path` to completion, panicking if the file
+    /// cannot be opened or the reader yields an error.
+    fn collect_events(path: &str) -> Vec<Event> {
+        let file = File::open(&Path::new(path)).unwrap();
+        BinaryReader::new(file)
+            .map(|event| event.unwrap())
+            .collect()
+    }
+
+    /// The reference fixture must produce exactly this event sequence.
+    #[test]
+    fn streaming_parser() {
+        let events = collect_events("./tests/data/binary.plist");
+
+        let expected = &[
+            StartDictionary(Some(6)),
+            StringValue("Lines".to_owned()),
+            StartArray(Some(2)),
+            StringValue("It is a tale told by an idiot,".to_owned()),
+            StringValue("Full of sound and fury, signifying nothing.".to_owned()),
+            EndArray,
+            StringValue("Death".to_owned()),
+            IntegerValue(1564),
+            StringValue("Height".to_owned()),
+            RealValue(1.60),
+            StringValue("Birthdate".to_owned()),
+            DateValue(parse_rfc3339_weak("1981-05-16 11:32:06").unwrap().into()),
+            StringValue("Author".to_owned()),
+            StringValue("William Shakespeare".to_owned()),
+            StringValue("Data".to_owned()),
+            DataValue(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0]),
+            EndDictionary,
+        ];
+
+        assert_eq!(events, expected);
+    }
+
+    /// UTF-16 strings, both short and long, must decode to the right text.
+    #[test]
+    fn utf16_plist() {
+        let mut events = collect_events("./tests/data/utf16_bplist.plist");
+
+        assert_eq!(events[2], StringValue("\u{2605} or better".to_owned()));
+
+        let poem = match events[4] {
+            StringValue(ref mut text) => text,
+            _ => panic!("not a string"),
+        };
+        assert_eq!(poem.len(), 643);
+        assert_eq!(poem.pop().unwrap(), '\u{2605}');
+    }
+}