use byteorder::{BigEndian, ReadBytesExt}; use chrono::{Duration, TimeZone, UTC}; use std::io::{Cursor, Read, Seek, SeekFrom}; use std::string::{FromUtf8Error, FromUtf16Error}; use {Error, Result, PlistEvent, u64_to_usize}; impl From for Error { fn from(_: FromUtf8Error) -> Error { Error::InvalidData } } impl From for Error { fn from(_: FromUtf16Error) -> Error { Error::InvalidData } } struct StackItem { object_refs: Vec, ty: StackType, } enum StackType { Array, Dict, Root, } /// https://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c /// https://hg.python.org/cpython/file/3.4/Lib/plistlib.py pub struct EventReader { stack: Vec, object_offsets: Vec, reader: R, ref_size: u8, finished: bool, } impl EventReader { pub fn new(reader: R) -> EventReader { EventReader { stack: Vec::new(), object_offsets: Vec::new(), reader: reader, ref_size: 0, finished: false, } } fn read_trailer(&mut self) -> Result<()> { try!(self.reader.seek(SeekFrom::Start(0))); let mut magic = [0; 8]; try!(self.reader.read(&mut magic)); if &magic != b"bplist00" { return Err(Error::InvalidData); } // Trailer starts with 6 bytes of padding try!(self.reader.seek(SeekFrom::End(-32 + 6))); let offset_size = try!(self.reader.read_u8()); self.ref_size = try!(self.reader.read_u8()); let num_objects = try!(self.reader.read_u64::()); let top_object = try!(self.reader.read_u64::()); let offset_table_offset = try!(self.reader.read_u64::()); // Read offset table try!(self.reader.seek(SeekFrom::Start(offset_table_offset))); self.object_offsets = try!(self.read_ints(num_objects, offset_size)); // Seek to top object self.stack.push(StackItem { object_refs: vec![top_object], ty: StackType::Root, }); Ok(()) } fn read_ints(&mut self, len: u64, size: u8) -> Result> { let len = try!(u64_to_usize(len)); let mut ints = Vec::with_capacity(len); // TODO: Is the match hoisted out of the loop? for _ in 0..len { match size { 1 => ints.push(try!(self.reader.read_u8()) as u64), 2 => ints.push(try!(self.reader.read_u16::()) as u64), 4 => ints.push(try!(self.reader.read_u32::()) as u64), 8 => ints.push(try!(self.reader.read_u64::()) as u64), _ => return Err(Error::InvalidData), } } Ok(ints) } fn read_refs(&mut self, len: u64) -> Result> { let ref_size = self.ref_size; self.read_ints(len, ref_size) } fn read_object_len(&mut self, len: u8) -> Result { if (len & 0x0f) == 0x0f { let len_power_of_two = try!(self.reader.read_u8()) & 0x03; Ok(match len_power_of_two { 0 => try!(self.reader.read_u8()) as u64, 1 => try!(self.reader.read_u16::()) as u64, 2 => try!(self.reader.read_u32::()) as u64, 3 => try!(self.reader.read_u64::()), _ => return Err(Error::InvalidData), }) } else { Ok(len as u64) } } fn read_data(&mut self, len: u64) -> Result> { let len = try!(u64_to_usize(len)); let mut data = vec![0; len]; let mut total_read = 0; while total_read < len { let read = try!(self.reader.read(&mut data[total_read..])); if read == 0 { return Err(Error::UnexpectedEof); } total_read += read; } Ok(data) } fn seek_to_object(&mut self, object_ref: u64) -> Result { let object_ref = try!(u64_to_usize(object_ref)); let offset = *&self.object_offsets[object_ref]; let pos = try!(self.reader.seek(SeekFrom::Start(offset))); Ok(pos) } fn read_next(&mut self) -> Result> { if self.ref_size == 0 { // Initialise here rather than in new try!(self.read_trailer()); } let object_ref = match self.stack.last_mut() { Some(stack_item) => stack_item.object_refs.pop(), // Reached the end of the plist None => return Ok(None), }; match object_ref { Some(object_ref) => { try!(self.seek_to_object(object_ref)); } None => { // We're at the end of an array or dict. Pop the top stack item and return let item = self.stack.pop().unwrap(); match item.ty { StackType::Array => return Ok(Some(PlistEvent::EndArray)), StackType::Dict => return Ok(Some(PlistEvent::EndDictionary)), // We're at the end of the plist StackType::Root => return Ok(None), } } } let token = try!(self.reader.read_u8()); let ty = (token & 0xf0) >> 4; let size = token & 0x0f; let result = match (ty, size) { (0x0, 0x00) => return Err(Error::InvalidData), // null (0x0, 0x08) => Some(PlistEvent::BooleanValue(false)), (0x0, 0x09) => Some(PlistEvent::BooleanValue(true)), (0x0, 0x0f) => return Err(Error::InvalidData), // fill (0x1, 0) => Some(PlistEvent::IntegerValue(try!(self.reader.read_u8()) as i64)), (0x1, 1) => { Some(PlistEvent::IntegerValue(try!(self.reader.read_u16::()) as i64)) } (0x1, 2) => { Some(PlistEvent::IntegerValue(try!(self.reader.read_u32::()) as i64)) } (0x1, 3) => Some(PlistEvent::IntegerValue(try!(self.reader.read_i64::()))), (0x1, 4) => return Err(Error::InvalidData), // 128 bit int (0x1, _) => return Err(Error::InvalidData), // variable length int (0x2, 2) => { Some(PlistEvent::RealValue(try!(self.reader.read_f32::()) as f64)) } (0x2, 3) => Some(PlistEvent::RealValue(try!(self.reader.read_f64::()))), (0x2, _) => return Err(Error::InvalidData), // odd length float (0x3, 3) => {; // Date // Seconds since 1/1/2001 00:00:00. let timestamp = try!(self.reader.read_f64::()); let millis = timestamp * 1_000.0; // Chrono's Duration can only millisecond values between ::std::i64::MIN and // ::std::i64::MAX. if millis > ::std::i64::MAX as f64 || millis < ::std::i64::MIN as f64 { return Err(Error::InvalidData); } let whole_millis = millis.floor(); let submilli_nanos = ((millis - whole_millis) * 1_000_000.0).floor(); let dur = Duration::milliseconds(whole_millis as i64); let dur = dur + Duration::nanoseconds(submilli_nanos as i64); let plist_epoch = UTC.ymd(2001, 1, 1).and_hms(0, 0, 0); let date = try!(plist_epoch.checked_add(dur).ok_or(Error::InvalidData)); Some(PlistEvent::DateValue(date)) } (0x4, n) => { // Data let len = try!(self.read_object_len(n)); Some(PlistEvent::DataValue(try!(self.read_data(len)))) } (0x5, n) => { // ASCII string let len = try!(self.read_object_len(n)); let raw = try!(self.read_data(len)); let string = try!(String::from_utf8(raw)); Some(PlistEvent::StringValue(string)) } (0x6, n) => { // UTF-16 string // n is the length of 16 bit code units // len is the number of bytes let len = try!(self.read_object_len(n)) * 2; let raw = try!(self.read_data(len)); let mut cursor = Cursor::new(raw); let len_div_2 = try!(u64_to_usize(len / 2)); let mut raw_utf16 = Vec::with_capacity(len_div_2); while cursor.position() < len { raw_utf16.push(try!(cursor.read_u16::())) } let string = try!(String::from_utf16(&raw_utf16)); Some(PlistEvent::StringValue(string)) } (0xa, n) => { // Array let len = try!(self.read_object_len(n)); let mut object_refs = try!(self.read_refs(len)); // Reverse so we can pop off the end of the stack in order object_refs.reverse(); self.stack.push(StackItem { ty: StackType::Array, object_refs: object_refs, }); Some(PlistEvent::StartArray(Some(len))) } (0xd, n) => { // Dict let len = try!(self.read_object_len(n)); let key_refs = try!(self.read_refs(len)); let value_refs = try!(self.read_refs(len)); let len_mul_2 = try!(u64_to_usize(len * 2)); let len = try!(u64_to_usize(len)); let mut object_refs = Vec::with_capacity(len_mul_2); for i in 1..len + 1 { // Reverse so we can pop off the end of the stack in order object_refs.push(value_refs[len - i]); object_refs.push(key_refs[len - i]); } self.stack.push(StackItem { ty: StackType::Dict, object_refs: object_refs, }); Some(PlistEvent::StartDictionary(Some(len as u64))) } (_, _) => return Err(Error::InvalidData), }; Ok(result) } } impl Iterator for EventReader { type Item = Result; fn next(&mut self) -> Option> { if self.finished { None } else { match self.read_next() { Ok(Some(event)) => Some(Ok(event)), Err(err) => { self.finished = true; Some(Err(err)) } Ok(None) => { self.finished = true; None } } } } } #[cfg(test)] mod tests { use chrono::{TimeZone, UTC}; use std::fs::File; use std::path::Path; use super::*; use PlistEvent; #[test] fn streaming_parser() { use PlistEvent::*; let reader = File::open(&Path::new("./tests/data/binary.plist")).unwrap(); let streaming_parser = EventReader::new(reader); let events: Vec = streaming_parser.map(|e| e.unwrap()).collect(); let comparison = &[StartDictionary(Some(6)), StringValue("Lines".to_owned()), StartArray(Some(2)), StringValue("It is a tale told by an idiot,".to_owned()), StringValue("Full of sound and fury, signifying nothing.".to_owned()), EndArray, StringValue("Death".to_owned()), IntegerValue(1564), StringValue("Height".to_owned()), RealValue(1.60), StringValue("Birthdate".to_owned()), DateValue(UTC.ymd(1981, 05, 16).and_hms(11, 32, 06)), StringValue("Author".to_owned()), StringValue("William Shakespeare".to_owned()), StringValue("Data".to_owned()), DataValue(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0]), EndDictionary]; assert_eq!(events, comparison); } #[test] fn utf16_plist() { use PlistEvent::*; let reader = File::open(&Path::new("./tests/data/utf16_bplist.plist")).unwrap(); let streaming_parser = EventReader::new(reader); let mut events: Vec = streaming_parser.map(|e| e.unwrap()).collect(); assert_eq!(events[2], StringValue("\u{2605} or better".to_owned())); let poem = if let StringValue(ref mut poem) = events[4] { poem } else { panic!("not a string") }; assert_eq!(poem.len(), 643); assert_eq!(poem.pop().unwrap(), '\u{2605}'); } }