1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
//! This library offers the ability to represent XML documents as DOM trees,
//! allowing querying with CSS selectors.
//!
//! ```
//! extern crate rquery;
//!
//! use rquery::Document;
//!
//! fn main() {
//!   let document = Document::new_from_xml_file("tests/fixtures/sample.xml").unwrap();
//!
//!   let title = document.select("title").unwrap();
//!   assert_eq!(title.text(), "Sample Document");
//!   assert_eq!(title.attr("ref").unwrap(), "main-title");
//!
//!   let item_count = document.select_all("item").unwrap().count();
//!   assert_eq!(item_count, 2);
//!
//!   let item_titles = document.select_all("item > title").unwrap()
//!     .map(|element| element.text().clone())
//!     .collect::<Vec<String>>()
//!     .join(", ");
//!   assert_eq!(item_titles, "Another Sample, Other Sample");
//! }
//! ```

#![warn(missing_docs)]

extern crate xml;

mod selector;
mod document;

pub use self::document::{Document, DocumentError};
pub use self::selector::{ CompoundSelector, MatchType, Scope, Selector, UnexpectedTokenError };

use std::rc::Rc;
use std::iter::{ empty, once };
use std::marker::PhantomData;
use std::collections::HashMap;

/// Represents a single element in the DOM tree.
///
/// An element carries its tag name, its attributes, its text and an optional
/// list of reference-counted child elements.
#[derive(Clone, Debug)]
pub struct Element {
    /// Index of this node, exposed through `node_index()`. Selection uses it
    /// to skip elements whose index is lower than one already yielded.
    /// NOTE(review): presumably assigned in document order when the tree is
    /// built; confirm in the `document` module.
    node_index: usize,
    /// Name of the element's tag, exposed through `tag_name()`.
    tag_name: String,
    /// Direct children of this element. `None` is treated the same as having
    /// no children by `children_iter()` and `subtree_size()`.
    children: Option<Vec<Rc<Element>>>,
    /// Attribute values keyed by attribute name, looked up by `attr()`.
    attr_map: HashMap<String, String>,
    /// Text contained within the element, exposed through `text()`.
    text: String,
}

/// Errors which can be returned when performing a select operation.
///
/// `Element::select_all` only ever produces `ParseError`; `Element::select`
/// can additionally produce `NoMatchError`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum SelectError {
    /// Returned when the selector could not be parsed successfully. Wraps the
    /// error reported by `CompoundSelector::parse`.
    ParseError(UnexpectedTokenError),
    /// Returned when there were no matches for the selector, i.e. when the
    /// iterator of matches yields no first element.
    NoMatchError,
}

// Iterator adapter used by `Element::select_all` to drop repeated elements
// from a stream of candidates.
//
// It yields an element only when its `node_index` is at least `next_index`,
// and then raises `next_index` to one past the yielded index.
// NOTE(review): this only removes true duplicates if `inner_iter` produces
// elements in ascending `node_index` order; an element arriving with a lower
// index than one already yielded is dropped even if it was never yielded.
// Confirm that every caller satisfies the ordering assumption.
struct UniqueElements<'a, I: Iterator<Item=&'a Element> + 'a> {
    // Smallest `node_index` that may still be yielded; starts at 0 in
    // `select_all`.
    next_index: usize,
    // Source of candidate elements.
    inner_iter: I,
    // Zero-sized marker that makes the struct use the `'a` lifetime
    // parameter in a field; the `i32` pointee type carries no meaning.
    phantom_data: PhantomData<&'a i32>,
}

impl<'a, I: Iterator<Item=&'a Element>> Iterator for UniqueElements<'a, I> {
    type Item = &'a Element;

    /// Returns the next element of the inner iterator whose `node_index` is
    /// not lower than `next_index`, or `None` once the inner iterator is
    /// exhausted.
    ///
    /// Elements with a lower index are skipped silently. After yielding an
    /// element, `next_index` is moved to one past its index, so the same
    /// element (or any element with a lower index) is never yielded again.
    fn next(&mut self) -> Option<Self::Item> {
        // Copy the threshold so the closure does not borrow `self` while
        // `inner_iter` is mutably borrowed by `find`.
        let threshold = self.next_index;
        let found = self.inner_iter.find(|element| element.node_index >= threshold);

        if let Some(element) = found {
            self.next_index = element.node_index + 1;
        }

        found
    }
}

impl Element {
    /// Searches the element's descendants for elements matching the given
    /// CSS selector.
    ///
    /// The selector is parsed with `CompoundSelector::parse` into a sequence
    /// of compound selectors. Starting from `self`, each compound selector
    /// expands every current candidate into its direct children or into all
    /// of its descendants (depending on the selector's scope) and keeps the
    /// ones that match. The returned iterator is built from lazy adapters, so
    /// the tree is only walked as the iterator is consumed.
    ///
    /// # Errors
    ///
    /// Returns `SelectError::ParseError` if the selector cannot be parsed. A
    /// selector without matches is not an error here; the iterator is simply
    /// empty.
    pub fn select_all<'a>(&'a self, selector: &str) -> Result<Box<Iterator<Item=&'a Element> + 'a>, SelectError> {
        CompoundSelector::parse(selector)
            .map_err(SelectError::ParseError)
            .map(|compound_selectors| {
                // The search starts from this element alone; every compound
                // selector then narrows the candidates one step further.
                let initial_iterator: Box<Iterator<Item=&'a Element>> = Box::new(once(self));

                compound_selectors.into_iter()
                    .fold(initial_iterator, |candidates, compound_selector| {
                        // Read the scope before `compound_selector` is moved
                        // into the filter closure below.
                        let scope = compound_selector.scope;

                        let children_iter = candidates
                            .flat_map(move |candidate| {
                                match scope {
                                    Scope::IndirectChild => candidate.children_deep_iter(),
                                    Scope::DirectChild => candidate.children_iter(),
                                }
                            });

                        let matching_children_iter = children_iter
                            .filter(move |child| child.matches(&compound_selector));

                        // Nested candidates can reach the same element more
                        // than once, so repeated elements are dropped here.
                        // NOTE(review): the de-duplication relies on elements
                        // arriving in ascending `node_index` order; confirm
                        // this holds for every scope.
                        Box::new(UniqueElements {
                            next_index: 0,
                            inner_iter: matching_children_iter,
                            phantom_data: PhantomData,
                        })
                    })
            })
    }

    /// Just like `select_all` but only returns the first match.
    ///
    /// # Errors
    ///
    /// Returns `SelectError::ParseError` if the selector cannot be parsed and
    /// `SelectError::NoMatchError` if no element matches it.
    pub fn select<'a>(&'a self, selector: &str) -> Result<&'a Element, SelectError> {
        self.select_all(selector)
            .and_then(|mut candidates| candidates.next().ok_or(SelectError::NoMatchError))
    }

    /// Returns an iterator over the element’s direct children.
    ///
    /// The iterator is empty when the element has no child list.
    pub fn children_iter<'a>(&'a self) -> Box<Iterator<Item=&'a Element> + 'a> {
        match self.children {
            // The explicit return type turns each `&Rc<Element>` into a
            // plain `&Element` borrowed for `'a`.
            Some(ref children) => Box::new(children.iter().map(|node| -> &'a Element { node })),
            None => Box::new(empty::<&'a Element>()),
        }
    }

    /// Returns an iterator over all the element’s children, including indirect
    /// child elements.
    ///
    /// Each child is yielded before its own descendants, and a child's whole
    /// subtree is yielded before the next sibling.
    pub fn children_deep_iter<'a>(&'a self) -> Box<Iterator<Item=&'a Element> + 'a> {
        Box::new(
            self.children_iter()
                .flat_map(|child| once(child).chain(child.children_deep_iter()))
        )
    }

    /// Returns the size of the DOM subtree, including the current element.
    pub fn subtree_size(&self) -> usize {
        match self.children {
            // One for this element plus the size of every child's subtree.
            Some(ref children) => {
                1 + children.iter().map(|child| child.subtree_size()).sum::<usize>()
            },
            None => 1,
        }
    }

    /// Returns the name of the element’s tag.
    pub fn tag_name(&self) -> &str {
        &self.tag_name
    }

    /// Returns the value of the element attribute if found, or `None` when
    /// the element has no attribute with that name.
    pub fn attr(&self, attr_name: &str) -> Option<&String> {
        self.attr_map.get(attr_name)
    }

    /// Returns the text contained within the element.
    pub fn text(&self) -> &String {
        &self.text
    }

    /// Returns true if the element matches the given selector.
    ///
    /// Every part of the compound selector has to hold: a tag-name part
    /// compares against `tag_name()`, an id part against the `id` attribute
    /// and an attribute part against the named attribute's value. Only the
    /// selector's parts are examined; its scope is not. A selector without
    /// parts matches every element.
    pub fn matches(&self, compound_selector: &CompoundSelector) -> bool {
        compound_selector.parts.iter().all(|part| {
            match *part {
                Selector::TagName(ref name) =>
                    self.tag_name() == name,

                Selector::Id(ref id) =>
                    self.attr("id") == Some(id),

                Selector::Attribute(ref attr, MatchType::Equals, ref value) =>
                    self.attr(attr) == Some(value),
            }
        })
    }

    /// Returns the node index for the element.
    pub fn node_index(&self) -> usize {
        self.node_index
    }
}