// serde_yml/loader.rs

use crate::{
    de::{Event, Progress},
    libyml::{
        error::Mark,
        parser::{Anchor, Event as YamlEvent, Parser},
    },
    modules::error::{self, Error, ErrorImpl, Result},
};
use std::{borrow::Cow, collections::BTreeMap, io::Read, sync::Arc};

/// Loads YAML documents one at a time from a single input stream.
///
/// A `Loader` wraps a low-level parser and hands out one [`Document`] per call
/// to [`Loader::next_document`].
#[derive(Debug)]
pub struct Loader<'input> {
    /// The low-level YAML parser that produces events from the input.
    ///
    /// The `Parser` type is defined in the `libyml` module. The `'input`
    /// lifetime ties the parser (and therefore the `Loader`) to the input data
    /// being parsed.
    ///
    /// This is `Some` while the stream still has events to produce and is set
    /// to `None` by `next_document` once the end of the stream has been
    /// reached; every later call to `next_document` then returns `None`.
    pub parser: Option<Parser<'input>>,

    /// The number of times a document has been requested from the parser.
    ///
    /// `next_document` increments this counter on every call made while
    /// `parser` is still `Some`, including the call that discovers the end of
    /// the stream. A value of `0` therefore means no document has been
    /// requested yet.
    pub parsed_document_count: usize,
}

/// A single YAML document, flattened into a list of events.
///
/// Instances are produced by `Loader::next_document`.
#[derive(Debug)]
pub struct Document<'input> {
    /// The parsed events of the document, in the order they were encountered.
    ///
    /// Each entry is an `(Event<'input>, Mark)` tuple, where:
    /// - `Event<'input>` is a parsed YAML event, such as a scalar, the start or end of a
    ///   sequence or mapping, or an alias. The `'input` lifetime is the lifetime of the
    ///   input data associated with the event.
    /// - `Mark` is the position in the input where the event was encountered.
    ///
    /// If parsing failed part-way through, this holds the events that were parsed
    /// before the failure.
    pub events: Vec<(Event<'input>, Mark)>,

    /// Any error encountered while parsing this document.
    ///
    /// The error is stored as an `Arc<ErrorImpl>`, where:
    /// - `Arc` is a reference-counted smart pointer that allows multiple ownership of the error.
    /// - `ErrorImpl` is the underlying error type that holds the details of the parsing error.
    ///
    /// This is `Some(error)` if the parser reported an error or an alias referred to an
    /// unknown anchor. Otherwise it is `None`.
    pub error: Option<Arc<ErrorImpl>>,

    /// Map from anchor id to index in `events`.
    ///
    /// The key is the numeric id the loader assigned to an anchor; the value is the index
    /// in the `events` vector of the event that carried that anchor.
    ///
    /// In YAML, an alias is a reference to a previously defined anchor. An alias is stored
    /// in `events` as an alias event holding the anchor id, and that id is looked up here
    /// to find the event the alias refers to.
    pub anchor_event_map: BTreeMap<usize, usize>,

    /// Map from anchor id to anchor name.
    ///
    /// The key is the same loader-assigned id used in `anchor_event_map`; the value is the
    /// anchor's name rendered as a `String`.
    pub anchor_names: BTreeMap<usize, String>,
}

impl<'input> Loader<'input> {
    /// Constructs a new `Loader` instance from the given progress.
    ///
    /// # Arguments
    ///
    /// * `progress` - The progress representing the YAML input.
    ///
    /// # Errors
    ///
    /// Returns an error if there is an issue reading the input.
    ///
    /// # Examples
    ///
    /// ```
    /// use serde_yml::loader::Loader;
    /// use serde_yml::de::Progress;
    ///
    /// let input = "---\nkey: value";
    /// let progress = Progress::Str(input);
    /// let loader_result = Loader::new(progress);
    ///
    /// assert!(loader_result.is_ok());
    /// ```
    pub fn new(progress: Progress<'input>) -> Result<Self> {
        let input = match progress {
            Progress::Str(s) => Cow::Borrowed(s.as_bytes()),
            Progress::Slice(bytes) => Cow::Borrowed(bytes),
            Progress::Read(mut rdr) => {
                let mut buffer = Vec::new();
                if let Err(io_error) = rdr.read_to_end(&mut buffer) {
                    return Err(error::new(ErrorImpl::IoError(
                        io_error,
                    )));
                }
                Cow::Owned(buffer)
            }
            Progress::Iterable(_) | Progress::Document(_) => {
                unreachable!()
            }
            Progress::Fail(err) => return Err(error::shared(err)),
        };

        Ok(Loader {
            parser: Some(Parser::new(input)),
            parsed_document_count: 0,
        })
    }

    /// Advances the loader to the next document and returns it.
    ///
    /// # Returns
    ///
    /// Returns `Some(Document)` if a document is successfully parsed, or `None` if there are no more documents.
    ///
    /// # Examples
    ///
    /// ```
    /// use serde_yml::loader::{Loader, Document};
    /// use serde_yml::de::Progress;
    ///
    /// let input = "---\nkey: value";
    /// let progress = Progress::Str(input);
    /// let mut loader = Loader::new(progress).unwrap();
    /// let document = loader.next_document().unwrap();
    ///
    /// assert_eq!(document.events.len(), 4);
    /// ```
    pub fn next_document(&mut self) -> Option<Document<'input>> {
        let parser = match &mut self.parser {
            Some(parser) => parser,
            None => return None,
        };

        let first = self.parsed_document_count == 0;
        self.parsed_document_count += 1;

        let mut anchors = BTreeMap::new();
        let mut document = Document {
            events: Vec::new(),
            error: None,
            anchor_event_map: BTreeMap::new(),
            anchor_names: BTreeMap::new(),
        };

        let anchor_name = |anchor: &Anchor| {
            format!("{:?}", anchor)
                .trim_start_matches("\"")
                .trim_end_matches("\"")
                .to_owned()
        };

        loop {
            let (event, mark) = match parser.parse_next_event() {
                Ok((event, mark)) => (event, mark),
                Err(err) => {
                    document.error = Some(Error::from(err).shared());
                    return Some(document);
                }
            };
            let event = match event {
                YamlEvent::StreamStart => continue,
                YamlEvent::StreamEnd => {
                    self.parser = None;
                    return if first {
                        if document.events.is_empty() {
                            document.events.push((Event::Void, mark));
                        }
                        Some(document)
                    } else {
                        None
                    };
                }
                YamlEvent::DocumentStart => continue,
                YamlEvent::DocumentEnd => return Some(document),

                YamlEvent::Alias(alias) => match anchors.get(&alias) {
                    Some(id) => Event::Alias(*id),
                    None => {
                        document.error = Some(
                            error::new(ErrorImpl::UnknownAnchor(mark))
                                .shared(),
                        );
                        return Some(document);
                    }
                },
                YamlEvent::Scalar(mut scalar) => {
                    if let Some(anchor) = scalar.anchor.take() {
                        let id = anchors.len();
                        document.anchor_names.insert(id, anchor_name(&anchor));
                        document
                            .anchor_event_map
                            .insert(id, document.events.len());
                        anchors.insert(anchor, id);
                    }
                    Event::Scalar(scalar)
                }
                YamlEvent::SequenceStart(mut sequence_start) => {
                    if let Some(anchor) = sequence_start.anchor.take() {
                        let id = anchors.len();
                        document.anchor_names.insert(id, anchor_name(&anchor));
                        document
                            .anchor_event_map
                            .insert(id, document.events.len());
                        anchors.insert(anchor, id);
                    }
                    Event::SequenceStart(sequence_start)
                }
                YamlEvent::SequenceEnd => Event::SequenceEnd,
                YamlEvent::MappingStart(mut mapping_start) => {
                    if let Some(anchor) = mapping_start.anchor.take() {
                        let id = anchors.len();
                        document.anchor_names.insert(id, anchor_name(&anchor));
                        document
                            .anchor_event_map
                            .insert(id, document.events.len());
                        anchors.insert(anchor, id);
                    }
                    Event::MappingStart(mapping_start)
                }
                YamlEvent::MappingEnd => Event::MappingEnd,
            };
            document.events.push((event, mark));
        }
    }
}