// serde_yml/loader.rs
use crate::{
de::{Event, Progress},
libyml::{
error::Mark,
parser::{Anchor, Event as YamlEvent, Parser},
},
modules::error::{self, Error, ErrorImpl, Result},
};
use std::{borrow::Cow, collections::BTreeMap, io::Read, sync::Arc};
/// Loads YAML documents one at a time from a single input stream.
///
/// A `Loader` owns the low-level parser for the input and hands out one
/// `Document` per call to `next_document`.
#[derive(Debug)]
pub struct Loader<'input> {
/// The YAML parser used to parse the input.
///
/// The `Parser` type is defined in the `libyml` module and represents
/// a low-level YAML parser.
///
/// The `'input` lifetime parameter is the lifetime of the input data being
/// parsed, so the `Loader` cannot outlive borrowed input.
///
/// `next_document` sets this to `None` once the parser reports the end of
/// the stream; every later call then returns `None` immediately.
pub parser: Option<Parser<'input>>,
/// The number of documents the loader has started to load.
///
/// `next_document` increments this counter before it parses anything, so the
/// final call that only encounters the end of the stream is counted as well.
/// A value of zero means no document has been requested yet.
pub parsed_document_count: usize,
}
/// A single YAML document, flattened into a list of parse events.
///
/// Besides the events themselves, a `Document` carries the first error hit
/// while loading it (if any) and the bookkeeping needed to resolve aliases
/// back to the events of the nodes they refer to.
#[derive(Debug)]
pub struct Document<'input> {
/// The parsed events of the document.
///
/// This field contains a vector of `(Event<'input>, Mark)` tuples, where:
/// - `Event<'input>` represents a parsed YAML event, such as a scalar, sequence, or mapping.
/// The `'input` lifetime parameter indicates the lifetime of the input data associated
/// with the event.
/// - `Mark` represents the position in the input where the event was encountered.
///
/// Stream and document start/end markers are not stored here; the loader
/// consumes them without pushing an event.
pub events: Vec<(Event<'input>, Mark)>,
/// Any error encountered during parsing.
///
/// This field is an optional `Arc<ErrorImpl>`, where:
/// - `Arc` is a reference-counted smart pointer that allows multiple ownership of the error.
/// - `ErrorImpl` is the underlying error type that holds the details of the parsing error.
///
/// The loader sets this to `Some(error)` when the parser fails or when an
/// alias names an anchor that has not been defined. Loading stops at that
/// point, so `events` then holds only the events seen before the failure.
/// Otherwise it is `None`.
pub error: Option<Arc<ErrorImpl>>,
/// Map from anchor id to index in events.
///
/// The loader assigns a numeric id to each anchor definition it encounters.
/// This `BTreeMap` maps that id to the index, within the `events` vector, of
/// the event (scalar, sequence start or mapping start) that carried the anchor.
///
/// In YAML, an alias is a reference to a previously defined anchor. An
/// `Event::Alias` stores the anchor id, which is looked up here to find the
/// event to replay.
pub anchor_event_map: BTreeMap<usize, usize>,
/// Map from anchor id to anchor name.
///
/// Uses the same ids as `anchor_event_map`. The name is taken from the
/// anchor's `Debug` rendering with the surrounding double quotes trimmed.
pub anchor_names: BTreeMap<usize, String>,
}
impl<'input> Loader<'input> {
/// Constructs a new `Loader` instance from the given progress.
///
/// # Arguments
///
/// * `progress` - The progress representing the YAML input.
///
/// # Errors
///
/// Returns an error if there is an issue reading the input.
///
/// # Examples
///
/// ```
/// use serde_yml::loader::Loader;
/// use serde_yml::de::Progress;
///
/// let input = "---\nkey: value";
/// let progress = Progress::Str(input);
/// let loader_result = Loader::new(progress);
///
/// assert!(loader_result.is_ok());
/// ```
pub fn new(progress: Progress<'input>) -> Result<Self> {
let input = match progress {
Progress::Str(s) => Cow::Borrowed(s.as_bytes()),
Progress::Slice(bytes) => Cow::Borrowed(bytes),
Progress::Read(mut rdr) => {
let mut buffer = Vec::new();
if let Err(io_error) = rdr.read_to_end(&mut buffer) {
return Err(error::new(ErrorImpl::IoError(
io_error,
)));
}
Cow::Owned(buffer)
}
Progress::Iterable(_) | Progress::Document(_) => {
unreachable!()
}
Progress::Fail(err) => return Err(error::shared(err)),
};
Ok(Loader {
parser: Some(Parser::new(input)),
parsed_document_count: 0,
})
}
/// Advances the loader to the next document and returns it.
///
/// # Returns
///
/// Returns `Some(Document)` if a document is successfully parsed, or `None` if there are no more documents.
///
/// # Examples
///
/// ```
/// use serde_yml::loader::{Loader, Document};
/// use serde_yml::de::Progress;
///
/// let input = "---\nkey: value";
/// let progress = Progress::Str(input);
/// let mut loader = Loader::new(progress).unwrap();
/// let document = loader.next_document().unwrap();
///
/// assert_eq!(document.events.len(), 4);
/// ```
pub fn next_document(&mut self) -> Option<Document<'input>> {
let parser = match &mut self.parser {
Some(parser) => parser,
None => return None,
};
let first = self.parsed_document_count == 0;
self.parsed_document_count += 1;
let mut anchors = BTreeMap::new();
let mut document = Document {
events: Vec::new(),
error: None,
anchor_event_map: BTreeMap::new(),
anchor_names: BTreeMap::new(),
};
let anchor_name = |anchor: &Anchor| {
format!("{:?}", anchor)
.trim_start_matches("\"")
.trim_end_matches("\"")
.to_owned()
};
loop {
let (event, mark) = match parser.parse_next_event() {
Ok((event, mark)) => (event, mark),
Err(err) => {
document.error = Some(Error::from(err).shared());
return Some(document);
}
};
let event = match event {
YamlEvent::StreamStart => continue,
YamlEvent::StreamEnd => {
self.parser = None;
return if first {
if document.events.is_empty() {
document.events.push((Event::Void, mark));
}
Some(document)
} else {
None
};
}
YamlEvent::DocumentStart => continue,
YamlEvent::DocumentEnd => return Some(document),
YamlEvent::Alias(alias) => match anchors.get(&alias) {
Some(id) => Event::Alias(*id),
None => {
document.error = Some(
error::new(ErrorImpl::UnknownAnchor(mark))
.shared(),
);
return Some(document);
}
},
YamlEvent::Scalar(mut scalar) => {
if let Some(anchor) = scalar.anchor.take() {
let id = anchors.len();
document.anchor_names.insert(id, anchor_name(&anchor));
document
.anchor_event_map
.insert(id, document.events.len());
anchors.insert(anchor, id);
}
Event::Scalar(scalar)
}
YamlEvent::SequenceStart(mut sequence_start) => {
if let Some(anchor) = sequence_start.anchor.take() {
let id = anchors.len();
document.anchor_names.insert(id, anchor_name(&anchor));
document
.anchor_event_map
.insert(id, document.events.len());
anchors.insert(anchor, id);
}
Event::SequenceStart(sequence_start)
}
YamlEvent::SequenceEnd => Event::SequenceEnd,
YamlEvent::MappingStart(mut mapping_start) => {
if let Some(anchor) = mapping_start.anchor.take() {
let id = anchors.len();
document.anchor_names.insert(id, anchor_name(&anchor));
document
.anchor_event_map
.insert(id, document.events.len());
anchors.insert(anchor, id);
}
Event::MappingStart(mapping_start)
}
YamlEvent::MappingEnd => Event::MappingEnd,
};
document.events.push((event, mark));
}
}
}