tiger_lib/parse/
json.rs

1//! Parse a JSON file into a `Block`.
2//!
3//! `Block` is used, instead of a JSON-specific representation, for compatibility with the rest of the code.
4//! Unfortunately can't use serde-json because we need the locations for error reporting.
5
6use std::fs::read_to_string;
7use std::mem::{swap, take};
8
9use crate::block::Eq::Single;
10use crate::block::{BV, Block, Comparator};
11use crate::fileset::FileEntry;
12use crate::report::{ErrorKey, err, warn};
13use crate::token::{Loc, Token};
14
/// Which kind of token, if any, the character scanner is currently inside.
#[derive(Clone, Copy, Debug)]
enum State {
    /// Between tokens.
    Neutral,
    /// Inside a double-quoted string, after the opening quote.
    QString,
    /// Inside an unquoted alphabetic word.
    Id,
    /// Inside a numeric literal.
    Number,
}
22
/// The parse state of one nesting level (one `{...}` or `[...]`, or the file's top level).
struct ParseLevel {
    /// The block that collects the items parsed at this level.
    block: Block,
    /// A key that has been read and is still waiting for its value.
    key: Option<Token>,
    /// Set after a key was read; cleared when the `:` is seen.
    expect_colon: bool,
    /// Set after a value was added; cleared when the `,` is seen.
    expect_comma: bool,
    /// The bracket character that opened this level (`{` or `[`).
    opening_bracket: char,
    /// The bracket character that is expected to close this level.
    closing_bracket: char,
}
31
/// Builds nested `Block`s from the stream of tokens and punctuation found by the scanner.
struct Parser {
    /// The innermost level, the one currently being filled.
    current: ParseLevel,
    /// The enclosing levels that are suspended while `current` is parsed; innermost last.
    stack: Vec<ParseLevel>,
}
36
37impl Parser {
38    fn unknown_char(c: char, loc: Loc) {
39        let msg = format!("Unrecognized character {c}");
40        err(ErrorKey::ParseError).msg(msg).loc(loc).push();
41    }
42
43    fn colon(&mut self, loc: Loc) {
44        if !self.current.expect_colon {
45            err(ErrorKey::ParseError).msg("unexpected `:`").loc(loc).push();
46        }
47        self.current.expect_colon = false;
48    }
49
50    fn check_colon(&mut self, loc: Loc) {
51        if self.current.expect_colon {
52            err(ErrorKey::ParseError).msg("expected `:`").loc(loc).push();
53            self.current.expect_comma = false;
54        }
55    }
56
57    fn comma(&mut self, loc: Loc) {
58        if !self.current.expect_comma {
59            err(ErrorKey::ParseError).msg("unexpected `,`").loc(loc).push();
60        }
61        self.current.expect_comma = false;
62    }
63
64    fn check_comma(&mut self, loc: Loc) {
65        if self.current.expect_comma {
66            err(ErrorKey::ParseError).msg("expected `,`").loc(loc).push();
67            self.current.expect_comma = false;
68        }
69    }
70
71    fn token(&mut self, token: Token) {
72        self.check_comma(token.loc);
73        self.check_colon(token.loc);
74        if let Some(key) = self.current.key.take() {
75            self.current.block.add_key_bv(key, Comparator::Equals(Single), BV::Value(token));
76            self.current.expect_comma = true;
77        } else if self.current.opening_bracket == '[' {
78            self.current.block.add_value(token);
79            self.current.expect_comma = true;
80        } else {
81            self.current.key = Some(token);
82            self.current.expect_colon = true;
83        }
84    }
85
86    fn null(&mut self, null: Token) {
87        self.check_comma(null.loc);
88        self.check_colon(null.loc);
89        if self.current.key.take().is_some() || self.current.opening_bracket == '[' {
90            self.current.expect_comma = true;
91        } else {
92            // A null key is weird but acceptable
93            self.current.key = Some(null);
94            self.current.expect_colon = true;
95        }
96    }
97
98    fn block_value(&mut self, block: Block) {
99        if let Some(key) = self.current.key.take() {
100            self.current.block.add_key_bv(key, Comparator::Equals(Single), BV::Block(block));
101        } else {
102            self.current.block.add_block(block);
103        }
104        self.current.expect_comma = true;
105    }
106
107    fn end_assign(&mut self) {
108        if let Some(key) = self.current.key.take() {
109            let msg = "key without value";
110            err(ErrorKey::ParseError).msg(msg).loc(&key).push();
111            self.current.block.add_value(key);
112        }
113    }
114
115    fn open_bracket(&mut self, loc: Loc, bracket: char) {
116        self.check_colon(loc);
117        self.check_comma(loc);
118        if self.current.opening_bracket == '{' && self.current.key.is_none() {
119            err(ErrorKey::ParseError).msg("expected key not block").loc(loc).push();
120        }
121        let mut new_level = ParseLevel {
122            block: Block::new(loc),
123            key: None,
124            expect_colon: false,
125            expect_comma: false,
126            opening_bracket: bracket,
127            closing_bracket: if bracket == '{' { '}' } else { ']' },
128        };
129        swap(&mut new_level, &mut self.current);
130        self.stack.push(new_level);
131    }
132
133    fn close_bracket(&mut self, loc: Loc, bracket: char) {
134        self.end_assign();
135        if let Some(mut prev_level) = self.stack.pop() {
136            swap(&mut self.current, &mut prev_level);
137            if prev_level.closing_bracket != bracket {
138                let msg = format!("this {bracket} closes a {}", self.current.opening_bracket);
139                err(ErrorKey::ParseError)
140                    .strong()
141                    .msg(msg)
142                    .loc(loc)
143                    .loc_msg(prev_level.block.loc, "here")
144                    .push();
145            }
146            self.block_value(prev_level.block);
147            if loc.column == 1 && !self.stack.is_empty() {
148                let msg = "possible bracket error";
149                let info = "This closing bracket is at the start of a line but does not end a top-level item.";
150                warn(ErrorKey::BracePlacement).msg(msg).info(info).loc(loc).push();
151            }
152        } else {
153            err(ErrorKey::ParseError).msg(format!("Unexpected {bracket}")).loc(loc).push();
154        }
155    }
156
157    fn eof(mut self) -> Block {
158        self.end_assign();
159        while let Some(mut prev_level) = self.stack.pop() {
160            let msg = format!("Opening {} was never closed", prev_level.opening_bracket);
161            err(ErrorKey::ParseError).msg(msg).loc(prev_level.block.loc).push();
162            swap(&mut self.current, &mut prev_level);
163            self.block_value(prev_level.block);
164        }
165        self.current.block
166    }
167}
168
169fn parse(blockloc: Loc, content: &str) -> Block {
170    let mut parser = Parser {
171        current: ParseLevel {
172            block: Block::new(blockloc),
173            key: None,
174            expect_colon: false,
175            expect_comma: false,
176            opening_bracket: '[',
177            closing_bracket: ']',
178        },
179        stack: Vec::new(),
180    };
181    let mut state = State::Neutral;
182    let mut token_start = blockloc;
183    let mut current_id = String::new();
184
185    let mut loc = blockloc;
186    for c in content.chars() {
187        match state {
188            State::Neutral => {
189                if c.is_ascii_whitespace() {
190                } else if c == '"' {
191                    token_start = loc;
192                    state = State::QString;
193                } else if c.is_alphabetic() {
194                    token_start = loc;
195                    current_id.push(c);
196                    state = State::Id;
197                } else if c.is_ascii_digit() {
198                    token_start = loc;
199                    current_id.push(c);
200                    state = State::Number;
201                } else if c == ':' {
202                    parser.colon(loc);
203                } else if c == ',' {
204                    parser.comma(loc);
205                } else if c == '{' {
206                    parser.open_bracket(loc, '{');
207                } else if c == '}' {
208                    parser.close_bracket(loc, '}');
209                } else if c == '[' {
210                    parser.open_bracket(loc, '[');
211                } else if c == ']' {
212                    parser.close_bracket(loc, ']');
213                } else {
214                    Parser::unknown_char(c, loc);
215                }
216            }
217            State::Id => {
218                if c.is_alphabetic() {
219                    current_id.push(c);
220                } else {
221                    let token = Token::new(&take(&mut current_id), token_start);
222                    if token.is("true") || token.is("false") {
223                        parser.token(token);
224                    } else if token.is("null") {
225                        parser.null(token);
226                    } else {
227                        let msg = "unexpected unquoted string";
228                        let info = "expected only true or false or null";
229                        warn(ErrorKey::ParseError).msg(msg).info(info).loc(token).push();
230                    }
231                    state = State::Neutral;
232                    if c.is_ascii_whitespace() {
233                    } else if c == '"' {
234                        token_start = loc;
235                        state = State::QString;
236                    } else if c == ':' {
237                        parser.colon(loc);
238                    } else if c == ',' {
239                        parser.comma(loc);
240                    } else if c == '{' {
241                        parser.open_bracket(loc, '{');
242                    } else if c == '}' {
243                        parser.close_bracket(loc, '}');
244                    } else if c == '[' {
245                        parser.open_bracket(loc, '[');
246                    } else if c == ']' {
247                        parser.close_bracket(loc, ']');
248                    } else {
249                        Parser::unknown_char(c, loc);
250                    }
251                }
252            }
253            State::QString => {
254                if c == '"' {
255                    let token = Token::new(&take(&mut current_id), token_start);
256                    parser.token(token);
257                    state = State::Neutral;
258                } else if c == '\n' {
259                    let token = Token::new(&take(&mut current_id), token_start);
260                    warn(ErrorKey::ParseError).msg("quoted string not closed").loc(token).push();
261                    state = State::Neutral;
262                } else {
263                    current_id.push(c);
264                }
265            }
266            State::Number => {
267                if c.is_ascii_digit() || c == '.' {
268                    current_id.push(c);
269                } else {
270                    let token = Token::new(&take(&mut current_id), token_start);
271                    parser.token(token);
272                    state = State::Neutral;
273                    if c.is_ascii_whitespace() {
274                    } else if c == '"' {
275                        token_start = loc;
276                        state = State::QString;
277                    } else if c == ':' {
278                        parser.colon(loc);
279                    } else if c == ',' {
280                        parser.comma(loc);
281                    } else if c == '{' {
282                        parser.open_bracket(loc, '{');
283                    } else if c == '}' {
284                        parser.close_bracket(loc, '}');
285                    } else if c == '[' {
286                        parser.open_bracket(loc, '[');
287                    } else if c == ']' {
288                        parser.close_bracket(loc, ']');
289                    } else {
290                        Parser::unknown_char(c, loc);
291                    }
292                }
293            }
294        }
295
296        if c == '\n' {
297            loc.line += 1;
298            loc.column = 1;
299        } else {
300            loc.column += 1;
301        }
302    }
303
304    // Deal with state at end of file
305    match state {
306        State::QString => {
307            let token = Token::new(&current_id, token_start);
308            err(ErrorKey::ParseError).msg("Quoted string not closed").loc(&token).push();
309            parser.token(token);
310        }
311        State::Id => {
312            let token = Token::new(&current_id, token_start);
313            if token.is("true") || token.is("false") {
314                parser.token(token);
315            } else {
316                let msg = "unexpected unquoted string";
317                let info = "expected only true or false";
318                warn(ErrorKey::ParseError).msg(msg).info(info).loc(token).push();
319            }
320        }
321        State::Number => {
322            let token = Token::new(&current_id, token_start);
323            parser.token(token);
324        }
325        State::Neutral => (),
326    }
327
328    parser.eof()
329}
330
331#[allow(clippy::module_name_repetitions)]
332pub fn parse_json(entry: &FileEntry, content: &str) -> Block {
333    let mut loc = Loc::from(entry);
334    loc.line = 1;
335    loc.column = 1;
336    parse(loc, content)
337}
338
339#[allow(clippy::module_name_repetitions)]
340pub fn parse_json_file(entry: &FileEntry) -> Option<Block> {
341    let contents = match read_to_string(entry.fullpath()) {
342        Ok(contents) => contents,
343        Err(e) => {
344            err(ErrorKey::ReadError)
345                .msg("could not read file")
346                .info(format!("{e:#}"))
347                .loc(entry)
348                .push();
349            return None;
350        }
351    };
352    if let Some(bomless) = contents.strip_prefix('\u{feff}') {
353        Some(parse_json(entry, bomless))
354    } else {
355        Some(parse_json(entry, &contents))
356    }
357}