1use std::fs::read_to_string;
7use std::mem::{swap, take};
8
9use crate::block::Eq::Single;
10use crate::block::{BV, Block, Comparator};
11use crate::fileset::FileEntry;
12use crate::report::{ErrorKey, err, warn};
13use crate::token::{Loc, Token};
14
/// Lexer state of the character loop in `parse`.
#[derive(Copy, Clone, Debug)]
enum State {
    /// Between tokens; the next character decides what kind of token starts.
    Neutral,
    /// Inside a double-quoted string, after the opening `"`.
    QString,
    /// Accumulating an unquoted alphabetic word.
    Id,
    /// Accumulating a numeric literal.
    Number,
}
22
/// Parsing state for one nesting level (one `{ ... }` or `[ ... ]`).
struct ParseLevel {
    /// The block being filled with this level's contents.
    block: Block,
    /// A key token that has been read and is waiting for its value.
    key: Option<Token>,
    /// Set after a key token has been stored; cleared when a `:` is read.
    expect_colon: bool,
    /// Set after a value has been added; cleared when a `,` is read.
    expect_comma: bool,
    /// The bracket character that opened this level.
    opening_bracket: char,
    /// The bracket character expected to close this level.
    closing_bracket: char,
}
31
/// Receives tokens and punctuation from the character loop and assembles nested `Block`s.
struct Parser {
    /// The innermost level currently being filled.
    current: ParseLevel,
    /// The enclosing levels that are still open; the last element is the direct parent of
    /// `current`.
    stack: Vec<ParseLevel>,
}
36
37impl Parser {
38 fn unknown_char(c: char, loc: Loc) {
39 let msg = format!("Unrecognized character {c}");
40 err(ErrorKey::ParseError).msg(msg).loc(loc).push();
41 }
42
43 fn colon(&mut self, loc: Loc) {
44 if !self.current.expect_colon {
45 err(ErrorKey::ParseError).msg("unexpected `:`").loc(loc).push();
46 }
47 self.current.expect_colon = false;
48 }
49
50 fn check_colon(&mut self, loc: Loc) {
51 if self.current.expect_colon {
52 err(ErrorKey::ParseError).msg("expected `:`").loc(loc).push();
53 self.current.expect_comma = false;
54 }
55 }
56
57 fn comma(&mut self, loc: Loc) {
58 if !self.current.expect_comma {
59 err(ErrorKey::ParseError).msg("unexpected `,`").loc(loc).push();
60 }
61 self.current.expect_comma = false;
62 }
63
64 fn check_comma(&mut self, loc: Loc) {
65 if self.current.expect_comma {
66 err(ErrorKey::ParseError).msg("expected `,`").loc(loc).push();
67 self.current.expect_comma = false;
68 }
69 }
70
71 fn token(&mut self, token: Token) {
72 self.check_comma(token.loc);
73 self.check_colon(token.loc);
74 if let Some(key) = self.current.key.take() {
75 self.current.block.add_key_bv(key, Comparator::Equals(Single), BV::Value(token));
76 self.current.expect_comma = true;
77 } else if self.current.opening_bracket == '[' {
78 self.current.block.add_value(token);
79 self.current.expect_comma = true;
80 } else {
81 self.current.key = Some(token);
82 self.current.expect_colon = true;
83 }
84 }
85
86 fn null(&mut self, null: Token) {
87 self.check_comma(null.loc);
88 self.check_colon(null.loc);
89 if self.current.key.take().is_some() || self.current.opening_bracket == '[' {
90 self.current.expect_comma = true;
91 } else {
92 self.current.key = Some(null);
94 self.current.expect_colon = true;
95 }
96 }
97
98 fn block_value(&mut self, block: Block) {
99 if let Some(key) = self.current.key.take() {
100 self.current.block.add_key_bv(key, Comparator::Equals(Single), BV::Block(block));
101 } else {
102 self.current.block.add_block(block);
103 }
104 self.current.expect_comma = true;
105 }
106
107 fn end_assign(&mut self) {
108 if let Some(key) = self.current.key.take() {
109 let msg = "key without value";
110 err(ErrorKey::ParseError).msg(msg).loc(&key).push();
111 self.current.block.add_value(key);
112 }
113 }
114
115 fn open_bracket(&mut self, loc: Loc, bracket: char) {
116 self.check_colon(loc);
117 self.check_comma(loc);
118 if self.current.opening_bracket == '{' && self.current.key.is_none() {
119 err(ErrorKey::ParseError).msg("expected key not block").loc(loc).push();
120 }
121 let mut new_level = ParseLevel {
122 block: Block::new(loc),
123 key: None,
124 expect_colon: false,
125 expect_comma: false,
126 opening_bracket: bracket,
127 closing_bracket: if bracket == '{' { '}' } else { ']' },
128 };
129 swap(&mut new_level, &mut self.current);
130 self.stack.push(new_level);
131 }
132
133 fn close_bracket(&mut self, loc: Loc, bracket: char) {
134 self.end_assign();
135 if let Some(mut prev_level) = self.stack.pop() {
136 swap(&mut self.current, &mut prev_level);
137 if prev_level.closing_bracket != bracket {
138 let msg = format!("this {bracket} closes a {}", self.current.opening_bracket);
139 err(ErrorKey::ParseError)
140 .strong()
141 .msg(msg)
142 .loc(loc)
143 .loc_msg(prev_level.block.loc, "here")
144 .push();
145 }
146 self.block_value(prev_level.block);
147 if loc.column == 1 && !self.stack.is_empty() {
148 let msg = "possible bracket error";
149 let info = "This closing bracket is at the start of a line but does not end a top-level item.";
150 warn(ErrorKey::BracePlacement).msg(msg).info(info).loc(loc).push();
151 }
152 } else {
153 err(ErrorKey::ParseError).msg(format!("Unexpected {bracket}")).loc(loc).push();
154 }
155 }
156
157 fn eof(mut self) -> Block {
158 self.end_assign();
159 while let Some(mut prev_level) = self.stack.pop() {
160 let msg = format!("Opening {} was never closed", prev_level.opening_bracket);
161 err(ErrorKey::ParseError).msg(msg).loc(prev_level.block.loc).push();
162 swap(&mut self.current, &mut prev_level);
163 self.block_value(prev_level.block);
164 }
165 self.current.block
166 }
167}
168
169fn parse(blockloc: Loc, content: &str) -> Block {
170 let mut parser = Parser {
171 current: ParseLevel {
172 block: Block::new(blockloc),
173 key: None,
174 expect_colon: false,
175 expect_comma: false,
176 opening_bracket: '[',
177 closing_bracket: ']',
178 },
179 stack: Vec::new(),
180 };
181 let mut state = State::Neutral;
182 let mut token_start = blockloc;
183 let mut current_id = String::new();
184
185 let mut loc = blockloc;
186 for c in content.chars() {
187 match state {
188 State::Neutral => {
189 if c.is_ascii_whitespace() {
190 } else if c == '"' {
191 token_start = loc;
192 state = State::QString;
193 } else if c.is_alphabetic() {
194 token_start = loc;
195 current_id.push(c);
196 state = State::Id;
197 } else if c.is_ascii_digit() {
198 token_start = loc;
199 current_id.push(c);
200 state = State::Number;
201 } else if c == ':' {
202 parser.colon(loc);
203 } else if c == ',' {
204 parser.comma(loc);
205 } else if c == '{' {
206 parser.open_bracket(loc, '{');
207 } else if c == '}' {
208 parser.close_bracket(loc, '}');
209 } else if c == '[' {
210 parser.open_bracket(loc, '[');
211 } else if c == ']' {
212 parser.close_bracket(loc, ']');
213 } else {
214 Parser::unknown_char(c, loc);
215 }
216 }
217 State::Id => {
218 if c.is_alphabetic() {
219 current_id.push(c);
220 } else {
221 let token = Token::new(&take(&mut current_id), token_start);
222 if token.is("true") || token.is("false") {
223 parser.token(token);
224 } else if token.is("null") {
225 parser.null(token);
226 } else {
227 let msg = "unexpected unquoted string";
228 let info = "expected only true or false or null";
229 warn(ErrorKey::ParseError).msg(msg).info(info).loc(token).push();
230 }
231 state = State::Neutral;
232 if c.is_ascii_whitespace() {
233 } else if c == '"' {
234 token_start = loc;
235 state = State::QString;
236 } else if c == ':' {
237 parser.colon(loc);
238 } else if c == ',' {
239 parser.comma(loc);
240 } else if c == '{' {
241 parser.open_bracket(loc, '{');
242 } else if c == '}' {
243 parser.close_bracket(loc, '}');
244 } else if c == '[' {
245 parser.open_bracket(loc, '[');
246 } else if c == ']' {
247 parser.close_bracket(loc, ']');
248 } else {
249 Parser::unknown_char(c, loc);
250 }
251 }
252 }
253 State::QString => {
254 if c == '"' {
255 let token = Token::new(&take(&mut current_id), token_start);
256 parser.token(token);
257 state = State::Neutral;
258 } else if c == '\n' {
259 let token = Token::new(&take(&mut current_id), token_start);
260 warn(ErrorKey::ParseError).msg("quoted string not closed").loc(token).push();
261 state = State::Neutral;
262 } else {
263 current_id.push(c);
264 }
265 }
266 State::Number => {
267 if c.is_ascii_digit() || c == '.' {
268 current_id.push(c);
269 } else {
270 let token = Token::new(&take(&mut current_id), token_start);
271 parser.token(token);
272 state = State::Neutral;
273 if c.is_ascii_whitespace() {
274 } else if c == '"' {
275 token_start = loc;
276 state = State::QString;
277 } else if c == ':' {
278 parser.colon(loc);
279 } else if c == ',' {
280 parser.comma(loc);
281 } else if c == '{' {
282 parser.open_bracket(loc, '{');
283 } else if c == '}' {
284 parser.close_bracket(loc, '}');
285 } else if c == '[' {
286 parser.open_bracket(loc, '[');
287 } else if c == ']' {
288 parser.close_bracket(loc, ']');
289 } else {
290 Parser::unknown_char(c, loc);
291 }
292 }
293 }
294 }
295
296 if c == '\n' {
297 loc.line += 1;
298 loc.column = 1;
299 } else {
300 loc.column += 1;
301 }
302 }
303
304 match state {
306 State::QString => {
307 let token = Token::new(¤t_id, token_start);
308 err(ErrorKey::ParseError).msg("Quoted string not closed").loc(&token).push();
309 parser.token(token);
310 }
311 State::Id => {
312 let token = Token::new(¤t_id, token_start);
313 if token.is("true") || token.is("false") {
314 parser.token(token);
315 } else {
316 let msg = "unexpected unquoted string";
317 let info = "expected only true or false";
318 warn(ErrorKey::ParseError).msg(msg).info(info).loc(token).push();
319 }
320 }
321 State::Number => {
322 let token = Token::new(¤t_id, token_start);
323 parser.token(token);
324 }
325 State::Neutral => (),
326 }
327
328 parser.eof()
329}
330
331#[allow(clippy::module_name_repetitions)]
332pub fn parse_json(entry: &FileEntry, content: &str) -> Block {
333 let mut loc = Loc::from(entry);
334 loc.line = 1;
335 loc.column = 1;
336 parse(loc, content)
337}
338
339#[allow(clippy::module_name_repetitions)]
340pub fn parse_json_file(entry: &FileEntry) -> Option<Block> {
341 let contents = match read_to_string(entry.fullpath()) {
342 Ok(contents) => contents,
343 Err(e) => {
344 err(ErrorKey::ReadError)
345 .msg("could not read file")
346 .info(format!("{e:#}"))
347 .loc(entry)
348 .push();
349 return None;
350 }
351 };
352 if let Some(bomless) = contents.strip_prefix('\u{feff}') {
353 Some(parse_json(entry, bomless))
354 } else {
355 Some(parse_json(entry, &contents))
356 }
357}