1use std::fmt::{Display, Formatter};
2use std::iter::Peekable;
3use std::str::CharIndices;
4
5use crate::block::Comparator;
6use crate::block::Eq::Single;
7use crate::game::Game;
8use crate::parse::ignore::{IgnoreFilter, IgnoreSize, parse_comment};
9use crate::parse::pdxfile::{CharExt, Cob};
10use crate::report::{ErrorKey, err, register_ignore_filter, untidy, warn};
11use crate::token::{Loc, Token};
12
/// The ASCII SUB control character (Ctrl-Z, U+001A).
/// The lexer stops producing lexemes when it meets this character and reports its presence.
const CONTROL_Z: char = '\u{001A}';
15
/// One lexical unit produced by the [`Lexer`].
/// Every variant carries the [`Token`] (text plus location) it was lexed from.
#[derive(Debug, Clone)]
pub enum Lexeme {
    /// A plain value: an unquoted word, the contents of a quoted string, or a value inside `@[ ]`.
    General(Token),
    /// A comparator, together with its parsed [`Comparator`] form.
    Comparator(Comparator, Token),
    /// A `@name` reference; the token text includes the leading `@`.
    VariableReference(Token),
    /// A `$name$` macro parameter; the token text excludes the `$` delimiters.
    MacroParam(Token),
    /// An opening brace `{`.
    BlockStart(Token),
    /// A closing brace `}`.
    BlockEnd(Token),
    /// The `@[` that opens a calculation.
    CalcStart(Token),
    /// The `]` that closes a calculation.
    CalcEnd(Token),
    /// An opening parenthesis `(`.
    OpenParen(Token),
    /// A closing parenthesis `)`.
    CloseParen(Token),
    /// The `+` operator.
    Add(Token),
    /// The `-` operator.
    Subtract(Token),
    /// The `*` operator.
    Multiply(Token),
    /// The `/` operator.
    Divide(Token),
    /// A recognized `@:name` reader directive; the token holds the full directive text.
    Directive(Directive, Token),
}
34
35impl Display for Lexeme {
36 fn fmt(&self, f: &mut Formatter) -> Result<(), std::fmt::Error> {
37 match self {
38 Lexeme::General(token) => write!(f, "value `{token}`"),
39 Lexeme::Comparator(_, token) => write!(f, "comparator `{token}`"),
40 Lexeme::VariableReference(token) => write!(f, "variable `{token}`"),
41 Lexeme::MacroParam(token) => write!(f, "parameter `${token}$`"),
42 Lexeme::BlockStart(_) => write!(f, "`{{`"),
43 Lexeme::BlockEnd(_) => write!(f, "`}}`"),
44 Lexeme::CalcStart(_) => write!(f, "`@[`"),
45 Lexeme::CalcEnd(_) => write!(f, "`]`"),
46 Lexeme::OpenParen(_) => write!(f, "`(`"),
47 Lexeme::CloseParen(_) => write!(f, "`)`"),
48 Lexeme::Add(_) => write!(f, "`+`"),
49 Lexeme::Subtract(_) => write!(f, "`-`"),
50 Lexeme::Multiply(_) => write!(f, "`*`"),
51 Lexeme::Divide(_) => write!(f, "`/`"),
52 Lexeme::Directive(_, token) => write!(f, "directive `{token}`"),
53 }
54 }
55}
56
57impl Lexeme {
58 pub fn into_token(self) -> Token {
60 match self {
61 Lexeme::General(token)
62 | Lexeme::Comparator(_, token)
63 | Lexeme::VariableReference(token)
64 | Lexeme::MacroParam(token)
65 | Lexeme::BlockStart(token)
66 | Lexeme::BlockEnd(token)
67 | Lexeme::CalcStart(token)
68 | Lexeme::CalcEnd(token)
69 | Lexeme::OpenParen(token)
70 | Lexeme::CloseParen(token)
71 | Lexeme::Add(token)
72 | Lexeme::Subtract(token)
73 | Lexeme::Multiply(token)
74 | Lexeme::Divide(token)
75 | Lexeme::Directive(_, token) => token,
76 }
77 }
78
79 pub fn get_loc(&self) -> Loc {
81 match self {
82 Lexeme::General(token)
83 | Lexeme::Comparator(_, token)
84 | Lexeme::VariableReference(token)
85 | Lexeme::MacroParam(token)
86 | Lexeme::BlockStart(token)
87 | Lexeme::BlockEnd(token)
88 | Lexeme::CalcStart(token)
89 | Lexeme::CalcEnd(token)
90 | Lexeme::OpenParen(token)
91 | Lexeme::CloseParen(token)
92 | Lexeme::Add(token)
93 | Lexeme::Subtract(token)
94 | Lexeme::Multiply(token)
95 | Lexeme::Divide(token)
96 | Lexeme::Directive(_, token) => token.loc,
97 }
98 }
99
100 pub fn get_cmp(&self) -> Comparator {
103 match self {
104 Lexeme::Comparator(cmp, _) => *cmp,
105 _ => unreachable!(),
106 }
107 }
108}
109
/// The `@:name` reader directives that the lexer turns into [`Lexeme::Directive`].
#[derive(Debug, Clone, Copy)]
pub enum Directive {
    /// `@:register_variable`
    RegisterVariable,
    /// `@:load_variable`
    LoadVariable,
    /// `@:define`
    Define,
    /// `@:insert`
    Insert,
    /// `@:log`
    Log,
}
119
/// Error type of the lexer's iterator items.
/// It has no variants, so no value of it can exist: the lexer in this file only ever yields
/// `Ok` items and reports problems through the `report` functions instead.
pub enum LexError {}
123
124impl Display for LexError {
125 fn fmt(&self, _: &mut Formatter) -> Result<(), std::fmt::Error> {
126 Ok(())
127 }
128}
129
/// A lexer that walks a sequence of input tokens character by character and yields
/// [`Lexeme`]s through its `Iterator` implementation.
pub struct Lexer<'input> {
    /// The inputs to lex. They are read one after the other; when the text of one is
    /// exhausted the lexer moves on to the next.
    inputs: &'input [Token],
    /// Index into `inputs` of the token currently being read.
    inputs_index: usize,
    /// Location of the next character to be consumed. It is advanced by every consumed
    /// character and reset to the input's own `loc` whenever a new input is started.
    loc: Loc,
    /// Character iterator (with byte offsets) over the text of the current input.
    iter: Peekable<CharIndices<'input>>,
    /// Number of `{` seen that have not yet been matched by a `}`.
    brace_depth: usize,
    /// True between a `@[` and the next `]` (or `}`); changes which characters form a value
    /// in the lexer's `next` implementation.
    in_calc: bool,
    /// Ignore filters read from comments that will be registered for the line of the
    /// next lexeme.
    pending_line_ignores: Vec<IgnoreFilter>,
    /// Ignore filters read from comments that will be attached to the next `{` block.
    pending_block_ignores: Vec<IgnoreFilter>,
    /// Ignore filters attached to blocks that are still open, stored as
    /// `(brace depth of the block, line where the block started, filter)`.
    /// They are registered when the matching `}` is reached.
    active_block_ignores: Vec<(usize, u32, IgnoreFilter)>,
    /// Ignore filters opened by a "begin" comment and not yet closed by an "end" comment,
    /// stored as `(start line, filter)`.
    active_range_ignores: Vec<(u32, IgnoreFilter)>,
}
158
159impl<'input> Lexer<'input> {
160 pub fn new(inputs: &'input [Token]) -> Self {
161 assert!(!inputs.is_empty());
162
163 Lexer {
164 inputs,
165 inputs_index: 0,
166 loc: inputs[0].loc,
167 iter: inputs[0].as_str().char_indices().peekable(),
168 brace_depth: 0,
169 in_calc: false,
170 pending_line_ignores: Vec::new(),
171 pending_block_ignores: Vec::new(),
172 active_block_ignores: Vec::new(),
173 active_range_ignores: Vec::new(),
174 }
175 }
176
177 fn peek(&mut self) -> Option<(usize, char)> {
179 let p = self.iter.peek();
180 if p.is_none() {
181 if self.inputs_index + 1 == self.inputs.len() {
182 None
183 } else {
184 self.inputs_index += 1;
185 self.iter = self.inputs[self.inputs_index].as_str().char_indices().peekable();
186 self.loc = self.inputs[self.inputs_index].loc;
187 self.peek()
188 }
189 } else {
190 p.copied()
191 }
192 }
193
194 fn consume(&mut self) {
196 if self.peek().is_some() {
198 let (_, c) = self.iter.next().unwrap();
199 if c == '\n' {
200 self.loc.line += 1;
201 self.loc.column = 1;
202 } else {
203 self.loc.column += 1;
204 }
205 }
206 }
207
208 fn start_cob(&mut self) -> Cob {
210 let mut cob = Cob::new();
211 if let Some((i, _)) = self.peek() {
212 cob.set(self.inputs[self.inputs_index].as_str(), i, self.loc);
213 }
214 cob
215 }
216
217 fn eof_offset(&self) -> usize {
219 self.inputs[self.inputs_index].as_str().len()
220 }
221
222 fn only_whitespace_left(&mut self) -> bool {
225 while let Some((_, c)) = self.peek() {
226 if !c.is_whitespace() {
227 return false;
228 }
229 self.consume();
230 }
231 true
232 }
233
234 fn apply_line_ignores(&mut self) {
236 let line = self.loc.line;
237 let path = self.loc.pathname();
238 for filter in self.pending_line_ignores.drain(..) {
239 register_ignore_filter(path, line..=line, filter);
240 }
241 }
242
243 fn apply_block_ignores(&mut self) {
245 for filter in self.pending_block_ignores.drain(..) {
246 self.active_block_ignores.push((self.brace_depth, self.loc.line, filter));
247 }
248 }
249
250 fn close_block_ignores(&mut self) {
252 let path = self.loc.pathname();
253 while let Some((depth, line, filter)) = self.active_block_ignores.last() {
254 if self.brace_depth == *depth {
255 register_ignore_filter(path, *line..=self.loc.line, filter.clone());
256 self.active_block_ignores.pop();
257 } else {
258 break;
259 }
260 }
261 }
262}
263
impl Iterator for Lexer<'_> {
    /// `(start offset, lexeme, end offset)`. The offsets are byte offsets taken from the
    /// character iterator of the input being read. The `Err` case cannot occur because
    /// `LexError` has no variants.
    type Item = Result<(usize, Lexeme, usize), LexError>;

    /// Produces the next lexeme, or `None` at the end of the inputs or at a ^Z character.
    ///
    /// Whitespace, comments, `;`, rejected reader directives and unrecognized characters
    /// produce no lexeme; the loop simply continues with the following character.
    /// Problems are reported through `err`/`warn`/`untidy` and never returned as `Err`.
    ///
    /// The order of the match arms matters: the guarded arms are tried before the
    /// single-character arms that follow them.
    fn next(&mut self) -> Option<Self::Item> {
        while let Some((i, c)) = self.peek() {
            match c {
                _ if c.is_ascii_whitespace() => self.consume(),
                // `@` starts one of: `@[` (calculation), `@:name` (reader directive),
                // or `@name` (variable reference).
                '@' => {
                    self.apply_line_ignores();
                    let mut id = self.start_cob();
                    id.add_char(c);
                    let start_i = i;
                    let loc = self.loc;
                    self.consume();
                    if let Some((_, '[')) = self.peek() {
                        self.consume();
                        // From here until the closing `]`, values are lexed by the
                        // `in_calc` arm below.
                        self.in_calc = true;
                        let token = Token::from_static_str("@[", loc);
                        return Some(Ok((start_i, Lexeme::CalcStart(token), start_i + 2)));
                    }
                    if let Some((_, ':')) = self.peek() {
                        id.add_char(':');
                        self.consume();
                        // If the directive name runs to the end of the input, that is its end.
                        let mut end_i = self.eof_offset();
                        while let Some((i, c)) = self.peek() {
                            // `-` is accepted here so that the misspelled directives below
                            // can be recognized as a whole and given a targeted hint.
                            if c.is_alphanumeric() || c == '_' || c == '-' {
                                id.add_char(c);
                                self.consume();
                            } else {
                                end_i = i;
                                break;
                            }
                        }
                        let token = id.take_to_token();
                        if !Game::is_ck3() {
                            let msg = "reader directives are only for CK3 so far";
                            err(ErrorKey::WrongGame).msg(msg).loc(&token).push();
                        }
                        // `None` means the directive is reported and then dropped.
                        let lexeme = match token.as_str() {
                            "@:register_variable" => {
                                let msg =
                                    "`@:register_variable` is (as of CK3 1.13) not yet supported";
                                let info = "prefer just @name = value";
                                err(ErrorKey::Bugs).msg(msg).info(info).loc(&token).push();
                                Some(Lexeme::Directive(Directive::RegisterVariable, token))
                            }
                            "@:register-variable" => {
                                let msg = format!("unknown reader directive `{token}`");
                                let info = "did you mean `@:register_variable`?";
                                err(ErrorKey::ParseError).msg(msg).info(info).loc(&token).push();
                                None
                            }
                            "@:load_variable" => {
                                let msg = "`@:load_variable` is (as of CK3 1.13) not yet supported";
                                let info = "prefer just @name";
                                err(ErrorKey::Bugs).msg(msg).info(info).loc(&token).push();
                                Some(Lexeme::Directive(Directive::LoadVariable, token))
                            }
                            "@:load-variable" => {
                                let msg = format!("unknown reader directive `{token}`");
                                let info = "did you mean `@:load_variable`?";
                                err(ErrorKey::ParseError).msg(msg).info(info).loc(&token).push();
                                None
                            }
                            "@:define" => Some(Lexeme::Directive(Directive::Define, token)),
                            "@:insert" => Some(Lexeme::Directive(Directive::Insert, token)),
                            "@:assert" => {
                                let msg = "`@:assert` should not be left in the script";
                                err(ErrorKey::Crash).msg(msg).loc(&token).push();
                                None
                            }
                            "@:log" => Some(Lexeme::Directive(Directive::Log, token)),
                            _ => {
                                let msg = format!("unknown reader directive `{token}`");
                                err(ErrorKey::ParseError).msg(msg).loc(&token).push();
                                None
                            }
                        };
                        if let Some(lexeme) = lexeme {
                            return Some(Ok((start_i, lexeme, end_i)));
                        }
                        // Otherwise fall out of the match and continue lexing after the
                        // rejected directive.
                    } else {
                        // Plain `@name`: collect the name characters after the `@`.
                        while let Some((i, c)) = self.peek() {
                            if c.is_local_value_char() {
                                id.add_char(c);
                                self.consume();
                            } else {
                                return Some(Ok((
                                    start_i,
                                    Lexeme::VariableReference(id.take_to_token()),
                                    i,
                                )));
                            }
                        }
                        // The name ran up to the end of the input.
                        return Some(Ok((
                            start_i,
                            Lexeme::VariableReference(id.take_to_token()),
                            self.eof_offset(),
                        )));
                    }
                }
                // An unquoted value outside a calculation. A leading `+` is accepted as the
                // first character only; the following characters must be id chars.
                _ if !self.in_calc && (c.is_id_char() || c == '+') => {
                    self.apply_line_ignores();
                    let mut id = self.start_cob();
                    id.add_char(c);
                    let start_i = i;
                    self.consume();
                    while let Some((i, c)) = self.peek() {
                        if c.is_id_char() {
                            id.add_char(c);
                            self.consume();
                        } else {
                            let token = id.take_to_token();
                            return Some(Ok((start_i, Lexeme::General(token), i)));
                        }
                    }
                    let token = id.take_to_token();
                    return Some(Ok((start_i, Lexeme::General(token), self.eof_offset())));
                }
                // A run of comparator characters. An unparseable run is reported by
                // `parse_comparator`, which then substitutes a default comparator.
                _ if c.is_comparator_char() => {
                    self.apply_line_ignores();
                    let mut id = self.start_cob();
                    id.add_char(c);
                    let start_i = i;
                    self.consume();
                    while let Some((i, c)) = self.peek() {
                        if c.is_comparator_char() {
                            id.add_char(c);
                            self.consume();
                        } else {
                            let token = id.take_to_token();
                            let cmp = parse_comparator(&token);
                            return Some(Ok((start_i, Lexeme::Comparator(cmp, token), i)));
                        }
                    }
                    let token = id.take_to_token();
                    let cmp = parse_comparator(&token);
                    return Some(Ok((start_i, Lexeme::Comparator(cmp, token), self.eof_offset())));
                }
                // A value inside a `@[ ... ]` calculation: local-value characters plus `.`.
                _ if self.in_calc && (c.is_local_value_char() || c == '.') => {
                    self.apply_line_ignores();
                    let mut id = self.start_cob();
                    id.add_char(c);
                    let start_i = i;
                    self.consume();
                    while let Some((i, c)) = self.peek() {
                        if c.is_local_value_char() || c == '.' {
                            id.add_char(c);
                            self.consume();
                        } else {
                            return Some(Ok((start_i, Lexeme::General(id.take_to_token()), i)));
                        }
                    }
                    return Some(Ok((
                        start_i,
                        Lexeme::General(id.take_to_token()),
                        self.eof_offset(),
                    )));
                }
                // A `;` is skipped without producing a lexeme or a report.
                ';' => {
                    self.apply_line_ignores();
                    self.consume();
                }
                // A quoted string. The resulting token holds the contents without the quotes.
                '"' => {
                    self.apply_line_ignores();
                    let start_i = i;
                    let start_loc = self.loc;
                    // Last character processed inside the string (initially the opening
                    // quote itself); used for the "close quote looks like an open quote" check.
                    let mut prev_char = c;
                    self.consume();
                    // True when the previous character was an unescaped backslash.
                    let mut escaped = false;
                    let mut id = self.start_cob();
                    while let Some((i, c)) = self.peek() {
                        if c == '\n' {
                            if Game::is_hoi4() {
                                // For Hoi4 a newline ends the string, with a warning.
                                let msg = "quoted string not closed";
                                let info = "reached end of line";
                                warn(ErrorKey::ParseError).msg(msg).info(info).loc(self.loc).push();
                                self.consume();
                                let token = id.take_to_token();
                                return Some(Ok((start_i, Lexeme::General(token), i + 1)));
                            }
                            // For other games the newline becomes part of the string.
                            id.add_char(c);
                            self.consume();
                        } else if c == '\\' && !escaped {
                            // Drop the backslash itself; the next character is taken literally.
                            // `continue` skips the reset of `escaped` at the end of the loop body.
                            self.consume();
                            id.make_owned();
                            escaped = true;
                            continue;
                        } else if c == '"' && !escaped {
                            let token = id.take_to_token();
                            let close_loc = self.loc;
                            self.consume();

                            let next_char = self.peek();
                            // Heuristic: whitespace or a comparator right before this quote,
                            // and something other than whitespace, a comparator or `}` (or the
                            // end of the inputs) right after it, suggests this quote was
                            // meant to open a string rather than close one.
                            if (prev_char.is_ascii_whitespace()
                                || prev_char.is_comparator_end_char())
                                && !next_char.is_some_and(|(_, nc)| nc.is_ascii_whitespace()
                                    || nc.is_comparator_char()
                                    || nc == '}')
                            {
                                let msg = "quoted string not closed";
                                let info = "Matching close quote looks like it was intended to open. If this is a false positive, consider adding whitespace after the close quote.";
                                warn(ErrorKey::ParseError)
                                    .weak()
                                    .msg(msg)
                                    .loc(start_loc)
                                    .loc_msg(close_loc, info)
                                    .push();
                            }

                            return Some(Ok((start_i, Lexeme::General(token), i + 1)));
                        } else {
                            // Reported once, for the character whose byte offset is exactly
                            // 255 past the opening quote.
                            // NOTE(review): a multi-byte character could step over offset 255
                            // and the offsets restart with each input — confirm that is acceptable.
                            if Game::is_hoi4() && i - start_i == 255 {
                                let msg = "string too long";
                                let info = "in Hoi4 strings are limited to 255 bytes";
                                err(ErrorKey::Overflow)
                                    .strong()
                                    .msg(msg)
                                    .info(info)
                                    .loc(self.loc)
                                    .push();
                            }
                            id.add_char(c);
                            self.consume();
                        }
                        prev_char = c;
                        escaped = false;
                    }
                    // The inputs ended before a closing quote was found.
                    let msg = "quoted string not closed";
                    let info = "reached end of file";
                    err(ErrorKey::ParseError).msg(msg).info(info).loc(start_loc).push();
                    // The cob is still uninitialized if nothing followed the opening quote.
                    let token = if matches!(id, Cob::Uninit) {
                        Token::from_static_str("", self.loc)
                    } else {
                        id.take_to_token()
                    };
                    return Some(Ok((start_i, Lexeme::General(token), self.eof_offset())));
                }
                // A comment running to the end of the line. It produces no lexeme, but its
                // text is handed to `parse_comment` to look for an ignore specification.
                '#' => {
                    self.consume();
                    let mut comment = self.start_cob();
                    while let Some((_, c)) = self.peek() {
                        if c == '\n' {
                            // The newline is consumed too, so `self.loc` is on the following
                            // line by the time the comment is processed below.
                            self.consume();
                            break;
                        }
                        comment.add_char(c);
                        self.consume();
                    }
                    let s = if matches!(comment, Cob::Uninit) {
                        ""
                    } else {
                        comment.take_to_token().as_str()
                    };
                    if let Some(spec) = parse_comment(s) {
                        match spec.size {
                            // Deferred until the next lexeme / next `{` is reached.
                            IgnoreSize::Line => self.pending_line_ignores.push(spec.filter),
                            IgnoreSize::Block => self.pending_block_ignores.push(spec.filter),
                            IgnoreSize::File => {
                                let path = self.loc.pathname();
                                register_ignore_filter(path, .., spec.filter);
                            }
                            IgnoreSize::Begin => {
                                // NOTE(review): when the comment ended with a newline,
                                // `self.loc.line` is already the line after the comment, so
                                // `+ 1` makes the range start two lines below it — confirm
                                // this is the intended start line.
                                self.active_range_ignores.push((self.loc.line + 1, spec.filter));
                            }
                            IgnoreSize::End => {
                                // An "end" without a matching "begin" is silently ignored.
                                if let Some((start_line, filter)) = self.active_range_ignores.pop()
                                {
                                    let path = self.loc.pathname();
                                    register_ignore_filter(path, start_line..self.loc.line, filter);
                                }
                            }
                        }
                    }
                }
                // A `$name$` macro parameter. The token holds the name without the `$`s.
                '$' => {
                    self.apply_line_ignores();
                    let start_i = i;
                    let start_loc = self.loc;
                    self.consume();
                    let mut id = self.start_cob();
                    while let Some((i, c)) = self.peek() {
                        if c.is_id_char() {
                            id.add_char(c);
                            self.consume();
                        } else if c == '$' {
                            let token = id.take_to_token();
                            self.consume();
                            return Some(Ok((start_i, Lexeme::MacroParam(token), i + 1)));
                        } else {
                            // Not closed: report it and return what was collected as a
                            // plain value. The offending character is left unconsumed.
                            let msg = "macro parameter not closed";
                            err(ErrorKey::ParseError).msg(msg).loc(self.loc).push();
                            let token = id.take_to_token();
                            return Some(Ok((start_i, Lexeme::General(token), i)));
                        }
                    }
                    // The inputs ended before the closing `$`.
                    let msg = "macro parameter not closed";
                    err(ErrorKey::ParseError).msg(msg).loc(start_loc).push();
                    let token = if matches!(id, Cob::Uninit) {
                        Token::from_static_str("", self.loc)
                    } else {
                        id.take_to_token()
                    };
                    return Some(Ok((start_i, Lexeme::General(token), self.eof_offset())));
                }
                '{' => {
                    // The depth is raised first so that block ignores are recorded with the
                    // depth of the block they apply to.
                    self.brace_depth += 1;
                    self.apply_line_ignores();
                    self.apply_block_ignores();
                    let token = Token::from_static_str("{", self.loc);
                    self.consume();
                    return Some(Ok((i, Lexeme::BlockStart(token), i + 1)));
                }
                '}' => {
                    self.apply_line_ignores();
                    // Must run before the depth is lowered, to match the depth recorded at `{`.
                    self.close_block_ignores();
                    // Guard against underflow on an unmatched `}`.
                    if self.brace_depth > 0 {
                        self.brace_depth -= 1;
                    }
                    if self.loc.column == 1 && self.brace_depth > 0 {
                        let msg = "possible brace error";
                        let info = "This closing brace is at the start of the line but does not close a top-level block.";
                        warn(ErrorKey::BracePlacement)
                            .weak()
                            .msg(msg)
                            .info(info)
                            .loc(self.loc)
                            .push();
                    }
                    let token = Token::from_static_str("}", self.loc);
                    self.consume();
                    // A closing brace also ends calculation mode, even if no `]` was seen.
                    self.in_calc = false;
                    return Some(Ok((i, Lexeme::BlockEnd(token), i + 1)));
                }
                ']' => {
                    self.apply_line_ignores();
                    let token = Token::from_static_str("]", self.loc);
                    self.consume();
                    self.in_calc = false;
                    return Some(Ok((i, Lexeme::CalcEnd(token), i + 1)));
                }
                '(' => {
                    self.apply_line_ignores();
                    let token = Token::from_static_str("(", self.loc);
                    self.consume();
                    return Some(Ok((i, Lexeme::OpenParen(token), i + 1)));
                }
                ')' => {
                    self.apply_line_ignores();
                    let token = Token::from_static_str(")", self.loc);
                    self.consume();
                    return Some(Ok((i, Lexeme::CloseParen(token), i + 1)));
                }
                // The operator arms below are reached only when none of the guarded arms
                // above claimed the character; outside a calculation, for example, `+` is
                // taken by the unquoted-value arm.
                '+' => {
                    self.apply_line_ignores();
                    let token = Token::from_static_str("+", self.loc);
                    self.consume();
                    return Some(Ok((i, Lexeme::Add(token), i + 1)));
                }
                '-' => {
                    self.apply_line_ignores();
                    let token = Token::from_static_str("-", self.loc);
                    self.consume();
                    return Some(Ok((i, Lexeme::Subtract(token), i + 1)));
                }
                '*' => {
                    self.apply_line_ignores();
                    let token = Token::from_static_str("*", self.loc);
                    self.consume();
                    return Some(Ok((i, Lexeme::Multiply(token), i + 1)));
                }
                '/' => {
                    self.apply_line_ignores();
                    let token = Token::from_static_str("/", self.loc);
                    self.consume();
                    return Some(Ok((i, Lexeme::Divide(token), i + 1)));
                }
                // ^Z ends lexing. The severity of the report depends on whether anything
                // other than whitespace follows it.
                CONTROL_Z => {
                    self.apply_line_ignores();
                    let loc = self.loc;
                    self.consume();
                    let msg = "^Z in file";
                    if self.only_whitespace_left() {
                        let info = "This control code means stop reading the file here, which will cause trouble if you add more code later.";
                        untidy(ErrorKey::ParseError).msg(msg).info(info).loc(loc).push();
                    } else {
                        let info = "This control code means stop reading the file here. Nothing that follows will be read.";
                        err(ErrorKey::ParseError).msg(msg).info(info).loc(loc).push();
                    }
                    return None;
                }
                // Anything else is reported and skipped.
                _ => {
                    self.apply_line_ignores();
                    let msg = format!("unrecognized character `{c}`");
                    err(ErrorKey::ParseError).msg(msg).loc(self.loc).push();
                    self.consume();
                }
            }
        }
        None
    }
}
691
692fn parse_comparator(token: &Token) -> Comparator {
693 let s = token.as_str();
694 s.parse::<Comparator>().unwrap_or_else(|_| {
695 let msg = format!("unrecognized comparator `{s}`");
696 err(ErrorKey::ParseError).msg(msg).loc(token).push();
697 Comparator::Equals(Single) })
699}