tiger_lib/parse/
pdxfile.rs

1//! Parses a Pdx script file into a [`Block`].
2//!
3//! The main entry points are [`parse_pdx_file`], [`parse_pdx_macro`], and [`parse_pdx_internal`].
4
5use std::path::PathBuf;
6use std::slice;
7use std::sync::LazyLock;
8
9use lalrpop_util::{ParseError, lalrpop_mod};
10
11use crate::block::{Block, Comparator, Eq};
12use crate::fileset::{FileEntry, FileKind, FileStage};
13use crate::game::Game;
14use crate::parse::ParserMemory;
15use crate::parse::cob::Cob;
16use crate::parse::pdxfile::lexer::{LexError, Lexeme, Lexer};
17use crate::parse::pdxfile::memory::CombinedMemory;
18pub use crate::parse::pdxfile::memory::PdxfileMemory;
19use crate::report::{ErrorKey, err, store_source_file};
20use crate::token::{Loc, Token};
21
22mod lexer;
23pub mod memory;
24lalrpop_mod! {
25    #[allow(unused_variables)]
26    #[allow(unused_imports)]
27    #[allow(dead_code)]
28    #[rustfmt::skip]
29    #[allow(clippy::pedantic)]
30    #[allow(clippy::if_then_some_else_none)]
31    parser, "/parse/pdxfile/parser.rs"
32}
/// The generated file parser, constructed lazily on first use via [`LazyLock`] and then reused
/// by the parse functions in this module.
static FILE_PARSER: LazyLock<parser::FileParser> = LazyLock::new(parser::FileParser::new);
34
35/// Re-parse a macro (which is a scripted effect, trigger, or modifier that uses $ parameters)
36/// after argument substitution. A full re-parse is needed because the game engine allows tricks
37/// such as passing `#` as a macro argument in order to comment out the rest of a line.
38pub fn parse_pdx_macro(inputs: &[Token], global: &PdxfileMemory, local: &PdxfileMemory) -> Block {
39    let mut combined = CombinedMemory::from_local(global, local.clone());
40    match FILE_PARSER.parse(inputs, &mut combined, Lexer::new(inputs)) {
41        Ok(block) => block,
42        Err(e) => {
43            eprintln!("Internal error: re-parsing macro failed.\n{e}");
44            Block::new(inputs[0].loc)
45        }
46    }
47}
48
49/// Parse a whole file into a `Block`.
50fn parse_pdx(entry: &FileEntry, content: &'static str, memory: &ParserMemory) -> Block {
51    let file_loc = Loc::from(entry);
52    let mut loc = file_loc;
53    loc.line = 1;
54    loc.column = 1;
55    let inputs = [Token::from_static_str(content, loc)];
56    let mut combined = CombinedMemory::new(&memory.pdxfile);
57    match FILE_PARSER.parse(&inputs, &mut combined, Lexer::new(&inputs)) {
58        Ok(mut block) => {
59            block.loc = file_loc;
60            block
61        }
62        Err(e) => {
63            eprintln!("Internal error: parsing file {} failed.\n{e}", entry.path().display());
64            Block::new(inputs[0].loc)
65        }
66    }
67}
68
69/// Parse the content associated with the [`FileEntry`].
70pub fn parse_pdx_file(
71    entry: &FileEntry,
72    content: String,
73    offset: usize,
74    parser: &ParserMemory,
75) -> Block {
76    let content = content.leak();
77    store_source_file(entry.fullpath().to_path_buf(), &content[offset..]);
78    parse_pdx(entry, &content[offset..], parser)
79}
80
/// Parse the script text belonging to a [`FileEntry`] for its effect on the global parser memory.
///
/// `content` is leaked so that it can be used as `&'static str`. The first `offset` bytes are
/// skipped; the remainder is passed to `store_source_file` together with the entry's full path
/// and is then parsed with `global` as the backing memory.
///
/// On success the parsed block is discarded and the local memory built up during the parse is
/// merged into `global`. On failure an internal error is printed to stderr and nothing is merged.
///
/// # Panics
/// Panics if `offset` is past the end of `content` or does not fall on a character boundary.
#[cfg(feature = "ck3")]
pub fn parse_reader_export(
    entry: &FileEntry,
    content: String,
    offset: usize,
    global: &mut PdxfileMemory,
) {
    let leaked: &'static str = content.leak();
    let text = &leaked[offset..];
    store_source_file(entry.fullpath().to_path_buf(), text);

    let mut start_loc = Loc::from(entry);
    start_loc.line = 1;
    start_loc.column = 1;
    let inputs = [Token::from_static_str(text, start_loc)];

    let mut combined = CombinedMemory::new(global);
    if let Err(e) = FILE_PARSER.parse(&inputs, &mut combined, Lexer::new(&inputs)) {
        eprintln!("Internal error: parsing file {} failed.\n{e}", entry.path().display());
        return;
    }
    // Consuming `combined` ends its borrow of `global`, which can then be updated.
    let local = combined.into_local();
    global.merge(local);
}
106
107/// Parse a string into a [`Block`]. This function is meant for use by the validator itself, to
108/// allow it to load game description data from internal strings that are in pdx script format.
109pub fn parse_pdx_internal(input: &'static str, desc: &str) -> Block {
110    let entry = FileEntry::new(
111        PathBuf::from(desc),
112        FileStage::NoStage,
113        FileKind::Internal,
114        PathBuf::from(desc),
115    );
116    parse_pdx(&entry, input, &ParserMemory::default())
117}
118
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
/// Kinds of [`MacroComponent`].
pub enum MacroComponentKind {
    /// A stretch of the original script text that lies outside any macro parameter.
    Source,
    /// A macro parameter; its token holds the parameter name without the enclosing `$` chars.
    Macro,
}
125
#[derive(Debug, PartialEq, Eq, Clone)]
/// Macro components output from [`split_macros`].
pub struct MacroComponent {
    /// Whether this component is plain source text or a macro parameter.
    kind: MacroComponentKind,
    /// The text of this component, together with its location.
    token: Token,
}
132
impl MacroComponent {
    /// Returns which kind of component this is.
    pub fn kind(&self) -> MacroComponentKind {
        self.kind
    }

    /// Returns a reference to the token holding this component's text.
    pub fn token(&self) -> &Token {
        &self.token
    }
}
142
143/// Split a block that contains macro parameters (represented here as a [`Token`] containing its
144/// source script) into [`MacroComponent`].
145///
146/// Having this available will speed up macro re-parsing later.
147///
148/// The function is aware of comments and quoted strings and will avoid detecting macro parameters
149/// inside those.
150fn split_macros(token: &Token) -> Vec<MacroComponent> {
151    let mut vec = Vec::new();
152    let mut index_loc = (0, token.loc);
153    for lex in Lexer::new(slice::from_ref(token)).flatten() {
154        #[allow(clippy::cast_possible_truncation)]
155        if let (start, Lexeme::MacroParam(param), end) = lex {
156            // The param token does not include the enclosing `$` chars, but the start..end range does.
157            vec.push(MacroComponent {
158                kind: MacroComponentKind::Source,
159                token: token.subtoken(index_loc.0..start, index_loc.1),
160            });
161            // Do this before pushing `param` to the vec, because it uses `param`.
162            index_loc = (end, param.loc);
163            index_loc.1.column += 1 + param.as_str().chars().count() as u32;
164            vec.push(MacroComponent { kind: MacroComponentKind::Macro, token: param });
165        }
166    }
167    vec.push(MacroComponent {
168        kind: MacroComponentKind::Source,
169        token: token.subtoken(index_loc.0.., index_loc.1),
170    });
171    vec
172}
173
174// Definitions used by parser.lalrpop
175
/// Boolean flag type for the grammar. Presumably it records whether a parsed item contained
/// `$` macro parameters (compare the `has_macro_params` argument of `warn_macros`) -- confirm
/// against parser.lalrpop.
type HasMacroParams = bool;
177
178fn define_var(memory: &mut CombinedMemory, token: &Token, cmp: Comparator, value: Token) {
179    // A direct `@name = value` assignment gets the leading `@`,
180    // while a `@:register_variable name = value` does not.
181    let name = match token.as_str().strip_prefix('@') {
182        Some(name) => name,
183        None => token.as_str(),
184    };
185    if !matches!(cmp, Comparator::Equals(Eq::Single)) {
186        let msg = format!("expected `{name} =`");
187        err(ErrorKey::ReaderDirectives).msg(msg).loc(token).push();
188    }
189    if memory.has_variable(name) {
190        let msg = format!("`{name}` is already defined as a reader variable");
191        err(ErrorKey::ReaderDirectives).msg(msg).loc(token).push();
192    } else if Game::is_jomini() && !name.starts_with(|c: char| c.is_ascii_alphabetic()) {
193        let msg = "reader variable names must start with an ascii letter";
194        err(ErrorKey::ReaderDirectives).msg(msg).loc(token).push();
195    } else {
196        memory.set_variable(name.to_string(), value);
197    }
198}
199
200fn warn_macros(token: &Token, has_macro_params: bool) {
201    if has_macro_params {
202        let msg = "$-substitutions only work inside blocks";
203        err(ErrorKey::Macro).msg(msg).loc(token).push();
204    }
205}
206
207fn report_error(error: ParseError<usize, Lexeme, LexError>, mut file_loc: Loc) {
208    match error {
209        ParseError::InvalidToken { location: _ } // we don't pass `LexError`s
210        | ParseError::User { error: _ } => unreachable!(),
211        ParseError::UnrecognizedEof { location: _, expected: _ } => {
212            let msg = "unexpected end of file";
213            file_loc.line = 0;
214            file_loc.column = 0;
215            err(ErrorKey::ParseError).msg(msg).loc(file_loc).push();
216        }
217        ParseError::UnrecognizedToken { token: (_, lexeme, _), expected: _ }
218        | ParseError::ExtraToken { token: (_, lexeme, _) } => {
219            let msg = format!("unexpected {lexeme}");
220            let token = lexeme.into_token();
221            err(ErrorKey::ParseError).msg(msg).loc(token).push();
222        }
223    }
224}
225
226fn get_numeric_var(memory: &CombinedMemory, name: &Token) -> f64 {
227    if let Some(value) = name.get_number() {
228        value
229    } else if let Some(v) = memory.get_variable(name.as_str()) {
230        if let Some(value) = v.get_number() {
231            value
232        } else {
233            let msg = format!("expected reader variable `{name}` to be numeric");
234            err(ErrorKey::ReaderDirectives).msg(msg).loc(name).loc_msg(v, "defined here").push();
235            0.0
236        }
237    } else {
238        let msg = format!("reader variable {name} not defined");
239        err(ErrorKey::ReaderDirectives).msg(msg).loc(name).push();
240        0.0
241    }
242}
243
/// A convenience trait to add some methods to [`char`].
///
/// All methods take `self` by value and classify a single character.
#[allow(clippy::wrong_self_convention)]
trait CharExt {
    /// Can the char be part of an unquoted token?
    fn is_id_char(self) -> bool;
    /// Can the char be part of a reader variable name?
    fn is_local_value_char(self) -> bool;
    /// Can the char be part of a [`Comparator`]?
    fn is_comparator_char(self) -> bool;
    /// Can the char be the last character in a [`Comparator`]?
    fn is_comparator_end_char(self) -> bool;
}
256
257impl CharExt for char {
258    fn is_id_char(self) -> bool {
259        self.is_alphabetic()
260            || self.is_ascii_digit()
261            // %, [, ] added for parsing .gui files
262            || matches!(self, '.' | ':' | '_' | '-' | '&' | '/' | '|' | '\'' | '%' | '[' | ']')
263            || (Game::is_hoi4() && (self == '?' || self == '@' || self == '^'))
264    }
265
266    fn is_local_value_char(self) -> bool {
267        self.is_ascii_alphanumeric() || self == '_'
268    }
269
270    // Comparator list for reference
271    // <, <=, =, !=, >, >=, ?=
272    fn is_comparator_char(self) -> bool {
273        self.is_comparator_end_char() || matches!(self, '!' | '?')
274    }
275
276    fn is_comparator_end_char(self) -> bool {
277        matches!(self, '<' | '>' | '=')
278    }
279}