tiger_lib/
token.rs

1//! Contains the core [`Token`] and [`Loc`] types, which represent pieces of game script and where
2//! in the game files they came from.
3
4use std::borrow::{Borrow, Cow};
5use std::cmp::Ordering;
6use std::ffi::OsStr;
7use std::fmt::{Debug, Display, Error, Formatter};
8use std::hash::Hash;
9use std::mem::ManuallyDrop;
10use std::ops::{Bound, Range, RangeBounds};
11use std::path::{Path, PathBuf};
12use std::slice::SliceIndex;
13
14use bumpalo::Bump;
15
16use crate::date::Date;
17use crate::fileset::{FileEntry, FileKind, FileStage};
18use crate::macros::{MACRO_MAP, MacroMapIndex};
19use crate::pathtable::{PathTable, PathTableIndex};
20use crate::report::{ErrorKey, err, untidy};
21
/// A location in the game files: which file, and optionally which line and column in it.
///
/// `Loc` is `Copy` and is passed around by value.
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
pub struct Loc {
    /// Index into the [`PathTable`], which holds this file's pathname and full path.
    pub(crate) idx: PathTableIndex,
    /// The [`FileStage`] of the file this loc points into.
    pub stage: FileStage,
    /// The [`FileKind`] of the file this loc points into.
    pub kind: FileKind,
    /// line 0 means the loc applies to the file as a whole.
    pub line: u32,
    /// Column within `line`. Whole-file locs are created with column 0.
    pub column: u32,
    /// Used in macro expansions to point to the macro invocation
    /// in the macro table
    pub link_idx: Option<MacroMapIndex>,
}
34
35impl Loc {
36    #[must_use]
37    pub(crate) fn for_file(
38        pathname: PathBuf,
39        stage: FileStage,
40        kind: FileKind,
41        fullpath: PathBuf,
42    ) -> Self {
43        let idx = PathTable::store(pathname, fullpath);
44        Loc { idx, stage, kind, line: 0, column: 0, link_idx: None }
45    }
46
47    pub fn filename(self) -> Cow<'static, str> {
48        PathTable::lookup_path(self.idx)
49            .file_name()
50            .unwrap_or_else(|| OsStr::new(""))
51            .to_string_lossy()
52    }
53
54    pub fn pathname(self) -> &'static Path {
55        PathTable::lookup_path(self.idx)
56    }
57
58    pub fn fullpath(self) -> &'static Path {
59        PathTable::lookup_fullpath(self.idx)
60    }
61
62    #[inline]
63    pub fn same_file(self, other: Loc) -> bool {
64        self.idx == other.idx
65    }
66}
67
/// The partial order always agrees with the total order defined by the `Ord` impl.
impl PartialOrd for Loc {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to `Ord::cmp` so the two orderings can never disagree.
        Some(self.cmp(other))
    }
}
73
74impl Ord for Loc {
75    fn cmp(&self, other: &Self) -> Ordering {
76        self.idx
77            .cmp(&other.idx)
78            .then(self.line.cmp(&other.line))
79            .then(self.column.cmp(&other.column))
80            .then(
81                self.link_idx
82                    .map(|link| MACRO_MAP.get_loc(link))
83                    .cmp(&other.link_idx.map(|link| MACRO_MAP.get_loc(link))),
84            )
85    }
86}
87
88impl From<&FileEntry> for Loc {
89    fn from(entry: &FileEntry) -> Self {
90        if let Some(idx) = entry.path_idx() {
91            Loc {
92                idx,
93                stage: entry.stage(),
94                kind: entry.kind(),
95                line: 0,
96                column: 0,
97                link_idx: None,
98            }
99        } else {
100            Self::for_file(
101                entry.path().to_path_buf(),
102                entry.stage(),
103                entry.kind(),
104                entry.fullpath().to_path_buf(),
105            )
106        }
107    }
108}
109
110impl From<&mut FileEntry> for Loc {
111    fn from(entry: &mut FileEntry) -> Self {
112        (&*entry).into()
113    }
114}
115
116impl From<FileEntry> for Loc {
117    fn from(entry: FileEntry) -> Self {
118        (&entry).into()
119    }
120}
121
122impl Debug for Loc {
123    /// Roll our own `Debug` implementation to handle the path field
124    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
125        f.debug_struct("Loc")
126            .field("pathindex", &self.idx)
127            .field("pathname", &self.pathname())
128            .field("fullpath", &self.fullpath())
129            .field("stage", &self.stage)
130            .field("kind", &self.kind)
131            .field("line", &self.line)
132            .field("column", &self.column)
133            .field("linkindex", &self.link_idx)
134            .finish()
135    }
136}
137
// Per-thread bump arena that backs the strings handed out by `bump`.
// It is wrapped in `ManuallyDrop` so that the arena's destructor is never run,
// which is what `bump` relies on when it returns `&'static str`.
thread_local!(static STR_BUMP: ManuallyDrop<Bump> = ManuallyDrop::new(Bump::new()));
139
140/// Allocate the string on heap with a bump allocator.
141///
142/// SAFETY: This is safe as long as no `Bump::reset` is called to deallocate memory
143/// and `STR_BUMP` is not dropped when thread exits.
144pub(crate) fn bump(s: &str) -> &'static str {
145    STR_BUMP.with(|bump| {
146        let s = bump.alloc_str(s);
147        unsafe {
148            let s_ptr: *const str = s;
149            &*s_ptr
150        }
151    })
152}
153
/// A Token consists of a string and its location in the parsed files.
///
/// Equality and hashing of tokens look only at the string, not at the loc.
#[allow(missing_copy_implementations)]
#[derive(Clone, Debug)]
pub struct Token {
    /// The token's text. Either a string copied into the bump arena by `Token::new`,
    /// a caller-provided static string, or a slice of another token's text.
    s: &'static str,
    /// Where the token was found.
    pub loc: Loc,
}
161
162impl Token {
163    #[must_use]
164    pub fn new(s: &str, loc: Loc) -> Self {
165        Token { s: bump(s), loc }
166    }
167
168    #[must_use]
169    pub fn from_static_str(s: &'static str, loc: Loc) -> Self {
170        Token { s, loc }
171    }
172
173    /// Create a `Token` from a substring of the given `Token`.
174    #[must_use]
175    pub fn subtoken<R>(&self, range: R, loc: Loc) -> Token
176    where
177        R: RangeBounds<usize> + SliceIndex<str, Output = str>,
178    {
179        Token { s: &self.s[range], loc }
180    }
181
182    /// Create a `Token` from a subtring of the given `Token`,
183    /// stripping any whitespace from the created token.
184    #[must_use]
185    pub fn subtoken_stripped(&self, mut range: Range<usize>, mut loc: Loc) -> Token {
186        let mut start = match range.start_bound() {
187            Bound::Included(&i) => i,
188            Bound::Excluded(&i) => i + 1,
189            Bound::Unbounded => 0,
190        };
191        let mut end = match range.end_bound() {
192            Bound::Included(&i) => i + 1,
193            Bound::Excluded(&i) => i,
194            Bound::Unbounded => self.s.len(),
195        };
196        for (i, c) in self.s[range.clone()].char_indices() {
197            if !c.is_whitespace() {
198                start += i;
199                range = start..end;
200                break;
201            }
202            loc.column += 1;
203        }
204        for (i, c) in self.s[range.clone()].char_indices().rev() {
205            if !c.is_whitespace() {
206                end = start + i + c.len_utf8();
207                range = start..end;
208                break;
209            }
210        }
211        Token { s: &self.s[range], loc }
212    }
213
214    pub fn as_str(&self) -> &'static str {
215        self.s
216    }
217
218    pub fn is(&self, s: &str) -> bool {
219        self.s == s
220    }
221
222    pub fn lowercase_is(&self, s: &str) -> bool {
223        self.s.to_ascii_lowercase() == s
224    }
225
226    pub fn starts_with(&self, s: &str) -> bool {
227        self.s.starts_with(s)
228    }
229
230    #[must_use]
231    /// Split the token into one or more subtokens, with `ch` as the delimiter.
232    /// Updates the locs for the created subtokens.
233    /// This is not meant for multiline tokens.
234    /// # Panics
235    /// May panic if the token's column location exceeds 4,294,967,296.
236    pub fn split(&self, ch: char) -> Vec<Token> {
237        let mut pos = 0;
238        let mut vec = Vec::new();
239        let mut loc = self.loc;
240        let mut lines: u32 = 0;
241        for (cols, (i, c)) in self.s.char_indices().enumerate() {
242            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
243            if c == ch {
244                vec.push(self.subtoken(pos..i, loc));
245                pos = i + 1;
246                loc.column = self.loc.column + cols + 1;
247                loc.line = self.loc.line + lines;
248            }
249            if c == '\n' {
250                lines += 1;
251            }
252        }
253        vec.push(self.subtoken(pos.., loc));
254        vec
255    }
256
257    #[must_use]
258    pub fn strip_suffix(&self, sfx: &str) -> Option<Token> {
259        self.s.strip_suffix(sfx).map(|pfx| Token::from_static_str(pfx, self.loc))
260    }
261
262    #[must_use]
263    pub fn strip_prefix(&self, pfx: &str) -> Option<Token> {
264        #[allow(clippy::cast_possible_truncation)]
265        self.s.strip_prefix(pfx).map(|sfx| {
266            let mut loc = self.loc;
267            loc.column += pfx.chars().count() as u32;
268            Token::from_static_str(sfx, loc)
269        })
270    }
271
272    #[must_use]
273    /// Split the token into two subtokens, with the split at the first occurrence of `ch`.
274    /// Updates the locs for the created subtokens.
275    /// This is not meant for multiline tokens.
276    /// Returns `None` if `ch` was not found in the token.
277    /// # Panics
278    /// May panic if the token's column location exceeds 4,294,967,296.
279    pub fn split_once(&self, ch: char) -> Option<(Token, Token)> {
280        for (cols, (i, c)) in self.s.char_indices().enumerate() {
281            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
282            if c == ch {
283                let token1 = self.subtoken(..i, self.loc);
284                let mut loc = self.loc;
285                loc.column += cols + 1;
286                let token2 = self.subtoken(i + 1.., loc);
287                return Some((token1, token2));
288            }
289        }
290        None
291    }
292
293    /// Split the token into two subtokens, with the split at the first instance of `ch`, such that `ch` is part of the first returned token.
294    /// Updates the locs for the created subtokens.
295    /// This is not meant for multiline tokens.
296    /// Returns `None` if `ch` was not found in the token.
297    /// # Panics
298    /// May panic if the token's column location exceeds 4,294,967,296.
299    #[must_use]
300    pub fn split_after(&self, ch: char) -> Option<(Token, Token)> {
301        for (cols, (i, c)) in self.s.char_indices().enumerate() {
302            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
303            #[allow(clippy::cast_possible_truncation)] // chlen can't be more than 6
304            if c == ch {
305                let chlen = ch.len_utf8();
306                let token1 = self.subtoken(..i + chlen, self.loc);
307                let mut loc = self.loc;
308                loc.column += cols + chlen as u32;
309                let token2 = self.subtoken(i + chlen.., loc);
310                return Some((token1, token2));
311            }
312        }
313        None
314    }
315
316    /// Create a new token that is a concatenation of this token and `other`, with `c` between them.
317    pub fn combine(&mut self, other: &Token, c: char) {
318        let mut s = self.s.to_string();
319        s.push(c);
320        s.push_str(other.s);
321        self.s = bump(&s);
322    }
323
324    #[must_use]
325    /// Return a subtoken of this token, such that all whitespace is removed from the start and end.
326    /// Will update the loc of the subtoken.
327    /// This is not meant for multiline tokens.
328    /// # Panics
329    /// May panic if the token's column location exceeds 4,294,967,296.
330    pub fn trim(&self) -> Token {
331        let mut real_start = None;
332        let mut real_end = self.s.len();
333        for (cols, (i, c)) in self.s.char_indices().enumerate() {
334            let cols = u32::try_from(cols).expect("internal error: 2^32 columns");
335            if c != ' ' {
336                real_start = Some((cols, i));
337                break;
338            }
339        }
340        // looping over the indices is safe here because we're only skipping spaces
341        while real_end > 0 && &self.s[real_end - 1..real_end] == " " {
342            real_end -= 1;
343        }
344        if let Some((cols, i)) = real_start {
345            let mut loc = self.loc;
346            loc.column += cols;
347            self.subtoken(i..real_end, loc)
348        } else {
349            // all spaces
350            Token::from_static_str("", self.loc)
351        }
352    }
353
354    pub fn expect_number(&self) -> Option<f64> {
355        self.check_number();
356        // Trim "f" from the end of numbers
357        let s = self.s.trim_end_matches('f');
358        if let Ok(v) = s.parse::<f64>() {
359            Some(v)
360        } else {
361            err(ErrorKey::Validation).msg("expected number").loc(self).push();
362            None
363        }
364    }
365
366    /// Gets the field as a fixed-width decimal, specifically the value multiplied by 100,000
367    pub fn get_fixed_number(&self) -> Option<i64> {
368        if !self.s.contains('.') {
369            return Some(self.s.parse::<i64>().ok()? * 100_000);
370        }
371
372        let r = self.s.find('.')?;
373        let whole = &self.s[..r];
374        let fraction = &self.s[r + 1..];
375
376        if fraction.len() > 5 {
377            return None;
378        }
379        format!("{whole}{fraction:0<5}").parse::<i64>().ok()
380    }
381
382    pub fn get_number(&self) -> Option<f64> {
383        self.s.parse::<f64>().ok()
384    }
385
386    pub fn is_number(&self) -> bool {
387        self.s.parse::<f64>().is_ok()
388    }
389
390    pub fn check_number(&self) {
391        if let Some(idx) = self.s.find('.') {
392            if self.s.len() - idx > 6 {
393                let msg = "only 5 decimals are supported";
394                let info =
395                    "if you give more decimals, you get an error and the number is read as 0";
396                err(ErrorKey::Validation).msg(msg).info(info).loc(self).push();
397            }
398        }
399    }
400
401    /// Some files seem not to have the 5-decimal limitation
402    pub fn expect_precise_number(&self) -> Option<f64> {
403        // Trim "f" from the end of precise numbers
404        let s = if self.s.ends_with("inf") { self.s } else { self.s.trim_end_matches('f') };
405        if let Ok(v) = s.parse::<f64>() {
406            Some(v)
407        } else {
408            err(ErrorKey::Validation).msg("expected number").loc(self).push();
409            None
410        }
411    }
412
413    pub fn expect_integer(&self) -> Option<i64> {
414        if let Ok(v) = self.s.parse::<i64>() {
415            Some(v)
416        } else {
417            err(ErrorKey::Validation).msg("expected integer").loc(self).push();
418            None
419        }
420    }
421
422    pub fn get_integer(&self) -> Option<i64> {
423        self.s.parse::<i64>().ok()
424    }
425
426    pub fn is_integer(&self) -> bool {
427        self.s.parse::<i64>().is_ok()
428    }
429
430    pub fn expect_date(&self) -> Option<Date> {
431        if let Ok(v) = self.s.parse::<Date>() {
432            if self.s.ends_with('.') {
433                untidy(ErrorKey::Validation).msg("trailing dot on date").loc(self).push();
434            }
435            Some(v)
436        } else {
437            err(ErrorKey::Validation).msg("expected date").loc(self).push();
438            None
439        }
440    }
441
442    pub fn get_date(&self) -> Option<Date> {
443        self.s.parse::<Date>().ok()
444    }
445
446    pub fn is_date(&self) -> bool {
447        self.s.parse::<Date>().is_ok()
448    }
449
450    /// Tests if the taken is lowercase
451    pub fn is_lowercase(&self) -> bool {
452        !self.s.chars().any(char::is_uppercase)
453    }
454
455    #[must_use]
456    pub fn linked(mut self, link_idx: Option<MacroMapIndex>) -> Self {
457        self.loc.link_idx = link_idx;
458        self
459    }
460}
461
462impl From<&Token> for Token {
463    fn from(token: &Token) -> Token {
464        token.clone()
465    }
466}
467
468/// Tokens are compared for equality regardless of their loc.
469impl PartialEq for Token {
470    fn eq(&self, other: &Self) -> bool {
471        self.s == other.s
472    }
473}
474
// Token equality is plain string equality (see the `PartialEq` impl), which is a
// full equivalence relation.
impl Eq for Token {}
476
477impl Hash for Token {
478    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
479        self.s.hash(state);
480    }
481}
482
483impl Borrow<str> for Token {
484    fn borrow(&self) -> &str {
485        self.s
486    }
487}
488
489impl Borrow<str> for &Token {
490    fn borrow(&self) -> &str {
491        self.s
492    }
493}
494
495impl From<Loc> for Token {
496    fn from(loc: Loc) -> Self {
497        Token { s: "", loc }
498    }
499}
500
501impl Display for Token {
502    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
503        write!(f, "{}", self.s)
504    }
505}
506
/// A wrapper around [`Token`] that compares for equality with its loc as well as its string.
///
/// Hashing takes both the string and the loc into account as well.
/// `#[repr(transparent)]` gives the wrapper the same layout as the [`Token`] it contains.
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct TokenIdentity(Token);
511
512impl PartialEq for TokenIdentity {
513    fn eq(&self, other: &Self) -> bool {
514        self.0.s == other.0.s && self.0.loc == other.0.loc
515    }
516}
517
// Equality compares the string and the loc, both of which are themselves `Eq`.
impl Eq for TokenIdentity {}
519
520impl Hash for TokenIdentity {
521    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
522        self.0.s.hash(state);
523        self.0.loc.hash(state);
524    }
525}
526
527impl TokenIdentity {
528    pub fn new(token: Token) -> Self {
529        TokenIdentity(token)
530    }
531
532    #[allow(dead_code)]
533    pub fn inner(&self) -> &Token {
534        &self.0
535    }
536
537    #[allow(dead_code)]
538    pub fn into_inner(self) -> Token {
539        self.0
540    }
541}