tiger_lib/
pdxfile.rs

1//! Helper functions for loading pdx script files in various character encodings.
2//!
3//! The main entry point is [`PdxFile`].
4
5#[cfg(feature = "ck3")]
6use std::fs::read;
7use std::fs::read_to_string;
8
9#[cfg(feature = "ck3")]
10use encoding_rs::{UTF_8, WINDOWS_1252};
11
12use crate::block::Block;
13use crate::fileset::FileEntry;
14use crate::parse::ParserMemory;
15use crate::parse::pdxfile::parse_pdx_file;
16#[cfg(feature = "ck3")]
17use crate::parse::pdxfile::{PdxfileMemory, parse_reader_export};
18use crate::report::{ErrorKey, err, warn};
19
20const BOM_UTF8_BYTES: &[u8] = b"\xef\xbb\xbf";
21const BOM_UTF8_LEN: usize = BOM_UTF8_BYTES.len();
22const BOM_CHAR: char = '\u{feff}';
23
/// The encoding that a script file is expected to be in.
///
/// Each variant selects one of the `PdxFile` readers through `PdxFile::read_encoded`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PdxEncoding {
    /// UTF-8 that should start with a BOM; a missing BOM is reported as a warning.
    Utf8Bom,
    /// UTF-8 that may or may not start with a BOM; neither case is reported.
    #[cfg(feature = "jomini")]
    Utf8OptionalBom,
    /// UTF-8 if the file starts with a BOM, otherwise Windows-1252.
    #[cfg(feature = "ck3")]
    Detect,
    /// UTF-8 that should not start with a BOM; a BOM is reported as an error.
    #[cfg(feature = "hoi4")]
    Utf8NoBom,
}
34
/// Namespace for the file-reading functions of this module.
///
/// The type has no fields; all of its functions are associated functions that take the
/// file to read as a `FileEntry` argument.
pub struct PdxFile {}
36
37impl PdxFile {
38    /// Internal function to read a file in UTF-8 encoding.
39    fn read_utf8(entry: &FileEntry) -> Option<String> {
40        match read_to_string(entry.fullpath()) {
41            Ok(contents) => Some(contents),
42            Err(e) => {
43                let msg = "could not read file";
44                let info = &format!("{e:#}");
45                err(ErrorKey::ReadError).msg(msg).info(info).loc(entry).push();
46                None
47            }
48        }
49    }
50
51    /// Parse a UTF-8 file that should start with a BOM (Byte Order Marker).
52    pub fn read(entry: &FileEntry, parser: &ParserMemory) -> Option<Block> {
53        let contents = Self::read_utf8(entry)?;
54        if contents.starts_with(BOM_CHAR) {
55            Some(parse_pdx_file(entry, contents, BOM_UTF8_LEN, parser))
56        } else {
57            let msg = "Expected UTF-8 BOM encoding";
58            warn(ErrorKey::Encoding).msg(msg).abbreviated(entry).push();
59            Some(parse_pdx_file(entry, contents, 0, parser))
60        }
61    }
62
63    /// Parse a UTF-8 file that may must start with a BOM (Byte Order Marker).
64    #[cfg(feature = "hoi4")]
65    pub fn read_no_bom(entry: &FileEntry, parser: &ParserMemory) -> Option<Block> {
66        let contents = Self::read_utf8(entry)?;
67        if contents.starts_with(BOM_CHAR) {
68            let msg = "Expected UTF-8 encoding without BOM";
69            err(ErrorKey::Encoding).msg(msg).abbreviated(entry).push();
70            Some(parse_pdx_file(entry, contents, BOM_UTF8_LEN, parser))
71        } else {
72            Some(parse_pdx_file(entry, contents, 0, parser))
73        }
74    }
75
76    /// Parse a UTF-8 file that may optionally start with a BOM (Byte Order Marker).
77    pub fn read_optional_bom(entry: &FileEntry, parser: &ParserMemory) -> Option<Block> {
78        let contents = Self::read_utf8(entry)?;
79        if contents.starts_with(BOM_CHAR) {
80            Some(parse_pdx_file(entry, contents, BOM_UTF8_LEN, parser))
81        } else {
82            Some(parse_pdx_file(entry, contents, 0, parser))
83        }
84    }
85
86    /// Parse a file that may be in UTF-8 with BOM encoding, or Windows-1252 encoding.
87    #[cfg(feature = "ck3")]
88    pub fn read_detect_encoding(entry: &FileEntry, parser: &ParserMemory) -> Option<Block> {
89        let bytes = match read(entry.fullpath()) {
90            Ok(bytes) => bytes,
91            Err(e) => {
92                let msg = "could not read file";
93                let info = format!("{e:#}");
94                err(ErrorKey::ReadError).msg(msg).info(info).abbreviated(entry).push();
95                return None;
96            }
97        };
98        if bytes.starts_with(BOM_UTF8_BYTES) {
99            let (contents, errors) = UTF_8.decode_without_bom_handling(&bytes[BOM_UTF8_LEN..]);
100            if errors {
101                let msg = "could not decode UTF-8 file";
102                err(ErrorKey::Encoding).msg(msg).abbreviated(entry).push();
103                None
104            } else {
105                Some(parse_pdx_file(entry, contents.into_owned(), 0, parser))
106            }
107        } else {
108            let (contents, errors) = WINDOWS_1252.decode_without_bom_handling(&bytes);
109            if errors {
110                let msg = "could not decode WINDOWS-1252 file";
111                err(ErrorKey::Encoding).msg(msg).abbreviated(entry).push();
112                None
113            } else {
114                Some(parse_pdx_file(entry, contents.into_owned(), 0, parser))
115            }
116        }
117    }
118
119    pub fn read_encoded(
120        entry: &FileEntry,
121        encoding: PdxEncoding,
122        parser: &ParserMemory,
123    ) -> Option<Block> {
124        match encoding {
125            PdxEncoding::Utf8Bom => Self::read(entry, parser),
126            #[cfg(feature = "jomini")]
127            PdxEncoding::Utf8OptionalBom => Self::read_optional_bom(entry, parser),
128            #[cfg(feature = "ck3")]
129            PdxEncoding::Detect => Self::read_detect_encoding(entry, parser),
130            #[cfg(feature = "hoi4")]
131            PdxEncoding::Utf8NoBom => Self::read_no_bom(entry, parser),
132        }
133    }
134
135    #[cfg(feature = "ck3")]
136    pub fn reader_export(entry: &FileEntry, memory: &mut PdxfileMemory) {
137        if let Some(contents) = Self::read_utf8(entry) {
138            if contents.starts_with(BOM_CHAR) {
139                parse_reader_export(entry, contents, BOM_UTF8_LEN, memory);
140            } else {
141                let msg = "Expected UTF-8 BOM encoding";
142                warn(ErrorKey::Encoding).msg(msg).abbreviated(entry).push();
143                parse_reader_export(entry, contents, 0, memory);
144            }
145        }
146    }
147}