tiger_lib/data/
localization.rs

1//! Validate `.yml` localization files
2
3use std::borrow::Borrow;
4use std::cmp::Ordering;
5use std::collections::hash_map::Entry;
6use std::ffi::OsStr;
7use std::fs::read_to_string;
8#[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
9use std::io::Cursor;
10use std::path::{Path, PathBuf};
11use std::sync::LazyLock;
12use std::sync::atomic::AtomicBool;
13use std::sync::atomic::Ordering::Relaxed;
14
15use bitvec::order::Lsb0;
16use bitvec::{BitArr, bitarr};
17#[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
18use murmur3::murmur3_32;
19use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
20use rayon::scope;
21use strum::{EnumCount, IntoEnumIterator};
22use strum_macros::{Display, EnumCount, EnumIter, EnumString, FromRepr, IntoStaticStr};
23
24use crate::block::Block;
25#[cfg(feature = "ck3")]
26use crate::ck3::tables::localization::{BUILTIN_MACROS_CK3, COMPLEX_TOOLTIPS_CK3};
27use crate::context::ScopeContext;
28use crate::datacontext::DataContext;
29use crate::datatype::{CodeChain, Datatype, validate_datatypes};
30#[cfg(feature = "eu5")]
31use crate::eu5::tables::localization::BUILTIN_MACROS_EU5;
32use crate::everything::Everything;
33use crate::fileset::{FileEntry, FileHandler, FileKind};
34use crate::game::Game;
35#[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
36use crate::helpers::TigerHashMapExt;
37use crate::helpers::{TigerHashMap, dup_error, stringify_list};
38#[cfg(feature = "hoi4")]
39use crate::hoi4::tables::localization::BUILTIN_MACROS_HOI4;
40#[cfg(feature = "imperator")]
41use crate::imperator::tables::localization::BUILTIN_MACROS_IMPERATOR;
42use crate::item::Item;
43use crate::macros::{MACRO_MAP, MacroMapIndex};
44use crate::parse::ParserMemory;
45use crate::parse::localization::{ValueParser, parse_loca};
46use crate::report::{ErrorKey, Severity, err, report, tips, warn};
47use crate::scopes::Scopes;
48use crate::token::Token;
49#[cfg(feature = "vic3")]
50use crate::vic3::tables::localization::BUILTIN_MACROS_VIC3;
51
/// Per-language localization tables: one map from localization key to [`LocaEntry`]
/// for each [`Language`] variant (the array has `Language::COUNT` slots).
/// It is indexed with a [`Language`] value rather than with a raw integer.
#[derive(Debug)]
pub struct Languages([TigerHashMap<&'static str, LocaEntry>; Language::COUNT]);
54
55impl core::ops::Index<Language> for Languages {
56    type Output = TigerHashMap<&'static str, LocaEntry>;
57
58    fn index(&self, index: Language) -> &Self::Output {
59        &self.0[index.to_idx()]
60    }
61}
62
63impl core::ops::IndexMut<Language> for Languages {
64    fn index_mut(&mut self, index: Language) -> &mut Self::Output {
65        &mut self.0[index.to_idx()]
66    }
67}
68
/// Database of all loaded localization keys and their values, for all supported languages.
///
/// Both bit arrays have one bit per [`Language`], addressed by `Language::to_idx`.
#[derive(Debug)]
pub struct Localization {
    /// Which languages to check, according to the config file.
    /// Files whose language is not enabled here are skipped when loading.
    check_langs: BitArr!(for Language::COUNT, in u16),
    /// Which languages also actually exist in the mod.
    /// This is used to not warn about missing loca when a mod doesn't have the language at all.
    /// (This saves them the effort of configuring `check_langs`).
    /// All per-language loops in the validation methods iterate over this set.
    mod_langs: BitArr!(for Language::COUNT, in u16),
    /// Database of all localizations, indexed first by language and then by localization key.
    locas: Languages,
}
81
/// List of languages that are supported by the game engine.
///
/// Which variants exist depends on the game features enabled at compile time.
/// The `u8` discriminant of a variant doubles as its index into [`Languages`] and into the
/// language bit arrays of [`Localization`] (see `Language::to_idx` / `Language::from_idx`).
/// The string forms derived via strum use snake_case names.
// LAST UPDATED CK3 VERSION 1.15.0
// LAST UPDATED VIC3 VERSION 1.7.6
#[derive(
    Debug,
    PartialEq,
    Eq,
    Clone,
    Copy,
    EnumString,
    EnumCount,
    EnumIter,
    FromRepr,
    IntoStaticStr,
    Display,
)]
#[strum(serialize_all = "snake_case")]
#[repr(u8)]
pub enum Language {
    English,
    Spanish,
    French,
    German,
    Russian,
    #[cfg(any(feature = "ck3", feature = "vic3", feature = "eu5"))]
    Korean,
    SimpChinese,
    #[cfg(any(feature = "vic3", feature = "hoi4", feature = "eu5"))]
    BrazPor,
    #[cfg(any(feature = "ck3", feature = "vic3", feature = "hoi4", feature = "eu5"))]
    Japanese,
    #[cfg(any(feature = "ck3", feature = "vic3", feature = "hoi4", feature = "eu5"))]
    Polish,
    #[cfg(any(feature = "vic3", feature = "eu5"))]
    Turkish,
}
118
119static L_LANGS: LazyLock<Box<[Box<str>]>> =
120    LazyLock::new(|| Language::iter().map(|l| format!("l_{l}").into_boxed_str()).collect());
121
122static LANG_LIST: LazyLock<Box<str>> = LazyLock::new(|| {
123    Language::iter().map(|l| l.to_string()).collect::<Vec<String>>().join(",").into_boxed_str()
124});
125
126impl Language {
127    fn from_idx(idx: usize) -> Self {
128        // SAFETY: This is safe to call assuming all indices were obtained from `to_idx`.
129        #[allow(clippy::cast_possible_truncation)]
130        Self::from_repr(idx as u8).unwrap()
131    }
132    fn to_idx(self) -> usize {
133        self as usize
134    }
135}
136
137/// List of known built-in keys used between `$...$` in any localization.
138/// This list is used to avoid reporting false positives.
139// TODO: maybe make the list more specific about which keys can contain which builtins
140fn is_builtin_macro<S: Borrow<str>>(s: S) -> bool {
141    let s = s.borrow();
142    match Game::game() {
143        #[cfg(feature = "ck3")]
144        Game::Ck3 => BUILTIN_MACROS_CK3.contains(&s),
145        #[cfg(feature = "vic3")]
146        Game::Vic3 => BUILTIN_MACROS_VIC3.contains(&s),
147        #[cfg(feature = "imperator")]
148        Game::Imperator => BUILTIN_MACROS_IMPERATOR.contains(&s),
149        #[cfg(feature = "eu5")]
150        Game::Eu5 => BUILTIN_MACROS_EU5.contains(&s),
151        #[cfg(feature = "hoi4")]
152        Game::Hoi4 => BUILTIN_MACROS_HOI4.contains(&s),
153    }
154}
155
/// One parsed key: value line from the localization values.
///
/// Entries compare and sort by the source location of their key (see the `PartialEq` and
/// `Ord` impls), not by their contents.
#[derive(Debug)]
pub struct LocaEntry {
    /// The localization key, including its source location.
    key: Token,
    /// The parsed form of the value.
    value: LocaValue,
    /// The original unparsed value, with enclosing `"` stripped.
    /// This is used for macro replacement.
    orig: Option<Token>,
    /// Whether this entry has been "used" (looked up) by anything in the mod
    used: AtomicBool,
    /// Whether this entry has been validated with a `ScopeContext`
    validated: AtomicBool,
}
169
170impl PartialEq for LocaEntry {
171    fn eq(&self, other: &LocaEntry) -> bool {
172        self.key.loc == other.key.loc
173    }
174}
175
// Marker impl: equality itself is defined by `PartialEq`, which compares `key.loc`.
impl Eq for LocaEntry {}
177
178impl PartialOrd for LocaEntry {
179    fn partial_cmp(&self, other: &LocaEntry) -> Option<Ordering> {
180        Some(self.cmp(other))
181    }
182}
183
184impl Ord for LocaEntry {
185    fn cmp(&self, other: &LocaEntry) -> Ordering {
186        self.key.loc.cmp(&other.key.loc)
187    }
188}
189
190impl LocaEntry {
191    pub fn new(key: Token, value: LocaValue, orig: Option<Token>) -> Self {
192        Self { key, value, orig, used: AtomicBool::new(false), validated: AtomicBool::new(false) }
193    }
194
195    // returns false to abort expansion in case of an error
196    fn expand_macros<'a>(
197        &'a self,
198        vec: &mut Vec<Token>,
199        from: &'a TigerHashMap<&'a str, LocaEntry>,
200        count: &mut usize,
201        sc: &mut ScopeContext,
202        link: Option<MacroMapIndex>,
203        data: &Everything,
204    ) -> bool {
205        // Are we (probably) stuck in a macro loop?
206        if *count > 250 {
207            return false;
208        }
209        *count += 1;
210
211        if let LocaValue::Macro(v) = &self.value {
212            for macrovalue in v {
213                match macrovalue {
214                    MacroValue::Text(token) => vec.push(token.clone().linked(link)),
215                    MacroValue::Keyword(keyword) => {
216                        if let Some(entry) = from.get(keyword.as_str()) {
217                            entry.used.store(true, Relaxed);
218                            entry.validated.store(true, Relaxed);
219                            if !entry.expand_macros(
220                                vec,
221                                from,
222                                count,
223                                sc,
224                                Some(MACRO_MAP.get_or_insert_loc(keyword.loc)),
225                                data,
226                            ) {
227                                return false;
228                            }
229                        } else if is_builtin_macro(keyword) {
230                            // we can't know what value it really has, so replace it with itself to
231                            // at least get comprehensible error messages
232                            vec.push(keyword.clone().linked(link));
233                        } else if let Some(scopes) = sc.is_name_defined(keyword.as_str(), data) {
234                            if scopes.contains(Scopes::Value) {
235                                // same as above... we can't know what value it really has
236                                vec.push(keyword.clone().linked(link));
237                            } else {
238                                let msg = &format!(
239                                    "The substitution parameter ${keyword}$ is not defined anywhere as a key."
240                                );
241                                warn(ErrorKey::Localization).msg(msg).loc(keyword).push();
242                            }
243                        } else {
244                            let msg = &format!(
245                                "The substitution parameter ${keyword}$ is not defined anywhere as a key."
246                            );
247                            warn(ErrorKey::Localization).msg(msg).loc(keyword).push();
248                        }
249                    }
250                }
251            }
252            true
253        } else if let Some(orig) = &self.orig {
254            vec.push(orig.clone().linked(link));
255            true
256        } else {
257            false
258        }
259    }
260}
261
/// The parsed form of a localization value, or of one piece of such a value.
#[derive(Clone, Debug, Default)]
pub enum LocaValue {
    // If the LocaValue is a Macro type, then it should be re-parsed after the macro values
    // have been filled in. Some macro values are supplied at runtime and we'll have to guess
    // at those.
    Macro(Vec<MacroValue>),
    // A sequence of values; each part is checked in turn during validation.
    Concat(Vec<LocaValue>),
    #[allow(dead_code)] // the Token is only used for ck3
    Text(Token),
    Markup,
    MarkupEnd,
    // The token is verified to exist as a localization key in the same language
    // (except for vic3's `BREAKDOWN_TAG`).
    Tooltip(Token),
    // Tag, key, value. Tag can influence how tooltip is looked up. If tag is `GAME_TRAIT`,
    // tooltip is a trait name and value is a character id. Any of the tokens may be a datatype
    // expression, which is passed through unparsed here.
    // The value is not stored in the enum because we don't validate it.
    // TODO: instead of Token here, maybe need Box<LocaValue> or a Vec<LocaValue>, or maybe a type
    // that's specifically "Token or CodeChain"
    ComplexTooltip(Box<Token>, Token),
    // The optional token is the formatting
    Code(CodeChain, Option<Token>),
    // The token is verified as a text icon unless it is a builtin macro or placeholder.
    Icon(Token),
    // An Icon with an [ ] expression inside it
    CalculatedIcon(Vec<LocaValue>),
    // Only validated for hoi4, where the token is checked as a country tag with a flag file.
    Flag(Token),
    // The default value.
    #[default]
    Error,
}
290
/// One piece of a localization value that contains `$...$` macros.
#[derive(Clone, Debug)]
pub enum MacroValue {
    // Literal text, copied as-is when the macros are expanded.
    Text(Token),
    // The name between the `$` signs, to be looked up when the macros are expanded.
    // The formatting is not stored in the enum because it's not validated.
    Keyword(Token),
}
297
298fn get_file_lang(filename: &OsStr) -> Option<Language> {
299    // Deliberate discrepancy here between the check and the error msg below.
300    // `l_{}` anywhere in the filename works, but `_l_{}.yml` is still recommended.
301    //
302    // Using to_string_lossy is ok here because non-unicode sequences will
303    // never match the suffix anyway.
304    let filename = filename.to_string_lossy();
305    L_LANGS.iter().position(|l| filename.contains(l.as_ref())).map(Language::from_idx)
306}
307
308impl Localization {
309    fn iter_lang(&self) -> impl Iterator<Item = Language> {
310        Language::iter().filter(|i| self.mod_langs[i.to_idx()])
311    }
312
313    pub fn exists(&self, key: &str) -> bool {
314        for lang in self.iter_lang() {
315            if !self.locas[lang].contains_key(key) {
316                return false;
317            }
318        }
319        true
320    }
321
322    // Undocumented; the hash algorithm was revealed by inspecting error.log and reverse
323    // engineering of CK3 binary through magic numbers. CK3 and VIC3 are supported.
324    #[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
325    fn all_collision_keys(&self, lang: Language) -> TigerHashMap<u32, Vec<&LocaEntry>> {
326        let loca_hashes: Vec<_> = self.locas[lang]
327            .par_iter()
328            .map(|(_, loca)| (loca, murmur3_32(&mut Cursor::new(loca.key.as_str()), 0).unwrap()))
329            .collect();
330        let mut result: TigerHashMap<u32, Vec<&LocaEntry>> =
331            TigerHashMap::with_capacity(loca_hashes.len());
332
333        for (l, h) in loca_hashes {
334            result.entry(h).or_default().push(l);
335        }
336        result.retain(|_, locas| locas.len() > 1);
337        result
338    }
339
340    pub fn iter_keys(&self) -> impl Iterator<Item = &Token> {
341        self.iter_lang()
342            .map(|i| &self.locas[i])
343            .flat_map(|hash| hash.values().map(|item| &item.key))
344    }
345
346    pub fn verify_exists_implied(&self, key: &str, token: &Token, max_sev: Severity) {
347        if key.is_empty() {
348            return;
349        }
350        let langs_missing = self.mark_used_return_missing(key);
351        if !langs_missing.is_empty() {
352            let msg = format!("missing {} localization key {key}", stringify_list(&langs_missing));
353            // TODO: get confidence level from caller
354            report(ErrorKey::MissingLocalization, Item::Localization.severity().at_most(max_sev))
355                .msg(msg)
356                .loc(token)
357                .push();
358        }
359    }
360
361    #[cfg(feature = "ck3")]
362    pub fn verify_name_exists(&self, name: &Token, max_sev: Severity) {
363        if name.as_str().is_empty() {
364            report(ErrorKey::MissingLocalization, Severity::Warning.at_most(max_sev))
365                .msg("empty name")
366                .loc(name)
367                .push();
368            return;
369        }
370
371        let langs_missing = self.mark_used_return_missing(name.as_str());
372        if !langs_missing.is_empty() {
373            // It's merely untidy if the name is only missing in latin-script languages and the
374            // name doesn't have indicators that it really needs to be localized (such as underscores
375            // or extra uppercase letters). In all other cases it's a warning.
376            //
377            // TODO: this logic assumes the input name is in English and it doesn't consider for example
378            // a Russian mod that only supports Russian localization and has names in Cyrillic.
379            let sev = if only_latin_script(&langs_missing)
380                && !name.as_str().contains('_')
381                && normal_capitalization_for_name(name.as_str())
382            {
383                Severity::Untidy
384            } else {
385                Severity::Warning
386            };
387
388            let msg =
389                format!("missing {} localization for name {name}", stringify_list(&langs_missing));
390            report(ErrorKey::MissingLocalization, sev.at_most(max_sev))
391                .strong()
392                .msg(msg)
393                .loc(name)
394                .push();
395        }
396    }
397
398    #[allow(dead_code)]
399    pub fn exists_lang(&self, key: &str, lang: Language) -> bool {
400        if !self.locas[lang].contains_key(key) {
401            return false;
402        }
403        true
404    }
405
406    pub fn verify_exists_lang(&self, token: &Token, lang: Option<Language>) {
407        self.verify_exists_implied_lang(token.as_str(), token, lang);
408    }
409
410    pub fn verify_exists_implied_lang(&self, key: &str, token: &Token, lang: Option<Language>) {
411        if key.is_empty() {
412            return;
413        }
414        if let Some(lang) = lang {
415            if !self.mark_used_lang_return_exists(key, lang) {
416                let msg = format!("missing {lang} localization key {key}");
417                // TODO: get confidence level from caller
418                warn(ErrorKey::MissingLocalization).msg(msg).loc(token).push();
419            }
420        } else {
421            self.verify_exists_implied(key, token, Severity::Warning);
422        }
423    }
424
425    /// Marks a localization key as used for all languages.
426    /// Returns whether the key exists for any language (same as [`Localization::exists`]).
427    pub fn mark_used_return_exists(&self, key: &str) -> bool {
428        let mut exists = false;
429        for lang in self.iter_lang() {
430            exists |= self.mark_used_lang_return_exists(key, lang);
431        }
432        exists
433    }
434
435    /// Marks a localization key as used for all languages.
436    /// Returns a [`Vec<&str>`] containing the languages for which the key does not exist.
437    fn mark_used_return_missing(&self, key: &str) -> Vec<&'static str> {
438        let mut langs_missing = Vec::new();
439        for lang in self.iter_lang() {
440            if !self.mark_used_lang_return_exists(key, lang) {
441                langs_missing.push(lang.into());
442            }
443        }
444        langs_missing
445    }
446
447    /// Marks a localization key as used for one language.
448    /// Returns whether the key exists for this language (same as [`Localization::exists_lang`]).
449    fn mark_used_lang_return_exists(&self, key: &str, lang: Language) -> bool {
450        if let Some(entry) = self.locas[lang].get(key) {
451            entry.used.store(true, Relaxed);
452            return true;
453        }
454        false
455    }
456
457    #[allow(dead_code)]
458    pub fn suggest(&self, key: &str, token: &Token) {
459        if key.is_empty() {
460            return;
461        }
462        let langs_missing = self.mark_used_return_missing(key);
463        // They're all missing
464        if langs_missing.len() == self.iter_lang().count() {
465            let msg = format!("you can define localization `{key}`");
466            tips(ErrorKey::SuggestLocalization).msg(msg).loc(token).push();
467        }
468        // The loca is defined for some languages but not others.
469        // This inconsistency is worth warning about.
470        else if !langs_missing.is_empty() {
471            let msg = format!("missing {} localization key {key}", stringify_list(&langs_missing));
472            report(ErrorKey::MissingLocalization, Item::Localization.severity())
473                .msg(msg)
474                .loc(token)
475                .push();
476        }
477    }
478
479    /// Return whether any language uses the given macro in its loca entry for this key.
480    /// Only a macro at the top level of this entry counts; ones hidden recursively in
481    /// other macros do not.
482    #[allow(dead_code)]
483    pub fn uses_macro(&self, key: &str, look_for: &str) -> bool {
484        let look_for = format!("${look_for}$");
485        for lang in self.iter_lang() {
486            if let Some(entry) = self.locas[lang].get(key) {
487                if let Some(orig) = &entry.orig {
488                    if orig.as_str().contains(&look_for) {
489                        return true;
490                    }
491                }
492            }
493        }
494        false
495    }
496
497    // Does every `[concept|E]` reference have a defined game concept?
498    // Does every other `[code]` block have valid promotes and functions?
499    // Does every $key$ in a macro have a corresponding loca key or named scope?
500    fn check_loca_code(
501        value: &LocaValue,
502        data: &Everything,
503        sc: &mut ScopeContext,
504        lang: Language,
505    ) {
506        match value {
507            LocaValue::Concat(v) | LocaValue::CalculatedIcon(v) => {
508                for value in v {
509                    Self::check_loca_code(value, data, sc, lang);
510                }
511            }
512            // TODO: validate the formatting codes
513            LocaValue::Code(chain, format) => {
514                // |E is the formatting used for game concepts in ck3
515                #[cfg(feature = "ck3")]
516                if Game::is_ck3() {
517                    if let Some(format) = format {
518                        if format.as_str().contains('E') || format.as_str().contains('e') {
519                            if let Some(name) = chain.as_gameconcept() {
520                                if !is_builtin_macro(name) {
521                                    data.verify_exists(Item::GameConcept, name);
522                                }
523                                return;
524                            }
525                        }
526                    }
527                }
528
529                // TODO: datatype is not really Unknown here, it should be a CString or CFixedPoint or some kind of number.
530                // But we can't express that yet.
531                validate_datatypes(
532                    chain,
533                    data,
534                    sc,
535                    &DataContext::new(),
536                    Datatype::Unknown,
537                    Some(lang),
538                    format.as_ref(),
539                    false,
540                );
541            }
542            LocaValue::Tooltip(token) => {
543                // TODO: should this be validated with validate_localization_sc ? (remember to avoid infinite loops)
544                if !(Game::is_vic3() && token.is("BREAKDOWN_TAG")) {
545                    data.localization.verify_exists_lang(token, Some(lang));
546                }
547            }
548            #[allow(unused_variables)] // tag only used by ck3
549            LocaValue::ComplexTooltip(tag, token) => {
550                // TODO: if any of the three are datatype expressions, validate them.
551                #[cfg(feature = "ck3")]
552                if Game::is_ck3() && !token.starts_with("[") && !is_builtin_macro(token) {
553                    match COMPLEX_TOOLTIPS_CK3.get(&*tag.as_str().to_lowercase()).copied() {
554                        None => {
555                            // TODO: should this be validated with validate_localization_sc ? (remember to avoid infinite loops)
556                            data.localization.verify_exists_lang(token, Some(lang));
557                        }
558                        Some(None) => (), // token is a runtime id
559                        Some(Some(itype)) => data.verify_exists(itype, token),
560                    }
561                }
562                #[cfg(feature = "vic3")]
563                if Game::is_vic3() && !token.starts_with("[") && !is_builtin_macro(token) {
564                    data.localization.verify_exists_lang(token, Some(lang));
565                }
566                // TODO: - imperator -
567            }
568            LocaValue::Icon(token) => {
569                if !is_builtin_macro(token) && !token.is("ICONKEY_icon") && !token.is("KEY_icon") {
570                    data.verify_exists(Item::TextIcon, token);
571                }
572            }
573            #[allow(unused_variables)]
574            LocaValue::Flag(token) => {
575                // TODO: Instead of this awkward 'contains TAG' heuristic, mark macros in the text
576                // somehow.
577                #[cfg(feature = "hoi4")]
578                if !is_builtin_macro(token) && !token.as_str().contains("TAG") {
579                    data.verify_exists(Item::CountryTag, token);
580                    let pathname = format!("gfx/flags/{token}.tga");
581                    data.verify_exists_implied(Item::File, &pathname, token);
582                }
583            }
584            _ => (),
585        }
586    }
587
588    #[cfg(feature = "ck3")]
589    pub fn verify_key_has_options(&self, loca: &str, key: &Token, n: i64, prefix: &str) {
590        for lang in self.iter_lang() {
591            if let Some(entry) = self.locas[lang].get(loca) {
592                if let Some(ref orig) = entry.orig {
593                    for i in 1..=n {
594                        let find = format!("${prefix}{i}$");
595                        let find2 = format!("${prefix}{i}|");
596                        if !orig.as_str().contains(&find) && !orig.as_str().contains(&find2) {
597                            warn(ErrorKey::Validation)
598                                .msg(format!("localization is missing {find}"))
599                                .loc(key)
600                                .loc_msg(&entry.key, "here")
601                                .push();
602                        }
603                    }
604                    let find = format!("${prefix}{}$", n + 1);
605                    let find2 = format!("${prefix}{}|", n + 1);
606                    if orig.as_str().contains(&find) && !orig.as_str().contains(&find2) {
607                        warn(ErrorKey::Validation)
608                            .msg("localization has too many options")
609                            .loc(key)
610                            .loc_msg(&entry.key, "here")
611                            .push();
612                    }
613                } else if n > 0 {
614                    let msg = format!("localization is missing ${prefix}1$");
615                    warn(ErrorKey::Validation).msg(msg).loc(key).loc_msg(&entry.key, "here").push();
616                }
617            }
618        }
619    }
620
621    fn validate_loca<'b>(
622        entry: &LocaEntry,
623        from: &'b TigerHashMap<&'b str, LocaEntry>,
624        data: &Everything,
625        sc: &mut ScopeContext,
626        lang: Language,
627    ) {
628        if matches!(entry.value, LocaValue::Macro(_)) {
629            let mut new_line = Vec::new();
630            let mut count = 0;
631            if entry.expand_macros(&mut new_line, from, &mut count, sc, None, data) {
632                // re-parse after macro expansion
633                let new_line_as_ref = new_line.iter().collect();
634                let value = ValueParser::new(new_line_as_ref).parse();
635                Self::check_loca_code(&value, data, sc, lang);
636            }
637        } else {
638            Self::check_loca_code(&entry.value, data, sc, lang);
639        }
640    }
641
642    pub fn validate_use(&self, key: &str, data: &Everything, sc: &mut ScopeContext) {
643        for lang in self.iter_lang() {
644            let loca = &self.locas[lang];
645            if let Some(entry) = loca.get(key) {
646                entry.used.store(true, Relaxed);
647                entry.validated.store(true, Relaxed);
648                Self::validate_loca(entry, loca, data, sc, lang);
649            }
650        }
651    }
652
653    #[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
654    fn check_collisions(&self, lang: Language) {
655        for (k, v) in self.all_collision_keys(lang) {
656            let mut rep = report(ErrorKey::LocalizationKeyCollision, Severity::Error)
657                .strong()
658                .msg(format!(
659                    "localization keys '{}' have same MURMUR3A hash '0x{k:08X}'",
660                    stringify_list(&v.iter().map(|loca| loca.key.as_str()).collect::<Vec<&str>>())
661                ))
662                .info("localization keys hash collision will cause some of them fail to load")
663                .loc(&v[0].key);
664            for loc in v.iter().skip(1) {
665                rep = rep.loc_msg(&loc.key, "here");
666            }
667            rep.push();
668        }
669    }
670
671    // This is in pass2 to make sure all `validated` entries have been marked.
672    pub fn validate_pass2(&self, data: &Everything) {
673        #[allow(unused_variables)]
674        scope(|s| {
675            for lang in self.iter_lang() {
676                let loca = &self.locas[lang];
677                // Check localization key collisions
678                #[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
679                s.spawn(move |_| self.check_collisions(lang));
680
681                // Collect and sort the entries before looping, to create more stable output
682                let mut unvalidated_entries: Vec<&LocaEntry> =
683                    loca.values().filter(|e| !e.validated.load(Relaxed)).collect();
684                unvalidated_entries.sort_unstable();
685                unvalidated_entries.par_iter().for_each(|entry| {
686                    // Technically we can now store true in entry.validated,
687                    // but the value is not needed anymore after this.
688                    let mut sc = ScopeContext::new_unrooted(Scopes::all(), &entry.key);
689                    sc.set_strict_scopes(false);
690                    Self::validate_loca(entry, loca, data, &mut sc, lang);
691                });
692            }
693        });
694    }
695
696    pub fn mark_category_used(&self, prefix: &str) {
697        let mut i = 0;
698        loop {
699            let loca = format!("{prefix}{i}");
700            if !self.mark_used_return_exists(&loca) {
701                break;
702            }
703            i += 1;
704        }
705    }
706
707    pub fn check_unused(&self, _data: &Everything) {
708        self.mark_category_used("LOADING_TIP_");
709        self.mark_category_used("HYBRID_NAME_FORMAT_");
710        self.mark_category_used("DIVERGE_NAME_FORMAT_");
711
712        for lang in self.iter_lang() {
713            let mut vec = Vec::new();
714            for entry in self.locas[lang].values() {
715                if !entry.used.load(Relaxed) {
716                    vec.push(entry);
717                }
718            }
719            vec.sort_unstable_by_key(|entry| &entry.key.loc);
720            for entry in vec {
721                report(ErrorKey::UnusedLocalization, Severity::Untidy)
722                    .msg("Unused localization")
723                    .abbreviated(&entry.key)
724                    .push();
725            }
726        }
727    }
728
729    #[cfg(feature = "ck3")]
730    pub fn check_pod_loca(&self, data: &Everything) {
731        for lang in self.iter_lang() {
732            for key in data.database.iter_keys(Item::PerkTree) {
733                let loca = format!("{key}_name");
734                if let Some(entry) = self.locas[lang].get(loca.as_str()) {
735                    if let LocaValue::Text(token) = &entry.value {
736                        if token.as_str().ends_with("_visible") {
737                            data.verify_exists(Item::ScriptedGui, token);
738                            data.verify_exists(Item::Localization, token);
739                        }
740                        continue;
741                    }
742                }
743                let msg = format!("missing loca `{key}_name: \"{key}_visible\"`");
744                let info = "this is needed for the `window_character_lifestyle.gui` code";
745                err(ErrorKey::PrincesOfDarkness).msg(msg).info(info).loc(key).push();
746            }
747        }
748    }
749}
750
751impl FileHandler<(Language, Vec<LocaEntry>)> for Localization {
752    fn config(&mut self, config: &Block) {
753        if let Some(block) = config.get_field_block("languages") {
754            // By default, self.check_langs is all true.
755            // If a languages block exists in the config, then check_langs
756            // should contain only the configured languages, so langs is
757            // initialized to all false here.
758            let mut langs = bitarr![u16, Lsb0; 0; Language::COUNT];
759
760            // TODO: warn if there are unknown languages in check or skip?
761            let check = block.get_field_values("check");
762            let skip = block.get_field_values("skip");
763
764            // If check is used, then check only those languages.
765            // If instead skip is used, then check all languages except the skipped ones.
766            for lang in Language::iter() {
767                let lang_str = lang.into();
768                if check.iter().any(|t| t.is(lang_str))
769                    || (check.is_empty() && skip.iter().all(|t| !t.is(lang_str)))
770                {
771                    langs.set(lang.to_idx(), true);
772                }
773            }
774            self.check_langs = langs;
775        }
776    }
777
778    fn subpath(&self) -> PathBuf {
779        if Game::is_hoi4() { PathBuf::from("localisation") } else { PathBuf::from("localization") }
780    }
781
782    fn load_file(
783        &self,
784        entry: &FileEntry,
785        _parser: &ParserMemory,
786    ) -> Option<(Language, Vec<LocaEntry>)> {
787        if !entry.filename().to_string_lossy().ends_with(".yml") {
788            return None;
789        }
790
791        // unwrap is safe here because we're only handed files under localization/
792        // to_string_lossy is ok because we compare lang against a set of known strings.
793        let lang_str = entry.path().components().nth(1).unwrap().as_os_str().to_string_lossy();
794
795        // special case for this file
796        if lang_str == "languages.yml" {
797            return None;
798        }
799
800        if let Some(filelang) = get_file_lang(entry.filename()) {
801            if !self.check_langs[filelang.to_idx()] {
802                return None;
803            }
804            // Localization files don't have to be in a subdirectory corresponding to their language.
805            // However, if there's one in a subdirectory for a *different* language than the one in its name,
806            // then something is probably wrong.
807            if let Ok(lang) = Language::try_from(lang_str.as_ref()) {
808                if filelang != lang {
809                    let msg = "localization file with wrong name or in wrong directory";
810                    let info = "A localization file should be in a subdirectory corresponding to its language.";
811                    warn(ErrorKey::Filename).msg(msg).info(info).loc(entry).push();
812                }
813            }
814            match read_to_string(entry.fullpath()) {
815                Ok(content) => {
816                    return Some((filelang, parse_loca(entry, content, filelang).collect()));
817                }
818                Err(e) => {
819                    let msg = "could not read file";
820                    let info = &format!("{e:#}");
821                    err(ErrorKey::ReadError).msg(msg).info(info).loc(entry).push();
822                }
823            }
824        } else if entry.kind() >= FileKind::Vanilla {
825            // Check for `FileKind::Vanilla` because Jomini and Clausewitz support more languages
826            let msg = "could not determine language from filename";
827            let info = format!(
828                "Localization filenames should end in _l_language.yml, where language is one of {}",
829                *LANG_LIST
830            );
831            err(ErrorKey::Filename).msg(msg).info(info).loc(entry).push();
832        }
833        None
834    }
835
836    fn handle_file(&mut self, entry: &FileEntry, loaded: (Language, Vec<LocaEntry>)) {
837        let (filelang, vec) = loaded;
838        let hash = &mut self.locas[filelang];
839        if hash.is_empty() {
840            // empirically ~290k for each lang of ck3
841            hash.reserve(300_000);
842        }
843
844        if entry.kind() == FileKind::Mod {
845            self.mod_langs.set(filelang.to_idx(), true);
846        }
847
848        for loca in vec {
849            match hash.entry(loca.key.as_str()) {
850                Entry::Occupied(mut occupied_entry) => {
851                    let other = occupied_entry.get();
852                    // other.key and loca.key are in the other order than usual here,
853                    // because in loca the older definition overrides the later one.
854                    if is_replace_path(entry.path()) {
855                        occupied_entry.insert(loca);
856                    } else if other.key.loc.kind == entry.kind() && other.orig != loca.orig {
857                        dup_error(&other.key, &loca.key, "localization");
858                    }
859                }
860                Entry::Vacant(vacant_entry) => {
861                    vacant_entry.insert(loca);
862                }
863            }
864        }
865    }
866}
867
868impl Default for Localization {
869    fn default() -> Self {
870        Localization {
871            check_langs: bitarr![u16, Lsb0; 1; Language::COUNT],
872            mod_langs: bitarr![u16, Lsb0; 0; Language::COUNT],
873            locas: Languages(std::array::from_fn(|_| TigerHashMap::default())),
874        }
875    }
876}
877
/// Return true if any element of `path` is exactly `replace`.
///
/// It's been tested that localization/replace/english and localization/english/replace both
/// work, so the position of the `replace` element within the path does not matter.
fn is_replace_path(path: &Path) -> bool {
    path.iter().any(|element| element == "replace")
}
887
/// These are the languages in which it's reasonable to present an ascii name unchanged.
///
/// Entries are language names as plain strings; `only_latin_script` compares its input
/// against this list.
#[cfg(feature = "ck3")]
const LATIN_SCRIPT_LANGS: &[&str] =
    &["english", "french", "german", "spanish", "braz_por", "polish", "turkish"];
892
/// Return true iff every language in `langs` is listed in `LATIN_SCRIPT_LANGS`, i.e. is a
/// language in which it's reasonable to present an ascii name unchanged.
///
/// An empty `langs` yields true.
#[cfg(feature = "ck3")]
fn only_latin_script(langs: &[&str]) -> bool {
    for lang in langs {
        if !LATIN_SCRIPT_LANGS.contains(lang) {
            return false;
        }
    }
    true
}
899
900/// Check that the string only has capital letters at the start or after a space or hyphen
901#[cfg(feature = "ck3")]
fn normal_capitalization_for_name(name: &str) -> bool {
    // Pair each character with the one before it. A virtual leading space stands in for the
    // start of the string, so the very first character is allowed to be uppercase.
    let predecessors = std::iter::once(' ').chain(name.chars());
    predecessors
        .zip(name.chars())
        .all(|(before, ch)| !ch.is_uppercase() || before == ' ' || before == '-')
}
912
#[cfg(all(test, feature = "ck3"))]
mod tests {
    use std::path::PathBuf;

    use super::*;
    use crate::fileset::{FileKind, FileStage};
    use crate::token::{Loc, Token};

    /// Lists made up solely of latin-script languages pass, one foreign entry makes the list
    /// fail, and the empty list passes.
    #[test]
    fn test_only_latin_script() {
        let latin = ["english", "french", "german"];
        assert!(only_latin_script(&latin));

        let with_korean = ["english", "french", "german", "korean"];
        assert!(!only_latin_script(&with_korean));

        let empty: [&str; 0] = [];
        assert!(only_latin_script(&empty));
    }

    /// Capitals are accepted at the start and after a space or hyphen, and rejected elsewhere.
    #[test]
    fn test_normal_capitalization_for_name() {
        // Single words, capitalized or not.
        assert!(normal_capitalization_for_name("George"));
        assert!(normal_capitalization_for_name("george"));

        // Capitals following a hyphen or a space.
        assert!(normal_capitalization_for_name("Jean-Claude"));
        assert!(normal_capitalization_for_name("Abu-l-Fadl al-Malik"));
        assert!(normal_capitalization_for_name("Abu Abdallah Muhammad"));

        // Capitals in the middle of a word.
        assert!(!normal_capitalization_for_name("BjOrn"));
        assert!(!normal_capitalization_for_name("AbuAbdallahMuhammad"));
    }

    /// Keys that are known to collide must end up together in one collision group.
    #[test]
    fn test_collision_detection() {
        let lang = Language::English;
        // dummy location for tokens
        let dummy_loc =
            Loc::for_file(PathBuf::new(), FileStage::NoStage, FileKind::Mod, PathBuf::new());

        let pairs = [
            // CK3 examples
            ("Mallobald", "laamp_base_contract_schemes.2541.e.tt.employer_has_trait.paranoid"),
            ("dynn_Hkeng", "debug_min_popular_opinion_modifier"),
            ("b_hinggan_adj", "grand_wedding_completed_guest"),
            // Imperator examples
            ("carthage_mission_trade_metropolis_west", "me_diadochi_empire_events.316.at"),
            ("Azdumani", "me_patauion_02.43.b_tt"),
            ("PROV7234_hellenic", "me_kush_15_desc"),
        ];

        // build a localization database containing the known colliding keys
        let mut loc = Localization::default();
        for key in pairs.iter().flat_map(|&(k1, k2)| [k1, k2]) {
            let token = Token::from_static_str(key, dummy_loc);
            let entry = LocaEntry::new(token.clone(), LocaValue::Text(token), None);
            loc.locas[lang].insert(key, entry);
        }

        let collisions = loc.all_collision_keys(lang);
        for &(k1, k2) in &pairs {
            let grouped_together = collisions.values().any(|group| {
                let has = |key: &str| group.iter().any(|e| e.key.as_str() == key);
                has(k1) && has(k2)
            });
            assert!(grouped_together, "expected collision between {k1} and {k2}");
        }
    }
}