Skip to main content

tiger_lib/data/
localization.rs

1//! Validate `.yml` localization files
2
3use std::borrow::Borrow;
4use std::cmp::Ordering;
5use std::collections::hash_map::Entry;
6use std::ffi::OsStr;
7use std::fs::read_to_string;
8#[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
9use std::io::Cursor;
10use std::path::{Path, PathBuf};
11use std::sync::LazyLock;
12use std::sync::atomic::AtomicBool;
13use std::sync::atomic::Ordering::Relaxed;
14
15use bitvec::order::Lsb0;
16use bitvec::{BitArr, bitarr};
17#[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
18use murmur3::murmur3_32;
19use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
20use rayon::scope;
21use strum::{EnumCount, IntoEnumIterator};
22use strum_macros::{Display, EnumCount, EnumIter, EnumString, FromRepr, IntoStaticStr};
23
24use crate::block::Block;
25#[cfg(feature = "ck3")]
26use crate::ck3::tables::localization::{BUILTIN_MACROS_CK3, COMPLEX_TOOLTIPS_CK3};
27use crate::context::ScopeContext;
28use crate::datacontext::DataContext;
29use crate::datatype::{CodeChain, Datatype, validate_datatypes};
30#[cfg(feature = "eu5")]
31use crate::eu5::tables::localization::BUILTIN_MACROS_EU5;
32use crate::everything::Everything;
33use crate::fileset::{FileEntry, FileHandler, FileKind};
34use crate::game::Game;
35#[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
36use crate::helpers::TigerHashMapExt;
37use crate::helpers::{TigerHashMap, dup_error, stringify_list};
38#[cfg(feature = "hoi4")]
39use crate::hoi4::tables::localization::BUILTIN_MACROS_HOI4;
40#[cfg(feature = "imperator")]
41use crate::imperator::tables::localization::BUILTIN_MACROS_IMPERATOR;
42use crate::item::{Item, ItemExt};
43use crate::macros::{MACRO_MAP, MacroMapIndex};
44use crate::parse::ParserMemory;
45use crate::parse::localization::{ValueParser, parse_loca};
46use crate::report::{ErrorKey, Severity, err, report, tips, warn};
47use crate::scopes::Scopes;
48use crate::token::Token;
49#[cfg(feature = "vic3")]
50use crate::vic3::tables::localization::BUILTIN_MACROS_VIC3;
51
/// Per-language localization tables: one map for every [`Language`] variant.
///
/// Each map goes from a localization key to its [`LocaEntry`]. The array is addressed by
/// `Language` through the `Index` / `IndexMut` implementations, which use `Language::to_idx`.
#[derive(Debug)]
pub struct Languages([TigerHashMap<&'static str, LocaEntry>; Language::COUNT]);
54
55impl core::ops::Index<Language> for Languages {
56    type Output = TigerHashMap<&'static str, LocaEntry>;
57
58    fn index(&self, index: Language) -> &Self::Output {
59        &self.0[index.to_idx()]
60    }
61}
62
63impl core::ops::IndexMut<Language> for Languages {
64    fn index_mut(&mut self, index: Language) -> &mut Self::Output {
65        &mut self.0[index.to_idx()]
66    }
67}
68
/// Database of all loaded localization keys and their values, for all supported languages.
///
/// Both bit arrays hold one bit per [`Language`], addressed with `Language::to_idx`.
#[derive(Debug)]
pub struct Localization {
    /// Which languages to check, according to the config file.
    /// Files for languages whose bit is unset are not loaded.
    check_langs: BitArr!(for Language::COUNT, in u16),
    /// Which languages also actually exist in the mod.
    /// This is used to not warn about missing loca when a mod doesn't have the language at all.
    /// (This saves them the effort of configuring `check_langs`).
    /// Every per-language loop in this type iterates over these languages only.
    mod_langs: BitArr!(for Language::COUNT, in u16),
    /// Database of all localizations, indexed first by language and then by localization key.
    locas: Languages,
}
81
/// List of languages that are supported by the game engine.
///
/// Variants guarded by `#[cfg(feature = ...)]` only exist when at least one of the listed game
/// features is compiled in, so `Language::COUNT` and the numeric discriminants of the later
/// variants depend on the enabled features.
///
/// The strum-derived string forms use snake_case (see `serialize_all` below); they are what
/// the `l_{language}` file markers and the comma-separated language list are built from.
// LAST UPDATED CK3 VERSION 1.15.0
// LAST UPDATED VIC3 VERSION 1.7.6
#[derive(
    Debug,
    PartialEq,
    Eq,
    Clone,
    Copy,
    EnumString,
    EnumCount,
    EnumIter,
    FromRepr,
    IntoStaticStr,
    Display,
)]
#[strum(serialize_all = "snake_case")]
#[repr(u8)]
pub enum Language {
    English,
    Spanish,
    French,
    German,
    Russian,
    #[cfg(any(feature = "ck3", feature = "vic3", feature = "eu5"))]
    Korean,
    SimpChinese,
    #[cfg(any(feature = "vic3", feature = "hoi4", feature = "eu5"))]
    BrazPor,
    #[cfg(any(feature = "ck3", feature = "vic3", feature = "hoi4", feature = "eu5"))]
    Japanese,
    #[cfg(any(feature = "ck3", feature = "vic3", feature = "hoi4", feature = "eu5"))]
    Polish,
    #[cfg(any(feature = "vic3", feature = "eu5"))]
    Turkish,
}
118
119static L_LANGS: LazyLock<Box<[Box<str>]>> =
120    LazyLock::new(|| Language::iter().map(|l| format!("l_{l}").into_boxed_str()).collect());
121
122static LANG_LIST: LazyLock<Box<str>> = LazyLock::new(|| {
123    Language::iter().map(|l| l.to_string()).collect::<Vec<String>>().join(",").into_boxed_str()
124});
125
126impl Language {
127    fn from_idx(idx: usize) -> Self {
128        // SAFETY: This is safe to call assuming all indices were obtained from `to_idx`.
129        #[allow(clippy::cast_possible_truncation)]
130        Self::from_repr(idx as u8).unwrap()
131    }
132    fn to_idx(self) -> usize {
133        self as usize
134    }
135}
136
137/// List of known built-in keys used between `$...$` in any localization.
138/// This list is used to avoid reporting false positives.
139// TODO: maybe make the list more specific about which keys can contain which builtins
140fn is_builtin_macro<S: Borrow<str>>(s: S) -> bool {
141    let s = s.borrow();
142    match Game::game() {
143        #[cfg(feature = "ck3")]
144        Game::Ck3 => BUILTIN_MACROS_CK3.contains(&s),
145        #[cfg(feature = "vic3")]
146        Game::Vic3 => BUILTIN_MACROS_VIC3.contains(&s),
147        #[cfg(feature = "imperator")]
148        Game::Imperator => BUILTIN_MACROS_IMPERATOR.contains(&s),
149        #[cfg(feature = "eu5")]
150        Game::Eu5 => BUILTIN_MACROS_EU5.contains(&s),
151        #[cfg(feature = "hoi4")]
152        Game::Hoi4 => BUILTIN_MACROS_HOI4.contains(&s),
153    }
154}
155
/// One parsed key: value line from the localization values.
///
/// The two flags are atomics so that they can be set through a shared reference.
#[derive(Debug)]
pub struct LocaEntry {
    /// The localization key. Its `loc` is what entries are compared and sorted by.
    key: Token,
    /// The parsed value.
    value: LocaValue,
    /// The original unparsed value, with enclosing `"` stripped.
    /// This is used for macro replacement.
    orig: Option<Token>,
    /// Whether this entry has been "used" (looked up) by anything in the mod
    used: AtomicBool,
    /// Whether this entry has been validated with a `ScopeContext`
    validated: AtomicBool,
}
169
170impl PartialEq for LocaEntry {
171    fn eq(&self, other: &LocaEntry) -> bool {
172        self.key.loc == other.key.loc
173    }
174}
175
176impl Eq for LocaEntry {}
177
178impl PartialOrd for LocaEntry {
179    fn partial_cmp(&self, other: &LocaEntry) -> Option<Ordering> {
180        Some(self.cmp(other))
181    }
182}
183
184impl Ord for LocaEntry {
185    fn cmp(&self, other: &LocaEntry) -> Ordering {
186        self.key.loc.cmp(&other.key.loc)
187    }
188}
189
190impl LocaEntry {
191    pub fn new(key: Token, value: LocaValue, orig: Option<Token>) -> Self {
192        Self { key, value, orig, used: AtomicBool::new(false), validated: AtomicBool::new(false) }
193    }
194
195    // returns false to abort expansion in case of an error
196    fn expand_macros<'a>(
197        &'a self,
198        vec: &mut Vec<Token>,
199        from: &'a TigerHashMap<&'a str, LocaEntry>,
200        count: &mut usize,
201        sc: &mut ScopeContext,
202        link: Option<MacroMapIndex>,
203        data: &Everything,
204    ) -> bool {
205        // Are we (probably) stuck in a macro loop?
206        if *count > 250 {
207            return false;
208        }
209        *count += 1;
210
211        if let LocaValue::Macro(v) = &self.value {
212            for macrovalue in v {
213                match macrovalue {
214                    MacroValue::Text(token) => vec.push(token.clone().linked(link)),
215                    MacroValue::Keyword(keyword) => {
216                        if let Some(entry) = from.get(keyword.as_str()) {
217                            entry.used.store(true, Relaxed);
218                            entry.validated.store(true, Relaxed);
219                            if !entry.expand_macros(
220                                vec,
221                                from,
222                                count,
223                                sc,
224                                Some(MACRO_MAP.get_or_insert_loc(keyword.loc)),
225                                data,
226                            ) {
227                                return false;
228                            }
229                        } else if is_builtin_macro(keyword) {
230                            // we can't know what value it really has, so replace it with itself to
231                            // at least get comprehensible error messages
232                            vec.push(keyword.clone().linked(link));
233                        } else if let Some(scopes) = sc.is_name_defined(keyword.as_str(), data) {
234                            if scopes.contains(Scopes::Value) {
235                                // same as above... we can't know what value it really has
236                                vec.push(keyword.clone().linked(link));
237                            } else {
238                                let msg = &format!(
239                                    "The substitution parameter ${keyword}$ is not defined anywhere as a key."
240                                );
241                                warn(ErrorKey::Localization).msg(msg).loc(keyword).push();
242                            }
243                        } else {
244                            let msg = &format!(
245                                "The substitution parameter ${keyword}$ is not defined anywhere as a key."
246                            );
247                            warn(ErrorKey::Localization).msg(msg).loc(keyword).push();
248                        }
249                    }
250                }
251            }
252            true
253        } else if let Some(orig) = &self.orig {
254            vec.push(orig.clone().linked(link));
255            true
256        } else {
257            false
258        }
259    }
260}
261
/// The parsed form of a localization value. `Error` is the default variant.
#[derive(Clone, Debug, Default)]
pub enum LocaValue {
    /// If the LocaValue is a Macro type, then it should be re-parsed after the macro values
    /// have been filled in. Some macro values are supplied at runtime and we'll have to guess
    /// at those.
    Macro(Vec<MacroValue>),
    /// A sequence of values; each element is checked in turn.
    Concat(Vec<LocaValue>),
    #[allow(dead_code)] // the Token is only used for ck3
    Text(Token),
    Markup,
    MarkupEnd,
    /// The token is looked up as a localization key in the same language
    /// (with an exception for `BREAKDOWN_TAG` in vic3).
    Tooltip(Token),
    /// Tag, key, value. Tag can influence how tooltip is looked up. If tag is `GAME_TRAIT`,
    /// tooltip is a trait name and value is a character id. Any of the tokens may be a datatype
    /// expression, which is passed through unparsed here.
    /// The value is not stored in the enum because we don't validate it.
    // TODO: instead of Token here, maybe need Box<LocaValue> or a Vec<LocaValue>, or maybe a type
    // that's specifically "Token or CodeChain"
    ComplexTooltip(Box<Token>, Token),
    /// The optional token is the formatting
    Code(CodeChain, Option<Token>),
    Icon(Token),
    /// An Icon with an [ ] expression inside it
    CalculatedIcon(Vec<LocaValue>),
    Flag(Token),
    #[default]
    Error,
}
290
/// One piece of a `LocaValue::Macro` value.
#[derive(Clone, Debug)]
pub enum MacroValue {
    /// Literal text, copied unchanged during macro expansion.
    Text(Token),
    /// The name found between `$...$`; macro expansion looks it up as a localization key.
    /// The formatting is not stored in the enum because it's not validated.
    Keyword(Token),
}
297
298fn get_file_lang(filename: &OsStr) -> Option<Language> {
299    // Deliberate discrepancy here between the check and the error msg below.
300    // `l_{}` anywhere in the filename works, but `_l_{}.yml` is still recommended.
301    //
302    // Using to_string_lossy is ok here because non-unicode sequences will
303    // never match the suffix anyway.
304    let filename = filename.to_string_lossy();
305    L_LANGS.iter().position(|l| filename.contains(l.as_ref())).map(Language::from_idx)
306}
307
308impl Localization {
309    fn iter_lang(&self) -> impl Iterator<Item = Language> {
310        Language::iter().filter(|i| self.mod_langs[i.to_idx()])
311    }
312
313    pub fn exists(&self, key: &str) -> bool {
314        for lang in self.iter_lang() {
315            if !self.locas[lang].contains_key(key) {
316                return false;
317            }
318        }
319        true
320    }
321
322    // Undocumented; the hash algorithm was revealed by inspecting error.log and reverse
323    // engineering of CK3 binary through magic numbers. CK3 and VIC3 are supported.
324    #[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
325    fn all_collision_keys(&self, lang: Language) -> TigerHashMap<u32, Vec<&LocaEntry>> {
326        let loca_hashes: Vec<_> = self.locas[lang]
327            .par_iter()
328            .map(|(_, loca)| (loca, murmur3_32(&mut Cursor::new(loca.key.as_str()), 0).unwrap()))
329            .collect();
330        let mut result: TigerHashMap<u32, Vec<&LocaEntry>> =
331            TigerHashMap::with_capacity(loca_hashes.len());
332
333        for (l, h) in loca_hashes {
334            result.entry(h).or_default().push(l);
335        }
336        result.retain(|_, locas| locas.len() > 1);
337        result
338    }
339
340    pub fn iter_keys(&self) -> impl Iterator<Item = &Token> {
341        self.iter_lang()
342            .map(|i| &self.locas[i])
343            .flat_map(|hash| hash.values().map(|item| &item.key))
344    }
345
346    pub fn verify_exists_implied(&self, key: &str, token: &Token, max_sev: Severity) {
347        if key.is_empty() {
348            return;
349        }
350        let langs_missing = self.mark_used_return_missing(key);
351        if !langs_missing.is_empty() {
352            let msg = format!("missing {} localization key {key}", stringify_list(&langs_missing));
353            // TODO: get confidence level from caller
354            report(ErrorKey::MissingLocalization, Item::Localization.severity().at_most(max_sev))
355                .msg(msg)
356                .loc(token)
357                .push();
358        }
359    }
360
361    #[cfg(feature = "ck3")]
362    pub fn verify_name_exists(&self, name: &Token, max_sev: Severity) {
363        if name.as_str().is_empty() {
364            report(ErrorKey::MissingLocalization, Severity::Warning.at_most(max_sev))
365                .msg("empty name")
366                .loc(name)
367                .push();
368            return;
369        }
370
371        let langs_missing = self.mark_used_return_missing(name.as_str());
372        if !langs_missing.is_empty() {
373            // It's merely untidy if the name is only missing in latin-script languages and the
374            // name doesn't have indicators that it really needs to be localized (such as underscores
375            // or extra uppercase letters). In all other cases it's a warning.
376            //
377            // TODO: this logic assumes the input name is in English and it doesn't consider for example
378            // a Russian mod that only supports Russian localization and has names in Cyrillic.
379            let sev = if only_latin_script(&langs_missing)
380                && !name.as_str().contains('_')
381                && normal_capitalization_for_name(name.as_str())
382            {
383                Severity::Untidy
384            } else {
385                Severity::Warning
386            };
387
388            let msg =
389                format!("missing {} localization for name {name}", stringify_list(&langs_missing));
390            report(ErrorKey::MissingLocalization, sev.at_most(max_sev))
391                .strong()
392                .msg(msg)
393                .loc(name)
394                .push();
395        }
396    }
397
398    #[allow(dead_code)]
399    pub fn exists_lang(&self, key: &str, lang: Language) -> bool {
400        if !self.locas[lang].contains_key(key) {
401            return false;
402        }
403        true
404    }
405
406    pub fn verify_exists_lang(&self, token: &Token, lang: Option<Language>) {
407        self.verify_exists_implied_lang(token.as_str(), token, lang);
408    }
409
410    pub fn verify_exists_implied_lang(&self, key: &str, token: &Token, lang: Option<Language>) {
411        if key.is_empty() {
412            return;
413        }
414        if let Some(lang) = lang {
415            if !self.mark_used_lang_return_exists(key, lang) {
416                let msg = format!("missing {lang} localization key {key}");
417                // TODO: get confidence level from caller
418                warn(ErrorKey::MissingLocalization).msg(msg).loc(token).push();
419            }
420        } else {
421            self.verify_exists_implied(key, token, Severity::Warning);
422        }
423    }
424
425    /// Marks a localization key as used for all languages.
426    /// Returns whether the key exists for any language (same as [`Localization::exists`]).
427    pub fn mark_used_return_exists(&self, key: &str) -> bool {
428        let mut exists = false;
429        for lang in self.iter_lang() {
430            exists |= self.mark_used_lang_return_exists(key, lang);
431        }
432        exists
433    }
434
435    /// Marks a localization key as used for all languages.
436    /// Returns a [`Vec<&str>`] containing the languages for which the key does not exist.
437    fn mark_used_return_missing(&self, key: &str) -> Vec<&'static str> {
438        let mut langs_missing = Vec::new();
439        for lang in self.iter_lang() {
440            if !self.mark_used_lang_return_exists(key, lang) {
441                langs_missing.push(lang.into());
442            }
443        }
444        langs_missing
445    }
446
447    /// Marks a localization key as used for one language.
448    /// Returns whether the key exists for this language (same as [`Localization::exists_lang`]).
449    fn mark_used_lang_return_exists(&self, key: &str, lang: Language) -> bool {
450        if let Some(entry) = self.locas[lang].get(key) {
451            entry.used.store(true, Relaxed);
452            return true;
453        }
454        false
455    }
456
457    #[allow(dead_code)]
458    pub fn suggest(&self, key: &str, token: &Token) {
459        if key.is_empty() {
460            return;
461        }
462        let langs_missing = self.mark_used_return_missing(key);
463        // They're all missing
464        if langs_missing.len() == self.iter_lang().count() {
465            let msg = format!("you can define localization `{key}`");
466            tips(ErrorKey::SuggestLocalization).msg(msg).loc(token).push();
467        }
468        // The loca is defined for some languages but not others.
469        // This inconsistency is worth warning about.
470        else if !langs_missing.is_empty() {
471            let msg = format!("missing {} localization key {key}", stringify_list(&langs_missing));
472            report(ErrorKey::MissingLocalization, Item::Localization.severity())
473                .msg(msg)
474                .loc(token)
475                .push();
476        }
477    }
478
479    /// Return whether any language uses the given macro in its loca entry for this key.
480    /// Only a macro at the top level of this entry counts; ones hidden recursively in
481    /// other macros do not.
482    #[allow(dead_code)]
483    pub fn uses_macro(&self, key: &str, look_for: &str) -> bool {
484        let look_for = format!("${look_for}$");
485        for lang in self.iter_lang() {
486            if let Some(entry) = self.locas[lang].get(key)
487                && let Some(orig) = &entry.orig
488                && orig.as_str().contains(&look_for)
489            {
490                return true;
491            }
492        }
493        false
494    }
495
    /// Validate the references inside one parsed localization value for language `lang`.
    ///
    /// Does every `[concept|E]` reference have a defined game concept?
    /// Does every other `[code]` block have valid promotes and functions?
    /// Does every $key$ in a macro have a corresponding loca key or named scope?
    ///
    /// `Concat` and `CalculatedIcon` values are checked element by element. Variants not
    /// listed in the match are not checked.
    fn check_loca_code(
        value: &LocaValue,
        data: &Everything,
        sc: &mut ScopeContext,
        lang: Language,
    ) {
        #[allow(clippy::collapsible_match)]
        match value {
            LocaValue::Concat(v) | LocaValue::CalculatedIcon(v) => {
                for value in v {
                    Self::check_loca_code(value, data, sc, lang);
                }
            }
            // TODO: validate the formatting codes
            LocaValue::Code(chain, format) => {
                // |E is the formatting used for game concepts in ck3
                // A chain that is a game concept reference is only checked for existence
                // (unless it is a builtin macro) and skips datatype validation.
                #[cfg(feature = "ck3")]
                if Game::is_ck3()
                    && let Some(format) = format
                    && (format.as_str().contains('E') || format.as_str().contains('e'))
                    && let Some(name) = chain.as_gameconcept()
                {
                    if !is_builtin_macro(name) {
                        data.verify_exists(Item::GameConcept, name);
                    }
                    return;
                }

                // TODO: datatype is not really Unknown here, it should be a CString or CFixedPoint or some kind of number.
                // But we can't express that yet.
                validate_datatypes(
                    chain,
                    data,
                    sc,
                    &DataContext::new(),
                    Datatype::Unknown,
                    Some(lang),
                    format.as_ref(),
                    false,
                );
            }
            LocaValue::Tooltip(token) => {
                // TODO: should this be validated with validate_localization_sc ? (remember to avoid infinite loops)
                // `BREAKDOWN_TAG` is exempt from the lookup in vic3.
                if !(Game::is_vic3() && token.is("BREAKDOWN_TAG")) {
                    data.localization.verify_exists_lang(token, Some(lang));
                }
            }
            #[allow(unused_variables)] // tag only used by ck3
            LocaValue::ComplexTooltip(tag, token) => {
                // TODO: if any of the three are datatype expressions, validate them.
                // Tokens that start with `[` or that are builtin macros are not checked.
                #[cfg(feature = "ck3")]
                if Game::is_ck3() && !token.starts_with("[") && !is_builtin_macro(token) {
                    // The lowercased tag selects how the token is checked.
                    match COMPLEX_TOOLTIPS_CK3.get(&*tag.as_str().to_lowercase()).copied() {
                        None => {
                            // TODO: should this be validated with validate_localization_sc ? (remember to avoid infinite loops)
                            data.localization.verify_exists_lang(token, Some(lang));
                        }
                        Some(None) => (), // token is a runtime id
                        Some(Some(itype)) => data.verify_exists(itype, token),
                    }
                }
                #[cfg(feature = "vic3")]
                if Game::is_vic3() && !token.starts_with("[") && !is_builtin_macro(token) {
                    data.localization.verify_exists_lang(token, Some(lang));
                }
                // TODO: - imperator -
            }
            LocaValue::Icon(token) => {
                // `ICONKEY_icon` and `KEY_icon` are exempt from the text icon lookup.
                if !is_builtin_macro(token) && !token.is("ICONKEY_icon") && !token.is("KEY_icon") {
                    data.verify_exists(Item::TextIcon, token);
                }
            }
            #[allow(unused_variables)]
            LocaValue::Flag(token) => {
                // TODO: Instead of this awkward 'contains TAG' heuristic, mark macros in the text
                // somehow.
                // Flags are only checked for hoi4: the country tag must exist, and so must its
                // flag file.
                #[cfg(feature = "hoi4")]
                if !is_builtin_macro(token) && !token.as_str().contains("TAG") {
                    data.verify_exists(Item::CountryTag, token);
                    let pathname = format!("gfx/flags/{token}.tga");
                    data.verify_exists_implied(Item::File, &pathname, token);
                }
            }
            _ => (),
        }
    }
585
586    #[cfg(feature = "ck3")]
587    pub fn verify_key_has_options(&self, loca: &str, key: &Token, n: i64, prefix: &str) {
588        for lang in self.iter_lang() {
589            if let Some(entry) = self.locas[lang].get(loca) {
590                if let Some(ref orig) = entry.orig {
591                    for i in 1..=n {
592                        let find = format!("${prefix}{i}$");
593                        let find2 = format!("${prefix}{i}|");
594                        if !orig.as_str().contains(&find) && !orig.as_str().contains(&find2) {
595                            warn(ErrorKey::Validation)
596                                .msg(format!("localization is missing {find}"))
597                                .loc(key)
598                                .loc_msg(&entry.key, "here")
599                                .push();
600                        }
601                    }
602                    let find = format!("${prefix}{}$", n + 1);
603                    let find2 = format!("${prefix}{}|", n + 1);
604                    if orig.as_str().contains(&find) && !orig.as_str().contains(&find2) {
605                        warn(ErrorKey::Validation)
606                            .msg("localization has too many options")
607                            .loc(key)
608                            .loc_msg(&entry.key, "here")
609                            .push();
610                    }
611                } else if n > 0 {
612                    let msg = format!("localization is missing ${prefix}1$");
613                    warn(ErrorKey::Validation).msg(msg).loc(key).loc_msg(&entry.key, "here").push();
614                }
615            }
616        }
617    }
618
619    fn validate_loca<'b>(
620        entry: &LocaEntry,
621        from: &'b TigerHashMap<&'b str, LocaEntry>,
622        data: &Everything,
623        sc: &mut ScopeContext,
624        lang: Language,
625    ) {
626        if matches!(entry.value, LocaValue::Macro(_)) {
627            let mut new_line = Vec::new();
628            let mut count = 0;
629            if entry.expand_macros(&mut new_line, from, &mut count, sc, None, data) {
630                // re-parse after macro expansion
631                let new_line_as_ref = new_line.iter().collect();
632                let value = ValueParser::new(new_line_as_ref).parse();
633                Self::check_loca_code(&value, data, sc, lang);
634            }
635        } else {
636            Self::check_loca_code(&entry.value, data, sc, lang);
637        }
638    }
639
640    pub fn validate_use(&self, key: &str, data: &Everything, sc: &mut ScopeContext) {
641        for lang in self.iter_lang() {
642            let loca = &self.locas[lang];
643            if let Some(entry) = loca.get(key) {
644                entry.used.store(true, Relaxed);
645                entry.validated.store(true, Relaxed);
646                Self::validate_loca(entry, loca, data, sc, lang);
647            }
648        }
649    }
650
651    #[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
652    fn check_collisions(&self, lang: Language) {
653        for (k, v) in self.all_collision_keys(lang) {
654            let mut rep = report(ErrorKey::LocalizationKeyCollision, Severity::Error)
655                .strong()
656                .msg(format!(
657                    "localization keys '{}' have same MURMUR3A hash '0x{k:08X}'",
658                    stringify_list(&v.iter().map(|loca| loca.key.as_str()).collect::<Vec<&str>>())
659                ))
660                .info("localization keys hash collision will cause some of them fail to load")
661                .loc(&v[0].key);
662            for loc in v.iter().skip(1) {
663                rep = rep.loc_msg(&loc.key, "here");
664            }
665            rep.push();
666        }
667    }
668
669    // This is in pass2 to make sure all `validated` entries have been marked.
670    pub fn validate_pass2(&self, data: &Everything) {
671        #[allow(unused_variables)]
672        scope(|s| {
673            for lang in self.iter_lang() {
674                let loca = &self.locas[lang];
675                // Check localization key collisions
676                #[cfg(any(feature = "ck3", feature = "vic3", feature = "imperator"))]
677                s.spawn(move |_| self.check_collisions(lang));
678
679                // Collect and sort the entries before looping, to create more stable output
680                let mut unvalidated_entries: Vec<&LocaEntry> =
681                    loca.values().filter(|e| !e.validated.load(Relaxed)).collect();
682                unvalidated_entries.sort_unstable();
683                unvalidated_entries.par_iter().for_each(|entry| {
684                    // Technically we can now store true in entry.validated,
685                    // but the value is not needed anymore after this.
686                    let mut sc = ScopeContext::new_unrooted(Scopes::all(), &entry.key);
687                    sc.set_strict_scopes(false);
688                    Self::validate_loca(entry, loca, data, &mut sc, lang);
689                });
690            }
691        });
692    }
693
694    pub fn mark_category_used(&self, prefix: &str) {
695        let mut i = 0;
696        loop {
697            let loca = format!("{prefix}{i}");
698            if !self.mark_used_return_exists(&loca) {
699                break;
700            }
701            i += 1;
702        }
703    }
704
705    pub fn check_unused(&self, _data: &Everything) {
706        self.mark_category_used("LOADING_TIP_");
707        self.mark_category_used("HYBRID_NAME_FORMAT_");
708        self.mark_category_used("DIVERGE_NAME_FORMAT_");
709
710        for lang in self.iter_lang() {
711            let mut vec = Vec::new();
712            for entry in self.locas[lang].values() {
713                if !entry.used.load(Relaxed) {
714                    vec.push(entry);
715                }
716            }
717            vec.sort_unstable_by_key(|entry| &entry.key.loc);
718            for entry in vec {
719                report(ErrorKey::UnusedLocalization, Severity::Untidy)
720                    .msg("Unused localization")
721                    .abbreviated(&entry.key)
722                    .push();
723            }
724        }
725    }
726
727    #[cfg(feature = "ck3")]
728    pub fn check_pod_loca(&self, data: &Everything) {
729        for lang in self.iter_lang() {
730            for key in data.database.iter_keys(Item::PerkTree) {
731                let loca = format!("{key}_name");
732                if let Some(entry) = self.locas[lang].get(loca.as_str())
733                    && let LocaValue::Text(token) = &entry.value
734                {
735                    if token.as_str().ends_with("_visible") {
736                        data.verify_exists(Item::ScriptedGui, token);
737                        data.verify_exists(Item::Localization, token);
738                    }
739                    continue;
740                }
741                let msg = format!("missing loca `{key}_name: \"{key}_visible\"`");
742                let info = "this is needed for the `window_character_lifestyle.gui` code";
743                err(ErrorKey::PrincesOfDarkness).msg(msg).info(info).loc(key).push();
744            }
745        }
746    }
747}
748
749impl FileHandler<(Language, Vec<LocaEntry>)> for Localization {
750    fn config(&mut self, config: &Block) {
751        if let Some(block) = config.get_field_block("languages") {
752            // By default, self.check_langs is all true.
753            // If a languages block exists in the config, then check_langs
754            // should contain only the configured languages, so langs is
755            // initialized to all false here.
756            let mut langs = bitarr![u16, Lsb0; 0; Language::COUNT];
757
758            // TODO: warn if there are unknown languages in check or skip?
759            let check = block.get_field_values("check");
760            let skip = block.get_field_values("skip");
761
762            // If check is used, then check only those languages.
763            // If instead skip is used, then check all languages except the skipped ones.
764            for lang in Language::iter() {
765                let lang_str = lang.into();
766                if check.iter().any(|t| t.is(lang_str))
767                    || (check.is_empty() && skip.iter().all(|t| !t.is(lang_str)))
768                {
769                    langs.set(lang.to_idx(), true);
770                }
771            }
772            self.check_langs = langs;
773        }
774    }
775
776    fn subpath(&self) -> PathBuf {
777        if Game::is_hoi4() { PathBuf::from("localisation") } else { PathBuf::from("localization") }
778    }
779
780    fn load_file(
781        &self,
782        entry: &FileEntry,
783        _parser: &ParserMemory,
784    ) -> Option<(Language, Vec<LocaEntry>)> {
785        if !entry.filename().to_string_lossy().ends_with(".yml") {
786            return None;
787        }
788
789        // unwrap is safe here because we're only handed files under localization/
790        // to_string_lossy is ok because we compare lang against a set of known strings.
791        let lang_str = entry.path().components().nth(1).unwrap().as_os_str().to_string_lossy();
792
793        // special case for this file
794        if lang_str == "languages.yml" {
795            return None;
796        }
797
798        if let Some(filelang) = get_file_lang(entry.filename()) {
799            if !self.check_langs[filelang.to_idx()] {
800                return None;
801            }
802            // Localization files don't have to be in a subdirectory corresponding to their language.
803            // However, if there's one in a subdirectory for a *different* language than the one in its name,
804            // then something is probably wrong.
805            if let Ok(lang) = Language::try_from(lang_str.as_ref())
806                && filelang != lang
807            {
808                let msg = "localization file with wrong name or in wrong directory";
809                let info = "A localization file should be in a subdirectory corresponding to its language.";
810                warn(ErrorKey::Filename).msg(msg).info(info).loc(entry).push();
811            }
812            match read_to_string(entry.fullpath()) {
813                Ok(content) => {
814                    return Some((filelang, parse_loca(entry, content, filelang).collect()));
815                }
816                Err(e) => {
817                    let msg = "could not read file";
818                    let info = &format!("{e:#}");
819                    err(ErrorKey::ReadError).msg(msg).info(info).loc(entry).push();
820                }
821            }
822        } else if entry.kind() >= FileKind::Vanilla {
823            // Check for `FileKind::Vanilla` because Jomini and Clausewitz support more languages
824            let msg = "could not determine language from filename";
825            let info = format!(
826                "Localization filenames should end in _l_language.yml, where language is one of {}",
827                *LANG_LIST
828            );
829            err(ErrorKey::Filename).msg(msg).info(info).loc(entry).push();
830        }
831        None
832    }
833
834    fn handle_file(&mut self, entry: &FileEntry, loaded: (Language, Vec<LocaEntry>)) {
835        let (filelang, vec) = loaded;
836        let hash = &mut self.locas[filelang];
837        if hash.is_empty() {
838            // empirically ~290k for each lang of ck3
839            hash.reserve(300_000);
840        }
841
842        if entry.kind() == FileKind::Mod {
843            self.mod_langs.set(filelang.to_idx(), true);
844        }
845
846        for loca in vec {
847            match hash.entry(loca.key.as_str()) {
848                Entry::Occupied(mut occupied_entry) => {
849                    let other = occupied_entry.get();
850                    // other.key and loca.key are in the other order than usual here,
851                    // because in loca the older definition overrides the later one.
852                    if is_replace_path(entry.path()) {
853                        occupied_entry.insert(loca);
854                    } else if other.key.loc.kind == entry.kind() && other.orig != loca.orig {
855                        dup_error(&other.key, &loca.key, "localization");
856                    }
857                }
858                Entry::Vacant(vacant_entry) => {
859                    vacant_entry.insert(loca);
860                }
861            }
862        }
863    }
864}
865
866impl Default for Localization {
867    fn default() -> Self {
868        Localization {
869            check_langs: bitarr![u16, Lsb0; 1; Language::COUNT],
870            mod_langs: bitarr![u16, Lsb0; 0; Language::COUNT],
871            locas: Languages(std::array::from_fn(|_| TigerHashMap::default())),
872        }
873    }
874}
875
/// Return true iff any element of `path` is exactly `replace`.
///
/// The position of the element does not matter: it's been tested that both
/// localization/replace/english and localization/english/replace work.
fn is_replace_path(path: &Path) -> bool {
    path.iter().any(|element| element.to_string_lossy() == "replace")
}
885
/// These are the languages in which it's reasonable to present an ascii name unchanged.
///
/// Entries are lowercase language names; `only_latin_script` compares its input against
/// this list by exact string match.
#[cfg(feature = "ck3")]
const LATIN_SCRIPT_LANGS: &[&str] =
    &["english", "french", "german", "spanish", "braz_por", "polish", "turkish"];
890
891/// Return true iff `langs` only contains languages in which it's reasonable to present an ascii
892/// name unchanged.
893#[cfg(feature = "ck3")]
894fn only_latin_script(langs: &[&str]) -> bool {
895    langs.iter().all(|lang| LATIN_SCRIPT_LANGS.contains(lang))
896}
897
898/// Check that the string only has capital letters at the start or after a space or hyphen
899#[cfg(feature = "ck3")]
fn normal_capitalization_for_name(name: &str) -> bool {
    // Pair every character with the one before it. The first character is paired with a
    // space, so that a capital at the very start counts as following a word boundary.
    let preceding = std::iter::once(' ').chain(name.chars());
    preceding
        .zip(name.chars())
        .all(|(before, ch)| !ch.is_uppercase() || before == ' ' || before == '-')
}
910
#[cfg(all(test, feature = "ck3"))]
mod tests {
    use super::*;
    use crate::fileset::{FileKind, FileStage};
    use crate::token::{Loc, Token};
    use std::path::PathBuf;

    /// Only lists made up entirely of latin-script languages are accepted; the empty list is too.
    #[test]
    fn test_only_latin_script() {
        assert!(only_latin_script(&["english", "french", "german"]));
        assert!(!only_latin_script(&["english", "french", "german", "korean"]));
        assert!(only_latin_script(&[]));
    }

    /// Capitals are accepted only at the start of the name or right after a space or hyphen.
    #[test]
    fn test_normal_capitalization_for_name() {
        let accepted =
            ["George", "george", "Jean-Claude", "Abu-l-Fadl al-Malik", "Abu Abdallah Muhammad"];
        for name in accepted {
            assert!(normal_capitalization_for_name(name), "`{name}` should be accepted");
        }

        let rejected = ["BjOrn", "AbuAbdallahMuhammad"];
        for name in rejected {
            assert!(!normal_capitalization_for_name(name), "`{name}` should be rejected");
        }
    }

    /// Every known pair of colliding keys must show up together in one collision bucket.
    #[test]
    fn test_collision_detection() {
        let pairs = [
            // CK3 examples
            ("Mallobald", "laamp_base_contract_schemes.2541.e.tt.employer_has_trait.paranoid"),
            ("dynn_Hkeng", "debug_min_popular_opinion_modifier"),
            ("b_hinggan_adj", "grand_wedding_completed_guest"),
            // Imperator examples
            ("carthage_mission_trade_metropolis_west", "me_diadochi_empire_events.316.at"),
            ("Azdumani", "me_patauion_02.43.b_tt"),
            ("PROV7234_hellenic", "me_kush_15_desc"),
        ];

        // dummy location for tokens
        let dummy_loc =
            Loc::for_file(PathBuf::new(), FileStage::NoStage, FileKind::Mod, PathBuf::new());

        // build a localization database containing the known colliding keys,
        // each key mapping to a text entry holding the key itself
        let lang = Language::English;
        let mut loc = Localization::default();
        for key in pairs.iter().flat_map(|&(first, second)| [first, second]) {
            let token = Token::from_static_str(key, dummy_loc);
            let entry = LocaEntry::new(token.clone(), LocaValue::Text(token), None);
            loc.locas[lang].insert(key, entry);
        }

        let collisions = loc.all_collision_keys(lang);
        for &(k1, k2) in &pairs {
            let found = collisions.values().any(|bucket| {
                let holds = |key: &str| bucket.iter().any(|e| e.key.as_str() == key);
                holds(k1) && holds(k2)
            });
            assert!(found, "expected collision between {k1} and {k2}");
        }
    }
}