fix incorrect pruning of composite words

This commit is contained in:
JOLIMAITRE Matthieu 2024-04-30 16:38:45 +02:00
parent f5e8aa12d9
commit e5b23bfd16

View file

@ -17,8 +17,8 @@ export class Dict {
for (const word of content.split("\n")) {
const word_ = word.trim().toLowerCase();
if (word_.length !== length) continue;
for (const forbidden of [" ", "-", "."]) if (word_.includes(forbidden)) continue;
words.add(remove_accent(word_));
if (contains_any(word_, [" ", "-", "."])) continue;
words.add(remove_accents(word_));
}
return new Dict(words, length);
}
@ -27,3 +27,22 @@ export class Dict {
return `Dict { ${this.words.size} words }`;
}
}
/**
 * Tells whether `text` contains at least one of the given substrings.
 *
 * @param text - String to search in.
 * @param words - Candidate substrings; an empty list yields `false`.
 * @returns `true` if any candidate occurs in `text`, otherwise `false`.
 */
export function contains_any(text: string, words: string[]) {
  return words.some((candidate) => text.includes(candidate));
}
/**
 * Strips diacritical marks from `text` (e.g. "déjà" becomes "deja").
 *
 * The text is canonically decomposed (NFD) so that every accented letter
 * becomes a base letter followed by combining marks, the combining marks
 * (U+0300–U+036F) are removed, and the result is recomposed (NFC) so that
 * characters decomposed for other reasons are put back together.
 *
 * Unlike a hand-written replacement table, this also covers letters such as
 * "ù", "ü" or "ÿ", uppercase letters, and input that is already decomposed.
 * Letters without a canonical decomposition (for instance the ligature "œ")
 * are returned unchanged.
 *
 * @param text - String to strip accents from.
 * @returns A copy of `text` without diacritical marks.
 */
export function remove_accents(text: string) {
  return text
    .normalize("NFD")
    // Drop the combining marks left behind by the decomposition.
    .replace(/[\u0300-\u036f]/g, "")
    .normalize("NFC");
}