From e5b23bfd165461be012c8a64f37bcbb3205a9447 Mon Sep 17 00:00:00 2001
From: JOLIMAITRE Matthieu
Date: Tue, 30 Apr 2024 16:38:45 +0200
Subject: [PATCH] fix incorrect pruning of composite words

---
Reviewer notes (text in this section is ignored by `git am`):
- Line breaks and indentation were reconstructed from a copy of this patch
  that had been collapsed onto a single line. The indentation width of the
  context lines is an assumption; if they do not match the tree, apply with
  `git apply --ignore-whitespace`.
- v2: remove_accents() now strips diacritics through Unicode normalisation,
  which also covers letters that were missing from the lookup table
  (e.g. "ù", "ü"); contains_any() and remove_accents() are documented.
  The commit id in the mbox "From" line and the post-image blob id in the
  "index" line still describe v1.

 src/lib/dict.ts | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/lib/dict.ts b/src/lib/dict.ts
index 8e08a95..8bc4c5d 100644
--- a/src/lib/dict.ts
+++ b/src/lib/dict.ts
@@ -17,8 +17,8 @@ export class Dict {
         for (const word of content.split("\n")) {
             const word_ = word.trim().toLowerCase();
             if (word_.length !== length) continue;
-            for (const forbidden of [" ", "-", "."]) if (word_.includes(forbidden)) continue;
-            words.add(remove_accent(word_));
+            if (contains_any(word_, [" ", "-", "."])) continue;
+            words.add(remove_accents(word_));
         }
         return new Dict(words, length);
     }
@@ -27,3 +27,32 @@ export class Dict {
         return `Dict { ${this.words.size} words }`;
     }
 }
+
+/**
+ * Tells whether `text` contains at least one of the given substrings.
+ *
+ * @param text - String to search in.
+ * @param words - Substrings to look for.
+ * @returns `true` if any substring occurs in `text`; `false` otherwise,
+ *   including when `words` is empty.
+ */
+export function contains_any(text: string, words: readonly string[]): boolean {
+    return words.some((word) => text.includes(word));
+}
+
+/**
+ * Strips diacritics from `text`, e.g. "é" -> "e", "ç" -> "c", "ù" -> "u".
+ *
+ * The text is decomposed (NFD) so that every accented letter becomes a base
+ * letter followed by combining marks; the marks (U+0300-U+036F) are dropped
+ * and the result is recomposed (NFC). Ligatures such as "œ" are kept as is.
+ *
+ * @param text - String to strip accents from.
+ * @returns A copy of `text` without combining diacritical marks.
+ */
+export function remove_accents(text: string): string {
+    return text
+        .normalize("NFD")
+        .replace(/[\u0300-\u036f]/g, "")
+        .normalize("NFC");
+}