diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-03-03 05:49:41 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-03 04:49:41 +0000 |
commit | 7f28dc887fcafddeea48fb351328df9c7885266a (patch) | |
tree | 023224e54209da87d62cb39c402c1f530894aedc /ext/js | |
parent | 5ec2344df95da39cda4ad07cfe5181b05fa24830 (diff) |
add diacritics (#756)
Diffstat (limited to 'ext/js')
-rw-r--r-- | ext/js/language/ar/arabic-text-preprocessors.js | 18 |
1 files changed, 17 insertions, 1 deletions
```diff
diff --git a/ext/js/language/ar/arabic-text-preprocessors.js b/ext/js/language/ar/arabic-text-preprocessors.js
index f0118564..7adb39e9 100644
--- a/ext/js/language/ar/arabic-text-preprocessors.js
+++ b/ext/js/language/ar/arabic-text-preprocessors.js
@@ -17,12 +17,28 @@
 
 import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
 
+const optionalDiacritics = [
+    '\u0618', // Small Fatha
+    '\u0619', // Small Damma
+    '\u061A', // Small Kasra
+    '\u064B', // Fathatan
+    '\u064C', // Dammatan
+    '\u064D', // Kasratan
+    '\u064E', // Fatha
+    '\u064F', // Damma
+    '\u0650', // Kasra
+    '\u0651', // Shadda
+    '\u0652' // Sukun
+];
+
+const diacriticsRegex = new RegExp(`[${optionalDiacritics.join('')}]`, 'g');
+
 /** @type {import('language').TextPreprocessor<boolean>} */
 export const removeArabicScriptDiacritics = {
     name: 'Remove diacritics',
     description: 'وَلَدَ ⬅️ ولد',
     options: basicTextPreprocessorOptions,
     process: (text, setting) => {
-        return setting ? text.replace(/[\u064E-\u0650]/g, '') : text;
+        return setting ? text.replace(diacriticsRegex, '') : text;
     }
 };
```