From 7f28dc887fcafddeea48fb351328df9c7885266a Mon Sep 17 00:00:00 2001
From: StefanVukovic99
Date: Sun, 3 Mar 2024 05:49:41 +0100
Subject: add diacritics (#756)

---
Review note (free text after the three-dash line; not part of the commit
message and ignored when the patch is applied):

  * The hunk below replaces the inline character range /[\u064E-\u0650]/g
    (fatha, damma, kasra) used by removeArabicScriptDiacritics.process with
    a module-level regex, diacriticsRegex, built by joining the eleven code
    points listed in optionalDiacritics (U+0618-U+061A and U+064B-U+0652)
    into a single character class with the global flag.
  * The line structure of this patch was restored from a copy in which all
    newlines had been collapsed. The 4-space indentation of the JavaScript
    lines is an assumption -- verify it against the target tree before
    applying, since context lines must match.

 ext/js/language/ar/arabic-text-preprocessors.js | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'ext/js/language/ar')

diff --git a/ext/js/language/ar/arabic-text-preprocessors.js b/ext/js/language/ar/arabic-text-preprocessors.js
index f0118564..7adb39e9 100644
--- a/ext/js/language/ar/arabic-text-preprocessors.js
+++ b/ext/js/language/ar/arabic-text-preprocessors.js
@@ -17,12 +17,28 @@
 
 import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
 
+const optionalDiacritics = [
+    '\u0618', // Small Fatha
+    '\u0619', // Small Damma
+    '\u061A', // Small Kasra
+    '\u064B', // Fathatan
+    '\u064C', // Dammatan
+    '\u064D', // Kasratan
+    '\u064E', // Fatha
+    '\u064F', // Damma
+    '\u0650', // Kasra
+    '\u0651', // Shadda
+    '\u0652' // Sukun
+];
+
+const diacriticsRegex = new RegExp(`[${optionalDiacritics.join('')}]`, 'g');
+
 /** @type {import('language').TextPreprocessor} */
 export const removeArabicScriptDiacritics = {
     name: 'Remove diacritics',
     description: 'وَلَدَ ⬅️ ولد',
     options: basicTextPreprocessorOptions,
     process: (text, setting) => {
-        return setting ? text.replace(/[\u064E-\u0650]/g, '') : text;
+        return setting ? text.replace(diacriticsRegex, '') : text;
     }
 };
-- 
cgit v1.2.3