add language select, abstract text transformations (#584)

* Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * part1 * frotend done? * fix tests * offscreen and type complications * add tests * start fixing tests * keep fixing tests * fix tests * Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * delete pt * renames * add tests * kebab-case filenames * lint * minor fixes * merge * fixes * fix part of comments * fix more comments * delete unused types * comment * comment * do backend * other files * move fetch utils to own file * remove extra line * add extra line * remove unnecessary export * simplify folder structure * remove redundant async * fix param type in api * fix language index * undo changes to cssStyleApplier * undo changes to utilities.js * undo changes to utilities.js * simplify language util * lint * undo phantom changes to anki integration * require textTransformations options * explicit locale in localeCompare * punctuate notes * prefer early exit * rename LanguageOptionsObjectMap * rename to textPreprocessor * tuple with names instead of boolean array * safe data setting * optional chaining * simplify LanguageOptions * encapsulate languages * delete language util * nullable language in text preprocessors controller * rename transform to process * remove settings * make translation advanced again * remove unused getTextTransformations api call * comments * change language types * RIP flags * comments * fix tests * lint * Text preprocessor type changes (#10) * Add types * Update types * Simplify type check * Refactor typing and structuring of language definitions * lint * update translator benchmark * undo markdown changes * undo markdown changes * undo markdown changes * more merge * simplify language controller --------- Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com> Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
author: StefanVukovic99 <stefanvukovic44@gmail.com> 2024-02-17 02:45:24 +0100
committer: GitHub <noreply@github.com> 2024-02-17 01:45:24 +0000
commit: 4aaa9f15d97668203741c1731f15e710ae8b8294 (patch)
tree: d1885f7fbd7d1510a71176597169d6847ae26572
parent: 4e77741d22778bd09b772fc53f1cbd64107e3d24 (diff)
32 files changed, 1648 insertions, 387 deletions
diff --git a/.eslintrc.json b/.eslintrc.json
index 361e5f24..a5f0f85e 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -637,9 +637,13 @@
                 "ext/js/general/object-property-accessor.js",
                 "ext/js/general/regex-util.js",
                 "ext/js/general/text-source-map.js",
+                "ext/js/language/en/language-english.js",
                 "ext/js/language/ja/japanese-wanakana.js",
                 "ext/js/language/ja/japanese.js",
+                "ext/js/language/ja/language-japanese.js",
                 "ext/js/language/language-transformer.js",
+                "ext/js/language/languages.js",
+                "ext/js/language/text-preprocessors.js",
                 "ext/js/language/translator.js",
                 "ext/js/media/audio-downloader.js",
                 "ext/js/media/media-util.js",
diff --git a/benches/translator.bench.js b/benches/translator.bench.js
index 1231c31c..8b9b5118 100644
--- a/benches/translator.bench.js
+++ b/benches/translator.bench.js
@@ -20,8 +20,8 @@ import {fileURLToPath} from 'node:url';
 import path from 'path';
 import {bench, describe} from 'vitest';
 import {parseJson} from '../dev/json.js';
-import {createFindKanjiOptions, createFindTermsOptions} from '../test/utilities/translator.js';
 import {createTranslatorContext} from '../test/fixtures/translator-test.js';
+import {createFindKanjiOptions, createFindTermsOptions} from '../test/utilities/translator.js';
 
 const dirname = path.dirname(fileURLToPath(import.meta.url));
 const dictionaryName = 'Test Dictionary 2';
@@ -33,10 +33,9 @@ describe('Translator', () => {
     const {optionsPresets, tests} = parseJson(readFileSync(testInputsFilePath, {encoding: 'utf8'}));
 
     const findKanjiTests = tests.filter((data) => data.options === 'kanji');
-    const findTermTests = tests.filter((data) => data.options === 'default');
-    const findTermWithTextTransformationsTests = tests.filter((data) => data.options !== 'kanji' && data.options !== 'default');
+    const findTermTests = tests.filter((data) => data.options !== 'kanji');
 
-    bench(`Translator.prototype.findTerms - no text transformations  (n=${findTermTests.length})`, async () => {
+    bench(`Translator.prototype.findTerms - (n=${findTermTests.length})`, async () => {
         for (const data of /** @type {import('test/translator').TestInputFindTerm[]} */ (findTermTests)) {
             const {mode, text} = data;
             const options = createFindTermsOptions(dictionaryName, optionsPresets, data.options);
@@ -44,14 +43,6 @@ describe('Translator', () => {
         }
     });
 
-    bench(`Translator.prototype.findTerms - text transformations  (n=${findTermWithTextTransformationsTests.length})`, async () => {
-        for (const data of /** @type {import('test/translator').TestInputFindTerm[]} */ (findTermWithTextTransformationsTests)) {
-            const {mode, text} = data;
-            const options = createFindTermsOptions(dictionaryName, optionsPresets, data.options);
-            await translator.findTerms(mode, text, options);
-        }
-    });
-
     bench(`Translator.prototype.findKanji - (n=${findKanjiTests.length})`, async () => {
         for (const data of /** @type {import('test/translator').TestInputFindKanji[]} */ (findKanjiTests)) {
             const {text} = data;
diff --git a/dev/jsconfig.json b/dev/jsconfig.json
index d9465108..6a5fb13b 100644
--- a/dev/jsconfig.json
+++ b/dev/jsconfig.json
@@ -28,6 +28,9 @@
             "error": ["../types/ext/error"],
             "event-listener-collection": ["../types/ext/event-listener-collection"],
             "japanese-util": ["../types/ext/japanese-util"],
+            "language": ["../types/ext/language"],
+            "language-english": ["../types/ext/language-english"],
+            "language-japanese": ["../types/ext/language-japanese"],
             "ext/json-schema": ["../types/ext/json-schema"],
             "language-transformer": ["../types/ext/language-transformer"],
             "language-transformer-internal": ["../types/ext/language-transformer-internal"],
diff --git a/docs/anki-integration.md b/docs/anki-integration.md
index 2bd9fad9..9bd5bd94 100644
--- a/docs/anki-integration.md
+++ b/docs/anki-integration.md
@@ -23,71 +23,71 @@ Flashcard fields can be configured with the following steps:
 
     #### Markers for Term Cards
 
-    | Marker                     | Description                                                                                                              |
-    | -------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
-    | `{audio}`                  | Audio sample of a native speaker's pronunciation in MP3 format (if available).                                           |
-    | `{clipboard-image}`        | An image which is stored in the system clipboard, if present.                                                            |
-    | `{clipboard-text}`         | Text which is stored in the system clipboard, if present.                                                                |
-    | `{cloze-body}`             | Raw, inflected term as it appeared before being reduced to dictionary form by Yomitan.                                   |
-    | `{cloze-body-kana}`        | Kana reading for `{cloze-body}`.                                                                                         |
-    | `{cloze-prefix}`           | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`. |
-    | `{cloze-suffix}`           | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.             |
-    | `{conjugation}`            | Conjugation path from the raw inflected term to the source term.                                                         |
-    | `{dictionary}`             | Name of the dictionary from which the card is being created (unavailable in _grouped_ mode).                             |
-    | `{document-title}`         | Title of the web page that the term appeared in.                                                                         |
-    | `{expression}`             | Term expressed as kanji (will be displayed in kana if kanji is not available).                                           |
-    | `{frequencies}`            | Frequency information for the term.                                                                                      |
-    | `{frequency-harmonic-rank}` | The harmonic mean of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage. |
+    | Marker                            | Description                                                                                                                                                            |
+    | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+    | `{audio}`                         | Audio sample of a native speaker's pronunciation in MP3 format (if available).                                                                                         |
+    | `{clipboard-image}`               | An image which is stored in the system clipboard, if present.                                                                                                          |
+    | `{clipboard-text}`                | Text which is stored in the system clipboard, if present.                                                                                                              |
+    | `{cloze-body}`                    | Raw, inflected term as it appeared before being reduced to dictionary form by Yomitan.                                                                                 |
+    | `{cloze-body-kana}`               | Kana reading for `{cloze-body}`.                                                                                                                                       |
+    | `{cloze-prefix}`                  | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`.                                               |
+    | `{cloze-suffix}`                  | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.                                                           |
+    | `{conjugation}`                   | Conjugation path from the raw inflected term to the source term.                                                                                                       |
+    | `{dictionary}`                    | Name of the dictionary from which the card is being created (unavailable in _grouped_ mode).                                                                           |
+    | `{document-title}`                | Title of the web page that the term appeared in.                                                                                                                       |
+    | `{expression}`                    | Term expressed as kanji (will be displayed in kana if kanji is not available).                                                                                         |
+    | `{frequencies}`                   | Frequency information for the term.                                                                                                                                    |
+    | `{frequency-harmonic-rank}`       | The harmonic mean of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage.   |
     | `{frequency-harmonic-occurrence}` | The harmonic mean of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based term usage. |
-    | `{frequency-average-rank}` | The average of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage. |
-    | `{frequency-average-occurrence}` | The average of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based term usage. |
-    | `{furigana}`               | Term expressed as kanji with furigana displayed above it (e.g. <ruby>日本語<rt>にほんご</rt></ruby>).                    |
-    | `{furigana-plain}`         | Term expressed as kanji with furigana displayed next to it in brackets (e.g. 日本語[にほんご]).                          |
-    | `{glossary}`               | List of definitions for the term (output format depends on whether running in _grouped_ mode).                           |
-    | `{glossary-brief}`         | List of definitions for the term in a more compact format.                                                               |
-    | `{glossary-no-dictionary}` | List of definitions for the term, except the dictionary tag is omitted.                                                  |
-    | `{part-of-speech}`         | Part of speech information for the term.                                                                                 |
-    | `{phonetic-transcriptions}`| List of phonetic transcriptions for the term.                                                                            |
-    | `{pitch-accents}`          | List of pitch accent downstep notations for the term.                                                                    |
-    | `{pitch-accent-graphs}`    | List of pitch accent graphs for the term.                                                                                |
-    | `{pitch-accent-positions}` | List of accent downstep positions for the term as a number.                                                              |
-    | `{pitch-accent-categories}`| List of pitch accent categories for the term (e.g. heiban, kifuku, atamadaka, odaka, nakadaka).                          |
-    | `{reading}`                | Kana reading for the term (empty for terms where the expression is the reading).                                         |
-    | `{screenshot}`             | Screenshot of the web page taken at the time the term was added.                                                         |
-    | `{search-query}`           | The full search query shown on the search page.                                                                          |
-    | `{selection-text}`         | The selected text on the search page or popup.                                                                           |
-    | `{sentence}`               | Sentence, quote, or phrase that the term appears in from the source content.                                             |
-    | `{sentence-furigana}`      | Sentence, quote, or phrase that the term appears in from the source content, with furigana added.                        |
-    | `{tags}`                   | Grammar and usage tags providing information about the term (unavailable in _grouped_ mode).                             |
-    | `{url}`                    | Address of the web page in which the term appeared in.                                                                   |
+    | `{frequency-average-rank}`        | The average of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage.         |
+    | `{frequency-average-occurrence}`  | The average of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based term usage.       |
+    | `{furigana}`                      | Term expressed as kanji with furigana displayed above it (e.g. <ruby>日本語<rt>にほんご</rt></ruby>).                                                                  |
+    | `{furigana-plain}`                | Term expressed as kanji with furigana displayed next to it in brackets (e.g. 日本語[にほんご]).                                                                        |
+    | `{glossary}`                      | List of definitions for the term (output format depends on whether running in _grouped_ mode).                                                                         |
+    | `{glossary-brief}`                | List of definitions for the term in a more compact format.                                                                                                             |
+    | `{glossary-no-dictionary}`        | List of definitions for the term, except the dictionary tag is omitted.                                                                                                |
+    | `{part-of-speech}`                | Part of speech information for the term.                                                                                                                               |
+    | `{phonetic-transcriptions}`       | List of phonetic transcriptions for the term.                                                                                                                          |
+    | `{pitch-accents}`                 | List of pitch accent downstep notations for the term.                                                                                                                  |
+    | `{pitch-accent-graphs}`           | List of pitch accent graphs for the term.                                                                                                                              |
+    | `{pitch-accent-positions}`        | List of accent downstep positions for the term as a number.                                                                                                            |
+    | `{pitch-accent-categories}`       | List of pitch accent categories for the term (e.g. heiban, kifuku, atamadaka, odaka, nakadaka).                                                                        |
+    | `{reading}`                       | Kana reading for the term (empty for terms where the expression is the reading).                                                                                       |
+    | `{screenshot}`                    | Screenshot of the web page taken at the time the term was added.                                                                                                       |
+    | `{search-query}`                  | The full search query shown on the search page.                                                                                                                        |
+    | `{selection-text}`                | The selected text on the search page or popup.                                                                                                                         |
+    | `{sentence}`                      | Sentence, quote, or phrase that the term appears in from the source content.                                                                                           |
+    | `{sentence-furigana}`             | Sentence, quote, or phrase that the term appears in from the source content, with furigana added.                                                                      |
+    | `{tags}`                          | Grammar and usage tags providing information about the term (unavailable in _grouped_ mode).                                                                           |
+    | `{url}`                           | Address of the web page in which the term appeared.                                                                                                                    |
 
     #### Markers for Kanji Cards
 
-    | Marker                | Description                                                                                                              |
-    | --------------------- | ------------------------------------------------------------------------------------------------------------------------ |
-    | `{character}`         | Unicode glyph representing the current kanji.                                                                            |
-    | `{clipboard-image}`   | An image which is stored in the system clipboard, if present.                                                            |
-    | `{clipboard-text}`    | Text which is stored in the system clipboard, if present.                                                                |
-    | `{cloze-body}`        | Raw, inflected parent term as it appeared before being reduced to dictionary form by Yomitan.                            |
-    | `{cloze-prefix}`      | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`. |
-    | `{cloze-suffix}`      | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.             |
-    | `{dictionary}`        | Name of the dictionary from which the card is being created.                                                             |
-    | `{document-title}`    | Title of the web page that the kanji appeared in.                                                                        |
-    | `{frequencies}`       | Frequency information for the kanji.                                                                                     |
-    | `{frequency-harmonic-rank}` | The harmonic mean of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage. |
+    | Marker                            | Description                                                                                                                                                              |
+    | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+    | `{character}`                     | Unicode glyph representing the current kanji.                                                                                                                            |
+    | `{clipboard-image}`               | An image which is stored in the system clipboard, if present.                                                                                                            |
+    | `{clipboard-text}`                | Text which is stored in the system clipboard, if present.                                                                                                                |
+    | `{cloze-body}`                    | Raw, inflected parent term as it appeared before being reduced to dictionary form by Yomitan.                                                                            |
+    | `{cloze-prefix}`                  | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`.                                                 |
+    | `{cloze-suffix}`                  | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.                                                             |
+    | `{dictionary}`                    | Name of the dictionary from which the card is being created.                                                                                                             |
+    | `{document-title}`                | Title of the web page that the kanji appeared in.                                                                                                                        |
+    | `{frequencies}`                   | Frequency information for the kanji.                                                                                                                                     |
+    | `{frequency-harmonic-rank}`       | The harmonic mean of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage.   |
     | `{frequency-harmonic-occurrence}` | The harmonic mean of frequency data for the current kanji. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based kanji usage. |
-    | `{frequency-average-rank}` | The average of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage. |
-    | `{frequency-average-occurrence}` | The average of frequency data for the current kanji. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based kanji usage. |
-    | `{glossary}`          | List of definitions for the kanji.                                                                                       |
-    | `{kunyomi}`           | Kunyomi (Japanese reading) for the kanji expressed as katakana.                                                          |
-    | `{onyomi}`            | Onyomi (Chinese reading) for the kanji expressed as hiragana.                                                            |
-    | `{screenshot}`        | Screenshot of the web page taken at the time the kanji was added.                                                        |
-    | `{search-query}`      | The full search query shown on the search page.                                                                          |
-    | `{selection-text}`    | The selected text on the search page or popup.                                                                           |
-    | `{sentence}`          | Sentence, quote, or phrase that the character appears in from the source content.                                        |
-    | `{sentence-furigana}` | Sentence, quote, or phrase that the character appears in from the source content, with furigana added.                   |
-    | `{stroke-count}`      | Number of strokes that the kanji character has.                                                                          |
-    | `{url}`               | Address of the web page in which the kanji appeared in.                                                                  |
+    | `{frequency-average-rank}`        | The average of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage.         |
+    | `{frequency-average-occurrence}`  | The average of frequency data for the current kanji. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based kanji usage.       |
+    | `{glossary}`                      | List of definitions for the kanji.                                                                                                                                       |
+    | `{kunyomi}`                       | Kunyomi (Japanese reading) for the kanji expressed as katakana.                                                                                                          |
+    | `{onyomi}`                        | Onyomi (Chinese reading) for the kanji expressed as hiragana.                                                                                                            |
+    | `{screenshot}`                    | Screenshot of the web page taken at the time the kanji was added.                                                                                                        |
+    | `{search-query}`                  | The full search query shown on the search page.                                                                                                                          |
+    | `{selection-text}`                | The selected text on the search page or popup.                                                                                                                           |
+    | `{sentence}`                      | Sentence, quote, or phrase that the character appears in from the source content.                                                                                        |
+    | `{sentence-furigana}`             | Sentence, quote, or phrase that the character appears in from the source content, with furigana added.                                                                   |
+    | `{stroke-count}`                  | Number of strokes that the kanji character has.                                                                                                                          |
+    | `{url}`                           | Address of the web page in which the kanji appeared.                                                                                                                     |
 
 When creating your model for Yomitan, _make sure that you pick a unique field to be first_; fields that will
 contain `{expression}` or `{character}` are ideal candidates for this. Anki does not allow duplicate flashcards to be
diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json
index ea7caf0f..d86eedf7 100644
--- a/ext/data/schemas/options-schema.json
+++ b/ext/data/schemas/options-schema.json
@@ -81,6 +81,7 @@
                                 "type": "object",
                                 "required": [
                                     "enable",
+                                    "language",
                                     "resultOutputMode",
                                     "debugInfo",
                                     "maxResults",
@@ -126,6 +127,10 @@
                                         "type": "boolean",
                                         "default": true
                                     },
+                                    "language": {
+                                        "type": "string",
+                                        "default": "ja"
+                                    },
                                     "resultOutputMode": {
                                         "type": "string",
                                         "enum": ["group", "merge", "split"],
@@ -722,12 +727,6 @@
                             "translation": {
                                 "type": "object",
                                 "required": [
-                                    "convertHalfWidthCharacters",
-                                    "convertNumericCharacters",
-                                    "convertAlphabeticCharacters",
-                                    "convertHiraganaToKatakana",
-                                    "convertKatakanaToHiragana",
-                                    "collapseEmphaticSequences",
                                     "textReplacements",
                                     "searchResolution"
                                 ],
@@ -740,36 +739,6 @@
                                         ],
                                         "default": "letter"
                                     },
-                                    "convertHalfWidthCharacters": {
-                                        "type": "string",
-                                        "enum": ["false", "true", "variant"],
-                                        "default": "false"
-                                    },
-                                    "convertNumericCharacters": {
-                                        "type": "string",
-                                        "enum": ["false", "true", "variant"],
-                                        "default": "false"
-                                    },
-                                    "convertAlphabeticCharacters": {
-                                        "type": "string",
-                                        "enum": ["false", "true", "variant"],
-                                        "default": "false"
-                                    },
-                                    "convertHiraganaToKatakana": {
-                                        "type": "string",
-                                        "enum": ["false", "true", "variant"],
-                                        "default": "false"
-                                    },
-                                    "convertKatakanaToHiragana": {
-                                        "type": "string",
-                                        "enum": ["false", "true", "variant"],
-                                        "default": "variant"
-                                    },
-                                    "collapseEmphaticSequences": {
-                                        "type": "string",
-                                        "enum": ["false", "true", "full"],
-                                        "default": "false"
-                                    },
                                     "textReplacements": {
                                         "type": "object",
                                         "required": [
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index e246f0bb..31191612 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -34,6 +34,7 @@ import {DictionaryDatabase} from '../dictionary/dictionary-database.js';
 import {Environment} from '../extension/environment.js';
 import {ObjectPropertyAccessor} from '../general/object-property-accessor.js';
 import {distributeFuriganaInflected, isCodePointJapanese, isStringPartiallyJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/ja/japanese.js';
+import {getLanguageSummaries} from '../language/languages.js';
 import {Translator} from '../language/translator.js';
 import {AudioDownloader} from '../media/audio-downloader.js';
 import {getFileExtensionFromAudioMediaType, getFileExtensionFromImageMediaType} from '../media/media-util.js';
@@ -183,7 +184,8 @@ export class Backend {
             ['textHasJapaneseCharacters',    this._onApiTextHasJapaneseCharacters.bind(this)],
             ['getTermFrequencies',           this._onApiGetTermFrequencies.bind(this)],
             ['findAnkiNotes',                this._onApiFindAnkiNotes.bind(this)],
-            ['openCrossFramePort',           this._onApiOpenCrossFramePort.bind(this)]
+            ['openCrossFramePort',           this._onApiOpenCrossFramePort.bind(this)],
+            ['getLanguageSummaries',         this._onApiGetLanguageSummaries.bind(this)]
         ]);
         /* eslint-enable @stylistic/no-multi-spaces */
 
@@ -906,6 +908,11 @@ export class Backend {
         return {targetTabId, targetFrameId};
     }
 
+    /** @type {import('api').ApiHandler<'getLanguageSummaries'>} */
+    _onApiGetLanguageSummaries() {
+        return getLanguageSummaries(); // Delegates to language/languages.js; this handler reads no parameters.
+    }
+
     // Command handlers
 
     /**
@@ -2361,15 +2368,9 @@ export class Backend {
         if (typeof deinflect !== 'boolean') { deinflect = true; }
         const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
         const {
-            general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder},
+            general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder, language},
             scanning: {alphanumeric},
             translation: {
-                convertHalfWidthCharacters,
-                convertNumericCharacters,
-                convertAlphabeticCharacters,
-                convertHiraganaToKatakana,
-                convertKatakanaToHiragana,
-                collapseEmphaticSequences,
                 textReplacements: textReplacementsOptions,
                 searchResolution
             }
@@ -2394,16 +2395,11 @@ export class Backend {
             sortFrequencyDictionary,
             sortFrequencyDictionaryOrder,
             removeNonJapaneseCharacters: !alphanumeric,
-            convertHalfWidthCharacters,
-            convertNumericCharacters,
-            convertAlphabeticCharacters,
-            convertHiraganaToKatakana,
-            convertKatakanaToHiragana,
-            collapseEmphaticSequences,
             searchResolution,
             textReplacements,
             enabledDictionaryMap,
-            excludeDictionaryDefinitions
+            excludeDictionaryDefinitions,
+            language
         };
     }
 
diff --git a/ext/js/comm/api.js b/ext/js/comm/api.js
index b4fdbeb5..40b8e252 100644
--- a/ext/js/comm/api.js
+++ b/ext/js/comm/api.js
@@ -361,6 +361,13 @@ export class API {
         return this._invoke('openCrossFramePort', {targetTabId, targetFrameId});
     }
 
+    /**
+     * @returns {Promise<import('api').ApiReturn<'getLanguageSummaries'>>} Result of invoking the `getLanguageSummaries` API action.
+     */
+    getLanguageSummaries() {
+        return this._invoke('getLanguageSummaries', void 0); // `void 0`: no parameters are sent with this action.
+    }
+
     // Utilities
 
     /**
diff --git a/ext/js/data/options-util.js b/ext/js/data/options-util.js
index 1644df2f..7952eafc 100644
--- a/ext/js/data/options-util.js
+++ b/ext/js/data/options-util.js
@@ -522,7 +522,8 @@ export class OptionsUtil {
             this._updateVersion22,
             this._updateVersion23,
             this._updateVersion24,
-            this._updateVersion25
+            this._updateVersion25,
+            this._updateVersion26
         ];
         /* eslint-enable @typescript-eslint/unbound-method */
         if (typeof targetVersion === 'number' && targetVersion < result.length) {
@@ -1155,6 +1156,31 @@ export class OptionsUtil {
     }
 
     /**
+     * - Added general.language, set to 'ja' for every existing profile.
+     * - Modularized text preprocessors: the per-preprocessor keys are deleted from each profile's translation options.
+     * @type {import('options-util').UpdateFunction}
+     */
+    _updateVersion26(options) {
+        const textPreprocessors = [ // Keys to delete from `translation` in each profile.
+            'convertHalfWidthCharacters',
+            'convertNumericCharacters',
+            'convertAlphabeticCharacters',
+            'convertHiraganaToKatakana',
+            'convertKatakanaToHiragana',
+            'collapseEmphaticSequences'
+        ];
+
+        for (const {options: profileOptions} of options.profiles) {
+            profileOptions.general.language = 'ja';
+
+            for (const preprocessor of textPreprocessors) {
+                delete profileOptions.translation[preprocessor];
+            }
+        }
+    }
+
+
+    /**
      * @param {string} url
      * @returns {Promise<chrome.tabs.Tab>}
      */
diff --git a/ext/js/language/en/language-english.js b/ext/js/language/en/language-english.js
new file mode 100644
index 00000000..8268653f
--- /dev/null
+++ b/ext/js/language/en/language-english.js
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {capitalizeFirstLetter, decapitalize} from '../text-preprocessors.js';
+
+/** @type {import('language-english').EnglishLanguageDescriptor} */
+export const descriptor = {
+    name: 'English',
+    iso: 'en', // languages.js registers the descriptor under this code.
+    exampleText: 'read',
+    textPreprocessors: { // Each key doubles as the preprocessor's id.
+        capitalizeFirstLetter,
+        decapitalize
+    }
+};
diff --git a/ext/js/language/ja/language-japanese.js b/ext/js/language/ja/language-japanese.js
new file mode 100644
index 00000000..ced34bcd
--- /dev/null
+++ b/ext/js/language/ja/language-japanese.js
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
+import {convertAlphabeticToKana} from './japanese-wanakana.js';
+import {collapseEmphaticSequences, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericToFullWidth} from './japanese.js';
+
+/** @type {import('language-japanese').JapaneseLanguageDescriptor} */
+export const descriptor = {
+    name: 'Japanese',
+    iso: 'ja', // languages.js registers the descriptor under this code.
+    exampleText: '読め',
+    textPreprocessors: { // Each key doubles as the preprocessor's id; entries are listed in the order they are declared here.
+        convertHalfWidthCharacters: {
+            name: 'Convert half width characters to full width',
+            description: 'ﾖﾐﾁｬﾝ → ヨミチャン',
+            options: basicTextPreprocessorOptions,
+            /** @type {import('language').TextPreprocessorFunction<boolean>} */
+            process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str)
+        },
+        convertNumericCharacters: {
+            name: 'Convert numeric characters to full width',
+            description: '1234 → １２３４',
+            options: basicTextPreprocessorOptions,
+            /** @type {import('language').TextPreprocessorFunction<boolean>} */
+            process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str) // The source map is not passed to this conversion.
+        },
+        convertAlphabeticCharacters: {
+            name: 'Convert alphabetic characters to hiragana',
+            description: 'yomichan → よみちゃん',
+            options: basicTextPreprocessorOptions,
+            /** @type {import('language').TextPreprocessorFunction<boolean>} */
+            process: (str, setting, sourceMap) => (setting ? convertAlphabeticToKana(str, sourceMap) : str)
+        },
+        convertHiraganaToKatakana: {
+            name: 'Convert hiragana to katakana',
+            description: 'よみちゃん → ヨミチャン',
+            options: basicTextPreprocessorOptions,
+            /** @type {import('language').TextPreprocessorFunction<boolean>} */
+            process: (str, setting) => (setting ? convertHiraganaToKatakana(str) : str) // The source map is not passed to this conversion.
+        },
+        convertKatakanaToHiragana: {
+            name: 'Convert katakana to hiragana',
+            description: 'ヨミチャン → よみちゃん',
+            options: basicTextPreprocessorOptions,
+            /** @type {import('language').TextPreprocessorFunction<boolean>} */
+            process: (str, setting) => (setting ? convertKatakanaToHiragana(str) : str) // The source map is not passed to this conversion.
+        },
+        collapseEmphaticSequences: {
+            name: 'Collapse emphatic character sequences',
+            description: 'すっっごーーい → すっごーい / すごい',
+            options: [[false, false], [true, false], [true, true]], // Each entry is a [collapseEmphatic, collapseEmphaticFull] pair.
+            /** @type {import('language').TextPreprocessorFunction<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
+            process: (str, setting, sourceMap) => {
+                const [collapseEmphatic, collapseEmphaticFull] = setting;
+                if (collapseEmphatic) { // The second flag is only consulted when the first is set.
+                    str = collapseEmphaticSequences(str, collapseEmphaticFull, sourceMap);
+                }
+                return str;
+            }
+        }
+    }
+};
diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js
new file mode 100755
index 00000000..f51ca163
--- /dev/null
+++ b/ext/js/language/languages.js
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {descriptor as descriptorEnglish} from './en/language-english.js';
+import {descriptor as descriptorJapanese} from './ja/language-japanese.js';
+
+const languageDescriptors = [ // Only descriptors listed here are registered in the map below.
+    descriptorEnglish,
+    descriptorJapanese
+];
+
+/** @type {Map<string, typeof languageDescriptors[0]>} */
+const languageDescriptorMap = new Map();
+for (const languageDescriptor of languageDescriptors) {
+    languageDescriptorMap.set(languageDescriptor.iso, languageDescriptor); // Keyed by ISO code; a repeated code would replace the earlier descriptor.
+}
+
+/**
+ * @returns {import('language').LanguageSummary[]} The name, ISO code and example text of each registered language.
+ */
+export function getLanguageSummaries() {
+    const results = [];
+    for (const {name, iso, exampleText} of languageDescriptorMap.values()) {
+        results.push({name, iso, exampleText}); // Copy only the summary fields; text preprocessors are left out.
+    }
+    return results;
+}
+
+/**
+ * Lists, for each registered language, its ISO code together with its text preprocessors paired with their ids.
+ * @returns {import('language').LanguageAndPreprocessors[]}
+ */
+export function getAllLanguageTextPreprocessors() {
+    const results = [];
+    for (const {iso, textPreprocessors} of languageDescriptorMap.values()) {
+        /** @type {import('language').TextPreprocessorWithId<unknown>[]} */
+        const textPreprocessorsArray = [];
+        for (const [id, textPreprocessor] of Object.entries(textPreprocessors)) { // The descriptor's property key becomes the id.
+            textPreprocessorsArray.push({
+                id,
+                textPreprocessor: /** @type {import('language').TextPreprocessor<unknown>} */ (textPreprocessor)
+            });
+        }
+        results.push({iso, textPreprocessors: textPreprocessorsArray});
+    }
+    return results;
+}
diff --git a/ext/js/language/text-preprocessors.js b/ext/js/language/text-preprocessors.js
new file mode 100755
index 00000000..12b3d1b6
--- /dev/null
+++ b/ext/js/language/text-preprocessors.js
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @type {import('language').TextPreprocessorOptions<boolean>} */
+export const basicTextPreprocessorOptions = [false, true]; // The two settings of an on/off preprocessor: disabled, enabled.
+
+/** @type {import('language').TextPreprocessor<boolean>} */
+export const decapitalize = {
+    name: 'Decapitalize text',
+    description: 'CAPITALIZED TEXT → capitalized text',
+    options: basicTextPreprocessorOptions,
+    process: (str, setting) => (setting ? str.toLowerCase() : str) // Lower-cases the whole string when enabled; otherwise returns it unchanged.
+};
+
+/** @type {import('language').TextPreprocessor<boolean>} */
+export const capitalizeFirstLetter = {
+    name: 'Capitalize first letter',
+    description: 'lowercase text → Lowercase text',
+    options: basicTextPreprocessorOptions,
+    process: (str, setting) => (setting ? str.charAt(0).toUpperCase() + str.slice(1) : str) // Upper-cases only the first UTF-16 code unit; the rest is kept as is.
+};
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index b2342e8d..4f9304b5 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -18,9 +18,9 @@
 
 import {applyTextReplacement} from '../general/regex-util.js';
 import {TextSourceMap} from '../general/text-source-map.js';
-import {convertAlphabeticToKana} from './ja/japanese-wanakana.js';
-import {collapseEmphaticSequences, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericToFullWidth, isCodePointJapanese} from './ja/japanese.js';
+import {isCodePointJapanese} from './ja/japanese.js';
 import {LanguageTransformer} from './language-transformer.js';
+import {getAllLanguageTextPreprocessors} from './languages.js';
 
 /**
  * Class which finds term and kanji dictionary entries for text.
@@ -41,6 +41,8 @@ export class Translator {
         this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
         /** @type {RegExp} */
         this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/;
+        /** @type {Map<string, {textPreprocessors: import('language').TextPreprocessorWithId<unknown>[], optionSpace: import('translation-internal').PreprocessorOptionsSpace}>} */
+        this._textPreprocessors = new Map();
     }
 
     /**
@@ -49,6 +51,14 @@ export class Translator {
      */
     prepare(descriptor) {
         this._languageTransformer.addDescriptor(descriptor);
+        for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) {
+            /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
+            const optionSpace = new Map();
+            for (const {id, textPreprocessor} of textPreprocessors) {
+                optionSpace.set(id, textPreprocessor.options);
+            }
+            this._textPreprocessors.set(iso, {textPreprocessors, optionSpace});
+        }
     }
 
     /**
@@ -415,51 +425,45 @@ export class Translator {
         }
     }
 
-    // Deinflections and text transformations
+    // Deinflections and text preprocessing
 
     /**
      * @param {string} text
      * @param {import('translation').FindTermsOptions} options
      * @returns {import('translation-internal').DatabaseDeinflection[]}
+     * @throws {Error}
      */
     _getAlgorithmDeinflections(text, options) {
-        /** @type {import('translation-internal').TextDeinflectionOptionsArrays} */
-        const textOptionVariantArray = [
-            this._getTextReplacementsVariants(options),
-            this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
-            this._getTextOptionEntryVariants(options.convertNumericCharacters),
-            this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
-            this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
-            this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
-            this._getCollapseEmphaticOptions(options)
-        ];
+        const {language} = options;
+        const info = this._textPreprocessors.get(language);
+        if (typeof info === 'undefined') { throw new Error(`Unsupported language: ${language}`); }
+        const {textPreprocessors, optionSpace: textPreprocessorOptionsSpace} = info;
+
+        /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */
+        const variantSpace = new Map();
+        variantSpace.set('textReplacements', this._getTextReplacementsVariants(options));
+        for (const [key, value] of textPreprocessorOptionsSpace) {
+            variantSpace.set(key, value);
+        }
 
         /** @type {import('translation-internal').DatabaseDeinflection[]} */
         const deinflections = [];
         const used = new Set();
-        for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of /** @type {Generator<import('translation-internal').TextDeinflectionOptions, void, unknown>} */ (this._getArrayVariants(textOptionVariantArray))) {
+
+        for (const arrayVariant of this._generateArrayVariants(variantSpace)) {
+            const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (arrayVariant.get('textReplacements'));
+
             let text2 = text;
             const sourceMap = new TextSourceMap(text2);
+
             if (textReplacements !== null) {
                 text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
             }
-            if (halfWidth) {
-                text2 = convertHalfWidthKanaToFullWidth(text2, sourceMap);
-            }
-            if (numeric) {
-                text2 = convertNumericToFullWidth(text2);
-            }
-            if (alphabetic) {
-                text2 = convertAlphabeticToKana(text2, sourceMap);
-            }
-            if (katakana) {
-                text2 = convertHiraganaToKatakana(text2);
-            }
-            if (hiragana) {
-                text2 = convertKatakanaToHiragana(text2);
-            }
-            if (collapseEmphatic) {
-                text2 = collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
+
+            for (const preprocessor of textPreprocessors.values()) {
+                const {id, textPreprocessor} = preprocessor;
+                const setting = arrayVariant.get(id);
+                text2 = textPreprocessor.process(text2, setting, sourceMap);
             }
 
             for (
@@ -527,36 +531,6 @@ export class Translator {
     }
 
     /**
-     * @param {import('translation').FindTermsVariantMode} value
-     * @returns {boolean[]}
-     */
-    _getTextOptionEntryVariants(value) {
-        switch (value) {
-            case 'true': return [true];
-            case 'variant': return [false, true];
-            default: return [false];
-        }
-    }
-
-    /**
-     * @param {import('translation').FindTermsOptions} options
-     * @returns {[collapseEmphatic: boolean, collapseEmphaticFull: boolean][]}
-     */
-    _getCollapseEmphaticOptions(options) {
-        /** @type {[collapseEmphatic: boolean, collapseEmphaticFull: boolean][]} */
-        const collapseEmphaticOptions = [[false, false]];
-        switch (options.collapseEmphaticSequences) {
-            case 'true':
-                collapseEmphaticOptions.push([true, false]);
-                break;
-            case 'full':
-                collapseEmphaticOptions.push([true, false], [true, true]);
-                break;
-        }
-        return collapseEmphaticOptions;
-    }
-
-    /**
      * @param {import('translation').FindTermsOptions} options
      * @returns {(import('translation').FindTermsTextReplacement[] | null)[]}
      */
@@ -1343,26 +1317,32 @@ export class Translator {
     }
 
     /**
-     * @param {[...args: unknown[][]]} arrayVariants
-     * @yields {[...args: unknown[]]}
-     * @returns {Generator<unknown[], void, unknown>}
+     * @param {Map<string, unknown[]>} arrayVariants Maps each key to the list of values that key may take.
+     * @yields {Map<string, unknown>} One map per combination, assigning every key a single value from its list.
+     * @returns {Generator<Map<string, unknown>, void, void>}
      */
-    *_getArrayVariants(arrayVariants) {
-        const ii = arrayVariants.length;
-
-        let total = 1;
-        for (let i = 0; i < ii; ++i) {
-            total *= arrayVariants[i].length;
+    *_generateArrayVariants(arrayVariants) {
+        const variantKeys = [...arrayVariants.keys()];
+        const entryVariantLengths = [];
+        for (const key of variantKeys) {
+            const entryVariants = /** @type {unknown[]} */ (arrayVariants.get(key));
+            entryVariantLengths.push(entryVariants.length);
         }
+        const totalVariants = entryVariantLengths.reduce((acc, length) => acc * length, 1); // Product of all list lengths; 0 if any list is empty.
+
+        for (let variantIndex = 0; variantIndex < totalVariants; ++variantIndex) {
+            /** @type {Map<string, unknown>} */
+            const variant = new Map();
+            let remainingIndex = variantIndex; // Split below into one index per key; the first key varies fastest.
 
-        for (let a = 0; a < total; ++a) {
-            const variant = [];
-            let index = a;
-            for (let i = 0; i < ii; ++i) {
-                const entryVariants = arrayVariants[i];
-                variant.push(entryVariants[index % entryVariants.length]);
-                index = Math.floor(index / entryVariants.length);
+            for (let keyIndex = 0; keyIndex < variantKeys.length; ++keyIndex) {
+                const key = variantKeys[keyIndex];
+                const entryVariants = /** @type {unknown[]} */ (arrayVariants.get(key));
+                const entryIndex = remainingIndex % entryVariants.length;
+                variant.set(key, entryVariants[entryIndex]);
+                remainingIndex = Math.floor(remainingIndex / entryVariants.length);
             }
+
             yield variant;
         }
     }
diff --git a/ext/js/pages/settings/languages-controller.js b/ext/js/pages/settings/languages-controller.js
new file mode 100755
index 00000000..78f036df
--- /dev/null
+++ b/ext/js/pages/settings/languages-controller.js
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2023-2024  Yomitan Authors
+ * Copyright (C) 2021-2022  Yomichan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {querySelectorNotNull} from '../../dom/query-selector.js';
+
+export class LanguagesController {
+    /**
+     * @param {import('./settings-controller.js').SettingsController} settingsController Used to reach the application API.
+     */
+    constructor(settingsController) {
+        /** @type {import('./settings-controller.js').SettingsController} */
+        this._settingsController = settingsController;
+    }
+
+    /** Fetches the language summaries through the API, orders them by ISO code and fills the language select. */
+    async prepare() {
+        const languages = await this._settingsController.application.api.getLanguageSummaries();
+        languages.sort((a, b) => a.iso.localeCompare(b.iso, 'en')); // Explicit 'en' locale keeps the ordering independent of the runtime's default locale.
+        this._fillSelect(languages);
+    }
+
+    /**
+     * @param {import('language').LanguageSummary[]} languages Entries to append, one `<option>` each, in the given order.
+     */
+    _fillSelect(languages) {
+        const selectElement = querySelectorNotNull(document, '#language-select');
+        for (const {iso, name} of languages) {
+            const option = document.createElement('option');
+            option.value = iso; // The ISO code is the option's value.
+            option.text = `(${iso}) ${name}`;
+            selectElement.appendChild(option);
+        }
+    }
+}
diff --git a/ext/js/pages/settings/settings-main.js b/ext/js/pages/settings/settings-main.js
index dc4b36c9..0b115246 100644
--- a/ext/js/pages/settings/settings-main.js
+++ b/ext/js/pages/settings/settings-main.js
@@ -30,6 +30,7 @@ import {DictionaryImportController} from './dictionary-import-controller.js';
 import {ExtensionKeyboardShortcutController} from './extension-keyboard-shortcuts-controller.js';
 import {GenericSettingController} from './generic-setting-controller.js';
 import {KeyboardShortcutController} from './keyboard-shortcuts-controller.js';
+import {LanguagesController} from './languages-controller.js';
 import {MecabController} from './mecab-controller.js';
 import {ModalController} from './modal-controller.js';
 import {NestedPopupsController} from './nested-popups-controller.js';
@@ -137,6 +138,9 @@ await Application.main(async (application) => {
     const secondarySearchDictionaryController = new SecondarySearchDictionaryController(settingsController);
     secondarySearchDictionaryController.prepare();
 
+    const languagesController = new LanguagesController(settingsController);
+    languagesController.prepare();
+
     const translationTextReplacementsController = new TranslationTextReplacementsController(settingsController);
     translationTextReplacementsController.prepare();
 
diff --git a/ext/settings.html b/ext/settings.html
index 2af2a666..8dc70cf3 100644
--- a/ext/settings.html
+++ b/ext/settings.html
@@ -212,6 +212,19 @@
                 <label class="toggle"><input type="checkbox" data-setting="general.enable"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
             </div>
         </div></div>
+        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
+            <div class="settings-item-left">
+                <div class="settings-item-label">
+                    Language
+                </div>
+                <div class="settings-item-description">
+                    Language of the text that is being looked up.
+                </div>
+            </div>
+            <div class="settings-item-right">
+                <select id="language-select" data-setting="general.language"></select>
+            </div>
+        </div></div>
         <div class="settings-item"><div class="settings-item-inner">
             <div class="settings-item-left">
                 <div class="settings-item-label">Show the <a href="/welcome.html" target="_blank" rel="noopener">welcome guide</a> on browser startup</div>
@@ -1531,84 +1544,6 @@
                 <button type="button" class="icon-button"><span class="icon-button-inner"><span class="icon" data-icon="material-right-arrow"></span></span></button>
             </div>
         </div></div>
-        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
-            <div class="settings-item-left">
-                <div class="settings-item-label">Convert half width characters to full width</div>
-                <div class="settings-item-description" lang="ja">ﾖﾐﾀﾝ&#x3000;&rarr;&#x3000;ヨミタン</div>
-            </div>
-            <div class="settings-item-right">
-                <select data-setting="translation.convertHalfWidthCharacters">
-                    <option value="false">Disabled</option>
-                    <option value="true">Enabled</option>
-                    <option value="variant">Use both variants</option>
-                </select>
-            </div>
-        </div></div>
-        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
-            <div class="settings-item-left">
-                <div class="settings-item-label">Convert numeric characters to full width</div>
-                <div class="settings-item-description" lang="ja">1234&#x3000;&rarr;&#x3000;１２３４</div>
-            </div>
-            <div class="settings-item-right">
-                <select data-setting="translation.convertNumericCharacters">
-                    <option value="false">Disabled</option>
-                    <option value="true">Enabled</option>
-                    <option value="variant">Use both variants</option>
-                </select>
-            </div>
-        </div></div>
-        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
-            <div class="settings-item-left">
-                <div class="settings-item-label">Convert alphabetic characters to hiragana</div>
-                <div class="settings-item-description" lang="ja">yomitan&#x3000;&rarr;&#x3000;よみたん</div>
-            </div>
-            <div class="settings-item-right">
-                <select data-setting="translation.convertAlphabeticCharacters">
-                    <option value="false">Disabled</option>
-                    <option value="true">Enabled</option>
-                    <option value="variant">Use both variants</option>
-                </select>
-            </div>
-        </div></div>
-        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
-            <div class="settings-item-left">
-                <div class="settings-item-label">Convert hiragana to katakana</div>
-                <div class="settings-item-description" lang="ja">よみたん&#x3000;&rarr;&#x3000;ヨミタン</div>
-            </div>
-            <div class="settings-item-right">
-                <select data-setting="translation.convertHiraganaToKatakana">
-                    <option value="false">Disabled</option>
-                    <option value="true">Enabled</option>
-                    <option value="variant">Use both variants</option>
-                </select>
-            </div>
-        </div></div>
-        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
-            <div class="settings-item-left">
-                <div class="settings-item-label">Convert katakana to hiragana</div>
-                <div class="settings-item-description" lang="ja">ヨミタン&#x3000;&rarr;&#x3000;よみたん</div>
-            </div>
-            <div class="settings-item-right">
-                <select data-setting="translation.convertKatakanaToHiragana">
-                    <option value="false">Disabled</option>
-                    <option value="true">Enabled</option>
-                    <option value="variant">Use both variants</option>
-                </select>
-            </div>
-        </div></div>
-        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
-            <div class="settings-item-left">
-                <div class="settings-item-label">Collapse emphatic character sequences</div>
-                <div class="settings-item-description" lang="ja">すっっごーーい&#x3000;&rarr;&#x3000;すっごーい / すごい</div>
-            </div>
-            <div class="settings-item-right">
-                <select data-setting="translation.collapseEmphaticSequences">
-                    <option value="false">Disabled</option>
-                    <option value="true">Collapse into single character</option>
-                    <option value="full">Remove all characters</option>
-                </select>
-            </div>
-        </div></div>
     </div>
 
     <!-- Anki -->
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json
index a2dd0923..1d84712d 100644
--- a/test/data/anki-note-builder-test-results.json
+++ b/test/data/anki-note-builder-test-results.json
@@ -869,6 +869,43 @@
         "sentence-furigana": "cloze-prefixダースcloze-suffix",
         "tags": "abbr, n",
         "url": "<a href=\"url:\">url:</a>"
+      },
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "ダ",
+        "cloze-body-kana": "ダ",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "打",
+        "frequencies": "<ul style=\"text-align: left;\"><li>Test Dictionary 2: 1</li><li>Test Dictionary 2: four</li><li>Test Dictionary 2: five (5)</li><li>Test Dictionary 2: 8</li><li>Test Dictionary 2: fourteen</li><li>Test Dictionary 2: twenty (20)</li><li>Test Dictionary 2: 26</li></ul>",
+        "frequency-harmonic-rank": "1",
+        "frequency-harmonic-occurrence": "1",
+        "frequency-average-rank": "1",
+        "frequency-average-occurrence": "1",
+        "furigana": "<ruby>打<rt>だ</rt></ruby>",
+        "furigana-plain": "打[だ]",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+        "glossary-brief": "<div style=\"text-align: left;\"><ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "No pitch accent data",
+        "pitch-accent-graphs": "No pitch accent data",
+        "pitch-accent-positions": "No pitch accent data",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "だ",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixダcloze-suffix",
+        "sentence-furigana": "cloze-prefixダcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
       }
     ]
   },
@@ -3437,7 +3474,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertNumericCharacters",
+    "name": "Test text preprocessors - convertNumericCharacters",
     "results": [
       {
         "audio": "",
@@ -3479,7 +3516,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - convertAlphabeticCharacters",
     "results": [
       {
         "audio": "",
@@ -3558,7 +3595,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertKatakanaToHiragana",
     "results": [
       {
         "audio": "",
@@ -3637,7 +3674,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHiraganaToKatakana",
+    "name": "Test text preprocessors - convertHiraganaToKatakana",
     "results": [
       {
         "audio": "",
@@ -3679,7 +3716,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
     "results": [
       {
         "audio": "",
@@ -3758,7 +3795,7 @@
     ]
   },
   {
-    "name": "Test text transformations - collapseEmphaticSequences",
+    "name": "Test text preprocessors - collapseEmphaticSequences",
     "results": [
       {
         "audio": "",
@@ -3768,7 +3805,7 @@
         "cloze-body-kana": "すっっごーーい",
         "cloze-prefix": "cloze-prefix",
         "cloze-suffix": "cloze-suffix",
-        "conjugation": "",
+        "conjugation": "<ul><li>adv « kansai-ben</li></ul>",
         "dictionary": "Test Dictionary 2",
         "document-title": "title",
         "expression": "凄い",
@@ -3798,5 +3835,89 @@
         "url": "<a href=\"url:\">url:</a>"
       }
     ]
+  },
+  {
+    "name": "Test text preprocessors - capitalizeFirstLetter",
+    "results": [
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "english",
+        "cloze-body-kana": "english",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "English",
+        "frequencies": "",
+        "frequency-harmonic-rank": "9999999",
+        "frequency-harmonic-occurrence": "0",
+        "frequency-average-rank": "9999999",
+        "frequency-average-occurrence": "0",
+        "furigana": "English",
+        "furigana-plain": "English",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+        "glossary-brief": "<div style=\"text-align: left;\">English definition</div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "No pitch accent data",
+        "pitch-accent-graphs": "No pitch accent data",
+        "pitch-accent-positions": "No pitch accent data",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "English",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixenglishcloze-suffix",
+        "sentence-furigana": "cloze-prefixenglishcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - decapitalize",
+    "results": [
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "LANGUAGE",
+        "cloze-body-kana": "LANGUAGE",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "language",
+        "frequencies": "",
+        "frequency-harmonic-rank": "9999999",
+        "frequency-harmonic-occurrence": "0",
+        "frequency-average-rank": "9999999",
+        "frequency-average-occurrence": "0",
+        "furigana": "language",
+        "furigana-plain": "language",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> language definition</div>",
+        "glossary-brief": "<div style=\"text-align: left;\">language definition</div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> language definition</div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "No pitch accent data",
+        "pitch-accent-graphs": "No pitch accent data",
+        "pitch-accent-positions": "No pitch accent data",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "language",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixLANGUAGEcloze-suffix",
+        "sentence-furigana": "cloze-prefixLANGUAGEcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
+      }
+    ]
   }
 ]
diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json
index 6d0f74ce..5747f59e 100644
--- a/test/data/database-test-cases.json
+++ b/test/data/database-test-cases.json
@@ -27,7 +27,7 @@
         "ipa": 1
       },
       "terms": {
-        "total": 27
+        "total": 29
       }
     }
   },
@@ -36,7 +36,7 @@
       {
         "kanji": 2,
         "kanjiMeta": 6,
-        "terms": 27,
+        "terms": 29,
         "termMeta": 39,
         "tagMeta": 15,
         "media": 6
@@ -45,7 +45,7 @@
     "total": {
       "kanji": 2,
       "kanjiMeta": 6,
-      "terms": 27,
+      "terms": 29,
       "termMeta": 39,
       "tagMeta": 15,
       "media": 6
diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
index 03a9900c..e7fb015c 100644
--- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json
+++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
@@ -341,5 +341,7 @@
     ["のたまう", "のたまう", "v5", "v5", 1, ["notamau definition"], 15, ""],
     ["のたもうた", "のたもうた", "", "", 1, [["のたまう", ["past"]]], 16, ""],
     ["３９", "さんきゅう", "", "", 1, ["sankyuu definition"], 17, ""],
-    ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""]
+    ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""],
+    ["English", "", "n", "n", 1, ["English definition"], 19, ""],
+    ["language", "", "n", "n", 1, ["language definition"], 20, ""]
 ]
diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json
index 03ec938a..c9047716 100644
--- a/test/data/translator-test-inputs.json
+++ b/test/data/translator-test-inputs.json
@@ -21,12 +21,7 @@
             "sortFrequencyDictionary": null,
             "sortFrequencyDictionaryOrder": "descending",
             "removeNonJapaneseCharacters": true,
-            "convertHalfWidthCharacters": "false",
-            "convertNumericCharacters": "false",
-            "convertAlphabeticCharacters": "false",
-            "convertHiraganaToKatakana": "false",
-            "convertKatakanaToHiragana": "false",
-            "collapseEmphaticSequences": "false",
+            "language": "ja",
             "textReplacements": [
                 null
             ],
@@ -361,7 +356,7 @@
             "options": "default"
         },
         {
-            "name": "Test text transformations - convertNumericCharacters",
+            "name": "Test text preprocessors - convertNumericCharacters",
             "func": "findTerms",
             "mode": "split",
             "text": "39",
@@ -369,13 +364,12 @@
                 "default",
                 {
                     "type": "terms",
-                    "convertNumericCharacters": "true",
                     "removeNonJapaneseCharacters": false
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertAlphabeticCharacters",
+            "name": "Test text preprocessors - convertAlphabeticCharacters",
             "func": "findTerms",
             "mode": "split",
             "text": "utsu",
@@ -383,61 +377,83 @@
                 "default",
                 {
                     "type": "terms",
-                    "convertAlphabeticCharacters": "true",
                     "removeNonJapaneseCharacters": false
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertKatakanaToHiragana",
+            "name": "Test text preprocessors - convertKatakanaToHiragana",
             "func": "findTerms",
             "mode": "split",
             "text": "ウツ",
             "options": [
                 "default",
                 {
-                    "type": "terms",
-                    "convertKatakanaToHiragana": "true"
+                    "type": "terms"
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertHiraganaToKatakana",
+            "name": "Test text preprocessors - convertHiraganaToKatakana",
             "func": "findTerms",
             "mode": "split",
             "text": "てきすと",
             "options": [
                 "default",
                 {
-                    "type": "terms",
-                    "convertHiraganaToKatakana": "true"
+                    "type": "terms"
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+            "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
             "func": "findTerms",
             "mode": "split",
             "text": "ｳﾂ",
             "options": [
                 "default",
                 {
-                    "type": "terms",
-                    "convertHalfWidthCharacters": "true",
-                    "convertKatakanaToHiragana": "true"
+                    "type": "terms"
                 }
             ]
         },
         {
-            "name": "Test text transformations - collapseEmphaticSequences",
+            "name": "Test text preprocessors - collapseEmphaticSequences",
             "func": "findTerms",
             "mode": "split",
             "text": "すっっごーーい",
             "options": [
                 "default",
                 {
+                    "type": "terms"
+                }
+            ]
+        },
+        {
+            "name": "Test text preprocessors - capitalizeFirstLetter",
+            "func": "findTerms",
+            "mode": "split",
+            "text": "english",
+            "options": [
+                "default",
+                {
                     "type": "terms",
-                    "collapseEmphaticSequences": "full"
+                    "language": "en",
+                    "removeNonJapaneseCharacters": false
+                }
+            ]
+        },
+        {
+            "name": "Test text preprocessors - decapitalize",
+            "func": "findTerms",
+            "mode": "split",
+            "text": "LANGUAGE",
+            "options": [
+                "default",
+                {
+                    "type": "terms",
+                    "language": "en",
+                    "removeNonJapaneseCharacters": false
                 }
             ]
         }
diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json
index f84f28a4..f580ac53 100644
--- a/test/data/translator-test-results-note-data1.json
+++ b/test/data/translator-test-results-note-data1.json
@@ -7276,6 +7276,320 @@
           }
         },
         "media": {}
+      },
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 1,
+          "source": "だ",
+          "rawSource": "ダ",
+          "sourceTerm": "だ",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 1,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "打",
+          "reading": "だ",
+          "expressions": [
+            {
+              "sourceTerm": "だ",
+              "expression": "打",
+              "reading": "だ",
+              "termTags": [
+                {
+                  "name": "E1",
+                  "category": "default",
+                  "notes": "example tag 1",
+                  "order": 0,
+                  "score": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "redundant": false
+                }
+              ],
+              "frequencies": [
+                {
+                  "index": 0,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": false,
+                  "frequency": 1
+                },
+                {
+                  "index": 1,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": false,
+                  "frequency": "four"
+                },
+                {
+                  "index": 2,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": false,
+                  "frequency": "five (5)"
+                },
+                {
+                  "index": 3,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": 8
+                },
+                {
+                  "index": 4,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": "fourteen"
+                },
+                {
+                  "index": 5,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": "twenty (20)"
+                },
+                {
+                  "index": 6,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": 26
+                }
+              ],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "打",
+                  "furigana": "だ"
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "da definition 1",
+            "da definition 2"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [
+            {
+              "name": "E1",
+              "category": "default",
+              "notes": "example tag 1",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "frequencies": [
+            {
+              "index": 0,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": false,
+              "frequency": 1
+            },
+            {
+              "index": 1,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": false,
+              "frequency": "four"
+            },
+            {
+              "index": 2,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": false,
+              "frequency": "five (5)"
+            },
+            {
+              "index": 3,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": 8
+            },
+            {
+              "index": 4,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": "fourteen"
+            },
+            {
+              "index": 5,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": "twenty (20)"
+            },
+            {
+              "index": 6,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": 26
+            }
+          ],
+          "frequencyHarmonic": 1,
+          "frequencyAverage": 1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 0,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "打",
+              "furigana": "だ"
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "打"
+        ],
+        "uniqueReadings": [
+          "だ"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
       }
     ]
   },
@@ -30117,7 +30431,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertNumericCharacters",
+    "name": "Test text preprocessors - convertNumericCharacters",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -30219,7 +30533,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - convertAlphabeticCharacters",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -30852,7 +31166,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertKatakanaToHiragana",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -31485,7 +31799,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHiraganaToKatakana",
+    "name": "Test text preprocessors - convertHiraganaToKatakana",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -31638,7 +31952,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -32271,7 +32585,7 @@
     ]
   },
   {
-    "name": "Test text transformations - collapseEmphaticSequences",
+    "name": "Test text preprocessors - collapseEmphaticSequences",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -32285,6 +32599,13 @@
             {
               "source": "algorithm",
               "inflectionRules": []
+            },
+            {
+              "source": "algorithm",
+              "inflectionRules": [
+                "adv",
+                "kansai-ben"
+              ]
             }
           ],
           "score": 1,
@@ -32391,5 +32712,233 @@
         "media": {}
       }
     ]
+  },
+  {
+    "name": "Test text preprocessors - capitalizeFirstLetter",
+    "noteDataList": [
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 27,
+          "source": "English",
+          "rawSource": "english",
+          "sourceTerm": "English",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 19,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "English",
+          "reading": "English",
+          "expressions": [
+            {
+              "sourceTerm": "English",
+              "expression": "English",
+              "reading": "English",
+              "termTags": [],
+              "frequencies": [],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "English",
+                  "furigana": ""
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "English definition"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [],
+          "frequencies": [],
+          "frequencyHarmonic": -1,
+          "frequencyAverage": -1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 1,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "English",
+              "furigana": ""
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "English"
+        ],
+        "uniqueReadings": [
+          "English"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - decapitalize",
+    "noteDataList": [
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 28,
+          "source": "language",
+          "rawSource": "LANGUAGE",
+          "sourceTerm": "language",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 20,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "language",
+          "reading": "language",
+          "expressions": [
+            {
+              "sourceTerm": "language",
+              "expression": "language",
+              "reading": "language",
+              "termTags": [],
+              "frequencies": [],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "language",
+                  "furigana": ""
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "language definition"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [],
+          "frequencies": [],
+          "frequencyHarmonic": -1,
+          "frequencyAverage": -1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 1,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "language",
+              "furigana": ""
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "language"
+        ],
+        "uniqueReadings": [
+          "language"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
+      }
+    ]
   }
 ]
diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json
index e11cb4a9..cd3f7ab6 100644
--- a/test/data/translator-test-results.json
+++ b/test/data/translator-test-results.json
@@ -3969,6 +3969,174 @@
             "displayValueParsed": false
           }
         ]
+      },
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 0,
+        "maxTransformedTextLength": 1,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "打",
+            "reading": "だ",
+            "sources": [
+              {
+                "originalText": "ダ",
+                "transformedText": "だ",
+                "deinflectedText": "だ",
+                "matchType": "exact",
+                "matchSource": "reading",
+                "isPrimary": true
+              }
+            ],
+            "tags": [
+              {
+                "name": "E1",
+                "category": "default",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "example tag 1"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 1,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              1
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "da definition 1",
+              "da definition 2"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": [
+          {
+            "index": 0,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": false,
+            "frequency": 1,
+            "displayValue": null,
+            "displayValueParsed": false
+          },
+          {
+            "index": 1,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": false,
+            "frequency": 0,
+            "displayValue": "four",
+            "displayValueParsed": true
+          },
+          {
+            "index": 2,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": false,
+            "frequency": 5,
+            "displayValue": "five (5)",
+            "displayValueParsed": true
+          },
+          {
+            "index": 3,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 8,
+            "displayValue": null,
+            "displayValueParsed": false
+          },
+          {
+            "index": 4,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 0,
+            "displayValue": "fourteen",
+            "displayValueParsed": true
+          },
+          {
+            "index": 5,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 20,
+            "displayValue": "twenty (20)",
+            "displayValueParsed": true
+          },
+          {
+            "index": 6,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 26,
+            "displayValue": null,
+            "displayValueParsed": false
+          }
+        ]
       }
     ]
   },
@@ -16978,7 +17146,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertNumericCharacters",
+    "name": "Test text preprocessors - convertNumericCharacters",
     "originalTextLength": 2,
     "dictionaryEntries": [
       {
@@ -17043,7 +17211,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - convertAlphabeticCharacters",
     "originalTextLength": 4,
     "dictionaryEntries": [
       {
@@ -17389,7 +17557,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertKatakanaToHiragana",
     "originalTextLength": 2,
     "dictionaryEntries": [
       {
@@ -17735,7 +17903,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHiraganaToKatakana",
+    "name": "Test text preprocessors - convertHiraganaToKatakana",
     "originalTextLength": 4,
     "dictionaryEntries": [
       {
@@ -17844,7 +18012,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
     "originalTextLength": 2,
     "dictionaryEntries": [
       {
@@ -18190,7 +18358,7 @@
     ]
   },
   {
-    "name": "Test text transformations - collapseEmphaticSequences",
+    "name": "Test text preprocessors - collapseEmphaticSequences",
     "originalTextLength": 7,
     "dictionaryEntries": [
       {
@@ -18200,6 +18368,13 @@
           {
             "source": "algorithm",
             "inflectionRules": []
+          },
+          {
+            "source": "algorithm",
+            "inflectionRules": [
+              "adv",
+              "kansai-ben"
+            ]
           }
         ],
         "score": 1,
@@ -18267,5 +18442,167 @@
         "frequencies": []
       }
     ]
+  },
+  {
+    "name": "Test text preprocessors - capitalizeFirstLetter",
+    "originalTextLength": 7,
+    "dictionaryEntries": [
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 1,
+        "maxTransformedTextLength": 7,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "English",
+            "reading": "English",
+            "sources": [
+              {
+                "originalText": "english",
+                "transformedText": "English",
+                "deinflectedText": "English",
+                "matchType": "exact",
+                "matchSource": "term",
+                "isPrimary": true
+              }
+            ],
+            "tags": [],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 27,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              19
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "English definition"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": []
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - decapitalize",
+    "originalTextLength": 8,
+    "dictionaryEntries": [
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 1,
+        "maxTransformedTextLength": 8,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "language",
+            "reading": "language",
+            "sources": [
+              {
+                "originalText": "LANGUAGE",
+                "transformedText": "language",
+                "deinflectedText": "language",
+                "matchType": "exact",
+                "matchSource": "term",
+                "isPrimary": true
+              }
+            ],
+            "tags": [],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 28,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              20
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "language definition"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": []
+      }
+    ]
   }
 ]
diff --git a/test/options-util.test.js b/test/options-util.test.js
index 3a1b1efb..8c2bd775 100644
--- a/test/options-util.test.js
+++ b/test/options-util.test.js
@@ -241,6 +241,7 @@ function createProfileOptionsUpdatedTestData1() {
     return {
         general: {
             enable: true,
+            language: 'ja',
             resultOutputMode: 'group',
             debugInfo: false,
             maxResults: 32,
@@ -405,12 +406,6 @@ function createProfileOptionsUpdatedTestData1() {
             ]
         },
         translation: {
-            convertHalfWidthCharacters: 'false',
-            convertNumericCharacters: 'false',
-            convertAlphabeticCharacters: 'false',
-            convertHiraganaToKatakana: 'false',
-            convertKatakanaToHiragana: 'variant',
-            collapseEmphaticSequences: 'false',
             searchResolution: 'letter',
             textReplacements: {
                 searchOriginal: true,
@@ -604,7 +599,7 @@ function createOptionsUpdatedTestData1() {
             }
         ],
         profileCurrent: 0,
-        version: 25,
+        version: 26,
         global: {
             database: {
                 prefixWildcardsSupported: false
diff --git a/test/utilities/translator.js b/test/utilities/translator.js
index f452e688..e9c08443 100644
--- a/test/utilities/translator.js
+++ b/test/utilities/translator.js
@@ -124,14 +124,9 @@ export function createFindTermsOptions(dictionaryName, optionsPresets, optionsAr
         sortFrequencyDictionary,
         sortFrequencyDictionaryOrder,
         removeNonJapaneseCharacters,
-        convertHalfWidthCharacters,
-        convertNumericCharacters,
-        convertAlphabeticCharacters,
-        convertHiraganaToKatakana,
-        convertKatakanaToHiragana,
-        collapseEmphaticSequences,
         excludeDictionaryDefinitions,
-        searchResolution
+        searchResolution,
+        language
     } = preset;
 
     return {
@@ -141,15 +136,10 @@ export function createFindTermsOptions(dictionaryName, optionsPresets, optionsAr
         sortFrequencyDictionary: typeof sortFrequencyDictionary !== 'undefined' ? sortFrequencyDictionary : null,
         sortFrequencyDictionaryOrder: typeof sortFrequencyDictionaryOrder !== 'undefined' ? sortFrequencyDictionaryOrder : 'ascending',
         removeNonJapaneseCharacters: typeof removeNonJapaneseCharacters !== 'undefined' ? removeNonJapaneseCharacters : false,
-        convertHalfWidthCharacters: typeof convertHalfWidthCharacters !== 'undefined' ? convertHalfWidthCharacters : 'false',
-        convertNumericCharacters: typeof convertNumericCharacters !== 'undefined' ? convertNumericCharacters : 'false',
-        convertAlphabeticCharacters: typeof convertAlphabeticCharacters !== 'undefined' ? convertAlphabeticCharacters : 'false',
-        convertHiraganaToKatakana: typeof convertHiraganaToKatakana !== 'undefined' ? convertHiraganaToKatakana : 'false',
-        convertKatakanaToHiragana: typeof convertKatakanaToHiragana !== 'undefined' ? convertKatakanaToHiragana : 'false',
-        collapseEmphaticSequences: typeof collapseEmphaticSequences !== 'undefined' ? collapseEmphaticSequences : 'false',
         textReplacements,
         enabledDictionaryMap,
         excludeDictionaryDefinitions: Array.isArray(excludeDictionaryDefinitions) ? new Set(excludeDictionaryDefinitions) : null,
-        searchResolution: typeof searchResolution !== 'undefined' ? searchResolution : 'letter'
+        searchResolution: typeof searchResolution !== 'undefined' ? searchResolution : 'letter',
+        language: typeof language !== 'undefined' ? language : 'ja'
     };
 }
diff --git a/types/ext/api.d.ts b/types/ext/api.d.ts
index 1f4fc0a9..85f4c146 100644
--- a/types/ext/api.d.ts
+++ b/types/ext/api.d.ts
@@ -26,6 +26,7 @@ import type * as DictionaryDatabase from './dictionary-database';
 import type * as DictionaryImporter from './dictionary-importer';
 import type * as Environment from './environment';
 import type * as Extension from './extension';
+import type * as Language from './language';
 import type * as Log from './log';
 import type * as Settings from './settings';
 import type * as SettingsModifications from './settings-modifications';
@@ -380,6 +381,10 @@ type ApiSurface = {
         params: void;
         return: boolean;
     };
+    getLanguageSummaries: {
+        params: void;
+        return: Language.LanguageSummary[];
+    };
 };
 
 type ApiExtraArgs = [sender: chrome.runtime.MessageSender];
diff --git a/types/ext/language-english.d.ts b/types/ext/language-english.d.ts
new file mode 100644
index 00000000..ed501d57
--- /dev/null
+++ b/types/ext/language-english.d.ts
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import type {LanguageDescriptor, TextPreprocessor} from './language';
+
+export type EnglishTextPreprocessorDescriptor = {
+    capitalizeFirstLetter: TextPreprocessor<boolean>;
+    decapitalize: TextPreprocessor<boolean>;
+};
+
+export type EnglishLanguageDescriptor = LanguageDescriptor<EnglishTextPreprocessorDescriptor>;
diff --git a/types/ext/language-japanese.d.ts b/types/ext/language-japanese.d.ts
new file mode 100644
index 00000000..1a627ed1
--- /dev/null
+++ b/types/ext/language-japanese.d.ts
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import type {LanguageDescriptor, TextPreprocessor} from './language';
+
+export type JapaneseTextPreprocessorDescriptor = {
+    convertHalfWidthCharacters: TextPreprocessor<boolean>;
+    convertNumericCharacters: TextPreprocessor<boolean>;
+    convertAlphabeticCharacters: TextPreprocessor<boolean>;
+    convertHiraganaToKatakana: TextPreprocessor<boolean>;
+    convertKatakanaToHiragana: TextPreprocessor<boolean>;
+    collapseEmphaticSequences: TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>;
+};
+
+export type JapaneseLanguageDescriptor = LanguageDescriptor<JapaneseTextPreprocessorDescriptor>;
diff --git a/types/ext/language.d.ts b/types/ext/language.d.ts
new file mode 100644
index 00000000..247c7795
--- /dev/null
+++ b/types/ext/language.d.ts
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import type {TextSourceMap} from '../../ext/js/general/text-source-map.js';
+import type {SafeAny} from './core';
+
+export type TextPreprocessorOptions<T = unknown> = T[];
+
+export type TextPreprocessorFunction<T = unknown> = (str: string, setting: T, sourceMap: TextSourceMap) => string;
+
+export type TextPreprocessor<T = unknown> = {
+    name: string;
+    description: string;
+    options: TextPreprocessorOptions<T>;
+    process: TextPreprocessorFunction<T>;
+};
+
+export type LanguageAndPreprocessors = {
+    iso: string;
+    textPreprocessors: TextPreprocessorWithId<unknown>[];
+};
+
+export type TextPreprocessorWithId<T = unknown> = {
+    id: string;
+    textPreprocessor: TextPreprocessor<T>;
+};
+
+export type LanguageSummary = {
+    name: string;
+    iso: string;
+    exampleText: string;
+};
+
+export type LanguageDescriptor<TTextPreprocessorDescriptor extends TextPreprocessorDescriptor> = {
+    name: string;
+    iso: string;
+    exampleText: string;
+    textPreprocessors: TTextPreprocessorDescriptor;
+};
+
+export type TextPreprocessorDescriptor = {
+    [key: string]: TextPreprocessor<SafeAny>;
+};
diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts
index a900dbe6..45466c3d 100644
--- a/types/ext/settings.d.ts
+++ b/types/ext/settings.d.ts
@@ -101,6 +101,7 @@ export type ProfileOptions = {
 
 export type GeneralOptions = {
     enable: boolean;
+    language: string;
     resultOutputMode: ResultOutputMode;
     debugInfo: boolean;
     maxResults: number;
diff --git a/types/ext/translation-internal.d.ts b/types/ext/translation-internal.d.ts
index 82704c54..7006221e 100644
--- a/types/ext/translation-internal.d.ts
+++ b/types/ext/translation-internal.d.ts
@@ -18,6 +18,7 @@
 import type * as DictionaryDatabase from './dictionary-database';
 import type * as Dictionary from './dictionary';
 import type * as Translation from './translation';
+import type * as Language from './language';
 
 export type TextDeinflectionOptions = [
     textReplacements: Translation.FindTermsTextReplacement[] | null,
@@ -47,3 +48,5 @@ export type DatabaseDeinflection = {
     inflectionRuleChainCandidates: Dictionary.InflectionRuleChainCandidate[];
     databaseEntries: DictionaryDatabase.TermEntry[];
 };
+
+export type PreprocessorOptionsSpace = Map<string, Language.TextPreprocessorOptions<unknown>>;
diff --git a/types/ext/translation.d.ts b/types/ext/translation.d.ts
index c9a61be0..2e4d1a66 100644
--- a/types/ext/translation.d.ts
+++ b/types/ext/translation.d.ts
@@ -81,30 +81,6 @@ export type FindTermsOptions = {
      */
     removeNonJapaneseCharacters: boolean;
     /**
-     * Whether or not half-width characters should be converted to full-width characters.
-     */
-    convertHalfWidthCharacters: FindTermsVariantMode;
-    /**
-     * Whether or not ASCII numeric characters should be converted to full-width numeric characters.
-     */
-    convertNumericCharacters: FindTermsVariantMode;
-    /**
-     * Whether or not alphabetic characters should be converted to kana.
-     */
-    convertAlphabeticCharacters: FindTermsVariantMode;
-    /**
-     * Whether or not hiragana characters should be converted to katakana.
-     */
-    convertHiraganaToKatakana: FindTermsVariantMode;
-    /**
-     * Whether or not katakana characters should be converted to hiragana.
-     */
-    convertKatakanaToHiragana: FindTermsVariantMode;
-    /**
-     * How emphatic character sequences should be collapsed.
-     */
-    collapseEmphaticSequences: FindTermsEmphaticSequencesMode;
-    /**
      * An iterable sequence of text replacements to be applied during the term lookup process.
      */
     textReplacements: FindTermsTextReplacements;
@@ -121,6 +97,10 @@ export type FindTermsOptions = {
      * Whether every substring should be searched for, or only whole words.
      */
     searchResolution: SearchResolution;
+    /**
+     * ISO-639 code of the language.
+     */
+    language: string;
 };
 
 /**
@@ -134,16 +114,6 @@ export type FindTermsMatchType = Dictionary.TermSourceMatchType;
 export type FindTermsSortOrder = 'ascending' | 'descending';
 
 /**
- * Mode describing how to handle variations.
- */
-export type FindTermsVariantMode = 'false' | 'true' | 'variant';
-
-/**
- * Mode describing how to handle emphatic sequence variations.
- */
-export type FindTermsEmphaticSequencesMode = 'false' | 'true' | 'full';
-
-/**
  * Information about how text should be replaced when looking up terms.
  */
 export type FindTermsTextReplacement = {
diff --git a/types/test/translator.d.ts b/types/test/translator.d.ts
index e3199225..efd5cc3f 100644
--- a/types/test/translator.d.ts
+++ b/types/test/translator.d.ts
@@ -15,8 +15,8 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
-import type {FindTermsMatchType, FindTermsSortOrder, FindTermsVariantMode, FindTermsEmphaticSequencesMode, FindKanjiDictionary, FindTermDictionary} from '../ext/translation';
-import type {SearchResolution} from 'settings';
+import type {FindTermsMatchType, FindTermsSortOrder, FindKanjiDictionary, FindTermDictionary} from '../ext/translation';
+import type {SearchResolution} from '../ext/settings';
 import type {FindTermsMode} from 'translator';
 import type {DictionaryEntry} from 'dictionary';
 import type {NoteData} from 'anki-templates';
@@ -44,16 +44,11 @@ export type FindTermsOptionsPreset = {
     sortFrequencyDictionary?: string | null;
     sortFrequencyDictionaryOrder?: FindTermsSortOrder;
     removeNonJapaneseCharacters?: boolean;
-    convertHalfWidthCharacters?: FindTermsVariantMode;
-    convertNumericCharacters?: FindTermsVariantMode;
-    convertAlphabeticCharacters?: FindTermsVariantMode;
-    convertHiraganaToKatakana?: FindTermsVariantMode;
-    convertKatakanaToHiragana?: FindTermsVariantMode;
-    collapseEmphaticSequences?: FindTermsEmphaticSequencesMode;
     textReplacements?: (FindTermsTextReplacement[] | null)[];
     enabledDictionaryMap?: [key: string, value: FindTermDictionary][];
     excludeDictionaryDefinitions?: string[] | null;
     searchResolution?: SearchResolution;
+    language?: string;
 };
 
 export type OptionsType = OptionsPreset['type'];
author	StefanVukovic99 <stefanvukovic44@gmail.com>	2024-02-17 02:45:24 +0100
committer	GitHub <noreply@github.com>	2024-02-17 01:45:24 +0000
commit	4aaa9f15d97668203741c1731f15e710ae8b8294 (patch)
tree	d1885f7fbd7d1510a71176597169d6847ae26572
parent	4e77741d22778bd09b772fc53f1cbd64107e3d24 (diff)