add language select, abstract text transformations (#584)

* Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * part1 * frotend done? * fix tests * offscreen and type complications * add tests * start fixing tests * keep fixing tests * fix tests * Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * delete pt * renames * add tests * kebab-case filenames * lint * minor fixes * merge * fixes * fix part of comments * fix more comments * delete unused types * comment * comment * do backend * other files * move fetch utils to own file * remove extra line * add extra line * remove unnecessary export * simplify folder structure * remove redundant async * fix param type in api * fix language index * undo changes to cssStyleApplier * undo changes to utilities.js * undo changes to utilities.js * simplify language util * lint * undo phantom changes to anki integration * require textTransformations options * explicit locale in localeCompare * punctuate notes * prefer early exit * rename LanguageOptionsObjectMap * rename to textPreprocessor * tuple with names instead of boolean array * safe data setting * optional chaining * simplify LanguageOptions * encapsulate languages * delete language util * nullable language in text preprocessors controller * rename transform to process * remove settings * make translation advanced again * remove unused getTextTransformations api call * comments * change language types * RIP flags * comments * fix tests * lint * Text preprocessor type changes (#10) * Add types * Update types * Simplify type check * Refactor typing and structuring of language definitions * lint * update translator benchmark * undo markdown changes * undo markdown changes * undo markdown changes * more merge * simplify language controller --------- Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com> Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
author: StefanVukovic99 <stefanvukovic44@gmail.com> 2024-02-17 02:45:24 +0100
committer: GitHub <noreply@github.com> 2024-02-17 01:45:24 +0000
commit: 4aaa9f15d97668203741c1731f15e710ae8b8294 (patch)
tree: d1885f7fbd7d1510a71176597169d6847ae26572 /test/data
parent: 4e77741d22778bd09b772fc53f1cbd64107e3d24 (diff)
6 files changed, 1070 insertions, 45 deletions
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json
index a2dd0923..1d84712d 100644
--- a/test/data/anki-note-builder-test-results.json
+++ b/test/data/anki-note-builder-test-results.json
@@ -869,6 +869,43 @@
         "sentence-furigana": "cloze-prefixダースcloze-suffix",
         "tags": "abbr, n",
         "url": "<a href=\"url:\">url:</a>"
+      },
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "ダ",
+        "cloze-body-kana": "ダ",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "打",
+        "frequencies": "<ul style=\"text-align: left;\"><li>Test Dictionary 2: 1</li><li>Test Dictionary 2: four</li><li>Test Dictionary 2: five (5)</li><li>Test Dictionary 2: 8</li><li>Test Dictionary 2: fourteen</li><li>Test Dictionary 2: twenty (20)</li><li>Test Dictionary 2: 26</li></ul>",
+        "frequency-harmonic-rank": "1",
+        "frequency-harmonic-occurrence": "1",
+        "frequency-average-rank": "1",
+        "frequency-average-occurrence": "1",
+        "furigana": "<ruby>打<rt>だ</rt></ruby>",
+        "furigana-plain": "打[だ]",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+        "glossary-brief": "<div style=\"text-align: left;\"><ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "No pitch accent data",
+        "pitch-accent-graphs": "No pitch accent data",
+        "pitch-accent-positions": "No pitch accent data",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "だ",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixダcloze-suffix",
+        "sentence-furigana": "cloze-prefixダcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
       }
     ]
   },
@@ -3437,7 +3474,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertNumericCharacters",
+    "name": "Test text preprocessors - convertNumericCharacters",
     "results": [
       {
         "audio": "",
@@ -3479,7 +3516,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - convertAlphabeticCharacters",
     "results": [
       {
         "audio": "",
@@ -3558,7 +3595,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertKatakanaToHiragana",
     "results": [
       {
         "audio": "",
@@ -3637,7 +3674,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHiraganaToKatakana",
+    "name": "Test text preprocessors - convertHiraganaToKatakana",
     "results": [
       {
         "audio": "",
@@ -3679,7 +3716,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
     "results": [
       {
         "audio": "",
@@ -3758,7 +3795,7 @@
     ]
   },
   {
-    "name": "Test text transformations - collapseEmphaticSequences",
+    "name": "Test text preprocessors - collapseEmphaticSequences",
     "results": [
       {
         "audio": "",
@@ -3768,7 +3805,7 @@
         "cloze-body-kana": "すっっごーーい",
         "cloze-prefix": "cloze-prefix",
         "cloze-suffix": "cloze-suffix",
-        "conjugation": "",
+        "conjugation": "<ul><li>adv « kansai-ben</li></ul>",
         "dictionary": "Test Dictionary 2",
         "document-title": "title",
         "expression": "凄い",
@@ -3798,5 +3835,89 @@
         "url": "<a href=\"url:\">url:</a>"
       }
     ]
+  },
+  {
+    "name": "Test text preprocessors - capitalizeFirstLetter",
+    "results": [
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "english",
+        "cloze-body-kana": "english",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "English",
+        "frequencies": "",
+        "frequency-harmonic-rank": "9999999",
+        "frequency-harmonic-occurrence": "0",
+        "frequency-average-rank": "9999999",
+        "frequency-average-occurrence": "0",
+        "furigana": "English",
+        "furigana-plain": "English",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+        "glossary-brief": "<div style=\"text-align: left;\">English definition</div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "No pitch accent data",
+        "pitch-accent-graphs": "No pitch accent data",
+        "pitch-accent-positions": "No pitch accent data",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "English",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixenglishcloze-suffix",
+        "sentence-furigana": "cloze-prefixenglishcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - decapitalize",
+    "results": [
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "LANGUAGE",
+        "cloze-body-kana": "LANGUAGE",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "language",
+        "frequencies": "",
+        "frequency-harmonic-rank": "9999999",
+        "frequency-harmonic-occurrence": "0",
+        "frequency-average-rank": "9999999",
+        "frequency-average-occurrence": "0",
+        "furigana": "language",
+        "furigana-plain": "language",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> language definition</div>",
+        "glossary-brief": "<div style=\"text-align: left;\">language definition</div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> language definition</div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "No pitch accent data",
+        "pitch-accent-graphs": "No pitch accent data",
+        "pitch-accent-positions": "No pitch accent data",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "language",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixLANGUAGEcloze-suffix",
+        "sentence-furigana": "cloze-prefixLANGUAGEcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
+      }
+    ]
   }
 ]
diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json
index 6d0f74ce..5747f59e 100644
--- a/test/data/database-test-cases.json
+++ b/test/data/database-test-cases.json
@@ -27,7 +27,7 @@
         "ipa": 1
       },
       "terms": {
-        "total": 27
+        "total": 29
       }
     }
   },
@@ -36,7 +36,7 @@
       {
         "kanji": 2,
         "kanjiMeta": 6,
-        "terms": 27,
+        "terms": 29,
         "termMeta": 39,
         "tagMeta": 15,
         "media": 6
@@ -45,7 +45,7 @@
     "total": {
       "kanji": 2,
       "kanjiMeta": 6,
-      "terms": 27,
+      "terms": 29,
       "termMeta": 39,
       "tagMeta": 15,
       "media": 6
diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
index 03a9900c..e7fb015c 100644
--- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json
+++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
@@ -341,5 +341,7 @@
     ["のたまう", "のたまう", "v5", "v5", 1, ["notamau definition"], 15, ""],
     ["のたもうた", "のたもうた", "", "", 1, [["のたまう", ["past"]]], 16, ""],
     ["３９", "さんきゅう", "", "", 1, ["sankyuu definition"], 17, ""],
-    ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""]
+    ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""],
+    ["English", "", "n", "n", 1, ["English definition"], 19, ""],
+    ["language", "", "n", "n", 1, ["language definition"], 20, ""]
 ]
diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json
index 03ec938a..c9047716 100644
--- a/test/data/translator-test-inputs.json
+++ b/test/data/translator-test-inputs.json
@@ -21,12 +21,7 @@
             "sortFrequencyDictionary": null,
             "sortFrequencyDictionaryOrder": "descending",
             "removeNonJapaneseCharacters": true,
-            "convertHalfWidthCharacters": "false",
-            "convertNumericCharacters": "false",
-            "convertAlphabeticCharacters": "false",
-            "convertHiraganaToKatakana": "false",
-            "convertKatakanaToHiragana": "false",
-            "collapseEmphaticSequences": "false",
+            "language": "ja",
             "textReplacements": [
                 null
             ],
@@ -361,7 +356,7 @@
             "options": "default"
         },
         {
-            "name": "Test text transformations - convertNumericCharacters",
+            "name": "Test text preprocessors - convertNumericCharacters",
             "func": "findTerms",
             "mode": "split",
             "text": "39",
@@ -369,13 +364,12 @@
                 "default",
                 {
                     "type": "terms",
-                    "convertNumericCharacters": "true",
                     "removeNonJapaneseCharacters": false
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertAlphabeticCharacters",
+            "name": "Test text preprocessors - convertAlphabeticCharacters",
             "func": "findTerms",
             "mode": "split",
             "text": "utsu",
@@ -383,61 +377,83 @@
                 "default",
                 {
                     "type": "terms",
-                    "convertAlphabeticCharacters": "true",
                     "removeNonJapaneseCharacters": false
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertKatakanaToHiragana",
+            "name": "Test text preprocessors - convertKatakanaToHiragana",
             "func": "findTerms",
             "mode": "split",
             "text": "ウツ",
             "options": [
                 "default",
                 {
-                    "type": "terms",
-                    "convertKatakanaToHiragana": "true"
+                    "type": "terms"
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertHiraganaToKatakana",
+            "name": "Test text preprocessors - convertHiraganaToKatakana",
             "func": "findTerms",
             "mode": "split",
             "text": "てきすと",
             "options": [
                 "default",
                 {
-                    "type": "terms",
-                    "convertHiraganaToKatakana": "true"
+                    "type": "terms"
                 }
             ]
         },
         {
-            "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+            "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
             "func": "findTerms",
             "mode": "split",
             "text": "ｳﾂ",
             "options": [
                 "default",
                 {
-                    "type": "terms",
-                    "convertHalfWidthCharacters": "true",
-                    "convertKatakanaToHiragana": "true"
+                    "type": "terms"
                 }
             ]
         },
         {
-            "name": "Test text transformations - collapseEmphaticSequences",
+            "name": "Test text preprocessors - collapseEmphaticSequences",
             "func": "findTerms",
             "mode": "split",
             "text": "すっっごーーい",
             "options": [
                 "default",
                 {
+                    "type": "terms"
+                }
+            ]
+        },
+        {
+            "name": "Test text preprocessors - capitalizeFirstLetter",
+            "func": "findTerms",
+            "mode": "split",
+            "text": "english",
+            "options": [
+                "default",
+                {
                     "type": "terms",
-                    "collapseEmphaticSequences": "full"
+                    "language": "en",
+                    "removeNonJapaneseCharacters": false
+                }
+            ]
+        },
+        {
+            "name": "Test text preprocessors - decapitalize",
+            "func": "findTerms",
+            "mode": "split",
+            "text": "LANGUAGE",
+            "options": [
+                "default",
+                {
+                    "type": "terms",
+                    "language": "en",
+                    "removeNonJapaneseCharacters": false
                 }
             ]
         }
diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json
index f84f28a4..f580ac53 100644
--- a/test/data/translator-test-results-note-data1.json
+++ b/test/data/translator-test-results-note-data1.json
@@ -7276,6 +7276,320 @@
           }
         },
         "media": {}
+      },
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 1,
+          "source": "だ",
+          "rawSource": "ダ",
+          "sourceTerm": "だ",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 1,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "打",
+          "reading": "だ",
+          "expressions": [
+            {
+              "sourceTerm": "だ",
+              "expression": "打",
+              "reading": "だ",
+              "termTags": [
+                {
+                  "name": "E1",
+                  "category": "default",
+                  "notes": "example tag 1",
+                  "order": 0,
+                  "score": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "redundant": false
+                }
+              ],
+              "frequencies": [
+                {
+                  "index": 0,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": false,
+                  "frequency": 1
+                },
+                {
+                  "index": 1,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": false,
+                  "frequency": "four"
+                },
+                {
+                  "index": 2,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": false,
+                  "frequency": "five (5)"
+                },
+                {
+                  "index": 3,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": 8
+                },
+                {
+                  "index": 4,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": "fourteen"
+                },
+                {
+                  "index": 5,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": "twenty (20)"
+                },
+                {
+                  "index": 6,
+                  "expressionIndex": 0,
+                  "dictionary": "Test Dictionary 2",
+                  "dictionaryOrder": {
+                    "index": 0,
+                    "priority": 0
+                  },
+                  "expression": "打",
+                  "reading": "だ",
+                  "hasReading": true,
+                  "frequency": 26
+                }
+              ],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "打",
+                  "furigana": "だ"
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "da definition 1",
+            "da definition 2"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [
+            {
+              "name": "E1",
+              "category": "default",
+              "notes": "example tag 1",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "frequencies": [
+            {
+              "index": 0,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": false,
+              "frequency": 1
+            },
+            {
+              "index": 1,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": false,
+              "frequency": "four"
+            },
+            {
+              "index": 2,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": false,
+              "frequency": "five (5)"
+            },
+            {
+              "index": 3,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": 8
+            },
+            {
+              "index": 4,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": "fourteen"
+            },
+            {
+              "index": 5,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": "twenty (20)"
+            },
+            {
+              "index": 6,
+              "expressionIndex": 0,
+              "dictionary": "Test Dictionary 2",
+              "dictionaryOrder": {
+                "index": 0,
+                "priority": 0
+              },
+              "expression": "打",
+              "reading": "だ",
+              "hasReading": true,
+              "frequency": 26
+            }
+          ],
+          "frequencyHarmonic": 1,
+          "frequencyAverage": 1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 0,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "打",
+              "furigana": "だ"
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "打"
+        ],
+        "uniqueReadings": [
+          "だ"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
       }
     ]
   },
@@ -30117,7 +30431,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertNumericCharacters",
+    "name": "Test text preprocessors - convertNumericCharacters",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -30219,7 +30533,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - convertAlphabeticCharacters",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -30852,7 +31166,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertKatakanaToHiragana",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -31485,7 +31799,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHiraganaToKatakana",
+    "name": "Test text preprocessors - convertHiraganaToKatakana",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -31638,7 +31952,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -32271,7 +32585,7 @@
     ]
   },
   {
-    "name": "Test text transformations - collapseEmphaticSequences",
+    "name": "Test text preprocessors - collapseEmphaticSequences",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -32285,6 +32599,13 @@
             {
               "source": "algorithm",
               "inflectionRules": []
+            },
+            {
+              "source": "algorithm",
+              "inflectionRules": [
+                "adv",
+                "kansai-ben"
+              ]
             }
           ],
           "score": 1,
@@ -32391,5 +32712,233 @@
         "media": {}
       }
     ]
+  },
+  {
+    "name": "Test text preprocessors - capitalizeFirstLetter",
+    "noteDataList": [
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 27,
+          "source": "English",
+          "rawSource": "english",
+          "sourceTerm": "English",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 19,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "English",
+          "reading": "English",
+          "expressions": [
+            {
+              "sourceTerm": "English",
+              "expression": "English",
+              "reading": "English",
+              "termTags": [],
+              "frequencies": [],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "English",
+                  "furigana": ""
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "English definition"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [],
+          "frequencies": [],
+          "frequencyHarmonic": -1,
+          "frequencyAverage": -1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 1,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "English",
+              "furigana": ""
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "English"
+        ],
+        "uniqueReadings": [
+          "English"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - decapitalize",
+    "noteDataList": [
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 28,
+          "source": "language",
+          "rawSource": "LANGUAGE",
+          "sourceTerm": "language",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 20,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "language",
+          "reading": "language",
+          "expressions": [
+            {
+              "sourceTerm": "language",
+              "expression": "language",
+              "reading": "language",
+              "termTags": [],
+              "frequencies": [],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "language",
+                  "furigana": ""
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "language definition"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [],
+          "frequencies": [],
+          "frequencyHarmonic": -1,
+          "frequencyAverage": -1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 1,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "language",
+              "furigana": ""
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "language"
+        ],
+        "uniqueReadings": [
+          "language"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
+      }
+    ]
   }
 ]
diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json
index e11cb4a9..cd3f7ab6 100644
--- a/test/data/translator-test-results.json
+++ b/test/data/translator-test-results.json
@@ -3969,6 +3969,174 @@
             "displayValueParsed": false
           }
         ]
+      },
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 0,
+        "maxTransformedTextLength": 1,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "打",
+            "reading": "だ",
+            "sources": [
+              {
+                "originalText": "ダ",
+                "transformedText": "だ",
+                "deinflectedText": "だ",
+                "matchType": "exact",
+                "matchSource": "reading",
+                "isPrimary": true
+              }
+            ],
+            "tags": [
+              {
+                "name": "E1",
+                "category": "default",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "example tag 1"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 1,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              1
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "da definition 1",
+              "da definition 2"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": [
+          {
+            "index": 0,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": false,
+            "frequency": 1,
+            "displayValue": null,
+            "displayValueParsed": false
+          },
+          {
+            "index": 1,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": false,
+            "frequency": 0,
+            "displayValue": "four",
+            "displayValueParsed": true
+          },
+          {
+            "index": 2,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": false,
+            "frequency": 5,
+            "displayValue": "five (5)",
+            "displayValueParsed": true
+          },
+          {
+            "index": 3,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 8,
+            "displayValue": null,
+            "displayValueParsed": false
+          },
+          {
+            "index": 4,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 0,
+            "displayValue": "fourteen",
+            "displayValueParsed": true
+          },
+          {
+            "index": 5,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 20,
+            "displayValue": "twenty (20)",
+            "displayValueParsed": true
+          },
+          {
+            "index": 6,
+            "headwordIndex": 0,
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "hasReading": true,
+            "frequency": 26,
+            "displayValue": null,
+            "displayValueParsed": false
+          }
+        ]
       }
     ]
   },
@@ -16978,7 +17146,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertNumericCharacters",
+    "name": "Test text preprocessors - convertNumericCharacters",
     "originalTextLength": 2,
     "dictionaryEntries": [
       {
@@ -17043,7 +17211,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - convertAlphabeticCharacters",
     "originalTextLength": 4,
     "dictionaryEntries": [
       {
@@ -17389,7 +17557,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertKatakanaToHiragana",
     "originalTextLength": 2,
     "dictionaryEntries": [
       {
@@ -17735,7 +17903,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHiraganaToKatakana",
+    "name": "Test text preprocessors - convertHiraganaToKatakana",
     "originalTextLength": 4,
     "dictionaryEntries": [
       {
@@ -17844,7 +18012,7 @@
     ]
   },
   {
-    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
     "originalTextLength": 2,
     "dictionaryEntries": [
       {
@@ -18190,7 +18358,7 @@
     ]
   },
   {
-    "name": "Test text transformations - collapseEmphaticSequences",
+    "name": "Test text preprocessors - collapseEmphaticSequences",
     "originalTextLength": 7,
     "dictionaryEntries": [
       {
@@ -18200,6 +18368,13 @@
           {
             "source": "algorithm",
             "inflectionRules": []
+          },
+          {
+            "source": "algorithm",
+            "inflectionRules": [
+              "adv",
+              "kansai-ben"
+            ]
           }
         ],
         "score": 1,
@@ -18267,5 +18442,167 @@
         "frequencies": []
       }
     ]
+  },
+  {
+    "name": "Test text preprocessors - capitalizeFirstLetter",
+    "originalTextLength": 7,
+    "dictionaryEntries": [
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 1,
+        "maxTransformedTextLength": 7,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "English",
+            "reading": "English",
+            "sources": [
+              {
+                "originalText": "english",
+                "transformedText": "English",
+                "deinflectedText": "English",
+                "matchType": "exact",
+                "matchSource": "term",
+                "isPrimary": true
+              }
+            ],
+            "tags": [],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 27,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              19
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "English definition"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": []
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - decapitalize",
+    "originalTextLength": 8,
+    "dictionaryEntries": [
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 1,
+        "maxTransformedTextLength": 8,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "language",
+            "reading": "language",
+            "sources": [
+              {
+                "originalText": "LANGUAGE",
+                "transformedText": "language",
+                "deinflectedText": "language",
+                "matchType": "exact",
+                "matchSource": "term",
+                "isPrimary": true
+              }
+            ],
+            "tags": [],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 28,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              20
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "language definition"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": []
+      }
+    ]
   }
 ]
author	StefanVukovic99 <stefanvukovic44@gmail.com>	2024-02-17 02:45:24 +0100
committer	GitHub <noreply@github.com>	2024-02-17 01:45:24 +0000
commit	4aaa9f15d97668203741c1731f15e710ae8b8294 (patch)
tree	d1885f7fbd7d1510a71176597169d6847ae26572 /test/data
parent	4e77741d22778bd09b772fc53f1cbd64107e3d24 (diff)