aboutsummaryrefslogtreecommitdiff
path: root/test/data
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2024-02-17 02:45:24 +0100
committerGitHub <noreply@github.com>2024-02-17 01:45:24 +0000
commit4aaa9f15d97668203741c1731f15e710ae8b8294 (patch)
treed1885f7fbd7d1510a71176597169d6847ae26572 /test/data
parent4e77741d22778bd09b772fc53f1cbd64107e3d24 (diff)
add language select, abstract text transformations (#584)
* Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * part1 * frotend done? * fix tests * offscreen and type complications * add tests * start fixing tests * keep fixing tests * fix tests * Copy functions from JapaneseUtil * Remove JapaneseUtil * Update usages of JapaneseUtil functions * delete pt * renames * add tests * kebab-case filenames * lint * minor fixes * merge * fixes * fix part of comments * fix more comments * delete unused types * comment * comment * do backend * other files * move fetch utils to own file * remove extra line * add extra line * remove unnecessary export * simplify folder structure * remove redundant async * fix param type in api * fix language index * undo changes to cssStyleApplier * undo changes to utilities.js * undo changes to utilities.js * simplify language util * lint * undo phantom changes to anki integration * require textTransformations options * explicit locale in localeCompare * punctuate notes * prefer early exit * rename LanguageOptionsObjectMap * rename to textPreprocessor * tuple with names instead of boolean array * safe data setting * optional chaining * simplify LanguageOptions * encapsulate languages * delete language util * nullable language in text preprocessors controller * rename transform to process * remove settings * make translation advanced again * remove unused getTextTransformations api call * comments * change language types * RIP flags * comments * fix tests * lint * Text preprocessor type changes (#10) * Add types * Update types * Simplify type check * Refactor typing and structuring of language definitions * lint * update translator benchmark * undo markdown changes * undo markdown changes * undo markdown changes * more merge * simplify language controller --------- Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com> Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
Diffstat (limited to 'test/data')
-rw-r--r--test/data/anki-note-builder-test-results.json135
-rw-r--r--test/data/database-test-cases.json6
-rw-r--r--test/data/dictionaries/valid-dictionary1/term_bank_1.json4
-rw-r--r--test/data/translator-test-inputs.json60
-rw-r--r--test/data/translator-test-results-note-data1.json561
-rw-r--r--test/data/translator-test-results.json349
6 files changed, 1070 insertions, 45 deletions
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json
index a2dd0923..1d84712d 100644
--- a/test/data/anki-note-builder-test-results.json
+++ b/test/data/anki-note-builder-test-results.json
@@ -869,6 +869,43 @@
"sentence-furigana": "cloze-prefixダースcloze-suffix",
"tags": "abbr, n",
"url": "<a href=\"url:\">url:</a>"
+ },
+ {
+ "audio": "",
+ "clipboard-image": "",
+ "clipboard-text": "",
+ "cloze-body": "ダ",
+ "cloze-body-kana": "ダ",
+ "cloze-prefix": "cloze-prefix",
+ "cloze-suffix": "cloze-suffix",
+ "conjugation": "",
+ "dictionary": "Test Dictionary 2",
+ "document-title": "title",
+ "expression": "打",
+ "frequencies": "<ul style=\"text-align: left;\"><li>Test Dictionary 2: 1</li><li>Test Dictionary 2: four</li><li>Test Dictionary 2: five (5)</li><li>Test Dictionary 2: 8</li><li>Test Dictionary 2: fourteen</li><li>Test Dictionary 2: twenty (20)</li><li>Test Dictionary 2: 26</li></ul>",
+ "frequency-harmonic-rank": "1",
+ "frequency-harmonic-occurrence": "1",
+ "frequency-average-rank": "1",
+ "frequency-average-occurrence": "1",
+ "furigana": "<ruby>打<rt>だ</rt></ruby>",
+ "furigana-plain": "打[だ]",
+ "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+ "glossary-brief": "<div style=\"text-align: left;\"><ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+ "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>",
+ "part-of-speech": "Noun",
+ "pitch-accents": "No pitch accent data",
+ "pitch-accent-graphs": "No pitch accent data",
+ "pitch-accent-positions": "No pitch accent data",
+ "pitch-accent-categories": "",
+ "phonetic-transcriptions": "",
+ "reading": "だ",
+ "screenshot": "",
+ "search-query": "fullQuery",
+ "selection-text": "",
+ "sentence": "cloze-prefixダcloze-suffix",
+ "sentence-furigana": "cloze-prefixダcloze-suffix",
+ "tags": "n",
+ "url": "<a href=\"url:\">url:</a>"
}
]
},
@@ -3437,7 +3474,7 @@
]
},
{
- "name": "Test text transformations - convertNumericCharacters",
+ "name": "Test text preprocessors - convertNumericCharacters",
"results": [
{
"audio": "",
@@ -3479,7 +3516,7 @@
]
},
{
- "name": "Test text transformations - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - convertAlphabeticCharacters",
"results": [
{
"audio": "",
@@ -3558,7 +3595,7 @@
]
},
{
- "name": "Test text transformations - convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertKatakanaToHiragana",
"results": [
{
"audio": "",
@@ -3637,7 +3674,7 @@
]
},
{
- "name": "Test text transformations - convertHiraganaToKatakana",
+ "name": "Test text preprocessors - convertHiraganaToKatakana",
"results": [
{
"audio": "",
@@ -3679,7 +3716,7 @@
]
},
{
- "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
"results": [
{
"audio": "",
@@ -3758,7 +3795,7 @@
]
},
{
- "name": "Test text transformations - collapseEmphaticSequences",
+ "name": "Test text preprocessors - collapseEmphaticSequences",
"results": [
{
"audio": "",
@@ -3768,7 +3805,7 @@
"cloze-body-kana": "すっっごーーい",
"cloze-prefix": "cloze-prefix",
"cloze-suffix": "cloze-suffix",
- "conjugation": "",
+ "conjugation": "<ul><li>adv « kansai-ben</li></ul>",
"dictionary": "Test Dictionary 2",
"document-title": "title",
"expression": "凄い",
@@ -3798,5 +3835,89 @@
"url": "<a href=\"url:\">url:</a>"
}
]
+ },
+ {
+ "name": "Test text preprocessors - capitalizeFirstLetter",
+ "results": [
+ {
+ "audio": "",
+ "clipboard-image": "",
+ "clipboard-text": "",
+ "cloze-body": "english",
+ "cloze-body-kana": "english",
+ "cloze-prefix": "cloze-prefix",
+ "cloze-suffix": "cloze-suffix",
+ "conjugation": "",
+ "dictionary": "Test Dictionary 2",
+ "document-title": "title",
+ "expression": "English",
+ "frequencies": "",
+ "frequency-harmonic-rank": "9999999",
+ "frequency-harmonic-occurrence": "0",
+ "frequency-average-rank": "9999999",
+ "frequency-average-occurrence": "0",
+ "furigana": "English",
+ "furigana-plain": "English",
+ "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+ "glossary-brief": "<div style=\"text-align: left;\">English definition</div>",
+ "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+ "part-of-speech": "Noun",
+ "pitch-accents": "No pitch accent data",
+ "pitch-accent-graphs": "No pitch accent data",
+ "pitch-accent-positions": "No pitch accent data",
+ "pitch-accent-categories": "",
+ "phonetic-transcriptions": "",
+ "reading": "English",
+ "screenshot": "",
+ "search-query": "fullQuery",
+ "selection-text": "",
+ "sentence": "cloze-prefixenglishcloze-suffix",
+ "sentence-furigana": "cloze-prefixenglishcloze-suffix",
+ "tags": "n",
+ "url": "<a href=\"url:\">url:</a>"
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - decapitalize",
+ "results": [
+ {
+ "audio": "",
+ "clipboard-image": "",
+ "clipboard-text": "",
+ "cloze-body": "LANGUAGE",
+ "cloze-body-kana": "LANGUAGE",
+ "cloze-prefix": "cloze-prefix",
+ "cloze-suffix": "cloze-suffix",
+ "conjugation": "",
+ "dictionary": "Test Dictionary 2",
+ "document-title": "title",
+ "expression": "language",
+ "frequencies": "",
+ "frequency-harmonic-rank": "9999999",
+ "frequency-harmonic-occurrence": "0",
+ "frequency-average-rank": "9999999",
+ "frequency-average-occurrence": "0",
+ "furigana": "language",
+ "furigana-plain": "language",
+ "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> language definition</div>",
+ "glossary-brief": "<div style=\"text-align: left;\">language definition</div>",
+ "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> language definition</div>",
+ "part-of-speech": "Noun",
+ "pitch-accents": "No pitch accent data",
+ "pitch-accent-graphs": "No pitch accent data",
+ "pitch-accent-positions": "No pitch accent data",
+ "pitch-accent-categories": "",
+ "phonetic-transcriptions": "",
+ "reading": "language",
+ "screenshot": "",
+ "search-query": "fullQuery",
+ "selection-text": "",
+ "sentence": "cloze-prefixLANGUAGEcloze-suffix",
+ "sentence-furigana": "cloze-prefixLANGUAGEcloze-suffix",
+ "tags": "n",
+ "url": "<a href=\"url:\">url:</a>"
+ }
+ ]
}
]
diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json
index 6d0f74ce..5747f59e 100644
--- a/test/data/database-test-cases.json
+++ b/test/data/database-test-cases.json
@@ -27,7 +27,7 @@
"ipa": 1
},
"terms": {
- "total": 27
+ "total": 29
}
}
},
@@ -36,7 +36,7 @@
{
"kanji": 2,
"kanjiMeta": 6,
- "terms": 27,
+ "terms": 29,
"termMeta": 39,
"tagMeta": 15,
"media": 6
@@ -45,7 +45,7 @@
"total": {
"kanji": 2,
"kanjiMeta": 6,
- "terms": 27,
+ "terms": 29,
"termMeta": 39,
"tagMeta": 15,
"media": 6
diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
index 03a9900c..e7fb015c 100644
--- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json
+++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
@@ -341,5 +341,7 @@
["のたまう", "のたまう", "v5", "v5", 1, ["notamau definition"], 15, ""],
["のたもうた", "のたもうた", "", "", 1, [["のたまう", ["past"]]], 16, ""],
["39", "さんきゅう", "", "", 1, ["sankyuu definition"], 17, ""],
- ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""]
+ ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""],
+ ["English", "", "n", "n", 1, ["English definition"], 19, ""],
+ ["language", "", "n", "n", 1, ["language definition"], 20, ""]
]
diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json
index 03ec938a..c9047716 100644
--- a/test/data/translator-test-inputs.json
+++ b/test/data/translator-test-inputs.json
@@ -21,12 +21,7 @@
"sortFrequencyDictionary": null,
"sortFrequencyDictionaryOrder": "descending",
"removeNonJapaneseCharacters": true,
- "convertHalfWidthCharacters": "false",
- "convertNumericCharacters": "false",
- "convertAlphabeticCharacters": "false",
- "convertHiraganaToKatakana": "false",
- "convertKatakanaToHiragana": "false",
- "collapseEmphaticSequences": "false",
+ "language": "ja",
"textReplacements": [
null
],
@@ -361,7 +356,7 @@
"options": "default"
},
{
- "name": "Test text transformations - convertNumericCharacters",
+ "name": "Test text preprocessors - convertNumericCharacters",
"func": "findTerms",
"mode": "split",
"text": "39",
@@ -369,13 +364,12 @@
"default",
{
"type": "terms",
- "convertNumericCharacters": "true",
"removeNonJapaneseCharacters": false
}
]
},
{
- "name": "Test text transformations - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - convertAlphabeticCharacters",
"func": "findTerms",
"mode": "split",
"text": "utsu",
@@ -383,61 +377,83 @@
"default",
{
"type": "terms",
- "convertAlphabeticCharacters": "true",
"removeNonJapaneseCharacters": false
}
]
},
{
- "name": "Test text transformations - convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertKatakanaToHiragana",
"func": "findTerms",
"mode": "split",
"text": "ウツ",
"options": [
"default",
{
- "type": "terms",
- "convertKatakanaToHiragana": "true"
+ "type": "terms"
}
]
},
{
- "name": "Test text transformations - convertHiraganaToKatakana",
+ "name": "Test text preprocessors - convertHiraganaToKatakana",
"func": "findTerms",
"mode": "split",
"text": "てきすと",
"options": [
"default",
{
- "type": "terms",
- "convertHiraganaToKatakana": "true"
+ "type": "terms"
}
]
},
{
- "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
"func": "findTerms",
"mode": "split",
"text": "ウツ",
"options": [
"default",
{
- "type": "terms",
- "convertHalfWidthCharacters": "true",
- "convertKatakanaToHiragana": "true"
+ "type": "terms"
}
]
},
{
- "name": "Test text transformations - collapseEmphaticSequences",
+ "name": "Test text preprocessors - collapseEmphaticSequences",
"func": "findTerms",
"mode": "split",
"text": "すっっごーーい",
"options": [
"default",
{
+ "type": "terms"
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - capitalizeFirstLetter",
+ "func": "findTerms",
+ "mode": "split",
+ "text": "english",
+ "options": [
+ "default",
+ {
"type": "terms",
- "collapseEmphaticSequences": "full"
+ "language": "en",
+ "removeNonJapaneseCharacters": false
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - decapitalize",
+ "func": "findTerms",
+ "mode": "split",
+ "text": "LANGUAGE",
+ "options": [
+ "default",
+ {
+ "type": "terms",
+ "language": "en",
+ "removeNonJapaneseCharacters": false
}
]
}
diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json
index f84f28a4..f580ac53 100644
--- a/test/data/translator-test-results-note-data1.json
+++ b/test/data/translator-test-results-note-data1.json
@@ -7276,6 +7276,320 @@
}
},
"media": {}
+ },
+ {
+ "marker": "{marker}",
+ "definition": {
+ "type": "term",
+ "id": 1,
+ "source": "だ",
+ "rawSource": "ダ",
+ "sourceTerm": "だ",
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "isPrimary": true,
+ "sequence": 1,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "dictionaryNames": [
+ "Test Dictionary 2"
+ ],
+ "expression": "打",
+ "reading": "だ",
+ "expressions": [
+ {
+ "sourceTerm": "だ",
+ "expression": "打",
+ "reading": "だ",
+ "termTags": [
+ {
+ "name": "E1",
+ "category": "default",
+ "notes": "example tag 1",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "frequencies": [
+ {
+ "index": 0,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": false,
+ "frequency": 1
+ },
+ {
+ "index": 1,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": false,
+ "frequency": "four"
+ },
+ {
+ "index": 2,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": false,
+ "frequency": "five (5)"
+ },
+ {
+ "index": 3,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": 8
+ },
+ {
+ "index": 4,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": "fourteen"
+ },
+ {
+ "index": 5,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": "twenty (20)"
+ },
+ {
+ "index": 6,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": 26
+ }
+ ],
+ "pitches": [],
+ "furiganaSegments": [
+ {
+ "text": "打",
+ "furigana": "だ"
+ }
+ ],
+ "termFrequency": "normal",
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "glossary": [
+ "da definition 1",
+ "da definition 2"
+ ],
+ "definitionTags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "notes": "noun",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "termTags": [
+ {
+ "name": "E1",
+ "category": "default",
+ "notes": "example tag 1",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "frequencies": [
+ {
+ "index": 0,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": false,
+ "frequency": 1
+ },
+ {
+ "index": 1,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": false,
+ "frequency": "four"
+ },
+ {
+ "index": 2,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": false,
+ "frequency": "five (5)"
+ },
+ {
+ "index": 3,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": 8
+ },
+ {
+ "index": 4,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": "fourteen"
+ },
+ {
+ "index": 5,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": "twenty (20)"
+ },
+ {
+ "index": 6,
+ "expressionIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "expression": "打",
+ "reading": "だ",
+ "hasReading": true,
+ "frequency": 26
+ }
+ ],
+ "frequencyHarmonic": 1,
+ "frequencyAverage": 1,
+ "pitches": [],
+ "phoneticTranscriptions": [],
+ "sourceTermExactMatchCount": 0,
+ "url": "url:",
+ "cloze": {
+ "sentence": "",
+ "prefix": "",
+ "body": "",
+ "bodyKana": "",
+ "suffix": ""
+ },
+ "furiganaSegments": [
+ {
+ "text": "打",
+ "furigana": "だ"
+ }
+ ]
+ },
+ "glossaryLayoutMode": "default",
+ "compactTags": false,
+ "group": false,
+ "merge": false,
+ "modeTermKanji": false,
+ "modeTermKana": false,
+ "modeKanji": false,
+ "compactGlossaries": false,
+ "uniqueExpressions": [
+ "打"
+ ],
+ "uniqueReadings": [
+ "だ"
+ ],
+ "pitches": [],
+ "pitchCount": 0,
+ "phoneticTranscriptions": [],
+ "context": {
+ "query": "query",
+ "fullQuery": "fullQuery",
+ "document": {
+ "title": "title"
+ }
+ },
+ "media": {}
}
]
},
@@ -30117,7 +30431,7 @@
]
},
{
- "name": "Test text transformations - convertNumericCharacters",
+ "name": "Test text preprocessors - convertNumericCharacters",
"noteDataList": [
{
"marker": "{marker}",
@@ -30219,7 +30533,7 @@
]
},
{
- "name": "Test text transformations - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - convertAlphabeticCharacters",
"noteDataList": [
{
"marker": "{marker}",
@@ -30852,7 +31166,7 @@
]
},
{
- "name": "Test text transformations - convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertKatakanaToHiragana",
"noteDataList": [
{
"marker": "{marker}",
@@ -31485,7 +31799,7 @@
]
},
{
- "name": "Test text transformations - convertHiraganaToKatakana",
+ "name": "Test text preprocessors - convertHiraganaToKatakana",
"noteDataList": [
{
"marker": "{marker}",
@@ -31638,7 +31952,7 @@
]
},
{
- "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
"noteDataList": [
{
"marker": "{marker}",
@@ -32271,7 +32585,7 @@
]
},
{
- "name": "Test text transformations - collapseEmphaticSequences",
+ "name": "Test text preprocessors - collapseEmphaticSequences",
"noteDataList": [
{
"marker": "{marker}",
@@ -32285,6 +32599,13 @@
{
"source": "algorithm",
"inflectionRules": []
+ },
+ {
+ "source": "algorithm",
+ "inflectionRules": [
+ "adv",
+ "kansai-ben"
+ ]
}
],
"score": 1,
@@ -32391,5 +32712,233 @@
"media": {}
}
]
+ },
+ {
+ "name": "Test text preprocessors - capitalizeFirstLetter",
+ "noteDataList": [
+ {
+ "marker": "{marker}",
+ "definition": {
+ "type": "term",
+ "id": 27,
+ "source": "English",
+ "rawSource": "english",
+ "sourceTerm": "English",
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "isPrimary": true,
+ "sequence": 19,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "dictionaryNames": [
+ "Test Dictionary 2"
+ ],
+ "expression": "English",
+ "reading": "English",
+ "expressions": [
+ {
+ "sourceTerm": "English",
+ "expression": "English",
+ "reading": "English",
+ "termTags": [],
+ "frequencies": [],
+ "pitches": [],
+ "furiganaSegments": [
+ {
+ "text": "English",
+ "furigana": ""
+ }
+ ],
+ "termFrequency": "normal",
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "glossary": [
+ "English definition"
+ ],
+ "definitionTags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "notes": "noun",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "termTags": [],
+ "frequencies": [],
+ "frequencyHarmonic": -1,
+ "frequencyAverage": -1,
+ "pitches": [],
+ "phoneticTranscriptions": [],
+ "sourceTermExactMatchCount": 1,
+ "url": "url:",
+ "cloze": {
+ "sentence": "",
+ "prefix": "",
+ "body": "",
+ "bodyKana": "",
+ "suffix": ""
+ },
+ "furiganaSegments": [
+ {
+ "text": "English",
+ "furigana": ""
+ }
+ ]
+ },
+ "glossaryLayoutMode": "default",
+ "compactTags": false,
+ "group": false,
+ "merge": false,
+ "modeTermKanji": false,
+ "modeTermKana": false,
+ "modeKanji": false,
+ "compactGlossaries": false,
+ "uniqueExpressions": [
+ "English"
+ ],
+ "uniqueReadings": [
+ "English"
+ ],
+ "pitches": [],
+ "pitchCount": 0,
+ "phoneticTranscriptions": [],
+ "context": {
+ "query": "query",
+ "fullQuery": "fullQuery",
+ "document": {
+ "title": "title"
+ }
+ },
+ "media": {}
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - decapitalize",
+ "noteDataList": [
+ {
+ "marker": "{marker}",
+ "definition": {
+ "type": "term",
+ "id": 28,
+ "source": "language",
+ "rawSource": "LANGUAGE",
+ "sourceTerm": "language",
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "isPrimary": true,
+ "sequence": 20,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "dictionaryNames": [
+ "Test Dictionary 2"
+ ],
+ "expression": "language",
+ "reading": "language",
+ "expressions": [
+ {
+ "sourceTerm": "language",
+ "expression": "language",
+ "reading": "language",
+ "termTags": [],
+ "frequencies": [],
+ "pitches": [],
+ "furiganaSegments": [
+ {
+ "text": "language",
+ "furigana": ""
+ }
+ ],
+ "termFrequency": "normal",
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "glossary": [
+ "language definition"
+ ],
+ "definitionTags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "notes": "noun",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "termTags": [],
+ "frequencies": [],
+ "frequencyHarmonic": -1,
+ "frequencyAverage": -1,
+ "pitches": [],
+ "phoneticTranscriptions": [],
+ "sourceTermExactMatchCount": 1,
+ "url": "url:",
+ "cloze": {
+ "sentence": "",
+ "prefix": "",
+ "body": "",
+ "bodyKana": "",
+ "suffix": ""
+ },
+ "furiganaSegments": [
+ {
+ "text": "language",
+ "furigana": ""
+ }
+ ]
+ },
+ "glossaryLayoutMode": "default",
+ "compactTags": false,
+ "group": false,
+ "merge": false,
+ "modeTermKanji": false,
+ "modeTermKana": false,
+ "modeKanji": false,
+ "compactGlossaries": false,
+ "uniqueExpressions": [
+ "language"
+ ],
+ "uniqueReadings": [
+ "language"
+ ],
+ "pitches": [],
+ "pitchCount": 0,
+ "phoneticTranscriptions": [],
+ "context": {
+ "query": "query",
+ "fullQuery": "fullQuery",
+ "document": {
+ "title": "title"
+ }
+ },
+ "media": {}
+ }
+ ]
}
]
diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json
index e11cb4a9..cd3f7ab6 100644
--- a/test/data/translator-test-results.json
+++ b/test/data/translator-test-results.json
@@ -3969,6 +3969,174 @@
"displayValueParsed": false
}
]
+ },
+ {
+ "type": "term",
+ "isPrimary": true,
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "frequencyOrder": 0,
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "sourceTermExactMatchCount": 0,
+ "maxTransformedTextLength": 1,
+ "headwords": [
+ {
+ "index": 0,
+ "term": "打",
+ "reading": "だ",
+ "sources": [
+ {
+ "originalText": "ダ",
+ "transformedText": "だ",
+ "deinflectedText": "だ",
+ "matchType": "exact",
+ "matchSource": "reading",
+ "isPrimary": true
+ }
+ ],
+ "tags": [
+ {
+ "name": "E1",
+ "category": "default",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "example tag 1"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "definitions": [
+ {
+ "index": 0,
+ "headwordIndices": [
+ 0
+ ],
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "id": 1,
+ "score": 1,
+ "frequencyOrder": 0,
+ "sequences": [
+ 1
+ ],
+ "isPrimary": true,
+ "tags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "noun"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "entries": [
+ "da definition 1",
+ "da definition 2"
+ ]
+ }
+ ],
+ "pronunciations": [],
+ "frequencies": [
+ {
+ "index": 0,
+ "headwordIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "hasReading": false,
+ "frequency": 1,
+ "displayValue": null,
+ "displayValueParsed": false
+ },
+ {
+ "index": 1,
+ "headwordIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "hasReading": false,
+ "frequency": 0,
+ "displayValue": "four",
+ "displayValueParsed": true
+ },
+ {
+ "index": 2,
+ "headwordIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "hasReading": false,
+ "frequency": 5,
+ "displayValue": "five (5)",
+ "displayValueParsed": true
+ },
+ {
+ "index": 3,
+ "headwordIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "hasReading": true,
+ "frequency": 8,
+ "displayValue": null,
+ "displayValueParsed": false
+ },
+ {
+ "index": 4,
+ "headwordIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "hasReading": true,
+ "frequency": 0,
+ "displayValue": "fourteen",
+ "displayValueParsed": true
+ },
+ {
+ "index": 5,
+ "headwordIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "hasReading": true,
+ "frequency": 20,
+ "displayValue": "twenty (20)",
+ "displayValueParsed": true
+ },
+ {
+ "index": 6,
+ "headwordIndex": 0,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "hasReading": true,
+ "frequency": 26,
+ "displayValue": null,
+ "displayValueParsed": false
+ }
+ ]
}
]
},
@@ -16978,7 +17146,7 @@
]
},
{
- "name": "Test text transformations - convertNumericCharacters",
+ "name": "Test text preprocessors - convertNumericCharacters",
"originalTextLength": 2,
"dictionaryEntries": [
{
@@ -17043,7 +17211,7 @@
]
},
{
- "name": "Test text transformations - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - convertAlphabeticCharacters",
"originalTextLength": 4,
"dictionaryEntries": [
{
@@ -17389,7 +17557,7 @@
]
},
{
- "name": "Test text transformations - convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertKatakanaToHiragana",
"originalTextLength": 2,
"dictionaryEntries": [
{
@@ -17735,7 +17903,7 @@
]
},
{
- "name": "Test text transformations - convertHiraganaToKatakana",
+ "name": "Test text preprocessors - convertHiraganaToKatakana",
"originalTextLength": 4,
"dictionaryEntries": [
{
@@ -17844,7 +18012,7 @@
]
},
{
- "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana",
+ "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",
"originalTextLength": 2,
"dictionaryEntries": [
{
@@ -18190,7 +18358,7 @@
]
},
{
- "name": "Test text transformations - collapseEmphaticSequences",
+ "name": "Test text preprocessors - collapseEmphaticSequences",
"originalTextLength": 7,
"dictionaryEntries": [
{
@@ -18200,6 +18368,13 @@
{
"source": "algorithm",
"inflectionRules": []
+ },
+ {
+ "source": "algorithm",
+ "inflectionRules": [
+ "adv",
+ "kansai-ben"
+ ]
}
],
"score": 1,
@@ -18267,5 +18442,167 @@
"frequencies": []
}
]
+ },
+ {
+ "name": "Test text preprocessors - capitalizeFirstLetter",
+ "originalTextLength": 7,
+ "dictionaryEntries": [
+ {
+ "type": "term",
+ "isPrimary": true,
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "frequencyOrder": 0,
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "sourceTermExactMatchCount": 1,
+ "maxTransformedTextLength": 7,
+ "headwords": [
+ {
+ "index": 0,
+ "term": "English",
+ "reading": "English",
+ "sources": [
+ {
+ "originalText": "english",
+ "transformedText": "English",
+ "deinflectedText": "English",
+ "matchType": "exact",
+ "matchSource": "term",
+ "isPrimary": true
+ }
+ ],
+ "tags": [],
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "definitions": [
+ {
+ "index": 0,
+ "headwordIndices": [
+ 0
+ ],
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "id": 27,
+ "score": 1,
+ "frequencyOrder": 0,
+ "sequences": [
+ 19
+ ],
+ "isPrimary": true,
+ "tags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "noun"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "entries": [
+ "English definition"
+ ]
+ }
+ ],
+ "pronunciations": [],
+ "frequencies": []
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - decapitalize",
+ "originalTextLength": 8,
+ "dictionaryEntries": [
+ {
+ "type": "term",
+ "isPrimary": true,
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "frequencyOrder": 0,
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "sourceTermExactMatchCount": 1,
+ "maxTransformedTextLength": 8,
+ "headwords": [
+ {
+ "index": 0,
+ "term": "language",
+ "reading": "language",
+ "sources": [
+ {
+ "originalText": "LANGUAGE",
+ "transformedText": "language",
+ "deinflectedText": "language",
+ "matchType": "exact",
+ "matchSource": "term",
+ "isPrimary": true
+ }
+ ],
+ "tags": [],
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "definitions": [
+ {
+ "index": 0,
+ "headwordIndices": [
+ 0
+ ],
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "id": 28,
+ "score": 1,
+ "frequencyOrder": 0,
+ "sequences": [
+ 20
+ ],
+ "isPrimary": true,
+ "tags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "noun"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "entries": [
+ "language definition"
+ ]
+ }
+ ],
+ "pronunciations": [],
+ "frequencies": []
+ }
+ ]
}
]