diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-02-17 02:45:24 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-17 01:45:24 +0000 |
commit | 4aaa9f15d97668203741c1731f15e710ae8b8294 (patch) | |
tree | d1885f7fbd7d1510a71176597169d6847ae26572 /test/data | |
parent | 4e77741d22778bd09b772fc53f1cbd64107e3d24 (diff) |
add language select, abstract text transformations (#584)
* Copy functions from JapaneseUtil
* Remove JapaneseUtil
* Update usages of JapaneseUtil functions
* part1
* frontend done?
* fix tests
* offscreen and type complications
* add tests
* start fixing tests
* keep fixing tests
* fix tests
* Copy functions from JapaneseUtil
* Remove JapaneseUtil
* Update usages of JapaneseUtil functions
* delete pt
* renames
* add tests
* kebab-case filenames
* lint
* minor fixes
* merge
* fixes
* fix part of comments
* fix more comments
* delete unused types
* comment
* comment
* do backend
* other files
* move fetch utils to own file
* remove extra line
* add extra line
* remove unnecessary export
* simplify folder structure
* remove redundant async
* fix param type in api
* fix language index
* undo changes to cssStyleApplier
* undo changes to utilities.js
* undo changes to utilities.js
* simplify language util
* lint
* undo phantom changes to anki integration
* require textTransformations options
* explicit locale in localeCompare
* punctuate notes
* prefer early exit
* rename LanguageOptionsObjectMap
* rename to textPreprocessor
* tuple with names instead of boolean array
* safe data setting
* optional chaining
* simplify LanguageOptions
* encapsulate languages
* delete language util
* nullable language in text preprocessors controller
* rename transform to process
* remove settings
* make translation advanced again
* remove unused getTextTransformations api call
* comments
* change language types
* RIP flags
* comments
* fix tests
* lint
* Text preprocessor type changes (#10)
* Add types
* Update types
* Simplify type check
* Refactor typing and structuring of language definitions
* lint
* update translator benchmark
* undo markdown changes
* undo markdown changes
* undo markdown changes
* more merge
* simplify language controller
---------
Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
Diffstat (limited to 'test/data')
-rw-r--r-- | test/data/anki-note-builder-test-results.json | 135 | ||||
-rw-r--r-- | test/data/database-test-cases.json | 6 | ||||
-rw-r--r-- | test/data/dictionaries/valid-dictionary1/term_bank_1.json | 4 | ||||
-rw-r--r-- | test/data/translator-test-inputs.json | 60 | ||||
-rw-r--r-- | test/data/translator-test-results-note-data1.json | 561 | ||||
-rw-r--r-- | test/data/translator-test-results.json | 349 |
6 files changed, 1070 insertions, 45 deletions
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json index a2dd0923..1d84712d 100644 --- a/test/data/anki-note-builder-test-results.json +++ b/test/data/anki-note-builder-test-results.json @@ -869,6 +869,43 @@ "sentence-furigana": "cloze-prefixダースcloze-suffix", "tags": "abbr, n", "url": "<a href=\"url:\">url:</a>" + }, + { + "audio": "", + "clipboard-image": "", + "clipboard-text": "", + "cloze-body": "ダ", + "cloze-body-kana": "ダ", + "cloze-prefix": "cloze-prefix", + "cloze-suffix": "cloze-suffix", + "conjugation": "", + "dictionary": "Test Dictionary 2", + "document-title": "title", + "expression": "打", + "frequencies": "<ul style=\"text-align: left;\"><li>Test Dictionary 2: 1</li><li>Test Dictionary 2: four</li><li>Test Dictionary 2: five (5)</li><li>Test Dictionary 2: 8</li><li>Test Dictionary 2: fourteen</li><li>Test Dictionary 2: twenty (20)</li><li>Test Dictionary 2: 26</li></ul>", + "frequency-harmonic-rank": "1", + "frequency-harmonic-occurrence": "1", + "frequency-average-rank": "1", + "frequency-average-occurrence": "1", + "furigana": "<ruby>打<rt>だ</rt></ruby>", + "furigana-plain": "打[だ]", + "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>", + "glossary-brief": "<div style=\"text-align: left;\"><ul><li>da definition 1</li><li>da definition 2</li></ul></div>", + "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>", + "part-of-speech": "Noun", + "pitch-accents": "No pitch accent data", + "pitch-accent-graphs": "No pitch accent data", + "pitch-accent-positions": "No pitch accent data", + "pitch-accent-categories": "", + "phonetic-transcriptions": "", + "reading": "だ", + "screenshot": "", + "search-query": "fullQuery", + "selection-text": "", + "sentence": "cloze-prefixダcloze-suffix", + "sentence-furigana": "cloze-prefixダcloze-suffix", 
+ "tags": "n", + "url": "<a href=\"url:\">url:</a>" } ] }, @@ -3437,7 +3474,7 @@ ] }, { - "name": "Test text transformations - convertNumericCharacters", + "name": "Test text preprocessors - convertNumericCharacters", "results": [ { "audio": "", @@ -3479,7 +3516,7 @@ ] }, { - "name": "Test text transformations - convertAlphabeticCharacters", + "name": "Test text preprocessors - convertAlphabeticCharacters", "results": [ { "audio": "", @@ -3558,7 +3595,7 @@ ] }, { - "name": "Test text transformations - convertKatakanaToHiragana", + "name": "Test text preprocessors - convertKatakanaToHiragana", "results": [ { "audio": "", @@ -3637,7 +3674,7 @@ ] }, { - "name": "Test text transformations - convertHiraganaToKatakana", + "name": "Test text preprocessors - convertHiraganaToKatakana", "results": [ { "audio": "", @@ -3679,7 +3716,7 @@ ] }, { - "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", + "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana", "results": [ { "audio": "", @@ -3758,7 +3795,7 @@ ] }, { - "name": "Test text transformations - collapseEmphaticSequences", + "name": "Test text preprocessors - collapseEmphaticSequences", "results": [ { "audio": "", @@ -3768,7 +3805,7 @@ "cloze-body-kana": "すっっごーーい", "cloze-prefix": "cloze-prefix", "cloze-suffix": "cloze-suffix", - "conjugation": "", + "conjugation": "<ul><li>adv « kansai-ben</li></ul>", "dictionary": "Test Dictionary 2", "document-title": "title", "expression": "凄い", @@ -3798,5 +3835,89 @@ "url": "<a href=\"url:\">url:</a>" } ] + }, + { + "name": "Test text preprocessors - capitalizeFirstLetter", + "results": [ + { + "audio": "", + "clipboard-image": "", + "clipboard-text": "", + "cloze-body": "english", + "cloze-body-kana": "english", + "cloze-prefix": "cloze-prefix", + "cloze-suffix": "cloze-suffix", + "conjugation": "", + "dictionary": "Test Dictionary 2", + "document-title": "title", + "expression": "English", + "frequencies": 
"", + "frequency-harmonic-rank": "9999999", + "frequency-harmonic-occurrence": "0", + "frequency-average-rank": "9999999", + "frequency-average-occurrence": "0", + "furigana": "English", + "furigana-plain": "English", + "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>", + "glossary-brief": "<div style=\"text-align: left;\">English definition</div>", + "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>", + "part-of-speech": "Noun", + "pitch-accents": "No pitch accent data", + "pitch-accent-graphs": "No pitch accent data", + "pitch-accent-positions": "No pitch accent data", + "pitch-accent-categories": "", + "phonetic-transcriptions": "", + "reading": "English", + "screenshot": "", + "search-query": "fullQuery", + "selection-text": "", + "sentence": "cloze-prefixenglishcloze-suffix", + "sentence-furigana": "cloze-prefixenglishcloze-suffix", + "tags": "n", + "url": "<a href=\"url:\">url:</a>" + } + ] + }, + { + "name": "Test text preprocessors - decapitalize", + "results": [ + { + "audio": "", + "clipboard-image": "", + "clipboard-text": "", + "cloze-body": "LANGUAGE", + "cloze-body-kana": "LANGUAGE", + "cloze-prefix": "cloze-prefix", + "cloze-suffix": "cloze-suffix", + "conjugation": "", + "dictionary": "Test Dictionary 2", + "document-title": "title", + "expression": "language", + "frequencies": "", + "frequency-harmonic-rank": "9999999", + "frequency-harmonic-occurrence": "0", + "frequency-average-rank": "9999999", + "frequency-average-occurrence": "0", + "furigana": "language", + "furigana-plain": "language", + "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> language definition</div>", + "glossary-brief": "<div style=\"text-align: left;\">language definition</div>", + "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> language definition</div>", + "part-of-speech": "Noun", + "pitch-accents": "No pitch accent data", + 
"pitch-accent-graphs": "No pitch accent data", + "pitch-accent-positions": "No pitch accent data", + "pitch-accent-categories": "", + "phonetic-transcriptions": "", + "reading": "language", + "screenshot": "", + "search-query": "fullQuery", + "selection-text": "", + "sentence": "cloze-prefixLANGUAGEcloze-suffix", + "sentence-furigana": "cloze-prefixLANGUAGEcloze-suffix", + "tags": "n", + "url": "<a href=\"url:\">url:</a>" + } + ] } ] diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 6d0f74ce..5747f59e 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -27,7 +27,7 @@ "ipa": 1 }, "terms": { - "total": 27 + "total": 29 } } }, @@ -36,7 +36,7 @@ { "kanji": 2, "kanjiMeta": 6, - "terms": 27, + "terms": 29, "termMeta": 39, "tagMeta": 15, "media": 6 @@ -45,7 +45,7 @@ "total": { "kanji": 2, "kanjiMeta": 6, - "terms": 27, + "terms": 29, "termMeta": 39, "tagMeta": 15, "media": 6 diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json index 03a9900c..e7fb015c 100644 --- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json @@ -341,5 +341,7 @@ ["のたまう", "のたまう", "v5", "v5", 1, ["notamau definition"], 15, ""], ["のたもうた", "のたもうた", "", "", 1, [["のたまう", ["past"]]], 16, ""], ["39", "さんきゅう", "", "", 1, ["sankyuu definition"], 17, ""], - ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""] + ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""], + ["English", "", "n", "n", 1, ["English definition"], 19, ""], + ["language", "", "n", "n", 1, ["language definition"], 20, ""] ] diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json index 03ec938a..c9047716 100644 --- a/test/data/translator-test-inputs.json +++ b/test/data/translator-test-inputs.json @@ -21,12 +21,7 @@ "sortFrequencyDictionary": null, 
"sortFrequencyDictionaryOrder": "descending", "removeNonJapaneseCharacters": true, - "convertHalfWidthCharacters": "false", - "convertNumericCharacters": "false", - "convertAlphabeticCharacters": "false", - "convertHiraganaToKatakana": "false", - "convertKatakanaToHiragana": "false", - "collapseEmphaticSequences": "false", + "language": "ja", "textReplacements": [ null ], @@ -361,7 +356,7 @@ "options": "default" }, { - "name": "Test text transformations - convertNumericCharacters", + "name": "Test text preprocessors - convertNumericCharacters", "func": "findTerms", "mode": "split", "text": "39", @@ -369,13 +364,12 @@ "default", { "type": "terms", - "convertNumericCharacters": "true", "removeNonJapaneseCharacters": false } ] }, { - "name": "Test text transformations - convertAlphabeticCharacters", + "name": "Test text preprocessors - convertAlphabeticCharacters", "func": "findTerms", "mode": "split", "text": "utsu", @@ -383,61 +377,83 @@ "default", { "type": "terms", - "convertAlphabeticCharacters": "true", "removeNonJapaneseCharacters": false } ] }, { - "name": "Test text transformations - convertKatakanaToHiragana", + "name": "Test text preprocessors - convertKatakanaToHiragana", "func": "findTerms", "mode": "split", "text": "ウツ", "options": [ "default", { - "type": "terms", - "convertKatakanaToHiragana": "true" + "type": "terms" } ] }, { - "name": "Test text transformations - convertHiraganaToKatakana", + "name": "Test text preprocessors - convertHiraganaToKatakana", "func": "findTerms", "mode": "split", "text": "てきすと", "options": [ "default", { - "type": "terms", - "convertHiraganaToKatakana": "true" + "type": "terms" } ] }, { - "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", + "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana", "func": "findTerms", "mode": "split", "text": "ウツ", "options": [ "default", { - "type": "terms", - "convertHalfWidthCharacters": "true", - 
"convertKatakanaToHiragana": "true" + "type": "terms" } ] }, { - "name": "Test text transformations - collapseEmphaticSequences", + "name": "Test text preprocessors - collapseEmphaticSequences", "func": "findTerms", "mode": "split", "text": "すっっごーーい", "options": [ "default", { + "type": "terms" + } + ] + }, + { + "name": "Test text preprocessors - capitalizeFirstLetter", + "func": "findTerms", + "mode": "split", + "text": "english", + "options": [ + "default", + { "type": "terms", - "collapseEmphaticSequences": "full" + "language": "en", + "removeNonJapaneseCharacters": false + } + ] + }, + { + "name": "Test text preprocessors - decapitalize", + "func": "findTerms", + "mode": "split", + "text": "LANGUAGE", + "options": [ + "default", + { + "type": "terms", + "language": "en", + "removeNonJapaneseCharacters": false } ] } diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json index f84f28a4..f580ac53 100644 --- a/test/data/translator-test-results-note-data1.json +++ b/test/data/translator-test-results-note-data1.json @@ -7276,6 +7276,320 @@ } }, "media": {} + }, + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 1, + "source": "だ", + "rawSource": "ダ", + "sourceTerm": "だ", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 1, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "打", + "reading": "だ", + "expressions": [ + { + "sourceTerm": "だ", + "expression": "打", + "reading": "だ", + "termTags": [ + { + "name": "E1", + "category": "default", + "notes": "example tag 1", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "frequencies": [ + { + "index": 0, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + 
"index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": false, + "frequency": 1 + }, + { + "index": 1, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": false, + "frequency": "four" + }, + { + "index": 2, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": false, + "frequency": "five (5)" + }, + { + "index": 3, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": 8 + }, + { + "index": 4, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": "fourteen" + }, + { + "index": 5, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": "twenty (20)" + }, + { + "index": 6, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": 26 + } + ], + "pitches": [], + "furiganaSegments": [ + { + "text": "打", + "furigana": "だ" + } + ], + "termFrequency": "normal", + "wordClasses": [ + "n" + ] + } + ], + "glossary": [ + "da definition 1", + "da definition 2" + ], + "definitionTags": [ + { + "name": "n", + "category": "partOfSpeech", + "notes": "noun", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [ + { + "name": "E1", + "category": "default", + "notes": "example tag 1", + "order": 0, + "score": 0, + 
"dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "frequencies": [ + { + "index": 0, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": false, + "frequency": 1 + }, + { + "index": 1, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": false, + "frequency": "four" + }, + { + "index": 2, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": false, + "frequency": "five (5)" + }, + { + "index": 3, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": 8 + }, + { + "index": 4, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": "fourteen" + }, + { + "index": 5, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": "twenty (20)" + }, + { + "index": 6, + "expressionIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "expression": "打", + "reading": "だ", + "hasReading": true, + "frequency": 26 + } + ], + "frequencyHarmonic": 1, + "frequencyAverage": 1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 0, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "打", + "furigana": "だ" + } + ] + }, + 
"glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + "modeKanji": false, + "compactGlossaries": false, + "uniqueExpressions": [ + "打" + ], + "uniqueReadings": [ + "だ" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} } ] }, @@ -30117,7 +30431,7 @@ ] }, { - "name": "Test text transformations - convertNumericCharacters", + "name": "Test text preprocessors - convertNumericCharacters", "noteDataList": [ { "marker": "{marker}", @@ -30219,7 +30533,7 @@ ] }, { - "name": "Test text transformations - convertAlphabeticCharacters", + "name": "Test text preprocessors - convertAlphabeticCharacters", "noteDataList": [ { "marker": "{marker}", @@ -30852,7 +31166,7 @@ ] }, { - "name": "Test text transformations - convertKatakanaToHiragana", + "name": "Test text preprocessors - convertKatakanaToHiragana", "noteDataList": [ { "marker": "{marker}", @@ -31485,7 +31799,7 @@ ] }, { - "name": "Test text transformations - convertHiraganaToKatakana", + "name": "Test text preprocessors - convertHiraganaToKatakana", "noteDataList": [ { "marker": "{marker}", @@ -31638,7 +31952,7 @@ ] }, { - "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", + "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana", "noteDataList": [ { "marker": "{marker}", @@ -32271,7 +32585,7 @@ ] }, { - "name": "Test text transformations - collapseEmphaticSequences", + "name": "Test text preprocessors - collapseEmphaticSequences", "noteDataList": [ { "marker": "{marker}", @@ -32285,6 +32599,13 @@ { "source": "algorithm", "inflectionRules": [] + }, + { + "source": "algorithm", + "inflectionRules": [ + "adv", + "kansai-ben" + ] } ], "score": 1, @@ -32391,5 +32712,233 @@ "media": {} } ] + }, + { + "name": "Test text 
preprocessors - capitalizeFirstLetter", + "noteDataList": [ + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 27, + "source": "English", + "rawSource": "english", + "sourceTerm": "English", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 19, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "English", + "reading": "English", + "expressions": [ + { + "sourceTerm": "English", + "expression": "English", + "reading": "English", + "termTags": [], + "frequencies": [], + "pitches": [], + "furiganaSegments": [ + { + "text": "English", + "furigana": "" + } + ], + "termFrequency": "normal", + "wordClasses": [ + "n" + ] + } + ], + "glossary": [ + "English definition" + ], + "definitionTags": [ + { + "name": "n", + "category": "partOfSpeech", + "notes": "noun", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "frequencyHarmonic": -1, + "frequencyAverage": -1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 1, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "English", + "furigana": "" + } + ] + }, + "glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + "modeKanji": false, + "compactGlossaries": false, + "uniqueExpressions": [ + "English" + ], + "uniqueReadings": [ + "English" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} + } + ] + }, + { + "name": "Test text preprocessors - decapitalize", + 
"noteDataList": [ + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 28, + "source": "language", + "rawSource": "LANGUAGE", + "sourceTerm": "language", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 20, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "language", + "reading": "language", + "expressions": [ + { + "sourceTerm": "language", + "expression": "language", + "reading": "language", + "termTags": [], + "frequencies": [], + "pitches": [], + "furiganaSegments": [ + { + "text": "language", + "furigana": "" + } + ], + "termFrequency": "normal", + "wordClasses": [ + "n" + ] + } + ], + "glossary": [ + "language definition" + ], + "definitionTags": [ + { + "name": "n", + "category": "partOfSpeech", + "notes": "noun", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "frequencyHarmonic": -1, + "frequencyAverage": -1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 1, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "language", + "furigana": "" + } + ] + }, + "glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + "modeKanji": false, + "compactGlossaries": false, + "uniqueExpressions": [ + "language" + ], + "uniqueReadings": [ + "language" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} + } + ] } ] diff --git a/test/data/translator-test-results.json 
b/test/data/translator-test-results.json index e11cb4a9..cd3f7ab6 100644 --- a/test/data/translator-test-results.json +++ b/test/data/translator-test-results.json @@ -3969,6 +3969,174 @@ "displayValueParsed": false } ] + }, + { + "type": "term", + "isPrimary": true, + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "frequencyOrder": 0, + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "sourceTermExactMatchCount": 0, + "maxTransformedTextLength": 1, + "headwords": [ + { + "index": 0, + "term": "打", + "reading": "だ", + "sources": [ + { + "originalText": "ダ", + "transformedText": "だ", + "deinflectedText": "だ", + "matchType": "exact", + "matchSource": "reading", + "isPrimary": true + } + ], + "tags": [ + { + "name": "E1", + "category": "default", + "order": 0, + "score": 0, + "content": [ + "example tag 1" + ], + "dictionaries": [ + "Test Dictionary 2" + ], + "redundant": false + } + ], + "wordClasses": [ + "n" + ] + } + ], + "definitions": [ + { + "index": 0, + "headwordIndices": [ + 0 + ], + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "id": 1, + "score": 1, + "frequencyOrder": 0, + "sequences": [ + 1 + ], + "isPrimary": true, + "tags": [ + { + "name": "n", + "category": "partOfSpeech", + "order": 0, + "score": 0, + "content": [ + "noun" + ], + "dictionaries": [ + "Test Dictionary 2" + ], + "redundant": false + } + ], + "entries": [ + "da definition 1", + "da definition 2" + ] + } + ], + "pronunciations": [], + "frequencies": [ + { + "index": 0, + "headwordIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "hasReading": false, + "frequency": 1, + "displayValue": null, + "displayValueParsed": false + }, + { + "index": 1, + "headwordIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "hasReading": false, + "frequency": 0, + "displayValue": "four", + 
"displayValueParsed": true + }, + { + "index": 2, + "headwordIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "hasReading": false, + "frequency": 5, + "displayValue": "five (5)", + "displayValueParsed": true + }, + { + "index": 3, + "headwordIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "hasReading": true, + "frequency": 8, + "displayValue": null, + "displayValueParsed": false + }, + { + "index": 4, + "headwordIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "hasReading": true, + "frequency": 0, + "displayValue": "fourteen", + "displayValueParsed": true + }, + { + "index": 5, + "headwordIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "hasReading": true, + "frequency": 20, + "displayValue": "twenty (20)", + "displayValueParsed": true + }, + { + "index": 6, + "headwordIndex": 0, + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "hasReading": true, + "frequency": 26, + "displayValue": null, + "displayValueParsed": false + } + ] } ] }, @@ -16978,7 +17146,7 @@ ] }, { - "name": "Test text transformations - convertNumericCharacters", + "name": "Test text preprocessors - convertNumericCharacters", "originalTextLength": 2, "dictionaryEntries": [ { @@ -17043,7 +17211,7 @@ ] }, { - "name": "Test text transformations - convertAlphabeticCharacters", + "name": "Test text preprocessors - convertAlphabeticCharacters", "originalTextLength": 4, "dictionaryEntries": [ { @@ -17389,7 +17557,7 @@ ] }, { - "name": "Test text transformations - convertKatakanaToHiragana", + "name": "Test text preprocessors - convertKatakanaToHiragana", "originalTextLength": 2, "dictionaryEntries": [ { @@ -17735,7 +17903,7 @@ ] }, { - "name": "Test text transformations - convertHiraganaToKatakana", + "name": "Test text preprocessors - 
convertHiraganaToKatakana", "originalTextLength": 4, "dictionaryEntries": [ { @@ -17844,7 +18012,7 @@ ] }, { - "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", + "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana", "originalTextLength": 2, "dictionaryEntries": [ { @@ -18190,7 +18358,7 @@ ] }, { - "name": "Test text transformations - collapseEmphaticSequences", + "name": "Test text preprocessors - collapseEmphaticSequences", "originalTextLength": 7, "dictionaryEntries": [ { @@ -18200,6 +18368,13 @@ { "source": "algorithm", "inflectionRules": [] + }, + { + "source": "algorithm", + "inflectionRules": [ + "adv", + "kansai-ben" + ] } ], "score": 1, @@ -18267,5 +18442,167 @@ "frequencies": [] } ] + }, + { + "name": "Test text preprocessors - capitalizeFirstLetter", + "originalTextLength": 7, + "dictionaryEntries": [ + { + "type": "term", + "isPrimary": true, + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "frequencyOrder": 0, + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "sourceTermExactMatchCount": 1, + "maxTransformedTextLength": 7, + "headwords": [ + { + "index": 0, + "term": "English", + "reading": "English", + "sources": [ + { + "originalText": "english", + "transformedText": "English", + "deinflectedText": "English", + "matchType": "exact", + "matchSource": "term", + "isPrimary": true + } + ], + "tags": [], + "wordClasses": [ + "n" + ] + } + ], + "definitions": [ + { + "index": 0, + "headwordIndices": [ + 0 + ], + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "id": 27, + "score": 1, + "frequencyOrder": 0, + "sequences": [ + 19 + ], + "isPrimary": true, + "tags": [ + { + "name": "n", + "category": "partOfSpeech", + "order": 0, + "score": 0, + "content": [ + "noun" + ], + "dictionaries": [ + "Test Dictionary 2" + ], + "redundant": false + } + ], + "entries": [ + "English 
definition" + ] + } + ], + "pronunciations": [], + "frequencies": [] + } + ] + }, + { + "name": "Test text preprocessors - decapitalize", + "originalTextLength": 8, + "dictionaryEntries": [ + { + "type": "term", + "isPrimary": true, + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "frequencyOrder": 0, + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "sourceTermExactMatchCount": 1, + "maxTransformedTextLength": 8, + "headwords": [ + { + "index": 0, + "term": "language", + "reading": "language", + "sources": [ + { + "originalText": "LANGUAGE", + "transformedText": "language", + "deinflectedText": "language", + "matchType": "exact", + "matchSource": "term", + "isPrimary": true + } + ], + "tags": [], + "wordClasses": [ + "n" + ] + } + ], + "definitions": [ + { + "index": 0, + "headwordIndices": [ + 0 + ], + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "id": 28, + "score": 1, + "frequencyOrder": 0, + "sequences": [ + 20 + ], + "isPrimary": true, + "tags": [ + { + "name": "n", + "category": "partOfSpeech", + "order": 0, + "score": 0, + "content": [ + "noun" + ], + "dictionaries": [ + "Test Dictionary 2" + ], + "redundant": false + } + ], + "entries": [ + "language definition" + ] + } + ], + "pronunciations": [], + "frequencies": [] + } + ] } ] |