diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-05-22 22:45:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-22 20:45:39 +0000 |
commit | d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch) | |
tree | b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f /test/data/translator-test-results-note-data1.json | |
parent | 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff) |
[ja] add preprocessor for width of alphabetic characters (#964)
* add Japanese text preprocessor for variants in width of alphabetic characters
* try combining with numeric to improve performance
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* fix tests
---------
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Diffstat (limited to 'test/data/translator-test-results-note-data1.json')
-rw-r--r-- | test/data/translator-test-results-note-data1.json | 234 |
1 file changed, 231 insertions, 3 deletions
diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json index 0782c8f5..11c9d078 100644 --- a/test/data/translator-test-results-note-data1.json +++ b/test/data/translator-test-results-note-data1.json @@ -30533,7 +30533,235 @@ ] }, { - "name": "Test text preprocessors - convertAlphabeticCharacters", + "name": "Test text preprocessors - alphabeticWidthVariants 1", + "noteDataList": [ + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 27, + "source": "English", + "rawSource": "English", + "sourceTerm": "English", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 19, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "English", + "reading": "English", + "expressions": [ + { + "sourceTerm": "English", + "expression": "English", + "reading": "English", + "termTags": [], + "frequencies": [], + "pitches": [], + "furiganaSegments": [ + { + "text": "English", + "furigana": "" + } + ], + "termFrequency": "normal", + "wordClasses": [ + "n" + ] + } + ], + "glossary": [ + "English definition" + ], + "definitionTags": [ + { + "name": "n", + "category": "partOfSpeech", + "notes": "noun", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "frequencyHarmonic": -1, + "frequencyAverage": -1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 1, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "English", + "furigana": "" + } + ] + }, + "glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + 
"modeKanji": false, + "compactGlossaries": false, + "uniqueExpressions": [ + "English" + ], + "uniqueReadings": [ + "English" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} + } + ] + }, + { + "name": "Test text preprocessors - alphabeticWidthVariants 2", + "noteDataList": [ + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 29, + "source": "USB", + "rawSource": "USB", + "sourceTerm": "USB", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 21, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "USB", + "reading": "ユーエスビー", + "expressions": [ + { + "sourceTerm": "USB", + "expression": "USB", + "reading": "ユーエスビー", + "termTags": [], + "frequencies": [], + "pitches": [], + "furiganaSegments": [ + { + "text": "USB", + "furigana": "ユーエスビー" + } + ], + "termFrequency": "normal", + "wordClasses": [ + "n" + ] + } + ], + "glossary": [ + "USB definition" + ], + "definitionTags": [ + { + "name": "n", + "category": "partOfSpeech", + "notes": "noun", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "frequencyHarmonic": -1, + "frequencyAverage": -1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 1, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "USB", + "furigana": "ユーエスビー" + } + ] + }, + "glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + "modeKanji": false, + "compactGlossaries": false, + 
"uniqueExpressions": [ + "USB" + ], + "uniqueReadings": [ + "ユーエスビー" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} + } + ] + }, + { + "name": "Test text preprocessors - alphabeticToHiragana", "noteDataList": [ { "marker": "{marker}", @@ -32948,7 +33176,7 @@ "marker": "{marker}", "definition": { "type": "term", - "id": 29, + "id": 30, "source": "ㅁㅏㅅㅣㄱㅓㄴㅏ", "rawSource": "마시거나", "sourceTerm": "마시다", @@ -32962,7 +33190,7 @@ ], "score": 1, "isPrimary": true, - "sequence": 21, + "sequence": 22, "dictionary": "Test Dictionary 2", "dictionaryOrder": { "index": 0, |