diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-05-14 13:32:57 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-14 11:32:57 +0000 |
commit | 82049f3439acae346767351827e0963a2586b702 (patch) | |
tree | 16db804b8dbcd5e6cdd3bae6e97dd78b731d9391 /test | |
parent | a336d176777e865228698a4daaf4b70e05309ca6 (diff) |
add korean (#787)
* prepare preprocessors
* add postprocessors
* add hangul functions (#13)
* move hangul to lib, add test (#14)
* add hangul functions
* hangul lib
* fix tests
* test inputs
* fix hangul disassemble, fix postprocessing
* rename test
* reset transformedText for multiple postprocessors
* add credits
* fix conflict
---------
Co-authored-by: Stefan Vukovic <stefanvukovic44@gmail.com>
* Vetting done (#16)
* add hangul functions
* hangul lib
* fix tests
* test inputs
* fix hangul disassemble, fix postprocessing
* rename test
* reset transformedText for multiple postprocessors
* add credits
* fix conflict
* add transforms
* move substring
* textprocessing function
* mostly fixed
* delete comment
* mostly fixed
(cherry picked from commit 268c8968ec64bf9d1fbd7d448a96d590399265c4)
* prepare preprocessors
(cherry picked from commit d139258398de9285396db31c6d46795ee95eb01a)
* (reset) remove textSourceMap
* rawSource before textreplacements
* update tests
* fix transforms
* fix transforms
* 100% vetted
* add link to dict
---------
Co-authored-by: Stefan Vukovic <stefanvukovic44@gmail.com>
* update unit tests
* update test
---------
Co-authored-by: Lyroxide <33834537+Lyroxide@users.noreply.github.com>
Diffstat (limited to 'test')
-rw-r--r-- | test/data/anki-note-builder-test-results.json | 46 | ||||
-rw-r--r-- | test/data/database-test-cases.json | 6 | ||||
-rw-r--r-- | test/data/dictionaries/valid-dictionary1/term_bank_1.json | 3 | ||||
-rw-r--r-- | test/data/translator-test-inputs.json | 14 | ||||
-rw-r--r-- | test/data/translator-test-results-note-data1.json | 116 | ||||
-rw-r--r-- | test/data/translator-test-results.json | 81 |
6 files changed, 262 insertions, 4 deletions
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json index 1d597b32..0e51b4aa 100644 --- a/test/data/anki-note-builder-test-results.json +++ b/test/data/anki-note-builder-test-results.json @@ -4315,5 +4315,51 @@ "url": "<a href=\"url:\">url:</a>" } ] + }, + { + "name": "Test text postprocessing", + "results": [ + { + "audio": "", + "clipboard-image": "", + "clipboard-text": "", + "cloze-body": "마시거나", + "cloze-body-kana": "마시거나", + "cloze-prefix": "cloze-prefix", + "cloze-suffix": "cloze-suffix", + "conjugation": "-거나", + "dictionary": "Test Dictionary 2", + "document-title": "title", + "expression": "마시다", + "frequencies": "", + "frequency-harmonic-rank": "9999999", + "frequency-harmonic-occurrence": "0", + "frequency-average-rank": "9999999", + "frequency-average-occurrence": "0", + "furigana": "마시다", + "furigana-plain": "마시다", + "glossary": "<div style=\"text-align: left;\"><i>(v, Test Dictionary 2)</i> masida definition</div>", + "glossary-brief": "<div style=\"text-align: left;\">masida definition</div>", + "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(v)</i> masida definition</div>", + "glossary-first": "<div style=\"text-align: left;\"><i>(v, Test Dictionary 2)</i> masida definition</div>", + "glossary-first-brief": "<div style=\"text-align: left;\">masida definition</div>", + "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(v)</i> masida definition</div>", + "part-of-speech": "v", + "pitch-accents": "No pitch accent data", + "pitch-accent-graphs": "No pitch accent data", + "pitch-accent-graphs-jj": "No pitch accent data", + "pitch-accent-positions": "No pitch accent data", + "pitch-accent-categories": "", + "phonetic-transcriptions": "", + "reading": "마시다", + "screenshot": "", + "search-query": "fullQuery", + "selection-text": "", + "sentence": "cloze-prefix마시거나cloze-suffix", + "sentence-furigana": "cloze-prefix마시거나cloze-suffix", + "tags": "v", + "url": "<a href=\"url:\">url:</a>" + } + ] } ] diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 5747f59e..054ede34 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -27,7 +27,7 @@ "ipa": 1 }, "terms": { - "total": 29 + "total": 30 } } }, @@ -36,7 +36,7 @@ { "kanji": 2, "kanjiMeta": 6, - "terms": 29, + "terms": 30, "termMeta": 39, "tagMeta": 15, "media": 6 @@ -45,7 +45,7 @@ "total": { "kanji": 2, "kanjiMeta": 6, - "terms": 29, + "terms": 30, "termMeta": 39, "tagMeta": 15, "media": 6 diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json index e7fb015c..a362de23 100644 --- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json @@ -343,5 +343,6 @@ ["39", "さんきゅう", "", "", 1, ["sankyuu definition"], 17, ""], ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""], ["English", "", "n", "n", 1, ["English definition"], 19, ""], - ["language", "", "n", "n", 1, ["language definition"], 20, ""] + ["language", "", "n", "n", 1, ["language definition"], 20, ""], + ["마시다", "", "v", "v", 1, ["masida definition"], 21, ""] ] diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json index 9e62954e..b7a84d7e 100644 --- a/test/data/translator-test-inputs.json +++ b/test/data/translator-test-inputs.json @@ -456,6 +456,20 @@ "removeNonJapaneseCharacters": false } ] + }, + { + "name": "Test text postprocessing", + "func": "findTerms", + "mode": "split", + "text": "마시거나", + "options": [ + "default", + { + "type": "terms", + "language": "ko", + "removeNonJapaneseCharacters": false + } + ] } ] } diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json index f0f32fa8..eeb348af 100644 --- a/test/data/translator-test-results-note-data1.json +++ b/test/data/translator-test-results-note-data1.json @@ -32940,5 +32940,121 @@ "media": {} } ] + }, + { + "name": "Test text postprocessing", + "noteDataList": [ + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 29, + "source": "ㅁㅏㅅㅣㄱㅓㄴㅏ", + "rawSource": "마시거나", + "sourceTerm": "마시다", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [ + "-거나" + ] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 21, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "마시다", + "reading": "마시다", + "expressions": [ + { + "sourceTerm": "마시다", + "expression": "마시다", + "reading": "마시다", + "termTags": [], + "frequencies": [], + "pitches": [], + "furiganaSegments": [ + { + "text": "마시다", + "furigana": "" + } + ], + "termFrequency": "normal", + "wordClasses": [ + "v" + ] + } + ], + "glossary": [ + "masida definition" + ], + "definitionTags": [ + { + "name": "v", + "category": "default", + "notes": "", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "frequencyHarmonic": -1, + "frequencyAverage": -1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 1, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "마시다", + "furigana": "" + } + ] + }, + "glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + "modeKanji": false, + "compactGlossaries": false, + "uniqueExpressions": [ + "마시다" + ], + "uniqueReadings": [ + "마시다" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} + } + ] } ] diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json index b3574b46..af5ceff7 100644 --- a/test/data/translator-test-results.json +++ b/test/data/translator-test-results.json @@ -18604,5 +18604,86 @@ "frequencies": [] } ] + }, + { + "name": "Test text postprocessing", + "originalTextLength": 4, + "dictionaryEntries": [ + { + "type": "term", + "isPrimary": true, + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [ + "-거나" + ] + } + ], + "score": 1, + "frequencyOrder": 0, + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "sourceTermExactMatchCount": 1, + "maxOriginalTextLength": 4, + "headwords": [ + { + "index": 0, + "term": "마시다", + "reading": "마시다", + "sources": [ + { + "originalText": "마시거나", + "transformedText": "ㅁㅏㅅㅣㄱㅓㄴㅏ", + "deinflectedText": "마시다", + "matchType": "exact", + "matchSource": "term", + "isPrimary": true + } + ], + "tags": [], + "wordClasses": [ + "v" + ] + } + ], + "definitions": [ + { + "index": 0, + "headwordIndices": [ + 0 + ], + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "id": 29, + "score": 1, + "frequencyOrder": 0, + "sequences": [ + 21 + ], + "isPrimary": true, + "tags": [ + { + "name": "v", + "category": "default", + "order": 0, + "score": 0, + "content": [], + "dictionaries": [ + "Test Dictionary 2" + ], + "redundant": false + } + ], + "entries": [ + "masida definition" + ] + } + ], + "pronunciations": [], + "frequencies": [] + } + ] } ] |