aboutsummaryrefslogtreecommitdiff
path: root/test/data
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2024-05-22 22:45:39 +0200
committerGitHub <noreply@github.com>2024-05-22 20:45:39 +0000
commitd19b898792bffed8ab2d5724472e5b65a5f5b146 (patch)
treeb3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f /test/data
parent125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff)
[ja] add preprocessor for width of alphabetic characters (#964)
* add japanese text preprocessor for variants in width of alphabetic characters * try combining with numeric to improve performance * Update ext/js/language/ja/japanese.js Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com> * Update ext/js/language/ja/japanese.js Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com> * fix tests --------- Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com> Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Diffstat (limited to 'test/data')
-rw-r--r--test/data/anki-note-builder-test-results.json94
-rw-r--r--test/data/database-test-cases.json6
-rw-r--r--test/data/dictionaries/valid-dictionary1/term_bank_1.json3
-rw-r--r--test/data/translator-test-inputs.json28
-rw-r--r--test/data/translator-test-results-note-data1.json234
-rw-r--r--test/data/translator-test-results.json168
6 files changed, 521 insertions, 12 deletions
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json
index 1ec8d4cd..b4badf10 100644
--- a/test/data/anki-note-builder-test-results.json
+++ b/test/data/anki-note-builder-test-results.json
@@ -3874,7 +3874,99 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "results": [
+ {
+ "audio": "",
+ "clipboard-image": "",
+ "clipboard-text": "",
+ "cloze-body": "English",
+ "cloze-body-kana": "English",
+ "cloze-prefix": "cloze-prefix",
+ "cloze-suffix": "cloze-suffix",
+ "conjugation": "",
+ "dictionary": "Test Dictionary 2",
+ "document-title": "title",
+ "expression": "English",
+ "frequencies": "",
+ "frequency-harmonic-rank": "9999999",
+ "frequency-harmonic-occurrence": "0",
+ "frequency-average-rank": "9999999",
+ "frequency-average-occurrence": "0",
+ "furigana": "English",
+ "furigana-plain": "English",
+ "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+ "glossary-brief": "<div style=\"text-align: left;\">English definition</div>",
+ "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+ "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+ "glossary-first-brief": "<div style=\"text-align: left;\">English definition</div>",
+ "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+ "part-of-speech": "Noun",
+ "pitch-accents": "",
+ "pitch-accent-graphs": "",
+ "pitch-accent-graphs-jj": "",
+ "pitch-accent-positions": "",
+ "pitch-accent-categories": "",
+ "phonetic-transcriptions": "",
+ "reading": "English",
+ "screenshot": "",
+ "search-query": "fullQuery",
+ "selection-text": "",
+ "sentence": "cloze-prefixEnglishcloze-suffix",
+ "sentence-furigana": "cloze-prefixEnglishcloze-suffix",
+ "tags": "n",
+ "url": "<a href=\"url:\">url:</a>"
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "results": [
+ {
+ "audio": "",
+ "clipboard-image": "",
+ "clipboard-text": "",
+ "cloze-body": "USB",
+ "cloze-body-kana": "USB",
+ "cloze-prefix": "cloze-prefix",
+ "cloze-suffix": "cloze-suffix",
+ "conjugation": "",
+ "dictionary": "Test Dictionary 2",
+ "document-title": "title",
+ "expression": "USB",
+ "frequencies": "",
+ "frequency-harmonic-rank": "9999999",
+ "frequency-harmonic-occurrence": "0",
+ "frequency-average-rank": "9999999",
+ "frequency-average-occurrence": "0",
+ "furigana": "<ruby>USB<rt>ユーエスビー</rt></ruby>",
+ "furigana-plain": "USB[ユーエスビー]",
+ "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>",
+ "glossary-brief": "<div style=\"text-align: left;\">USB definition</div>",
+ "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>",
+ "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>",
+ "glossary-first-brief": "<div style=\"text-align: left;\">USB definition</div>",
+ "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>",
+ "part-of-speech": "Noun",
+ "pitch-accents": "",
+ "pitch-accent-graphs": "",
+ "pitch-accent-graphs-jj": "",
+ "pitch-accent-positions": "",
+ "pitch-accent-categories": "",
+ "phonetic-transcriptions": "",
+ "reading": "ユーエスビー",
+ "screenshot": "",
+ "search-query": "fullQuery",
+ "selection-text": "",
+ "sentence": "cloze-prefixUSBcloze-suffix",
+ "sentence-furigana": "cloze-prefixUSBcloze-suffix",
+ "tags": "n",
+ "url": "<a href=\"url:\">url:</a>"
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"results": [
{
"audio": "",
diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json
index 054ede34..04b98aa9 100644
--- a/test/data/database-test-cases.json
+++ b/test/data/database-test-cases.json
@@ -27,7 +27,7 @@
"ipa": 1
},
"terms": {
- "total": 30
+ "total": 31
}
}
},
@@ -36,7 +36,7 @@
{
"kanji": 2,
"kanjiMeta": 6,
- "terms": 30,
+ "terms": 31,
"termMeta": 39,
"tagMeta": 15,
"media": 6
@@ -45,7 +45,7 @@
"total": {
"kanji": 2,
"kanjiMeta": 6,
- "terms": 30,
+ "terms": 31,
"termMeta": 39,
"tagMeta": 15,
"media": 6
diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
index a362de23..0a635b84 100644
--- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json
+++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
@@ -344,5 +344,6 @@
["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""],
["English", "", "n", "n", 1, ["English definition"], 19, ""],
["language", "", "n", "n", 1, ["language definition"], 20, ""],
- ["마시다", "", "v", "v", 1, ["masida definition"], 21, ""]
+ ["USB", "ユーエスビー", "n", "n", 1, ["USB definition"], 21, ""],
+ ["마시다", "", "v", "v", 1, ["masida definition"], 22, ""]
]
diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json
index 9f508a1d..ed028273 100644
--- a/test/data/translator-test-inputs.json
+++ b/test/data/translator-test-inputs.json
@@ -369,7 +369,33 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "func": "findTerms",
+ "mode": "split",
+ "text": "English",
+ "options": [
+ "default",
+ {
+ "type": "terms",
+ "removeNonJapaneseCharacters": false
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "func": "findTerms",
+ "mode": "split",
+ "text": "USB",
+ "options": [
+ "default",
+ {
+ "type": "terms",
+ "removeNonJapaneseCharacters": false
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"func": "findTerms",
"mode": "split",
"text": "utsu",
diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json
index 0782c8f5..11c9d078 100644
--- a/test/data/translator-test-results-note-data1.json
+++ b/test/data/translator-test-results-note-data1.json
@@ -30533,7 +30533,235 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "noteDataList": [
+ {
+ "marker": "{marker}",
+ "definition": {
+ "type": "term",
+ "id": 27,
+ "source": "English",
+ "rawSource": "English",
+ "sourceTerm": "English",
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "isPrimary": true,
+ "sequence": 19,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "dictionaryNames": [
+ "Test Dictionary 2"
+ ],
+ "expression": "English",
+ "reading": "English",
+ "expressions": [
+ {
+ "sourceTerm": "English",
+ "expression": "English",
+ "reading": "English",
+ "termTags": [],
+ "frequencies": [],
+ "pitches": [],
+ "furiganaSegments": [
+ {
+ "text": "English",
+ "furigana": ""
+ }
+ ],
+ "termFrequency": "normal",
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "glossary": [
+ "English definition"
+ ],
+ "definitionTags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "notes": "noun",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "termTags": [],
+ "frequencies": [],
+ "frequencyHarmonic": -1,
+ "frequencyAverage": -1,
+ "pitches": [],
+ "phoneticTranscriptions": [],
+ "sourceTermExactMatchCount": 1,
+ "url": "url:",
+ "cloze": {
+ "sentence": "",
+ "prefix": "",
+ "body": "",
+ "bodyKana": "",
+ "suffix": ""
+ },
+ "furiganaSegments": [
+ {
+ "text": "English",
+ "furigana": ""
+ }
+ ]
+ },
+ "glossaryLayoutMode": "default",
+ "compactTags": false,
+ "group": false,
+ "merge": false,
+ "modeTermKanji": false,
+ "modeTermKana": false,
+ "modeKanji": false,
+ "compactGlossaries": false,
+ "uniqueExpressions": [
+ "English"
+ ],
+ "uniqueReadings": [
+ "English"
+ ],
+ "pitches": [],
+ "pitchCount": 0,
+ "phoneticTranscriptions": [],
+ "context": {
+ "query": "query",
+ "fullQuery": "fullQuery",
+ "document": {
+ "title": "title"
+ }
+ },
+ "media": {}
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "noteDataList": [
+ {
+ "marker": "{marker}",
+ "definition": {
+ "type": "term",
+ "id": 29,
+ "source": "USB",
+ "rawSource": "USB",
+ "sourceTerm": "USB",
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "isPrimary": true,
+ "sequence": 21,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "dictionaryNames": [
+ "Test Dictionary 2"
+ ],
+ "expression": "USB",
+ "reading": "ユーエスビー",
+ "expressions": [
+ {
+ "sourceTerm": "USB",
+ "expression": "USB",
+ "reading": "ユーエスビー",
+ "termTags": [],
+ "frequencies": [],
+ "pitches": [],
+ "furiganaSegments": [
+ {
+ "text": "USB",
+ "furigana": "ユーエスビー"
+ }
+ ],
+ "termFrequency": "normal",
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "glossary": [
+ "USB definition"
+ ],
+ "definitionTags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "notes": "noun",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "termTags": [],
+ "frequencies": [],
+ "frequencyHarmonic": -1,
+ "frequencyAverage": -1,
+ "pitches": [],
+ "phoneticTranscriptions": [],
+ "sourceTermExactMatchCount": 1,
+ "url": "url:",
+ "cloze": {
+ "sentence": "",
+ "prefix": "",
+ "body": "",
+ "bodyKana": "",
+ "suffix": ""
+ },
+ "furiganaSegments": [
+ {
+ "text": "USB",
+ "furigana": "ユーエスビー"
+ }
+ ]
+ },
+ "glossaryLayoutMode": "default",
+ "compactTags": false,
+ "group": false,
+ "merge": false,
+ "modeTermKanji": false,
+ "modeTermKana": false,
+ "modeKanji": false,
+ "compactGlossaries": false,
+ "uniqueExpressions": [
+ "USB"
+ ],
+ "uniqueReadings": [
+ "ユーエスビー"
+ ],
+ "pitches": [],
+ "pitchCount": 0,
+ "phoneticTranscriptions": [],
+ "context": {
+ "query": "query",
+ "fullQuery": "fullQuery",
+ "document": {
+ "title": "title"
+ }
+ },
+ "media": {}
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"noteDataList": [
{
"marker": "{marker}",
@@ -32948,7 +33176,7 @@
"marker": "{marker}",
"definition": {
"type": "term",
- "id": 29,
+ "id": 30,
"source": "ㅁㅏㅅㅣㄱㅓㄴㅏ",
"rawSource": "마시거나",
"sourceTerm": "마시다",
@@ -32962,7 +33190,7 @@
],
"score": 1,
"isPrimary": true,
- "sequence": 21,
+ "sequence": 22,
"dictionary": "Test Dictionary 2",
"dictionaryOrder": {
"index": 0,
diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json
index bc26e0f9..ba251347 100644
--- a/test/data/translator-test-results.json
+++ b/test/data/translator-test-results.json
@@ -17211,7 +17211,169 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "originalTextLength": 7,
+ "dictionaryEntries": [
+ {
+ "type": "term",
+ "isPrimary": true,
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "frequencyOrder": 0,
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "sourceTermExactMatchCount": 1,
+ "maxOriginalTextLength": 7,
+ "headwords": [
+ {
+ "index": 0,
+ "term": "English",
+ "reading": "English",
+ "sources": [
+ {
+ "originalText": "English",
+ "transformedText": "English",
+ "deinflectedText": "English",
+ "matchType": "exact",
+ "matchSource": "term",
+ "isPrimary": true
+ }
+ ],
+ "tags": [],
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "definitions": [
+ {
+ "index": 0,
+ "headwordIndices": [
+ 0
+ ],
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "id": 27,
+ "score": 1,
+ "frequencyOrder": 0,
+ "sequences": [
+ 19
+ ],
+ "isPrimary": true,
+ "tags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "noun"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "entries": [
+ "English definition"
+ ]
+ }
+ ],
+ "pronunciations": [],
+ "frequencies": []
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "originalTextLength": 3,
+ "dictionaryEntries": [
+ {
+ "type": "term",
+ "isPrimary": true,
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "frequencyOrder": 0,
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "sourceTermExactMatchCount": 1,
+ "maxOriginalTextLength": 3,
+ "headwords": [
+ {
+ "index": 0,
+ "term": "USB",
+ "reading": "ユーエスビー",
+ "sources": [
+ {
+ "originalText": "USB",
+ "transformedText": "USB",
+ "deinflectedText": "USB",
+ "matchType": "exact",
+ "matchSource": "term",
+ "isPrimary": true
+ }
+ ],
+ "tags": [],
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "definitions": [
+ {
+ "index": 0,
+ "headwordIndices": [
+ 0
+ ],
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "id": 29,
+ "score": 1,
+ "frequencyOrder": 0,
+ "sequences": [
+ 21
+ ],
+ "isPrimary": true,
+ "tags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "noun"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "entries": [
+ "USB definition"
+ ]
+ }
+ ],
+ "pronunciations": [],
+ "frequencies": []
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"originalTextLength": 4,
"dictionaryEntries": [
{
@@ -18656,11 +18818,11 @@
"dictionary": "Test Dictionary 2",
"dictionaryIndex": 0,
"dictionaryPriority": 0,
- "id": 29,
+ "id": 30,
"score": 1,
"frequencyOrder": 0,
"sequences": [
- 21
+ 22
],
"isPrimary": true,
"tags": [