From 1e927dd66e24b72ac3ba129dfb578746ce896ce2 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 19 Feb 2021 18:39:43 -0500 Subject: Fix translation regex replacements (#1423) * Fix regex replacements having issues at the start of scanned text * Fix test cases * Add tests --- ext/js/language/translator.js | 2 +- .../dictionaries/valid-dictionary2/tag_bank_1.json | 4 +- .../valid-dictionary2/term_bank_1.json | 4 +- test/data/test-translator-data.json | 335 ++++++++++++++++++++- 4 files changed, 336 insertions(+), 9 deletions(-) diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 729c8294..fc71bf7f 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -1361,8 +1361,8 @@ class Translator { pattern.lastIndex += delta; if (actualReplacementLength > 0) { - sourceMap.combine(Math.max(0, index - 1), matchText.length); sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0))); + sourceMap.combine(index - 1 + actualReplacementLength, matchText.length); } else { sourceMap.combine(index, matchText.length); } diff --git a/test/data/dictionaries/valid-dictionary2/tag_bank_1.json b/test/data/dictionaries/valid-dictionary2/tag_bank_1.json index 109ad395..d66db94b 100644 --- a/test/data/dictionaries/valid-dictionary2/tag_bank_1.json +++ b/test/data/dictionaries/valid-dictionary2/tag_bank_1.json @@ -3,5 +3,7 @@ ["tag2", "category2", 0, "tag2 notes", 0], ["tag3", "category3", 0, "tag3 notes", 0], ["tag4", "category4", 0, "tag4 notes", 0], - ["tag5", "category5", 0, "tag5 notes", 0] + ["tag5", "category5", 0, "tag5 notes", 0], + ["popular", "popular", 0, "popular term", 0], + ["vt", "partOfSpeech", 0, "transitive verb", 0] ] \ No newline at end of file diff --git a/test/data/dictionaries/valid-dictionary2/term_bank_1.json b/test/data/dictionaries/valid-dictionary2/term_bank_1.json index 32dc1ede..fcfe7365 100644 --- a/test/data/dictionaries/valid-dictionary2/term_bank_1.json +++ b/test/data/dictionaries/valid-dictionary2/term_bank_1.json @@ -9,5 +9,7 @@ ["打ち込む", "うちこむ", "tag1 tag2", "v5", 1, ["definition15", "definition16"], 4, "tag5 tag6 tag7"], ["打ち込む", "ぶちこむ", "tag1 tag2", "v5", 10, ["definition17", "definition18"], 4, "tag3 tag4 tag5"], ["打ち込む", "ぶちこむ", "tag1 tag2", "v5", 1, ["definition19", "definition20"], 4, "tag3 tag4 tag5"], - ["画像", "がぞう", "tag1 tag2", "", 1, ["definition21", {"type": "image", "path": "image.gif", "width": 350, "height": 350, "description": "An image", "pixelated": true}], 5, "tag3 tag4 tag5"] + ["画像", "がぞう", "tag1 tag2", "", 1, ["definition21", {"type": "image", "path": "image.gif", "width": 350, "height": 350, "description": "An image", "pixelated": true}], 5, "tag3 tag4 tag5"], + ["読む", "よむ", "popular", "v5", 100, ["to read"], 6, "vt"], + ["強み", "つよみ", "popular", "n", 90, ["strong point"], 7, ""] ] \ No newline at end of file diff --git a/test/data/test-translator-data.json b/test/data/test-translator-data.json index 651b87d1..87184698 100644 --- a/test/data/test-translator-data.json +++ b/test/data/test-translator-data.json @@ -18803,7 +18803,7 @@ "type": "term", "id": 3, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -18967,7 +18967,7 @@ "type": "term", "id": 5, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -19131,7 +19131,7 @@ "type": "term", "id": 4, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -19295,7 +19295,7 @@ "type": "term", "id": 6, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -19459,7 +19459,7 @@ "type": "term", "id": 1, "source": "打", - "rawSource": "(打)(ち)", + "rawSource": "(打)", "sourceTerm": "打", "reasons": [], "score": 1, @@ -19613,7 +19613,7 @@ "type": "term", "id": 2, "source": "打", - "rawSource": "(打)(ち)", + "rawSource": "(打)", "sourceTerm": "打", "reasons": [], "score": 1, @@ -19765,6 +19765,329 @@ } ] } + }, + { + "comment": "Test non-empty replacement", + "func": "findTerms", + "mode": "split", + "text": "test", + "options": [ + "default", + { + "alphanumeric": true, + "textReplacements": [ + null, + [ + { + "pattern": "test", + "flags": "g", + "replacement": "よみ" + } + ] + ] + } + ], + "expected": { + "length": 4, + "definitions": [ + { + "type": "term", + "id": 12, + "source": "よみ", + "rawSource": "test", + "sourceTerm": "よむ", + "reasons": [ + "masu stem" + ], + "score": 100, + "sequence": 6, + "dictionary": "Test Dictionary 2", + "dictionaryPriority": 0, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "読む", + "reading": "よむ", + "expressions": [ + { + "sourceTerm": "よむ", + "expression": "読む", + "reading": "よむ", + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termFrequency": "normal", + "frequencies": [], + "pitches": [] + } + ], + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "glossary": [ + "to read" + ], + "definitionTags": [ + { + "name": "popular", + "category": "popular", + "notes": "popular term", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "frequencies": [], + "pitches": [], + "sourceTermExactMatchCount": 0 + } + ] + } + }, + { + "comment": "Test non-empty replacement at end", + "func": "findTerms", + "mode": "split", + "text": "つtest", + "options": [ + "default", + { + "alphanumeric": true, + "textReplacements": [ + null, + [ + { + "pattern": "test", + "flags": "g", + "replacement": "よみ" + } + ] + ] + } + ], + "expected": { + "length": 5, + "definitions": [ + { + "type": "term", + "id": 13, + "source": "つよみ", + "rawSource": "つtest", + "sourceTerm": "つよみ", + "reasons": [], + "score": 90, + "sequence": 7, + "dictionary": "Test Dictionary 2", + "dictionaryPriority": 0, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "強み", + "reading": "つよみ", + "expressions": [ + { + "sourceTerm": "つよみ", + "expression": "強み", + "reading": "つよみ", + "furiganaSegments": [ + { + "text": "強", + "furigana": "つよ" + }, + { + "text": "み", + "furigana": "" + } + ], + "termTags": [], + "termFrequency": "normal", + "frequencies": [], + "pitches": [] + } + ], + "furiganaSegments": [ + { + "text": "強", + "furigana": "つよ" + }, + { + "text": "み", + "furigana": "" + } + ], + "glossary": [ + "strong point" + ], + "definitionTags": [ + { + "name": "popular", + "category": "popular", + "notes": "popular term", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "pitches": [], + "sourceTermExactMatchCount": 0 + } + ] + } + }, + { + "comment": "Test non-empty replacement at start", + "func": "findTerms", + "mode": "split", + "text": "testました", + "options": [ + "default", + { + "alphanumeric": true, + "textReplacements": [ + null, + [ + { + "pattern": "test", + "flags": "g", + "replacement": "よみ" + } + ] + ] + } + ], + "expected": { + "length": 7, + "definitions": [ + { + "type": "term", + "id": 12, + "source": "よみました", + "rawSource": "testました", + "sourceTerm": "よむ", + "reasons": [ + "polite past" + ], + "score": 100, + "sequence": 6, + "dictionary": "Test Dictionary 2", + "dictionaryPriority": 0, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "読む", + "reading": "よむ", + "expressions": [ + { + "sourceTerm": "よむ", + "expression": "読む", + "reading": "よむ", + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termFrequency": "normal", + "frequencies": [], + "pitches": [] + } + ], + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "glossary": [ + "to read" + ], + "definitionTags": [ + { + "name": "popular", + "category": "popular", + "notes": "popular term", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "frequencies": [], + "pitches": [], + "sourceTermExactMatchCount": 0 + } + ] + } } ] } \ No newline at end of file -- cgit v1.2.3