From 1e927dd66e24b72ac3ba129dfb578746ce896ce2 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 19 Feb 2021 18:39:43 -0500 Subject: Fix translation regex replacements (#1423) * Fix regex replacements having issues at the start of scanned text * Fix test cases * Add tests --- test/data/test-translator-data.json | 335 +++++++++++++++++++++++++++++++++++- 1 file changed, 329 insertions(+), 6 deletions(-) (limited to 'test/data/test-translator-data.json') diff --git a/test/data/test-translator-data.json b/test/data/test-translator-data.json index 651b87d1..87184698 100644 --- a/test/data/test-translator-data.json +++ b/test/data/test-translator-data.json @@ -18803,7 +18803,7 @@ "type": "term", "id": 3, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -18967,7 +18967,7 @@ "type": "term", "id": 5, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -19131,7 +19131,7 @@ "type": "term", "id": 4, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -19295,7 +19295,7 @@ "type": "term", "id": 6, "source": "打ち", - "rawSource": "(打)(ち)(込)", + "rawSource": "(打)(ち)", "sourceTerm": "打つ", "reasons": [ "masu stem" @@ -19459,7 +19459,7 @@ "type": "term", "id": 1, "source": "打", - "rawSource": "(打)(ち)", + "rawSource": "(打)", "sourceTerm": "打", "reasons": [], "score": 1, @@ -19613,7 +19613,7 @@ "type": "term", "id": 2, "source": "打", - "rawSource": "(打)(ち)", + "rawSource": "(打)", "sourceTerm": "打", "reasons": [], "score": 1, @@ -19765,6 +19765,329 @@ } ] } + }, + { + "comment": "Test non-empty replacement", + "func": "findTerms", + "mode": "split", + "text": "test", + "options": [ + "default", + { + "alphanumeric": true, + "textReplacements": [ + null, + [ + { + "pattern": "test", + "flags": "g", + "replacement": "よみ" + } + ] + ] + } + ], + "expected": { + "length": 4, + "definitions": [ + { + "type": "term", + "id": 12, + "source": "よみ", + "rawSource": "test", + "sourceTerm": "よむ", + "reasons": [ + "masu stem" + ], + "score": 100, + "sequence": 6, + "dictionary": "Test Dictionary 2", + "dictionaryPriority": 0, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "読む", + "reading": "よむ", + "expressions": [ + { + "sourceTerm": "よむ", + "expression": "読む", + "reading": "よむ", + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termFrequency": "normal", + "frequencies": [], + "pitches": [] + } + ], + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "glossary": [ + "to read" + ], + "definitionTags": [ + { + "name": "popular", + "category": "popular", + "notes": "popular term", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "frequencies": [], + "pitches": [], + "sourceTermExactMatchCount": 0 + } + ] + } + }, + { + "comment": "Test non-empty replacement at end", + "func": "findTerms", + "mode": "split", + "text": "つtest", + "options": [ + "default", + { + "alphanumeric": true, + "textReplacements": [ + null, + [ + { + "pattern": "test", + "flags": "g", + "replacement": "よみ" + } + ] + ] + } + ], + "expected": { + "length": 5, + "definitions": [ + { + "type": "term", + "id": 13, + "source": "つよみ", + "rawSource": "つtest", + "sourceTerm": "つよみ", + "reasons": [], + "score": 90, + "sequence": 7, + "dictionary": "Test Dictionary 2", + "dictionaryPriority": 0, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "強み", + "reading": "つよみ", + "expressions": [ + { + "sourceTerm": "つよみ", + "expression": "強み", + "reading": "つよみ", + "furiganaSegments": [ + { + "text": "強", + "furigana": "つよ" + }, + { + "text": "み", + "furigana": "" + } + ], + "termTags": [], + "termFrequency": "normal", + "frequencies": [], + "pitches": [] + } + ], + "furiganaSegments": [ + { + "text": "強", + "furigana": "つよ" + }, + { + "text": "み", + "furigana": "" + } + ], + "glossary": [ + "strong point" + ], + "definitionTags": [ + { + "name": "popular", + "category": "popular", + "notes": "popular term", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "pitches": [], + "sourceTermExactMatchCount": 0 + } + ] + } + }, + { + "comment": "Test non-empty replacement at start", + "func": "findTerms", + "mode": "split", + "text": "testました", + "options": [ + "default", + { + "alphanumeric": true, + "textReplacements": [ + null, + [ + { + "pattern": "test", + "flags": "g", + "replacement": "よみ" + } + ] + ] + } + ], + "expected": { + "length": 7, + "definitions": [ + { + "type": "term", + "id": 12, + "source": "よみました", + "rawSource": "testました", + "sourceTerm": "よむ", + "reasons": [ + "polite past" + ], + "score": 100, + "sequence": 6, + "dictionary": "Test Dictionary 2", + "dictionaryPriority": 0, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "読む", + "reading": "よむ", + "expressions": [ + { + "sourceTerm": "よむ", + "expression": "読む", + "reading": "よむ", + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termFrequency": "normal", + "frequencies": [], + "pitches": [] + } + ], + "furiganaSegments": [ + { + "text": "読", + "furigana": "よ" + }, + { + "text": "む", + "furigana": "" + } + ], + "glossary": [ + "to read" + ], + "definitionTags": [ + { + "name": "popular", + "category": "popular", + "notes": "popular term", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [ + { + "name": "vt", + "category": "partOfSpeech", + "notes": "transitive verb", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "frequencies": [], + "pitches": [], + "sourceTermExactMatchCount": 0 + } + ] + } } ] } \ No newline at end of file -- cgit v1.2.3