From 948d2311a0fdf2bb5641861824e6c629d1f2a93d Mon Sep 17 00:00:00 2001 From: lonkaars Date: Fri, 14 Jul 2023 15:44:19 +0200 Subject: fix api/japanese kana/kanji matching --- api/japanese.ts | 8 +++++--- api/word.ts | 3 ++- test/single/api-japanese.test.ts | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/api/japanese.ts b/api/japanese.ts index add897a..4f176a9 100644 --- a/api/japanese.ts +++ b/api/japanese.ts @@ -107,7 +107,7 @@ export default class Japanese { possibilities.push(path); return; } - // skip until next 'anchor' token + // skip until next 'anchor' token (token with no-kanji characters only) if (tokens[tokenIndex].normalized.length == 0) return match(tokenIndex + 1, searchStart, path); // try all positions where current (anchor) token fits in this.reading @@ -120,14 +120,16 @@ export default class Japanese { // create index slices from possibilities var slices = possibilities .map(match => { // convert start index of anchor to start and stop index (based on anchor length) - var out = [0]; + var out = []; let matchIndex = 0; + if (tokens[0].ruby) out.push(0); for (let tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) { if (tokens[tokenIndex].normalized.length == 0) continue; out.push(match[matchIndex], match[matchIndex] + tokens[tokenIndex].writing.length); matchIndex++; } - if (out.peek() != this.reading.length) out.push(this.reading.length); + if (tokens.peek().ruby) out.push(this.reading.length); + // if (out.peek() != this.reading.length) out.push(this.reading.length); return out; }) .filter(slice => slice.length == tokens.length + 1) diff --git a/api/word.ts b/api/word.ts index b519789..e92bc19 100644 --- a/api/word.ts +++ b/api/word.ts @@ -104,7 +104,8 @@ export default class Word extends APIBase { // generate conjugated version of verb with kanji this.text = new Japanese(input.source, reading); } else { - this.text = this.base; + // add dictionary reading to this.source as writing (could contain kanji) + this.text = new Japanese(input.source, this.base.reading); } this.id = input.id; } diff --git a/test/single/api-japanese.test.ts b/test/single/api-japanese.test.ts index 4e89c64..e604f61 100644 --- a/test/single/api-japanese.test.ts +++ b/test/single/api-japanese.test.ts @@ -17,10 +17,11 @@ const cases = [ { input: ["気を引き締める", "きをひきしめる"], output: "[気](き)を[引](ひ)き[締](し)める" }, // https://japanese.stackexchange.com/questions/69521/reading-per-kanji-irregular-readings { input: ["大口魚", "たら"], output: "[大口魚](たら)" }, + { input: ["この辺に", "このへんに"], output: "この[辺](へん)に" }, ] satisfies Test[]; cases.forEach(({ input, output }) => { - Deno.test(`Japanese API class - ${input}`, async () => { + Deno.test(`Japanese API class - ${input[0]} (${input[1]}) -> ${output}`, async () => { var jp = new Japanese(...input); assertEquals(jp.furigana("refold-tools"), output); }); -- cgit v1.2.3