diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-14 15:44:19 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-14 15:44:19 +0200 |
commit | 948d2311a0fdf2bb5641861824e6c629d1f2a93d (patch) | |
tree | 9f715895f135cfca8e1173842a092c8725f5c75b | |
parent | a15c3fefe33f96c8f85147c61ee266abc43b4f65 (diff) |
fix api/japanese kana/kanji matching
-rw-r--r-- | api/japanese.ts | 8 | ||||
-rw-r--r-- | api/word.ts | 3 | ||||
-rw-r--r-- | test/single/api-japanese.test.ts | 3 |
3 files changed, 9 insertions, 5 deletions
diff --git a/api/japanese.ts b/api/japanese.ts index add897a..4f176a9 100644 --- a/api/japanese.ts +++ b/api/japanese.ts @@ -107,7 +107,7 @@ export default class Japanese { possibilities.push(path); return; } - // skip until next 'anchor' token + // skip until next 'anchor' token (token with no-kanji characters only) if (tokens[tokenIndex].normalized.length == 0) return match(tokenIndex + 1, searchStart, path); // try all positions where current (anchor) token fits in this.reading @@ -120,14 +120,16 @@ export default class Japanese { // create index slices from possibilities var slices = possibilities .map(match => { // convert start index of anchor to start and stop index (based on anchor length) - var out = [0]; + var out = []; let matchIndex = 0; + if (tokens[0].ruby) out.push(0); for (let tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) { if (tokens[tokenIndex].normalized.length == 0) continue; out.push(match[matchIndex], match[matchIndex] + tokens[tokenIndex].writing.length); matchIndex++; } - if (out.peek() != this.reading.length) out.push(this.reading.length); + if (tokens.peek().ruby) out.push(this.reading.length); + // if (out.peek() != this.reading.length) out.push(this.reading.length); return out; }) .filter(slice => slice.length == tokens.length + 1) diff --git a/api/word.ts b/api/word.ts index b519789..e92bc19 100644 --- a/api/word.ts +++ b/api/word.ts @@ -104,7 +104,8 @@ export default class Word extends APIBase { // generate conjugated version of verb with kanji this.text = new Japanese(input.source, reading); } else { - this.text = this.base; + // add dictionary reading to this.source as writing (could contain kanji) + this.text = new Japanese(input.source, this.base.reading); } this.id = input.id; } diff --git a/test/single/api-japanese.test.ts b/test/single/api-japanese.test.ts index 4e89c64..e604f61 100644 --- a/test/single/api-japanese.test.ts +++ b/test/single/api-japanese.test.ts @@ -17,10 +17,11 @@ const cases = [ { input: ["気を引き締める", "きをひきしめる"], output: "[気](き)を[引](ひ)き[締](し)める" }, // https://japanese.stackexchange.com/questions/69521/reading-per-kanji-irregular-readings { input: ["大口魚", "たら"], output: "[大口魚](たら)" }, + { input: ["この辺に", "このへんに"], output: "この[辺](へん)に" }, ] satisfies Test[]; cases.forEach(({ input, output }) => { - Deno.test(`Japanese API class - ${input}`, async () => { + Deno.test(`Japanese API class - ${input[0]} (${input[1]}) -> ${output}`, async () => { var jp = new Japanese(...input); assertEquals(jp.furigana("refold-tools"), output); }); |