aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-14 15:44:19 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-14 15:44:19 +0200
commit 948d2311a0fdf2bb5641861824e6c629d1f2a93d (patch)
tree 9f715895f135cfca8e1173842a092c8725f5c75b
parent a15c3fefe33f96c8f85147c61ee266abc43b4f65 (diff)
fix api/japanese kana/kanji matching
-rw-r--r-- api/japanese.ts 8
-rw-r--r-- api/word.ts 3
-rw-r--r-- test/single/api-japanese.test.ts 3
3 files changed, 9 insertions, 5 deletions
diff --git a/api/japanese.ts b/api/japanese.ts
index add897a..4f176a9 100644
--- a/api/japanese.ts
+++ b/api/japanese.ts
@@ -107,7 +107,7 @@ export default class Japanese {
possibilities.push(path);
return;
}
- // skip until next 'anchor' token
+ // skip until next 'anchor' token (token with no-kanji characters only)
if (tokens[tokenIndex].normalized.length == 0) return match(tokenIndex + 1, searchStart, path);
// try all positions where current (anchor) token fits in this.reading
@@ -120,14 +120,16 @@ export default class Japanese {
// create index slices from possibilities
var slices = possibilities
.map(match => { // convert start index of anchor to start and stop index (based on anchor length)
- var out = [0];
+ var out = [];
let matchIndex = 0;
+ if (tokens[0].ruby) out.push(0);
for (let tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) {
if (tokens[tokenIndex].normalized.length == 0) continue;
out.push(match[matchIndex], match[matchIndex] + tokens[tokenIndex].writing.length);
matchIndex++;
}
- if (out.peek() != this.reading.length) out.push(this.reading.length);
+ if (tokens.peek().ruby) out.push(this.reading.length);
+ // if (out.peek() != this.reading.length) out.push(this.reading.length);
return out;
})
.filter(slice => slice.length == tokens.length + 1)
diff --git a/api/word.ts b/api/word.ts
index b519789..e92bc19 100644
--- a/api/word.ts
+++ b/api/word.ts
@@ -104,7 +104,8 @@ export default class Word extends APIBase {
// generate conjugated version of verb with kanji
this.text = new Japanese(input.source, reading);
} else {
- this.text = this.base;
+ // add dictionary reading to this.source as writing (could contain kanji)
+ this.text = new Japanese(input.source, this.base.reading);
}
this.id = input.id;
}
diff --git a/test/single/api-japanese.test.ts b/test/single/api-japanese.test.ts
index 4e89c64..e604f61 100644
--- a/test/single/api-japanese.test.ts
+++ b/test/single/api-japanese.test.ts
@@ -17,10 +17,11 @@ const cases = [
{ input: ["気を引き締める", "きをひきしめる"], output: "[気](き)を[引](ひ)き[締](し)める" },
// https://japanese.stackexchange.com/questions/69521/reading-per-kanji-irregular-readings
{ input: ["大口魚", "たら"], output: "[大口魚](たら)" },
+ { input: ["この辺に", "このへんに"], output: "この[辺](へん)に" },
] satisfies Test[];
cases.forEach(({ input, output }) => {
- Deno.test(`Japanese API class - ${input}`, async () => {
+ Deno.test(`Japanese API class - ${input[0]} (${input[1]}) -> ${output}`, async () => {
var jp = new Japanese(...input);
assertEquals(jp.furigana("refold-tools"), output);
});