fix api/japanese kana/kanji matching

author: lonkaars <loek@pipeframe.xyz> 2023-07-14 15:44:19 +0200
committer: lonkaars <loek@pipeframe.xyz> 2023-07-14 15:44:19 +0200
commit: 948d2311a0fdf2bb5641861824e6c629d1f2a93d (patch)
tree: 9f715895f135cfca8e1173842a092c8725f5c75b
parent: a15c3fefe33f96c8f85147c61ee266abc43b4f65 (diff)
3 files changed, 9 insertions, 5 deletions
diff --git a/api/japanese.ts b/api/japanese.ts
index add897a..4f176a9 100644
--- a/api/japanese.ts
+++ b/api/japanese.ts
@@ -107,7 +107,7 @@ export default class Japanese {
 				possibilities.push(path);
 				return;
 			}
-			// skip until next 'anchor' token
+			// skip until next 'anchor' token (token with non-kanji characters only)
 			if (tokens[tokenIndex].normalized.length == 0) return match(tokenIndex + 1, searchStart, path);
 
 			// try all positions where current (anchor) token fits in this.reading
@@ -120,14 +120,16 @@ export default class Japanese {
 		// create index slices from possibilities
 		var slices = possibilities
 			.map(match => { // convert start index of anchor to start and stop index (based on anchor length)
-				var out = [0];
+				var out = [];
 				let matchIndex = 0;
+				if (tokens[0].ruby) out.push(0);
 				for (let tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) {
 					if (tokens[tokenIndex].normalized.length == 0) continue;
 					out.push(match[matchIndex], match[matchIndex] + tokens[tokenIndex].writing.length);
 					matchIndex++;
 				}
-				if (out.peek() != this.reading.length) out.push(this.reading.length);
+				if (tokens.peek().ruby) out.push(this.reading.length);
+				// if (out.peek() != this.reading.length) out.push(this.reading.length);
 				return out;
 			})
 			.filter(slice => slice.length == tokens.length + 1)
diff --git a/api/word.ts b/api/word.ts
index b519789..e92bc19 100644
--- a/api/word.ts
+++ b/api/word.ts
@@ -104,7 +104,8 @@ export default class Word extends APIBase {
 				// generate conjugated version of verb with kanji
 				this.text = new Japanese(input.source, reading);
 			} else {
-				this.text = this.base;
+				// add dictionary reading to this.source as writing (could contain kanji)
+				this.text = new Japanese(input.source, this.base.reading);
 			}
 			this.id = input.id;
 		}
diff --git a/test/single/api-japanese.test.ts b/test/single/api-japanese.test.ts
index 4e89c64..e604f61 100644
--- a/test/single/api-japanese.test.ts
+++ b/test/single/api-japanese.test.ts
@@ -17,10 +17,11 @@ const cases = [
 	{ input: ["気を引き締める", "きをひきしめる"], output: "[気](き)を[引](ひ)き[締](し)める" },
 // https://japanese.stackexchange.com/questions/69521/reading-per-kanji-irregular-readings
 	{ input: ["大口魚", "たら"], output: "[大口魚](たら)" },
+	{ input: ["この辺に", "このへんに"], output: "この[辺](へん)に" },
 ] satisfies Test[];
 
 cases.forEach(({ input, output }) => {
-	Deno.test(`Japanese API class - ${input}`, async () => {
+	Deno.test(`Japanese API class - ${input[0]} (${input[1]}) -> ${output}`, async () => {
 		var jp = new Japanese(...input);
 		assertEquals(jp.furigana("refold-tools"), output);
 	});
author	lonkaars <loek@pipeframe.xyz>	2023-07-14 15:44:19 +0200
committer	lonkaars <loek@pipeframe.xyz>	2023-07-14 15:44:19 +0200
commit	948d2311a0fdf2bb5641861824e6c629d1f2a93d (patch)
tree	9f715895f135cfca8e1173842a092c8725f5c75b
parent	a15c3fefe33f96c8f85147c61ee266abc43b4f65 (diff)