constrain deinflection test cases more

author: lonkaars <loek@pipeframe.xyz> 2023-07-10 18:58:15 +0200
committer: lonkaars <loek@pipeframe.xyz> 2023-07-10 18:58:15 +0200
commit: 65e7b2260d728a9c747d126f828e90ae34f05b40 (patch)
tree: 80bce68628fc763b8d24b97a089d79ef5c0d039c
parent: f7bfb89d7f400b48539b6f0712040caa6c6d3165 (diff)
5 files changed, 89 insertions, 79 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index 0dbe19c..fd6ffc8 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -157,7 +157,7 @@ insert into deinflection_temp values
 
 	-- obligation <https://guidetojapanese.org/learn/grammar/must>
   -- TODO: manually write these out instead of splitting particle and suffix
-	('infl:must infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
+	('infl:negative infl:must infl:tmp:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
 	('infl:must infl:tmp:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past)
 	('infl:must infl:tmp:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive
 	('infl:must', 'は', '', 'ot', 'nt'), -- removes particle (negative -te + は + だめ／いけない／ならない)
diff --git a/db/find.sql b/db/find.sql
index e2d6ad8..6c8a80e 100644
--- a/db/find.sql
+++ b/db/find.sql
@@ -9,73 +9,73 @@
 
 -- explain query plan -- testing only
 with results(id, expression, reading, tags, depth, rules, original, deinflected) as (
-  -- stripped deinflection table (remove some columns and duplicates)
-  with deinflections(term, tags, depth, original, rules) as (
-    -- recursively generated deinflection table
-    with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
-      -- input term all substrings until length 1
-      with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
-        select length(:term), :term, '', -1, 0, 0, 0
-        union
-        select
-          inputs.length - 1,
-          substr(inputs.term, 1, inputs.length - 1),
-          inputs.tags,
-          inputs.rules,
-          inputs.rules_in,
-          inputs.rules_out,
-          inputs.depth
-        from inputs
-        where inputs.length > 1
-      )
-      select * from inputs
-      union -- join all recursive rows into one large table
-      select
-        deinflect.length,
-        substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
-        deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
-        deinflection.rules_out,
-        deinflection.rules_in,
-        deinflect.rules,
-        deinflect.depth + 1
-      from deinflect -- temp table
-      inner join deinflection -- deinflection rules table
-      on
-        -- rules_in has to contain any of the current deconjugation rules
-        (deinflect.rules & deinflection.rules_in != 0) and
-        -- term.endsWith(kana_in)
-        (substr(term, length(term) - length(kana_in) + 1) = kana_in) and
-        -- can't deconjugate to length <1
-        (length(term) > 0)
-      limit 100 -- failsafe to catch any infinite loops
-    )
-    select term, tags, depth, substr(:term, 1, deinflect.length), rules
-    from deinflect
-  )
-  select
-    term.id,
-    term.expression,
-    term.reading,
-    deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
-    deinflections.depth,
-    rules,
-    deinflections.original,
+	-- stripped deinflection table (remove some columns and duplicates)
+	with deinflections(term, tags, depth, original, rules) as (
+		-- recursively generated deinflection table
+		with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
+			-- input term all substrings until length 1
+			with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
+				select length(:term), :term, '', -1, 0, 0, 0
+				union
+				select
+					inputs.length - 1,
+					substr(inputs.term, 1, inputs.length - 1),
+					inputs.tags,
+					inputs.rules,
+					inputs.rules_in,
+					inputs.rules_out,
+					inputs.depth
+				from inputs
+				where inputs.length > 1
+			)
+			select * from inputs
+			union -- join all recursive rows into one large table
+			select
+				deinflect.length,
+				substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
+				deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
+				deinflection.rules_out,
+				deinflection.rules_in,
+				deinflect.rules,
+				deinflect.depth + 1
+			from deinflect -- temp table
+			inner join deinflection -- deinflection rules table
+			on
+				-- rules_in has to contain any of the current deconjugation rules
+				(deinflect.rules & deinflection.rules_in != 0) and
+				-- term.endsWith(kana_in)
+				(substr(term, length(term) - length(kana_in) + 1) = kana_in) and
+				-- can't deconjugate to length <1
+				(length(term) > 0)
+			limit 100 -- failsafe to catch any infinite loops
+		)
+		select term, tags, depth, substr(:term, 1, deinflect.length), rules
+		from deinflect
+	)
+	select
+		term.id,
+		term.expression,
+		term.reading,
+		deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
+		deinflections.depth,
+		rules,
+		deinflections.original,
 		deinflections.term
-  from deinflections
-  inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
-  inner join term_tag on term_tag.term_id = term.id
-  inner join tag on term_tag.tag_id = tag.id
-  group by term.id, deinflections.original, deinflections.rules
-  having term.id is not null
+	from deinflections
+	inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
+	inner join term_tag on term_tag.term_id = term.id
+	inner join tag on term_tag.tag_id = tag.id
+	group by term.id, deinflections.original, deinflections.rules
+	having term.id is not null
 )
 select
-  results.id,
-  results.expression,
-  results.reading,
-  results.tags,
-  group_concat(deinflection_rules.tag, ' ') as rules,
-  results.depth,
-  results.original,
+	results.id,
+	results.expression,
+	results.reading,
+	results.tags,
+	group_concat(deinflection_rules.tag, ' ') as rules,
+	results.depth,
+	results.original,
 	results.deinflected,
 	root_overlay.sort as root_overlay,
 	user_overlay.sort as user_overlay
diff --git a/search/tags.ts b/search/tags.ts
index 92279c5..fa70fe8 100644
--- a/search/tags.ts
+++ b/search/tags.ts
@@ -180,29 +180,37 @@ export function parseTags(input: string) {
 	var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[];
 	var filteredTags: TokenTag[] = [];
 	var negationCount = 0;
-	for (var tag of tags) {
+	var lastTag = "";
+	var tag = "";
+	let i = 0;
+	for (i = 0, tag = tags[i]; i < tags.length; lastTag = tags[i], i++, tag = tags[i]) {
 		// conjugations that are used as "stepping stones" for others should be
 		// filtered in this loop. checking if a combination of tags is valid should
 		// be done in ./parser.ts
 
 		// skip past tense tag if used as step for -te and -tari inflection
-		if (tag == Tag.Inflection.Tense.Past &&
-				filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue;
+		if (tag == Tag.Inflection.Tense.Past && [
+			Tag.Inflection.Suffix.Te,
+			Tag.Inflection.Suffix.Tari
+		].includes(lastTag as any)) continue;
 
 		// skip -te suffix tag if used for
-		if (tag == Tag.Inflection.Suffix.Te && filteredTags.anyOf([
+		if (tag == Tag.Inflection.Suffix.Te && [
 			Tag.Inflection.Tense.Continuous, // base for continuous tense
 			Tag.Inflection.Obligatory, // base for obligatory inflection
 			Tag.Inflection.Attempt.Miru, // base for 〜みる attempt
-		])) continue;
+		].includes(lastTag as any)) continue;
 	
 		// skip volitional tag if used for 〜とする attempt
-		if (tag == Tag.Inflection.Desirable.Volitional &&
-				filteredTags.anyOf([Tag.Inflection.Attempt.ToSuru])) continue;
+		if (tag == Tag.Inflection.Desirable.Volitional && [
+			Tag.Inflection.Attempt.ToSuru,
+		].includes(lastTag as any)) continue;
 
 		// skip conditional 〜ば if used for obligatory inflection
-		if (tag == Tag.Inflection.Conditional.Ba &&
-				filteredTags.anyOf([Tag.Inflection.Obligatory])) continue;
+		if (tag == Tag.Inflection.Conditional.Ba && [
+			Tag.Inflection.Obligatory,
+			Tag.Inflection.Temporary.ObligatoryResult,
+		].includes(lastTag as any)) continue;
 
 		// normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative
 		if (tag == Tag.Inflection.Negative) {
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index c29bdf1..3f03f6d 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -10,7 +10,7 @@ interface Test {
 
 export default [
 	// jisho.org generated conjugations for 取る (u-verb)
-	{ input: "取る", mustHave: [], mustNotHave: [], },
+	{ input: "取る", mustHave: [ Inflection.Affirmative ], mustNotHave: [], },
 	{ input: "取らない", mustHave: [ Inflection.Negative ], mustNotHave: [], },
 	{ input: "取ります", mustHave: [ Inflection.Polite.Masu ], mustNotHave: [], },
 	{ input: "取りません", mustHave: [ Inflection.Negative, Inflection.Polite.Masu ], mustNotHave: [], },
@@ -38,13 +38,13 @@ export default [
 	{ input: "食べさせる", mustHave: [ Inflection.Causative ], mustNotHave: [], },
 	{ input: "落ちられる", mustHave: [ Inflection.Potential ], mustNotHave: [], },
 	{ input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], },
-	{ input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], },
+	{ input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te, Inflection.Tense.Past ], },
 	{ input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], },
 	{ input: "取らなければ", mustHave: [ Inflection.Conditional.Ba, Inflection.Negative ], mustNotHave: [], },
 	{ input: "入ってはいけない", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~
 	{ input: "行かなくてはなりません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~
-	{ input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
-	{ input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+	{ input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Suffix.Te ], },
+	{ input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Conditional.Ba ], },
 	{ input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
 	{ input: "入っちゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], },
 	{ input: "死んじゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], },
@@ -67,7 +67,7 @@ export default [
 	{ input: "切ってみなかった", mustHave: [ Inflection.Attempt.Miru, Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ] },
 	{ input: "見ようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
 	{ input: "行こうとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
-	{ input: "避けようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
+	{ input: "避けようとする", force: { writing: "避ける", reading: "さける" }, mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional, Inflection.Potential ], },
 	{ input: "入ろうとしている", mustHave: [ Inflection.Attempt.ToSuru, Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Desirable.Volitional ], },
 	{ input: "食べなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
 	{ input: "飲みなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts
index 1d2f172..6c1667b 100644
--- a/test/deinflection/test.ts
+++ b/test/deinflection/test.ts
@@ -25,6 +25,8 @@ cases.forEach(({ input, mustHave, mustNotHave, force }) => {
 		for (tag of mustNotHave)
 			if (result.tags.includes(tag))
 				throw new Error(`Deconjugation includes unallowed tag ${tag}`);
+
+		console.log(result.writing + " + " + result.tags.filter(tag => tag.startsWith("infl:") && !tag.startsWith("infl:reason:")).join(" + "));
 	});
 })
author	lonkaars <loek@pipeframe.xyz>	2023-07-10 18:58:15 +0200
committer	lonkaars <loek@pipeframe.xyz>	2023-07-10 18:58:15 +0200
commit	65e7b2260d728a9c747d126f828e90ae34f05b40 (patch)
tree	80bce68628fc763b8d24b97a089d79ef5c0d039c
parent	f7bfb89d7f400b48539b6f0712040caa6c6d3165 (diff)