aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-10 18:58:15 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-10 18:58:15 +0200
commit 65e7b2260d728a9c747d126f828e90ae34f05b40 (patch)
tree 80bce68628fc763b8d24b97a089d79ef5c0d039c
parent f7bfb89d7f400b48539b6f0712040caa6c6d3165 (diff)
constrain deinflection test cases more
-rw-r--r-- db/dict/deinflections.sql 2
-rw-r--r-- db/find.sql 128
-rw-r--r-- search/tags.ts 26
-rw-r--r-- test/deinflection/cases.ts 10
-rw-r--r-- test/deinflection/test.ts 2
5 files changed, 89 insertions, 79 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index 0dbe19c..fd6ffc8 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -157,7 +157,7 @@ insert into deinflection_temp values
-- obligation <https://guidetojapanese.org/learn/grammar/must>
-- TODO: manually write these out instead of splitting particle and suffix
- ('infl:must infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
+ ('infl:negative infl:must infl:tmp:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
('infl:must infl:tmp:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past)
('infl:must infl:tmp:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive
('infl:must', 'は', '', 'ot', 'nt'), -- removes particle (negative -te + は + だめ/いけない/ならない)
diff --git a/db/find.sql b/db/find.sql
index e2d6ad8..6c8a80e 100644
--- a/db/find.sql
+++ b/db/find.sql
@@ -9,73 +9,73 @@
-- explain query plan -- testing only
with results(id, expression, reading, tags, depth, rules, original, deinflected) as (
- -- stripped deinflection table (remove some columns and duplicates)
- with deinflections(term, tags, depth, original, rules) as (
- -- recursively generated deinflection table
- with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
- -- input term all substrings until length 1
- with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
- select length(:term), :term, '', -1, 0, 0, 0
- union
- select
- inputs.length - 1,
- substr(inputs.term, 1, inputs.length - 1),
- inputs.tags,
- inputs.rules,
- inputs.rules_in,
- inputs.rules_out,
- inputs.depth
- from inputs
- where inputs.length > 1
- )
- select * from inputs
- union -- join all recursive rows into one large table
- select
- deinflect.length,
- substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
- deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
- deinflection.rules_out,
- deinflection.rules_in,
- deinflect.rules,
- deinflect.depth + 1
- from deinflect -- temp table
- inner join deinflection -- deinflection rules table
- on
- -- rules_in has to contain any of the current deconjugation rules
- (deinflect.rules & deinflection.rules_in != 0) and
- -- term.endsWith(kana_in)
- (substr(term, length(term) - length(kana_in) + 1) = kana_in) and
- -- can't deconjugate to length <1
- (length(term) > 0)
- limit 100 -- failsafe to catch any infinite loops
- )
- select term, tags, depth, substr(:term, 1, deinflect.length), rules
- from deinflect
- )
- select
- term.id,
- term.expression,
- term.reading,
- deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
- deinflections.depth,
- rules,
- deinflections.original,
+ -- stripped deinflection table (remove some columns and duplicates)
+ with deinflections(term, tags, depth, original, rules) as (
+ -- recursively generated deinflection table
+ with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
+ -- input term all substrings until length 1
+ with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
+ select length(:term), :term, '', -1, 0, 0, 0
+ union
+ select
+ inputs.length - 1,
+ substr(inputs.term, 1, inputs.length - 1),
+ inputs.tags,
+ inputs.rules,
+ inputs.rules_in,
+ inputs.rules_out,
+ inputs.depth
+ from inputs
+ where inputs.length > 1
+ )
+ select * from inputs
+ union -- join all recursive rows into one large table
+ select
+ deinflect.length,
+ substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
+ deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
+ deinflection.rules_out,
+ deinflection.rules_in,
+ deinflect.rules,
+ deinflect.depth + 1
+ from deinflect -- temp table
+ inner join deinflection -- deinflection rules table
+ on
+ -- rules_in has to contain any of the current deconjugation rules
+ (deinflect.rules & deinflection.rules_in != 0) and
+ -- term.endsWith(kana_in)
+ (substr(term, length(term) - length(kana_in) + 1) = kana_in) and
+ -- can't deconjugate to length <1
+ (length(term) > 0)
+ limit 100 -- failsafe to catch any infinite loops
+ )
+ select term, tags, depth, substr(:term, 1, deinflect.length), rules
+ from deinflect
+ )
+ select
+ term.id,
+ term.expression,
+ term.reading,
+ deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
+ deinflections.depth,
+ rules,
+ deinflections.original,
deinflections.term
- from deinflections
- inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
- inner join term_tag on term_tag.term_id = term.id
- inner join tag on term_tag.tag_id = tag.id
- group by term.id, deinflections.original, deinflections.rules
- having term.id is not null
+ from deinflections
+ inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
+ inner join term_tag on term_tag.term_id = term.id
+ inner join tag on term_tag.tag_id = tag.id
+ group by term.id, deinflections.original, deinflections.rules
+ having term.id is not null
)
select
- results.id,
- results.expression,
- results.reading,
- results.tags,
- group_concat(deinflection_rules.tag, ' ') as rules,
- results.depth,
- results.original,
+ results.id,
+ results.expression,
+ results.reading,
+ results.tags,
+ group_concat(deinflection_rules.tag, ' ') as rules,
+ results.depth,
+ results.original,
results.deinflected,
root_overlay.sort as root_overlay,
user_overlay.sort as user_overlay
diff --git a/search/tags.ts b/search/tags.ts
index 92279c5..fa70fe8 100644
--- a/search/tags.ts
+++ b/search/tags.ts
@@ -180,29 +180,37 @@ export function parseTags(input: string) {
var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[];
var filteredTags: TokenTag[] = [];
var negationCount = 0;
- for (var tag of tags) {
+ var lastTag = "";
+ var tag = "";
+ let i = 0;
+ for (i = 0, tag = tags[i]; i < tags.length; lastTag = tags[i], i++, tag = tags[i]) {
// conjugations that are used as "stepping stones" for others should be
// filtered in this loop. checking if a combination of tags is valid should
// be done in ./parser.ts
// skip past tense tag if used as step for -te and -tari inflection
- if (tag == Tag.Inflection.Tense.Past &&
- filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue;
+ if (tag == Tag.Inflection.Tense.Past && [
+ Tag.Inflection.Suffix.Te,
+ Tag.Inflection.Suffix.Tari
+ ].includes(lastTag as any)) continue;
// skip -te suffix tag if used for
- if (tag == Tag.Inflection.Suffix.Te && filteredTags.anyOf([
+ if (tag == Tag.Inflection.Suffix.Te && [
Tag.Inflection.Tense.Continuous, // base for continuous tense
Tag.Inflection.Obligatory, // base for obligatory inflection
Tag.Inflection.Attempt.Miru, // base for 〜みる attempt
- ])) continue;
+ ].includes(lastTag as any)) continue;
// skip volitional tag if used for 〜とする attempt
- if (tag == Tag.Inflection.Desirable.Volitional &&
- filteredTags.anyOf([Tag.Inflection.Attempt.ToSuru])) continue;
+ if (tag == Tag.Inflection.Desirable.Volitional && [
+ Tag.Inflection.Attempt.ToSuru,
+ ].includes(lastTag as any)) continue;
// skip conditional 〜ば if used for obligatory inflection
- if (tag == Tag.Inflection.Conditional.Ba &&
- filteredTags.anyOf([Tag.Inflection.Obligatory])) continue;
+ if (tag == Tag.Inflection.Conditional.Ba && [
+ Tag.Inflection.Obligatory,
+ Tag.Inflection.Temporary.ObligatoryResult,
+ ].includes(lastTag as any)) continue;
// normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative
if (tag == Tag.Inflection.Negative) {
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index c29bdf1..3f03f6d 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -10,7 +10,7 @@ interface Test {
export default [
// jisho.org generated conjugations for 取る (u-verb)
- { input: "取る", mustHave: [], mustNotHave: [], },
+ { input: "取る", mustHave: [ Inflection.Affirmative ], mustNotHave: [], },
{ input: "取らない", mustHave: [ Inflection.Negative ], mustNotHave: [], },
{ input: "取ります", mustHave: [ Inflection.Polite.Masu ], mustNotHave: [], },
{ input: "取りません", mustHave: [ Inflection.Negative, Inflection.Polite.Masu ], mustNotHave: [], },
@@ -38,13 +38,13 @@ export default [
{ input: "食べさせる", mustHave: [ Inflection.Causative ], mustNotHave: [], },
{ input: "落ちられる", mustHave: [ Inflection.Potential ], mustNotHave: [], },
{ input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], },
- { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], },
+ { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te, Inflection.Tense.Past ], },
{ input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], },
{ input: "取らなければ", mustHave: [ Inflection.Conditional.Ba, Inflection.Negative ], mustNotHave: [], },
{ input: "入ってはいけない", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~
{ input: "行かなくてはなりません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~
- { input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
- { input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Suffix.Te ], },
+ { input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Conditional.Ba ], },
{ input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
{ input: "入っちゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], },
{ input: "死んじゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], },
@@ -67,7 +67,7 @@ export default [
{ input: "切ってみなかった", mustHave: [ Inflection.Attempt.Miru, Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ] },
{ input: "見ようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
{ input: "行こうとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
- { input: "避けようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
+ { input: "避けようとする", force: { writing: "避ける", reading: "さける" }, mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional, Inflection.Potential ], },
{ input: "入ろうとしている", mustHave: [ Inflection.Attempt.ToSuru, Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Desirable.Volitional ], },
{ input: "食べなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
{ input: "飲みなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts
index 1d2f172..6c1667b 100644
--- a/test/deinflection/test.ts
+++ b/test/deinflection/test.ts
@@ -25,6 +25,8 @@ cases.forEach(({ input, mustHave, mustNotHave, force }) => {
for (tag of mustNotHave)
if (result.tags.includes(tag))
throw new Error(`Deconjugation includes unallowed tag ${tag}`);
+
+ console.log(result.writing + " + " + result.tags.filter(tag => tag.startsWith("infl:") && !tag.startsWith("infl:reason:")).join(" + "));
});
})