diff options
-rw-r--r-- | db/dict/deinflections.sql | 2 | ||||
-rw-r--r-- | db/find.sql | 128 | ||||
-rw-r--r-- | search/tags.ts | 26 | ||||
-rw-r--r-- | test/deinflection/cases.ts | 10 | ||||
-rw-r--r-- | test/deinflection/test.ts | 2 |
5 files changed, 89 insertions, 79 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql index 0dbe19c..fd6ffc8 100644 --- a/db/dict/deinflections.sql +++ b/db/dict/deinflections.sql @@ -157,7 +157,7 @@ insert into deinflection_temp values -- obligation <https://guidetojapanese.org/learn/grammar/must> -- TODO: manually write these out instead of splitting particle and suffix - ('infl:must infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated + ('infl:negative infl:must infl:tmp:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated ('infl:must infl:tmp:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past) ('infl:must infl:tmp:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive ('infl:must', 'は', '', 'ot', 'nt'), -- removes particle (negative -te + は + だめ/いけない/ならない) diff --git a/db/find.sql b/db/find.sql index e2d6ad8..6c8a80e 100644 --- a/db/find.sql +++ b/db/find.sql @@ -9,73 +9,73 @@ -- explain query plan -- testing only with results(id, expression, reading, tags, depth, rules, original, deinflected) as ( - -- stripped deinflection table (remove some columns and duplicates) - with deinflections(term, tags, depth, original, rules) as ( - -- recursively generated deinflection table - with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as ( - -- input term all substrings until length 1 - with inputs(length, term, tags, rules, rules_in, rules_out, depth) as ( - select length(:term), :term, '', -1, 0, 0, 0 - union - select - inputs.length - 1, - substr(inputs.term, 1, inputs.length - 1), - inputs.tags, - inputs.rules, - inputs.rules_in, - inputs.rules_out, - inputs.depth - from inputs - where inputs.length > 1 - ) - select * from inputs - union -- join all recursive rows into one large table - select - deinflect.length, - substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out, - deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side - deinflection.rules_out, - deinflection.rules_in, - deinflect.rules, - deinflect.depth + 1 - from deinflect -- temp table - inner join deinflection -- deinflection rules table - on - -- rules_in has to contain any of the current deconjugation rules - (deinflect.rules & deinflection.rules_in != 0) and - -- term.endsWith(kana_in) - (substr(term, length(term) - length(kana_in) + 1) = kana_in) and - -- can't deconjugate to length <1 - (length(term) > 0) - limit 100 -- failsafe to catch any infinite loops - ) - select term, tags, depth, substr(:term, 1, deinflect.length), rules - from deinflect - ) - select - term.id, - term.expression, - term.reading, - deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags, - deinflections.depth, - rules, - deinflections.original, + -- stripped deinflection table (remove some columns and duplicates) + with deinflections(term, tags, depth, original, rules) as ( + -- recursively generated deinflection table + with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as ( + -- input term all substrings until length 1 + with inputs(length, term, tags, rules, rules_in, rules_out, depth) as ( + select length(:term), :term, '', -1, 0, 0, 0 + union + select + inputs.length - 1, + substr(inputs.term, 1, inputs.length - 1), + inputs.tags, + inputs.rules, + inputs.rules_in, + inputs.rules_out, + inputs.depth + from inputs + where inputs.length > 1 + ) + select * from inputs + union -- join all recursive rows into one large table + select + deinflect.length, + substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out, + deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side + deinflection.rules_out, + deinflection.rules_in, + deinflect.rules, + deinflect.depth + 1 + from deinflect -- temp table + inner join deinflection -- deinflection rules table + on + -- rules_in has to contain any of the current deconjugation rules + (deinflect.rules & deinflection.rules_in != 0) and + -- term.endsWith(kana_in) + (substr(term, length(term) - length(kana_in) + 1) = kana_in) and + -- can't deconjugate to length <1 + (length(term) > 0) + limit 100 -- failsafe to catch any infinite loops + ) + select term, tags, depth, substr(:term, 1, deinflect.length), rules + from deinflect + ) + select + term.id, + term.expression, + term.reading, + deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags, + deinflections.depth, + rules, + deinflections.original, deinflections.term - from deinflections - inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term) - inner join term_tag on term_tag.term_id = term.id - inner join tag on term_tag.tag_id = tag.id - group by term.id, deinflections.original, deinflections.rules - having term.id is not null + from deinflections + inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term) + inner join term_tag on term_tag.term_id = term.id + inner join tag on term_tag.tag_id = tag.id + group by term.id, deinflections.original, deinflections.rules + having term.id is not null ) select - results.id, - results.expression, - results.reading, - results.tags, - group_concat(deinflection_rules.tag, ' ') as rules, - results.depth, - results.original, + results.id, + results.expression, + results.reading, + results.tags, + group_concat(deinflection_rules.tag, ' ') as rules, + results.depth, + results.original, results.deinflected, root_overlay.sort as root_overlay, user_overlay.sort as user_overlay diff --git a/search/tags.ts b/search/tags.ts index 92279c5..fa70fe8 100644 --- a/search/tags.ts +++ b/search/tags.ts @@ -180,29 +180,37 @@ export function parseTags(input: string) { var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[]; var filteredTags: TokenTag[] = []; var negationCount = 0; - for (var tag of tags) { + var lastTag = ""; + var tag = ""; + let i = 0; + for (i = 0, tag = tags[i]; i < tags.length; lastTag = tags[i], i++, tag = tags[i]) { // conjugations that are used as "stepping stones" for others should be // filtered in this loop. checking if a combination of tags is valid should // be done in ./parser.ts // skip past tense tag if used as step for -te and -tari inflection - if (tag == Tag.Inflection.Tense.Past && - filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue; + if (tag == Tag.Inflection.Tense.Past && [ + Tag.Inflection.Suffix.Te, + Tag.Inflection.Suffix.Tari + ].includes(lastTag as any)) continue; // skip -te suffix tag if used for - if (tag == Tag.Inflection.Suffix.Te && filteredTags.anyOf([ + if (tag == Tag.Inflection.Suffix.Te && [ Tag.Inflection.Tense.Continuous, // base for continuous tense Tag.Inflection.Obligatory, // base for obligatory inflection Tag.Inflection.Attempt.Miru, // base for 〜みる attempt - ])) continue; + ].includes(lastTag as any)) continue; // skip volitional tag if used for 〜とする attempt - if (tag == Tag.Inflection.Desirable.Volitional && - filteredTags.anyOf([Tag.Inflection.Attempt.ToSuru])) continue; + if (tag == Tag.Inflection.Desirable.Volitional && [ + Tag.Inflection.Attempt.ToSuru, + ].includes(lastTag as any)) continue; // skip conditional 〜ば if used for obligatory inflection - if (tag == Tag.Inflection.Conditional.Ba && - filteredTags.anyOf([Tag.Inflection.Obligatory])) continue; + if (tag == Tag.Inflection.Conditional.Ba && [ + Tag.Inflection.Obligatory, + Tag.Inflection.Temporary.ObligatoryResult, + ].includes(lastTag as any)) continue; // normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative if (tag == Tag.Inflection.Negative) { diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index c29bdf1..3f03f6d 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -10,7 +10,7 @@ interface Test { export default [ // jisho.org generated conjugations for 取る (u-verb) - { input: "取る", mustHave: [], mustNotHave: [], }, + { input: "取る", mustHave: [ Inflection.Affirmative ], mustNotHave: [], }, { input: "取らない", mustHave: [ Inflection.Negative ], mustNotHave: [], }, { input: "取ります", mustHave: [ Inflection.Polite.Masu ], mustNotHave: [], }, { input: "取りません", mustHave: [ Inflection.Negative, Inflection.Polite.Masu ], mustNotHave: [], }, @@ -38,13 +38,13 @@ export default [ { input: "食べさせる", mustHave: [ Inflection.Causative ], mustNotHave: [], }, { input: "落ちられる", mustHave: [ Inflection.Potential ], mustNotHave: [], }, { input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], }, - { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], }, + { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te, Inflection.Tense.Past ], }, { input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], }, { input: "取らなければ", mustHave: [ Inflection.Conditional.Ba, Inflection.Negative ], mustNotHave: [], }, { input: "入ってはいけない", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~ { input: "行かなくてはなりません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~ - { input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, - { input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Suffix.Te ], }, + { input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Conditional.Ba ], }, { input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, { input: "入っちゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, { input: "死んじゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, @@ -67,7 +67,7 @@ export default [ { input: "切ってみなかった", mustHave: [ Inflection.Attempt.Miru, Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ] }, { input: "見ようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, { input: "行こうとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, - { input: "避けようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, + { input: "避けようとする", force: { writing: "避ける", reading: "さける" }, mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional, Inflection.Potential ], }, { input: "入ろうとしている", mustHave: [ Inflection.Attempt.ToSuru, Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Desirable.Volitional ], }, { input: "食べなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], }, { input: "飲みなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], }, diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts index 1d2f172..6c1667b 100644 --- a/test/deinflection/test.ts +++ b/test/deinflection/test.ts @@ -25,6 +25,8 @@ cases.forEach(({ input, mustHave, mustNotHave, force }) => { for (tag of mustNotHave) if (result.tags.includes(tag)) throw new Error(`Deconjugation includes unallowed tag ${tag}`); + + console.log(result.writing + " + " + result.tags.filter(tag => tag.startsWith("infl:") && !tag.startsWith("infl:reason:")).join(" + ")); }); }) |