diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-09 00:50:58 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-09 00:50:58 +0200 |
commit | 1138ac8fc8764cf5cd987383a7a0332879be6cca (patch) | |
tree | 63aa25275e866d986c2b532f1f050c4f2cf99ac1 | |
parent | d36cefb50ddf67daa08a221d2de4d3eaae9e2492 (diff) |
rename conditionals and deconjugate obligatory inflections
-rw-r--r-- | db/dict/deinflections.sql | 26 | ||||
-rw-r--r-- | import/jmdict/jmdict.ts | 1 | ||||
-rw-r--r-- | language/tags.ts | 35 | ||||
-rw-r--r-- | test/deinflection/cases.ts | 4 |
4 files changed, 53 insertions, 13 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql index a6070a8..7d67b02 100644 --- a/db/dict/deinflections.sql +++ b/db/dict/deinflections.sql @@ -142,17 +142,23 @@ insert into deinflection_temp values ('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception -- conditionals <https://guidetojapanese.org/learn/grammar/conditionals> - ('infl:suffix:ba', 'えば', 'う', 'a', 'u'), - ('infl:suffix:ba', 'けば', 'く', 'a', 'u'), - ('infl:suffix:ba', 'げば', 'ぐ', 'a', 'u'), - ('infl:suffix:ba', 'せば', 'す', 'a', 'u'), - ('infl:suffix:ba', 'てば', 'つ', 'a', 'u'), - ('infl:suffix:ba', 'ねば', 'ぬ', 'a', 'u'), - ('infl:suffix:ba', 'べば', 'ぶ', 'a', 'u'), - ('infl:suffix:ba', 'めば', 'む', 'a', 'u'), - ('infl:suffix:ba', 'れば', 'る', 'a', 'u ru'), - ('infl:suffix:ba', 'ければ', 'い', 'a', 'a'), + ('infl:cond:ba', 'えば', 'う', 'a', 'u'), + ('infl:cond:ba', 'けば', 'く', 'a', 'u'), + ('infl:cond:ba', 'げば', 'ぐ', 'a', 'u'), + ('infl:cond:ba', 'せば', 'す', 'a', 'u'), + ('infl:cond:ba', 'てば', 'つ', 'a', 'u'), + ('infl:cond:ba', 'ねば', 'ぬ', 'a', 'u'), + ('infl:cond:ba', 'べば', 'ぶ', 'a', 'u'), + ('infl:cond:ba', 'めば', 'む', 'a', 'u'), + ('infl:cond:ba', 'れば', 'る', 'a', 'u ru'), + ('infl:cond:ba', 'ければ', 'い', 'a', 'a'), -- TODO: 〜であれば (deconjugates to です i think?) + ('infl:cond:ra', 'ら', '', 'a', 'a'), -- TODO: constrain valid matches to after past conjugation? + + -- obligation <https://guidetojapanese.org/learn/grammar/must> + ('infl:must infl:negative', 'はだめ', '', 'a', 'a'), -- built-in negative because だめ can't be deconjugated + ('infl:must', 'はいける', '', 'a', 'a'), -- はいけない -> positive + ('infl:must', 'はなる', '', 'a', 'a'), -- はならない -> positive -- auxiliary rules ('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem diff --git a/import/jmdict/jmdict.ts b/import/jmdict/jmdict.ts index 6109c9b..1d14ade 100644 --- a/import/jmdict/jmdict.ts +++ b/import/jmdict/jmdict.ts @@ -21,6 +21,7 @@ const tagLookup = { ["class/v1"]: Tag.Class.Verb.Ru, ["class/v5"]: Tag.Class.Verb.U, ["class/v5k"]: Tag.Class.Verb.U, + ["class/v5k-s"]: Tag.Class.Verb.U, ["class/v5uru"]: Tag.Class.Verb.U, ["class/v5r-i"]: Tag.Class.Verb.U, ["class/v5u-s"]: Tag.Class.Verb.U, diff --git a/language/tags.ts b/language/tags.ts index a9fc5ca..312a594 100644 --- a/language/tags.ts +++ b/language/tags.ts @@ -67,6 +67,15 @@ export const Tag = { * e.g. 来ない -> 来る [infl:negative] */ Inflection: { + /** + * @constant affirmative conjugations + * + * This conjugation should not be added by any deconjugation rules, but is + * calculated based on the amount of negations. Even counts of negative + * inflections (including 0) add this tag, while odd counts don't add this + * tag. + */ + Affirmative: "infl:affirmative", /** @constant negative conjugations */ Negative: "infl:negative", /** @constant time-related conjugations */ @@ -89,8 +98,6 @@ export const Tag = { Te: "infl:suffix:te", /** @constant -tari ending (e.g. 遊んだり) */ Tari: "infl:suffix:tari", - /** @constant -ba ending for conditionals (e.g. 泳げれば)*/ - Ba: "infl:suffix:ba", }, /** @constant internal deinflection rules */ Reason: { @@ -116,6 +123,15 @@ export const Tag = { Causative: "infl:causative", /** @constant imperative form (e.g. 聞け) */ Command: "infl:command", + /** @constant conditional forms */ + Conditional: { + /** @constant -ba ending (e.g. 泳げれば) */ + Ba: "infl:cond:ba", + /** @constant -ra ending (e.g. 取ったら) */ + Ra: "infl:cond:ra", + }, + /** @constant makes a verb obligatory (e.g. 入ってはいけない) */ + Obliged: "infl:must", }, /** @constant uncategorized tags */ Auxiliary: { @@ -140,16 +156,31 @@ export type TokenTags = Array<TokenTag>; export function parseTags(input: string) { var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[]; var filteredTags: TokenTag[] = []; + var negationCount = 0; for (var tag of tags) { + // conjugations that are used as "stepping stones" for others should be + // filtered in this loop. checking if a combination of tags is valid should + // be done in ./parser.ts + // skip past tense tag if used as step for -te and -tari inflection if (tag == Tag.Inflection.Tense.Past && filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue; // skip -te suffix tag if it's a base for continuous tense if (tag == Tag.Inflection.Suffix.Te && filteredTags.anyOf([Tag.Inflection.Tense.Continuous])) continue; + // skip -te suffix tag if it's a base for obligatory inflection + if (tag == Tag.Inflection.Suffix.Te && + filteredTags.anyOf([Tag.Inflection.Obliged])) continue; + + // normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative + if (tag == Tag.Inflection.Negative) { + negationCount++; + continue; + } filteredTags.push(tag); } + filteredTags.push(negationCount % 2 == 0 ? Tag.Inflection.Affirmative : Tag.Inflection.Negative); return filteredTags.set().arr() as TokenTags; // make sure array doesn't contain duplicates } diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index e0b2137..143e2a4 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -38,8 +38,10 @@ export default [ { input: "落ちられる", mustHave: [ Inflection.Potential ], mustNotHave: [], }, { input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], }, { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], }, - { input: "泳げれば", mustHave: [ Inflection.Suffix.Ba ], mustNotHave: [], }, + { input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], }, { input: "取らなければ", mustHave: [ Inflection.Potential, Inflection.Negative ], mustNotHave: [], }, + { input: "入ってはいけない", mustHave: [ Inflection.Obliged, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~ + { input: "行かなくてはなりません", mustHave: [ Inflection.Obliged, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~ // TODO: りゃ for いることは // TODO: じゃ for では // TODO: なきゃ + なくちゃ |