diff options
-rw-r--r-- | db/dict/deinflections.sql | 78 | ||||
-rw-r--r-- | import/jmdict/jmdict.ts | 1 | ||||
-rw-r--r-- | language/tags.ts | 56 | ||||
-rw-r--r-- | test/deinflection/cases.ts | 32 |
4 files changed, 142 insertions, 25 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql index 7d67b02..a4a35d5 100644 --- a/db/dict/deinflections.sql +++ b/db/dict/deinflections.sql @@ -142,23 +142,71 @@ insert into deinflection_temp values ('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception -- conditionals <https://guidetojapanese.org/learn/grammar/conditionals> - ('infl:cond:ba', 'えば', 'う', 'a', 'u'), - ('infl:cond:ba', 'けば', 'く', 'a', 'u'), - ('infl:cond:ba', 'げば', 'ぐ', 'a', 'u'), - ('infl:cond:ba', 'せば', 'す', 'a', 'u'), - ('infl:cond:ba', 'てば', 'つ', 'a', 'u'), - ('infl:cond:ba', 'ねば', 'ぬ', 'a', 'u'), - ('infl:cond:ba', 'べば', 'ぶ', 'a', 'u'), - ('infl:cond:ba', 'めば', 'む', 'a', 'u'), - ('infl:cond:ba', 'れば', 'る', 'a', 'u ru'), - ('infl:cond:ba', 'ければ', 'い', 'a', 'a'), + ('infl:cond:ba', 'えば', 'う', 'nt', 'u'), + ('infl:cond:ba', 'けば', 'く', 'nt', 'u'), + ('infl:cond:ba', 'げば', 'ぐ', 'nt', 'u'), + ('infl:cond:ba', 'せば', 'す', 'nt', 'u'), + ('infl:cond:ba', 'てば', 'つ', 'nt', 'u'), + ('infl:cond:ba', 'ねば', 'ぬ', 'nt', 'u'), + ('infl:cond:ba', 'べば', 'ぶ', 'nt', 'u'), + ('infl:cond:ba', 'めば', 'む', 'nt', 'u'), + ('infl:cond:ba', 'れば', 'る', 'nt', 'u ru'), + ('infl:cond:ba', 'ければ', 'い', 'nt', 'a'), -- TODO: 〜であれば (deconjugates to です i think?) ('infl:cond:ra', 'ら', '', 'a', 'a'), -- TODO: constrain valid matches to after past conjugation? -- obligation <https://guidetojapanese.org/learn/grammar/must> - ('infl:must infl:negative', 'はだめ', '', 'a', 'a'), -- built-in negative because だめ can't be deconjugated - ('infl:must', 'はいける', '', 'a', 'a'), -- はいけない -> positive - ('infl:must', 'はなる', '', 'a', 'a'), -- はならない -> positive + ('infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated + ('infl:tmp:must:res', 'いける', '', 'a', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past) + ('infl:tmp:must:res', 'なる', '', 'a', 'ot'), -- はならない -> positive + ('infl:must', 'は', '', 'ot', 'a'), -- removes particle (negative -te + は + だめ/いけない/ならない) + ('infl:tmp:must:prt infl:must', 'と', '', 'ot', 'a'), -- removes particle (negative + と + だめ/いけない/ならない) + ('infl:must', 'ば', 'ば', 'ot', 'a'), -- removes ot rule, keeps 〜ば (negative + ば + だめ/いけない/ならない) + ('infl:must infl:tmp:must:prt', 'なくちゃ', 'なくて', 'a', 'a'), -- colloquial abbreviation + ('infl:must infl:tmp:must:prt', 'なきゃ', 'なければ', 'a', 'a'), -- colloquial abbreviation + ('infl:tmp:must:prt', 'ちゃ', 'ては', 'a', 'a'), -- colloquial abbreviation + ('infl:tmp:must:prt', 'じゃ', 'では', 'a', 'a'), -- colloquial abbreviation + + -- ~tai endings <https://guidetojapanese.org/learn/grammar/desire> + ('infl:desire:itai', 'たい', 'る', 'i', 'ru'), + ('infl:desire:itai', 'いたい', 'う', 'i', 'u'), + ('infl:desire:itai', 'きたい', 'く', 'i', 'u'), + ('infl:desire:itai', 'ぎたい', 'ぐ', 'i', 'u'), + ('infl:desire:itai', 'したい', 'す', 'i', 'u'), + ('infl:desire:itai', 'ちたい', 'つ', 'i', 'u'), + ('infl:desire:itai', 'にたい', 'ぬ', 'i', 'u'), + ('infl:desire:itai', 'びたい', 'ぶ', 'i', 'u'), + ('infl:desire:itai', 'みたい', 'む', 'i', 'u'), + ('infl:desire:itai', 'りたい', 'る', 'i', 'u'), + ('infl:desire:itai', 'したい', 'する', 'i', 's'), + ('infl:desire:itai', 'きたい', 'くる', 'i', 'k'), + ('infl:desire:itai', '来たい', '来る', 'i', 'k'), + + -- volitional form <https://guidetojapanese.org/learn/grammar/desire> + ('infl:desire:volitional', 'よう', 'る', 'a', 'ru'), + ('infl:desire:volitional', 'おう', 'う', 'a', 'u'), + ('infl:desire:volitional', 'こう', 'く', 'a', 'u'), + ('infl:desire:volitional', 'ごう', 'ぐ', 'a', 'u'), + ('infl:desire:volitional', 'そう', 'す', 'a', 'u'), + ('infl:desire:volitional', 'とう', 'つ', 'a', 'u'), + ('infl:desire:volitional', 'のう', 'ぬ', 'a', 'u'), + ('infl:desire:volitional', 'ぼう', 'ぶ', 'a', 'u'), + ('infl:desire:volitional', 'もう', 'む', 'a', 'u'), + ('infl:desire:volitional', 'ろう', 'る', 'a', 'u'), + ('infl:desire:volitional', 'しよう', 'する', 'a', 's'), + ('infl:desire:volitional', 'こよう', 'くる', 'a', 'k'), + ('infl:desire:volitional', '来よう', '来る', 'a', 'k'), + + -- polite volitional <https://guidetojapanese.org/learn/grammar/desire> + ('infl:desire:volitional', 'ましょう', 'ます', 'nt', 'nt'), + + -- TODO: making suggestions using the 「ば」 or 「たら」 conditional + + -- attempt (〜みる) <https://guidetojapanese.org/learn/grammar/try> + ('infl:attempt:miru', 'みる', '', 'ru', 'a'), + + -- attempt (〜とする) <https://guidetojapanese.org/learn/grammar/try> + ('infl:attempt:tosuru', 'とする', '', 's', 'a'), -- auxiliary rules ('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem @@ -175,7 +223,9 @@ insert into rule_map values (null, 'z', 1 << 4), -- ずる (zuru) ('infl:reason:adj:i', 'i', 1 << 5), -- 形容詞 (i-adjective) (null, 'iru', 1 << 6), -- 〜いる (temporary iru for progressive tense) - ('infl:reason:adj:na', 'na', 1 << 7); -- 形容動詞 (na-adjective) + ('infl:reason:adj:na', 'na', 1 << 7), -- 形容動詞 (na-adjective) + (null, 'ot', 1 << 8), -- (temporary rule for deconjugating obligatory endings) + (null, 'nt', (1 << 6) - 1); -- (utility no temp, allows anything but temporaries) -- add tags to db insert into deinflection_rules (mask, tag) diff --git a/import/jmdict/jmdict.ts b/import/jmdict/jmdict.ts index 1d14ade..155c423 100644 --- a/import/jmdict/jmdict.ts +++ b/import/jmdict/jmdict.ts @@ -18,6 +18,7 @@ const tagLookup = { ["misc/uk"]: Tag.Auxiliary.UsuallyKana, ["class/adv"]: Tag.Class.Adverb, ["class/vs"]: Tag.Class.Verb.Suru, + ["class/vs-i"]: Tag.Class.Verb.Suru, ["class/v1"]: Tag.Class.Verb.Ru, ["class/v5"]: Tag.Class.Verb.U, ["class/v5k"]: Tag.Class.Verb.U, diff --git a/language/tags.ts b/language/tags.ts index 312a594..de1a3ea 100644 --- a/language/tags.ts +++ b/language/tags.ts @@ -131,7 +131,28 @@ export const Tag = { Ra: "infl:cond:ra", }, /** @constant makes a verb obligatory (e.g. 入ってはいけない) */ - Obliged: "infl:must", + Obligatory: "infl:must", + /** @constant verbs that someone wants to do / be done */ + Desirable: { + /** @constant 〜たい endings (e.g. 買いたい) */ + Itai: "infl:desire:itai", + /** @constant 〜おう endings (e.g. 寝よう) */ + Volitional: "infl:desire:volitional", + }, + /** @constant makes a verb an attempt */ + Attempt: { + /** @constant 〜みる to try something out (e.g. 飲んでみた) */ + Miru: "infl:attempt:miru", + /** @constant 〜とする attempts (e.g. 入ろうとしている) */ + ToSuru: "infl:attempt:tosuru", + }, + /** @constant temporary tags (removed by parseTags) */ + Temporary: { + /** @constant particle of obligatory conjugation (e.g. 行かない*と*だめ), or colloquial abbreviation */ + ObligatoryParticle: "infl:tmp:must:prt", + /** @constant resulting action part of obligatory conjugation (e.g. 行かないと*だめ*) */ + ObligatoryResult: "infl:tmp:must:res", + }, }, /** @constant uncategorized tags */ Auxiliary: { @@ -165,12 +186,17 @@ export function parseTags(input: string) { // skip past tense tag if used as step for -te and -tari inflection if (tag == Tag.Inflection.Tense.Past && filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue; - // skip -te suffix tag if it's a base for continuous tense - if (tag == Tag.Inflection.Suffix.Te && - filteredTags.anyOf([Tag.Inflection.Tense.Continuous])) continue; - // skip -te suffix tag if it's a base for obligatory inflection - if (tag == Tag.Inflection.Suffix.Te && - filteredTags.anyOf([Tag.Inflection.Obliged])) continue; + + // skip -te suffix tag if used for + if (tag == Tag.Inflection.Suffix.Te && filteredTags.anyOf([ + Tag.Inflection.Tense.Continuous, // base for continuous tense + Tag.Inflection.Obligatory, // base for obligatory inflection + Tag.Inflection.Attempt.Miru, // base for 〜みる attempt + ])) continue; + + // skip volitional tag if used for 〜とする attempt + if (tag == Tag.Inflection.Desirable.Volitional && + filteredTags.anyOf([Tag.Inflection.Attempt.ToSuru])) continue; // normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative if (tag == Tag.Inflection.Negative) { @@ -180,7 +206,21 @@ export function parseTags(input: string) { filteredTags.push(tag); } + + // negative + と without resulting action = implicit affirmative obligatory + if (filteredTags.includes(Tag.Inflection.Temporary.ObligatoryParticle) && + !filteredTags.includes(Tag.Inflection.Temporary.ObligatoryResult)) { + negationCount = 0; // -> make resulting tags affirmative + } + + // normalize affirmative/negative filteredTags.push(negationCount % 2 == 0 ? Tag.Inflection.Affirmative : Tag.Inflection.Negative); - return filteredTags.set().arr() as TokenTags; // make sure array doesn't contain duplicates + + // filter any remaining temporary tags + type tempTag = typeof Tag.Inflection.Temporary[keyof typeof Tag.Inflection.Temporary]; + filteredTags = filteredTags.filter(t => !Object.values(Tag.Inflection.Temporary).includes(t as tempTag)); + + // filter any duplicates + return filteredTags.set().arr() as TokenTags; } diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index 143e2a4..a54dba9 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -39,9 +39,35 @@ export default [ { input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], }, { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], }, { input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], }, - { input: "取らなければ", mustHave: [ Inflection.Potential, Inflection.Negative ], mustNotHave: [], }, - { input: "入ってはいけない", mustHave: [ Inflection.Obliged, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~ - { input: "行かなくてはなりません", mustHave: [ Inflection.Obliged, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~ + { input: "取らなければ", mustHave: [ Inflection.Conditional.Ba, Inflection.Negative ], mustNotHave: [], }, + { input: "入ってはいけない", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~ + { input: "行かなくてはなりません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~ + { input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "入っちゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, + { input: "死んじゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, + { input: "しなくてはいけなかった", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "行かないとだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "しないといけない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "行かなければいけません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "しなければだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, + { input: "買いたい", mustHave: [ Inflection.Desirable.Itai ], mustNotHave: [], }, + { input: "寝よう", mustHave: [ Inflection.Desirable.Volitional ], mustNotHave: [], }, + { input: "しましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, + { input: "きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, + { input: "寝ましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, + { input: "行きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, + { input: "遊びましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, + { input: "食べてみた", mustHave: [ Inflection.Attempt.Miru, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ], }, + { input: "飲んでみました", mustHave: [ Inflection.Attempt.Miru, Inflection.Tense.Past, Inflection.Polite.Masu ], mustNotHave: [ Inflection.Suffix.Te ], }, + { input: "食べてみたい", mustHave: [ Inflection.Attempt.Miru, Inflection.Desirable.Itai ], mustNotHave: [ Inflection.Suffix.Te ], }, + { input: "切ってみなかった", mustHave: [ Inflection.Attempt.Miru, Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ] }, + { input: "見ようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, + { input: "行こうとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, + { input: "避けようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, + { input: "入ろうとしている", mustHave: [ Inflection.Attempt.ToSuru, Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Desirable.Volitional ], }, // TODO: りゃ for いることは // TODO: じゃ for では // TODO: なきゃ + なくちゃ |