diff options
-rw-r--r-- | api/word.ts | 72 | ||||
-rw-r--r-- | db/dict/deinflections.sql | 16 | ||||
-rw-r--r-- | search/tags.ts | 30 | ||||
-rw-r--r-- | test/deinflection/cases.ts | 3 | ||||
-rw-r--r-- | test/deinflection/test.ts | 16 | ||||
-rw-r--r-- | util/object.ts | 12 |
6 files changed, 89 insertions, 60 deletions
diff --git a/api/word.ts b/api/word.ts index 4c09cff..2e07c98 100644 --- a/api/word.ts +++ b/api/word.ts @@ -1,47 +1,57 @@ import Glossary from "./glossary.ts"; import APIBase from "./base.ts"; import Japanese, { JapaneseFormatter } from "./japanese.ts"; - import "../util/string.ts"; +import "../util/object.ts"; import { Tag, TagGroup, TokenTags } from "../search/tags.ts"; import { SearchWord } from "../search/types.ts"; +import { recursiveValues } from "../util/object.ts"; // irregular stems taken from <https://en.wikipedia.org/wiki/Japanese_irregular_verbs#suru_and_kuru> function irregularSuruStem(tags: TokenTags): string { - if (tags.anyOf([ - Tag.Inflection.Polite.Masu, - Tag.Inflection.Suffix.Te, - Tag.Inflection.Tense.Past, - Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form - Tag.Inflection.Negative, - Tag.Inflection.Desirable.Volitional, - Tag.Inflection.Command, - ])) return "し"; - if (tags.anyOf([ - Tag.Inflection.Passive, - Tag.Inflection.Causative, - ])) return "さ"; - if (tags.anyOf([ - Tag.Inflection.Potential, - ])) return "でき"; + for (let i = 0, tag = tags[i]; i < tags.length; i++, tag = tags[i]) { + if (!recursiveValues(Tag.Inflection).includes(tag)) continue; + if (recursiveValues(Tag.Inflection.Reason).includes(tag)) continue; + if ([ + Tag.Inflection.Polite.Masu, + Tag.Inflection.Suffix.Te, + Tag.Inflection.Tense.Past, + Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form + Tag.Inflection.Negative, + Tag.Inflection.Desirable.Volitional, + Tag.Inflection.Command, + ].includes(tag as any)) return "し"; + if ([ + Tag.Inflection.Passive, + Tag.Inflection.Causative, + ].includes(tag as any)) return "さ"; + // wikipedia has できる as the potential form for する, but できる here + // means it's already foobar'd + break; + } return "す"; } function irregularKuruStem(tags: TokenTags): string { - if (tags.anyOf([ - Tag.Inflection.Polite.Masu, - Tag.Inflection.Suffix.Te, - Tag.Inflection.Tense.Past, - Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form - ])) return "き"; - if (tags.anyOf([ - Tag.Inflection.Negative, - Tag.Inflection.Desirable.Volitional, - Tag.Inflection.Passive, - Tag.Inflection.Causative, - Tag.Inflection.Potential, - Tag.Inflection.Command, - ])) return "こ"; + for (let i = 0, tag = tags[i]; i < tags.length; i++, tag = tags[i]) { + if (!recursiveValues(Tag.Inflection).includes(tag)) continue; + if (recursiveValues(Tag.Inflection.Reason).includes(tag)) continue; + if ([ + Tag.Inflection.Polite.Masu, + Tag.Inflection.Suffix.Te, + Tag.Inflection.Tense.Past, + Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form + ].includes(tag as any)) return "き"; + if ([ + Tag.Inflection.Negative, + Tag.Inflection.Desirable.Volitional, + Tag.Inflection.Passive, + Tag.Inflection.Causative, + Tag.Inflection.Potential, + Tag.Inflection.Command, + ].includes(tag as any)) return "こ"; + break; + } return "く"; } diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql index fd6ffc8..21a634d 100644 --- a/db/dict/deinflections.sql +++ b/db/dict/deinflections.sql @@ -157,16 +157,16 @@ insert into deinflection_temp values -- obligation <https://guidetojapanese.org/learn/grammar/must> -- TODO: manually write these out instead of splitting particle and suffix - ('infl:negative infl:must infl:tmp:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated - ('infl:must infl:tmp:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past) - ('infl:must infl:tmp:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive + ('infl:negative infl:must tmp:infl:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated + ('infl:must tmp:infl:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past) + ('infl:must tmp:infl:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive ('infl:must', 'は', '', 'ot', 'nt'), -- removes particle (negative -te + は + だめ/いけない/ならない) - ('infl:tmp:must:prt infl:must', 'と', '', 'ot', 'nt'), -- removes particle (negative + と + だめ/いけない/ならない) + ('tmp:infl:must:prt infl:must', 'と', '', 'ot', 'nt'), -- removes particle (negative + と + だめ/いけない/ならない) -- ('infl:must', 'ば', 'ば', 'ot', 'a'), -- causes infinite loop - ('infl:must infl:tmp:must:prt', 'なくちゃ', 'なくて', 'a', 'a'), -- colloquial abbreviation - ('infl:must infl:tmp:must:prt', 'なきゃ', 'なければ', 'a', 'a'), -- colloquial abbreviation - ('infl:tmp:must:prt', 'ちゃ', 'ては', 'a', 'a'), -- colloquial abbreviation - ('infl:tmp:must:prt', 'じゃ', 'では', 'a', 'a'), -- colloquial abbreviation + ('infl:must tmp:infl:must:prt', 'なくちゃ', 'なくて', 'a', 'a'), -- colloquial abbreviation + ('infl:must tmp:infl:must:prt', 'なきゃ', 'なければ', 'a', 'a'), -- colloquial abbreviation + ('tmp:infl:must:prt', 'ちゃ', 'ては', 'a', 'a'), -- colloquial abbreviation + ('tmp:infl:must:prt', 'じゃ', 'では', 'a', 'a'), -- colloquial abbreviation -- ~tai endings <https://guidetojapanese.org/learn/grammar/desire> ('infl:desire:itai', 'たい', 'る', 'i', 'ru'), diff --git a/search/tags.ts b/search/tags.ts index 22ea315..32ce02f 100644 --- a/search/tags.ts +++ b/search/tags.ts @@ -148,19 +148,19 @@ export const Tag = { /** @constant 〜とする attempts (e.g. 入ろうとしている) */ ToSuru: "infl:attempt:tosuru", }, - /** @constant temporary tags (removed by parseTags) */ - Temporary: { - /** @constant particle of obligatory conjugation (e.g. 行かない*と*だめ), or colloquial abbreviation */ - ObligatoryParticle: "infl:tmp:must:prt", - /** @constant resulting action part of obligatory conjugation (e.g. 行かないと*だめ*) */ - ObligatoryResult: "infl:tmp:must:res", - }, }, /** @constant uncategorized tags */ Auxiliary: { /** @constant word usually written using only kana (but also has kanji) */ UsuallyKana: "aux:uk", }, + /** @constant temporary tags (removed by parseTags) */ + Temporary: { + /** @constant particle of obligatory conjugation (e.g. 行かない*と*だめ), or colloquial abbreviation */ + ObligatoryParticle: "tmp:infl:must:prt", + /** @constant resulting action part of obligatory conjugation (e.g. 行かないと*だめ*) */ + ObligatoryResult: "tmp:infl:must:res", + }, } as const; export const TagGroup = { @@ -198,7 +198,7 @@ export function parseTags(input: string) { if (tag == Tag.Inflection.Suffix.Te && [ Tag.Inflection.Tense.Continuous, // base for continuous tense Tag.Inflection.Obligatory, // base for obligatory inflection - Tag.Inflection.Temporary.ObligatoryParticle, // base for obligatory inflection + Tag.Temporary.ObligatoryParticle, // base for obligatory inflection Tag.Inflection.Attempt.Miru, // base for 〜みる attempt ].includes(lastTag as any)) continue; @@ -210,8 +210,8 @@ export function parseTags(input: string) { // skip conditional 〜ば if used for obligatory inflection if (tag == Tag.Inflection.Conditional.Ba && [ Tag.Inflection.Obligatory, - Tag.Inflection.Temporary.ObligatoryResult, - Tag.Inflection.Temporary.ObligatoryParticle, + Tag.Temporary.ObligatoryResult, + Tag.Temporary.ObligatoryParticle, ].includes(lastTag as any)) continue; // normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative @@ -220,12 +220,12 @@ export function parseTags(input: string) { continue; } - filteredTags.push(tag); + filteredTags.unshift(tag); } // negative + と without resulting action = implicit affirmative obligatory - if (filteredTags.includes(Tag.Inflection.Temporary.ObligatoryParticle) && - !filteredTags.includes(Tag.Inflection.Temporary.ObligatoryResult)) { + if (filteredTags.includes(Tag.Temporary.ObligatoryParticle) && + !filteredTags.includes(Tag.Temporary.ObligatoryResult)) { negationCount = 0; // -> make resulting tags affirmative } @@ -233,8 +233,8 @@ export function parseTags(input: string) { filteredTags.push(negationCount % 2 == 0 ? Tag.Inflection.Affirmative : Tag.Inflection.Negative); // filter any remaining temporary tags - type tempTag = typeof Tag.Inflection.Temporary[keyof typeof Tag.Inflection.Temporary]; - filteredTags = filteredTags.filter(t => !Object.values(Tag.Inflection.Temporary).includes(t as tempTag)); + type tempTag = typeof Tag.Temporary[keyof typeof Tag.Temporary]; + filteredTags = filteredTags.filter(t => !Object.values(Tag.Temporary).includes(t as tempTag)); // filter any duplicates return filteredTags.set().arr() as TokenTags; diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index e7a987e..df8d893 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -75,7 +75,8 @@ export default [ { input: "聞きなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], }, { input: "座りなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], }, { input: "食べさせられる", mustHave: [ Inflection.Passive, Inflection.Causative ], mustNotHave: [], }, - { input: "見極めなければならない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Conditional.Ba, Inflection.Negative ] } + { input: "見極めなければならない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Conditional.Ba, Inflection.Negative ] }, + { input: "ならなきゃいけない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Conditional.Ba, Inflection.Negative ] }, // TODO: りゃ for いることは // TODO: じゃ for では // and more! diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts index 291ed83..fac757e 100644 --- a/test/deinflection/test.ts +++ b/test/deinflection/test.ts @@ -1,6 +1,7 @@ import cases from "./cases.ts"; import { core } from '../base.ts'; -import { TokenTag } from "../../search/tags.ts"; +import { Tag, TokenTag } from "../../search/tags.ts"; +import { recursiveValues } from "../../util/object.ts"; cases.forEach(({ input, mustHave, mustNotHave, force }) => { Deno.test(`deinflection - ${input}`, async () => { @@ -18,16 +19,21 @@ cases.forEach(({ input, mustHave, mustNotHave, force }) => { if (!result) throw new Error("No deconjugation found for input"); + function bail(msg: string) { + console.log(` wanted tags: ${mustHave.join(" + ")}`); + console.log(`unwanted tags: ${mustNotHave.join(" + ")}`); + console.log(`actual result: ${result.writing} + ${result.tags.filter(tag => recursiveValues(Tag.Inflection).includes(tag) && !recursiveValues(Tag.Inflection.Reason).includes(tag)).join(" + ")}`); + throw new Error(msg); + } + let tag: TokenTag; for (tag of mustHave) if (!result.tags.includes(tag)) - throw new Error(`Deconjugation doesn't include required tag ${tag}`); + return bail(`Deconjugation doesn't include required tag ${tag}`); for (tag of mustNotHave) if (result.tags.includes(tag)) - throw new Error(`Deconjugation includes unallowed tag ${tag}`); - - // console.log(result.writing + " + " + result.tags.filter(tag => tag.startsWith("infl:") && !tag.startsWith("infl:reason:")).join(" + ")); + return bail(`Deconjugation includes unallowed tag ${tag}`); }); }) diff --git a/util/object.ts b/util/object.ts new file mode 100644 index 0000000..fc25f50 --- /dev/null +++ b/util/object.ts @@ -0,0 +1,12 @@ +export function recursiveValues(obj: { [k: string]: any }): any[] { + let values = []; + for (let key in obj) { + let val = obj[key]; + if (typeof val === "object") { + values.push(...recursiveValues(val)); + } else { + values.push(val); + } + } + return values; +} |