about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-11 13:01:41 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-11 13:01:41 +0200
commit e99ae80f7adc0f0e677381c3cc1549235d3877ab (patch)
tree 57362ddfa0ee2704cf7042d72559c479283ea1df
parent 479836dbf3c7cc6e5940abe698ccc5e1d7b440c7 (diff)
small cleanup
-rw-r--r-- api/word.ts 72
-rw-r--r-- db/dict/deinflections.sql 16
-rw-r--r-- search/tags.ts 30
-rw-r--r-- test/deinflection/cases.ts 3
-rw-r--r-- test/deinflection/test.ts 16
-rw-r--r-- util/object.ts 12
6 files changed, 89 insertions, 60 deletions
diff --git a/api/word.ts b/api/word.ts
index 4c09cff..2e07c98 100644
--- a/api/word.ts
+++ b/api/word.ts
@@ -1,47 +1,57 @@
import Glossary from "./glossary.ts";
import APIBase from "./base.ts";
import Japanese, { JapaneseFormatter } from "./japanese.ts";
-
import "../util/string.ts";
+import "../util/object.ts";
import { Tag, TagGroup, TokenTags } from "../search/tags.ts";
import { SearchWord } from "../search/types.ts";
+import { recursiveValues } from "../util/object.ts";
// irregular stems taken from <https://en.wikipedia.org/wiki/Japanese_irregular_verbs#suru_and_kuru>
function irregularSuruStem(tags: TokenTags): string {
- if (tags.anyOf([
- Tag.Inflection.Polite.Masu,
- Tag.Inflection.Suffix.Te,
- Tag.Inflection.Tense.Past,
- Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form
- Tag.Inflection.Negative,
- Tag.Inflection.Desirable.Volitional,
- Tag.Inflection.Command,
- ])) return "し";
- if (tags.anyOf([
- Tag.Inflection.Passive,
- Tag.Inflection.Causative,
- ])) return "さ";
- if (tags.anyOf([
- Tag.Inflection.Potential,
- ])) return "でき";
+ for (let i = 0, tag = tags[i]; i < tags.length; i++, tag = tags[i]) {
+ if (!recursiveValues(Tag.Inflection).includes(tag)) continue;
+ if (recursiveValues(Tag.Inflection.Reason).includes(tag)) continue;
+ if ([
+ Tag.Inflection.Polite.Masu,
+ Tag.Inflection.Suffix.Te,
+ Tag.Inflection.Tense.Past,
+ Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form
+ Tag.Inflection.Negative,
+ Tag.Inflection.Desirable.Volitional,
+ Tag.Inflection.Command,
+ ].includes(tag as any)) return "し";
+ if ([
+ Tag.Inflection.Passive,
+ Tag.Inflection.Causative,
+ ].includes(tag as any)) return "さ";
+ // wikipedia has できる as the potential form for する, but できる here
+ // means it's already foobar'd
+ break;
+ }
return "す";
}
function irregularKuruStem(tags: TokenTags): string {
- if (tags.anyOf([
- Tag.Inflection.Polite.Masu,
- Tag.Inflection.Suffix.Te,
- Tag.Inflection.Tense.Past,
- Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form
- ])) return "き";
- if (tags.anyOf([
- Tag.Inflection.Negative,
- Tag.Inflection.Desirable.Volitional,
- Tag.Inflection.Passive,
- Tag.Inflection.Causative,
- Tag.Inflection.Potential,
- Tag.Inflection.Command,
- ])) return "こ";
+ for (let i = 0, tag = tags[i]; i < tags.length; i++, tag = tags[i]) {
+ if (!recursiveValues(Tag.Inflection).includes(tag)) continue;
+ if (recursiveValues(Tag.Inflection.Reason).includes(tag)) continue;
+ if ([
+ Tag.Inflection.Polite.Masu,
+ Tag.Inflection.Suffix.Te,
+ Tag.Inflection.Tense.Past,
+ Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form
+ ].includes(tag as any)) return "き";
+ if ([
+ Tag.Inflection.Negative,
+ Tag.Inflection.Desirable.Volitional,
+ Tag.Inflection.Passive,
+ Tag.Inflection.Causative,
+ Tag.Inflection.Potential,
+ Tag.Inflection.Command,
+ ].includes(tag as any)) return "こ";
+ break;
+ }
return "く";
}
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index fd6ffc8..21a634d 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -157,16 +157,16 @@ insert into deinflection_temp values
-- obligation <https://guidetojapanese.org/learn/grammar/must>
-- TODO: manually write these out instead of splitting particle and suffix
- ('infl:negative infl:must infl:tmp:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
- ('infl:must infl:tmp:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past)
- ('infl:must infl:tmp:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive
+ ('infl:negative infl:must tmp:infl:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
+ ('infl:must tmp:infl:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past)
+ ('infl:must tmp:infl:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive
('infl:must', 'は', '', 'ot', 'nt'), -- removes particle (negative -te + は + だめ/いけない/ならない)
- ('infl:tmp:must:prt infl:must', 'と', '', 'ot', 'nt'), -- removes particle (negative + と + だめ/いけない/ならない)
+ ('tmp:infl:must:prt infl:must', 'と', '', 'ot', 'nt'), -- removes particle (negative + と + だめ/いけない/ならない)
-- ('infl:must', 'ば', 'ば', 'ot', 'a'), -- causes infinite loop
- ('infl:must infl:tmp:must:prt', 'なくちゃ', 'なくて', 'a', 'a'), -- colloquial abbreviation
- ('infl:must infl:tmp:must:prt', 'なきゃ', 'なければ', 'a', 'a'), -- colloquial abbreviation
- ('infl:tmp:must:prt', 'ちゃ', 'ては', 'a', 'a'), -- colloquial abbreviation
- ('infl:tmp:must:prt', 'じゃ', 'では', 'a', 'a'), -- colloquial abbreviation
+ ('infl:must tmp:infl:must:prt', 'なくちゃ', 'なくて', 'a', 'a'), -- colloquial abbreviation
+ ('infl:must tmp:infl:must:prt', 'なきゃ', 'なければ', 'a', 'a'), -- colloquial abbreviation
+ ('tmp:infl:must:prt', 'ちゃ', 'ては', 'a', 'a'), -- colloquial abbreviation
+ ('tmp:infl:must:prt', 'じゃ', 'では', 'a', 'a'), -- colloquial abbreviation
-- ~tai endings <https://guidetojapanese.org/learn/grammar/desire>
('infl:desire:itai', 'たい', 'る', 'i', 'ru'),
diff --git a/search/tags.ts b/search/tags.ts
index 22ea315..32ce02f 100644
--- a/search/tags.ts
+++ b/search/tags.ts
@@ -148,19 +148,19 @@ export const Tag = {
/** @constant 〜とする attempts (e.g. 入ろうとしている) */
ToSuru: "infl:attempt:tosuru",
},
- /** @constant temporary tags (removed by parseTags) */
- Temporary: {
- /** @constant particle of obligatory conjugation (e.g. 行かない*と*だめ), or colloquial abbreviation */
- ObligatoryParticle: "infl:tmp:must:prt",
- /** @constant resulting action part of obligatory conjugation (e.g. 行かないと*だめ*) */
- ObligatoryResult: "infl:tmp:must:res",
- },
},
/** @constant uncategorized tags */
Auxiliary: {
/** @constant word usually written using only kana (but also has kanji) */
UsuallyKana: "aux:uk",
},
+ /** @constant temporary tags (removed by parseTags) */
+ Temporary: {
+ /** @constant particle of obligatory conjugation (e.g. 行かない*と*だめ), or colloquial abbreviation */
+ ObligatoryParticle: "tmp:infl:must:prt",
+ /** @constant resulting action part of obligatory conjugation (e.g. 行かないと*だめ*) */
+ ObligatoryResult: "tmp:infl:must:res",
+ },
} as const;
export const TagGroup = {
@@ -198,7 +198,7 @@ export function parseTags(input: string) {
if (tag == Tag.Inflection.Suffix.Te && [
Tag.Inflection.Tense.Continuous, // base for continuous tense
Tag.Inflection.Obligatory, // base for obligatory inflection
- Tag.Inflection.Temporary.ObligatoryParticle, // base for obligatory inflection
+ Tag.Temporary.ObligatoryParticle, // base for obligatory inflection
Tag.Inflection.Attempt.Miru, // base for 〜みる attempt
].includes(lastTag as any)) continue;
@@ -210,8 +210,8 @@ export function parseTags(input: string) {
// skip conditional 〜ば if used for obligatory inflection
if (tag == Tag.Inflection.Conditional.Ba && [
Tag.Inflection.Obligatory,
- Tag.Inflection.Temporary.ObligatoryResult,
- Tag.Inflection.Temporary.ObligatoryParticle,
+ Tag.Temporary.ObligatoryResult,
+ Tag.Temporary.ObligatoryParticle,
].includes(lastTag as any)) continue;
// normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative
@@ -220,12 +220,12 @@ export function parseTags(input: string) {
continue;
}
- filteredTags.push(tag);
+ filteredTags.unshift(tag);
}
// negative + と without resulting action = implicit affirmative obligatory
- if (filteredTags.includes(Tag.Inflection.Temporary.ObligatoryParticle) &&
- !filteredTags.includes(Tag.Inflection.Temporary.ObligatoryResult)) {
+ if (filteredTags.includes(Tag.Temporary.ObligatoryParticle) &&
+ !filteredTags.includes(Tag.Temporary.ObligatoryResult)) {
negationCount = 0; // -> make resulting tags affirmative
}
@@ -233,8 +233,8 @@ export function parseTags(input: string) {
filteredTags.push(negationCount % 2 == 0 ? Tag.Inflection.Affirmative : Tag.Inflection.Negative);
// filter any remaining temporary tags
- type tempTag = typeof Tag.Inflection.Temporary[keyof typeof Tag.Inflection.Temporary];
- filteredTags = filteredTags.filter(t => !Object.values(Tag.Inflection.Temporary).includes(t as tempTag));
+ type tempTag = typeof Tag.Temporary[keyof typeof Tag.Temporary];
+ filteredTags = filteredTags.filter(t => !Object.values(Tag.Temporary).includes(t as tempTag));
// filter any duplicates
return filteredTags.set().arr() as TokenTags;
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index e7a987e..df8d893 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -75,7 +75,8 @@ export default [
{ input: "聞きなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
{ input: "座りなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
{ input: "食べさせられる", mustHave: [ Inflection.Passive, Inflection.Causative ], mustNotHave: [], },
- { input: "見極めなければならない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Conditional.Ba, Inflection.Negative ] }
+ { input: "見極めなければならない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Conditional.Ba, Inflection.Negative ] },
+ { input: "ならなきゃいけない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Conditional.Ba, Inflection.Negative ] },
// TODO: りゃ for いることは
// TODO: じゃ for では
// and more!
diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts
index 291ed83..fac757e 100644
--- a/test/deinflection/test.ts
+++ b/test/deinflection/test.ts
@@ -1,6 +1,7 @@
import cases from "./cases.ts";
import { core } from '../base.ts';
-import { TokenTag } from "../../search/tags.ts";
+import { Tag, TokenTag } from "../../search/tags.ts";
+import { recursiveValues } from "../../util/object.ts";
cases.forEach(({ input, mustHave, mustNotHave, force }) => {
Deno.test(`deinflection - ${input}`, async () => {
@@ -18,16 +19,21 @@ cases.forEach(({ input, mustHave, mustNotHave, force }) => {
if (!result)
throw new Error("No deconjugation found for input");
+ function bail(msg: string) {
+ console.log(` wanted tags: ${mustHave.join(" + ")}`);
+ console.log(`unwanted tags: ${mustNotHave.join(" + ")}`);
+ console.log(`actual result: ${result.writing} + ${result.tags.filter(tag => recursiveValues(Tag.Inflection).includes(tag) && !recursiveValues(Tag.Inflection.Reason).includes(tag)).join(" + ")}`);
+ throw new Error(msg);
+ }
+
let tag: TokenTag;
for (tag of mustHave)
if (!result.tags.includes(tag))
- throw new Error(`Deconjugation doesn't include required tag ${tag}`);
+ return bail(`Deconjugation doesn't include required tag ${tag}`);
for (tag of mustNotHave)
if (result.tags.includes(tag))
- throw new Error(`Deconjugation includes unallowed tag ${tag}`);
-
- // console.log(result.writing + " + " + result.tags.filter(tag => tag.startsWith("infl:") && !tag.startsWith("infl:reason:")).join(" + "));
+ return bail(`Deconjugation includes unallowed tag ${tag}`);
});
})
diff --git a/util/object.ts b/util/object.ts
new file mode 100644
index 0000000..fc25f50
--- /dev/null
+++ b/util/object.ts
@@ -0,0 +1,12 @@
+export function recursiveValues(obj: { [k: string]: any }): any[] {
+ let values = [];
+ for (let key in obj) {
+ let val = obj[key];
+ if (typeof val === "object") {
+ values.push(...recursiveValues(val));
+ } else {
+ values.push(val);
+ }
+ }
+ return values;
+}