diff options
| author | lonkaars <loek@pipeframe.xyz> | 2023-07-09 18:00:33 +0200 | 
|---|---|---|
| committer | lonkaars <loek@pipeframe.xyz> | 2023-07-09 18:00:33 +0200 | 
| commit | 80b46d2f6152129f25f5734e4960cb7c15edfcf0 (patch) | |
| tree | 191c528e5ed18c6c85091b5dadb152231cb857aa | |
| parent | 1138ac8fc8764cf5cd987383a7a0332879be6cca (diff) | |
more deinflections + tests
| -rw-r--r-- | db/dict/deinflections.sql | 78 | ||||
| -rw-r--r-- | import/jmdict/jmdict.ts | 1 | ||||
| -rw-r--r-- | language/tags.ts | 56 | ||||
| -rw-r--r-- | test/deinflection/cases.ts | 32 | 
4 files changed, 142 insertions, 25 deletions
| diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql index 7d67b02..a4a35d5 100644 --- a/db/dict/deinflections.sql +++ b/db/dict/deinflections.sql @@ -142,23 +142,71 @@ insert into deinflection_temp values  	('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception  	-- conditionals <https://guidetojapanese.org/learn/grammar/conditionals> -	('infl:cond:ba',   'えば', 'う', 'a', 'u'), -	('infl:cond:ba',   'けば', 'く', 'a', 'u'), -	('infl:cond:ba',   'げば', 'ぐ', 'a', 'u'), -	('infl:cond:ba',   'せば', 'す', 'a', 'u'), -	('infl:cond:ba',   'てば', 'つ', 'a', 'u'), -	('infl:cond:ba',   'ねば', 'ぬ', 'a', 'u'), -	('infl:cond:ba',   'べば', 'ぶ', 'a', 'u'), -	('infl:cond:ba',   'めば', 'む', 'a', 'u'), -	('infl:cond:ba',   'れば', 'る', 'a', 'u ru'), -	('infl:cond:ba', 'ければ', 'い', 'a', 'a'), +	('infl:cond:ba',   'えば', 'う', 'nt', 'u'), +	('infl:cond:ba',   'けば', 'く', 'nt', 'u'), +	('infl:cond:ba',   'げば', 'ぐ', 'nt', 'u'), +	('infl:cond:ba',   'せば', 'す', 'nt', 'u'), +	('infl:cond:ba',   'てば', 'つ', 'nt', 'u'), +	('infl:cond:ba',   'ねば', 'ぬ', 'nt', 'u'), +	('infl:cond:ba',   'べば', 'ぶ', 'nt', 'u'), +	('infl:cond:ba',   'めば', 'む', 'nt', 'u'), +	('infl:cond:ba',   'れば', 'る', 'nt', 'u ru'), +	('infl:cond:ba', 'ければ', 'い', 'nt', 'a'),  	-- TODO: 〜であれば (deconjugates to です i think?)  	('infl:cond:ra', 'ら', '', 'a', 'a'), -- TODO: constrain valid matches to after past conjugation?  	-- obligation <https://guidetojapanese.org/learn/grammar/must> -	('infl:must infl:negative', 'はだめ', '', 'a', 'a'), -- built-in negative because だめ can't be deconjugated -	('infl:must', 'はいける', '', 'a', 'a'), -- はいけない -> positive -	('infl:must', 'はなる', '', 'a', 'a'), -- はならない -> positive +	('infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated +	('infl:tmp:must:res', 'いける', '', 'a', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past) +	('infl:tmp:must:res', 'なる', '', 'a', 'ot'), -- はならない -> positive +	('infl:must', 'は', '', 'ot', 'a'), -- removes particle (negative -te + は + だめ/いけない/ならない) +	('infl:tmp:must:prt infl:must', 'と', '', 'ot', 'a'), -- removes particle (negative + と + だめ/いけない/ならない) +	('infl:must', 'ば', 'ば', 'ot', 'a'), -- removes ot rule, keeps 〜ば (negative + ば + だめ/いけない/ならない) +	('infl:must infl:tmp:must:prt', 'なくちゃ', 'なくて', 'a', 'a'), -- colloquial abbreviation +	('infl:must infl:tmp:must:prt', 'なきゃ', 'なければ', 'a', 'a'), -- colloquial abbreviation +	('infl:tmp:must:prt', 'ちゃ', 'ては', 'a', 'a'), -- colloquial abbreviation +	('infl:tmp:must:prt', 'じゃ', 'では', 'a', 'a'), -- colloquial abbreviation + +	-- ~tai endings <https://guidetojapanese.org/learn/grammar/desire> +	('infl:desire:itai',   'たい', 'る', 'i', 'ru'), +	('infl:desire:itai', 'いたい', 'う', 'i', 'u'), +	('infl:desire:itai', 'きたい', 'く', 'i', 'u'), +	('infl:desire:itai', 'ぎたい', 'ぐ', 'i', 'u'), +	('infl:desire:itai', 'したい', 'す', 'i', 'u'), +	('infl:desire:itai', 'ちたい', 'つ', 'i', 'u'), +	('infl:desire:itai', 'にたい', 'ぬ', 'i', 'u'), +	('infl:desire:itai', 'びたい', 'ぶ', 'i', 'u'), +	('infl:desire:itai', 'みたい', 'む', 'i', 'u'), +	('infl:desire:itai', 'りたい', 'る', 'i', 'u'), +	('infl:desire:itai', 'したい', 'する', 'i', 's'), +	('infl:desire:itai', 'きたい', 'くる', 'i', 'k'), +	('infl:desire:itai', '来たい', '来る', 'i', 'k'), + +	-- volitional form <https://guidetojapanese.org/learn/grammar/desire> +	('infl:desire:volitional',   'よう',   'る', 'a', 'ru'), +	('infl:desire:volitional',   'おう',   'う', 'a', 'u'), +	('infl:desire:volitional',   'こう',   'く', 'a', 'u'), +	('infl:desire:volitional',   'ごう',   'ぐ', 'a', 'u'), +	('infl:desire:volitional',   'そう',   'す', 'a', 'u'), +	('infl:desire:volitional',   'とう',   'つ', 'a', 'u'), +	('infl:desire:volitional',   'のう',   'ぬ', 'a', 'u'), +	('infl:desire:volitional',   'ぼう',   'ぶ', 'a', 'u'), +	('infl:desire:volitional',   'もう',   'む', 'a', 'u'), +	('infl:desire:volitional',   'ろう',   'る', 'a', 'u'), +	('infl:desire:volitional', 'しよう', 'する', 'a', 's'), +	('infl:desire:volitional', 'こよう', 'くる', 'a', 'k'), +	('infl:desire:volitional', '来よう', '来る', 'a', 'k'), + +	-- polite volitional <https://guidetojapanese.org/learn/grammar/desire> +	('infl:desire:volitional', 'ましょう', 'ます', 'nt', 'nt'), + +	-- TODO: making suggestions using the 「ば」 or 「たら」 conditional + +	-- attempt (〜みる) <https://guidetojapanese.org/learn/grammar/try> +	('infl:attempt:miru', 'みる', '', 'ru', 'a'), + +	-- attempt (〜とする) <https://guidetojapanese.org/learn/grammar/try> +	('infl:attempt:tosuru', 'とする', '', 's', 'a'),  	-- auxiliary rules  	('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem @@ -175,7 +223,9 @@ insert into rule_map values  	(null,                 'z',   1 << 4), -- ずる     (zuru)  	('infl:reason:adj:i',  'i',   1 << 5), -- 形容詞   (i-adjective)  	(null,                 'iru', 1 << 6), -- 〜いる   (temporary iru for progressive tense) -	('infl:reason:adj:na', 'na',  1 << 7); -- 形容動詞 (na-adjective) +	('infl:reason:adj:na', 'na',  1 << 7), -- 形容動詞 (na-adjective) +	(null,                 'ot',  1 << 8), --          (temporary rule for deconjugating obligatory endings) +	(null,                 'nt',  (1 << 6) - 1); --    (utility no temp, allows anything but temporaries)  -- add tags to db  insert into deinflection_rules (mask, tag) diff --git a/import/jmdict/jmdict.ts b/import/jmdict/jmdict.ts index 1d14ade..155c423 100644 --- a/import/jmdict/jmdict.ts +++ b/import/jmdict/jmdict.ts @@ -18,6 +18,7 @@ const tagLookup = {  	["misc/uk"]: Tag.Auxiliary.UsuallyKana,  	["class/adv"]: Tag.Class.Adverb,  	["class/vs"]: Tag.Class.Verb.Suru, +	["class/vs-i"]: Tag.Class.Verb.Suru,  	["class/v1"]: Tag.Class.Verb.Ru,  	["class/v5"]: Tag.Class.Verb.U,  	["class/v5k"]: Tag.Class.Verb.U, diff --git a/language/tags.ts b/language/tags.ts index 312a594..de1a3ea 100644 --- a/language/tags.ts +++ b/language/tags.ts @@ -131,7 +131,28 @@ export const Tag = {  			Ra: "infl:cond:ra",  		},  		/** @constant makes a verb obligatory (e.g. 入ってはいけない) */ -		Obliged: "infl:must", +		Obligatory: "infl:must", +		/** @constant verbs that someone wants to do / be done */ +		Desirable: { +			/** @constant 〜たい endings (e.g. 買いたい) */ +			Itai: "infl:desire:itai", +			/** @constant 〜おう endings (e.g. 寝よう) */ +			Volitional: "infl:desire:volitional", +		}, +		/** @constant makes a verb an attempt */ +		Attempt: { +			/** @constant 〜みる to try something out (e.g. 飲んでみた) */ +			Miru: "infl:attempt:miru", +			/** @constant 〜とする attempts (e.g. 入ろうとしている) */ +			ToSuru: "infl:attempt:tosuru", +		}, +		/** @constant temporary tags (removed by parseTags) */ +		Temporary: { +			/** @constant particle of obligatory conjugation (e.g. 行かない*と*だめ), or colloquial abbreviation */ +			ObligatoryParticle: "infl:tmp:must:prt", +			/** @constant resulting action part of obligatory conjugation (e.g. 行かないと*だめ*) */ +			ObligatoryResult: "infl:tmp:must:res", +		},  	},  	/** @constant uncategorized tags */  	Auxiliary: { @@ -165,12 +186,17 @@ export function parseTags(input: string) {  		// skip past tense tag if used as step for -te and -tari inflection  		if (tag == Tag.Inflection.Tense.Past &&  				filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue; -		// skip -te suffix tag if it's a base for continuous tense -		if (tag == Tag.Inflection.Suffix.Te && -				filteredTags.anyOf([Tag.Inflection.Tense.Continuous])) continue; -		// skip -te suffix tag if it's a base for obligatory inflection -		if (tag == Tag.Inflection.Suffix.Te && -				filteredTags.anyOf([Tag.Inflection.Obliged])) continue; + +		// skip -te suffix tag if used for +		if (tag == Tag.Inflection.Suffix.Te && filteredTags.anyOf([ +			Tag.Inflection.Tense.Continuous, // base for continuous tense +			Tag.Inflection.Obligatory, // base for obligatory inflection +			Tag.Inflection.Attempt.Miru, // base for 〜みる attempt +		])) continue; +	 +		// skip volitional tag if used for 〜とする attempt +		if (tag == Tag.Inflection.Desirable.Volitional && +				filteredTags.anyOf([Tag.Inflection.Attempt.ToSuru])) continue;  		// normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative  		if (tag == Tag.Inflection.Negative) { @@ -180,7 +206,21 @@ export function parseTags(input: string) {  		filteredTags.push(tag);  	} + +	// negative + と without resulting action = implicit affirmative obligatory +	if (filteredTags.includes(Tag.Inflection.Temporary.ObligatoryParticle) && +			!filteredTags.includes(Tag.Inflection.Temporary.ObligatoryResult)) { +		negationCount = 0; // -> make resulting tags affirmative +	} + +	// normalize affirmative/negative  	filteredTags.push(negationCount % 2 == 0 ? Tag.Inflection.Affirmative : Tag.Inflection.Negative); -	return filteredTags.set().arr() as TokenTags; // make sure array doesn't contain duplicates + +	// filter any remaining temporary tags +	type tempTag = typeof Tag.Inflection.Temporary[keyof typeof Tag.Inflection.Temporary]; +	filteredTags = filteredTags.filter(t => !Object.values(Tag.Inflection.Temporary).includes(t as tempTag)); + +	// filter any duplicates +	return filteredTags.set().arr() as TokenTags;  } diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index 143e2a4..a54dba9 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -39,9 +39,35 @@ export default [  	{ input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], },  	{ input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], },  	{ input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], }, -	{ input: "取らなければ", mustHave: [ Inflection.Potential, Inflection.Negative ], mustNotHave: [], }, -	{ input: "入ってはいけない", mustHave: [ Inflection.Obliged, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~ -	{ input: "行かなくてはなりません", mustHave: [ Inflection.Obliged, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~ +	{ input: "取らなければ", mustHave: [ Inflection.Conditional.Ba, Inflection.Negative ], mustNotHave: [], }, +	{ input: "入ってはいけない", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~ +	{ input: "行かなくてはなりません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~ +	{ input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "入っちゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, +	{ input: "死んじゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, +	{ input: "しなくてはいけなかった", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "行かないとだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "しないといけない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "行かなければいけません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "しなければだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, +	{ input: "買いたい", mustHave: [ Inflection.Desirable.Itai ], mustNotHave: [], }, +	{ input: "寝よう", mustHave: [ Inflection.Desirable.Volitional ], mustNotHave: [], }, +	{ input: "しましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, +	{ input: "きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, +	{ input: "寝ましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, +	{ input: "行きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, +	{ input: "遊びましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], }, +	{ input: "食べてみた", mustHave: [ Inflection.Attempt.Miru, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ], }, +	{ input: "飲んでみました", mustHave: [ Inflection.Attempt.Miru, Inflection.Tense.Past, Inflection.Polite.Masu ], mustNotHave: [ Inflection.Suffix.Te ], }, +	{ input: "食べてみたい", mustHave: [ Inflection.Attempt.Miru, Inflection.Desirable.Itai ], mustNotHave: [ Inflection.Suffix.Te ], }, +	{ input: "切ってみなかった", mustHave: [ Inflection.Attempt.Miru, Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ] }, +	{ input: "見ようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, +	{ input: "行こうとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, +	{ input: "避けようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], }, +	{ input: "入ろうとしている", mustHave: [ Inflection.Attempt.ToSuru, Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Desirable.Volitional ], },  	// TODO: りゃ for いることは  	// TODO: じゃ for では  	// TODO: なきゃ + なくちゃ |