diff options
| -rw-r--r-- | language/parser.ts | 6 | ||||
| -rw-r--r-- | language/readme.md | 4 | ||||
| -rw-r--r-- | language/tags.ts | 23 | ||||
| -rw-r--r-- | readme.md | 9 | ||||
| -rw-r--r-- | test/deinflection/cases.ts | 8 | ||||
| -rw-r--r-- | test/reading/cases.ts | 10 | 
6 files changed, 40 insertions, 20 deletions
diff --git a/language/parser.ts b/language/parser.ts index 6398595..7fd3981 100644 --- a/language/parser.ts +++ b/language/parser.ts @@ -30,7 +30,7 @@ export default class Parser {  			depth: optional?.depth ?? ParseDepth.Term,  			priorityMod: {  				high: optional?.priorityMod?.high ?? 10, -				low: optional?.priorityMod?.low ?? 0.1, +				low: optional?.priorityMod?.low ?? -10,  			},  			breaks: optional?.breaks ?? [],  		} @@ -95,12 +95,12 @@ export default class Parser {  				// give higher priority to suffixes when last token was a name, else lower priority  				if (result.tags.includes(Tag.Class.Suffix)) -					result.sort *= lastTokenName ? options.priorityMod.high : options.priorityMod.low; +					result.sort += lastTokenName ? options.priorityMod.high : options.priorityMod.low;  				// give lower priority to terms matched only by their readings, and are  				// usually written in kanji  				if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji) -					result.sort *= options.priorityMod.low; +					result.sort += options.priorityMod.low;  				return result;  			}); diff --git a/language/readme.md b/language/readme.md index c889c9d..99a7d69 100644 --- a/language/readme.md +++ b/language/readme.md @@ -30,7 +30,7 @@ Some tag classes impact the parser's behavior. For example, the input text  to deconjugate a noun with the verb 「する」 back into the stem.  Other uses of this behavior include more accurate automatic kanji reading -generation, for example 「城」 b:ing read as 「じょう」 in 「ハイラル城」 +generation, for example 「城」 being read as 「じょう」 in 「ハイラル城」  because 「ハイラル」 has the tag `name:place` in the database, and  「城(じょう)」 has `class:suffix`, while 「城(しろ)」 has `class:noun`. 
@@ -44,7 +44,7 @@ instead of 「トト湖(こ)」 as an expression to fix the reading of the kanji  If Yomikun doesn't generate the correct reading, and the reading isn't based on  natural language context (=a computer *could* accurately decide which reading  is correct based on other words/tags in the sentence), please submit a pull -request with the sentence and it's (expected) reading. An example of a +request with the sentence and its (expected) reading. An example of a  non-deterministic reading is 「何」 in the sentence 「何できた?」 which can be  read as both 「なん」 in which case 「何で」 turns into a single word, or  「なに」 where 「何」 is a regular word and 「で」 is particle. diff --git a/language/tags.ts b/language/tags.ts index 4205e72..7f5757f 100644 --- a/language/tags.ts +++ b/language/tags.ts @@ -6,18 +6,21 @@ export const Tag = {  	Class: {  		/** @constant verb subgroup */  		Verb: { -			/** @constant noun that can be conjugated into a verb by adding する */ +			/** @constant noun that can be conjugated into a verb by adding する and する itself */  			Suru: "class:verb:suru",  			/** -			 * @constant verb stored as conjugated noun in database +			 * @constant verb stored as conjugated noun in database (nominal verb)  			 * -			 * some dictionaries do this, also used internally to represent -			 * conjugation if found for suru-verb +			 * @deprecated The use of conjugated forms in dictionaries is discouraged. +			 * +			 * This tag is added by the deconjugation code to check for a legal +			 * deconjugation if する has been deconjugated away for a word marked +			 * suru-verb.  			 
*/  			SuruIncluded: "class:verb:suru-included", -			/** @constant godan verbs (〜う in [taekim]) */ +			/** @constant 〜う verbs in [taekim] (godan) */  			U: "class:verb:u", -			/** @constant ichidan verbs (〜る in [taekim]) */ +			/** @constant 〜る verbs in [taekim] (ichidan) */  			Ru: "class:verb:ru",  			/** @constant kuru (来る) */  			Kuru: "class:verb:kuru", @@ -101,12 +104,14 @@ export const Tag = {  				Na: "infl:reason:adj:na",  			},  		}, -		/** @constant passive form (e.g. 言われる) */ +		/** @constant makes a verb usable without specifying who carries it out (e.g. 言われる) */  		Passive: "infl:passive",  		/** @constant indicates that a verb *can* happen (e.g. 落ちられる) */  		Potential: "infl:potential", -		Causative: "infl:causative", // TODO: jsdoc this -		Imperative: "infl:imperative", +		/** @constant indicates that someone makes a verb happen (e.g. 食べさせる) */ +		Causative: "infl:causative", +		/** @constant imperative form (e.g. 聞け) */ +		Command: "infl:command",  	},  	/** @constant uncategorized tags */  	Auxiliary: { @@ -52,9 +52,12 @@ NONE OF THESE ARE IMPLEMENTED YET  Some general documentation is done in markdown, but other general documentation  should be done in JSDoc format in the corresponding code files. The -documentation also makes frequent references to [Tae Kim's Japanese grammar -guide][taekim], which is abbreviated to [taekim] instead of copying the link -into the source code each time. +documentation also makes frequent references to, and uses terminology from [Tae +Kim's Japanese grammar guide][taekim], which is abbreviated to [taekim] instead +of copying the link into the source code each time. Tae Kim uses slightly +different terms for grammatical concepts. The 'Tae Kim-version' of these terms +is used for named constants in code. See [tags.ts](language/tags.ts) for an +overview of relevant grammatical terms for the Yomikun parser.  
## The dream diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index 04df2bd..4bff5e3 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -25,7 +25,11 @@ export default [  	{ input: "取らせない", tags: [ Inflection.Negative, Inflection.Causative ], },  	{ input: "取らせられる", tags: [ Inflection.Causative, Inflection.Passive ], },  	{ input: "取らせられない", tags: [ Inflection.Negative, Inflection.Causative, Inflection.Passive ], }, -	{ input: "取れ", tags: [ Inflection.Imperative ], }, -	{ input: "取るな", tags: [ Inflection.Negative, Inflection.Imperative ], }, +	{ input: "取れ", tags: [ Inflection.Command ], }, +	{ input: "取るな", tags: [ Inflection.Negative, Inflection.Command ], }, +	// TODO: りゃ for いることは +	// TODO: じゃ for では +	// TODO: なきゃ + なくちゃ +	// and more!  ] satisfies Test[]; diff --git a/test/reading/cases.ts b/test/reading/cases.ts index 9fe916f..e6b0787 100644 --- a/test/reading/cases.ts +++ b/test/reading/cases.ts @@ -6,6 +6,7 @@ interface Test {  };  export default [ +	// BEGIN BULK IMPORT (ANKI)  	{  		input: "家の主をなめるなよ…",  		reading: "うちのあるじをなめるなよ…", @@ -9461,5 +9462,12 @@ export default [  		reading: "かゆいところはないかな?",  		output: "かゆいところはないかな?",  		tags: [ "お兄ちゃんはおしまい!" ] -	} +	}, +	// END BULK IMPORT (ANKI) +	{ // お兄ちゃんはおしまい! episode 09 @ 06:46 +		input: "ええ~ デート?\n違わい!", +		reading: "ええ~ デート?\nちがわい!", +		output: "ええ~ デート?\n[違](ちが)わい!", +		tags: [ "お兄ちゃんはおしまい!" ] +	},  ] satisfies Test[];  |