diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-06-29 11:33:23 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-06-29 11:33:23 +0200 |
commit | c998e1c0477d51c886f9e4246e102dec4d7ef8dd (patch) | |
tree | 4d979c57f16b138ff4b2ce5fb3151ce241af6881 /language | |
parent | 67dbb6421976254658c5e38045513129dd18187a (diff) |
add jmdict importer to repo
Diffstat (limited to 'language')
-rw-r--r-- | language/tags.ts | 16 |
1 files changed, 15 insertions, 1 deletions
```diff
diff --git a/language/tags.ts b/language/tags.ts
index 4c1f134..d56ce98 100644
--- a/language/tags.ts
+++ b/language/tags.ts
@@ -5,7 +5,7 @@ export const Tag = {
 	/** @constant verb subgroup */
 	Verb: {
 		/** @constant any verb (fallback for vague dictionaries) */
-		Unspecified: "class:verb",
+		Unspecified: "class:verb", // TODO: deprecate this property and implement verb classifier in ../import/util.ts
 		/** @constant noun that can be conjugated into a verb by adding する */
 		Suru: "class:verb:suru",
 		/**
@@ -100,3 +100,17 @@ export type TokenTag = string; // no way around it
 
 export type TokenTags = Set<TokenTag>;
 
+/** @summary parse concatenated tag string to TokenTags */
+export function parseTags(input: string) {
+	var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[];
+	var filteredTags: TokenTag[] = [];
+	for (var tag of tags) {
+		// skip past tense tags after -te and -tari deinflection
+		if (tag == Tag.Inflection.Tense.Past &&
+			filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue;
+
+		filteredTags.push(tag);
+	}
+	return new Set(filteredTags) as TokenTags;
+}
+
```