diff options
Diffstat (limited to 'language')
| -rw-r--r-- | language/tags.ts | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/language/tags.ts b/language/tags.ts
index 4c1f134..d56ce98 100644
--- a/language/tags.ts
+++ b/language/tags.ts
@@ -5,7 +5,7 @@ export const Tag = {
   /** @constant verb subgroup */
   Verb: {
     /** @constant any verb (fallback for vague dictionaries) */
-    Unspecified: "class:verb",
+    Unspecified: "class:verb", // TODO: deprecate this property and implement verb classifier in ../import/util.ts
     /** @constant noun that can be conjugated into a verb by adding する */
     Suru: "class:verb:suru",
     /**
@@ -100,3 +100,17 @@
 export type TokenTag = string; // no way around it
 
 export type TokenTags = Set<TokenTag>;
+/** @summary parse space-separated tag string to TokenTags (blank input yields an empty set) */
+export function parseTags(input: string): TokenTags {
+  const tags: TokenTag[] = input.trim().split(/ +/).filter((tag) => tag !== "");
+  const teForms: TokenTag[] = [Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari];
+  const filteredTags: TokenTag[] = [];
+  for (const tag of tags) {
+    // skip past tense tags after -te and -tari deinflection
+    if (tag === Tag.Inflection.Tense.Past &&
+      teForms.some((suffix) => filteredTags.includes(suffix))) continue;
+    filteredTags.push(tag);
+  }
+  return new Set(filteredTags);
+}
+