aboutsummaryrefslogtreecommitdiff
path: root/language
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-06-29 11:33:23 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-06-29 11:33:23 +0200
commit c998e1c0477d51c886f9e4246e102dec4d7ef8dd (patch)
tree 4d979c57f16b138ff4b2ce5fb3151ce241af6881 /language
parent 67dbb6421976254658c5e38045513129dd18187a (diff)
add jmdict importer to repo
Diffstat (limited to 'language')
-rw-r--r-- language/tags.ts 16
1 files changed, 15 insertions, 1 deletions
diff --git a/language/tags.ts b/language/tags.ts
index 4c1f134..d56ce98 100644
--- a/language/tags.ts
+++ b/language/tags.ts
@@ -5,7 +5,7 @@ export const Tag = {
/** @constant verb subgroup */
Verb: {
/** @constant any verb (fallback for vague dictionaries) */
- Unspecified: "class:verb",
+ Unspecified: "class:verb", // TODO: deprecate this property and implement verb classifier in ../import/util.ts
/** @constant noun that can be conjugated into a verb by adding する */
Suru: "class:verb:suru",
/**
@@ -100,3 +100,17 @@ export type TokenTag = string; // no way around it
export type TokenTags = Set<TokenTag>;
+/**
+ * @summary parse concatenated tag string to TokenTags
+ *
+ * Splits `input` on runs of spaces and collects the resulting tags into a
+ * set. Empty or whitespace-only input yields an empty set instead of a set
+ * holding a single empty-string tag.
+ *
+ * NOTE(review): `anyOf` is not a standard Array method; it is presumably a
+ * project extension that reports whether the array contains any of the given
+ * tags -- confirm against its definition.
+ *
+ * @param input  space-separated tag string
+ * @returns set of the tags that were kept
+ */
+export function parseTags(input: string): TokenTags {
+	const filteredTags: TokenTag[] = [];
+	for (const tag of input.trim().split(/ +/)) {
+		// "".split() yields [""], which is not a tag
+		if (tag.length === 0) continue;
+		// skip past tense tags after -te and -tari deinflection
+		if (tag === Tag.Inflection.Tense.Past &&
+			filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue;
+
+		filteredTags.push(tag);
+	}
+	return new Set(filteredTags);
+}
+