summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Yatskov <alex@foosoft.net>2016-03-20 20:34:50 -0700
committerAlex Yatskov <alex@foosoft.net>2016-03-20 20:34:50 -0700
commitf784462c4dd63d7241cdb31b56761db3b5c7bf25 (patch)
treea957c6d1305afe380a7a2d0c2f6e9f6d7a0e29be
parent1ed2e70c6ff9e4a6af421444e50bba41950c2210 (diff)
WIP
-rw-r--r--ext/jp/translator.js122
1 files changed, 76 insertions, 46 deletions
diff --git a/ext/jp/translator.js b/ext/jp/translator.js
index 23f45113..e86ea238 100644
--- a/ext/jp/translator.js
+++ b/ext/jp/translator.js
@@ -51,63 +51,52 @@ class Translator {
}
+ // Finds dictionary terms for every non-empty prefix of `text`, longest prefix first.
+ // Each prefix goes through the deinflector: a null result means the prefix is
+ // processed as-is, otherwise every returned deinflection is processed.
+ // Returns {results, length}: the formatted, sorted results and the longest
+ // `source` length among them (0 when there are no results).
findTerm(text) {
- const groups = {}
+ const groups = {};
- for (let i = text.length; i >= 0; --i) {
+ // Stop before the empty prefix, like xrange(len(text), 0, -1) in the Python original.
+ for (let i = text.length; i > 0; --i) {
const term = text.slice(0, i);
- const deinflections = this.deinflector.deinflect(term, this.validator);
- if (deinflections === null) {
+ // NOTE(review): this.validator is passed unbound; confirm it does not depend on `this`.
+ const dfs = this.deinflector.deinflect(term, this.validator);
+ if (dfs === null) {
this.processTerm(groups, term);
} else {
- for (const deinflection of deinflections) {
- //fix
- //this.processTerm(groups, **deinflection);
+ for (const df of dfs) {
+ this.processTerm(groups, df.source, df.rules, df.root);
}
}
- const results =
}
-
-
- // text = util.sanitize(text, wildcards=wildcards)
-
- // groups = dict()
- // for i in xrange(len(text), 0, -1):
- // term = text[:i]
- // deinflections = self.deinflector.deinflect(term, self.validator)
- // if deinflections is None:
- // self.processTerm(groups, term, wildcards=wildcards)
- // else:
- // for deinflection in deinflections:
- // self.processTerm(groups, **deinflection)
+
+ // Format and sort once, after all prefixes are collected; formatResult and
+ // resultSorter are methods of this class, so they are reached through `this`.
+ const results = this.formatResult(groups).sort(this.resultSorter);
- // results = map(self.formatResult, groups.items())
- // results = filter(operator.truth, results)
- // results = sorted(results, key=lambda d: (len(d['source']), 'P' in d['tags'], -len(d['rules'])), reverse=True)
-
- // length = 0
- // for result in results:
- // length = max(length, len(result['source']))
+ let length = 0;
+ for (const result of results) {
+ length = Math.max(length, result.source.length);
+ }
- // return results, length
+ return {results: results, length: length};
}
+ // Looks up each distinct character of `text` once, in first-occurrence order,
+ // via this.dictionary.findKanji and returns the accumulated matches as one array.
+ // NOTE(review): assumes dictionary.findKanji returns an array (or a single entry);
+ // a null return would be appended as-is by concat — confirm against the dictionary.
findKanji(text) {
- // text = util.sanitize(text, kana=False)
- // results = list()
-
- // processed = dict()
- // for c in text:
- // if c not in processed:
- // match = self.dictionary.findCharacter(c)
- // if match is not None:
- // results.append(match)
- // processed[c] = match
-
- // return results
+ let results = [];
+
+ // A Set provides the has()/add() membership API; a plain object has no has() method.
+ const processed = new Set();
+ for (const c of text) {
+ if (!processed.has(c)) {
+ results = results.concat(this.dictionary.findKanji(c));
+ processed.add(c);
+ }
+ }
+
+ return results;
}
processTerm(groups, source, rules=[], root='') {
+ root = root || source;
+
+ // for (const entry of this.dictionary.findTerm(root)) {
+ // const key =
+
+ // }
+
// root = root or source
// for entry in self.dictionary.findTerm(root, wildcards):
@@ -117,12 +106,53 @@ class Translator {
}
+ // Flattens grouped matches into an array of result objects.
+ // `group` is iterated as [key, value] pairs, where key unpacks to
+ // [expression, reading, glossary] and value unpacks to [tags, source, rules].
+ // NOTE(review): for...of needs an iterable (e.g. a Map); a plain object literal
+ // is not iterable — confirm what callers pass in.
formatResult(group) {
- // root = root or source
+ const results = [];
+ for (const [key, value] of group) {
+ // Declare the unpacked names locally; assigning to undeclared names throws in strict code.
+ const [expression, reading, glossary] = key;
+ const [tags, source, rules] = value;
+
+ results.push({
+ expression: expression,
+ reading: reading,
+ glossary: glossary,
+ rules: rules,
+ source: source,
+ tags: tags
+ });
+ }
- // for entry in self.dictionary.findTerm(root, wildcards):
- // key = entry['expression'], entry['reading'], entry['glossary']
- // if key not in groups:
- // groups[key] = entry['tags'], source, rules
+ return results;
+ }
+
+ // Comparator for Array.prototype.sort over result objects.
+ // Orders by, in priority: longer `source` first, then entries whose `tags`
+ // contain 'P' first, then fewer `rules` first. Returns -1, 1, or 0.
+ resultSorter(v1, v2) {
+ const sl1 = v1.source.length;
+ const sl2 = v2.source.length;
+
+ if (sl1 > sl2) {
+ return -1;
+ } else if (sl1 < sl2) {
+ return 1;
+ }
+
+ const p1 = v1.tags.indexOf('P') >= 0;
+ const p2 = v2.tags.indexOf('P') >= 0;
+
+ if (p1 && !p2) {
+ return -1;
+ } else if (!p1 && p2) {
+ return 1;
+ }
+
+ const rl1 = v1.rules.length;
+ const rl2 = v2.rules.length;
+
+ // Both directions must be covered so that compare(a, b) === -compare(b, a).
+ if (rl1 < rl2) {
+ return -1;
+ } else if (rl1 > rl2) {
+ return 1;
+ }
+
+ return 0;
}
validator(term) {