about summary refs log tree commit diff
path: root/import/jmdict
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-15 21:52:57 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-15 21:52:57 +0200
commit 8e179a43e909ce4683f753a90bb3505630f05ad8 (patch)
tree 5e46594af33ba7f82d1bd5ea954b99b4a92d0093 /import/jmdict
parent 3dc9484fc81db8f3c8ffd4ebb4bab042e66c6214 (diff)
implement alternate writings (failing tests down to 500)
Diffstat (limited to 'import/jmdict')
-rw-r--r-- import/jmdict/jmdict.ts 28
1 files changed, 20 insertions, 8 deletions
diff --git a/import/jmdict/jmdict.ts b/import/jmdict/jmdict.ts
index 155c423..bf3614e 100644
--- a/import/jmdict/jmdict.ts
+++ b/import/jmdict/jmdict.ts
@@ -1,10 +1,15 @@
import type { JMdict } from "npm:@scriptin/jmdict-simplified-types";
-import { Tag } from "../../language/tags.ts";
+import { Tag } from "../../search/tags.ts";
+import "../../util/string.ts";
// this script is very messy right now, and doesn't transfer all information
// present in the dictionary.
//
// proceed with caution
+//
+// TODO: separate term and glossary tags
+// TODO: dictionary normalization (numbers/half-width/長音符)
+// TODO: use sql synthesis library instead of garbo format strings
const LANG = "eng";
@@ -47,8 +52,8 @@ const tagLookup = {
console.log(`.param set :dict 'jmdict_${LANG}'`);
console.log(".param set :lang 'en'");
-// TODO: separate term and glossary tags
-console.log("insert into ingest(expression, reading, term_tags, glossary_sort, glossary) values");
+var alts = "insert into alts(expression, reading, normal_expression, normal_reading) values\n";
+var ingest = "insert into ingest(expression, reading, term_tags, glossary_sort, glossary) values\n"
// var max = -100;
for (let i = 0; i < jmdict.words.length; i++) {
@@ -75,10 +80,17 @@ for (let i = 0; i < jmdict.words.length; i++) {
])];
var tags = tags.filter(i => i in tagLookup).map(i => tagLookup[i]);
// if (writing == "来る") console.log(term);
- for (let j = 0; j < definitions.length; j++) {
- var out = `\t('${writing}', '${reading}', '${tags.join(" ")}', ${j+1}, '${definitions[j].replaceAll("'", "''")}')${(last && j == definitions.length-1) ? ';' : ','}`;
- if (j == 0 && other_writings.length > 0) out += ` -- TODO: alts: ${other_writings.join(", ")}`;
- console.log(out);
- }
+ definitions.forEach((definition, j) => {
+ ingest += `\t('${writing}', '${reading}', '${tags.join(" ")}', ${j+1}, '${definition.replaceAll("'", "''")}'),\n`;
+ });
+ other_writings.forEach(alt => {
+ alts += `\t('${alt}', '${reading}', '${writing}', '${reading}'),\n`;
+ });
}
+ingest = ingest.replaceLast(",", ";");
+alts = alts.replaceLast(",", ";");
+
+console.log(ingest);
+console.log(alts);
+