diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-15 21:52:57 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-15 21:52:57 +0200 |
commit | 8e179a43e909ce4683f753a90bb3505630f05ad8 (patch) | |
tree | 5e46594af33ba7f82d1bd5ea954b99b4a92d0093 /import/jmdict | |
parent | 3dc9484fc81db8f3c8ffd4ebb4bab042e66c6214 (diff) |
implement alternate writings (failing tests down to 500)
Diffstat (limited to 'import/jmdict')
-rw-r--r-- | import/jmdict/jmdict.ts | 28 |
1 files changed, 20 insertions, 8 deletions
diff --git a/import/jmdict/jmdict.ts b/import/jmdict/jmdict.ts
index 155c423..bf3614e 100644
--- a/import/jmdict/jmdict.ts
+++ b/import/jmdict/jmdict.ts
@@ -1,10 +1,15 @@
 import type { JMdict } from "npm:@scriptin/jmdict-simplified-types";
-import { Tag } from "../../language/tags.ts";
+import { Tag } from "../../search/tags.ts";
+import "../../util/string.ts";
 
 // this script is very messy right now, and doesn't transfer all information
 // present in the dictionary.
 //
 // proceed with caution
+//
+// TODO: separate term and glossary tags
+// TODO: dictionary normalization (numbers/half-width/長音符)
+// TODO: use sql synthesis library instead of garbo format strings
 
 const LANG = "eng";
 
@@ -47,8 +52,8 @@ const tagLookup = {
 console.log(`.param set :dict 'jmdict_${LANG}'`);
 console.log(".param set :lang 'en'");
 
-// TODO: separate term and glossary tags
-console.log("insert into ingest(expression, reading, term_tags, glossary_sort, glossary) values");
+var alts = "insert into alts(expression, reading, normal_expression, normal_reading) values\n";
+var ingest = "insert into ingest(expression, reading, term_tags, glossary_sort, glossary) values\n"
 
 // var max = -100;
 for (let i = 0; i < jmdict.words.length; i++) {
@@ -75,10 +80,17 @@ for (let i = 0; i < jmdict.words.length; i++) {
 	])];
 	var tags = tags.filter(i => i in tagLookup).map(i => tagLookup[i]);
 	// if (writing == "来る") console.log(term);
-	for (let j = 0; j < definitions.length; j++) {
-		var out = `\t('${writing}', '${reading}', '${tags.join(" ")}', ${j+1}, '${definitions[j].replaceAll("'", "''")}')${(last && j == definitions.length-1) ? ';' : ','}`;
-		if (j == 0 && other_writings.length > 0) out += ` -- TODO: alts: ${other_writings.join(", ")}`;
-		console.log(out);
-	}
+	definitions.forEach((definition, j) => {
+		ingest += `\t('${writing}', '${reading}', '${tags.join(" ")}', ${j+1}, '${definition.replaceAll("'", "''")}'),\n`;
+	});
+	other_writings.forEach(alt => {
+		alts += `\t('${alt}', '${reading}', '${writing}', '${reading}'),\n`;
+	});
 }
 
+ingest = ingest.replaceLast(",", ";");
+alts = alts.replaceLast(",", ";");
+
+console.log(ingest);
+console.log(alts);
+