import type { JMdict } from "npm:@scriptin/jmdict-simplified-types";

// Converts a jmdict-simplified JSON dump (read from stdin) into a SQL script
// (written to stdout) that fills the `ingest` table.
//
// This script is still rough and does not transfer all information present in
// the dictionary: only the first kana reading and the first writing of each
// entry are emitted, and only the tags listed in `tagLookup` are carried over.
//
// Proceed with caution.

/** A single dictionary entry, derived from the imported JMdict type. */
type Word = JMdict["words"][number];

/** One buffered SQL tuple plus the optional trailing `--` comment for it. */
interface Row {
  values: string;
  comment: string;
}

const LANG = "eng";

// TODO: more tags
// NOTE(review): JMdict part-of-speech tags for godan verbs appear to be more
// specific than "v5" (e.g. "v5k", "v5r") — confirm whether "class/v5" ever
// matches real data.
const tagLookup = new Map<string, string>([
  ["misc/uk", "aux:uk"],
  ["class/adv", "class:adverb"],
  ["class/vs", "class:verb:suru"],
  ["class/v1", "class:verb:ru"],
  ["class/v5", "class:verb:u"],
  ["class/n", "class:noun"],
  ["class/suf", "class:suffix"],
  ["class/prt", "class:part"],
  ["class/exp", "class:expr"],
]);

/** Escapes `text` for use inside a single-quoted SQL string literal. */
function sqlEscape(text: string): string {
  return text.replaceAll("'", "''");
}

/**
 * Gathers the field/dialect/misc/part-of-speech tags of every sense of `word`,
 * de-duplicates them, and translates the ones present in `tagLookup`.
 * Tags without a mapping are dropped.
 */
function collectTags(word: Word): string[] {
  const keys = new Set<string>([
    ...word.sense.flatMap((s) => s.field).map((t) => `field/${t}`),
    ...word.sense.flatMap((s) => s.dialect).map((t) => `dialect/${t}`),
    ...word.sense.flatMap((s) => s.misc).map((t) => `misc/${t}`),
    ...word.sense.flatMap((s) => s.partOfSpeech).map((t) => `class/${t}`),
  ]);

  const mapped: string[] = [];
  for (const key of keys) {
    const tag = tagLookup.get(key);
    if (tag !== undefined) mapped.push(tag);
  }
  return mapped;
}

/**
 * Builds the SQL tuples for one dictionary entry: one row per sense whose
 * first gloss is in `LANG`. Returns an empty array when the entry has no such
 * sense or no kana reading.
 */
function rowsForWord(word: Word): Row[] {
  // TODO: properly resolve appliesToKanji/appliesToKana
  const definitions = word.sense
    // `?.` guards against a sense with an empty gloss list.
    .filter((s) => s.gloss[0]?.lang === LANG)
    .map((s) => s.gloss.map((g) => g.text).join(", "));
  if (definitions.length === 0) return [];

  const firstKana = word.kana[0];
  if (firstKana === undefined) {
    console.error("skipping entry without a kana reading");
    return [];
  }
  const reading = firstKana.text;

  // Kana-only entries use their kana forms as writings. The parsed input is
  // left untouched.
  const writings: readonly { text: string }[] =
    word.kanji.length > 0 ? word.kanji : word.kana;
  const writing = writings[0].text;
  const otherWritings = writings
    .map((w) => w.text)
    .filter((text) => text !== writing);

  const tags = collectTags(word).join(" ");

  return definitions.map((definition, index) => ({
    values:
      `\t('${sqlEscape(writing)}', '${sqlEscape(reading)}', ` +
      `'${sqlEscape(tags)}', ${index + 1}, '${sqlEscape(definition)}')`,
    // Only the first row of an entry carries the list of alternative writings.
    comment:
      index === 0 && otherWritings.length > 0
        ? ` -- TODO: alts: ${otherWritings.join(", ")}`
        : "",
  }));
}

// no simple way to do this on non-unix using Deno.stdin
const input = await Deno.readFile("/dev/stdin");
const jmdict = JSON.parse(new TextDecoder().decode(input)) as JMdict;

// Rows are buffered so the statement terminator can be chosen from the rows
// actually emitted rather than from the position in `jmdict.words`; entries
// that are skipped must not leave the statement without its closing `;`.
const rows: Row[] = jmdict.words.flatMap(rowsForWord);

console.log(`.param set :dict 'jmdict_${LANG}'`);
console.log(".param set :lang 'en'");

// An `insert ... values` with no tuples is invalid SQL, so omit it entirely
// when nothing was produced.
if (rows.length > 0) {
  // TODO: separate term and glossary tags
  console.log("insert into ingest(expression, reading, term_tags, glossary_sort, glossary) values");
  rows.forEach((row, index) => {
    const terminator = index === rows.length - 1 ? ";" : ",";
    console.log(`${row.values}${terminator}${row.comment}`);
  });
}