diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-06-28 23:59:50 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-06-28 23:59:50 +0200 |
commit | 67dbb6421976254658c5e38045513129dd18187a (patch) | |
tree | 288b599d1097b26bdbcad3b6749b38e133017cf2 /db/dict/init.sql |
initial public commit
Diffstat (limited to 'db/dict/init.sql')
-rw-r--r-- | db/dict/init.sql | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/db/dict/init.sql b/db/dict/init.sql new file mode 100644 index 0000000..4e9fcc9 --- /dev/null +++ b/db/dict/init.sql @@ -0,0 +1,115 @@ +-- deinflection map (see dict/deinflections.sql) +create table if not exists deinflection ( + tag text not null, + kana_in text not null, + kana_out text not null, + rules_in int not null default 0, + rules_out int not null default 0 +); +create index deinflection_kana_in on deinflection (kana_in); + +-- lookup table for deinflection rule bitmasks +create table if not exists deinflection_rules ( + mask int not null default 0, + tag text not null +); + +-- dictionary metadata +create table if not exists dict ( + id integer primary key autoincrement, + tag text not null, -- TODO: dict.tag -> dict.tag_id + language text null default null, + priority int not null default 1 +); + +-- tag code<->id lookup +create table if not exists tag ( + id integer primary key autoincrement, + code text not null, + unique(code) on conflict ignore +); +create index tag_code on tag (code); + + +-- definition of term (with single meaning) +-- +-- terms that have multiple meanings should have multiple entries in the +-- definition table with the same term_id. glossary contains an explanation or +-- list of synonyms to illustrate the meaning of the term. +create table if not exists definition ( + id integer primary key autoincrement, + term_id int not null, -- reference to term (combined writing and reading) + sort int not null default 1, -- sort multiple meanings/glossaries within dictionary + -- search results for terms are grouped by term, then dictionary, then sort + -- within each dictionary + glossary text null default null, -- explanation or list of synonyms for term + dict_id int not null, -- origin dictionary of term + foreign key(term_id) references term(id), + foreign key(dict_id) references dict(id), + unique(glossary) on conflict ignore +); + +-- human-readable tag label lookup table (used in UI and for translations) +create table if not exists tag_label ( + id integer primary key autoincrement, + tag_id int not null, -- reference to tag + language text not null default "en-US", -- label locale as RFC 5646 tag (e.g. ja) + -- label locale is used when displaying tags in a UI frontend. labels are + -- sorted based on langauge preference, with "en-US" being inserted last as a + -- fallback. + label_short text null default null, -- abbreviated label (e.g. ティアキン) + label text not null, -- full label (e.g. ティアーズ・オブ・ザ・キングダム) + description text null default null, -- short description (e.g. ゼルダの伝説シリーズより) + foreign key(tag_id) references tag(id) +); + +-- standalone combination of kanji and reading +-- +-- terms are added automatically when importing a dictionary, but are shared +-- between dictionaries and thus not removed when disabling/removing a +-- dictionary. terms are ranked outside the database, and the starting rank +-- score can be changed using the user database (TODO) +create table if not exists term ( + id integer primary key autoincrement, + expression text not null, -- writing of term (e.g. 乗り越える) + reading text not null, -- reading of term (e.g. のりこえる) + alt int null default null, -- reference to main writing if this is is an alternate + -- alternate readings are stored as normal terms, but point to their main + -- reading. also used internally to store alternate readings of irregular + -- verbs example: + -- id expression reading alt + -- 1 言葉 ことば NULL + -- 2 詞 ことば 1 + -- 3 辞 ことば 1 + -- 4 来る くる NULL + -- 5 来た きた 4 + -- NOTE: alternate writings can technically be stored recursively this way, + -- but are only resolved one level deep TODO: resolve alternates + foreign key(alt) references term(id), + unique(expression, reading) on conflict ignore +); +create index term_expression on term (expression); +create index term_reading on term (reading); +-- TODO: (automatically) remove unused terms from db (using constraints?) + + +-- allow many<->many relation between definition and tag +create table if not exists definition_tag ( + id integer primary key autoincrement, + definition_id int not null, + tag_id int not null, + foreign key(definition_id) references definition(id), + foreign key(tag_id) references tag(id), + unique(definition_id, tag_id) on conflict ignore +); + +-- allow many<->many relation between term and tag +create table if not exists term_tag ( + id integer primary key autoincrement, + term_id int not null, + tag_id int not null, + foreign key(term_id) references term(id), + foreign key(tag_id) references tag(id), + unique(term_id, tag_id) on conflict ignore +); + |