aboutsummaryrefslogtreecommitdiff
path: root/db/dict/init.sql
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-06-28 23:59:50 +0200
committerlonkaars <loek@pipeframe.xyz>2023-06-28 23:59:50 +0200
commit67dbb6421976254658c5e38045513129dd18187a (patch)
tree288b599d1097b26bdbcad3b6749b38e133017cf2 /db/dict/init.sql
initial public commit
Diffstat (limited to 'db/dict/init.sql')
-rw-r--r--db/dict/init.sql115
1 files changed, 115 insertions, 0 deletions
diff --git a/db/dict/init.sql b/db/dict/init.sql
new file mode 100644
index 0000000..4e9fcc9
--- /dev/null
+++ b/db/dict/init.sql
@@ -0,0 +1,115 @@
+-- deinflection map (see dict/deinflections.sql)
+create table if not exists deinflection (
+ tag text not null,
+ kana_in text not null,
+ kana_out text not null,
+ rules_in int not null default 0,
+ rules_out int not null default 0
+);
+create index deinflection_kana_in on deinflection (kana_in);
+
+-- lookup table for deinflection rule bitmasks
+create table if not exists deinflection_rules (
+ mask int not null default 0,
+ tag text not null
+);
+
+-- dictionary metadata
+create table if not exists dict (
+ id integer primary key autoincrement,
+ tag text not null, -- TODO: dict.tag -> dict.tag_id
+ language text null default null,
+ priority int not null default 1
+);
+
+-- tag code<->id lookup
+create table if not exists tag (
+ id integer primary key autoincrement,
+ code text not null,
+ unique(code) on conflict ignore
+);
+create index tag_code on tag (code);
+
+
+-- definition of term (with single meaning)
+--
+-- terms that have multiple meanings should have multiple entries in the
+-- definition table with the same term_id. glossary contains an explanation or
+-- list of synonyms to illustrate the meaning of the term.
+create table if not exists definition (
+ id integer primary key autoincrement,
+ term_id int not null, -- reference to term (combined writing and reading)
+ sort int not null default 1, -- sort multiple meanings/glossaries within dictionary
+ -- search results for terms are grouped by term, then dictionary, then sort
+ -- within each dictionary
+ glossary text null default null, -- explanation or list of synonyms for term
+ dict_id int not null, -- origin dictionary of term
+ foreign key(term_id) references term(id),
+ foreign key(dict_id) references dict(id),
+ unique(glossary) on conflict ignore
+);
+
+-- human-readable tag label lookup table (used in UI and for translations)
+create table if not exists tag_label (
+ id integer primary key autoincrement,
+ tag_id int not null, -- reference to tag
+ language text not null default "en-US", -- label locale as RFC 5646 tag (e.g. ja)
+ -- label locale is used when displaying tags in a UI frontend. labels are
+ -- sorted based on langauge preference, with "en-US" being inserted last as a
+ -- fallback.
+ label_short text null default null, -- abbreviated label (e.g. ティアキン)
+ label text not null, -- full label (e.g. ティアーズ・オブ・ザ・キングダム)
+ description text null default null, -- short description (e.g. ゼルダの伝説シリーズより)
+ foreign key(tag_id) references tag(id)
+);
+
+-- standalone combination of kanji and reading
+--
+-- terms are added automatically when importing a dictionary, but are shared
+-- between dictionaries and thus not removed when disabling/removing a
+-- dictionary. terms are ranked outside the database, and the starting rank
+-- score can be changed using the user database (TODO)
+create table if not exists term (
+ id integer primary key autoincrement,
+ expression text not null, -- writing of term (e.g. 乗り越える)
+ reading text not null, -- reading of term (e.g. のりこえる)
+ alt int null default null, -- reference to main writing if this is is an alternate
+ -- alternate readings are stored as normal terms, but point to their main
+ -- reading. also used internally to store alternate readings of irregular
+ -- verbs example:
+ -- id expression reading alt
+ -- 1 言葉 ことば NULL
+ -- 2 詞 ことば 1
+ -- 3 辞 ことば 1
+ -- 4 来る くる NULL
+ -- 5 来た きた 4
+ -- NOTE: alternate writings can technically be stored recursively this way,
+ -- but are only resolved one level deep TODO: resolve alternates
+ foreign key(alt) references term(id),
+ unique(expression, reading) on conflict ignore
+);
+create index term_expression on term (expression);
+create index term_reading on term (reading);
+-- TODO: (automatically) remove unused terms from db (using constraints?)
+
+
+-- allow many<->many relation between definition and tag
+create table if not exists definition_tag (
+ id integer primary key autoincrement,
+ definition_id int not null,
+ tag_id int not null,
+ foreign key(definition_id) references definition(id),
+ foreign key(tag_id) references tag(id),
+ unique(definition_id, tag_id) on conflict ignore
+);
+
+-- allow many<->many relation between term and tag
+create table if not exists term_tag (
+ id integer primary key autoincrement,
+ term_id int not null,
+ tag_id int not null,
+ foreign key(term_id) references term(id),
+ foreign key(tag_id) references tag(id),
+ unique(term_id, tag_id) on conflict ignore
+);
+