summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortoasted-nutbread <toasted-nutbread@users.noreply.github.com>2020-02-11 21:21:37 -0500
committerGitHub <noreply@github.com>2020-02-11 21:21:37 -0500
commit9c5ad3ea67249416a31cf5c2f6e4917c0bed0fbf (patch)
tree3868a4b8ab832c4517cb90e969f3f411b8bb4a68
parent9ffd0cb441f9773651ed734d8bccce3667700ceb (diff)
parent8733e324ecbe10bcb4bc9f1a0b9568c7f32429d3 (diff)
Merge pull request #346 from toasted-nutbread/dictionary-schemas
Dictionary schemas
-rw-r--r--ext/bg/data/dictionary-index-schema.json40
-rw-r--r--ext/bg/data/dictionary-kanji-bank-v1-schema.json33
-rw-r--r--ext/bg/data/dictionary-kanji-bank-v3-schema.json44
-rw-r--r--ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json25
-rw-r--r--ext/bg/data/dictionary-tag-bank-v3-schema.json32
-rw-r--r--ext/bg/data/dictionary-term-bank-v1-schema.json36
-rw-r--r--ext/bg/data/dictionary-term-bank-v3-schema.json48
-rw-r--r--ext/bg/data/dictionary-term-meta-bank-v3-schema.json25
-rw-r--r--test/dictionary-validate.js90
9 files changed, 373 insertions, 0 deletions
diff --git a/ext/bg/data/dictionary-index-schema.json b/ext/bg/data/dictionary-index-schema.json
new file mode 100644
index 00000000..9865fcc1
--- /dev/null
+++ b/ext/bg/data/dictionary-index-schema.json
@@ -0,0 +1,40 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "description": "Index file containing information about the data contained in the dictionary.",
+ "required": [
+ "title",
+ "revision"
+ ],
+ "properties": {
+ "title": {
+ "type": "string",
+ "description": "Title of the dictionary."
+ },
+ "revision": {
+ "type": "string",
+ "description": "Revision of the dictionary. This value is only used for displaying information."
+ },
+ "sequenced": {
+ "type": "boolean",
+ "default": false,
+ "description": "Whether or not this dictionary can be used as the primary dictionary. Primary dictionaries typically contain term/expression definitions."
+ },
+ "format": {
+ "type": "integer",
+ "description": "Format of data found in the JSON data files."
+ },
+ "version": {
+ "type": "integer",
+ "description": "Alias for format."
+ }
+ },
+ "anyOf": [
+ {
+ "required": ["format"]
+ },
+ {
+ "required": ["version"]
+ }
+ ]
+} \ No newline at end of file
diff --git a/ext/bg/data/dictionary-kanji-bank-v1-schema.json b/ext/bg/data/dictionary-kanji-bank-v1-schema.json
new file mode 100644
index 00000000..6dad5a7a
--- /dev/null
+++ b/ext/bg/data/dictionary-kanji-bank-v1-schema.json
@@ -0,0 +1,33 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "array",
+ "description": "Data file containing kanji information.",
+ "additionalItems": {
+ "type": "array",
+ "description": "Information about a single kanji character.",
+ "minItems": 4,
+ "items": [
+ {
+ "type": "string",
+ "description": "Kanji character.",
+ "minLength": 1
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated onyomi readings for the kanji character. An empty string is treated as no readings."
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated kunyomi readings for the kanji character. An empty string is treated as no readings."
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated tags for the kanji character. An empty string is treated as no tags."
+ }
+ ],
+ "additionalItems": {
+ "type": "string",
+ "description": "A meaning for the kanji character."
+ }
+ }
+} \ No newline at end of file
diff --git a/ext/bg/data/dictionary-kanji-bank-v3-schema.json b/ext/bg/data/dictionary-kanji-bank-v3-schema.json
new file mode 100644
index 00000000..a5b82039
--- /dev/null
+++ b/ext/bg/data/dictionary-kanji-bank-v3-schema.json
@@ -0,0 +1,44 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "array",
+ "description": "Data file containing kanji information.",
+ "additionalItems": {
+ "type": "array",
+ "description": "Information about a single kanji character.",
+ "minItems": 6,
+ "items": [
+ {
+ "type": "string",
+ "description": "Kanji character.",
+ "minLength": 1
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated onyomi readings for the kanji character. An empty string is treated as no readings."
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated kunyomi readings for the kanji character. An empty string is treated as no readings."
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated tags for the kanji character. An empty string is treated as no tags."
+ },
+ {
+ "type": "array",
+ "description": "Array of meanings for the kanji character.",
+ "items": {
+ "type": "string",
+ "description": "A meaning for the kanji character."
+ }
+ },
+ {
+ "type": "object",
+ "description": "Various stats for the kanji character.",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+} \ No newline at end of file
diff --git a/ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json b/ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json
new file mode 100644
index 00000000..62479026
--- /dev/null
+++ b/ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json
@@ -0,0 +1,25 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "array",
+ "description": "Custom metadata for kanji characters.",
+ "additionalItems": {
+ "type": "array",
+ "description": "Metadata about a single kanji character.",
+ "minItems": 3,
+ "items": [
+ {
+ "type": "string",
+ "minLength": 1
+ },
+ {
+ "type": "string",
+ "enum": ["freq"],
+ "description": "Type of data. \"freq\" corresponds to frequency information."
+ },
+ {
+ "type": ["string", "number"],
+ "description": "Data for the character."
+ }
+ ]
+ }
+} \ No newline at end of file
diff --git a/ext/bg/data/dictionary-tag-bank-v3-schema.json b/ext/bg/data/dictionary-tag-bank-v3-schema.json
new file mode 100644
index 00000000..ee5ca64d
--- /dev/null
+++ b/ext/bg/data/dictionary-tag-bank-v3-schema.json
@@ -0,0 +1,32 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "array",
+ "description": "Data file containing tag information for terms and kanji.",
+ "additionalItems": {
+ "type": "array",
+ "description": "Information about a single tag.",
+ "minItems": 5,
+ "items": [
+ {
+ "type": "string",
+ "description": "Tag name."
+ },
+ {
+ "type": "string",
+ "description": "Category for the tag."
+ },
+ {
+ "type": "number",
+ "description": "Sorting order for the tag."
+ },
+ {
+ "type": "string",
+ "description": "Notes for the tag."
+ },
+ {
+ "type": "number",
+ "description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
+ }
+ ]
+ }
+} \ No newline at end of file
diff --git a/ext/bg/data/dictionary-term-bank-v1-schema.json b/ext/bg/data/dictionary-term-bank-v1-schema.json
new file mode 100644
index 00000000..6ffb26e6
--- /dev/null
+++ b/ext/bg/data/dictionary-term-bank-v1-schema.json
@@ -0,0 +1,36 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "array",
+ "description": "Data file containing term and expression information.",
+ "additionalItems": {
+ "type": "array",
+ "description": "Information about a single term/expression.",
+ "minItems": 5,
+ "items": [
+ {
+ "type": "string",
+ "description": "Term or expression."
+ },
+ {
+ "type": "string",
+ "description": "Reading of the term/expression, or an empty string if the reading is the same as the term/expression."
+ },
+ {
+ "type": ["string", "null"],
+ "description": "String of space-separated tags for the definition. An empty string is treated as no tags."
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated rule identifiers for the definition which is used to validate deinflection. Valid rule identifiers are: v1: ichidan verb; v5: godan verb; vs: suru verb; vk: kuru verb; adj-i: i-adjective. An empty string corresponds to words which aren't inflected, such as nouns."
+ },
+ {
+ "type": "number",
+ "description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
+ }
+ ],
+ "additionalItems": {
+ "type": "string",
+ "description": "Single definition for the term/expression."
+ }
+ }
+} \ No newline at end of file
diff --git a/ext/bg/data/dictionary-term-bank-v3-schema.json b/ext/bg/data/dictionary-term-bank-v3-schema.json
new file mode 100644
index 00000000..bb982e36
--- /dev/null
+++ b/ext/bg/data/dictionary-term-bank-v3-schema.json
@@ -0,0 +1,48 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "array",
+ "description": "Data file containing term and expression information.",
+ "additionalItems": {
+ "type": "array",
+ "description": "Information about a single term/expression.",
+ "minItems": 8,
+ "items": [
+ {
+ "type": "string",
+ "description": "Term or expression."
+ },
+ {
+ "type": "string",
+ "description": "Reading of the term/expression, or an empty string if the reading is the same as the term/expression."
+ },
+ {
+ "type": ["string", "null"],
+ "description": "String of space-separated tags for the definition. An empty string is treated as no tags."
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated rule identifiers for the definition which is used to validate deinflection. Valid rule identifiers are: v1: ichidan verb; v5: godan verb; vs: suru verb; vk: kuru verb; adj-i: i-adjective. An empty string corresponds to words which aren't inflected, such as nouns."
+ },
+ {
+ "type": "number",
+ "description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
+ },
+ {
+ "type": "array",
+ "description": "Array of definitions for the term/expression.",
+ "items": {
+ "type": "string",
+ "description": "Single definition for the term/expression."
+ }
+ },
+ {
+ "type": "integer",
+ "description": "Sequence number for the term/expression. Terms/expressions with the same sequence number can be shown together when the \"resultOutputMode\" option is set to \"merge\"."
+ },
+ {
+ "type": "string",
+ "description": "String of space-separated tags for the term/expression. An empty string is treated as no tags."
+ }
+ ]
+ }
+} \ No newline at end of file
diff --git a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json
new file mode 100644
index 00000000..1cc0557f
--- /dev/null
+++ b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json
@@ -0,0 +1,25 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "array",
+ "description": "Custom metadata for terms/expressions.",
+ "additionalItems": {
+ "type": "array",
+ "description": "Metadata about a single term/expression.",
+ "minItems": 3,
+ "items": [
+ {
+ "type": "string",
+ "description": "Term or expression."
+ },
+ {
+ "type": "string",
+ "enum": ["freq"],
+ "description": "Type of data. \"freq\" corresponds to frequency information."
+ },
+ {
+ "type": ["string", "number"],
+ "description": "Data for the term/expression."
+ }
+ ]
+ }
+} \ No newline at end of file
diff --git a/test/dictionary-validate.js b/test/dictionary-validate.js
new file mode 100644
index 00000000..971c4971
--- /dev/null
+++ b/test/dictionary-validate.js
@@ -0,0 +1,90 @@
+const fs = require('fs');
+const path = require('path');
+
+process.noDeprecation = true; // Suppress a warning about JSZip
+const JSZip = require(path.join(__dirname, '../ext/mixed/lib/jszip.min.js'));
+process.noDeprecation = false;
+
+const jsonSchemaFileName = path.join(__dirname, '../ext/bg/js/json-schema.js');
+const jsonSchemaFileSource = fs.readFileSync(jsonSchemaFileName, {encoding: 'utf8'});
+const JsonSchema = Function(`'use strict';${jsonSchemaFileSource};return JsonSchema;`)();
+
+
+function readSchema(relativeFileName) {
+ const fileName = path.join(__dirname, relativeFileName);
+ const source = fs.readFileSync(fileName, {encoding: 'utf8'});
+ return JSON.parse(source);
+}
+
+
+async function validateDictionaryBanks(zip, fileNameFormat, schema) {
+ let index = 1;
+ while (true) {
+ const fileName = fileNameFormat.replace(/%s/, index);
+
+ const file = zip.files[fileName];
+ if (!file) { break; }
+
+ const data = JSON.parse(await file.async('string'));
+ JsonSchema.validate(data, schema);
+
+ ++index;
+ }
+}
+
+async function validateDictionary(fileName, schemas) {
+ const source = fs.readFileSync(fileName);
+ const zip = await JSZip.loadAsync(source);
+
+ const indexFile = zip.files['index.json'];
+ if (!indexFile) {
+ throw new Error('No dictionary index found in archive');
+ }
+
+ const index = JSON.parse(await indexFile.async('string'));
+ const version = index.format || index.version;
+
+ JsonSchema.validate(index, schemas.index);
+
+ await validateDictionaryBanks(zip, 'term_bank_%s.json', version === 1 ? schemas.termBankV1 : schemas.termBankV3);
+ await validateDictionaryBanks(zip, 'term_meta_bank_%s.json', schemas.termMetaBankV3);
+ await validateDictionaryBanks(zip, 'kanji_bank_%s.json', version === 1 ? schemas.kanjiBankV1 : schemas.kanjiBankV3);
+ await validateDictionaryBanks(zip, 'kanji_meta_bank_%s.json', schemas.kanjiMetaBankV3);
+ await validateDictionaryBanks(zip, 'tag_bank_%s.json', schemas.tagBankV3);
+}
+
+
+async function main() {
+ const dictionaryFileNames = process.argv.slice(2);
+ if (dictionaryFileNames.length === 0) {
+ console.log([
+ 'Usage:',
+ ' node dictionary-validate <dictionary-file-names>...'
+ ].join('\n'));
+ return;
+ }
+
+ const schemas = {
+ index: readSchema('../ext/bg/data/dictionary-index-schema.json'),
+ kanjiBankV1: readSchema('../ext/bg/data/dictionary-kanji-bank-v1-schema.json'),
+ kanjiBankV3: readSchema('../ext/bg/data/dictionary-kanji-bank-v3-schema.json'),
+ kanjiMetaBankV3: readSchema('../ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json'),
+ tagBankV3: readSchema('../ext/bg/data/dictionary-tag-bank-v3-schema.json'),
+ termBankV1: readSchema('../ext/bg/data/dictionary-term-bank-v1-schema.json'),
+ termBankV3: readSchema('../ext/bg/data/dictionary-term-bank-v3-schema.json'),
+ termMetaBankV3: readSchema('../ext/bg/data/dictionary-term-meta-bank-v3-schema.json')
+ };
+
+ for (const dictionaryFileName of dictionaryFileNames) {
+ try {
+ console.log(`Validating ${dictionaryFileName}...`);
+ await validateDictionary(dictionaryFileName, schemas);
+ console.log('No issues found');
+ } catch (e) {
+ console.warn(e);
+ }
+ }
+}
+
+
+main();