diff options
author | Darius Jahandarie <djahandarie@gmail.com> | 2023-11-03 23:32:33 +0900 |
---|---|---|
committer | Darius Jahandarie <djahandarie@gmail.com> | 2023-11-03 23:52:30 +0900 |
commit | 376151096431d4362e4baaacf0cef4a534e169f7 (patch) | |
tree | 18536a224fd4d833aaf691d0363a3a4b59175dd7 /ext | |
parent | b64f51c3b13a46af4dd7f1e43048ac19c781ca7b (diff) |
Replace JsonSchema with ajv for dictionary validation
Diffstat (limited to 'ext')
-rw-r--r-- | ext/data/schemas/custom-audio-list-schema.json | 1 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-index-schema.json | 1 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-kanji-bank-v1-schema.json | 3 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-kanji-bank-v3-schema.json | 3 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json | 1 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-tag-bank-v3-schema.json | 1 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-term-bank-v1-schema.json | 1 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-term-bank-v3-schema.json | 1 | ||||
-rw-r--r-- | ext/data/schemas/dictionary-term-meta-bank-v3-schema.json | 1 | ||||
-rw-r--r-- | ext/data/schemas/options-schema.json | 1 | ||||
-rw-r--r-- | ext/js/language/dictionary-importer.js | 99 | ||||
-rw-r--r-- | ext/lib/ucs2length.js | 16 |
12 files changed, 53 insertions, 76 deletions
diff --git a/ext/data/schemas/custom-audio-list-schema.json b/ext/data/schemas/custom-audio-list-schema.json index 2cb3ca78..885ad087 100644 --- a/ext/data/schemas/custom-audio-list-schema.json +++ b/ext/data/schemas/custom-audio-list-schema.json @@ -1,4 +1,5 @@ { + "$id": "customAudioList", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "required": [ diff --git a/ext/data/schemas/dictionary-index-schema.json b/ext/data/schemas/dictionary-index-schema.json index a8ca0f23..98b27143 100644 --- a/ext/data/schemas/dictionary-index-schema.json +++ b/ext/data/schemas/dictionary-index-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryIndex", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "description": "Index file containing information about the data contained in the dictionary.", diff --git a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json index 5aca2d6a..d506a19d 100644 --- a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json +++ b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryKanjiBankV1", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing kanji information.", @@ -30,4 +31,4 @@ "description": "A meaning for the kanji character." } } -}
\ No newline at end of file +} diff --git a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json index ee508294..763ce3b1 100644 --- a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryKanjiBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing kanji information.", @@ -42,4 +43,4 @@ } ] } -}
\ No newline at end of file +} diff --git a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json index e478de93..d8f5031b 100644 --- a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryKanjiMetaBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "frequency": { diff --git a/ext/data/schemas/dictionary-tag-bank-v3-schema.json b/ext/data/schemas/dictionary-tag-bank-v3-schema.json index f7721119..ab6e3377 100644 --- a/ext/data/schemas/dictionary-tag-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-tag-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTagBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing tag information for terms and kanji.", diff --git a/ext/data/schemas/dictionary-term-bank-v1-schema.json b/ext/data/schemas/dictionary-term-bank-v1-schema.json index 9366e9ff..ab4c49f6 100644 --- a/ext/data/schemas/dictionary-term-bank-v1-schema.json +++ b/ext/data/schemas/dictionary-term-bank-v1-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTermBankV1", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing term information.", diff --git a/ext/data/schemas/dictionary-term-bank-v3-schema.json b/ext/data/schemas/dictionary-term-bank-v3-schema.json index 335144c7..7d0b4868 100644 --- a/ext/data/schemas/dictionary-term-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-term-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTermBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "structuredContent": { diff --git a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json index eb4d3fed..86e4af93 100644 --- a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTermMetaBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "frequency": { diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index 601f5d06..8ccbfa94 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -1,4 +1,5 @@ { + "$id": "options", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "required": [ diff --git a/ext/js/language/dictionary-importer.js b/ext/js/language/dictionary-importer.js index 718d9f1c..0cf3d5f5 100644 --- a/ext/js/language/dictionary-importer.js +++ b/ext/js/language/dictionary-importer.js @@ -18,7 +18,6 @@ /* global * JSZip - * JsonSchema * MediaUtil */ @@ -51,8 +50,10 @@ class DictionaryImporter { const index = JSON.parse(await indexFile.async('string')); - const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json'); - this._validateJsonSchema(index, indexSchema, indexFileName); + const ajvSchemas = await import('/lib/validate-schemas.js'); + if (!ajvSchemas.dictionaryIndex(index)) { + throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName); + } const dictionaryTitle = index.title; const version = index.format || index.version; @@ -75,8 +76,7 @@ class DictionaryImporter { // Load schemas this._progressNextStep(0); - const dataBankSchemaPaths = this._getDataBankSchemaPaths(version); - const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); + const dataBankSchemas = this._getDataBankSchemas(version); // Files const termFiles = this._getArchiveFiles(archive, 'term_bank_?.json'); @@ -87,11 +87,11 @@ class DictionaryImporter { // Load data this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length); - const termList = await this._readFileSequence(termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle); - const termMetaList = await this._readFileSequence(termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle); - const kanjiList = await this._readFileSequence(kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle); - const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle); - const tagList = await this._readFileSequence(tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle); + const termList = await this._readFileSequence(ajvSchemas, termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle); + const termMetaList = await this._readFileSequence(ajvSchemas, termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle); + const kanjiList = await this._readFileSequence(ajvSchemas, kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle); + const kanjiMetaList = await this._readFileSequence(ajvSchemas, kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle); + const tagList = await this._readFileSequence(ajvSchemas, tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle); this._addOldIndexTags(index, tagList, dictionaryTitle); // Prefix wildcard support @@ -214,68 +214,27 @@ class DictionaryImporter { return summary; } - async _getSchema(fileName) { - const schema = await this._fetchJsonAsset(fileName); - return new JsonSchema(schema); - } - - _validateJsonSchema(value, schema, fileName) { - try { - schema.validate(value); - } catch (e) { - throw this._formatSchemaError(e, fileName); - } - } - - _formatSchemaError(e, fileName) { - const valuePathString = this._getSchemaErrorPathString(e.valueStack, 'dictionary'); - const schemaPathString = this._getSchemaErrorPathString(e.schemaStack, 'schema'); - - const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); - e2.data = e; + _formatAjvSchemaError(schema, fileName) { + const e2 = new Error(`Dictionary has invalid data in '${fileName}'`); + e2.data = schema.errors; return e2; } - _getSchemaErrorPathString(infoList, base='') { - let result = base; - for (const {path} of infoList) { - const pathArray = Array.isArray(path) ? path : [path]; - for (const pathPart of pathArray) { - if (pathPart === null) { - result = base; - } else { - switch (typeof pathPart) { - case 'string': - if (result.length > 0) { - result += '.'; - } - result += pathPart; - break; - case 'number': - result += `[${pathPart}]`; - break; - } - } - } - } - return result; - } - - _getDataBankSchemaPaths(version) { + _getDataBankSchemas(version) { const termBank = ( version === 1 ? - '/data/schemas/dictionary-term-bank-v1-schema.json' : - '/data/schemas/dictionary-term-bank-v3-schema.json' + 'dictionaryTermBankV1' : + 'dictionaryTermBankV3' ); - const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json'; + const termMetaBank = 'dictionaryTermMetaBankV3'; const kanjiBank = ( version === 1 ? - '/data/schemas/dictionary-kanji-bank-v1-schema.json' : - '/data/schemas/dictionary-kanji-bank-v3-schema.json' + 'dictionaryKanjiBankV1' : + 'dictionaryKanjiBankV3' ); - const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json'; - const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json'; + const kanjiMetaBank = 'dictionaryKanjiMetaBankV3'; + const tagBank = 'dictionaryTagBankV3'; return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; } @@ -539,28 +498,20 @@ class DictionaryImporter { return results; } - async _readFileSequence(files, convertEntry, schema, dictionaryTitle) { + async _readFileSequence(ajvSchemas, files, convertEntry, schemaName, dictionaryTitle) { const progressData = this._progressData; - let count = 0; let startIndex = 0; - if (typeof this._onProgress === 'function') { - schema.progressInterval = 1000; - schema.progress = (s) => { - const index = s.getValueStackLength() > 1 ? s.getValueStackItem(1).path : 0; - progressData.index = startIndex + (index / count); - this._progress(); - }; - } const results = []; for (const file of files) { const entries = JSON.parse(await file.async('string')); - count = Array.isArray(entries) ? Math.max(entries.length, 1) : 1; startIndex = progressData.index; this._progress(); - this._validateJsonSchema(entries, schema, file.name); + if (!ajvSchemas[schemaName](entries)) { + throw this._formatAjvSchemaError(ajvSchemas[schemaName], file.name); + } progressData.index = startIndex + 1; this._progress(); diff --git a/ext/lib/ucs2length.js b/ext/lib/ucs2length.js new file mode 100644 index 00000000..120a64d4 --- /dev/null +++ b/ext/lib/ucs2length.js @@ -0,0 +1,16 @@ +export default function ucs2length(str) { + const len = str.length; + let length = 0; + let pos = 0; + let value; + while (pos < len) { + length++; + value = str.charCodeAt(pos++); + if (value >= 0xd800 && value <= 0xdbff && pos < len) { + // high surrogate, and there is a next character + value = str.charCodeAt(pos); + if ((value & 0xfc00) === 0xdc00) pos++; // low surrogate + } + } + return length; +} |