From f6a38f40dc52c4517e41ddb381278ecf5efba056 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 10 Jan 2021 14:43:06 -0500 Subject: Customizable sentence parsing (#1217) * Add new sentenceParsing options * Update TextScanner.setOptions * Assign terminator/quote maps * Pass sentence parsing info to extractSentence * Simplify setting * Add setting for enableTerminationCharacters * Create new settings for sentence termination characters --- ext/mixed/js/display.js | 8 +++--- ext/mixed/js/document-util.js | 47 ++++++++++++++++---------------- ext/mixed/js/text-scanner.js | 62 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 82 insertions(+), 35 deletions(-) (limited to 'ext/mixed') diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 4c8d2f91..2b3ea21c 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -327,9 +327,9 @@ class Display extends EventDispatcher { touchInputEnabled: scanningOptions.touchInputEnabled, pointerEventsEnabled: scanningOptions.pointerEventsEnabled, scanLength: scanningOptions.length, - sentenceScanExtent: sentenceParsingOptions.scanExtent, layoutAwareScan: scanningOptions.layoutAwareScan, - preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery + preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery, + sentenceParsingOptions } }); @@ -1832,9 +1832,9 @@ class Display extends EventDispatcher { touchInputEnabled: false, pointerEventsEnabled: false, scanLength: scanningOptions.length, - sentenceScanExtent: sentenceParsingOptions.scanExtent, layoutAwareScan: scanningOptions.layoutAwareScan, - preventMiddleMouse: false + preventMiddleMouse: false, + sentenceParsingOptions }); this._definitionTextScanner.setEnabled(true); diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js index 647cbedc..42d3556b 100644 --- a/ext/mixed/js/document-util.js +++ b/ext/mixed/js/document-util.js @@ -24,24 +24,6 @@ class DocumentUtil { constructor() { this._transparentColorPattern = /rgba\s*\([^)]*,\s*0(?:\.0+)?\s*\)/; - - const quoteArray = [ - ['「', '」'], - ['『', '』'], - ['\'', '\''], - ['"', '"'] - ]; - const terminatorString = '…。..??!!'; - this._terminatorMap = new Map(); - for (const char of terminatorString) { - this._terminatorMap.set(char, [false, true]); - } - this._forwardQuoteMap = new Map(); - this._backwardQuoteMap = new Map(); - for (const [char1, char2] of quoteArray) { - this._forwardQuoteMap.set(char1, [char2, false]); - this._backwardQuoteMap.set(char2, [char1, false]); - } } getRangeFromPoint(x, y, deepContentScan) { @@ -81,11 +63,30 @@ class DocumentUtil { } } - extractSentence(source, layoutAwareScan, extent) { - const terminatorMap = this._terminatorMap; - const forwardQuoteMap = this._forwardQuoteMap; - const backwardQuoteMap = this._backwardQuoteMap; - + /** + * Extract a sentence from a document. + * @param source The text source object, either `TextSourceRange` or `TextSourceElement`. + * @param layoutAwareScan Whether or not layout-aware scan mode should be used. + * @param extent The length of the sentence to extract. + * @param terminatorMap A mapping of characters that terminate a sentence. + * Format: + * ```js + * new Map([ [character: string, [includeCharacterAtStart: boolean, includeCharacterAtEnd: boolean]], ... ]) + * ``` + * @param forwardQuoteMap A mapping of quote characters that delimit a sentence. + * Format: + * ```js + * new Map([ [character: string, [otherCharacter: string, includeCharacterAtStart: boolean]], ... ]) + * ``` + * @param backwardQuoteMap A mapping of quote characters that delimit a sentence, + * which is the inverse of forwardQuoteMap. + * Format: + * ```js + * new Map([ [character: string, [otherCharacter: string, includeCharacterAtEnd: boolean]], ... ]) + * ``` + * @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`. + */ + extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) { // Scan text source = source.clone(); const startLength = source.setStartOffset(extent, layoutAwareScan); diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js index f26bcf0e..11a6f88f 100644 --- a/ext/mixed/js/text-scanner.js +++ b/ext/mixed/js/text-scanner.js @@ -59,9 +59,12 @@ class TextScanner extends EventDispatcher { this._touchInputEnabled = false; this._pointerEventsEnabled = false; this._scanLength = 1; - this._sentenceScanExtent = 1; this._layoutAwareScan = false; this._preventMiddleMouse = false; + this._sentenceScanExtent = 0; + this._sentenceTerminatorMap = new Map(); + this._sentenceForwardQuoteMap = new Map(); + this._sentenceBackwardQuoteMap = new Map(); this._inputs = []; this._enabled = false; @@ -142,9 +145,9 @@ class TextScanner extends EventDispatcher { touchInputEnabled, pointerEventsEnabled, scanLength, - sentenceScanExtent, layoutAwareScan, - preventMiddleMouse + preventMiddleMouse, + sentenceParsingOptions }) { if (Array.isArray(inputs)) { this._inputs = inputs.map(({ @@ -193,15 +196,38 @@ class TextScanner extends EventDispatcher { if (typeof scanLength === 'number') { this._scanLength = scanLength; } - if (typeof sentenceScanExtent === 'number') { - this._sentenceScanExtent = sentenceScanExtent; - } if (typeof layoutAwareScan === 'boolean') { this._layoutAwareScan = layoutAwareScan; } if (typeof preventMiddleMouse === 'boolean') { this._preventMiddleMouse = preventMiddleMouse; } + if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) { + const {scanExtent, enableTerminationCharacters, terminationCharacters} = sentenceParsingOptions; + const hasTerminationCharacters = (typeof terminationCharacters === 'object' && Array.isArray(terminationCharacters)); + if (typeof scanExtent === 'number') { + this._sentenceScanExtent = sentenceParsingOptions.scanExtent; + } + if (typeof enableTerminationCharacters === 'boolean' || hasTerminationCharacters) { + const sentenceTerminatorMap = this._sentenceTerminatorMap; + const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; + const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; + sentenceTerminatorMap.clear(); + sentenceForwardQuoteMap.clear(); + sentenceBackwardQuoteMap.clear(); + if (enableTerminationCharacters !== false && hasTerminationCharacters) { + for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) { + if (!enabled) { continue; } + if (character2 === null) { + sentenceTerminatorMap.set(character1, [includeCharacterAtStart, includeCharacterAtEnd]); + } else { + sentenceForwardQuoteMap.set(character1, [character2, includeCharacterAtStart]); + sentenceBackwardQuoteMap.set(character2, [character1, includeCharacterAtEnd]); + } + } + } + } + } } getTextSourceContent(textSource, length, layoutAwareScan) { @@ -723,6 +749,9 @@ class TextScanner extends EventDispatcher { async _findTerms(textSource, optionsContext) { const scanLength = this._scanLength; const sentenceScanExtent = this._sentenceScanExtent; + const sentenceTerminatorMap = this._sentenceTerminatorMap; + const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; + const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; const layoutAwareScan = this._layoutAwareScan; const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan); if (searchText.length === 0) { return null; } @@ -731,13 +760,23 @@ class TextScanner extends EventDispatcher { if (definitions.length === 0) { return null; } textSource.setEndOffset(length, layoutAwareScan); - const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent); + const sentence = this._documentUtil.extractSentence( + textSource, + layoutAwareScan, + sentenceScanExtent, + sentenceTerminatorMap, + sentenceForwardQuoteMap, + sentenceBackwardQuoteMap + ); return {definitions, sentence, type: 'terms'}; } async _findKanji(textSource, optionsContext) { const sentenceScanExtent = this._sentenceScanExtent; + const sentenceTerminatorMap = this._sentenceTerminatorMap; + const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; + const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; const layoutAwareScan = this._layoutAwareScan; const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan); if (searchText.length === 0) { return null; } @@ -746,7 +785,14 @@ class TextScanner extends EventDispatcher { if (definitions.length === 0) { return null; } textSource.setEndOffset(1, layoutAwareScan); - const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent); + const sentence = this._documentUtil.extractSentence( + textSource, + layoutAwareScan, + sentenceScanExtent, + sentenceTerminatorMap, + sentenceForwardQuoteMap, + sentenceBackwardQuoteMap + ); return {definitions, sentence, type: 'kanji'}; } -- cgit v1.2.3