summary | refs | log | tree | commit | diff
path: root/ext/mixed
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-01-10 14:43:06 -0500
committer: GitHub <noreply@github.com> 2021-01-10 14:43:06 -0500
commit: f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch)
tree: 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 /ext/mixed
parent: 083da93142ec6302021ee1c29428121b54fc9e68 (diff)
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
Diffstat (limited to 'ext/mixed')
-rw-r--r-- ext/mixed/js/display.js 8
-rw-r--r-- ext/mixed/js/document-util.js 47
-rw-r--r-- ext/mixed/js/text-scanner.js 62
3 files changed, 82 insertions, 35 deletions
diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js
index 4c8d2f91..2b3ea21c 100644
--- a/ext/mixed/js/display.js
+++ b/ext/mixed/js/display.js
@@ -327,9 +327,9 @@ class Display extends EventDispatcher {
touchInputEnabled: scanningOptions.touchInputEnabled,
pointerEventsEnabled: scanningOptions.pointerEventsEnabled,
scanLength: scanningOptions.length,
- sentenceScanExtent: sentenceParsingOptions.scanExtent,
layoutAwareScan: scanningOptions.layoutAwareScan,
- preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery
+ preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery,
+ sentenceParsingOptions
}
});
@@ -1832,9 +1832,9 @@ class Display extends EventDispatcher {
touchInputEnabled: false,
pointerEventsEnabled: false,
scanLength: scanningOptions.length,
- sentenceScanExtent: sentenceParsingOptions.scanExtent,
layoutAwareScan: scanningOptions.layoutAwareScan,
- preventMiddleMouse: false
+ preventMiddleMouse: false,
+ sentenceParsingOptions
});
this._definitionTextScanner.setEnabled(true);
diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js
index 647cbedc..42d3556b 100644
--- a/ext/mixed/js/document-util.js
+++ b/ext/mixed/js/document-util.js
@@ -24,24 +24,6 @@
class DocumentUtil {
constructor() {
this._transparentColorPattern = /rgba\s*\([^)]*,\s*0(?:\.0+)?\s*\)/;
-
- const quoteArray = [
- ['「', '」'],
- ['『', '』'],
- ['\'', '\''],
- ['"', '"']
- ];
- const terminatorString = '…。..??!!';
- this._terminatorMap = new Map();
- for (const char of terminatorString) {
- this._terminatorMap.set(char, [false, true]);
- }
- this._forwardQuoteMap = new Map();
- this._backwardQuoteMap = new Map();
- for (const [char1, char2] of quoteArray) {
- this._forwardQuoteMap.set(char1, [char2, false]);
- this._backwardQuoteMap.set(char2, [char1, false]);
- }
}
getRangeFromPoint(x, y, deepContentScan) {
@@ -81,11 +63,30 @@ class DocumentUtil {
}
}
- extractSentence(source, layoutAwareScan, extent) {
- const terminatorMap = this._terminatorMap;
- const forwardQuoteMap = this._forwardQuoteMap;
- const backwardQuoteMap = this._backwardQuoteMap;
-
+ /**
+ * Extract a sentence from a document.
+ * @param source The text source object, either `TextSourceRange` or `TextSourceElement`.
+ * @param layoutAwareScan Whether or not layout-aware scan mode should be used.
+ * @param extent The length of the sentence to extract.
+ * @param terminatorMap A mapping of characters that terminate a sentence.
+ * Format:
+ * ```js
+ * new Map([ [character: string, [includeCharacterAtStart: boolean, includeCharacterAtEnd: boolean]], ... ])
+ * ```
+ * @param forwardQuoteMap A mapping of quote characters that delimit a sentence.
+ * Format:
+ * ```js
+ * new Map([ [character: string, [otherCharacter: string, includeCharacterAtStart: boolean]], ... ])
+ * ```
+ * @param backwardQuoteMap A mapping of quote characters that delimit a sentence,
+ * which is the inverse of forwardQuoteMap.
+ * Format:
+ * ```js
+ * new Map([ [character: string, [otherCharacter: string, includeCharacterAtEnd: boolean]], ... ])
+ * ```
+ * @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`.
+ */
+ extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) {
// Scan text
source = source.clone();
const startLength = source.setStartOffset(extent, layoutAwareScan);
diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js
index f26bcf0e..11a6f88f 100644
--- a/ext/mixed/js/text-scanner.js
+++ b/ext/mixed/js/text-scanner.js
@@ -59,9 +59,12 @@ class TextScanner extends EventDispatcher {
this._touchInputEnabled = false;
this._pointerEventsEnabled = false;
this._scanLength = 1;
- this._sentenceScanExtent = 1;
this._layoutAwareScan = false;
this._preventMiddleMouse = false;
+ this._sentenceScanExtent = 0;
+ this._sentenceTerminatorMap = new Map();
+ this._sentenceForwardQuoteMap = new Map();
+ this._sentenceBackwardQuoteMap = new Map();
this._inputs = [];
this._enabled = false;
@@ -142,9 +145,9 @@ class TextScanner extends EventDispatcher {
touchInputEnabled,
pointerEventsEnabled,
scanLength,
- sentenceScanExtent,
layoutAwareScan,
- preventMiddleMouse
+ preventMiddleMouse,
+ sentenceParsingOptions
}) {
if (Array.isArray(inputs)) {
this._inputs = inputs.map(({
@@ -193,15 +196,38 @@ class TextScanner extends EventDispatcher {
if (typeof scanLength === 'number') {
this._scanLength = scanLength;
}
- if (typeof sentenceScanExtent === 'number') {
- this._sentenceScanExtent = sentenceScanExtent;
- }
if (typeof layoutAwareScan === 'boolean') {
this._layoutAwareScan = layoutAwareScan;
}
if (typeof preventMiddleMouse === 'boolean') {
this._preventMiddleMouse = preventMiddleMouse;
}
+ if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) {
+ const {scanExtent, enableTerminationCharacters, terminationCharacters} = sentenceParsingOptions;
+ const hasTerminationCharacters = (typeof terminationCharacters === 'object' && Array.isArray(terminationCharacters));
+ if (typeof scanExtent === 'number') {
+ this._sentenceScanExtent = sentenceParsingOptions.scanExtent;
+ }
+ if (typeof enableTerminationCharacters === 'boolean' || hasTerminationCharacters) {
+ const sentenceTerminatorMap = this._sentenceTerminatorMap;
+ const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
+ const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
+ sentenceTerminatorMap.clear();
+ sentenceForwardQuoteMap.clear();
+ sentenceBackwardQuoteMap.clear();
+ if (enableTerminationCharacters !== false && hasTerminationCharacters) {
+ for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) {
+ if (!enabled) { continue; }
+ if (character2 === null) {
+ sentenceTerminatorMap.set(character1, [includeCharacterAtStart, includeCharacterAtEnd]);
+ } else {
+ sentenceForwardQuoteMap.set(character1, [character2, includeCharacterAtStart]);
+ sentenceBackwardQuoteMap.set(character2, [character1, includeCharacterAtEnd]);
+ }
+ }
+ }
+ }
+ }
}
getTextSourceContent(textSource, length, layoutAwareScan) {
@@ -723,6 +749,9 @@ class TextScanner extends EventDispatcher {
async _findTerms(textSource, optionsContext) {
const scanLength = this._scanLength;
const sentenceScanExtent = this._sentenceScanExtent;
+ const sentenceTerminatorMap = this._sentenceTerminatorMap;
+ const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
+ const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
const layoutAwareScan = this._layoutAwareScan;
const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; }
@@ -731,13 +760,23 @@ class TextScanner extends EventDispatcher {
if (definitions.length === 0) { return null; }
textSource.setEndOffset(length, layoutAwareScan);
- const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
+ const sentence = this._documentUtil.extractSentence(
+ textSource,
+ layoutAwareScan,
+ sentenceScanExtent,
+ sentenceTerminatorMap,
+ sentenceForwardQuoteMap,
+ sentenceBackwardQuoteMap
+ );
return {definitions, sentence, type: 'terms'};
}
async _findKanji(textSource, optionsContext) {
const sentenceScanExtent = this._sentenceScanExtent;
+ const sentenceTerminatorMap = this._sentenceTerminatorMap;
+ const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
+ const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
const layoutAwareScan = this._layoutAwareScan;
const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan);
if (searchText.length === 0) { return null; }
@@ -746,7 +785,14 @@ class TextScanner extends EventDispatcher {
if (definitions.length === 0) { return null; }
textSource.setEndOffset(1, layoutAwareScan);
- const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
+ const sentence = this._documentUtil.extractSentence(
+ textSource,
+ layoutAwareScan,
+ sentenceScanExtent,
+ sentenceTerminatorMap,
+ sentenceForwardQuoteMap,
+ sentenceBackwardQuoteMap
+ );
return {definitions, sentence, type: 'kanji'};
}