summary refs log tree commit diff
diff options
context:
space:
mode:
author toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-01-09 23:10:55 -0500
committer GitHub <noreply@github.com> 2021-01-09 23:10:55 -0500
commit 083da93142ec6302021ee1c29428121b54fc9e68 (patch)
tree c07c7e3ae808682e127603303f4698056204abd7
parent da1e1e5c5b4dc20cc6aa46b51c55a496d094ed5c (diff)
Refactor sentence parsing (#1215)
* Rename sentenceExtent with sentenceScanExtent
* Update TextScanner.setOptions
* Change function argument order
* Rename quote map variables
* Fix edge case quote handling
* Update terminator maps to support character inclusion
-rw-r--r-- ext/fg/js/frontend.js 4
-rw-r--r-- ext/mixed/js/display.js 26
-rw-r--r-- ext/mixed/js/document-util.js 71
-rw-r--r-- ext/mixed/js/text-scanner.js 27
-rw-r--r-- test/data/html/test-document1.html 18
-rw-r--r-- test/test-document-util.js 6
6 files changed, 89 insertions, 63 deletions
diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js
index c14c2feb..a206e3fb 100644
--- a/ext/fg/js/frontend.js
+++ b/ext/fg/js/frontend.js
@@ -312,7 +312,7 @@ class Frontend {
async _updateOptionsInternal() {
const optionsContext = await this._getOptionsContext();
const options = await api.optionsGet(optionsContext);
- const scanningOptions = options.scanning;
+ const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;
this._options = options;
await this._updatePopup();
@@ -326,7 +326,7 @@ class Frontend {
touchInputEnabled: scanningOptions.touchInputEnabled,
pointerEventsEnabled: scanningOptions.pointerEventsEnabled,
scanLength: scanningOptions.length,
- sentenceExtent: options.sentenceParsing.scanExtent,
+ sentenceScanExtent: sentenceParsingOptions.scanExtent,
layoutAwareScan: scanningOptions.layoutAwareScan,
preventMiddleMouse
});
diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js
index 60842a3d..4c8d2f91 100644
--- a/ext/mixed/js/display.js
+++ b/ext/mixed/js/display.js
@@ -309,7 +309,7 @@ class Display extends EventDispatcher {
async updateOptions() {
const options = await api.optionsGet(this.getOptionsContext());
- const scanning = options.scanning;
+ const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;
this._options = options;
this._updateDocumentOptions(options);
@@ -320,16 +320,16 @@ class Display extends EventDispatcher {
selectedParser: options.parsing.selectedParser,
termSpacing: options.parsing.termSpacing,
scanning: {
- inputs: scanning.inputs,
- deepContentScan: scanning.deepDomScan,
- selectText: scanning.selectText,
- delay: scanning.delay,
- touchInputEnabled: scanning.touchInputEnabled,
- pointerEventsEnabled: scanning.pointerEventsEnabled,
- scanLength: scanning.length,
- sentenceExtent: options.sentenceParsing.scanExtent,
- layoutAwareScan: scanning.layoutAwareScan,
- preventMiddleMouse: scanning.preventMiddleMouse.onSearchQuery
+ inputs: scanningOptions.inputs,
+ deepContentScan: scanningOptions.deepDomScan,
+ selectText: scanningOptions.selectText,
+ delay: scanningOptions.delay,
+ touchInputEnabled: scanningOptions.touchInputEnabled,
+ pointerEventsEnabled: scanningOptions.pointerEventsEnabled,
+ scanLength: scanningOptions.length,
+ sentenceScanExtent: sentenceParsingOptions.scanExtent,
+ layoutAwareScan: scanningOptions.layoutAwareScan,
+ preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery
}
});
@@ -1810,7 +1810,7 @@ class Display extends EventDispatcher {
this._definitionTextScanner.on('searched', this._onDefinitionTextScannerSearched.bind(this));
}
- const scanningOptions = options.scanning;
+ const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;
this._definitionTextScanner.setOptions({
inputs: [{
include: 'mouse0',
@@ -1832,7 +1832,7 @@ class Display extends EventDispatcher {
touchInputEnabled: false,
pointerEventsEnabled: false,
scanLength: scanningOptions.length,
- sentenceExtent: options.sentenceParsing.scanExtent,
+ sentenceScanExtent: sentenceParsingOptions.scanExtent,
layoutAwareScan: scanningOptions.layoutAwareScan,
preventMiddleMouse: false
});
diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js
index 46ed321e..647cbedc 100644
--- a/ext/mixed/js/document-util.js
+++ b/ext/mixed/js/document-util.js
@@ -31,12 +31,16 @@ class DocumentUtil {
['\'', '\''],
['"', '"']
];
- this._terminatorSet = new Set(['…', '。', '.', '.', '?', '?', '!', '!']);
- this._startQuoteMap = new Map();
- this._endQuoteMap = new Map();
+ const terminatorString = '…。..??!!';
+ this._terminatorMap = new Map();
+ for (const char of terminatorString) {
+ this._terminatorMap.set(char, [false, true]);
+ }
+ this._forwardQuoteMap = new Map();
+ this._backwardQuoteMap = new Map();
for (const [char1, char2] of quoteArray) {
- this._startQuoteMap.set(char1, char2);
- this._endQuoteMap.set(char2, char1);
+ this._forwardQuoteMap.set(char1, [char2, false]);
+ this._backwardQuoteMap.set(char2, [char1, false]);
}
}
@@ -77,10 +81,10 @@ class DocumentUtil {
}
}
- extractSentence(source, extent, layoutAwareScan) {
- const terminatorSet = this._terminatorSet;
- const startQuoteMap = this._startQuoteMap;
- const endQuoteMap = this._endQuoteMap;
+ extractSentence(source, layoutAwareScan, extent) {
+ const terminatorMap = this._terminatorMap;
+ const forwardQuoteMap = this._forwardQuoteMap;
+ const backwardQuoteMap = this._backwardQuoteMap;
// Scan text
source = source.clone();
@@ -98,22 +102,28 @@ class DocumentUtil {
const c = text[pos1 - 1];
if (c === '\n') { break; }
- if (quoteStack.length === 0 && terminatorSet.has(c)) {
- break;
+ if (quoteStack.length === 0) {
+ const terminatorInfo = terminatorMap.get(c);
+ if (typeof terminatorInfo !== 'undefined') {
+ if (terminatorInfo[0]) { --pos1; }
+ break;
+ }
}
- let otherQuote = startQuoteMap.get(c);
- if (typeof otherQuote !== 'undefined') {
+ let quoteInfo = forwardQuoteMap.get(c);
+ if (typeof quoteInfo !== 'undefined') {
if (quoteStack.length === 0) {
+ if (quoteInfo[1]) { --pos1; }
break;
} else if (quoteStack[0] === c) {
quoteStack.pop();
+ continue;
}
- } else {
- otherQuote = endQuoteMap.get(c);
- if (typeof otherQuote !== 'undefined') {
- quoteStack.unshift(otherQuote);
- }
+ }
+
+ quoteInfo = backwardQuoteMap.get(c);
+ if (typeof quoteInfo !== 'undefined') {
+ quoteStack.unshift(quoteInfo[0]);
}
}
@@ -123,23 +133,28 @@ class DocumentUtil {
const c = text[pos2];
if (c === '\n') { break; }
- if (quoteStack.length === 0 && terminatorSet.has(c)) {
- ++pos2;
- break;
+ if (quoteStack.length === 0) {
+ const terminatorInfo = terminatorMap.get(c);
+ if (typeof terminatorInfo !== 'undefined') {
+ if (terminatorInfo[1]) { ++pos2; }
+ break;
+ }
}
- let otherQuote = endQuoteMap.get(c);
- if (typeof otherQuote !== 'undefined') {
+ let quoteInfo = backwardQuoteMap.get(c);
+ if (typeof quoteInfo !== 'undefined') {
if (quoteStack.length === 0) {
+ if (quoteInfo[1]) { ++pos2; }
break;
} else if (quoteStack[0] === c) {
quoteStack.pop();
+ continue;
}
- } else {
- otherQuote = startQuoteMap.get(c);
- if (typeof otherQuote !== 'undefined') {
- quoteStack.unshift(otherQuote);
- }
+ }
+
+ quoteInfo = forwardQuoteMap.get(c);
+ if (typeof quoteInfo !== 'undefined') {
+ quoteStack.unshift(quoteInfo[0]);
}
}
diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js
index 82bb898f..f26bcf0e 100644
--- a/ext/mixed/js/text-scanner.js
+++ b/ext/mixed/js/text-scanner.js
@@ -59,7 +59,7 @@ class TextScanner extends EventDispatcher {
this._touchInputEnabled = false;
this._pointerEventsEnabled = false;
this._scanLength = 1;
- this._sentenceExtent = 1;
+ this._sentenceScanExtent = 1;
this._layoutAwareScan = false;
this._preventMiddleMouse = false;
this._inputs = [];
@@ -134,7 +134,18 @@ class TextScanner extends EventDispatcher {
}
}
- setOptions({inputs, deepContentScan, selectText, delay, touchInputEnabled, pointerEventsEnabled, scanLength, sentenceExtent, layoutAwareScan, preventMiddleMouse}) {
+ setOptions({
+ inputs,
+ deepContentScan,
+ selectText,
+ delay,
+ touchInputEnabled,
+ pointerEventsEnabled,
+ scanLength,
+ sentenceScanExtent,
+ layoutAwareScan,
+ preventMiddleMouse
+ }) {
if (Array.isArray(inputs)) {
this._inputs = inputs.map(({
include,
@@ -182,8 +193,8 @@ class TextScanner extends EventDispatcher {
if (typeof scanLength === 'number') {
this._scanLength = scanLength;
}
- if (typeof sentenceExtent === 'number') {
- this._sentenceExtent = sentenceExtent;
+ if (typeof sentenceScanExtent === 'number') {
+ this._sentenceScanExtent = sentenceScanExtent;
}
if (typeof layoutAwareScan === 'boolean') {
this._layoutAwareScan = layoutAwareScan;
@@ -711,7 +722,7 @@ class TextScanner extends EventDispatcher {
async _findTerms(textSource, optionsContext) {
const scanLength = this._scanLength;
- const sentenceExtent = this._sentenceExtent;
+ const sentenceScanExtent = this._sentenceScanExtent;
const layoutAwareScan = this._layoutAwareScan;
const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; }
@@ -720,13 +731,13 @@ class TextScanner extends EventDispatcher {
if (definitions.length === 0) { return null; }
textSource.setEndOffset(length, layoutAwareScan);
- const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan);
+ const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
return {definitions, sentence, type: 'terms'};
}
async _findKanji(textSource, optionsContext) {
- const sentenceExtent = this._sentenceExtent;
+ const sentenceScanExtent = this._sentenceScanExtent;
const layoutAwareScan = this._layoutAwareScan;
const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan);
if (searchText.length === 0) { return null; }
@@ -735,7 +746,7 @@ class TextScanner extends EventDispatcher {
if (definitions.length === 0) { return null; }
textSource.setEndOffset(1, layoutAwareScan);
- const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan);
+ const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
return {definitions, sentence, type: 'kanji'};
}
diff --git a/test/data/html/test-document1.html b/test/data/html/test-document1.html
index 37dbb017..3b702a86 100644
--- a/test/data/html/test-document1.html
+++ b/test/data/html/test-document1.html
@@ -21,7 +21,7 @@
data-end-node-selector="span"
data-end-offset="0"
data-result-type="TextSourceRange",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="真白「心配してくださって、ありがとございます」"
>
<span>真白「心配してくださって、ありがとございます」</span>
@@ -37,7 +37,7 @@
data-end-node-selector="span"
data-end-offset="5"
data-result-type="TextSourceRange",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="心配してくださって、ありがとございます"
>
<span>真白「心配してくださって、ありがとございます」</span>
@@ -53,7 +53,7 @@
data-end-node-selector="span"
data-end-offset="16"
data-result-type="TextSourceRange",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="心配して「くださって」、ありがと「ございます」"
>
<span>真白「心配して「くださって」、ありがと「ございます」」</span>
@@ -69,7 +69,7 @@
data-end-node-selector="span"
data-end-offset="4"
data-result-type="TextSourceRange",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="ありがとございます。"
>
<span>ありがとございます。ありがとございます。</span>
@@ -85,7 +85,7 @@
data-end-node-selector="span"
data-end-offset="14"
data-result-type="TextSourceRange",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="ありがとございます。"
>
<span>ありがとございます。ありがとございます。</span>
@@ -101,7 +101,7 @@
data-end-node-selector="input"
data-end-offset="0"
data-result-type="TextSourceRange",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="真白「心配してくださって、ありがとございます」"
data-has-imposter="true"
>
@@ -118,7 +118,7 @@
data-end-node-selector="textarea"
data-end-offset="0"
data-result-type="TextSourceRange",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="真白「心配してくださって、ありがとございます」"
data-has-imposter="true"
>
@@ -135,7 +135,7 @@
data-end-node-selector="button"
data-end-offset="0"
data-result-type="TextSourceElement",
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="よみちゃん"
>
<button style="width: 100%; box-sizing: border-box; font-family: inherit; font-size: inherit; border: 1px solid #d8d8d8; background-color: #f0f0f0; padding: 0.2em;">よみちゃん</button>
@@ -151,7 +151,7 @@
data-end-node-selector="img"
data-end-offset="0"
data-result-type="TextSourceElement"
- data-sentence-extent="100"
+ data-sentence-scan-extent="100"
data-sentence="よみちゃん"
>
<img src="data:image/gif;base64,R0lGODdhBwAHAIABAAAAAP///ywAAAAABwAHAAACDIRvEaC32FpCbEkKCgA7" alt="よみちゃん" title="よみちゃん" style="width: 70px; height: 70px; image-rendering: crisp-edges; image-rendering: pixelated; display: block;" />
diff --git a/test/test-document-util.js b/test/test-document-util.js
index 56368f0b..09f0c5e7 100644
--- a/test/test-document-util.js
+++ b/test/test-document-util.js
@@ -127,7 +127,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
endNodeSelector,
endOffset,
resultType,
- sentenceExtent,
+ sentenceScanExtent,
sentence,
hasImposter
} = testElement.dataset;
@@ -139,7 +139,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
startOffset = parseInt(startOffset, 10);
endOffset = parseInt(endOffset, 10);
- sentenceExtent = parseInt(sentenceExtent, 10);
+ sentenceScanExtent = parseInt(sentenceScanExtent, 10);
assert.notStrictEqual(elementFromPointValue, null);
assert.notStrictEqual(caretRangeFromPointValue, null);
@@ -182,7 +182,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
if (source === null) { continue; }
// Test docSentenceExtract
- const sentenceActual = documentUtil.extractSentence(source, sentenceExtent, false).text;
+ const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text;
assert.strictEqual(sentenceActual, sentence);
// Clean