diff options
Diffstat (limited to 'ext/mixed/js/document-util.js')
-rw-r--r-- | ext/mixed/js/document-util.js | 71 |
1 files changed, 43 insertions, 28 deletions
diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js index 46ed321e..647cbedc 100644 --- a/ext/mixed/js/document-util.js +++ b/ext/mixed/js/document-util.js @@ -31,12 +31,16 @@ class DocumentUtil { ['\'', '\''], ['"', '"'] ]; - this._terminatorSet = new Set(['…', '。', '.', '.', '?', '?', '!', '!']); - this._startQuoteMap = new Map(); - this._endQuoteMap = new Map(); + const terminatorString = '…。..??!!'; + this._terminatorMap = new Map(); + for (const char of terminatorString) { + this._terminatorMap.set(char, [false, true]); + } + this._forwardQuoteMap = new Map(); + this._backwardQuoteMap = new Map(); for (const [char1, char2] of quoteArray) { - this._startQuoteMap.set(char1, char2); - this._endQuoteMap.set(char2, char1); + this._forwardQuoteMap.set(char1, [char2, false]); + this._backwardQuoteMap.set(char2, [char1, false]); } } @@ -77,10 +81,10 @@ class DocumentUtil { } } - extractSentence(source, extent, layoutAwareScan) { - const terminatorSet = this._terminatorSet; - const startQuoteMap = this._startQuoteMap; - const endQuoteMap = this._endQuoteMap; + extractSentence(source, layoutAwareScan, extent) { + const terminatorMap = this._terminatorMap; + const forwardQuoteMap = this._forwardQuoteMap; + const backwardQuoteMap = this._backwardQuoteMap; // Scan text source = source.clone(); @@ -98,22 +102,28 @@ class DocumentUtil { const c = text[pos1 - 1]; if (c === '\n') { break; } - if (quoteStack.length === 0 && terminatorSet.has(c)) { - break; + if (quoteStack.length === 0) { + const terminatorInfo = terminatorMap.get(c); + if (typeof terminatorInfo !== 'undefined') { + if (terminatorInfo[0]) { --pos1; } + break; + } } - let otherQuote = startQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { + let quoteInfo = forwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { if (quoteStack.length === 0) { + if (quoteInfo[1]) { --pos1; } break; } else if (quoteStack[0] === c) { quoteStack.pop(); + continue; } - } else { - otherQuote = endQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { - quoteStack.unshift(otherQuote); - } + } + + quoteInfo = backwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { + quoteStack.unshift(quoteInfo[0]); } } @@ -123,23 +133,28 @@ class DocumentUtil { const c = text[pos2]; if (c === '\n') { break; } - if (quoteStack.length === 0 && terminatorSet.has(c)) { - ++pos2; - break; + if (quoteStack.length === 0) { + const terminatorInfo = terminatorMap.get(c); + if (typeof terminatorInfo !== 'undefined') { + if (terminatorInfo[1]) { ++pos2; } + break; + } } - let otherQuote = endQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { + let quoteInfo = backwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { if (quoteStack.length === 0) { + if (quoteInfo[1]) { ++pos2; } break; } else if (quoteStack[0] === c) { quoteStack.pop(); + continue; } - } else { - otherQuote = startQuoteMap.get(c); - if (typeof otherQuote !== 'undefined') { - quoteStack.unshift(otherQuote); - } + } + + quoteInfo = forwardQuoteMap.get(c); + if (typeof quoteInfo !== 'undefined') { + quoteStack.unshift(quoteInfo[0]); } } |