aboutsummaryrefslogtreecommitdiff
path: root/search/search.ts
blob: 0a50773c78a560b393c6dd6da204ff2f9ff4b091 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import { Tag, TagGroup } from "./tags.ts";
import { SearchSentenceProps, SearchSentenceResult, SearchTermResult, SearchWord } from "./types.ts";
import DB from "../db/db.ts";
import "../util/array.ts";
import "../util/set.ts";
import { DeepPartial } from "../util/types.ts";

/**
 * @summary main Search class
 *
 * Owns a `DB` instance and exposes two lookups on top of it:
 * - `terms`: dictionary candidates for the start of a string
 * - `sentence`: greedy left-to-right split of a sentence into words
 *
 * Both methods wait for `ready` before touching the database.
 */
export default class Search {
	db: DB;
	/** settles together with `db.ready`; rejects if the database fails to initialise */
	ready: Promise<void>;

	constructor() {
		this.db = new DB();

		// Derive readiness from an async function instead of an async Promise
		// executor: a rejection of `db.ready` now rejects `ready` as well,
		// instead of escaping as an unhandled rejection while `ready` stays
		// pending forever.
		this.ready = (async () => {
			await this.db.ready;
		})();
	}

	/**
	 * @summary find possible terms at start of string by deconjugating
	 *
	 * Queries `DB.findTerm` and drops rows that are ignored (negative `sort`)
	 * or whose deconjugation does not fit the word class of the entry.
	 *
	 * @param term text to look up
	 * @returns surviving rows mapped to `SearchTermResult`; empty when nothing matches
	 */
	public async terms(term: string): Promise<Array<SearchTermResult>> {
		await this.ready;

		const candidates = await this.db.findTerm(term);

		// skip filtering valid results if there are none
		if (candidates.length === 0) return [];

		// filter invalid deconjugations/results
		const valid = candidates.filter(result => {
			// ignore ignored by user terms
			if (result.sort < 0) return false;

			// rows with depth 0 were not deconjugated, nothing more to verify
			if (result.depth <= 0) return true;

			// check if this word can be conjugated at all
			if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;

			// a deconjugated entry of a given class must carry the matching
			// inflection reason, otherwise the deconjugation was wrong
			if (result.tags.includes(Tag.Class.Verb.U) &&
				!result.tags.includes(Tag.Inflection.Reason.U)) return false;
			if (result.tags.includes(Tag.Class.Verb.Ru) &&
				!result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
			if (result.tags.includes(Tag.Class.Verb.Suru) &&
				!result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
			if (result.tags.includes(Tag.Class.Adjective.I) &&
				!result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
			if (result.tags.includes(Tag.Class.Adjective.Na) &&
				!result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;

			// all other results should be valid
			return true;
		});

		return valid.map(result => ({
			id: result.id,
			writing: result.expression,
			reading: result.reading,
			tags: result.tags,
			source: result.original,
			sort: result.sort,
			depth: result.depth,
			match: {
				reading: result.match.reading,
				writing: result.match.writing,
			},
		}));
	}

	/**
	 * @summary parse sentence into terms with readings
	 *
	 * Walks the input from left to right. At every position the next
	 * `lookahead` characters are passed to `terms`; the best candidate is
	 * appended to the result and the cursor jumps past the text it covers.
	 * Positions without any candidate are skipped one character at a time.
	 *
	 * @param sentence text to split into words
	 * @param optional overrides for the defaults (`lookahead` 15,
	 *   `priorityMod.high` 10, `priorityMod.low` -10, `breaks` [])
	 * @returns the input together with the words found, in input order
	 */
	public async sentence(sentence: string, optional?: DeepPartial<SearchSentenceProps>): Promise<SearchSentenceResult> {
		await this.ready;

		const props: SearchSentenceProps = {
			lookahead: optional?.lookahead ?? 15,
			priorityMod: {
				high: optional?.priorityMod?.high ?? 10,
				low: optional?.priorityMod?.low ?? -10,
			},
			breaks: optional?.breaks ?? [],
		};

		const parseResult: SearchSentenceResult = {
			input: sentence,
			words: [],
		};

		for (let start = 0; start < sentence.length; start++) {
			const lookahead = props.lookahead; // TODO: stop at next delimiter (optimization)
			const term = sentence.substring(start, start + lookahead);
			const results = (await this.terms(term)).map(found => {
				const word = found as SearchWord;
				word.start = start;
				return word;
			});

			// current starting point did not yield results, try again at next character or until end of input
			if (results.length === 0) continue;

			// true if last token was a name else false; identical for every
			// candidate at this position, so evaluate it once
			const lastTokenName = parseResult.words.peek()?.tags.anyOf(Object.values(Tag.Name));

			// bias search results by modifying sort value
			for (const result of results) {
				// give higher priority to suffixes when last token was a name, else lower priority
				if (result.tags.includes(Tag.Class.Suffix))
					result.sort += lastTokenName ? props.priorityMod.high : props.priorityMod.low;

				// give lower priority to terms matched only by their readings, and are
				// usually written in kanji
				if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.writing)
					result.sort += props.priorityMod.low;
			}

			results.sort((a, b) => {
				// sort by original string length (long to short)
				if (a.source.length !== b.source.length) return b.source.length - a.source.length;
				// then by sort index (high to low)
				if (a.sort !== b.sort) return b.sort - a.sort;
				// then by depth (high to low)
				if (a.depth !== b.depth) return b.depth - a.depth;
				// else keep current order
				return 0;
			});

			// pick top result
			const best = results[0];

			// A zero-length match consumes no input: accepting it would cancel
			// the loop increment and retry this position forever. Candidates
			// are ordered longest first, so an empty top match means nothing
			// usable starts here; treat it like "no results".
			if (best.source.length === 0) continue;

			parseResult.words.push(best);
			start += best.source.length - 1; // -1 because loop already increments start
		}

		return parseResult;
	}
}