-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtriggerWordsFoundInDocument.ts
More file actions
38 lines (36 loc) · 1.47 KB
/
triggerWordsFoundInDocument.ts
File metadata and controls
38 lines (36 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import levenshtein from "js-levenshtein";
import { typeList } from "../stats/types/typeList";
/**
 * Reports whether the document matches the trigger words of at least one
 * entry in `typeList`.
 *
 * @param document - Raw document text to scan.
 * @returns `true` if any type's trigger words are found in the document,
 *   otherwise `false`.
 */
export function documentContainsAnyTypeTriggerWord(document: string): boolean {
  // Stops at the first type whose trigger words are present.
  return typeList.some(({ triggerWords }) =>
    triggerWordsFoundInDocument(document, triggerWords),
  );
}
/**
 * Roughly guess whether any of the given trigger phrases is present in a document.
 *
 * A trigger phrase is one or more whitespace-separated words. A phrase counts as
 * found when every one of its words is within a Levenshtein distance of 1 of some
 * document word; the words may appear anywhere in the document and in any order.
 * Comparison is case-insensitive, and the document is tokenised on whitespace and
 * hyphens.
 *
 * @param document - Raw document text to scan.
 * @param triggerWordList - Trigger phrases; matching any single phrase is enough.
 * @returns `true` if at least one phrase is fully matched, otherwise `false`
 *   (including when the document or the phrase list is empty).
 */
export function triggerWordsFoundInDocument(document: string, triggerWordList: string[]): boolean {
  // Allow the odd mis-classified character.
  const maxDistance = 1;

  // Splitting on \s already covers every newline. Empty tokens (from an empty
  // document or leading/trailing separators) are dropped so that they cannot
  // fuzzy-match one-letter trigger words.
  const documentWords = document
    .toUpperCase()
    .split(/[\s-]+/)
    .filter((word) => word.length > 0);

  return triggerWordList.some((triggerWords) => {
    // Upper-case the phrase as well so both sides are compared in the same case.
    const phraseWords = triggerWords
      .toUpperCase()
      .split(/\s+/)
      .filter((word) => word.length > 0);

    // A blank phrase must not count as a (vacuous) match.
    if (phraseWords.length === 0) {
      return false;
    }

    // `some` stops at the first close-enough word and avoids spreading every
    // distance into `Math.min(...)`, which throws a RangeError on very large
    // documents.
    return phraseWords.every((triggerWord) =>
      documentWords.some(
        (documentWord) => levenshtein(triggerWord, documentWord) <= maxDistance,
      ),
    );
  });
}