-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.ts
133 lines (120 loc) · 4.43 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/**
 * An internal (decision) node of one boosted tree in the model JSON.
 *
 * During scoring the feature named by `split` is looked up in the instance's
 * feature map and one of `yes` / `no` / `missing` selects which child to visit.
 */
interface BoosterNode {
  /** Identifier a parent node uses (via `yes` / `no` / `missing`) to select this node. */
  nodeid: number;
  /** Not read anywhere in this file; presumably the node's depth within its tree (TODO confirm). */
  depth: number;
  /** Name of the feature tested at this node; used as the key into the feature map. */
  split: string;
  /** Threshold: a feature value strictly below it follows `yes`, anything else follows `no`. */
  split_condition: number;
  /** `nodeid` of the child taken when the feature value is less than `split_condition`. */
  yes: number;
  /** `nodeid` of the child taken when the feature value is not less than `split_condition`. */
  no: number;
  /** `nodeid` of the child taken when the feature is absent from the feature map. */
  missing: number;
  /** Direct children of this node; searched by `nodeid` to find the next node. */
  children: (BoosterNode | BoosterLeaf)[];
}
/**
 * A terminal node of a boosted tree: reaching it ends the walk of that tree.
 */
interface BoosterLeaf {
  /** Identifier a parent node uses (via `yes` / `no` / `missing`) to select this leaf. */
  nodeid: number;
  /** Value this tree contributes to the summed score when the walk ends here. */
  leaf: number;
}
/** One boosted tree, represented by its root node. */
type Booster = BoosterNode;

/** A whole model: the list of trees whose leaf values are summed when scoring. */
type XGBoostModel = Booster[];

/** Feature name -> numeric feature index. */
type FeatureIndex = { [featureName: string]: number };

/** Feature index (as a string key) -> feature name; the inverse of `FeatureIndex`. */
type ReverseFeatureIndex = { [featureIndex: string]: string };
/**
 * Reads the file at `file` and parses its contents as JSON.
 *
 * The `fs` module is imported lazily, inside the function, so merely loading
 * this module does not pull in `fs`.
 *
 * @param file - Path of the JSON file to read.
 * @returns The parsed JSON value (untyped).
 * @throws If the file cannot be read, or if its contents are not valid JSON.
 */
async function loadJson(file: string) {
  const { promises: fsPromises } = await import("fs");
  const contents = await fsPromises.readFile(file);
  const text = contents.toString();
  return JSON.parse(text);
}
/**
 * Logistic sigmoid: maps any real number to the open interval (0, 1).
 *
 * Uses `Math.exp` rather than `Math.pow(Math.E, …)`: `Math.E` is already a
 * rounded constant, so raising it to a power compounds that rounding error,
 * whereas `Math.exp` evaluates the exponential directly.
 *
 * @param x - Raw (summed) score.
 * @returns `1 / (1 + e^-x)`; saturates to 0 for very negative `x` and to 1 for very positive `x`.
 */
function sigmoid(x: number) {
  return 1 / (1 + Math.exp(-x));
}
/**
 * Type guard telling leaves apart from internal nodes.
 *
 * A node counts as a leaf exactly when its `leaf` property is not `undefined`
 * (so a leaf value of `0` still counts as a leaf).
 *
 * @param node - A tree node of either kind.
 * @returns `true` when `node` is a `BoosterLeaf`.
 */
function isLeaf(node: BoosterNode | BoosterLeaf): node is BoosterLeaf {
  const { leaf } = node as BoosterLeaf;
  return typeof leaf !== "undefined";
}
/**
 * Scores feature vectors against a tree-ensemble model loaded from its JSON form.
 *
 * Each tree is walked from its root to a leaf, the leaf values of all trees are
 * summed, and the sum is passed through `sigmoid`.
 *
 * Build instances with {@link Scorer.create}; an instance produced by a bare
 * `new Scorer()` has no model and scoring on it throws.
 */
export default class Scorer {
  /** Trees of the loaded model; undefined until `create()` assigns it. */
  model?: XGBoostModel;
  /** Feature index (as string) -> feature name; only set when a feature index was given to `create()`. */
  reverseFeatureIndex?: ReverseFeatureIndex;

  /**
   * Builds a ready-to-use scorer.
   *
   * @param model - Either a path to a model JSON file or the already-parsed model.
   *   The value is stored as-is; its structure is not validated here.
   * @param featureIndex - Optional path to, or object holding, a map from feature
   *   name to feature index. It is inverted and kept so that LibSVM files (which
   *   refer to features by index) can be scored.
   * @returns The initialized scorer.
   */
  static async create(model: string | object, featureIndex?: string | object) {
    const scorer = new Scorer();
    scorer.model = typeof model === "string" ? await loadJson(model) : model;

    if (featureIndex) {
      const loadedFeatureIndex: FeatureIndex =
        typeof featureIndex === "string" ? await loadJson(featureIndex) : featureIndex;

      // Invert name -> index into index -> name.
      const reversed: ReverseFeatureIndex = {};
      for (const fName of Object.keys(loadedFeatureIndex)) {
        reversed[`${loadedFeatureIndex[fName]}`] = fName;
      }
      scorer.reverseFeatureIndex = reversed;
    }

    return scorer;
  }

  /**
   * Scores one instance.
   *
   * @param features - Map from feature name to value; features that are absent
   *   follow each node's `missing` branch.
   * @returns `sigmoid` of the summed leaf values of all trees.
   * @throws If the scorer has no model, or if a node refers to a child ID that
   *   is not among its children.
   */
  scoreSingleInstance(features: Record<string, number>) {
    if (!this.model) {
      throw new Error(`Scorer not initialized, create a scorer using Scorer.create() only`);
    }

    let totalScore = 0.0;
    for (const booster of this.model) {
      totalScore += Scorer.evaluateTree(booster, features);
    }
    return sigmoid(totalScore);
  }

  /**
   * Scores a single instance, a list of instances, or a LibSVM data file.
   *
   * @param input - A feature map (returns one score), an array of feature maps
   *   (returns one score per entry), or a path to a LibSVM-formatted file
   *   (returns one score per non-blank line).
   * @returns A single score or an array of scores, depending on `input`.
   * @throws If `input` is neither a string nor a non-null object, or if a file
   *   path is given but the scorer was created without a feature index.
   */
  async score(input: string | object | Array<object>): Promise<Array<number> | number> {
    // `typeof null` is "object", so null has to be rejected explicitly.
    if (input === null || (typeof input !== "string" && typeof input !== "object")) {
      const received = input === null ? "null" : typeof input;
      throw new Error(`Invalid input to score method: ${input}, expected string or object, was ${received}`);
    }

    // Scoring a single instance or array of instances
    if (typeof input !== "string") {
      if (Array.isArray(input)) {
        return (input as Array<object>).map((en) => this.scoreSingleInstance(en as Record<string, number>));
      }
      return this.scoreSingleInstance(input as Record<string, number>);
    }

    if (!this.reverseFeatureIndex) {
      throw new Error(`Cannot score LibSVM input without a feature index, please specify one while creating a scorer.`);
    }
    const reverseIndex = this.reverseFeatureIndex;

    // Scoring a LibSVM data file
    const fs = await import("fs");
    const readline = await import("readline");
    const inputStream = fs.createReadStream(input);
    const rl = readline.createInterface({
      input: inputStream,
      crlfDelay: Infinity,
    });

    const scores: number[] = [];
    try {
      for await (const line of rl) {
        const features = Scorer.parseLibSvmLine(line, reverseIndex);
        if (features === undefined) {
          // Blank line (e.g. a trailing newline): not an instance, nothing to score.
          continue;
        }
        scores.push(this.scoreSingleInstance(features));
      }
    } finally {
      // Release the reader and the file handle even when scoring throws mid-file.
      rl.close();
      inputStream.destroy();
    }
    return scores;
  }

  /** Walks one tree from its root down to a leaf for `features` and returns that leaf's value. */
  private static evaluateTree(root: Booster, features: Record<string, number>): number {
    let currNode: BoosterNode | BoosterLeaf = root;
    while (!isLeaf(currNode)) {
      const featureValue = features[currNode.split];
      let nextNodeId: number;
      if (featureValue !== undefined) {
        nextNodeId = featureValue < currNode.split_condition ? currNode.yes : currNode.no;
      } else {
        // Missing feature
        nextNodeId = currNode.missing;
      }

      const wantedId = nextNodeId;
      const nextNode: BoosterNode | BoosterLeaf | undefined =
        currNode.children.find((child) => child.nodeid === wantedId);
      if (nextNode === undefined) {
        throw new Error(`Invalid model JSON, missing node ID: ${nextNodeId}`);
      }
      currNode = nextNode;
    }
    return currNode.leaf;
  }

  /**
   * Turns one LibSVM line (`<label> <index>:<value> ...`) into a feature map.
   *
   * Tokens may be separated by any run of whitespace. The first token (the
   * label) is ignored, and pairs whose index is not in `reverseIndex` are
   * skipped. Returns `undefined` for a line that is empty or whitespace-only.
   */
  private static parseLibSvmLine(
    line: string,
    reverseIndex: ReverseFeatureIndex
  ): Record<string, number> | undefined {
    const trimmed = line.trim();
    if (trimmed === "") {
      return undefined;
    }

    const features: Record<string, number> = {};
    for (const token of trimmed.split(/\s+/).slice(1)) {
      const [featureId = "", rawValue = ""] = token.split(":");
      const featureName = reverseIndex[featureId];
      if (featureName === undefined) {
        // Index unknown to the feature index: no named feature to assign it to.
        continue;
      }
      features[featureName] = parseFloat(rawValue);
    }
    return features;
  }
}