This repository was archived by the owner on Jun 1, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModel.coffee
More file actions
140 lines (124 loc) · 4.51 KB
/
Model.coffee
File metadata and controls
140 lines (124 loc) · 4.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
async = require "async"
fs = require "fs"
zlib = require "zlib"
{carry} = require "carrier"
{spawn} = require "child_process"
{Pool} = require "generic-pool"
{Counter} = require "./Counter"
{Document} = require "./Document"
# Sentence-boundary-detection model backed by an external SVM classifier
# (svm_classifyd). Feature tables and word counters are loaded from gzipped
# JSON files under @path; classifier child processes are managed by a
# generic-pool so they can be reused across classify() calls.
class Model
  constructor: (@path) ->
    # Feature name -> numeric feature id; populated by load().
    @features = {}
    # Word-frequency counters used during featurization.
    @lower_words = new Counter
    @non_abbrs = new Counter
    # Pool of svm_classifyd child processes; null until load() succeeds.
    @pool = null
    # On an uncaught exception, drain the pool first so no classifier
    # children are leaked, then re-throw the original error.
    @cleanup = (err) => @close -> throw err
    process.on "uncaughtException", @cleanup

  # Load the feature table and word counters from @path, then create the
  # classifier-process pool. Calls back with an Error if the svm_model file
  # is missing, any JSON file fails to load, or the feature table is empty.
  load: (callback) ->
    model_file_path = @path + "/svm_model"
    unless fs.existsSync model_file_path
      return callback new Error "#{model_file_path} does not exist"
    async.parallel {
      features: (callback) =>
        @loadGzippedJSON (@path + "/features.json.gz"), callback
      lower_words: (callback) =>
        @loadGzippedJSON (@path + "/lower_words.json.gz"), callback
      non_abbrs: (callback) =>
        @loadGzippedJSON (@path + "/non_abbrs.json.gz"), callback
    }, (err, o) =>
      return callback err if err?
      # An empty feature table means the model directory is unusable.
      if (f for own f of o.features).length == 0
        return callback new Error "model has no features"
      @features = o.features
      @lower_words = new Counter o.lower_words
      @non_abbrs = new Counter o.non_abbrs
      @pool = Pool {
        name: 'svmclassify'
        create: (callback) ->
          classifier = spawn "svm_classifyd", [model_file_path]
          # carrier splits stdout into per-line "line" events for us.
          classifier.carrier = carry classifier.stdout
          callback null, classifier
        destroy: (child) ->
          # SIGINT is the expected shutdown signal; classify()'s exit
          # handler treats any other termination as an error.
          child.kill "SIGINT"
        max: 5
        idleTimeoutMillis: 5000
      }
      callback()

  # Tear down: stop listening for uncaught exceptions and drain/destroy
  # the classifier pool (if one was ever created) before calling back.
  close: (callback) ->
    process.removeListener "uncaughtException", @cleanup
    return callback() unless @pool?
    @pool.drain => @pool.destroyAllNow -> callback()

  # Read a gzipped JSON file synchronously, gunzip and parse it, and call
  # back with (err, object). Read/parse errors are routed to the callback.
  loadGzippedJSON: (path, callback) ->
    try
      zlib.gunzip fs.readFileSync(path), (err, buffer) ->
        return callback err if err?
        callback null, JSON.parse buffer
    catch err
      callback err

  # Standard logistic function: maps a raw SVM margin x to (0, 1).
  logistic: (x, y=1) ->
    return 1.0 / (1 + Math.pow Math.E, (-1 * y * x))

  # Classify each fragment of doc with a pooled svm_classifyd process,
  # storing a logistic-scaled prediction on every fragment. Calls back with
  # an Error (after closing the model) on any classifier failure.
  classify: (doc, callback) ->
    return callback new Error "model has not been loaded" unless @pool?
    try
      @pool.acquire (err, classifier) =>
        if err?
          @close -> callback err
          return
        fragments = doc.getFragments()
        # callback with an err if classifier prints to stderr
        # (fat arrow: @close must bind to the Model, not the stream —
        # the thin-arrow form left `@close` undefined in this handler)
        classifier.stderr.on "data", (data) =>
          @close -> callback new Error data.toString()
        # callback with an err if the classifier dies or is killed abnormally
        classifier.on "exit", (code, signal) =>
          unless signal == "SIGINT"
            @close ->
              if code?
                callback new Error "classifier exited with code #{code}"
              else if signal?
                callback new Error "classifier killed with #{signal}"
        # parse classifier output: one numeric margin per fragment, then a
        # final line (for the trailing "\n" we write) that signals completion
        index = 0
        classifier.carrier.on "line", (line) =>
          value = parseFloat line
          if isNaN value
            @close -> callback new Error "unexpected output: #{line}"
            return
          if index == fragments.length
            # All fragments scored: detach listeners and return the
            # classifier to the pool exactly once.
            if classifier?
              classifier.carrier.removeAllListeners()
              classifier.stderr.removeAllListeners()
              classifier.removeAllListeners()
              @pool.release classifier
              classifier = null
              callback null
          else
            fragments[index++].prediction = @logistic value
        # format fragment features as sparse "id:1" pairs (sorted by id, as
        # SVM-light requires) and send one line per fragment to the classifier
        for frag in fragments
          feats = (@features[f] for f in frag.getFeatures() when f of @features)
          feats.sort (x,y) -> x-y
          classifier.stdin.write(
            "0 " + ("#{f}:1" for f in feats).join(" ") + "\n")
        classifier.stdin.write "\n"
    catch err
      callback err

  # Segment raw text into sentences: featurize, classify, then call back
  # with (null, sentences) or (err).
  segment: (text, callback) ->
    doc = new Document text
    doc.featurize this
    @classify doc, (err) ->
      return callback err if err?
      callback null, doc.segment()

  # NOTE(review): legacy Python from the original implementation this port
  # was based on; kept for reference, never executed.
  # def prep(self, doc):
  # self.lower_words, self.non_abbrs = doc.get_stats(verbose=False)
  # self.lower_words = dict(self.lower_words)
  # self.non_abbrs = dict(self.non_abbrs)
  # def train(self, doc):
  # abstract
  # def save(self):
  # """
  # save model objects in self.path
  # """
  # sbd_util.save_pickle(self.feats, self.path + 'feats')
  # sbd_util.save_pickle(self.lower_words, self.path + 'lower_words')
  # sbd_util.save_pickle(self.non_abbrs, self.path + 'non_abbrs')

exports.Model = Model