-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy patharff.js
More file actions
133 lines (124 loc) · 3.55 KB
/
arff.js
File metadata and controls
133 lines (124 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
var fs = require('fs')
, readline = require('readline')
, EventEmitter = require('events').EventEmitter
/*
* returns an EventEmitter that emits the following events as the file is
* parsed:
* - relation - function(relationName) {}
* - attribute - function(attribute, type) {}
* - data - function(data) {} - called once for each line of data
* - error - function(exception) - called when errors occur
*
* See http://axon.cs.byu.edu/~martinez/classes/478/stuff/arff.html
* or http://weka.wikispaces.com/ARFF+%28developer+version%29
* for a description of the Attribute-Relation File Format
*
* Note: This parser conforms more closely to the simplified version used at BYU
*
* TODO: handle quoted names with spaces
* TODO: handle date types
*/
module.exports = function arff(input) {
var is;
var emitter = new EventEmitter();
var section;
if (typeof input === 'string') {
is = fs.createReadStream(input);
}
// input is a readable stream
else if (input.readable) {
is = input;
}
else {
process.nextTick(function() {
emitter.emit('error', new Error('Unknown input:'+input));
});
}
var writeStream = fs.createWriteStream('/dev/null');
var handlers = {
line: function(line) {
if (!section) section = 'header';
var chunks = line.trim().split(/[\s]+/);
// skip blank lines and comments
if (chunks.length === 1 && chunks[0] === '') return;
else if (/^%/.test(chunks[0])) {
return;
}
// relation name
else if (/^@RELATION/i.test(chunks[0])) {
if (section !== 'header') {
return emitter.emit('error', new Error('@RELATION found outside of header'));
}
emitter.emit('relation', chunks[1])
}
// attribute spec
else if (/^@ATTRIBUTE/i.test(chunks[0])) {
if (section != 'header') {
return emitter.emit('error', new Error('@ATTRIBUTE found outside of header section'));
}
var name = chunks[1].replace(/['"]|:$/g, '');
var type = parseAttributeType(chunks.slice(2).join(' '));
emitter.emit('attribute', name, type);
}
else if (/^@DATA/i.test(chunks[0])) {
if (section == 'data') {
return emitter.emit('error', new Error('@DATA found after DATA'));
}
section = 'data';
}
else {
if (section == 'data') {
emitter.emit('data', chunks.join('').replace(/['"]/g, '').split(','));
}
}
},
end: function() {
emitter.emit('end');
writeStream.end();
},
error: function(err) {
emitter.emit('error', err);
}
}
lines = readline.createInterface({
input: is,
output: writeStream
});
lines.on('line', handlers.line);
lines.on('error', handlers.error);
lines.on('close', handlers.end);
return emitter;
}
/*
 * Converts the type portion of an @ATTRIBUTE declaration into a descriptor
 * object.
 *
 * Types can be any of:
 * - numeric | integer | real | continuous -> { type: 'numeric' }
 * - string                                -> { type: 'string' }
 * - date [format]                         -> { type: 'date', format: format }
 * - nominal, written as {a, b, c}         -> { type: 'nominal', oneof: [...] }
 * Anything else is returned unchanged as { type: type }.
 *
 * Matching is case-insensitive. For dates, everything after the `date`
 * keyword is the format (it may contain spaces); one pair of enclosing quotes
 * is removed, and "yyyy-MM-dd'T'HH:mm:ss" is used when no format is given.
 * For nominals, whitespace and quote characters are removed from the value
 * list before it is split on commas.
 */
function parseAttributeType(type) {
  var finaltype = { type: type };
  var parts;
  if (/^date/i.test(type)) {
    parts = type.split(/[\s]+/);
    var format = "yyyy-MM-dd'T'HH:mm:ss";
    // Keep every token after the keyword so a format such as
    // "yyyy-MM-dd HH:mm:ss" is not cut off at its first space.
    var rest = parts.slice(1).join(' ').trim();
    if (rest !== '') {
      // The enclosing quotes belong to the ARFF syntax, not to the format.
      format = rest.replace(/^(['"])(.*)\1$/, '$2');
    }
    finaltype = {
      type: 'date',
      format: format
    };
  }
  else if ((parts = type.match(/^{([^}]*)}$/))) {
    finaltype.type = 'nominal';
    finaltype.oneof = parts[1].replace(/[\s'"]/g, '').split(/,/);
  }
  else if (/^numeric|^integer|^real|^continuous/i.test(type)) {
    finaltype.type = 'numeric';
  }
  else if (/string/i.test(type)) {
    finaltype.type = 'string';
  }
  return finaltype;
}