-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathanalyze.py
More file actions
109 lines (100 loc) · 3.07 KB
/
analyze.py
File metadata and controls
109 lines (100 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from pprint import pprint
def get_checks(token):
    """Return the parsed "check" records logged for ``token``.

    Scans the tab-separated file ``sentences/checks`` and keeps every row
    whose first column equals ``token``. Each kept row becomes a dict with
    the keys ``timestamp``, ``time_taken``, ``dataset``, ``index``,
    ``sentence``, ``candidate``, ``bleurt``, ``mi`` and ``dollars``, taken
    from columns 1-9 in that order.

    ``timestamp`` is column 1 minus column 1 of the first matching row, so
    the first returned record always has ``timestamp == 0.0``.

    Raises:
        ValueError / IndexError: if a matching row has a non-numeric value
            in a numeric column or has too few columns.
    """
    records = []
    first_seen = None  # column 1 of the first row that matches ``token``
    with open("sentences/checks", "r") as handle:
        for raw in handle:
            fields = raw.split("\t")
            if fields[0] != token:
                continue
            when = float(fields[1])
            if first_seen is None:
                first_seen = when
            records.append(
                dict(
                    timestamp=when - first_seen,
                    time_taken=float(fields[2]),
                    dataset=fields[3],
                    index=int(fields[4]),
                    sentence=fields[5],
                    candidate=fields[6],
                    bleurt=float(fields[7]),
                    mi=int(fields[8]),
                    # float() tolerates the trailing newline on the last column
                    dollars=float(fields[9]),
                )
            )
    return records
def get_submits(token):
    """Return the parsed "submit" records logged for ``token``.

    Scans the tab-separated file ``sentences/submits`` and keeps every row
    whose first column equals ``token``. Each kept row becomes a dict with
    the keys ``timestamp``, ``time_taken``, ``dataset``, ``index``,
    ``sentence``, ``candidate``, ``bleurt``, ``mi`` and ``dollars``, taken
    from columns 1-9 in that order.

    ``timestamp`` is column 1 minus column 1 of the first matching row whose
    column 1 parsed as a float.

    Matching rows that cannot be parsed (too few columns, or a non-numeric
    value in a numeric column) are printed to stdout as their list of fields
    and skipped; they never abort the scan.
    """
    ret = []
    is_first = True
    start_time = 0
    with open("sentences/submits", "r") as f:
        for line in f:
            fields = line.split("\t")
            if fields[0] != token:
                continue
            # Only the field conversions can fail, so only they are guarded,
            # and only for the errors a malformed row can produce. Anything
            # else (e.g. KeyboardInterrupt) must propagate.
            try:
                if is_first:
                    start_time = float(fields[1])
                    is_first = False
                ret.append(
                    {
                        "timestamp": float(fields[1]) - start_time,
                        "time_taken": float(fields[2]),
                        "dataset": fields[3],
                        "index": int(fields[4]),
                        "sentence": fields[5],
                        "candidate": fields[6],
                        "bleurt": float(fields[7]),
                        "mi": int(fields[8]),
                        "dollars": float(fields[9]),
                    }
                )
            except (ValueError, IndexError):
                # Best-effort: show the offending row and keep going.
                print(fields)
    return ret
def print_sent_cand(d):  # a list of dict of sentences and candidates
    """Pretty-print sentence, candidate and BLEURT score for each record.

    ``d`` is an iterable of dicts that each contain at least the keys
    ``"sentence"``, ``"candidate"`` and ``"bleurt"``; any other keys are
    ignored. One dict is printed per record via ``pprint``.
    """
    shown_keys = ("sentence", "candidate", "bleurt")
    for record in d:
        pprint({key: record[key] for key in shown_keys})
# person = get_submits('BVZ2UNUUSH')
# print(len(person), person[-1]['timestamp']/60, sum(_['dollars'] for _ in person))
# print_sent_cand(person)
# Export step: write sentences/final_submits as a header row followed by the
# cleaned rows of sentences/submits.
FINAL_HEADER = [
    "token",
    "time",
    "duration",
    "dataset",
    "index",
    "sentence",
    "candidate",
    "BLEURT",
    "MI",
    "dollar",
]
# Whole-field rewrites applied to every column of a kept row. Matching is
# exact, so a field that still carries the trailing newline is left untouched.
FIELD_REWRITES = {"mrpc": "msrp", "True": "1", "False": "0"}
# Rows logged under this token are left out of the export.
EXCLUDED_TOKEN = "NFIPX9BDAS"

# `with` guarantees the output is flushed and closed even if reading fails.
with open("sentences/final_submits", "w") as F:
    F.write("\t".join(FINAL_HEADER) + "\n")
    with open("sentences/submits", "r") as f:
        for line in f:
            fields = line.split("\t")
            # Keep only rows whose second column is numeric; this drops
            # short or otherwise malformed lines.
            try:
                float(fields[1])
            except (ValueError, IndexError):
                continue
            # Tokens must be exactly 10 characters and not the excluded one.
            if fields[0] == EXCLUDED_TOKEN or len(fields[0]) != 10:
                continue
            # The last field keeps its own line terminator, so none is added.
            F.write("\t".join(FIELD_REWRITES.get(x, x) for x in fields))