-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcomment_annotation.py
More file actions
104 lines (86 loc) · 3.62 KB
/
comment_annotation.py
File metadata and controls
104 lines (86 loc) · 3.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import pandas as pd
import pprint
file_name = "sample_for_groundtruth.csv"
annotations = "annotations.csv"
class Notes:
"""
at least four attributes other than initialization
0- loads the dataframe with comments from the same folder this class is located in
1- checks the state of progress is made it's uploaded and won't be lost.
2- prints one text (one comment) and takes the annotation input
3- iterates over the dataframe and uses #2
4- saves the progress made.
"""
def __init__(self):
self.dataframe = self.__dataloader__()
self.startingpoint, self.l, self.length = 0, [], 0
self.__progress__()
print(f"Starting analysis from comment {self.startingpoint + 1}")
self.ci = 0
self.evaluation()
self.__closing__()
#0
def __dataloader__(self):
try:
dataframe = pd.read_csv(file_name)
dataframe.rename(columns={'Unnamed: 0': 'User ID', '0': 'Comments'}, inplace=True)
print("Comments were loaded. First comment:")
print(dataframe.head(1))
return dataframe
except FileNotFoundError:
print(f'File "{file_name}" not found!')
#1
def __progress__(self):
try:
self.l = pd.read_csv(annotations)
self.startingpoint = self.l['Last comment checked'][0] + 1
self.length = len(self.l['User ID'])
# so that the starting point is one position after the last one evaluated.
except FileNotFoundError:
print(f"File {annotations} not found...\nCreating an empty dataframe to fill with annotations...")
result = pd.DataFrame()
result['Semantic evaluation'] = [] * self.dataframe.shape[0]
result['User ID'] = self.dataframe['User ID']
result['Last comment checked'] = 0
#result.rename(columns={0:'Semantic evaluation'}, inplace=True)
print("Length of the annotation dataframe:", len(result))
self.length = len(result)
self.l = result
print(self.l.head())
print("Done.")
#2
def ground_truthing(self, text):
pp = pprint.PrettyPrinter(width=64, depth=1)
pp.pprint(text)
inp = str(input("\n\t\tSCORE -->"))
if inp not in ('1', '2', '3'):
if inp.strip() == 'exit':
return float("NaN")
if inp != 'exit':
print("Please insert a valid input: exit, or 1, 2, 3.")
inp = str(input("\n\t\tSCORE --> ")).strip()
if inp.strip() == 'exit':
return float("NaN")
print()
return inp
def evaluation(self):
print("\n\t\tPRESS 'exit' TO QUIT")
# ci means comment index
for ci in range(self.startingpoint, self.length):
print(f"{ci + 1} / {self.length}")
res = self.ground_truthing(self.dataframe['Comments'][ci])
if type(res) == float:
self.ci = ci
return
self.l.loc[ci,'Semantic evaluation'] = int(res)-2
self.l.loc[ci+1:, 'Semantic evaluation'] = float('NaN')
# when going from human using a keyboard to an intuitive
# the semantic evaluation goes from 0, 1, 2 to -1 0 +1
self.l.loc[0, 'Last comment checked'] = ci # this is the last comment checked
def __closing__(self):
print(f"Progress made: {self.ci/len(self.l)}%")
print("Saving annotations...")
self.l.to_csv("annotations.csv", sep=',', index=False, encoding="utf-8")
print(self.l.head(25))
print("Saved.")
Notes()