-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplit_binder.py
More file actions
80 lines (69 loc) · 2.46 KB
/
split_binder.py
File metadata and controls
80 lines (69 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import json
import glob
import argparse
import pandas
import binder.feature_extraction as feature_extraction
import binder.segmentation as segmentation
# File extensions (without the leading dot) that get_files() collects.
ALLOWED_EXTENSIONS = ["json"]


def get_files(src):
    """Collect the files under ``src`` whose extension is allowed.

    Args:
        src: Path to a directory, which is searched recursively, or to a
            single file.

    Returns:
        A list of matching paths. For a directory the matches are grouped in
        ``ALLOWED_EXTENSIONS`` order. For a file the list holds ``src``
        itself when its extension is allowed and is empty otherwise. A path
        that does not exist yields an empty list.
    """
    if os.path.isfile(src):
        # run() words its errors as "Directory/File", so a single file is
        # accepted here instead of being globbed as if it were a directory
        # (which can never match anything).
        extension = os.path.splitext(src)[1][1:]
        return [src] if extension in ALLOWED_EXTENSIONS else []

    # Escape glob metacharacters ("*", "?", "[") that occur in the directory
    # path itself, so only the suffix appended below acts as a pattern.
    root = glob.escape(src)
    files = []
    for extension in ALLOWED_EXTENSIONS:
        pattern = os.path.join(root, "**/*." + extension)
        files.extend(glob.glob(pattern, recursive=True))
    return files
def process_files(files, dst, overwrite):
    """Extract features from each file, segment them, and save the results.

    For every input path two files are written into ``dst``, both named
    after the input's base name: ``<name>.pkl`` holding the extracted
    features and ``<name>.txt`` holding one line per segment, with the
    segment's items joined by ``", "``. The input path and each segment are
    also printed to stdout.

    Args:
        files: Iterable of input file paths.
        dst: Directory that receives the output files. It is not created
            here, so it must already exist.
        overwrite: When truthy, features are always re-extracted and the
            pickle rewritten; otherwise an existing ``<name>.pkl`` in
            ``dst`` is loaded instead of re-running feature extraction.
    """
    for file in files:
        name, _ = os.path.splitext(os.path.basename(file))
        # NOTE(review): outputs are keyed by base name only, so two inputs
        # from different directories that share a name write to the same
        # output files -- confirm this is acceptable.

        features_path = os.path.join(dst, name + ".pkl")
        if overwrite or not os.path.exists(features_path):
            features = feature_extraction.run(file)
            features.to_pickle(features_path)
        else:
            # Reuse the features cached by an earlier run.
            # NOTE: read_pickle unpickles arbitrary objects; dst is assumed
            # to contain only files written by this tool.
            features = pandas.read_pickle(features_path)

        segments = segmentation.run(features)
        print(file)
        for segment in segments:
            print(segment)

        text = "\n".join([", ".join(segment) for segment in segments])
        # The context manager closes (and therefore flushes) the file
        # deterministically instead of leaving it to garbage collection.
        with open(os.path.join(dst, name + ".txt"), "w") as handle:
            handle.write(text)
def run(src, dst, overwrite):
    """Find the input files under ``src`` and process them into ``dst``.

    Args:
        src: Source path; converted to an absolute path before use.
        dst: Destination directory; converted to an absolute path and
            created (including parents) if it does not exist yet.
        overwrite: Passed through unchanged to ``process_files``.

    Raises:
        FileNotFoundError: If ``src`` does not exist, or if ``get_files``
            returns no files for it. Both checks happen before ``dst`` is
            created.
    """
    src = os.path.abspath(src)
    if not os.path.exists(src):
        # FileNotFoundError is still an Exception subclass, so callers that
        # catch Exception keep working while others can be more specific.
        raise FileNotFoundError(
            "Directory/File (%s) does not exist." % (src))

    files = get_files(src)
    if not files:
        raise FileNotFoundError("Found 0 files in %s" % (src))

    dst = os.path.abspath(dst)
    os.makedirs(dst, exist_ok=True)
    process_files(files, dst, overwrite)
if __name__ == '__main__':
    # Command-line entry point: parse --src/--dst/--overwrite and hand them
    # to run().
    #
    # The text is passed as ``description``: ArgumentParser's first
    # positional parameter is ``prog``, so passing it positionally would put
    # this sentence in the usage line in place of the program name.
    PARSER = argparse.ArgumentParser(
        description="Command line arguments for Information Extraction")
    PARSER.add_argument("-s",
                        "--src",
                        type=str,
                        required=True,
                        dest="src",
                        help="Source directory of files")
    PARSER.add_argument("-d",
                        "--dst",
                        type=str,
                        required=True,
                        dest="dst",
                        help="Destination directory")
    # An int restricted to 0/1 rather than a boolean switch; defaults to 1.
    PARSER.add_argument("-o",
                        "--overwrite",
                        type=int,
                        choices=[0, 1],
                        default=1,
                        dest="overwrite",
                        help="Overwrite files")
    FLAGS = PARSER.parse_args()
    # The ``dest`` names match run()'s parameter names, so the parsed
    # namespace can be expanded straight into keyword arguments.
    run(**vars(FLAGS))