-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmake_pres_abs.py
More file actions
66 lines (51 loc) · 1.52 KB
/
make_pres_abs.py
File metadata and controls
66 lines (51 loc) · 1.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/python3
import sys
import json
from collections import defaultdict
######
#
# USAGE:
# make_pres_abs.py results.txt ids_to_names.tsv
#
######
# Zero-based column positions used in the tab-separated results file.
_BIN_COLUMN = 0
_PROTEIN_COLUMN = 8

# Zero-based column positions used in the tab-separated ID/name file.
_NAME_COLUMN = 0
_ID_COLUMN = 1


def _read_rows(path, min_fields):
    """Yield the tab-separated fields of every usable line of *path*.

    Each line is stripped of surrounding whitespace and split on tabs.
    Blank lines are skipped silently.  Lines with fewer than *min_fields*
    columns are skipped with a warning on stderr instead of raising
    IndexError in the caller.

    NOTE: because the whole line is stripped before splitting, a row whose
    first column is empty (leading tab) has its columns shifted left.
    """
    # The context manager closes the file even if parsing fails midway.
    with open(path, "r") as handle:
        for line_number, line in enumerate(handle, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split("\t")
            if len(fields) < min_fields:
                print(
                    "{}:{}: expected at least {} tab-separated fields, "
                    "found {}; line skipped".format(
                        path, line_number, min_fields, len(fields)
                    ),
                    file=sys.stderr,
                )
                continue
            yield fields


def _read_presence(results_path):
    """Read which enzyme was found in which bin.

    Returns a tuple ``(bins, proteins)`` where *bins* is the list of
    distinct bin names in order of first appearance and *proteins* maps
    each enzyme ID (in order of first appearance) to the set of bin names
    it was found in.
    """
    bins = {}  # used as an insertion-ordered set of bin names
    proteins = defaultdict(set)
    for fields in _read_rows(results_path, _PROTEIN_COLUMN + 1):
        bin_name = fields[_BIN_COLUMN]
        proteins[fields[_PROTEIN_COLUMN]].add(bin_name)
        bins[bin_name] = None
    return list(bins), proteins


def _read_id_names(ids_path):
    """Read the ID/name file and return a dict mapping protein ID -> name.

    The first column holds the name and the second the ID.  When an ID
    occurs more than once, the last occurrence wins.
    """
    id_to_name = {}
    for fields in _read_rows(ids_path, _ID_COLUMN + 1):
        id_to_name[fields[_ID_COLUMN]] = fields[_NAME_COLUMN]
    return id_to_name


def _print_matrix(bins, proteins, id_to_name):
    """Print the presence/absence matrix to stdout, ';'-separated.

    The header row lists the bin names.  Each following row starts with
    the enzyme's name and holds 1 where the enzyme was found in the bin
    and 0 otherwise.  An enzyme ID that has no entry in *id_to_name* is
    printed under its raw ID, with a warning on stderr, rather than
    aborting with KeyError after part of the table was already written.
    """
    print("Specie", ";".join(bins), sep=";")
    for protein_id, found_in in proteins.items():
        label = id_to_name.get(protein_id)
        if label is None:
            print(
                "no name found for protein ID {!r}; using the ID itself".format(
                    protein_id
                ),
                file=sys.stderr,
            )
            label = protein_id
        # found_in is a set, so each membership test is O(1).
        flags = ";".join("1" if bin_name in found_in else "0" for bin_name in bins)
        print(label, flags, sep=";")


def main(argv=None):
    """Build and print the enzyme-by-bin presence/absence table.

    *argv* is the list of command-line arguments without the program name
    (results file, then ID/name file); it defaults to ``sys.argv[1:]``.
    Exits with a usage message when fewer than two arguments are given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("USAGE: make_pres_abs.py results.txt ids_to_names.tsv")

    # Read file and save what enzyme is present in what bin
    bins, proteins = _read_presence(args[0])
    # Read file with ids to names to print names instead of IDs of proteins
    id_to_name = _read_id_names(args[1])
    # For each enzyme, report for every bin whether it was found there or not
    _print_matrix(bins, proteins, id_to_name)


if __name__ == "__main__":
    main()