-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmolecule_extractor.py
More file actions
81 lines (66 loc) · 1.95 KB
/
Copy pathmolecule_extractor.py
File metadata and controls
81 lines (66 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Copyright © 2020 Brian Pomerantz
import sys
def chain_name():
    """Load the optional chain-designator -> display-name mapping.

    The mapping file path is read from the second command-line argument
    (``sys.argv[2]``). Each non-blank line is split on commas: the first
    field is the display name and the second field is the designator it
    labels. Any further fields are ignored.

    Returns:
        A dict mapping designator (second field) to name (first field), or
        ``None`` when no mapping file was supplied on the command line.

    Raises:
        OSError: if the mapping file cannot be opened or read.
        IndexError: if a non-blank line has fewer than two comma-separated
            fields.
    """
    if len(sys.argv) < 3:
        return None

    name_dict = {}
    # The context manager closes the file even if reading or parsing fails.
    with open(sys.argv[2], 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (commonly a trailing newline at end of file)
                # carry no mapping and would otherwise fail on fields[1].
                continue
            fields = stripped.split(',')
            name_dict[fields[1]] = fields[0]
    return name_dict
def _format_ranges(numbers):
    """Render ascending integers as comma-separated values and ``a-b`` runs.

    Values that each differ from the previous one by exactly 1 are collapsed
    into ``first-last``; isolated values are written alone. For example
    ``[1, 2, 3, 5, 7, 8]`` becomes ``'1-3, 5, 7-8'``.

    Args:
        numbers: integers in ascending order without duplicates.

    Returns:
        The formatted string; empty when ``numbers`` is empty.
    """
    runs = []  # each entry is a mutable [first, last] pair
    for n in numbers:
        # Starting from an empty list (rather than a magic "previous" value)
        # keeps negative numbers such as -9 from being mistaken for the
        # continuation of a run that never started.
        if runs and n - runs[-1][1] == 1:
            runs[-1][1] = n
        else:
            runs.append([n, n])
    return ', '.join(
        str(first) if first == last else f'{first}-{last}'
        for first, last in runs
    )


def main():
    """Summarize the residue numbers seen for each molecule in an input file.

    Reads the file named by ``sys.argv[1]``. Every line containing the text
    ``ATOM`` is split on whitespace; field 16 is used as the molecule
    designator, field 5 as the residue name and field 8 as the integer
    residue number.
    NOTE(review): these column positions presumably match one specific
    atom-record layout -- confirm against the files this script is fed.

    For each designator, in order of first appearance, one line is printed:
    ``<label>:<TAB><number of distinct residue numbers><TAB><ranges>``.
    The ranges are only listed when the first record seen for that
    designator has residue name ``A``, ``C``, ``G`` or ``U``; otherwise the
    line ends after the second tab. ``<label>`` is the designator itself, or
    its name from the optional mapping loaded by ``chain_name()``.

    Raises:
        SystemExit: if no input file was given on the command line.
        OSError: if the input file cannot be opened or read.
        IndexError: if an ``ATOM`` line has fewer than 17 fields.
        ValueError: if field 8 of an ``ATOM`` line is not an integer.
    """
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'molecule_extractor.py'
        sys.exit(f'usage: {prog} STRUCTURE_FILE [CHAIN_NAME_CSV]')

    residues = {}       # designator -> set of residue numbers
    is_nucleotide = {}  # designator -> flag taken from its first record
    # The context manager closes the file even if a line fails to parse.
    with open(sys.argv[1], 'r') as f:
        for line in f:
            if 'ATOM' not in line:
                continue
            fields = line.split()
            mol_desi = fields[16]
            if mol_desi not in residues:
                residues[mol_desi] = set()
                is_nucleotide[mol_desi] = fields[5] in ('A', 'C', 'G', 'U')
            residues[mol_desi].add(int(fields[8]))

    chain_name_dict = chain_name()

    for mol_desi, numbers in residues.items():
        if chain_name_dict is None:
            label = mol_desi
        else:
            # Fall back to the raw designator when the mapping file has no
            # entry for it, instead of aborting with KeyError mid-report.
            label = chain_name_dict.get(mol_desi, mol_desi)
        ranges = _format_ranges(sorted(numbers)) if is_nucleotide[mol_desi] else ''
        print(f'{label}:\t{len(numbers)}\t{ranges}')
# Run the report only when this file is executed as a script, so importing
# the module does not read sys.argv or print anything.
if __name__ == '__main__':
    main()