-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathopening_angle_cont_plot_grouped_ButForCrowded.py
More file actions
209 lines (169 loc) · 8.23 KB
/
opening_angle_cont_plot_grouped_ButForCrowded.py
File metadata and controls
209 lines (169 loc) · 8.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import os
import numpy as np
import matplotlib.pyplot as plt
import MDAnalysis as mda
"""
This script analyzes the "opening angle" of the RbsB protein from
molecular dynamics simulations, creating a collage plot that compares results
from different force fields.
This script is an adaptation of an older version, modified to work with a
new directory structure and produce a 1x3 collage plot.
Key Operations:
1. Navigates the new directory structure to find PDB and XTC files for
AMBER, CHARMM, and SIRAH force fields across multiple 'Take' runs.
2. Calculates the opening angle over time for each simulation using MDAnalysis.
3. Calculates a running average over a 0.05 µs (50 ns) window for each trajectory.
4. Generates a 1-row, 3-column collage plot ('RbsB_Opening_Angle_Collage_Averaged.png').
- Each column represents a force field.
- Each subplot shows the running average for all 5 individual 'Take' runs.
- A standard deviation envelope is overlaid on the 'Take' plots.
"""
# --- Configuration ---
# NOTE: Please update BASE_DIR to your actual data directory path.
BASE_DIR = "/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/"
FORCE_FIELD_CONFIG = {
"20_AMBER": {"name": "AMBER", "col": 0},
"12_AA": {"name": "CHARMM", "col": 1},
"13_SIRAH": {"name": "SIRAH", "col": 2}
}
# --- Simulation & Analysis Parameters ---
PDB_FILE = "RbsB_backbone_reference.pdb"
XTC_FILE = "RbsB_backbone_fitted.xtc"
PRIMARY_SET = [("LEU", 37, "CA"), ("ASP", 264, "CA"), ("GLN", 160, "CA")]
FALLBACK_SET = [("LEU", 62, "CA"), ("ASP", 289, "CA"), ("GLN", 185, "CA")]
TAKE_COUNT = 5
AVG_WINDOW_US = 0.05 # The desired averaging window in microseconds (50 ns).
def calculate_angle(atom1, atom2, atom3):
"""Calculates the angle between three MDAnalysis atom objects."""
vec1 = atom1.position - atom2.position
vec2 = atom3.position - atom2.position
cos_theta = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
cos_theta = np.clip(cos_theta, -1.0, 1.0)
return np.degrees(np.arccos(cos_theta))
def running_average(data, window_size):
"""Calculates a running average of 1D data."""
if window_size <= 1: return data
return np.convolve(data, np.ones(window_size) / window_size, mode='valid')
def process_simulation(directory, pdb_file, xtc_file, primary_set, fallback_set):
"""Processes a single simulation trajectory to calculate opening angles."""
pdb_path = os.path.join(directory, pdb_file)
xtc_path = os.path.join(directory, xtc_file)
try:
u = mda.Universe(pdb_path, xtc_path)
protein = u.select_atoms("protein")
try:
atom1 = protein.select_atoms(
f"resname {primary_set[0][0]} and resid {primary_set[0][1]} and name {primary_set[0][2]}")[0]
atom2 = protein.select_atoms(
f"resname {primary_set[1][0]} and resid {primary_set[1][1]} and name {primary_set[1][2]}")[0]
atom3 = protein.select_atoms(
f"resname {primary_set[2][0]} and resid {primary_set[2][1]} and name {primary_set[2][2]}")[0]
except IndexError:
print(f" -> Primary atom set not found. Trying fallback.")
atom1 = protein.select_atoms(
f"resname {fallback_set[0][0]} and resid {fallback_set[0][1]} and name {fallback_set[0][2]}")[0]
atom2 = protein.select_atoms(
f"resname {fallback_set[1][0]} and resid {fallback_set[1][1]} and name {fallback_set[1][2]}")[0]
atom3 = protein.select_atoms(
f"resname {fallback_set[2][0]} and resid {fallback_set[2][1]} and name {fallback_set[2][2]}")[0]
# Convert time from picoseconds to microseconds
times = [ts.time / 1_000_000 for ts in u.trajectory]
angles = [calculate_angle(atom1, atom2, atom3) for ts in u.trajectory]
return np.array(times), np.array(angles)
except Exception as e:
print(f" -> ERROR processing files in {directory}: {e}")
return None, None
def plot_collage(all_results, window_size_frames):
"""Generates and saves the 1x3 collage plot."""
# Set sharey=False to ensure all plots get y-axis numbers.
# Adjusted figsize to better accommodate the repeated y-axis labels.
fig, axes = plt.subplots(1, 3, figsize=(18, 3), sharey=False)
axes = axes.flatten()
reference_lines = [
(113.05, 'purple'),
(99.98, 'green'),
(94.04, 'orange'),
(66.44, 'red')
]
col_order = ["AMBER", "CHARMM", "SIRAH"]
for ax, field in zip(axes, col_order):
if field not in all_results:
ax.axis('off')
continue
all_takes_data = all_results[field]
all_avg_angles = []
avg_times = None
# Plot individual Take averages
for take, (times, angles) in all_takes_data.items():
running_avg = running_average(angles, window_size_frames)
current_times = times[:len(running_avg)]
ax.plot(current_times, running_avg, linewidth=1.5)
all_avg_angles.append(running_avg)
if avg_times is None:
avg_times = current_times
# Plot std dev of the averaged curves
if all_avg_angles:
min_len = min(len(a) for a in all_avg_angles)
all_avg_angles = [a[:min_len] for a in all_avg_angles]
avg_times = avg_times[:min_len]
mean_curve = np.mean(all_avg_angles, axis=0)
std_curve = np.std(all_avg_angles, axis=0)
ax.fill_between(avg_times, mean_curve - std_curve, mean_curve + std_curve, color='gray', alpha=0.3)
for ref_angle, color in reference_lines:
ax.axhline(ref_angle, color=color, linestyle="--", linewidth=1.5)
ax.set_ylim(55, 125)
ax.set_xlim(0.0, 1.0)
ax.set_xlabel("Time (µs)", fontsize=18)
# Set the y-axis label for every subplot
ax.set_ylabel("Opening Angle (°)", fontsize=18)
ax.tick_params(axis='both', which='major', labelsize=14)
ax.grid(True, linestyle='-', alpha=0.7)
ax.set_box_aspect(1 / 1.5)
plt.tight_layout()
output_filename = "RbsB_Opening_Angle_Collage_Averaged.png"
plt.savefig(output_filename, dpi=600)
# Show the plot in a window after saving
plt.show()
# Close the plot object to free up memory
plt.close()
print(f"\nCollage plot saved as {output_filename}")
def main():
"""Main function to drive data processing and plotting."""
all_results = {}
time_per_frame_us = None
print("Starting analysis of RbsB Opening Angle...")
for ff_folder, config in FORCE_FIELD_CONFIG.items():
ff_name = config['name']
print(f"\nProcessing Force Field: {ff_name}")
current_path = os.path.join(BASE_DIR, ff_folder)
if ff_folder == "12_AA":
current_path = os.path.join(current_path, "1_monomer")
ff_takes_data = {}
for i in range(1, TAKE_COUNT + 1):
take_folder_name = f"Take_{i}"
take_path = os.path.join(current_path, take_folder_name)
print(f" Analyzing: {take_path}")
if os.path.isdir(take_path):
times, angles = process_simulation(take_path, PDB_FILE, XTC_FILE, PRIMARY_SET, FALLBACK_SET)
if angles is not None:
ff_takes_data[take_folder_name] = (times, angles)
if time_per_frame_us is None and len(times) > 1:
# Calculate time step in microseconds
time_per_frame_us = times[1] - times[0]
else:
print(f" -> Warning: Directory not found - {take_path}")
if ff_takes_data:
all_results[ff_name] = ff_takes_data
if not all_results:
print("\nNo data was processed. Cannot generate plot.")
return
# Calculate window size in frames for the averaging window
if time_per_frame_us and time_per_frame_us > 0:
window_size_frames = int(AVG_WINDOW_US / time_per_frame_us)
print(f"\nCalculated running average window: {window_size_frames} frames for {AVG_WINDOW_US} µs.")
else:
print("\nCould not determine time per frame. Using default window of 50 frames.")
window_size_frames = 50
plot_collage(all_results, window_size_frames)
if __name__ == "__main__":
main()