-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiff_plotting_chunks.py
More file actions
131 lines (107 loc) · 5.13 KB
/
diff_plotting_chunks.py
File metadata and controls
131 lines (107 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
"""
This script calculates and plots the Mean Squared Displacement (MSD) for different protein simulations
from various force fields. It reads data from multiple files, processes it, and generates a 3x2 subplot
grid to compare the MSD over time for each force field. The output is a high-resolution figure
suitable for academic publications.
"""
import os
import numpy as np
import matplotlib.pyplot as plt
# --- Plotting Style Configuration ---
# Set global font sizes for better readability in publications.
plt.rcParams['font.size'] = 14
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.labelsize'] = 14
# --- End of Configuration ---
# List of folders containing the simulation data for different force fields.
folder_paths = [
"/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/20_AMBER/",
"/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/12_AA/1_monomer/",
"/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/13_SIRAH/",
"/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/9_Martini3/3_303K/",
"/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/10_Martini2_nonPW/3_303K/",
"/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/11_Martini2_PW/3_303K/"
]
# Specific force field names for plot titles as requested
ff_names = ['AMBER', 'CHARMM', 'SIRAH hybrid', 'M3', 'M2', 'M2PW']
def parse_file(filename):
"""
Parses a single data file to extract time (tau) and MSD values.
Args:
filename (str): The path to the input file.
Returns:
tuple: A tuple containing two lists, tau (time) and msd (Mean Squared Displacement).
"""
tau = []
msd = []
with open(filename, 'r') as file:
lines = file.readlines()
for line in lines:
if line.startswith("#") or line.startswith("@"):
continue
data = line.split()
if len(data) >= 2:
tau.append(float(data[0]))
msd.append(float(data[1]))
return tau, msd
# Create a 2x3 subplot grid with a more square aspect ratio for each plot
fig, axes = plt.subplots(2, 3, figsize=(18, 10), sharex=False, sharey=False)
axes = axes.flatten() # Flatten the 2D array of axes to easily iterate over it
# Loop through each folder path and its corresponding subplot
for i, folder_path in enumerate(folder_paths):
ax = axes[i]
time = None
all_msd_data = []
# This block attempts to find data files. It will only work if the script
# is run in an environment where the paths are accessible.
try:
# Check for "Take_" subdirectories
takes = [os.path.join(folder_path, d) for d in os.listdir(folder_path) if
os.path.isdir(os.path.join(folder_path, d)) and 'Take_' in d]
if not takes: # If no "Take_" folders, assume data is directly in the folder_path
takes = [folder_path]
for take_path in takes:
# Generate the list of filenames
filenames = []
if os.path.exists(take_path):
for f in os.listdir(take_path):
if f.startswith('diff_RbsB_SpeA_FkpA_') and f.endswith('.xvg'):
filenames.append(os.path.join(take_path, f))
for filename in filenames:
if os.path.exists(filename):
tau, msd = parse_file(filename)
if time is None and tau:
time = np.array(tau) / 1000 # Convert ns to microseconds
if tau and len(time) == len(msd):
all_msd_data.append(msd)
else:
if tau: # Only print warning if tau has data
print(f"Warning: Length mismatch in file {filename} (time: {len(time)}, msd: {len(msd)})")
else:
print(f"Warning: File not found {filename}")
except FileNotFoundError:
print(f"Directory not found: {folder_path}. This subplot will be empty.")
# Plotting the data for the current force field
if time is not None and all_msd_data:
for msd in all_msd_data:
ax.plot(time, msd)
# Highlight the region from 0.02 to 0.04 microseconds
ax.axvspan(0.02, 0.04, color='gray', alpha=0.3)
# Set title for the subplot with larger, bold font
ax.set_title(ff_names[i], fontsize=18, fontweight='bold')
# Set common labels (size is now controlled by rcParams)
ax.set_xlabel(r'$\tau$ ($\mu$s)')
ax.set_ylabel('MSD (nm$^2$)')
ax.set_xlim(0, 0.1)
ax.set_ylim(0, 0.09)
# Overall title for the entire figure with larger, bold font
# fig.suptitle('MSD Comparison Across Different Force Fields', fontsize=22, fontweight='bold')
# Adjust layout to prevent titles and labels from overlapping
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
# Save the plot to a file with high resolution and the new filename
plt.savefig('msd_subplots_lines_all_reps_andFFs.png', dpi=600)
# Show the plot
plt.show()
print("Plot saved as msd_subplots_lines_all_reps_andFFs.png")