-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfolder_processor.py
More file actions
141 lines (113 loc) · 4.93 KB
/
folder_processor.py
File metadata and controls
141 lines (113 loc) · 4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#%%
import os
import shutil
import re
from pathlib import Path
import pandas as pd
#%%
# --- Session configuration: edit these values for each run ---
container_name = 'alpha3_24hsappet'  # Name of the experiment container folder inside main_dir
day_name = '2025_10_14'  # Name of the day folder inside the container
common_file_names = ['a.osf']  # Copy these files to every folder automatically
main_dir = "/Volumes/tungsten/scratch/gfelsenb/Ana/2p-imaging/burak/"

# Derived locations. Both the string and the Path form are kept because the
# rest of the script reads exp_path, day_folder_path and day_folder.
exp_path = os.path.join(main_dir, container_name)
day_folder_path = os.path.join(exp_path, day_name)
day_folder = Path(day_folder_path)

if not day_folder.exists():
    print(f"Error: Folder {day_folder_path} does not exist")
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the site module for interactive sessions and is not
    # guaranteed to exist in every interpreter setup. The message and the
    # resulting exit status are the same.
    raise SystemExit('Terminating script.')
#%%
# Series identifier: "S1-T" followed by five digits, anchored at the start of
# the file name
pattern = re.compile(r'^(S1-T\d{5})')
# Only files carrying one of these extensions are sorted into series folders
valid_extensions = {'.osf', '.bin', '.ini', '.ach', '.lvd'}
# List the regular files of the day folder before anything is moved
files = [entry for entry in day_folder.iterdir() if entry.is_file()]

for file in files:
    # Guard: leave files with an unrecognised extension where they are
    if file.suffix.lower() not in valid_extensions:
        print(f"Skipping {file.name}: Invalid extension {file.suffix}")
        continue

    # Guard: the name must start with a series identifier
    id_match = pattern.match(file.name)
    if not id_match:
        print(f"Warning: No valid identifier found in {file.name}")
        continue

    identifier = id_match.group(1)
    target_folder = day_folder / identifier

    # Create the series folder on first use; skip this file if that fails
    if not target_folder.exists():
        try:
            target_folder.mkdir(parents=True, exist_ok=True)
            print(f"Created folder: {target_folder}")
        except Exception as err:
            print(f"Error creating folder {target_folder}: {err}")
            continue

    # Guard: the target must now be an existing directory
    if not target_folder.exists() or not target_folder.is_dir():
        print(f"Warning: Target folder {identifier} does not exist for {file.name}")
        continue

    # Move the file into its series folder, keeping its name
    destination = target_folder / file.name
    try:
        shutil.move(str(file), str(destination))
        print(f"Moved: {file.name} -> {identifier}/")
    except Exception as err:
        print(f"Error moving {file.name}: {err}")
# Every sub-directory of the day folder receives a copy of the common files
folders = [entry for entry in day_folder.iterdir() if entry.is_dir()]

for folder in folders:
    for common_file in common_file_names:
        source_file = day_folder / common_file

        # Guard: warn (once per folder) when the common file is missing
        if not source_file.exists():
            print(f"Warning: Common file {common_file} not found in {day_folder}")
            continue

        # The copy is named after its folder and keeps the source extension
        target_file = folder / (folder.name + source_file.suffix)
        try:
            shutil.copy(str(source_file), str(target_file))
            print(f"Copied {common_file} to {folder.name}/")
        except Exception as err:
            print(f"Error copying {common_file} to {folder.name}: {err}")
#%%
# Update database with new seriesIDs
print("\nUpdating database with new seriesIDs...")

# Find the database CSV in the experiment folder, ignoring hidden files
# (names starting with '.'). The candidates are sorted so that the file
# picked when several exist does not depend on directory-listing order.
database_files = sorted(
    f for f in Path(exp_path).glob("*_database.csv")
    if not f.name.startswith('.')
)

if not database_files:
    print(f"Error: No database CSV file found in {exp_path}")
else:
    if len(database_files) > 1:
        print(f"Warning: Multiple database files found. Using {database_files[0].name}")
    database_path = database_files[0]

    # Read every cell as plain text and disable NA detection. The file is
    # written back below, and appending rows that only carry a seriesID puts
    # missing values into all other columns; with type inference that would
    # rewrite existing cells (integers become floats, leading zeros vanish,
    # literal "NA" strings turn into empty cells).
    df = pd.read_csv(database_path, dtype=str, keep_default_na=False)

    if 'seriesID' not in df.columns:
        print("Error: 'seriesID' column not found in database")
    else:
        # seriesIDs already present in the database
        existing_series = set(df['seriesID'].dropna().astype(str))

        # Folder names that are exactly S1-T followed by five digits
        series_pattern = re.compile(r'^S1-T\d{5}$')
        series_folders = [f for f in day_folder.iterdir()
                          if f.is_dir() and series_pattern.match(f.name)]

        # Series that are not in the database yet, sorted so that rows are
        # appended in a reproducible order (iterdir order is arbitrary)
        new_series = sorted(
            folder.name for folder in series_folders
            if folder.name not in existing_series
        )

        if not new_series:
            print("No new seriesIDs found to add to database")
        else:
            # New rows only fill the seriesID column; the remaining columns
            # are left empty in the written CSV
            new_rows = pd.DataFrame({'seriesID': new_series})
            df_updated = pd.concat([df, new_rows], ignore_index=True)

            # Overwrite the database file with the extended table
            df_updated.to_csv(database_path, index=False)

            print(f"Added {len(new_series)} new seriesIDs to database:")
            for series_id in new_series:
                print(f" - {series_id}")
#%%