-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_sensor_initial_states_mesh_h0.03_hstep0.00025_hstart0.035.slurm
More file actions
209 lines (186 loc) · 9.63 KB
/
Copy pathrun_sensor_initial_states_mesh_h0.03_hstep0.00025_hstart0.035.slurm
File metadata and controls
209 lines (186 loc) · 9.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/bin/bash
#
# Slurm batch job: runs examples/sensor_loop/sensor_loop_step_by_step.py with
# --only-compute-initial-state inside a per-job working directory under $HOME
# and, in the default pixi setup, copies the resulting initial-state files
# back into the submit directory.
# Submit with sbatch from the directory that contains src/, examples/ and
# (for the pixi setup) pixi.toml, because the script reads all of them from
# $SLURM_SUBMIT_DIR.
#
# Resource request: one task with one CPU on a single node.
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=6G # 6 gigabytes per node (without a unit suffix the value would be megabytes)
#SBATCH --time=1-12 # days-hours format: 1 day 12 hours (a bare number would be minutes)
#SBATCH --job-name=MaMMoS_min_case-b_benchmark2
#
# Number of GPUs; the job is additionally pinned to the node named La.
#SBATCH --gres=gpu:1
#SBATCH --nodelist=La
#
#SBATCH --export=NONE
# By default, all environment variables of the shell that invokes sbatch are
# propagated to the job. This can cause unexpected behaviour: for example,
# $HOME as used in this script might differ from the expected
# /home/<username>. The same applies to $PATH and to the $OCL* and $CUDA*
# variables. Setting --export=NONE avoids the propagation.
# Record the start time and log the job context.
# START_TIME (epoch seconds) is read again at the end of the script to report
# the elapsed time. SIMDIR is the per-job working directory under $HOME.
# Every expansion is quoted so the values are logged verbatim, without word
# splitting or glob expansion.
START_TIME=$(date +%s)
echo "START TIME: $(date -u '+%Y-%m-%dT%H:%M:%SZ') ($START_TIME)"
SIMDIR="$HOME/slurm_$SLURM_JOB_ID"
echo "JOBNAME: $SLURM_JOB_NAME"
echo "PARTITION: $SLURM_JOB_PARTITION"
echo "SUBMIT HOST: $SLURM_SUBMIT_HOST"
echo "ALLOC. NODES: $SLURMD_NODENAME"
echo "SUBMIT DIR: $SLURM_SUBMIT_DIR"
echo "SIMDIR: $SIMDIR"
# ============================================================================
# SIMULATION PARAMETERS
# ============================================================================
# MESH_H          mesh size (h), handed to the Python driver as --mesh-h
# HSTEP           hstep value, handed to the Python driver as --hstep
# KL_air, K_air   handed to the Python driver as --KL and --K
#
# When backup files are used for the initial state, they must exist for the
# chosen parameters. Available files are named like:
#   - backup_mesh_sensor_mesh_h0p005_hstep0p00025.npz
#   - backup_mesh_sensor_mesh_h0p004_hstep0p00045.npz
#   - backup_mesh_sensor_mesh_h0p01_hstep0p00045.npz
#   and backup_sensor_mesh_h0p005_hstep0p00025.0002.state.npz
# NOTE(review): the names above spell the decimal point as "p" (h0p005), while
# this script writes its backups with the raw values (h0.03). Confirm which
# spelling the Python driver expects.
# ============================================================================
# MESH_H has to match the available initial state files!
MESH_H='0.03'
# HSTEP can be chosen freely.
HSTEP='0.00025'
KL_air='10.0'
K_air='2.0'
# ============================================================================
# Variables exported to the child processes of this job:
#   OMP_NUM_THREADS    taken from the CPUs-per-task value Slurm reports
#   PIXI_PROJECT_ROOT  set to the directory the job was submitted from
OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
PIXI_PROJECT_ROOT=$SLURM_SUBMIT_DIR
export OMP_NUM_THREADS PIXI_PROJECT_ROOT
# ============================================================================
# ENVIRONMENT CONFIGURATION
# ============================================================================
# Chooses how the Python environment for the simulation is provided:
#   USE_PIXI=true   pixi (default, recommended)
#   USE_PIXI=false  legacy mamba/micromamba environment
# On success either PIXI_EXE (pixi) or MAMBA_EXE and ENV_NAME (mamba) are set
# for the run step further down. If the selected tool cannot be found, the
# script prints a diagnostic to stderr and exits with status 1.
# ============================================================================
USE_PIXI=true
if [ "$USE_PIXI" = false ]; then
  # ========== MAMBA ENVIRONMENT SETUP (Legacy) ==========
  # Original mamba-based approach; requires the micromamba installation at
  # the fixed path below and an environment named $ENV_NAME.
  export MAMBA_ROOT_PREFIX='/scandium/home/programs/micromamba/micromamba'
  export MAMBA_EXE='/scandium/home/programs/micromamba/bin/micromamba'
  ENV_NAME="mfree-mumag-gpu"
  if [ ! -x "$MAMBA_EXE" ]; then
    echo "ERROR: mamba executable not found at $MAMBA_EXE" >&2
    exit 1
  fi
  echo "ENVIRONMENT: Using Mamba environment '$ENV_NAME'"
  echo "MAMBA EXECUTABLE: $MAMBA_EXE"
else
  # ========== PIXI ENVIRONMENT SETUP (Recommended) ==========
  # Requires: pixi installed and pixi.toml in SLURM_SUBMIT_DIR.
  # Advantages: better reproducibility, lighter weight, self-contained.

  # Determine the real home directory: $HOME may differ between nodes when
  # home directories are mounted differently (e.g. /home vs /ceph/home).
  # Tilde expansion of a computed user name needs eval; that is acceptable
  # here only because the name comes straight from whoami.
  current_user=$(whoami)
  REAL_HOME=$(eval echo "~$current_user")

  # Fixed locations to probe after PATH. The same list is printed in the
  # error message below, so the two can never drift apart.
  pixi_candidates=(
    "$REAL_HOME/.pixi/bin/pixi"
    "$HOME/.pixi/bin/pixi"
    "$REAL_HOME/.local/bin/pixi"
    "$HOME/.local/bin/pixi"
    "/usr/local/bin/pixi"
    "/ceph/home/$current_user/.pixi/bin/pixi"
  )

  PIXI_EXE=""
  for pixi_path in "$(command -v pixi 2>/dev/null)" "${pixi_candidates[@]}"; do
    # Require an executable regular file (same standard as the mamba check),
    # so a stray non-executable file is skipped instead of failing at run time.
    if [ -n "$pixi_path" ] && [ -f "$pixi_path" ] && [ -x "$pixi_path" ]; then
      PIXI_EXE="$pixi_path"
      break
    fi
  done

  if [ -z "$PIXI_EXE" ]; then
    {
      echo "ERROR: pixi executable not found!"
      echo "Searched locations:"
      echo " - command -v pixi"
      printf ' - %s\n' "${pixi_candidates[@]}"
      echo ""
      echo "To fix this:"
      # The installer lives at /install.sh; the bare site URL is the web page.
      echo " 1. Install pixi: curl -fsSL https://pixi.sh/install.sh | bash"
      echo " 2. Or set USE_PIXI=false in this script to use mamba instead"
    } >&2
    exit 1
  fi
  echo "ENVIRONMENT: Using Pixi with manifest at $SLURM_SUBMIT_DIR/pixi.toml"
  echo "PIXI EXECUTABLE: $PIXI_EXE"
fi
# Create the per-job working directory and stage the inputs into it:
# src/, the two sensor_loop Python scripts, and the sensor.* files of the
# initial-state directory (old outputs excluded).
# The script aborts if the submit directory is unknown or the working
# directory cannot be entered; otherwise the copies below, and the removal of
# the working directory at the end of the script, would act on the wrong
# location.
if [ -z "$SLURM_SUBMIT_DIR" ]; then
  echo "ERROR: SLURM_SUBMIT_DIR is not set; submit this script with sbatch" >&2
  exit 1
fi
if ! mkdir -p -- "$SIMDIR" || ! cd -- "$SIMDIR"; then
  echo "ERROR: cannot create or enter working directory '$SIMDIR'" >&2
  exit 1
fi
echo "WORKING DIR: $PWD"

submit_loop_dir="$SLURM_SUBMIT_DIR/examples/sensor_loop"
cp -r -- "$SLURM_SUBMIT_DIR/src" .
mkdir -p examples/sensor_loop/
cp -r -- "$submit_loop_dir/sensor_loop_step_by_step.py" ./examples/sensor_loop/
cp -r -- "$submit_loop_dir/sensor_loop_evaluation.py" ./examples/sensor_loop/

# Create sensor_initial_state folder and copy ONLY the required backup files + any existing sensor.* files
mkdir -p examples/sensor_loop/sensor_initial_state
# Disabled: copying of pre-computed backups. HSTEP_INI is not defined in this
# script as shown, so these lines cannot simply be re-enabled.
# echo "Copying backup files for initial state (h=${MESH_H}, hstep=${HSTEP_INI})..."
# cp $SLURM_SUBMIT_DIR/examples/sensor_loop/sensor_initial_state/backup_mesh_sensor_mesh_h${MESH_H}_hstep${HSTEP_INI}.npz ./examples/sensor_loop/sensor_initial_state/ 2>/dev/null && echo " ✓ Backup mesh copied" || echo " ⚠ Backup mesh not found (will be generated if needed)"
# cp $SLURM_SUBMIT_DIR/examples/sensor_loop/sensor_initial_state/backup_sensor_mesh_h${MESH_H}_hstep${HSTEP_INI}.0002.state.npz ./examples/sensor_loop/sensor_initial_state/ 2>/dev/null && echo " ✓ Backup state copied" || echo " ⚠ Backup state not found (will be computed if needed)"

# Also copy any existing sensor.* files from initial state dir (config and mesh files)
echo "Copying existing sensor.* files from initial state directory..."
for file in "$submit_loop_dir"/sensor_initial_state/sensor.*; do
  # An unmatched glob stays literal; the -f test skips it (and directories).
  [ -f "$file" ] || continue
  filename=${file##*/}
  # Skip old state files and sensor.dat (these will be regenerated).
  case "$filename" in
    *.state.npz | sensor.dat) continue ;;
  esac
  # Report success only when the copy actually succeeded.
  if cp -- "$file" ./examples/sensor_loop/sensor_initial_state/; then
    echo " ✓ Copied $filename"
  fi
done
# # Copy case folders (only sensor.* files: .p2, .krn, .npz, excluding old output states and .dat)
# echo "Preparing case directories..."
# for CASE_DIR in sensor_case-a_down sensor_case-a_up sensor_case-b_down sensor_case-b_up sensor_case-c_down sensor_case-c_up; do
# mkdir -p examples/sensor_loop/$CASE_DIR
# # Copy all sensor.* files (configuration and mesh), but skip old output files
# # Include: sensor.p2, sensor.krn, sensor.npz
# # Exclude: sensor.*.state.npz (old outputs), sensor.dat (old outputs)
# for file in $SLURM_SUBMIT_DIR/examples/sensor_loop/$CASE_DIR/sensor.*; do
# if [ -f "$file" ]; then
# filename=$(basename "$file")
# # Skip state files and .dat files (these are old outputs to be regenerated)
# if [[ ! "$filename" =~ \.state\.npz$ ]] && [[ "$filename" != "sensor.dat" ]]; then
# cp "$file" ./examples/sensor_loop/$CASE_DIR/
# fi
# fi
# done
# done
# ============================================================================
# RUN SIMULATIONS
# ============================================================================
# Runs sensor_loop_step_by_step.py with --only-compute-initial-state through
# the selected environment manager, then copies the produced initial-state
# files back to the submit directory under backup_* names.
# The copy-back runs for both environment managers, because the working
# directory is removed at the end of the script. A non-zero simulation exit
# status is reported, but the files that exist are still copied.
# NOTE(review): the backup names embed MESH_H/HSTEP verbatim (e.g. h0.03),
# whereas the example names in the parameter section use "p" for the decimal
# point (h0p005) - confirm which form the Python driver looks for.
# ============================================================================
echo "Before RUNNING SIMULATION : sensor loop step by step.py"
echo "SIMULATION PARAMETERS: MESH_H=$MESH_H, HSTEP=$HSTEP"

#######################################
# Copy one result file back to the submit directory.
# Arguments: $1 - result file in the working directory
#            $2 - destination path (its directory is created if missing)
# Outputs:   a warning on stderr when the result file does not exist
# Returns:   0 if the file was copied, non-zero otherwise
#######################################
_copy_back_result() {
  local src=$1 dest=$2
  if [ ! -f "$src" ]; then
    echo "WARNING: result file not found, nothing copied: $src" >&2
    return 1
  fi
  mkdir -p -- "$(dirname -- "$dest")" && cp -- "$src" "$dest"
}

# The simulation command is identical for both environment managers; it is
# kept in an array so every argument stays a single word.
sim_cmd=(
  python -u ./examples/sensor_loop/sensor_loop_step_by_step.py
  --mesh-h "$MESH_H" --hstep "$HSTEP" --only-compute-initial-state
  --KL "$KL_air" --K "$K_air"
)

if [ "$USE_PIXI" = false ]; then
  "$MAMBA_EXE" run -n "$ENV_NAME" "${sim_cmd[@]}"
  sim_status=$?
else
  # Diagnostics before the run: hstep-related lines in the .p2 files and the
  # .npz files present in the initial-state directory.
  find examples/sensor_loop/ -type f -name '*.p2' -exec grep -Hn -i -E 'h[_ ]?step' {} \;
  find examples/sensor_loop/sensor_initial_state -type f -name '*.npz'
  "$PIXI_EXE" run --manifest-path "$SLURM_SUBMIT_DIR/pixi.toml" "${sim_cmd[@]}"
  sim_status=$?
  # The same diagnostics after the run, plus the generated .vtu files.
  find examples/sensor_loop/sensor_initial_state -type f -name '*.npz'
  find examples/sensor_loop/sensor_initial_state -type f -name '*.vtu'
  find examples/sensor_loop/ -type f -name '*.p2' -exec grep -Hn -i -E 'h[_ ]?step' {} \;
fi

if [ "$sim_status" -ne 0 ]; then
  echo "ERROR: simulation exited with status $sim_status; copied results may be incomplete" >&2
fi

state_dir=./examples/sensor_loop/sensor_initial_state
backup_dir="$SLURM_SUBMIT_DIR/examples/sensor_loop/sensor_initial_state"
run_tag="mesh_h${MESH_H}_hstep${HSTEP}"
_copy_back_result "$state_dir/sensor.0002.state.npz" "$backup_dir/backup_sensor_${run_tag}.0002.state.npz"
_copy_back_result "$state_dir/sensor.0002.vtu" "$backup_dir/backup_sensor_${run_tag}.0002.vtu"
_copy_back_result "$state_dir/sensor.npz" "$backup_dir/backup_mesh_sensor_${run_tag}.npz"
echo "After RUNNING SIMULATION : sensor loop step by step.py"
# ============================================================================
# Clean up: leave and remove the per-job working directory.

#######################################
# Remove a working directory after stepping out of it.
# Refuses to act on an empty or non-existent path, so a failed setup can
# never turn this into a removal of some unrelated directory.
# Arguments: $1 - directory to remove
# Outputs:   a warning on stderr when there is nothing to remove
# Returns:   0 if the directory was removed, non-zero otherwise
#######################################
_remove_workdir() {
  local dir=$1
  if [ -z "$dir" ] || [ ! -d "$dir" ]; then
    echo "WARNING: working directory '$dir' does not exist; nothing to remove" >&2
    return 1
  fi
  # Move to the parent of the directory itself rather than a relative "..",
  # so the result does not depend on the current directory.
  cd -- "$dir/.." && rm -r -- "$dir"
}

_remove_workdir "$SIMDIR"
# Show the end time and the wall-clock duration since START_TIME (epoch
# seconds recorded at the top of the script), split into hours, minutes and
# seconds.
END_TIME=$(date +%s)
echo "END TIME: $(date -u '+%Y-%m-%dT%H:%M:%SZ') ($END_TIME)"
ELAPSED=$((END_TIME - START_TIME))
HOURS=$((ELAPSED / 3600))
MINUTES=$(((ELAPSED % 3600) / 60))
# Deliberately not named SECONDS: that is a Bash special variable which keeps
# counting after an assignment, so using it would clobber the shell timer and
# could print a value that has already advanced by one.
ELAPSED_SECONDS=$((ELAPSED % 60))
echo "ELAPSED TIME: ${HOURS}h ${MINUTES}m ${ELAPSED_SECONDS}s"