Skip to content

Commit ac41e44

Browse files
committed
Merge branch 'main' of github.com:sidratresearch/rail_tpz into sidrat-staging
2 parents cd53fd6 + 82e8ae7 commit ac41e44

4 files changed

Lines changed: 43 additions & 43 deletions

File tree

examples/TPZ_example_notebook.ipynb

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"# TPZ: Trees for Photo-Z's\n",
99
"\n",
1010
"Author: Sam Schmidt <br>\n",
11-
"Last successfully run: March 24, 2025 <br>\n",
11+
"Last successfully run: December 11, 2025 <br>\n",
1212
"\n",
1313
"TPZ is one of the codes implemented in the MLZ (Machine Learning PhotoZ) package by Matias Carrasco Kind; some documentation for the algorithm is included on Matias' website for the package:\n",
1414
"http://matias-ck.com/mlz/\n",
@@ -137,11 +137,11 @@
137137
"\n",
138138
"The other configuration parameters for TPZ are:\n",
139139
"- `seed` (int): the random seed used by numpy for this stage <br>\n",
140-
"- `nrandom` (int): the number of random training catalogs with Gaussian scatter to create. <br>\n",
141-
"- `ntrees` (int): the number of bootstrap samples for a given random catalog to create. <br>\n",
142-
"REMINDER: the total number of trees trained will be `nrandom` * `ntrees`, and if `nrandom` is set to 1, then no random catalogs are created, only the original training sample is used.<br>\n",
143-
"- `minleaf` (int): the mininum number of galaxies in a terminal leaf. <br>\n",
144-
"- `natt` (int): the number of attributes to split. <br>\n",
140+
"- `n_random` (int): the number of random training catalogs with Gaussian scatter to create. <br>\n",
141+
"- `n_trees` (int): the number of bootstrap samples for a given random catalog to create. <br>\n",
142+
"REMINDER: the total number of trees trained will be `n_random` * `n_trees`, and if `n_random` is set to 1, then no random catalogs are created, only the original training sample is used.<br>\n",
143+
"- `min_leaf` (int): the minimum number of galaxies in a terminal leaf. <br>\n",
144+
"- `n_att` (int): the number of attributes to split. <br>\n",
145145
"- `sigmafactor` (float): Gaussian smoothing with kernel Sigma1*Resolution. <br>\n",
146146
"- `rmsfactor` (float): RMS for zconf calculation. <br>\n",
147147
"- `tree_strategy` (string): see paragraph below.<br>\n",
@@ -197,8 +197,8 @@
197197
" err_bands=error_list,\n",
198198
" hdf5_groupname='photometry',\n",
199199
" err_dict=new_err_dict,\n",
200-
" nrandom=3, \n",
201-
" ntrees=5,\n",
200+
" n_random=3, \n",
201+
" n_trees=5,\n",
202202
" #tree_strategy='native') # uncomment this line and comment out the line below to switch to using \"native\" trees \n",
203203
" tree_strategy='sklearn')"
204204
]
@@ -208,7 +208,7 @@
208208
"id": "638f6f40-e60d-47f4-b639-6bb4560b1631",
209209
"metadata": {},
210210
"source": [
211-
"Now, lets create our stage and run `inform`. We specified `nrandom = 3` and `ntrees = 5`, so we will get 15 trained trees that constitute our model. For our 10k training galaxy sample this takes about 0.5 seconds for \"sklearn\", or about 90 seconds using \"native\" on my Mac desktop for a rough guide for how long this should take to train:"
211+
"Now, let's create our stage and run `inform`. We specified `n_random = 3` and `n_trees = 5`, so we will get 15 trained trees that constitute our model. For our 10k training galaxy sample this takes about 0.5 seconds for \"sklearn\", or about 90 seconds using \"native\" on my Mac desktop for a rough guide for how long this should take to train:"
212212
]
213213
},
214214
{
@@ -387,7 +387,7 @@
387387
"metadata": {},
388388
"outputs": [],
389389
"source": [
390-
"which=5355\n",
390+
"which=7187\n",
391391
"fig, axs = plt.subplots()\n",
392392
"results().plot_native(key=which,axes=axs, label=f\"PDF for galaxy {which}\")\n",
393393
"axs.axvline(sz[which],c='r',ls='--', label=\"true redshift\")\n",
@@ -428,7 +428,7 @@
428428
"name": "python",
429429
"nbconvert_exporter": "python",
430430
"pygments_lexer": "ipython3",
431-
"version": "3.10.13"
431+
"version": "3.12.12"
432432
}
433433
},
434434
"nbformat": 4,

src/rail/estimation/algos/mlz_utils/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def make_random(self, ntimes=-1):
287287
:param int ntimes: taken from class Pars unless otherwise indicated
288288
"""
289289
if ntimes == -1:
290-
ntimes = int(self.Pars.nrandom)
290+
ntimes = int(self.Pars.n_random)
291291
# if outfileran == '': outfileran = self.Pars.randomcatname
292292
self.BigRan = create_random_realizations(
293293
self.AT, self.cat, ntimes, self.Pars.keyatt, self.rng

src/rail/estimation/algos/tpz_lite.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,16 @@ class TPZliteInformer(CatInformer):
9999
data to train up a set of decision trees that are then stored
100100
as a pickled model file for use by the Estimator stage.
101101
102-
ntrees controls how many bootstrap realizations are created from a
102+
n_trees controls how many bootstrap realizations are created from a
103103
single catalog realization to train one tree.
104104
n_random controls how many catalog realizations are created. Each
105105
random catalog consists of adding Gaussian scatter to each attribute
106106
based on its associated error column. If the error column `eind` is
107107
-1 then a small error of 0.00005 is hardcoded into TPZ. The key
108108
attribute is not included in this random catalog creation.
109109
110-
So, a total of nrandom*ntrees trees are trained and stored in the
111-
final model i.e. if nrandom=3 and ntrees=5 then 15 total trees
110+
So, a total of n_random*n_trees trees are trained and stored in the
111+
final model i.e. if n_random=3 and n_trees=5 then 15 total trees
112112
are trained and stored.
113113
"""
114114

@@ -129,12 +129,12 @@ class TPZliteInformer(CatInformer):
129129
# use_atts=Param(list, def_train_atts,
130130
# msg="attributes to use in training trees"),
131131
err_dict=SHARED_PARAMS,
132-
nrandom=Param(
132+
n_random=Param(
133133
int, 8, msg="number of random bootstrap samples of training data to create"
134134
),
135-
ntrees=Param(int, 5, msg="number of trees to create"),
136-
minleaf=Param(int, 5, msg="minimum number in terminal leaf"),
137-
natt=Param(int, 3, msg="number of attributes to split for TPZ"),
135+
n_trees=Param(int, 5, msg="number of trees to create"),
136+
min_leaf=Param(int, 5, msg="minimum number in terminal leaf"),
137+
n_att=Param(int, 3, msg="number of attributes to split for TPZ"),
138138
sigmafactor=Param(
139139
float, 3.0, msg="Gaussian smoothing with kernel Sigma1*Resolution"
140140
),
@@ -240,12 +240,12 @@ def run(self):
240240
#####
241241
# make random data
242242
# So make_random takes the error columns and just adds Gaussian scatter to the input (or 0.00005 if no error supplied)
243-
# it saves `nrandom` copies of this in a dictionary for each attribute for each galaxy
243+
# it saves `n_random` copies of this in a dictionary for each attribute for each galaxy
244244
# not how I would have done things, but we're keeping it to try to duplicate MLZ's code exactly.
245-
if self.config.nrandom > 1:
245+
if self.config.n_random > 1:
246246
if self._rank == 0:
247-
print(f"creating {self.config.nrandom} random realizations...")
248-
traindata.make_random(ntimes=int(self.config.nrandom))
247+
print(f"creating {self.config.n_random} random realizations...")
248+
traindata.make_random(ntimes=int(self.config.n_random))
249249
temprandos = traindata.BigRan
250250
else: # pragma: no cover
251251
temprandos = None
@@ -255,17 +255,17 @@ def run(self):
255255
# Matias writes out randoms from make_random for rank=0, then reads them all back in from file so that all ranks have access,
256256
# that seems slow so, instead, let's just assign them here (after broadcasting to all):
257257
if self._parallel == MPI_PARALLEL:
258-
if self.config.nrandom > 1:
258+
if self.config.n_random > 1:
259259
temprandos = self._comm.bcast(temprandos, root=0)
260-
if self.config.nrandom > 1:
260+
if self.config.n_random > 1:
261261
traindata.BigRan = temprandos
262262
if self._parallel == MPI_PARALLEL:
263263
self._comm.Barrier()
264264

265-
ntot = int(self.config.nrandom * self.config.ntrees)
265+
ntot = int(self.config.n_random * self.config.n_trees)
266266
if self._rank == 0:
267267
print(
268-
f"making a total of {ntot} trees for {self.config.nrandom} random realizations * {self.config.ntrees} bootstraps"
268+
f"making a total of {ntot} trees for {self.config.n_random} random realizations * {self.config.n_trees} bootstraps"
269269
)
270270

271271
zfine, zfine2, resz, resz2, wzin = analysis.get_zbins(self.config)
@@ -287,9 +287,9 @@ def run(self):
287287
self._comm.Barrier()
288288
# copy some stuff from the runMLZ script:
289289
for kss in range(s0, s1):
290-
print(f"making {kss+1} of {ntot}...")
291-
if self.config.nrandom > 1:
292-
ir = kss // int(self.config.ntrees)
290+
print(f"making {kss + 1} of {ntot}...")
291+
if self.config.n_random > 1:
292+
ir = kss // int(self.config.n_trees)
293293
if ir != 0:
294294
traindata.newcat(ir)
295295
DD = "all"
@@ -300,16 +300,16 @@ def run(self):
300300
traindata.X,
301301
traindata.Y,
302302
forest="yes",
303-
minleaf=int(self.config.minleaf),
304-
mstar=int(self.config.natt),
303+
minleaf=int(self.config.min_leaf),
304+
mstar=int(self.config.n_att),
305305
dict_dim=DD,
306306
)
307307
elif self.config.tree_strategy == "sklearn":
308308
randx = rng.integers(low=0, high=25000, size=1)[0]
309309
T = DecisionTreeRegressor(
310310
random_state=randx,
311-
min_samples_leaf=self.config.minleaf,
312-
max_features=int(self.config.natt),
311+
min_samples_leaf=self.config.min_leaf,
312+
max_features=int(self.config.n_att),
313313
)
314314
T.fit(traindata.X, traindata.Y)
315315
else: # pragma: no cover already tested above
@@ -341,10 +341,10 @@ def run(self):
341341
redshift_col=self.config.redshift_col,
342342
att_dict=train_att_dict,
343343
keyatt=self.config.keyatt,
344-
nrandom=self.config.nrandom,
345-
ntrees=self.config.ntrees,
346-
minleaf=self.config.minleaf,
347-
natt=self.config.natt,
344+
n_random=self.config.n_random,
345+
n_trees=self.config.n_trees,
346+
min_leaf=self.config.min_leaf,
347+
n_att=self.config.n_att,
348348
sigmafactor=self.config.sigmafactor,
349349
bands=self.config.bands,
350350
rmsfactor=self.config.rmsfactor,
@@ -415,7 +415,7 @@ def _process_chunk(self, start, end, inputdata, first):
415415
test_att_dict = make_index_dict(self.config.err_dict, testkeys)
416416
zfine, zfine2, resz, resz2, wzin = analysis.get_zbins(self.attPars)
417417
zfine2 = zfine2[wzin]
418-
ntot = int(self.attPars.nrandom * self.attPars.ntrees)
418+
ntot = int(self.attPars.n_random * self.attPars.n_trees)
419419

420420
Ng_temp = np.array(list(inputdata.values()))
421421
# Ng = np.array(Ng_temp, 'i')

tests/test_tpz.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
def test_tpz_larger_training(treestrat, nrand):
1515
train_config_dict = {
1616
"hdf5_groupname": "photometry",
17-
"nrandom": nrand,
18-
"ntrees": 5,
17+
"n_random": nrand,
18+
"n_trees": 5,
1919
"model": "tpz_tests.pkl",
2020
"tree_strategy": treestrat,
2121
}
@@ -88,8 +88,8 @@ def test_tpz_input_data_format():
8888
nrand = 1
8989
train_config_dict = {
9090
"hdf5_groupname": "",
91-
"nrandom": nrand,
92-
"ntrees": 5,
91+
"n_random": nrand,
92+
"n_trees": 5,
9393
"model": "tpz_tests.pkl",
9494
"tree_strategy": treestrat,
9595
}

0 commit comments

Comments
 (0)