Skip to content

Commit 09ba9e6

Browse files
Add Pipe For Float Formatting (#587)
- Problem: MiXeR reported results with too many significant digits, which made the result tables feel cluttered. - Solution: add a pipe that formats the numeric values of a column with a given format string (e.g. `.4g` for four significant digits), and use this pipe for the MiXeR results.
1 parent 57ec3e2 commit 09ba9e6

4 files changed

Lines changed: 54 additions & 6 deletions

File tree

experiments/tralfamadorian97/for_documentation/bivariate_mixer_table_gen.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
from mecfs_bio.assets.gwas.me_cfs.decode_me.analysis.mixer.decode_me_univariate_mixer import DECODE_ME_UNIVARIATE_MIXER
12
from mecfs_bio.assets.gwas.multi_trait.polygenic_overlap.bivariate_mixer.mecfs_pain_bivariate_mixer import \
23
MECFS_PAIN_BIVARIATE_MIXER
4+
from mecfs_bio.assets.gwas.multisite_pain.johnston_et_al.analysis.mixer.johnston_et_al_univariate_mixer import \
5+
JOHNSTON_ET_AL_UNIVARIATE_MIXER
36
from mecfs_bio.figures.key_scripts.generate_figures import generate_figures
47

58

69
def go():
7-
generate_figures([MECFS_PAIN_BIVARIATE_MIXER.result_table_markdown_task])
10+
generate_figures([MECFS_PAIN_BIVARIATE_MIXER.result_table_markdown_task,
11+
DECODE_ME_UNIVARIATE_MIXER.result_markdown_table_task,
12+
JOHNSTON_ET_AL_UNIVARIATE_MIXER.result_markdown_table_task
13+
])
814

915

1016
if __name__ == '__main__':

experiments/tralfamadorian97/runs/mixer_runs.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010

1111
def run_mixer():
1212
DEFAULT_RUNNER.run(
13-
# [
14-
# ]+DECODE_ME_UNIVARIATE_MIXER.terminal_tasks()
15-
# JOHNSTON_ET_AL_UNIVARIATE_MIXER.terminal_tasks(),
16-
MECFS_PAIN_BIVARIATE_MIXER.terminal_tasks(),
13+
[
14+
]+DECODE_ME_UNIVARIATE_MIXER.terminal_tasks()
15+
+JOHNSTON_ET_AL_UNIVARIATE_MIXER.terminal_tasks()
16+
+MECFS_PAIN_BIVARIATE_MIXER.terminal_tasks(),
1717
incremental_save=True,
1818
must_rebuild_transitive=[
19-
MECFS_PAIN_BIVARIATE_MIXER.results
19+
MECFS_PAIN_BIVARIATE_MIXER.results,
20+
JOHNSTON_ET_AL_UNIVARIATE_MIXER.results_task,
21+
DECODE_ME_UNIVARIATE_MIXER.results_task
2022
# DECODE_ME_UNIVARIATE_MIXER.combine_task
2123
],
2224
settings=TopologicalSchedulerSettings(

mecfs_bio/asset_generator/mixer_asset_generator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
)
4242
from mecfs_bio.build_system.task.pipes.composite_pipe import CompositePipe
4343
from mecfs_bio.build_system.task.pipes.drop_col_pipe import DropColPipe
44+
from mecfs_bio.build_system.task.pipes.format_numbers_pipe import FormatFloatNumbersPipe
4445
from mecfs_bio.build_system.task.pipes.heritability_conversion_pipe import (
4546
HeritabilityConversionPipe,
4647
)
@@ -152,6 +153,7 @@ def univariate_mixer_asset_generator(
152153
TransposePipe(),
153154
RenameColByPositionPipe(col_position=0, col_new_name="Parameter"),
154155
RenameColByPositionPipe(col_position=1, col_new_name="Value"),
156+
FormatFloatNumbersPipe(col="Value", format_str=".4g"),
155157
]
156158
),
157159
)
@@ -254,6 +256,7 @@ def bivariate_mixer_asset_generator(
254256
RenameColByPositionPipe(col_position=0, col_new_name="Parameter"),
255257
RenameColByPositionPipe(col_position=1, col_new_name="Value"),
256258
SelectColPipe(["Parameter", "Value"]),
259+
FormatFloatNumbersPipe(col="Value", format_str=".4g"),
257260
]
258261
),
259262
)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""
2+
Pipe to format a column of floating point values
3+
"""
4+
5+
import narwhals
6+
from attrs import frozen
7+
8+
from mecfs_bio.build_system.task.pipes.data_processing_pipe import DataProcessingPipe
9+
10+
11+
@frozen
class FormatFloatNumbersPipe(DataProcessingPipe):
    """
    Pipe that renders the float-convertible entries of one column as strings.

    Every value in column `col` that `float()` accepts is replaced by its
    rendering under the format spec `format_str` (for example ".4g").
    Values that cannot be converted are passed through unchanged.
    """

    # Format spec applied to each converted value.
    format_str: str
    # Name of the column whose values are formatted.
    col: str

    def process(self, x: narwhals.LazyFrame) -> narwhals.LazyFrame:
        # Materialize as a pandas frame so the column can be rewritten value by value.
        frame = x.collect().to_pandas()
        frame[self.col] = [
            f"{float(entry):{self.format_str}}"
            if _convertible_to_float(entry)
            else entry
            for entry in frame[self.col].tolist()
        ]
        return narwhals.from_native(frame).lazy()
30+
31+
32+
def _convertible_to_float(val: str) -> bool:
33+
try:
34+
float(val)
35+
return True
36+
except (ValueError, TypeError):
37+
return False

0 commit comments

Comments
 (0)