Skip to content

Commit 82c00e9

Browse files
committed
feat: update filters, pipeline, i18n translations and tests
1 parent 49c0f99 commit 82c00e9

7 files changed

Lines changed: 111 additions & 15 deletions

File tree

src/components/filters.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@ def render_page_filters(df: pd.DataFrame) -> pd.DataFrame:
2929
with c1:
3030
# 1. Tratamento de Réplicas
3131
method_options = {
32-
"media": "Média das Réplicas",
33-
"desdobrar": "Desdobrar em Linhas",
34-
"replica_1": "Réplica 1 Apenas",
35-
"replica_2": "Réplica 2 Apenas",
36-
"replica_3": "Réplica 3 Apenas (IAF)"
32+
"media": t("sidebar.rep.media", default="Média das Réplicas"),
33+
"mediana": t("sidebar.rep.mediana", default="Mediana das Réplicas"),
34+
"desdobrar": t("sidebar.rep.desdobrar", default="Desdobrar em Linhas"),
35+
"replica_1": t("sidebar.rep.replica_1", default="Réplica 1 Apenas"),
36+
"replica_2": t("sidebar.rep.replica_2", default="Réplica 2 Apenas"),
37+
"replica_3": t("sidebar.rep.replica_3", default="Réplica 3 Apenas (IAF)"),
3738
}
38-
39+
3940
current_method = st.session_state.get("rep_method", "media")
4041
method_keys = list(method_options.keys())
4142
method_idx = method_keys.index(current_method) if current_method in method_keys else 0
@@ -47,6 +48,8 @@ def render_page_filters(df: pd.DataFrame) -> pd.DataFrame:
4748
format_func=lambda x: method_options[x],
4849
key="filter_rep_method"
4950
)
51+
if selected_method == "mediana":
52+
st.caption(":information_source: " + t("sidebar.rep.mediana_note"))
5053

5154
# Se o método mudou, reprocessa o pipeline
5255
if selected_method != current_method:

src/i18n/locales/en.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,17 @@
506506
"comparative.hourly.download": "Download hourly series (CSV)",
507507
"sidebar.filters_title": "Global Filters",
508508
"sidebar.rep_method_label": "Replicate Treatment",
509+
"sidebar.rep.media": "Mean of Replicates",
510+
"sidebar.rep.mediana": "Median of Replicates",
511+
"sidebar.rep.desdobrar": "Unfold into Rows",
512+
"sidebar.rep.replica_1": "Replicate 1 Only",
513+
"sidebar.rep.replica_2": "Replicate 2 Only",
514+
"sidebar.rep.replica_3": "Replicate 3 Only (IAF)",
515+
"sidebar.rep.mediana_note": "With n=2 replicates (Chl a, Chl b), the median is mathematically identical to the mean. The robustness gain applies only to IAF (n=3), where the median keeps the middle of the three readings, so a single outlying reading cannot skew the result.",
516+
"pipeline.rep_grouped": "Grouped",
517+
"pipeline.rep_expanded": "Expanded",
518+
"pipeline.rep_method_label": "Change replicate treatment:",
519+
"pipeline.section_info": "Automated Physiology Cleaning Pipeline",
509520
"sidebar.filter_crop": "Crop",
510521
"sidebar.filter_mun": "Municipality",
511522
"sidebar.filter_faz": "Farm",

src/i18n/locales/es.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,17 @@
506506
"comparative.hourly.download": "Descargar serie horaria (CSV)",
507507
"sidebar.filters_title": "Filtros Globales",
508508
"sidebar.rep_method_label": "Tratamiento de Réplicas",
509+
"sidebar.rep.media": "Media de las Réplicas",
510+
"sidebar.rep.mediana": "Mediana de las Réplicas",
511+
"sidebar.rep.desdobrar": "Desdoblar en Filas",
512+
"sidebar.rep.replica_1": "Sólo Réplica 1",
513+
"sidebar.rep.replica_2": "Sólo Réplica 2",
514+
"sidebar.rep.replica_3": "Sólo Réplica 3 (IAF)",
515+
"sidebar.rep.mediana_note": "Con n=2 réplicas (Chl a, Chl b), la mediana es matemáticamente idéntica a la media. La ganancia de robustez aparece sólo en IAF (n=3), donde la mediana toma el valor central de las tres lecturas, de modo que una única lectura atípica no distorsiona el resultado.",
516+
"pipeline.rep_grouped": "Agrupado",
517+
"pipeline.rep_expanded": "Expandido",
518+
"pipeline.rep_method_label": "Cambiar tratamiento de réplicas:",
519+
"pipeline.section_info": "Pipeline Automatizado de Limpieza de Fisiología",
509520
"sidebar.filter_crop": "Cultivo",
510521
"sidebar.filter_mun": "Municipio",
511522
"sidebar.filter_faz": "Hacienda",

src/i18n/locales/pt.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,17 @@
506506
"comparative.hourly.download": "Baixar série horária (CSV)",
507507
"sidebar.filters_title": "Filtros Globais",
508508
"sidebar.rep_method_label": "Tratamento de Réplicas",
509+
"sidebar.rep.media": "Média das Réplicas",
510+
"sidebar.rep.mediana": "Mediana das Réplicas",
511+
"sidebar.rep.desdobrar": "Desdobrar em Linhas",
512+
"sidebar.rep.replica_1": "Réplica 1 Apenas",
513+
"sidebar.rep.replica_2": "Réplica 2 Apenas",
514+
"sidebar.rep.replica_3": "Réplica 3 Apenas (IAF)",
515+
"sidebar.rep.mediana_note": "Com n=2 réplicas (Chl a, Chl b), a mediana é matematicamente idêntica à média. O ganho de robustez aparece apenas no IAF (n=3), onde a mediana fica com o valor central das três leituras, de modo que uma única leitura discrepante não distorce o resultado.",
516+
"pipeline.rep_grouped": "Agrupado",
517+
"pipeline.rep_expanded": "Expandido",
518+
"pipeline.rep_method_label": "Alterar Tratamento de Réplicas:",
519+
"pipeline.section_info": "Pipeline de Limpeza Automatizada de Fisiologia",
509520
"sidebar.filter_crop": "Cultura",
510521
"sidebar.filter_mun": "Município",
511522
"sidebar.filter_faz": "Fazenda",

src/pages/pipeline.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,25 @@ def render():
3232

3333
# Exibe a opção de tratamento de réplicas também na página para conveniência
3434
method_options = {
35-
"media": "Média das Réplicas (Agrupado)",
36-
"desdobrar": "Desdobrar em Linhas (Expandido)",
37-
"replica_1": "Réplica 1 Apenas",
38-
"replica_2": "Réplica 2 Apenas",
39-
"replica_3": "Réplica 3 Apenas (IAF)"
35+
"media": t("sidebar.rep.media", default="Média das Réplicas") + " (" + t("pipeline.rep_grouped", default="Agrupado") + ")",
36+
"mediana": t("sidebar.rep.mediana", default="Mediana das Réplicas") + " (" + t("pipeline.rep_grouped", default="Agrupado") + ")",
37+
"desdobrar": t("sidebar.rep.desdobrar", default="Desdobrar em Linhas") + " (" + t("pipeline.rep_expanded", default="Expandido") + ")",
38+
"replica_1": t("sidebar.rep.replica_1", default="Réplica 1 Apenas"),
39+
"replica_2": t("sidebar.rep.replica_2", default="Réplica 2 Apenas"),
40+
"replica_3": t("sidebar.rep.replica_3", default="Réplica 3 Apenas (IAF)"),
4041
}
41-
42+
43+
method_keys = list(method_options.keys())
44+
method_idx = method_keys.index(rep_method) if rep_method in method_keys else 0
4245
selected_method = st.selectbox(
4346
t("pipeline.rep_method_label", default="Alterar Tratamento de Réplicas:"),
44-
options=list(method_options.keys()),
45-
index=list(method_options.keys()).index(rep_method),
47+
options=method_keys,
48+
index=method_idx,
4649
format_func=lambda x: method_options[x],
4750
key="pipeline_rep_method"
4851
)
52+
if selected_method == "mediana":
53+
st.caption(":information_source: " + t("sidebar.rep.mediana_note"))
4954

5055
# Se mudar na página, atualiza e re-executa
5156
if selected_method != rep_method:

src/pipeline.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ def clean_fisiologia_data(
6464
"""Aplica o pipeline completo de limpeza e tratamento de réplicas de Fisiologia.
6565
6666
Modos de tratamento de réplicas (rep_method):
67-
- "media": Calcula a média entre as repetições disponíveis.
67+
- "media": Calcula a média aritmética entre as repetições disponíveis.
68+
- "mediana": Calcula a mediana entre as repetições (robusta a outliers; com
69+
n=2 réplicas é matematicamente idêntica à média).
6870
- "desdobrar": Desdobra as réplicas 1, 2 e 3 em linhas independentes.
6971
- "replica_1": Utiliza apenas as colunas da réplica 1.
7072
- "replica_2": Utiliza apenas as colunas da réplica 2.
@@ -147,6 +149,29 @@ def clean_fisiologia_data(
147149

148150
step_desc = "Consolidação de réplicas por média aritmética"
149151

152+
elif rep_method == "mediana":
153+
# Mediana das réplicas: mais robusta a outliers que a média.
154+
# Note: com n=2 (Chl a/b), mediana == média; o ganho aparece em IAF (n=3).
155+
# Os nomes das colunas de saída ficam idênticos ("Chl_a_media", "IAF_media",
156+
# etc.) para manter compatibilidade com filtros, EDA e modelagem; apenas o
157+
# cálculo subjacente muda.
158+
if c_chl_a or c_chl_a_1:
159+
out["Chl_a_media"] = out[[c for c in [c_chl_a, c_chl_a_1] if c]].median(axis=1)
160+
else:
161+
out["Chl_a_media"] = np.nan
162+
163+
if c_chl_b or c_chl_b_1:
164+
out["Chl_b_media"] = out[[c for c in [c_chl_b, c_chl_b_1] if c]].median(axis=1)
165+
else:
166+
out["Chl_b_media"] = np.nan
167+
168+
if c_iaf or c_iaf_1 or c_iaf_2:
169+
out["IAF_media"] = out[[c for c in [c_iaf, c_iaf_1, c_iaf_2] if c]].median(axis=1)
170+
else:
171+
out["IAF_media"] = np.nan
172+
173+
step_desc = "Consolidação de réplicas por mediana"
174+
150175
elif rep_method == "desdobrar":
151176
# Cria três dataframes independentes representando cada réplica
152177
dfs = []

tests/test_pipeline.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,36 @@ def test_clean_pipeline_replica_especifica(self):
108108
assert row2["Chl_a_media"] == 28.0 # valor de Chl a.1 (Replica 2)
109109
assert row2["IAF_media"] == 2.8
110110

111+
def test_clean_pipeline_mediana(self):
112+
df_clean, logs = clean_fisiologia_data(self.df_raw, rep_method="mediana")
113+
assert len(df_clean) == 1
114+
row = df_clean.iloc[0]
115+
# Chl a: median(30.0, 28.0) = 29.0 → idêntico à média com n=2
116+
assert row["Chl_a_media"] == pytest.approx(29.0)
117+
# Chl b: median(10.0, 8.0) = 9.0 → idêntico à média com n=2
118+
assert row["Chl_b_media"] == pytest.approx(9.0)
119+
# IAF: median(3.0, 2.8, 3.2) = 3.0 → coincide com a média neste caso
120+
assert row["IAF_media"] == pytest.approx(3.0)
121+
# Verifica que a etapa correta foi registrada
122+
assert any("mediana" in step.step.lower() for step in logs)
123+
124+
def test_mediana_robustness_with_outlier_iaf(self):
125+
# Cenário em que mediana e média divergem: outlier em IAF.2.
126+
df = pd.DataFrame({
127+
"Cultura": ["Soja"],
128+
"Uso atual": ["Perene"],
129+
"Época": ["Verão"],
130+
"A": [25.0],
131+
"IAF": [3.0],
132+
"IAF.1": [3.1],
133+
"IAF.2": [99.0], # leitura espúria (ex.: ceptômetro lendo o céu)
134+
})
135+
media_clean, _ = clean_fisiologia_data(df, rep_method="media")
136+
mediana_clean, _ = clean_fisiologia_data(df, rep_method="mediana")
137+
# média puxa o resultado pelo outlier; mediana descarta-o
138+
assert media_clean.iloc[0]["IAF_media"] == pytest.approx((3.0 + 3.1 + 99.0) / 3)
139+
assert mediana_clean.iloc[0]["IAF_media"] == pytest.approx(3.1)
140+
111141

112142
class TestBuildStepReport:
113143
def test_percent_removed(self):

0 commit comments

Comments
 (0)