Skip to content

Commit 85bf75b

Browse files
committed
Adds display date columns to mapped output dataset. Simplifies logic for generating currency conversion year.
1 parent 5c195a6 commit 85bf75b

2 files changed

Lines changed: 58 additions & 33 deletions

File tree

services/map/src/map_clean/main.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,46 @@ def main(input_fpath: str, output_fpath: str, logger: logging.Logger) -> None:
4848
logger.info("Dropping NBIM records.")
4949
clean_df = clean_df.query("source != 'NBIM'")
5050

51+
# Define local function to generate display date
52+
def get_display_date(row: pd.Series) -> tuple[str, str]:
    """Determines the project date to display on the website.

    Date fields are checked in a fixed priority order and the
    first populated one wins. Missing values (``None``, ``NaN``,
    ``pd.NA``, ``NaT``) and empty strings are all treated as
    "not populated".

    Args:
        row: A row of data from the DataFrame. It must contain
            every date field named in the priority order below.

    Returns:
        A two-item tuple consisting of the date field name
        (e.g., "date_signed", "date_effective") and
        value (e.g., "2023-04-19") for display on the
        website. If a project does not have any dates
        populated, a two-item tuple of empty strings
        is returned instead.

    Raises:
        KeyError: If one of the ranked date fields is absent
            from the row.
    """
    # Highest-priority field first; a tuple because the ranking is fixed.
    ranked_date_types = (
        "date_signed",
        "date_approved",
        "date_disclosed",
        "date_under_appraisal",
        "date_effective",
        "fiscal_year_effective",
        "date_planned_effective",
        "date_last_updated",
        "date_actual_close",
        "date_revised_close",
        "date_planned_close",
    )
    for date_type in ranked_date_types:
        value = row[date_type]
        # NaN is truthy and pd.NA raises on truth-testing, so screen out
        # missing values explicitly before the plain emptiness check.
        if pd.isna(value) or not value:
            continue
        return date_type, value
    return "", ""
83+
84+
# Add "Display Date" and "Display Date Type" columns
85+
logger.info("Adding display date name and type columns.")
# get_display_date returns (field name, value) pairs, so the field names
# belong in "display_date_type" and the values in "display_date".
# Collecting the pairs row by row also works when the frame has no rows.
display_pairs = [get_display_date(row) for _, row in clean_df.iterrows()]
# assign() builds a new frame instead of writing into the filtered frame
# in place; "display_date" is listed first to keep the column order.
clean_df = clean_df.assign(
    display_date=[date_value for _, date_value in display_pairs],
    display_date_type=[date_type for date_type, _ in display_pairs],
)
90+
5191
# Write mapped data to output file
5292
try:
5393
logger.info(f'Writing mapped project data to "{output_fpath}".')

services/transform/src/clean_raw/currency.py

Lines changed: 18 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,11 @@ def _build_annual_rate_lookup(self, annual_rates: pd.DataFrame) -> dict:
6767
exchange rate for that year.
6868
"""
6969
# Group annual exchange rates by country and convert to dict
70-
country_rates = annual_rates.set_index("COUNTRY").to_dict(
71-
orient="index"
72-
)
70+
country_rates = annual_rates.set_index("COUNTRY").to_dict(orient="index")
7371

7472
# Create lookup for annual exchange rates
7573
lookup = {}
7674
for country_code, rates in country_rates.items():
77-
7875
# Parse rates into DataFrame, with each row representing a year
7976
country_df = pd.DataFrame([rates]).T.reset_index()
8077

@@ -119,14 +116,11 @@ def _build_monthly_rate_lookup(self, monthly_rates: pd.DataFrame) -> dict:
119116
average exchange rate for the months in that year.
120117
"""
121118
# Group monthly exchange rates by country and convert to dict
122-
country_rates = monthly_rates.set_index("COUNTRY").to_dict(
123-
orient="index"
124-
)
119+
country_rates = monthly_rates.set_index("COUNTRY").to_dict(orient="index")
125120

126121
# Create lookup for averaged monthly exchange rates
127122
lookup = {}
128123
for country_code, rates in country_rates.items():
129-
130124
# Parse rates into DataFrame, with each row representing a reporting period
131125
country_df = pd.DataFrame([rates]).T.reset_index()
132126

@@ -188,9 +182,7 @@ def _load_currency_county_map(self) -> dict:
188182
with open(CURRENCY_COUNTRY_MAP_FPATH, encoding="utf-8") as f:
189183
return json.load(f)
190184

191-
def _load_exchange_rates(
192-
self, frequency: Literal["A", "M" "Q"]
193-
) -> pd.DataFrame:
185+
def _load_exchange_rates(self, frequency: Literal["A", "MQ"]) -> pd.DataFrame:
194186
"""Loads exchange rates from the International Monetary Fund (IMF).
195187
196188
NOTE: Here, rates represent conversions from domestic/local currency
@@ -308,22 +300,19 @@ def get_conversion_year(row: pd.Series) -> str:
308300
The year, formatted as YYYY, or an empty string
309301
if the year cannot be determined.
310302
"""
311-
if row["date_signed"]:
312-
return row["date_signed"][:4]
313-
elif row["date_approved"]:
314-
return row["date_approved"][:4]
315-
elif row["date_disclosed"]:
316-
return row["date_disclosed"][:4]
317-
elif row["date_under_appraisal"][:4]:
318-
return row["date_under_appraisal"][:4]
319-
elif row["date_effective"]:
320-
return row["date_effective"][:4]
321-
elif row["fiscal_year_effective"]:
322-
return row["fiscal_year_effective"]
323-
elif row["date_planned_effective"]:
324-
return row["date_planned_effective"][:4]
325-
else:
326-
return ""
303+
ranked_date_types = [
304+
"date_signed",
305+
"date_approved",
306+
"date_disclosed",
307+
"date_under_appraisal",
308+
"date_effective",
309+
"fiscal_year_effective",
310+
"date_planned_effective",
311+
]
312+
for date_type in ranked_date_types:
313+
if row[date_type]:
314+
return row[date_type][:4]
315+
return ""
327316

328317
# Calculate year for each project
329318
copy["conversion_year"] = copy.apply(get_conversion_year, axis=1)
@@ -357,13 +346,9 @@ def convert(row: pd.Series) -> int | None:
357346
return int(row["total_amount"] * row["conversion_rate"])
358347

359348
# Convert debt amounts to USD in same year
360-
copy["converted_amount_usd"] = copy.apply(convert, axis=1).astype(
361-
pd.Int64Dtype()
362-
)
349+
copy["converted_amount_usd"] = copy.apply(convert, axis=1).astype(pd.Int64Dtype())
363350

364351
# Replace any NA values resulting from conversion
365-
copy["converted_amount_usd"] = copy["converted_amount_usd"].replace(
366-
{pd.NA: None}
367-
)
352+
copy["converted_amount_usd"] = copy["converted_amount_usd"].replace({pd.NA: None})
368353

369354
return copy

0 commit comments

Comments
 (0)