|
| 1 | +use crate::RSLifeResult; |
| 2 | +use crate::mt_config::spreadsheet_helpers::{parse_excel_data, parse_excel_headers}; |
| 3 | +use calamine::{Data, Reader, Xlsx}; |
| 4 | +use polars::prelude::*; |
| 5 | +use reqwest::blocking::get; |
| 6 | +use std::io::Cursor; |
| 7 | + |
| 8 | +pub struct AusGovActMortXLS { |
| 9 | + pub description: String, |
| 10 | + pub dataframe: DataFrame, |
| 11 | +} |
| 12 | + |
| 13 | +impl AusGovActMortXLS { |
| 14 | + /// Load an Australian mortality table from a direct URL |
| 15 | + /// |
| 16 | + /// This method downloads the XLS file from the given URL, extracts the sheet name from the URL, and parses the data. |
| 17 | + /// |
| 18 | + /// # Parameters |
| 19 | + /// - `url`: Direct URL to the XLS file on the IFOA website. |
| 20 | + /// |
| 21 | + /// # Errors |
| 22 | + /// - Network errors or invalid URL |
| 23 | + /// - Sheet not found in workbook |
| 24 | + /// - Invalid data or unsupported structure |
| 25 | + pub fn from_url(gender: &str, period: &str) -> RSLifeResult<Self> { |
| 26 | + let response = get( |
| 27 | + "https://aga.gov.au/sites/aga.gov.au/files/2024-12/historical-mortality-rates-life-expectancies_0.xlsx", |
| 28 | + )?; |
| 29 | + |
| 30 | + let bytes = response.bytes()?; |
| 31 | + let mut workbook = Xlsx::new(Cursor::new(bytes))?; |
| 32 | + |
| 33 | + // Depend on gender input to determine which sheet to parse |
| 34 | + let sheet_name = match gender { |
| 35 | + "M" | "m" | "Male" | "male" => "Historical Male qx", |
| 36 | + "F" | "f" | "Female" | "female" => "Historical Female qx", |
| 37 | + _ => return Err(format!("Unknown gender: {}", gender).into()), |
| 38 | + }; |
| 39 | + |
| 40 | + // Check if the expected sheet is present |
| 41 | + let sheet_names = workbook.sheet_names().to_owned(); |
| 42 | + if !sheet_names.iter().any(|n| n == sheet_name) { |
| 43 | + return Err(format!("Sheet '{sheet_name}' not found in workbook").into()); |
| 44 | + } |
| 45 | + |
| 46 | + // Obtain the sheet range |
| 47 | + let range = workbook.worksheet_range(sheet_name)?; |
| 48 | + |
| 49 | + // Obtain data |
| 50 | + let data = parse_data(&range, period)?; |
| 51 | + |
| 52 | + // Construct DataFrame |
| 53 | + let df = df! { |
| 54 | + "age" => &data[0], |
| 55 | + "qx" => &data[1], |
| 56 | + }?; |
| 57 | + |
| 58 | + // Return the IFOAMortXLS instance |
| 59 | + let gender_description = match gender { |
| 60 | + "m" | "M" | "male" | "Male" => "Male", |
| 61 | + "f" | "F" | "female" | "Female" => "Female", |
| 62 | + _ => "Unknown", |
| 63 | + }; |
| 64 | + |
| 65 | + let descrription = format!( |
| 66 | + "Australian Goverment Actuary Mortality Data - {gender_description} - {period}" |
| 67 | + ); |
| 68 | + |
| 69 | + let result = AusGovActMortXLS { |
| 70 | + description: descrription, |
| 71 | + dataframe: df, |
| 72 | + }; |
| 73 | + |
| 74 | + Ok(result) |
| 75 | + } |
| 76 | +} |
| 77 | + |
| 78 | +// ================================================ |
| 79 | +// PRIVATE FUNCTIONS |
| 80 | +// ================================================ |
| 81 | +fn parse_data(range: &calamine::Range<Data>, period: &str) -> RSLifeResult<Vec<Vec<f64>>> { |
| 82 | + let headers = parse_excel_headers(range, 1)?; // Header row is row 2 (0-based index 1) |
| 83 | + |
| 84 | + // Column index matching period |
| 85 | + let period_col_index = headers |
| 86 | + .iter() |
| 87 | + .position(|h| h.trim() == period) |
| 88 | + .ok_or_else(|| format!("Period '{period}' not found in headers"))?; |
| 89 | + |
| 90 | + // This will contain data age column to column of interest |
| 91 | + let data = parse_excel_data(range, 2, period_col_index + 1)?; // Age from row 3 to 121 (0-based index 2 to 120), column 0 |
| 92 | + |
| 93 | + // However, we are keeping only the first and the last one |
| 94 | + let mut selected_data: Vec<Vec<f64>> = vec![Vec::new(); 2]; |
| 95 | + if let Some(first_row) = data.first() { |
| 96 | + selected_data[0].push(first_row[0]); // Age from first row |
| 97 | + } |
| 98 | + |
| 99 | + if let Some(last_row) = data.last() { |
| 100 | + selected_data[1].push(last_row[period_col_index]); // qx from last row |
| 101 | + } |
| 102 | + |
| 103 | + // You may want to return selected_data or handle it as needed |
| 104 | + Ok(selected_data) |
| 105 | +} |
| 106 | + |
| 107 | +// ================================================ |
| 108 | +// UNIT TESTS |
| 109 | +// ================================================ |
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check of `from_url` for the male sheet.
    ///
    /// Requires network access: the workbook is downloaded once here to discover
    /// a period header that really exists, and again inside `from_url`.
    #[test]
    fn test_from_url_male_with_available_period() {
        // Download the workbook and read the headers of the male sheet so the
        // test does not depend on a hard-coded period label.
        let bytes = get("https://aga.gov.au/sites/aga.gov.au/files/2024-12/historical-mortality-rates-life-expectancies_0.xlsx")
            .unwrap()
            .bytes()
            .unwrap();
        let mut workbook = Xlsx::new(Cursor::new(bytes)).unwrap();
        let male_range = workbook.worksheet_range("Historical Male qx").unwrap();
        let headers = parse_excel_headers(&male_range, 1).unwrap();

        // Index 0 is the age column, so index 1 is the first period header.
        let test_period = &headers[1];

        // Exercise the loader with the male sheet and the discovered period.
        let result = AusGovActMortXLS::from_url("Male", test_period);
        assert!(result.is_ok(), "Loading AGA mortality data should succeed");
        let aus_mort = result.unwrap();

        // The description must name the source, the gender and the period.
        assert!(aus_mort.description.contains("Australian"));
        assert!(aus_mort.description.contains("Male"));
        assert!(aus_mort.description.contains(test_period));

        // The table must be non-empty and exactly two columns wide.
        assert!(
            !aus_mort.dataframe.is_empty(),
            "DataFrame should not be empty"
        );
        assert_eq!(
            aus_mort.dataframe.width(),
            2,
            "Should have 2 columns: age and qx"
        );
        assert!(
            aus_mort.dataframe.height() > 0,
            "Should have at least one row of data"
        );

        // Both expected column names must be present.
        let column_names = aus_mort.dataframe.get_column_names();
        let has_column = |wanted: &str| column_names.iter().any(|name| name.as_str() == wanted);
        assert!(has_column("age"), "Should contain 'age' column");
        assert!(has_column("qx"), "Should contain 'qx' column");

        // Summary output, visible when running with `--nocapture`.
        let (rows, cols) = (aus_mort.dataframe.height(), aus_mort.dataframe.width());
        println!("✓ Test passed! Successfully loaded Australian mortality data for males.");
        println!(" Description: {}", aus_mort.description);
        println!(" Data shape: {} rows x {} columns", rows, cols);
    }
}
0 commit comments