-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathobligations_outlays.R
More file actions
324 lines (271 loc) · 11.6 KB
/
Copy pathobligations_outlays.R
File metadata and controls
324 lines (271 loc) · 11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# Set working directory to the folder containing this script
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# Load required packages
library(tidyverse)
library(janitor)
library(httr)
library(jsonlite)
# don't use scientific notation
options(scipen=999)
##########
# import and process lists of contracts and assistance awards compiled by CCIJ/Wayan Vota
africa_contracts <- read_csv("data/africa_contracts_list.csv")
africa_grants <- read_csv("data/africa_grants_list.csv")
##########
# data from USAspending
###
# contracts
all_award_ids <- unique(africa_contracts$award_id)
contracts_usaspending <- tibble()
# Define the award type codes for contracts (run these sequentially)
award_type_codes <- c("A", "B", "C", "D") # contracts
award_type_codes <- c("IDV_A", "IDV_B", "IDV_B_A", "IDV_B_B", "IDV_B_C", "IDV_C", "IDV_D", "IDV_E") # idvs
page_limit <- 100
# API endpoint URL for searching spending by award
api_url <- "https://api.usaspending.gov/api/v2/search/spending_by_award/"
# Define the fields you want to retrieve
fields_to_retrieve <- c(
"Award ID",
"Recipient Name",
"Award Amount",
"Total Outlays",
"Description",
"Contract Award Type",
"Recipient UEI",
"Recipient Location",
"Primary Place of Performance",
"Awarding Agency",
"Awarding Sub Agency",
"Start Date",
"End Date",
"NAICS",
"PSC",
"recipient_id",
"prime_award_recipient_id"
)
# Set the batch size for award_id
batch_size <- 100
# Loop through the award id codes in batches
for (i in seq(1, length(all_award_ids), by = batch_size)) {
current_award_id_batch <- all_award_ids[i:min(i + batch_size - 1, length(all_award_ids))]
page_number <- 1
repeat {
# Construct the request body for the current page and batch of award ids
request_body <- list(
filters = list(
award_ids = current_award_id_batch,
award_type_codes = award_type_codes
),
fields = fields_to_retrieve,
page = page_number,
limit = page_limit,
sort = "Award ID",
order = "asc",
subawards = FALSE,
auditTrail = paste0("Results Table - Spending by batch (", i, "-", min(i + batch_size - 1, length(all_award_ids)), "), Page ", page_number)
)
# Convert the request body to JSON
json_body <- toJSON(request_body, auto_unbox = TRUE)
# Make the API POST request
response <- POST(
url = api_url,
body = json_body,
content_type_json()
)
# Check for successful response
if (http_status(response)$category == "Success") {
# Parse the JSON response
api_data <- content(response, "text", encoding = "UTF-8")
parsed_data <- fromJSON(api_data)
# Extract the award data from the current page
current_page_data <- parsed_data$results
if (!is.null(current_page_data) && length(current_page_data) > 0) {
contracts_usaspending <- bind_rows(contracts_usaspending, current_page_data)
}
print(paste0("Fetching batch ", i, " - ", min(i + batch_size - 1, length(all_award_ids)), ", page: ", page_number, "\n"))
# Check if there are more pages safely
if (!is.null(parsed_data$page_metadata) &&
!is.null(parsed_data$page_metadata$current) &&
!is.null(parsed_data$page_metadata$total) &&
!is.na(parsed_data$page_metadata$current) &&
!is.na(parsed_data$page_metadata$total) &&
parsed_data$page_metadata$current < parsed_data$page_metadata$total) {
page_number <- page_number + 1
} else {
# No more pages for the current batch of award ids
break
}
} else {
# Handle errors
cat("API request failed for batch", i, "-", min(i + batch_size - 1, length(all_award_ids)), ", page", page_number, "with status code:", http_status(response)$status, "\n")
print(content(response, "text", encoding = "UTF-8"))
break # Stop fetching pages for the current batch if an error occurs
}
# Add a small delay
Sys.sleep(1)
}
}
# some cleaning
contracts_usaspending <- contracts_usaspending %>%
clean_names() %>%
mutate(
start_date = ymd(start_date),
end_date = ymd(end_date),
awarding_agency = gsub("Agency for International Development", "USAID",awarding_agency),
awarding_agency = str_to_upper(awarding_agency),
)
# find any missing contracts
missing <- setdiff(africa_contracts$award_id,contracts_usaspending$award_id) # export any missing and run search at the site
missing <- paste(missing, collapse = ",")
writeLines(missing, "data/contracts_missing.txt")
# import downloaded data for missing contracts from manual search at USAspending
contracts_missing <- read_csv("data/contracts_missing_2025-10-01/Contracts_PrimeAwardSummaries_2025-10-01_H00M34S56_1.csv") %>%
select(award_id = award_id_piid, awarding_agency = awarding_agency_name,recipient_name,description = prime_award_base_transaction_description,
start_date = period_of_performance_start_date, end_date = period_of_performance_current_end_date, award_amount = total_obligated_amount, total_outlays = total_outlayed_amount) %>%
mutate(awarding_agency = gsub("Agency for International Development", "USAID",awarding_agency),
awarding_agency = str_to_upper(awarding_agency))
# combine contract data
contracts_usaspending <- bind_rows(contracts_usaspending,contracts_missing)
# join to contracts list and reformat
contracts_usaspending_edit <- inner_join(contracts_usaspending,africa_contracts, by = c("award_id", "recipient_name")) %>%
mutate(potential_cut = award_amount - total_outlays,
expired = case_when(end_date < "2025-01-20" ~ "TRUE"),
performance_country_flag = gsub("CONGO \\(KINSHASA\\)", "DR CONGO", performance_country_flag), # standardizing country names
performance_country_flag = gsub("CONGO \\(BRAZZAVILLE\\)", "REP. CONGO", performance_country_flag),
performance_country_flag = gsub("COTE D'IVOIRE", "CÔTE D’IVOIRE", performance_country_flag),
performance_country_flag = gsub("GAMBIA, THE", "GAMBIA", performance_country_flag),
performance_country_flag = gsub("CAPE VERDE", "CABO VERDE", performance_country_flag),
) %>%
arrange(-potential_cut) %>%
unnest_wider(recipient_location, names_sep = "_") %>%
unnest_wider(primary_place_of_performance, names_sep = "_") %>%
unnest_wider(naics, names_sep = "_") %>%
unnest_wider(psc, names_sep = "_") %>%
unique()
# write to CSV
write_csv(contracts_usaspending_edit, "data/contracts_usaspending_edit.csv", na = "")
#####
# grants
grants_usaspending <- tibble()
# Define the award type codes for assistance (run these sequentially)
award_type_codes <- c("02","03","04","05") # grants
award_type_codes <- c("06","10") # direct payments
award_type_codes <- c("-1","09","11") # other assistance
# API endpoint URL for searching spending by award (same as before)
api_url <- "https://api.usaspending.gov/api/v2/search/spending_by_award/"
# Define the fields you want to retrieve
fields_to_retrieve <- c(
"Award ID",
"Recipient Name",
"Award Amount",
"Total Outlays",
"Description",
"Contract Award Type",
"Recipient UEI",
"Recipient Location",
"Primary Place of Performance",
"Awarding Agency",
"Awarding Sub Agency",
"Start Date",
"End Date",
"NAICS",
"PSC",
"recipient_id",
"prime_award_recipient_id"
)
# Set the batch size for award_id
batch_size <- 100
# Initialize batch counter
batch_number <- 1
all_award_ids <- africa_grants$award_id
# Loop through the award_ids in batches
for (i in seq(1, length(all_award_ids), by = batch_size)) {
current_award_id_batch <- all_award_ids[i:min(i + batch_size - 1, length(all_award_ids))]
page_number <- 1
print(paste0("Fetching batch ", i, " - ", min(i + batch_size - 1, length(all_award_ids)), ", page: ", page_number, "\n"))
repeat {
# Construct the request body for the current page and batch of grants award ids
request_body <- list(
filters = list(
award_ids = current_award_id_batch,
award_type_codes = award_type_codes # Use the grant award type codes
),
fields = fields_to_retrieve,
page = page_number,
limit = page_limit,
sort = "Award ID",
order = "asc",
subawards = FALSE,
auditTrail = paste0("Results Table - Spending by batch (", batch_number, "), Page ", page_number)
)
# Convert the request body to JSON
json_body <- toJSON(request_body, auto_unbox = TRUE)
# Make the API POST request
response <- POST(
url = api_url,
body = json_body,
content_type_json()
)
# Check for successful response
if (http_status(response)$category == "Success") {
# Parse the JSON response
api_data <- content(response, "text", encoding = "UTF-8")
parsed_data <- fromJSON(api_data)
# Extract the award data from the current page
current_page_data <- parsed_data$results
if (!is.null(current_page_data) && length(current_page_data) > 0) {
grants_usaspending <- bind_rows(grants_usaspending, current_page_data) # Store in the new tibble
}
# Check if there are more pages safely
if (!is.null(parsed_data$page_metadata) &&
!is.null(parsed_data$page_metadata$current) &&
!is.null(parsed_data$page_metadata$total) &&
!is.na(parsed_data$page_metadata$current) &&
!is.na(parsed_data$page_metadata$total) &&
parsed_data$page_metadata$current < parsed_data$page_metadata$total) {
page_number <- page_number + 1
} else {
# No more pages for the current batch of IDV award ids
break
}
} else {
# Handle errors
cat(paste("API request failed for grant batch:", batch_number, ", page", page_number, "with status code:", http_status(response)$status, "\n"))
print(content(response, "text", encoding = "UTF-8"))
break # Stop fetching pages for the current batch if an error occurs
}
# Add a small delay
Sys.sleep(1)
}
# Increment batch counter
batch_number <- batch_number + 1
}
# some cleaning
grants_usaspending <- grants_usaspending %>%
clean_names() %>%
mutate(
start_date = ymd(start_date),
end_date = ymd(end_date),
awarding_agency = gsub("Agency for International Development", "USAID",awarding_agency),
awarding_agency = str_to_upper(awarding_agency)
)
glimpse(grants_usaspending)
missing <- setdiff(africa_grants$award_id,grants_usaspending$award_id) # export any missing and run search at the site (wasn't needed)
# join to grants list and reformat
grants_usaspending_edit <- inner_join(grants_usaspending,africa_grants, by = c("award_id", "recipient_name")) %>%
mutate(potential_cut = award_amount - total_outlays,
expired = case_when(end_date < "2025-01-20" ~ "TRUE"),
performance_country_flag = gsub("CONGO \\(KINSHASA\\)", "DR CONGO", performance_country_flag), # standardizing country names
performance_country_flag = gsub("CONGO \\(BRAZZAVILLE\\)", "REP. CONGO", performance_country_flag),
performance_country_flag = gsub("COTE D'IVOIRE", "CÔTE D’IVOIRE", performance_country_flag),
performance_country_flag = gsub("GAMBIA, THE", "GAMBIA", performance_country_flag),
performance_country_flag = gsub("CAPE VERDE", "CABO VERDE", performance_country_flag),
) %>%
arrange(-potential_cut) %>%
unnest_wider(recipient_location, names_sep = "_") %>%
unnest_wider(primary_place_of_performance, names_sep = "_") %>%
unnest_wider(naics, names_sep = "_") %>%
unnest_wider(psc, names_sep = "_") %>%
unique()
# write to CSV
write_csv(grants_usaspending_edit, "data/grants_usaspending_edit.csv", na = "")