Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
duckdb_version: v1.4.4
ci_tools_version: v1.4-andium
extension_name: eurostat
exclude_archs: 'windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads'
exclude_archs: 'windows_amd64_mingw'

code-quality-check:
name: Code Quality Check
Expand Down
13 changes: 12 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,20 @@ find_package(LibXml2 REQUIRED)
set(EXTENSION_NAME ${TARGET_NAME}_extension)
set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)

if(EMSCRIPTEN)
# <EXTENSION>_LINKED_LIBS only influences the Wasm compilation. It is unclear
# why this is needed, but without it some global symbols are not properly
# exported. Listing the vcpkg static libraries here works around the problem
# by basically re-linking them at the moment the Wasm binary is actually
# produced.
set(DUCKDB_EXTENSION_EUROSTAT_LINKED_LIBS
"../../vcpkg_installed/wasm32-emscripten/lib/lib*.a")
endif()

project(${TARGET_NAME})
include_directories(src/include)
include_directories(${CMAKE_SOURCE_DIR}/third_party/httplib)
# Add the bundled httplib headers to the include path for every target except
# Emscripten. NOTE(review): presumably httplib is not usable in the Wasm
# build - confirm.
if(NOT EMSCRIPTEN)
include_directories(${CMAKE_SOURCE_DIR}/third_party/httplib)
endif()

set(EXTENSION_SOURCES src/eurostat_extension.cpp)
add_subdirectory(src/eurostat)
Expand Down
1 change: 0 additions & 1 deletion src/eurostat/eurostat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ namespace eurostat {

//! Get the level of a GEO code in the NUTS classification, or whether it is considered an aggregate.
std::string Dimension::GetGeoLevelFromGeoCode(const std::string &geo_code) {

// Evaluates to true when `str` (an object exposing size() and c_str(), e.g. std::string)
// begins with the NUL-terminated C string `prefix`.
// Both parameters are parenthesized so that expression arguments (e.g. `a + b`) expand safely.
// NOTE: each argument is evaluated more than once; pass side-effect-free expressions only.
#define STARTS_WITH(str, prefix)                                                                                     \
	((strlen(prefix) <= (str).size()) && strncmp((str).c_str(), (prefix), strlen(prefix)) == 0)

//
Expand Down
16 changes: 1 addition & 15 deletions src/eurostat/eurostat_data_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ namespace {
//======================================================================================================================

struct ES_Read {

//------------------------------------------------------------------------------------------------------------------
// Bind
//------------------------------------------------------------------------------------------------------------------
Expand All @@ -64,7 +63,6 @@ struct ES_Read {

static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {

D_ASSERT(input.inputs.size() == 2);

const string &provider_id = StringValue::Get(input.inputs[0]);
Expand Down Expand Up @@ -127,7 +125,6 @@ struct ES_Read {
//! Parse a data row from a TSV line.
static bool ParseDatarow(State &data_table, const std::vector<string> &time_periods, int32_t geo_column_index,
const string &line, std::unordered_map<string, bool> &row_keys, const bool &check_keys) {

std::vector<bool> state_keys;

// Split line by tabs.
Expand Down Expand Up @@ -182,7 +179,6 @@ struct ES_Read {
// Parse observation values for each time period.

for (size_t i = 0; i < time_periods.size(); i++) {

// Duplicate row, skip.

if (check_keys && state_keys[i]) {
Expand Down Expand Up @@ -214,7 +210,6 @@ struct ES_Read {
static std::vector<string> GetDataUrls(ClientContext &context, TableFunctionInitInput &input,
const std::vector<eurostat::Dimension> &data_structure,
const string &base_url, const BindData &bind_data) {

std::unordered_map<string, bool> urls;

// Use complex filters previously parsed in 'PushdownComplexFilter' function.
Expand Down Expand Up @@ -246,7 +241,6 @@ struct ES_Read {
}

static unique_ptr<GlobalTableFunctionState> Init(ClientContext &context, TableFunctionInitInput &input) {

auto &bind_data = input.bind_data->Cast<BindData>();
auto global_state = make_uniq_base<GlobalTableFunctionState, State>();
auto &data_table = global_state->Cast<State>();
Expand All @@ -268,7 +262,6 @@ struct ES_Read {
bool check_keys = data_urls.size() > 1;

for (const auto &data_url : data_urls) {

EUROSTAT_SCAN_DEBUG_LOG(1, "Fetching data from URL: %s", data_url.c_str());

// Execute HTTP GET request.
Expand All @@ -279,8 +272,7 @@ struct ES_Read {
}
url_count++;

auto response =
HttpRequest::ExecuteHttpRequest(settings, data_url, "GET", duckdb_httplib_openssl::Headers(), "", "");
auto response = HttpRequest::ExecuteHttpRequest(settings, data_url, "GET", HttpHeaders(), "", "");

if (response.content_type == "application/xml") {
std::string error_msg = EurostatUtils::GetXmlErrorMessage(response.body);
Expand Down Expand Up @@ -311,7 +303,6 @@ struct ES_Read {

while (std::getline(line_stream, line)) {
if (!line.empty()) {

// Parse header line to...
if (line_index == 0) {
size_t pos = line.find("\\TIME_PERIOD");
Expand Down Expand Up @@ -368,7 +359,6 @@ struct ES_Read {
//------------------------------------------------------------------------------------------------------------------

static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) {

auto &gstate = input.global_state->Cast<State>();

// Calculate how many records we can fit in the output
Expand All @@ -382,7 +372,6 @@ struct ES_Read {

// Load current subset of rows.
for (idx_t row_idx = 0, record_idx = current_row; row_idx < output_size; row_idx++, record_idx++) {

const auto &datarow = gstate.rows[record_idx];
const DimensionValues &dim_values = gstate.dimensions[datarow.dimension_index];
const auto &dim_count = dim_values.values.size();
Expand Down Expand Up @@ -439,7 +428,6 @@ struct ES_Read {

static void PushdownComplexFilter(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
vector<unique_ptr<Expression>> &expressions) {

auto &bind_data = bind_data_p->Cast<BindData>();

// Get column_ids from LogicalGet to map expression column indices to table columns.
Expand Down Expand Up @@ -473,7 +461,6 @@ struct ES_Read {
//------------------------------------------------------------------------------------------------------------------

static void Register(ExtensionLoader &loader) {

InsertionOrderPreservingMap<string> tags;
tags.insert("ext", "eurostat");
tags.insert("category", "table");
Expand All @@ -499,7 +486,6 @@ struct ES_Read {
// #####################################################################################################################

//! Registers the Eurostat data table function (ES_Read) with the given extension loader.
void EurostatDataFunctions::Register(ExtensionLoader &loader) {
	ES_Read::Register(loader);
}

Expand Down
36 changes: 3 additions & 33 deletions src/eurostat/eurostat_info_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ namespace {
//======================================================================================================================

struct ES_Endpoints {

//------------------------------------------------------------------------------------------------------------------
// Bind
//------------------------------------------------------------------------------------------------------------------
Expand All @@ -42,7 +41,6 @@ struct ES_Endpoints {

static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {

names.emplace_back("provider_id");
return_types.push_back(LogicalType::VARCHAR);
names.emplace_back("organization");
Expand Down Expand Up @@ -122,7 +120,6 @@ struct ES_Endpoints {
//------------------------------------------------------------------------------------------------------------------

static void Register(ExtensionLoader &loader) {

InsertionOrderPreservingMap<string> tags;
tags.insert("ext", "eurostat");
tags.insert("category", "table");
Expand All @@ -137,7 +134,6 @@ struct ES_Endpoints {
//======================================================================================================================

struct ES_Dataflows {

#define INFO_COLUMN_PROVIDER_ID 0
#define INFO_COLUMN_DATAFLOW_ID 1
#define INFO_COLUMN_TYPE 2
Expand Down Expand Up @@ -174,7 +170,6 @@ struct ES_Dataflows {
//! Parse DataflowInfo from JSON object
inline static DataflowInfo ParseDataflow(const string &provider_id, yyjson_val *object_val,
bool &load_datastructure, bool &load_annotations) {

yyjson_val *extension_val = nullptr;
yyjson_val *annotation_val = nullptr;
yyjson_val *attrib_val = nullptr;
Expand Down Expand Up @@ -300,7 +295,6 @@ struct ES_Dataflows {

static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {

std::vector<string> providers;
std::vector<string> dataflows;
string language = "en";
Expand Down Expand Up @@ -417,7 +411,6 @@ struct ES_Dataflows {
};

static unique_ptr<GlobalTableFunctionState> Init(ClientContext &context, TableFunctionInitInput &input) {

const auto &bind_data = input.bind_data->Cast<BindData>();
const auto &providers = bind_data.providers;
const auto &dataflows = bind_data.dataflows;
Expand Down Expand Up @@ -451,8 +444,7 @@ struct ES_Dataflows {

// Execute HTTP GET request

auto response =
HttpRequest::ExecuteHttpRequest(settings, url, "GET", duckdb_httplib_openssl::Headers(), "", "");
auto response = HttpRequest::ExecuteHttpRequest(settings, url, "GET", HttpHeaders(), "", "");

if (response.status_code != 200) {
throw IOException(
Expand Down Expand Up @@ -519,7 +511,6 @@ struct ES_Dataflows {
//------------------------------------------------------------------------------------------------------------------

static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) {

auto &gstate = input.global_state->Cast<State>();

// Calculate how many records we can fit in the output
Expand Down Expand Up @@ -659,7 +650,6 @@ struct ES_Dataflows {
//------------------------------------------------------------------------------------------------------------------

static void Register(ExtensionLoader &loader) {

InsertionOrderPreservingMap<string> tags;
tags.insert("ext", "eurostat");
tags.insert("category", "table");
Expand Down Expand Up @@ -696,7 +686,6 @@ static constexpr const char *ES_VALUES_PATH =
//! XPath expression used to locate the fault message node in an XML error response.
static constexpr const char *ES_ERROR_PATH = "/S:Fault/faultstring";

struct ES_DataStructure {

//! Information of a Dimension of an EUROSTAT Dataflow
struct Dimension {
int32_t position = -1;
Expand All @@ -709,7 +698,6 @@ struct ES_DataStructure {
//! Returns the basic data structure of an EUROSTAT Dataflow.
static std::vector<Dimension> GetBasicDataSchema(ClientContext &context, const string &provider_id,
const string &dataflow_id, const string &language) {

std::vector<Dimension> dimensions;

// Execute HTTP GET request
Expand All @@ -719,8 +707,7 @@ struct ES_DataStructure {
"/latest?detail=referencepartial&references=descendants";

HttpSettings settings = HttpRequest::ExtractHttpSettings(context, url);
auto response =
HttpRequest::ExecuteHttpRequest(settings, url, "GET", duckdb_httplib_openssl::Headers(), "", "");
auto response = HttpRequest::ExecuteHttpRequest(settings, url, "GET", HttpHeaders(), "", "");

if (response.status_code != 200) {
throw IOException("EUROSTAT: Failed to fetch dataflow metadata from provider='%s', dataflow='%s': (%d) %s",
Expand All @@ -738,9 +725,7 @@ struct ES_DataStructure {
xmlXPathObjectPtr xpath_obj = nullptr;

for (const auto &xpath : {ES_DIMENSION_PATH, ES_TIME_DIMENSION_PATH}) {

if ((xpath_obj = xmlXPathEvalExpression(BAD_CAST xpath, xpath_ctx)) && xpath_obj->nodesetval) {

for (int i = 0; i < xpath_obj->nodesetval->nodeNr; i++) {
xmlNodePtr node = xpath_obj->nodesetval->nodeTab[i];

Expand Down Expand Up @@ -790,17 +775,14 @@ struct ES_DataStructure {
// Get the Concept names for Dimensions

if ((xpath_obj = xmlXPathEvalExpression(BAD_CAST ES_CONCEPT_PATH, xpath_ctx)) && xpath_obj->nodesetval) {

for (int i = 0; i < xpath_obj->nodesetval->nodeNr; i++) {
xmlNodePtr node = xpath_obj->nodesetval->nodeTab[i];

auto concept_id = XmlUtils::GetNodeAttributeValue(node, "id");
if (!concept_id.empty()) {
for (auto &dim : dimensions) {
if (dim.concept_id == concept_id) {

for (xmlNodePtr child = node->children; child; child = child->next) {

if (strcmp((const char *)child->name, "Name") == 0) {
string lang = XmlUtils::GetNodeAttributeValue(child, "lang", language);

Expand All @@ -826,7 +808,6 @@ struct ES_DataStructure {
//! Returns the data structure of an EUROSTAT Dataflow.
static std::vector<Dimension> GetDataSchema(ClientContext &context, const string &provider_id,
const string &dataflow_id, const string &language) {

auto dimensions = ES_DataStructure::GetBasicDataSchema(context, provider_id, dataflow_id, language);

// Execute HTTP GET request
Expand All @@ -835,8 +816,7 @@ struct ES_DataStructure {
string url = it->second.api_url + "contentconstraint/" + provider_id + "/" + dataflow_id;

HttpSettings settings = HttpRequest::ExtractHttpSettings(context, url);
auto response =
HttpRequest::ExecuteHttpRequest(settings, url, "GET", duckdb_httplib_openssl::Headers(), "", "");
auto response = HttpRequest::ExecuteHttpRequest(settings, url, "GET", HttpHeaders(), "", "");

if (response.status_code != 200) {
throw IOException("EUROSTAT: Failed to fetch dataflow metadata from provider='%s', dataflow='%s': (%d) %s",
Expand All @@ -853,7 +833,6 @@ struct ES_DataStructure {
xmlXPathObjectPtr xpath_obj = nullptr;

if ((xpath_obj = xmlXPathEvalExpression(BAD_CAST ES_VALUES_PATH, xpath_ctx)) && xpath_obj->nodesetval) {

for (int i = 0; i < xpath_obj->nodesetval->nodeNr; i++) {
xmlNodePtr node = xpath_obj->nodesetval->nodeTab[i];

Expand All @@ -863,7 +842,6 @@ struct ES_DataStructure {

for (auto &dim : dimensions) {
if (dim.id == dim_id) {

for (xmlNodePtr child = node->children; child; child = child->next) {
if (strcmp((const char *)child->name, "Value") == 0) {
string code_value = XmlUtils::GetNodeTextContent(child);
Expand Down Expand Up @@ -900,7 +878,6 @@ struct ES_DataStructure {

static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {

D_ASSERT(input.inputs.size() == 2);

const string provider_id = StringValue::Get(input.inputs[0]);
Expand Down Expand Up @@ -975,7 +952,6 @@ struct ES_DataStructure {
//------------------------------------------------------------------------------------------------------------------

static unique_ptr<NodeStatistics> Cardinality(ClientContext &context, const FunctionData *data) {

auto &bind_data = data->Cast<BindData>();
auto result = make_uniq<NodeStatistics>();

Expand All @@ -991,7 +967,6 @@ struct ES_DataStructure {
//------------------------------------------------------------------------------------------------------------------

static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) {

auto &bind_data = input.bind_data->Cast<BindData>();
auto &gstate = input.global_state->Cast<State>();

Expand Down Expand Up @@ -1086,7 +1061,6 @@ struct ES_DataStructure {
//------------------------------------------------------------------------------------------------------------------

static void Register(ExtensionLoader &loader) {

InsertionOrderPreservingMap<string> tags;
tags.insert("ext", "eurostat");
tags.insert("category", "table");
Expand All @@ -1109,7 +1083,6 @@ struct ES_DataStructure {
//! Returns the data structure (dimensions) of a given dataflow
std::vector<eurostat::Dimension> EurostatUtils::DataStructureOf(ClientContext &context, const std::string &provider_id,
const std::string &dataflow_id) {

auto dimensions = ES_DataStructure::GetBasicDataSchema(context, provider_id, dataflow_id, "en");
std::vector<eurostat::Dimension> data_structure;

Expand All @@ -1122,7 +1095,6 @@ std::vector<eurostat::Dimension> EurostatUtils::DataStructureOf(ClientContext &c

//! Extracts the error message of a given Eurostat API response body
std::string EurostatUtils::GetXmlErrorMessage(const std::string &response_body) {

XmlDocument document = XmlDocument(response_body);
xmlXPathContextPtr xpath_ctx = document.GetXPathContext();
xmlXPathObjectPtr xpath_obj = nullptr;
Expand All @@ -1131,7 +1103,6 @@ std::string EurostatUtils::GetXmlErrorMessage(const std::string &response_body)

if ((xpath_obj = xmlXPathEvalExpression(BAD_CAST ES_ERROR_PATH, xpath_ctx)) && xpath_obj->nodesetval &&
xpath_obj->nodesetval->nodeNr > 0) {

xmlNodePtr node = xpath_obj->nodesetval->nodeTab[0];
error_msg = XmlUtils::GetNodeTextContent(node);
}
Expand All @@ -1144,7 +1115,6 @@ std::string EurostatUtils::GetXmlErrorMessage(const std::string &response_body)
}

void EurostatInfoFunctions::Register(ExtensionLoader &loader) {

ES_Endpoints::Register(loader);
ES_Dataflows::Register(loader);
ES_DataStructure::Register(loader);
Expand Down
Loading
Loading