Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/core/algorithms/cfd/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ target_sources(
)
# Links the CFD utility target, the table-model target and the Boost/spdlog
# targets listed below into ${NAME} with PRIVATE visibility.
# NOTE(review): `Boost::headers` appears on two consecutive lines; this looks
# like the removed and the added line of a diff hunk shown together without
# +/- markers -- confirm which line is current.
target_link_libraries(
${NAME} PRIVATE ${DESBORDANTE_PREFIX}::cfd::util ${DESBORDANTE_PREFIX}::model::table
Boost::headers
spdlog::spdlog_header_only Boost::headers
)

set(NAME cfd)
Expand Down
10 changes: 2 additions & 8 deletions src/core/algorithms/cfd/cfd_discovery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ namespace algos::cfd {
// Default constructor: calls RegisterOptions() and then marks options as
// available through MakeOptionsAvailable().
// NOTE(review): MakeOptionsAvailable is called twice in a row, first with
// {kTable, kCfdColumnsNumber, kCfdTuplesNumber} and then with {kTable}. This
// looks like the before/after lines of a diff hunk rendered without +/-
// markers -- confirm which call is current.
CFDDiscovery::CFDDiscovery() : Algorithm() {
using namespace config::names;
RegisterOptions();
MakeOptionsAvailable({kTable, kCfdColumnsNumber, kCfdTuplesNumber});
MakeOptionsAvailable({kTable});
}

void CFDDiscovery::LoadDataInternal() {
relation_ = CFDRelationData::CreateFrom(*input_table_, columns_number_, tuples_number_);
relation_ = CFDRelationData::CreateFrom(*input_table_);

if (relation_->GetColumnData().empty()) {
throw std::runtime_error("Got an empty .csv file: CFD mining is meaningless.");
Expand All @@ -37,12 +37,6 @@ void CFDDiscovery::RegisterOptions() {
DESBORDANTE_OPTION_USING;

RegisterOption(config::kTableOpt(&input_table_));
RegisterOption(Option{&columns_number_, kCfdColumnsNumber, kDCfdColumnsNumber, 0u});
RegisterOption(Option{&tuples_number_, kCfdTuplesNumber, kDCfdTuplesNumber, 0u});
}

// Returns the number of entries currently stored in cfd_list_, as an int.
int CFDDiscovery::NrCfds() const {
    // Named cast instead of a C-style cast: the narrowing from the container's
    // size type to int stays explicit and easy to grep for.
    return static_cast<int>(cfd_list_.size());
}

ItemsetCFDList const& CFDDiscovery::GetItemsetCfds() const {
Expand Down
3 changes: 0 additions & 3 deletions src/core/algorithms/cfd/cfd_discovery.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,12 @@ class CFDDiscovery : public Algorithm {
protected:
config::InputTable input_table_;

unsigned columns_number_;
unsigned tuples_number_;
ItemsetCFDList cfd_list_;
std::shared_ptr<CFDRelationData> relation_;

public:
explicit CFDDiscovery();
void LoadDataInternal() final;
int NrCfds() const;
ItemsetCFDList const& GetItemsetCfds() const;
CFDList GetCfds() const;
std::string GetRelationString(char delim = ' ') const;
Expand Down
80 changes: 32 additions & 48 deletions src/core/algorithms/cfd/fd_first_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,33 @@ FDFirstAlgorithm::FDFirstAlgorithm() : CFDDiscovery() {
// Registers the execution options of the algorithm: minimum support, minimum
// confidence, maximum LHS size and substrategy.
// The value checks defined below throw config::ConfigurationError when the
// supplied value is rejected.
// NOTE(review): every option is registered twice in this block -- once as a
// plain Option and once more with a value check attached. This looks like the
// removed and added lines of a diff hunk shown together without +/- markers;
// confirm which set is current.
void FDFirstAlgorithm::RegisterOptions() {
DESBORDANTE_OPTION_USING;

Substrategy default_val = Substrategy::kDfs;
RegisterOption(Option{&min_supp_, kCfdMinimumSupport, kDCfdMinimumSupport, 0u});
RegisterOption(Option{&min_conf_, kCfdMinimumConfidence, kDCfdMinimumConfidence, 0.0});
RegisterOption(Option{&max_lhs_, kCfdMaximumLhs, kDCfdMaximumLhs, 0u});
RegisterOption(Option{&substrategy_, kCfdSubstrategy, kDCfdSubstrategy, default_val});
// Accepts only confidence values in (0, 1].
auto check_conf = [](double val) {
if (val <= 0 || val > 1) {
throw config::ConfigurationError("Minimum confidence must be a value between (0,1].");
}
};
// Accepts only support values in [1, relation_->GetNumRows()].
// NOTE(review): dereferences relation_, so this presumably must only run
// after the data has been loaded -- confirm.
auto check_supp = [this](unsigned int val) {
if (val == 0) {
throw config::ConfigurationError("Minimum support must be more than 0.");
} else if (val > relation_->GetNumRows()) {
throw config::ConfigurationError(
"Minimum support must be less than or equal to the number of tuples.");
}
};
// Accepts only a non-zero maximum LHS size.
auto check_lhs = [](unsigned int val) {
if (val == 0) {
throw config::ConfigurationError("Maximum LHS size must be more than 0.");
}
};

// NOTE(review): the defaults 0u (support) and 0.0 (confidence) would be
// rejected by check_supp / check_conf themselves -- confirm whether value
// checks are also applied to default values.
RegisterOption(Option{&min_supp_, kCfdMinimumSupport, kDCfdMinimumSupport, 0u}.SetValueCheck(
std::move(check_supp)));
RegisterOption(
Option{&min_conf_, kCfdMinimumConfidence, kDCfdMinimumConfidence, 0.0}.SetValueCheck(
std::move(check_conf)));
RegisterOption(Option{&max_lhs_, kCfdMaximumLhs, kDCfdMaximumLhs, 1u}.SetValueCheck(
std::move(check_lhs)));
RegisterOption(Option{&substrategy_, kCfdSubstrategy, kDCfdSubstrategy, Substrategy::kDfs});
}

void FDFirstAlgorithm::ResetStateCFD() {
Expand All @@ -43,51 +65,13 @@ void FDFirstAlgorithm::ResetStateCFD() {

// Runs the search and returns the elapsed time in milliseconds.
// Sets max_cfd_size_ to max_lhs_ + 1, calls CheckForIncorrectInput(), times the
// FdsFirstDFS() call with std::chrono::system_clock and logs the size of
// cfd_list_.
// NOTE(review): two `duration_cast<...>(` declarations (elapsed_milliseconds
// and elapsed_millis) appear on consecutive lines; this looks like the removed
// and added lines of a diff hunk shown together without +/- markers -- confirm
// which one is current.
// NOTE(review): system_clock is not guaranteed to be monotonic;
// std::chrono::steady_clock is the usual choice for measuring durations.
unsigned long long FDFirstAlgorithm::ExecuteInternal() {
max_cfd_size_ = max_lhs_ + 1;
CheckForIncorrectInput();
auto start_time = std::chrono::system_clock::now();
FdsFirstDFS();
auto elapsed_milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(
auto elapsed_millis = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now() - start_time);
unsigned long long apriori_millis = elapsed_milliseconds.count();
LOG_INFO("> CFD COUNT: {}", cfd_list_.size());

return apriori_millis;
}

// Validates the configured parameters and throws config::ConfigurationError on
// the first violation found, in this order:
//   - min_supp_ is less than 1;
//   - min_conf_ is outside [0, 1];
//   - max_cfd_size_ is less than 2;
//   - exactly one of columns_number_ / tuples_number_ is 0;
//   - both are non-zero and min_supp_ exceeds tuples_number_.
// NOTE(review): the max_cfd_size_ message says "is less than 1" while the
// condition tested is `< 2` -- confirm the intended wording.
// NOTE(review): the trailing `return elapsed_millis.count();` does not fit a
// void function and uses a name that is not declared here; it looks like a
// line from the ExecuteInternal diff hunk rendered in place -- confirm.
void FDFirstAlgorithm::CheckForIncorrectInput() const {
// TODO: should be checked by Option
if (min_supp_ < 1) {
throw config::ConfigurationError("[ERROR] Illegal Support value: \"" +
std::to_string(min_supp_) + "\"" + " is less than 1");
}

if (min_conf_ < 0 || min_conf_ > 1) {
throw config::ConfigurationError("[ERROR] Illegal Confidence value: \"" +
std::to_string(min_conf_) + "\"" + " not in [0,1]");
}

if (max_cfd_size_ < 2) {
throw config::ConfigurationError("[ERROR] Illegal Max size value: \"" +
std::to_string(max_cfd_size_) + "\"" + " is less than 1");
}

// The two limits must be given together: one being 0 while the other is not
// is rejected.
if (columns_number_ != 0 && tuples_number_ == 0) {
throw config::ConfigurationError(
"[ERROR] Illegal columns_number and tuples_number values: columns_number is " +
std::to_string(columns_number_) + " while tuples_number is 0");
}

if (tuples_number_ != 0 && columns_number_ == 0) {
throw config::ConfigurationError(
"[ERROR] Illegal columns_number and tuples_number values: tuples_number is " +
std::to_string(tuples_number_) + " while columns_number is 0");
}

// With both limits set, the support may not exceed tuples_number_.
if (columns_number_ != 0 && tuples_number_ != 0 && min_supp_ > tuples_number_) {
throw config::ConfigurationError(
"[ERROR] Illegal Support value : " + std::to_string(min_supp_) + " is not in [1, " +
std::to_string(tuples_number_) + "]");
}
return elapsed_millis.count();
}

void FDFirstAlgorithm::MakeExecuteOptsAvailable() {
Expand Down Expand Up @@ -181,7 +165,7 @@ void FDFirstAlgorithm::FdsFirstDFS() {
free_itemsets_.insert(Itemset{a.item});
}
cand_store_ = PrefixTree<Itemset, Itemset>();
store_[Itemset()] = PartitionTIdList(Iota(relation_->Size()));
store_[Itemset()] = PartitionTIdList(Iota(relation_->GetNumRows()));
cand_store_.Insert(Itemset(), all_attrs_);
FdsFirstDFS(Itemset(), items, substrategy_);
}
Expand Down Expand Up @@ -543,7 +527,7 @@ FDFirstAlgorithm::PIdListMiners FDFirstAlgorithm::GetPartitionSingletons() {
attr_indices[dom[i]] = std::make_pair(a, i);
}
}
for (size_t row = 0; row < relation_->Size(); row++) {
for (size_t row = 0; row < relation_->GetNumRows(); row++) {
auto const& tup = relation_->GetRow(row);
for (int item : tup) {
auto const& attr_node_ix = attr_indices.at(item);
Expand All @@ -556,7 +540,7 @@ FDFirstAlgorithm::PIdListMiners FDFirstAlgorithm::GetPartitionSingletons() {
int attr_item = -1 - static_cast<int>(a);
auto new_node = MinerNode<PartitionTIdList>(attr_item);
auto const& dom = relation_->GetDomain(a);
new_node.tids.tids.reserve(relation_->Size() + dom.size() - 1);
new_node.tids.tids.reserve(relation_->GetNumRows() + dom.size() - 1);
new_node.tids.sets_number = dom.size();
for (unsigned i = 0; i < dom.size(); i++) {
auto& ts = new_node.tids.tids;
Expand Down
1 change: 0 additions & 1 deletion src/core/algorithms/cfd/fd_first_algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ class FDFirstAlgorithm : public algos::cfd::CFDDiscovery {
std::unordered_map<int, std::vector<Itemset>> rules_;

void ResetStateCFD() final;
void CheckForIncorrectInput() const;

void FdsFirstDFS();
void FdsFirstDFS(Itemset const&, std::vector<MinerNode<PartitionTIdList>> const&,
Expand Down
17 changes: 15 additions & 2 deletions src/core/algorithms/cfd/model/cfd_column_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <utility>
#include <vector>

#include <boost/unordered_map.hpp>

#include "core/model/table/abstract_column_data.h"

// see algorithms/cfd/LICENSE
Expand All @@ -11,20 +13,31 @@ namespace algos::cfd {

// Column representation class for CFDRelationData.
class CFDColumnData : model::AbstractColumnData {
public:
using NumToken = int;
using ItemDictionary = boost::unordered_map<std::string, int>;

private:
std::vector<NumToken> values_;
// maps a value string to corresponding item id
ItemDictionary values_dict_;

public:
CFDColumnData(Column const* col, std::vector<int> col_values)
: AbstractColumnData(col), values_(std::move(col_values)) {}
CFDColumnData(Column const* col, std::vector<int> col_values, ItemDictionary values_dict)
: AbstractColumnData(col),
values_(std::move(col_values)),
values_dict_(std::move(values_dict)) {}

explicit CFDColumnData(Column const* col) : AbstractColumnData(col) {}

std::vector<NumToken> const& GetValues() const {
return values_;
}

ItemDictionary const& GetValueDict() const {
return values_dict_;
}

std::string ToString() const final {
return "Cfd data for " + column_->ToString();
}
Expand Down
Loading
Loading