Skip to content

Error when no splits are found in ContinuousOptimalBinning #357

Description

@YC-1412

I got an error when running ContinuousOptimalBinning. It occurs when splits_prebinning is empty (length 0).

The call site unpacks 8 values from _compute_prebins, but the method returns only 3 values when splits_prebinning is empty.

(splits_prebinning, n_records, sums, ssums, stds, min_t, max_t,
n_zeros) = self._compute_prebins(splits_prebinning, x, y, sw_clean)

def _compute_prebins(self, splits_prebinning, x, y, sw):
n_splits = len(splits_prebinning)
if not n_splits:
return splits_prebinning, np.array([]), np.array([])

I will submit a PR to try to fix it. Thanks!

Code for replicating the bug

from optbinning import ContinuousOptimalBinning
import pandas as pd

df = pd.DataFrame({
    'a1': [0, 1, 2, 3, 4, 5, 6, 9],
    'weight': [189484, 6058, 963, 158, 20, 3, 2, 0],
    'y': [789, 1275, 1435, 2563, 536, 0, 3214, 0],
})
cb = ContinuousOptimalBinning(min_n_bins=2, split_digits=0, min_prebin_size=0.01, max_bin_size=0.99, monotonic_trend=None, random_state=42, verbose=True)
cb.fit(df['a1'], df['y'], sample_weight=df['weight']/df['weight'].sum())
cb.binning_table.build()

Error log

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[1], line 10
      4 df = pd.DataFrame({
      5     'a1': [0, 1, 2, 3, 4, 5, 6, 9],
      6     'weight': [189484, 6058, 963, 158, 20, 3, 2, 0],
      7     'y': [789, 1275, 1435, 2563, 536, 0, 3214, 0],
      8 })
      9 cb = ContinuousOptimalBinning(min_n_bins=2, split_digits=0, min_prebin_size=0.01, max_bin_size=0.99, monotonic_trend=None, random_state=42, verbose=True)
---> 10 cb.fit(df['a1'], df['y'], sample_weight=df['weight']/df['weight'].sum())
     11 cb.binning_table.build()

File ~/Documents/CS/Side_Project/optbinning/optbinning/binning/continuous_binning.py:441, in ContinuousOptimalBinning.fit(self, x, y, sample_weight, check_input)
    417 def fit(self, x, y, sample_weight=None, check_input=False):
    418     """Fit the optimal binning according to the given training data.
    419 
    420     Parameters
   (...)    439         Fitted optimal binning.
    440     """
--> 441     return self._fit(x, y, sample_weight, check_input)

File ~/Documents/CS/Side_Project/optbinning/optbinning/binning/continuous_binning.py:660, in ContinuousOptimalBinning._fit(self, x, y, sample_weight, check_input)
    653         [splits, n_records, sums, ssums, stds, min_t, max_t,
    654          n_zeros] = self._prebinning_refinement(
    655             user_splits, x_clean, y_clean, y_missing, x_special,
    656             y_special, y_others, sw_clean, sw_missing, sw_special,
    657             sw_others)
    658 else:
    659     [splits, n_records, sums, ssums, stds, min_t, max_t,
--> 660      n_zeros] = self._fit_prebinning(
    661         x_clean, y_clean, y_missing, x_special, y_special, y_others,
    662         None, sw_clean, sw_missing, sw_special, sw_others)
    664 self._n_prebins = len(n_records)
    666 self._categories = categories

File ~/Documents/CS/Side_Project/optbinning/optbinning/binning/binning.py:893, in OptimalBinning._fit_prebinning(self, x, y, y_missing, x_special, y_special, y_others, class_weight, sw_clean, sw_missing, sw_special, sw_others)
    883 min_bin_size = int(np.ceil(self.min_prebin_size * self._n_samples))
    885 prebinning = PreBinning(method=self.prebinning_method,
    886                         n_bins=self.max_n_prebins,
    887                         min_bin_size=min_bin_size,
   (...)    890                         **self.prebinning_kwargs
    891                         ).fit(x, y, sw_clean)
--> 893 return self._prebinning_refinement(prebinning.splits, x, y, y_missing,
    894                                    x_special, y_special, y_others,
    895                                    sw_clean, sw_missing, sw_special,
    896                                    sw_others)

File ~/Documents/CS/Side_Project/optbinning/optbinning/binning/continuous_binning.py:892, in ContinuousOptimalBinning._prebinning_refinement(self, splits_prebinning, x, y, y_missing, x_special, y_special, y_others, sw_clean, sw_missing, sw_special, sw_others)
    888 if self.split_digits is not None:
    889     splits_prebinning = np.round(splits_prebinning, self.split_digits)
    891 (splits_prebinning, n_records, sums, ssums, stds, min_t, max_t,
--> 892  n_zeros) = self._compute_prebins(splits_prebinning, x, y, sw_clean)
    894 return (splits_prebinning, n_records, sums, ssums, stds, min_t, max_t,
    895         n_zeros)

File ~/Documents/CS/Side_Project/optbinning/optbinning/binning/continuous_binning.py:963, in ContinuousOptimalBinning._compute_prebins(self, splits_prebinning, x, y, sw)
    959     if self.verbose:
    960         logger.info("Pre-binning: number prebins removed: {}"
    961                     .format(np.count_nonzero(mask_remove)))
--> 963     (splits_prebinning, n_records, sums, ssums, stds, min_t, max_t,
    964      n_zeros) = self._compute_prebins(splits, x, y, sw)
    966 return (splits_prebinning, n_records, sums, ssums, stds, min_t, max_t,
    967         n_zeros)

ValueError: not enough values to unpack (expected 8, got 3)

Metadata

Metadata

Labels

bug: Something isn't working

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions