Merge pull request #33 from sorrychoe/update/tomotopy-version-up

sorrychoe · web-flow · commit bc56dd11ec43 · 2026-04-06T22:47:14.000+09:00
🚀 bump to 1.2.10
diff --git a/.pylintrc b/.pylintrc
@@ -519,5 +519,5 @@ min-public-methods=2
 
 # Exceptions that will emit a warning when being caught. Defaults to
 # "BaseException, Exception".
-overgeneral-exceptions=BaseException,
-                       Exception
+overgeneral-exceptions=builtins.BaseException,
+                       builtins.Exception
diff --git a/pyBigKinds/_version.py b/pyBigKinds/_version.py
@@ -1 +1 @@
-__version__ = "1.2.9"
+__version__ = "1.2.10"
diff --git a/pyBigKinds/base.py b/pyBigKinds/base.py
@@ -5,7 +5,7 @@
 
 def header_remover(df):
     """
-    Removes any text enclosed in square brackets ([]) from the 'title' column of a DataFrame or list.
+    Removes any text enclosed in square brackets ([]) from the '제목' (title) column of a DataFrame, or from each string in a list.
 
     Parameters:
     df (pandas.DataFrame or list): The input DataFrame or list containing a column or text data where headers (enclosed in square brackets) need to be removed.
@@ -17,33 +17,35 @@ def header_remover(df):
     TypeError: If the input is not a pandas DataFrame or list.
     """
     if isinstance(df, pd.DataFrame):
-        ans = df["제목"].str.replace(r"\[[^)]*\]", "", regex=True)
+        ans = df["제목"].str.replace(r"\[[^\]]*\]", "", regex=True)
     elif isinstance(df, list):
-        ans = df.str.replace(r"\[[^)]*\]", "", regex=True)
+        ans = pd.Series(df).str.replace(r"\[[^\]]*\]", "", regex=True).tolist()
     else:
         raise TypeError("input value is to be have to list or DataFrame")
     return ans
 
 
 def keyword_list(df):
     """
-    Converts the '키워드' column of a DataFrame to a list or returns a list as-is if the input is already a list.
+    Converts the '키워드' column of a DataFrame, or a Series, to a list, or returns the input as-is if it is already a list.
 
     Parameters:
-    df (pandas.DataFrame or list): The input DataFrame containing the '키워드' column or a list to be converted to a list format.
+    df (pandas.DataFrame or pandas.Series or list): The input data containing keywords.
 
     Returns:
-    list: A list of keywords from the '키워드' column of the DataFrame, or a list itself if the input is a list.
+    list: A list of keywords.
 
     Raises:
-    TypeError: If the input is not a pandas DataFrame or list.
+    TypeError: If the input is not a pandas DataFrame, Series, or list.
     """
     if isinstance(df, pd.DataFrame):
         return df["키워드"].values.tolist()
-    elif isinstance(df, list):
+    elif isinstance(df, pd.Series):
         return df.values.tolist()
+    elif isinstance(df, list):
+        return df
     else:
-        raise TypeError("input value is to be have to list or DataFrame")
+        raise TypeError("input value is to be have to list, Series or DataFrame")
 
 
 def keyword_parser(text_list):
diff --git a/pyBigKinds/preprocessing.py b/pyBigKinds/preprocessing.py
@@ -68,16 +68,16 @@ def keyword_dataframe_no_duplicated(df):
         raise TypeError("input type is to be have to DataFrame")
 
 
-def tfidf(df, *press):
+def tfidf(df, col=None):
     """
     Calculates the Term Frequency-Inverse Document Frequency (TF-IDF) for keywords in the input DataFrame.
 
-    This function takes an optional column name (press) to select a specific column for TF-IDF calculations. It uses the TfidfVectorizer to compute TF-IDF values for the keywords
+    This function takes an optional column name (col) to select a specific column for TF-IDF calculations. It uses the TfidfVectorizer to compute TF-IDF values for the keywords
     and returns a DataFrame of words with their corresponding TF-IDF scores.
 
     Parameters:
     df (pandas.DataFrame): The input DataFrame containing text data, typically in a '키워드' column.
-    press (str, optional): A column name specifying which column to apply the TF-IDF transformation. Defaults to None.
+    col (str, optional): A column name specifying which column to apply the TF-IDF transformation. Defaults to None.
 
     Returns:
     pandas.DataFrame: A DataFrame with two columns - '단어' (keyword) and '빈도' (TF-IDF score), sorted by score in descending order.
@@ -86,8 +86,8 @@ def tfidf(df, *press):
     TypeError: If the input is not a pandas DataFrame.
     """
     if isinstance(df, pd.DataFrame):
-        if isinstance(press, str):
-            df = df[press]
+        if isinstance(col, str):
+            df = df[col]
         lis = keyword_list(df)
 
         tfidfv = TfidfVectorizer()
diff --git a/pyBigKinds/representation.py b/pyBigKinds/representation.py
@@ -293,7 +293,12 @@ def association(dataframe, min_support=0.5, use_colnames=True, min_threshold=0.1
     words = keyword_parser(keyword_list(dataframe))
     te = TransactionEncoder()
     te_data = te.fit(words).transform(words, sparse=True)
-    te_df = pd.DataFrame.sparse.from_spmatrix(te_data, columns=te.columns_)
+    te_df = pd.DataFrame(
+        {
+            col: pd.arrays.SparseArray(te_data[:, i].toarray().ravel().astype(bool))
+            for i, col in enumerate(te.columns_)
+        },
+    )
 
     result = apriori(te_df, min_support=min_support, use_colnames=use_colnames)
 
diff --git a/requirements.txt b/requirements.txt
@@ -8,5 +8,5 @@ pre-commit>=3
 pylint>=3.2.0
 pytest>=7
 scikit-learn>=1.0.0
-tomotopy>=0.12.4
+tomotopy>=0.14.0
 wordcloud>=1.8.2.2
diff --git a/test/test_representation.py b/test/test_representation.py
@@ -85,4 +85,4 @@ def test_lda(dataframe):
 def test_association(dataframe):
     apriopri = association(dataframe)
     assert type(apriopri) == pd.DataFrame
-    assert apriopri.shape == (8, 10)
+    assert apriopri.shape == (8, 14)

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "1.2.9"`
	`1`	`+__version__ = "1.2.10"`