Skip to content

Commit bc56dd1

Browse files
authored
Merge pull request #33 from sorrychoe/update/tomotopy-version-up
🚀 dump to 1.2.10
2 parents c234f6c + 2089bdb commit bc56dd1

7 files changed

Lines changed: 27 additions & 20 deletions

File tree

.pylintrc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -519,5 +519,5 @@ min-public-methods=2
519519

520520
# Exceptions that will emit a warning when being caught. Defaults to
521521
# "BaseException, Exception".
522-
overgeneral-exceptions=BaseException,
523-
Exception
522+
overgeneral-exceptions=builtins.BaseException,
523+
builtins.Exception

pyBigKinds/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.2.9"
1+
__version__ = "1.2.10"

pyBigKinds/base.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
def header_remover(df):
77
"""
8-
Removes any text enclosed in square brackets ([]) from the 'title' column of a DataFrame or list.
8+
Removes any text enclosed in square brackets ([]) from the '제목' (title) column of a DataFrame or list.
99
1010
Parameters:
1111
df (pandas.DataFrame or list): The input DataFrame or list containing a column or text data where headers (enclosed in square brackets) need to be removed.
@@ -17,33 +17,35 @@ def header_remover(df):
1717
TypeError: If the input is not a pandas DataFrame or list.
1818
"""
1919
if isinstance(df, pd.DataFrame):
20-
ans = df["제λͺ©"].str.replace(r"\[[^)]*\]", "", regex=True)
20+
ans = df["제λͺ©"].str.replace(r"\[[^\]]*\]", "", regex=True)
2121
elif isinstance(df, list):
22-
ans = df.str.replace(r"\[[^)]*\]", "", regex=True)
22+
ans = pd.Series(df).str.replace(r"\[[^\]]*\]", "", regex=True).tolist()
2323
else:
2424
raise TypeError("input value is to be have to list or DataFrame")
2525
return ans
2626

2727

2828
def keyword_list(df):
2929
"""
30-
Converts the '키워드' column of a DataFrame to a list or returns a list as-is if the input is already a list.
30+
Converts the '키워드' column of a DataFrame to a list or returns a list as-is if the input is already a list or Series.
3131
3232
Parameters:
33-
df (pandas.DataFrame or list): The input DataFrame containing the '키워드' column or a list to be converted to a list format.
33+
df (pandas.DataFrame or pandas.Series or list): The input data containing keywords.
3434
3535
Returns:
36-
list: A list of keywords from the '키워드' column of the DataFrame, or a list itself if the input is a list.
36+
list: A list of keywords.
3737
3838
Raises:
39-
TypeError: If the input is not a pandas DataFrame or list.
39+
TypeError: If the input is not a pandas DataFrame, Series, or list.
4040
"""
4141
if isinstance(df, pd.DataFrame):
4242
return df["ν‚€μ›Œλ“œ"].values.tolist()
43-
elif isinstance(df, list):
43+
elif isinstance(df, pd.Series):
4444
return df.values.tolist()
45+
elif isinstance(df, list):
46+
return df
4547
else:
46-
raise TypeError("input value is to be have to list or DataFrame")
48+
raise TypeError("input value is to be have to list, Series or DataFrame")
4749

4850

4951
def keyword_parser(text_list):

pyBigKinds/preprocessing.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,16 @@ def keyword_dataframe_no_duplicated(df):
6868
raise TypeError("input type is to be have to DataFrame")
6969

7070

71-
def tfidf(df, *press):
71+
def tfidf(df, col=None):
7272
"""
7373
Calculates the Term Frequency-Inverse Document Frequency (TF-IDF) for keywords in the input DataFrame.
7474
75-
This function takes an optional column name (press) to select a specific column for TF-IDF calculations. It uses the TfidfVectorizer to compute TF-IDF values for the keywords
75+
This function takes an optional column name (col) to select a specific column for TF-IDF calculations. It uses the TfidfVectorizer to compute TF-IDF values for the keywords
7676
and returns a DataFrame of words with their corresponding TF-IDF scores.
7777
7878
Parameters:
7979
df (pandas.DataFrame): The input DataFrame containing text data, typically in a '키워드' column.
80-
press (str, optional): A column name specifying which column to apply the TF-IDF transformation. Defaults to None.
80+
col (str, optional): A column name specifying which column to apply the TF-IDF transformation. Defaults to None.
8181
8282
Returns:
8383
pandas.DataFrame: A DataFrame with two columns - '단어' (keyword) and '빈도' (TF-IDF score), sorted by score in descending order.
@@ -86,8 +86,8 @@ def tfidf(df, *press):
8686
TypeError: If the input is not a pandas DataFrame.
8787
"""
8888
if isinstance(df, pd.DataFrame):
89-
if isinstance(press, str):
90-
df = df[press]
89+
if isinstance(col, str):
90+
df = df[col]
9191
lis = keyword_list(df)
9292

9393
tfidfv = TfidfVectorizer()

pyBigKinds/representation.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,12 @@ def association(dataframe, min_support=0.5, use_colnames=True, min_threshold=0.1
293293
words = keyword_parser(keyword_list(dataframe))
294294
te = TransactionEncoder()
295295
te_data = te.fit(words).transform(words, sparse=True)
296-
te_df = pd.DataFrame.sparse.from_spmatrix(te_data, columns=te.columns_)
296+
te_df = pd.DataFrame(
297+
{
298+
col: pd.arrays.SparseArray(te_data[:, i].toarray().ravel().astype(bool))
299+
for i, col in enumerate(te.columns_)
300+
},
301+
)
297302

298303
result = apriori(te_df, min_support=min_support, use_colnames=use_colnames)
299304

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ pre-commit>=3
88
pylint>=3.2.0
99
pytest>=7
1010
scikit-learn>=1.0.0
11-
tomotopy>=0.12.4
11+
tomotopy>=0.14.0
1212
wordcloud>=1.8.2.2

test/test_representation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,4 @@ def test_lda(dataframe):
8585
def test_association(dataframe):
8686
apriopri = association(dataframe)
8787
assert type(apriopri) == pd.DataFrame
88-
assert apriopri.shape == (8, 10)
88+
assert apriopri.shape == (8, 14)

0 commit comments

Comments
Β (0)