-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot.py
More file actions
116 lines (94 loc) · 3.81 KB
/
Copy pathplot.py
File metadata and controls
116 lines (94 loc) · 3.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from config import DIM_REDUCTION, N_COMPONENTS, FEATURES, OUTPUT_DIR
def reduce_dim(X):
    """Project the feature matrix ``X`` down to ``N_COMPONENTS`` dimensions.

    The reducer is chosen by the ``DIM_REDUCTION`` config value, compared
    case-insensitively: ``'pca'``, ``'umap'`` or ``'tsne'``.

    Args:
        X: Feature matrix handed unchanged to the reducer's ``fit_transform``.

    Returns:
        Whatever ``fit_transform`` of the selected reducer returns.

    Raises:
        ImportError: ``'umap'`` was requested but the ``umap`` package could
            not be imported.
        ValueError: ``DIM_REDUCTION`` does not name a supported method.
    """
    method = DIM_REDUCTION.lower()
    if method == 'pca':
        reducer = PCA(n_components=N_COMPONENTS, random_state=42)
    elif method == 'umap':
        # umap is an optional dependency, so it is only imported on demand.
        try:
            import umap
        except ImportError as err:
            raise ImportError('Could not import umap') from err
        reducer = umap.UMAP(n_components=N_COMPONENTS, random_state=42)
    elif method == 'tsne':
        from sklearn.manifold import TSNE
        reducer = TSNE(n_components=N_COMPONENTS, random_state=42, perplexity=30)
    else:
        # Raise (not return) the error: returning the exception object would
        # hand callers a ValueError instance in place of coordinates.
        raise ValueError(f'Unknown DIM_REDUCTION method: {method}')
    return reducer.fit_transform(X)
def plot_clusters(df, X, km, save = True, hover_labels = False):
    """Plot the clusters, preferring the interactive plot over the static one.

    Tries ``plot_iteractive`` first and falls back to ``plot_static`` if the
    interactive path raises.

    Args:
        df: Data frame forwarded to the plotting function.
        X: Feature matrix forwarded to the plotting function.
        km: Fitted clustering model forwarded to the plotting function.
        save: Forwarded to the plotting function; when true the plot is
            also written to disk.
        hover_labels: Currently unused; kept for interface compatibility.
    """
    try:
        plot_iteractive(df, X, km, save)
    except Exception as err:
        # Only catch ordinary errors so Ctrl-C / SystemExit still propagate,
        # and report why the interactive plot was abandoned.
        print(f'\n[plotting] | Interactive plot failed ({err!r}); falling back to static plot')
        plot_static(df, X, km, save)
def plot_static(df, X, km, save):
    """Draw a static matplotlib scatter plot of the clusters.

    ``X`` is reduced with ``reduce_dim`` and the first two reduced columns
    are plotted, one scatter series per cluster index in
    ``range(km.n_clusters)``. Each legend entry shows the cluster index and
    the number of rows of ``df['cluster']`` equal to it.

    Args:
        df: Data frame with a ``'cluster'`` column of cluster labels.
        X: Feature matrix passed to ``reduce_dim``.
        km: Model exposing ``n_clusters``.
        save: When true, write the figure to ``OUTPUT_DIR / 'clusters.png'``
            before showing it.
    """
    projected = reduce_dim(X)
    cluster_ids = df['cluster'].values
    k = km.n_clusters
    palette = cm.tab10(np.linspace(0, 1, k))

    fig, ax = plt.subplots(figsize=(10, 7))

    # One series per cluster so each gets its own colour and legend entry.
    for idx, colour in enumerate(palette):
        members = cluster_ids == idx
        xs = projected[members, 0]
        ys = projected[members, 1]
        legend_text = f'Cluster {idx} ({members.sum()})'
        ax.scatter(xs, ys, color=colour, label=legend_text, alpha=0.7, s=40)

    ax.set_title(f'K-Means Clusters ({DIM_REDUCTION.upper()})', fontsize=14)
    ax.legend(loc='best', fontsize=9)
    ax.axis('off')
    plt.tight_layout()

    if save:
        path = OUTPUT_DIR / 'clusters.png'
        plt.savefig(path, dpi=150)
        print(f'\n[plotting] | Saved cluster plot -> {path}')

    plt.show()
def plot_iteractive(df, X, km, save):
    """Draw an interactive Plotly scatter plot of the clusters.

    ``X`` is reduced with ``reduce_dim`` and the first two reduced columns
    become the x/y positions. Points are coloured by cluster and show the
    ``'name'`` and ``'artist'`` columns on hover.

    Args:
        df: Data frame with ``'name'``, ``'artist'`` and ``'cluster'`` columns.
        X: Feature matrix passed to ``reduce_dim``.
        km: Not used by this function; accepted so the signature matches
            ``plot_static``.
        save: When true, write the figure to ``OUTPUT_DIR / 'clusters.html'``
            before showing it.
    """
    # Imported here so a missing plotly only fails this function.
    import plotly.express as px

    projected = reduce_dim(X)

    # Build the plotting frame in one step; the string label makes Plotly
    # treat the cluster as a discrete category.
    points = df[['name', 'artist', 'cluster']].assign(
        x=projected[:, 0],
        y=projected[:, 1],
        cluster_str=lambda frame: 'Cluster ' + frame['cluster'].astype(str),
    )

    hover_fields = {'name': True, 'artist': True, 'x': False, 'y': False}
    heading = f'K-Means Clusters ({DIM_REDUCTION.upper()})'
    fig = px.scatter(
        points,
        x='x',
        y='y',
        color='cluster_str',
        hover_data=hover_fields,
        title=heading,
        template='plotly_dark',
    )
    fig.update_traces(marker={'size': 7, 'opacity': 0.8})
    fig.update_layout(
        legend_title_text='Cluster',
        xaxis_visible=False,
        yaxis_visible=False,
    )

    if save:
        path = OUTPUT_DIR / 'clusters.html'
        fig.write_html(str(path))
        print(f'\n[plotting] | Saved iteractive plot -> {path}')

    fig.show()
def plot_clusters_profiles(df, km, save= True):
    """Draw a radar (polar) chart of each cluster centre across the features.

    The axes are the entries of ``FEATURES`` that are present in
    ``df.columns``; each cluster centre is drawn as a closed, lightly filled
    polygon.

    Args:
        df: Data frame whose columns determine which ``FEATURES`` are plotted.
        km: Fitted model exposing ``cluster_centers_`` and ``n_clusters``.
            The centres are assumed to have one column per plotted feature,
            in the same order (verify against how the model was trained).
        save: When true, write the figure to
            ``OUTPUT_DIR / 'cluster_profiles.png'`` before showing it.
    """
    feature_cols = [feature for feature in FEATURES if feature in df.columns]
    # scikit-learn spells the attribute ``cluster_centers_``; the previous
    # ``cluster_centres_`` raised AttributeError.
    centres = pd.DataFrame(km.cluster_centers_, columns=feature_cols)

    n = len(feature_cols)
    angles = np.linspace(0, 2 * np.pi, n, endpoint=False).tolist()
    # Repeat the first angle so every polygon closes on itself.
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    colours = cm.tab10(np.linspace(0, 1, km.n_clusters))

    for c, row in centres.iterrows():
        # Repeat the first value to match the repeated closing angle.
        values = row.tolist() + [row.iloc[0]]
        ax.plot(angles, values, 'o-', color=colours[c], lw=1.5, label=f'Cluster: {c}')
        ax.fill(angles, values, color=colours[c], alpha=0.1)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(feature_cols, fontsize=10)
    ax.set_title('Cluster Feature Profiles', pad=20)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

    if save:
        path = OUTPUT_DIR / 'cluster_profiles.png'
        plt.savefig(path, dpi=150, bbox_inches='tight')
        print(f'\n[plotting] | Saved Cluster Profile -> {path}')

    plt.show()