-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexample.py
More file actions
116 lines (62 loc) · 2.68 KB
/
Copy pathexample.py
File metadata and controls
116 lines (62 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
__author__ = 'Stefano Mauceri'
__email__ = 'mauceri.stefano@gmail.com'
# =============================================================================
# IMPORT
# =============================================================================
import os
import numpy as np
from prototype import prototype
import scipy.spatial.distance as ssd
from dissimilarity import dissimilarity
from sklearn.metrics import roc_curve, auc
from sklearn.neighbors import NearestNeighbors
# =============================================================================
# EXAMPLE
# =============================================================================
# LOAD DATA
# The four arrays of the chosen dataset are read from
# <current working directory>/data/<dataset_name>/<dataset_name>_<SPLIT>.npy
dataset_name = 'Plane'
path = os.path.join(os.getcwd(), 'data', dataset_name)
X_train, Y_train, X_test, Y_test = (
    np.load(os.path.join(path, f'{dataset_name}_{split}.npy'))
    for split in ('X_TRAIN', 'Y_TRAIN', 'X_TEST', 'Y_TEST')
)
# ADAPT DATA TO ONE-CLASS CLASSIFICATION
# Only the samples of the positive class are kept for training, while the
# test labels are binarised: 1 = positive class, 0 = any other class.
print('AVAILABLE CLASSES: ', np.unique(Y_train))
positive = 1 # Choose a positive class
print('POSITIVE CLASS: ', positive)
is_positive = (Y_train == positive)
if not np.any(is_positive):
    # Fail early with a clear message: without this check the training set
    # would silently become empty and the failure would only surface in a
    # later step, far from its actual cause.
    raise ValueError(
        f'Positive class {positive!r} not found in the training labels '
        f'of dataset {dataset_name!r}.'
    )
X_train = X_train[is_positive]
Y_test = (Y_test == positive).astype(np.int8)
# SELECT DISS. MEASURE and PROTOTYPE METHOD
# Dissimilarity measures are picked as attributes of `D`,
# prototype selection methods as attributes of `P`.
D = dissimilarity()
P = prototype()
# The measure and the extra keyword arguments forwarded to it
Dissimilarity, diss_params = D.kullback_leibler, {}
# OR
#Dissimilarity = D.EDR # This is fairly slow
#diss_params = {'eps':0.25} # Threshold on distance for EDR computation
Prot_method = P.borders
# OR
#Prot_method = P.centers_k_means
# GET DISSIMILARITY MATRIX
# Pairwise dissimilarities between all the training samples.
# Some prototype methods eg "centers_k_means" do not require
# the computation of the dissimilarity matrix
Diss_Matrix = ssd.cdist(
    X_train, X_train, metric=Dissimilarity, **diss_params
)
# GET PROTOTYPES
n = 2 # number of prototypes we want to get
Prototypes = Prot_method(X_train, n, Diss_Matrix)
# OR
#Prototypes = Prot_method(X_train, n)
# GET DISSIMILARITY-BASED REPRESENTATION
# Every sample is described by its dissimilarity to each prototype
# (train split first, then test split).
DBR_X_train, DBR_X_test = (
    ssd.cdist(samples, Prototypes, metric=Dissimilarity, **diss_params)
    for samples in (X_train, X_test)
)
# GET AUROC
# A 1-nearest-neighbour model is fitted on the training representation
# (positive class only); each test sample is then scored by the distance
# to its nearest training sample.
Classifier = NearestNeighbors(n_neighbors=1)
Classifier.fit(DBR_X_train)
distances, _ = Classifier.kneighbors(DBR_X_test)
# kneighbors returns one column per neighbour; flatten the single column
# so that roc_curve receives the 1-D score vector its API documents
# instead of relying on implicit reshaping.
# Test scores are negated because the ROC curve expects that
# more is better while in terms of dissimilarities less is better.
Test_scores = -distances.ravel()
fpr, tpr, _ = roc_curve(Y_test, Test_scores, pos_label=1)
AUROC = auc(fpr, tpr) * 100
print('AUROC:', round(AUROC, 1))
# =============================================================================
# THE END
# =============================================================================