-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_algorithms.py
More file actions
167 lines (132 loc) · 6.11 KB
/
test_algorithms.py
File metadata and controls
167 lines (132 loc) · 6.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
tests/test_algorithms.py — Unit tests for GustafsonKessel, GathGeva, and FCM.

Tests verify:
- Output shapes and membership normalisation (columns of U sum to 1)
- Clean separation of two well-separated synthetic blobs
- Consistent results given a fixed random_state
- predict_proba on held-out data produces valid memberships (rows sum to 1)
- RuntimeError raised if predict_proba called before fit
"""
import numpy as np
import pytest
from fuzzy_visual_encoding.algorithms import GathGeva, GustafsonKessel, _fuzzy_cmeans
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def blob_data() -> tuple[np.ndarray, np.ndarray]:
    """Build a deterministic toy dataset of two tight, far-apart 2-d blobs.

    Returns ``(X, y)``: ``X`` stacks 50 points drawn around (0, 0) on top of
    50 points drawn around (3, 3); ``y`` holds the matching 0/1 blob labels.
    """
    per_blob = 50
    generator = np.random.default_rng(0)
    # Draw order is part of the contract: blob 0 is sampled before blob 1 so
    # the seeded generator yields the same points on every run.
    blobs = [
        generator.normal(loc=centre, scale=0.3, size=(per_blob, 2))
        for centre in ([0.0, 0.0], [3.0, 3.0])
    ]
    points = np.concatenate(blobs, axis=0)
    labels = np.repeat([0, 1], per_blob)
    return points, labels
# ---------------------------------------------------------------------------
# FCM (base)
# ---------------------------------------------------------------------------
class TestFuzzyCMeans:
    """Sanity checks on the ``(centers, U)`` pair returned by ``_fuzzy_cmeans``."""

    def test_output_shapes(self):
        """Centers are (clusters, features); memberships are (clusters, samples)."""
        n_samples, n_features, k = 30, 4, 3
        gen = np.random.default_rng(1)
        data = gen.standard_normal((n_samples, n_features))
        centers, memberships = _fuzzy_cmeans(data, n_clusters=k, rng=gen)
        assert centers.shape == (k, n_features)
        assert memberships.shape == (k, n_samples)

    def test_membership_sums_to_one(self):
        """Every sample's memberships across clusters add up to one."""
        n_samples = 40
        gen = np.random.default_rng(2)
        data = gen.standard_normal((n_samples, 3))
        _centers, memberships = _fuzzy_cmeans(data, n_clusters=4, rng=gen)
        column_totals = memberships.sum(axis=0)
        np.testing.assert_allclose(column_totals, np.ones(n_samples), atol=1e-9)

    def test_membership_nonnegative(self):
        """No membership value may be negative."""
        gen = np.random.default_rng(3)
        data = gen.standard_normal((20, 2))
        _centers, memberships = _fuzzy_cmeans(data, n_clusters=3, rng=gen)
        assert np.all(memberships >= 0)
# ---------------------------------------------------------------------------
# Gustafson-Kessel
# ---------------------------------------------------------------------------
class TestGustafsonKessel:
    """Behavioural tests for the ``GustafsonKessel`` estimator."""

    def test_fit_returns_self(self, blob_data):
        """``fit`` should hand back the estimator so calls can be chained."""
        X, _ = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0)
        result = gk.fit(X)
        assert result is gk

    def test_fitted_attributes_populated(self, blob_data):
        """Fitting should populate centers, covariances and memberships."""
        X, _ = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0).fit(X)
        assert gk.centers_ is not None
        assert gk.covariances_ is not None
        assert gk.U_ is not None

    def test_centers_shape(self, blob_data):
        """Centers should be (n_clusters, n_features)."""
        X, _ = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0).fit(X)
        assert gk.centers_.shape == (2, 2)

    def test_U_shape(self, blob_data):
        """The membership matrix should be (n_clusters, n_samples)."""
        X, _ = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0).fit(X)
        assert gk.U_.shape == (2, len(X))

    def test_U_columns_sum_to_one(self, blob_data):
        """Each sample's memberships across clusters should sum to one."""
        X, _ = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0).fit(X)
        np.testing.assert_allclose(gk.U_.sum(axis=0), np.ones(len(X)), atol=1e-8)

    def test_predict_proba_shape(self, blob_data):
        """Held-out predictions should be (n_samples, n_clusters)."""
        X, _ = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0).fit(X[:80])
        proba = gk.predict_proba(X[80:])
        assert proba.shape == (len(X[80:]), 2)

    def test_predict_proba_rows_sum_to_one(self, blob_data):
        """Held-out predictions should be normalised per sample (row-wise)."""
        X, _ = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0).fit(X[:80])
        proba = gk.predict_proba(X[80:])
        np.testing.assert_allclose(proba.sum(axis=1), np.ones(len(X[80:])), atol=1e-8)

    def test_predict_proba_before_fit_raises(self):
        """Predicting on an unfitted estimator should raise a RuntimeError."""
        gk = GustafsonKessel(n_clusters=2)
        with pytest.raises(RuntimeError, match="fit\\(\\)"):
            gk.predict_proba(np.zeros((5, 2)))

    def test_cluster_separation(self, blob_data):
        """Dominant cluster assignment should match true labels for clean blobs."""
        X, y = blob_data
        gk = GustafsonKessel(n_clusters=2, random_state=0).fit(X)
        # Hard assignment: index of the largest membership per observation.
        dominant = gk.U_.argmax(axis=0)  # (N,)
        # Cluster indices are arbitrary, so compare up to relabelling: each
        # true blob must map onto exactly one cluster and the two must differ.
        # Masking by the labels avoids hard-coding the fixture's row layout.
        first_blob = dominant[y == 0]
        second_blob = dominant[y == 1]
        assert len(np.unique(first_blob)) == 1
        assert len(np.unique(second_blob)) == 1
        assert first_blob[0] != second_blob[0]

    def test_reproducibility(self, blob_data):
        """Two fits with the same ``random_state`` should give identical results."""
        X, _ = blob_data
        gk1 = GustafsonKessel(n_clusters=2, random_state=7).fit(X)
        gk2 = GustafsonKessel(n_clusters=2, random_state=7).fit(X)
        np.testing.assert_array_equal(gk1.centers_, gk2.centers_)
        # Memberships must be reproducible too, not only the centers.
        np.testing.assert_array_equal(gk1.U_, gk2.U_)
# ---------------------------------------------------------------------------
# Gath-Geva
# ---------------------------------------------------------------------------
class TestGathGeva:
    """Behavioural tests for the ``GathGeva`` estimator."""

    def test_fit_returns_self(self, blob_data):
        """``fit`` should hand back the estimator so calls can be chained."""
        points, _labels = blob_data
        model = GathGeva(n_clusters=2, random_state=0)
        fitted = model.fit(points)
        assert fitted is model

    def test_fitted_attributes_populated(self, blob_data):
        """Fitting should populate centers, priors and covariances."""
        points, _labels = blob_data
        model = GathGeva(n_clusters=2, random_state=0)
        model = model.fit(points)
        for attribute in ("centers_", "priors_", "covariances_"):
            assert getattr(model, attribute) is not None

    def test_priors_shape(self, blob_data):
        """There should be one prior per cluster."""
        points, _labels = blob_data
        model = GathGeva(n_clusters=2, random_state=0).fit(points)
        expected_shape = (2,)
        assert model.priors_.shape == expected_shape

    def test_predict_proba_shape(self, blob_data):
        """Held-out predictions should be (n_samples, n_clusters)."""
        points, _labels = blob_data
        train, held_out = points[:80], points[80:]
        model = GathGeva(n_clusters=2, random_state=0).fit(train)
        proba = model.predict_proba(held_out)
        assert proba.shape == (len(held_out), 2)

    def test_predict_proba_rows_sum_to_one(self, blob_data):
        """Held-out predictions should be normalised per sample (row-wise)."""
        points, _labels = blob_data
        train, held_out = points[:80], points[80:]
        model = GathGeva(n_clusters=2, random_state=0).fit(train)
        row_totals = model.predict_proba(held_out).sum(axis=1)
        np.testing.assert_allclose(row_totals, np.ones(len(held_out)), atol=1e-8)

    def test_predict_proba_before_fit_raises(self):
        """Predicting on an unfitted estimator should raise a RuntimeError."""
        unfitted = GathGeva(n_clusters=2)
        dummy_input = np.zeros((5, 2))
        with pytest.raises(RuntimeError, match="fit\\(\\)"):
            unfitted.predict_proba(dummy_input)