FuzzyVisualEncoding/tests/test_algorithms.py at master · ashish-code/FuzzyVisualEncoding · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
tests/test_algorithms.py — Unit tests for GustafsonKessel, GathGeva, and FCM.

Tests verify:
    - Output shapes and membership normalisation (columns sum to 1)
    - Convergence on small synthetic data
    - Consistent results given a fixed random_state
    - predict_proba on held-out data produces valid memberships
    - RuntimeError raised if predict_proba called before fit
"""

import numpy as np
import pytest

from fuzzy_visual_encoding.algorithms import GathGeva, GustafsonKessel, _fuzzy_cmeans


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def blob_data() -> tuple[np.ndarray, np.ndarray]:
    """Build a small, reproducible two-blob dataset in two dimensions.

    A generator seeded with 0 draws 50 points centred on (0, 0) and then
    50 points centred on (3, 3), both with standard deviation 0.3.

    Returns:
        ``(X, y)``: ``X`` stacks the (0, 0) blob on top of the (3, 3) blob,
        giving 100 rows of 2 features; ``y`` is 0 for the first 50 rows and
        1 for the remaining 50.
    """
    per_blob = 50
    spread = 0.3
    generator = np.random.default_rng(0)
    # Sampling order is part of the fixture: origin blob first, then (3, 3).
    around_origin = generator.normal(loc=[0.0, 0.0], scale=spread, size=(per_blob, 2))
    around_three = generator.normal(loc=[3.0, 3.0], scale=spread, size=(per_blob, 2))
    points = np.vstack((around_origin, around_three))
    labels = np.array([0] * per_blob + [1] * per_blob)
    return points, labels


# ---------------------------------------------------------------------------
# FCM (base)
# ---------------------------------------------------------------------------


class TestFuzzyCMeans:
    """Checks on ``_fuzzy_cmeans`` using standard-normal random input.

    Each test seeds its own generator, draws the data from it, and then
    passes that same generator on to ``_fuzzy_cmeans``.
    """

    def test_output_shapes(self):
        """Centers are (clusters, features); memberships are (clusters, samples)."""
        n_samples, n_features, n_clusters = 30, 4, 3
        generator = np.random.default_rng(1)
        data = generator.standard_normal((n_samples, n_features))
        centers, memberships = _fuzzy_cmeans(
            data, n_clusters=n_clusters, rng=generator
        )
        assert centers.shape == (n_clusters, n_features)
        assert memberships.shape == (n_clusters, n_samples)

    def test_membership_sums_to_one(self):
        """Summing memberships over the cluster axis gives 1 for every sample."""
        n_samples = 40
        generator = np.random.default_rng(2)
        data = generator.standard_normal((n_samples, 3))
        _centers, memberships = _fuzzy_cmeans(data, n_clusters=4, rng=generator)
        per_sample_total = memberships.sum(axis=0)
        np.testing.assert_allclose(per_sample_total, np.ones(n_samples), atol=1e-9)

    def test_membership_nonnegative(self):
        """No membership value is below zero."""
        generator = np.random.default_rng(3)
        data = generator.standard_normal((20, 2))
        _centers, memberships = _fuzzy_cmeans(data, n_clusters=3, rng=generator)
        assert np.all(memberships >= 0)


# ---------------------------------------------------------------------------
# Gustafson-Kessel
# ---------------------------------------------------------------------------


class TestGustafsonKessel:
    """Exercise ``GustafsonKessel`` with two clusters on the ``blob_data`` fixture.

    Covers the return value of ``fit``, the fitted attributes and their
    shapes, membership normalisation, ``predict_proba`` on held-out rows,
    the error raised before fitting, blob separation, and reproducibility
    under a fixed ``random_state``.
    """

    @staticmethod
    def _fitted(points, seed=0):
        """Return a two-cluster model fitted on ``points`` with ``random_state=seed``."""
        return GustafsonKessel(n_clusters=2, random_state=seed).fit(points)

    def test_fit_returns_self(self, blob_data):
        """``fit`` returns the very object it was called on."""
        points, _labels = blob_data
        model = GustafsonKessel(n_clusters=2, random_state=0)
        assert model.fit(points) is model

    def test_fitted_attributes_populated(self, blob_data):
        """``centers_``, ``covariances_`` and ``U_`` are all set after fitting."""
        points, _labels = blob_data
        model = self._fitted(points)
        for attribute in ("centers_", "covariances_", "U_"):
            assert getattr(model, attribute) is not None

    def test_centers_shape(self, blob_data):
        """Two clusters on 2-d data give a (2, 2) centers array."""
        points, _labels = blob_data
        centers = self._fitted(points).centers_
        assert centers.shape == (2, 2)

    def test_U_shape(self, blob_data):
        """``U_`` has one row per cluster and one column per observation."""
        points, _labels = blob_data
        memberships = self._fitted(points).U_
        assert memberships.shape == (2, len(points))

    def test_U_columns_sum_to_one(self, blob_data):
        """Every column of ``U_`` sums to 1 within ``atol=1e-8``."""
        points, _labels = blob_data
        column_totals = self._fitted(points).U_.sum(axis=0)
        np.testing.assert_allclose(column_totals, np.ones(len(points)), atol=1e-8)

    def test_predict_proba_shape(self, blob_data):
        """Held-out predictions have one row per sample and one column per cluster."""
        points, _labels = blob_data
        train, held_out = points[:80], points[80:]
        proba = self._fitted(train).predict_proba(held_out)
        assert proba.shape == (len(held_out), 2)

    def test_predict_proba_rows_sum_to_one(self, blob_data):
        """Each row of held-out predictions sums to 1 within ``atol=1e-8``."""
        points, _labels = blob_data
        train, held_out = points[:80], points[80:]
        row_totals = self._fitted(train).predict_proba(held_out).sum(axis=1)
        np.testing.assert_allclose(row_totals, np.ones(len(held_out)), atol=1e-8)

    def test_predict_proba_before_fit_raises(self):
        """An unfitted model raises ``RuntimeError`` whose message mentions ``fit()``."""
        unfitted = GustafsonKessel(n_clusters=2)
        dummy_input = np.zeros((5, 2))
        with pytest.raises(RuntimeError, match="fit\\(\\)"):
            unfitted.predict_proba(dummy_input)

    def test_cluster_separation(self, blob_data):
        """Highest-membership cluster is constant within each blob and differs between them."""
        points, _labels = blob_data
        # Index of the highest-membership cluster for every observation.
        winner = self._fitted(points).U_.argmax(axis=0)
        first_blob, second_blob = winner[:50], winner[50:]
        # Each blob maps to exactly one cluster index ...
        assert np.unique(first_blob).size == 1
        assert np.unique(second_blob).size == 1
        # ... and the two blobs do not share that index.
        assert winner[0] != winner[50]

    def test_reproducibility(self, blob_data):
        """Two fits with ``random_state=7`` produce exactly equal centers."""
        points, _labels = blob_data
        first_run = self._fitted(points, seed=7)
        second_run = self._fitted(points, seed=7)
        np.testing.assert_array_equal(first_run.centers_, second_run.centers_)


# ---------------------------------------------------------------------------
# Gath-Geva
# ---------------------------------------------------------------------------


class TestGathGeva:
    """Exercise ``GathGeva`` with two clusters on the ``blob_data`` fixture.

    Covers the return value of ``fit``, the fitted attributes, the shape of
    ``priors_``, ``predict_proba`` on held-out rows, and the error raised
    before fitting.
    """

    @staticmethod
    def _fitted(points):
        """Return a two-cluster model fitted on ``points`` with ``random_state=0``."""
        return GathGeva(n_clusters=2, random_state=0).fit(points)

    def test_fit_returns_self(self, blob_data):
        """``fit`` returns the very object it was called on."""
        points, _labels = blob_data
        model = GathGeva(n_clusters=2, random_state=0)
        fitted = model.fit(points)
        assert fitted is model

    def test_fitted_attributes_populated(self, blob_data):
        """``centers_``, ``priors_`` and ``covariances_`` are all set after fitting."""
        points, _labels = blob_data
        model = self._fitted(points)
        for attribute in ("centers_", "priors_", "covariances_"):
            assert getattr(model, attribute) is not None

    def test_priors_shape(self, blob_data):
        """``priors_`` is a 1-d array with one entry per cluster."""
        points, _labels = blob_data
        priors = self._fitted(points).priors_
        assert priors.shape == (2,)

    def test_predict_proba_shape(self, blob_data):
        """Held-out predictions have one row per sample and one column per cluster."""
        points, _labels = blob_data
        train, held_out = points[:80], points[80:]
        proba = self._fitted(train).predict_proba(held_out)
        assert proba.shape == (len(held_out), 2)

    def test_predict_proba_rows_sum_to_one(self, blob_data):
        """Each row of held-out predictions sums to 1 within ``atol=1e-8``."""
        points, _labels = blob_data
        train, held_out = points[:80], points[80:]
        row_totals = self._fitted(train).predict_proba(held_out).sum(axis=1)
        np.testing.assert_allclose(row_totals, np.ones(len(held_out)), atol=1e-8)

    def test_predict_proba_before_fit_raises(self):
        """An unfitted model raises ``RuntimeError`` whose message mentions ``fit()``."""
        unfitted = GathGeva(n_clusters=2)
        dummy_input = np.zeros((5, 2))
        with pytest.raises(RuntimeError, match="fit\\(\\)"):
            unfitted.predict_proba(dummy_input)