SNMF

noloox.decomposition.SNMF

Bases: TransformerMixin, BaseEstimator

Semi-Nonnegative Matrix Factorization. Equivalent to NMF, except that the components, and therefore the outcome variables, are unbounded. The latent factors are constrained to be nonnegative.

Example:

import numpy as np
from noloox.decomposition import SNMF

X = np.random.normal(0, 1, size=(200, 50))
model = SNMF(n_components=10)

X_transformed = model.fit_transform(X)
assert np.all(X_transformed >= 0)

Parameters:

Name Type Description Default
n_components int

Number of latent components to discover.

required
tol float

Tolerance for stopping condition.

1e-05
max_iter int

Maximum number of iterations.

200
progress_bar bool

Indicates whether to display a progress bar when fitting.

True
random_state Optional[int]

Used for model initialization with KMeans.

None
sparsity float

L1 penalty. Higher values result in a stricter clustering.

0.0

Attributes:

Name Type Description
components_ ndarray of shape (n_components, n_features)

Factorization matrix, sometimes called ‘dictionary’. Unconstrained.

n_iter_ int

Actual number of iterations.

reconstruction_err_ float

Reconstruction error of the model at the last iteration.

Source code in noloox/decomposition/snmf.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
class SNMF(TransformerMixin, BaseEstimator):
    """Semi-Nonnegative Matrix Factorization.

    Equivalent to NMF, except that the components, and therefore the outcome
    variables, are unbounded. The latent factors are constrained to be
    nonnegative.

    Example:
    ```python
    import numpy as np
    from noloox.decomposition import SNMF

    X = np.random.normal(0, 1, size=(200, 50))
    model = SNMF(n_components=10)

    X_transformed = model.fit_transform(X)
    assert np.all(X_transformed >= 0)
    ```

    Parameters
    ----------
    n_components: int
        Number of latent components to discover.
    tol: float, default=1e-5
        Tolerance for stopping condition.
    max_iter: int, default=200
        Maximum number of iterations.
    progress_bar: bool, default=True
        Indicates whether to display a progress bar when fitting.
    random_state: int, default=None
        Used for model initialization with KMeans.
    sparsity: float, default=0.0
        L1 penalty. Higher values result in a stricter clustering.
    verbose: bool, default=False
        If True, prints the error after every iteration and a message
        when the stopping condition is met.

    Attributes
    ----------
    components_: ndarray of shape (n_components, n_features)
        Factorization matrix, sometimes called ‘dictionary’.
        Unconstrained.
    n_iter_: int
        Actual number of iterations.
    reconstruction_err_: float
        Reconstruction error of the model at the last iteration.
    """

    def __init__(
        self,
        n_components: int,
        tol: float = 1e-5,
        max_iter: int = 200,
        progress_bar: bool = True,
        random_state: Optional[int] = None,
        sparsity: float = 0.0,
        verbose: bool = False,
    ):
        self.n_components = n_components
        self.tol = tol
        self.max_iter = max_iter
        self.progress_bar = progress_bar
        self.random_state = random_state
        self.sparsity = sparsity
        self.verbose = verbose

    def fit_transform(self, X, y=None):
        """Learn an SNMF model for the data X and returns the transformed data.

        Sets ``components_``, ``reconstruction_err_`` and ``n_iter_`` on the
        estimator. Emits a warning if the stopping condition is not met
        within ``max_iter`` iterations.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_features)
            Datapoints to factor.
        y: Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        W : ndarray of shape (n_samples, n_components)
            Transformed data. Strictly nonnegative.
        """
        G = init_G(X.T, self.n_components, random_state=self.random_state)
        F = update_F(X.T, G)
        error_at_init = rec_err(X.T, F, G)
        prev_error = error_at_init
        # Seed the loop-carried results so that max_iter=0 still leaves a
        # well-defined fitted state instead of raising NameError below.
        error = error_at_init
        n_iter = 0
        # NOTE(review): the helpers above receive X.T while step receives X
        # untransposed -- presumably step transposes internally; confirm.
        _step = partial(step, sparsity=self.sparsity, X=X)
        for i in trange(
            self.max_iter,
            desc="Iterative updates.",
            disable=not self.progress_bar,
        ):
            G, F, error = _step(G, F)
            # i is a 0-based index; the number of completed updates is i + 1.
            n_iter = i + 1
            difference = prev_error - error
            # Stop once the improvement, relative to the initial error, drops
            # below tol -- but only after the error has beaten the initial one.
            if (error < error_at_init) and (
                difference / error_at_init
            ) < self.tol:
                if self.verbose:
                    print(f"Converged after {n_iter} iterations")
                break
            prev_error = error
            if self.verbose:
                print(
                    f"Iteration: {i}, Error: {error}, init_error: {error_at_init}, difference from previous: {difference}"
                )
        else:
            # Reached only when the loop was never broken out of.
            warnings.warn("SNMF did not converge, try specifying a higher max_iter.")
        self.components_ = np.array(F.T)
        self.reconstruction_err_ = error
        self.n_iter_ = n_iter
        return np.array(G)

    def fit(self, X, y=None):
        """Learn an SNMF model for the data X.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_features)
            Datapoints to factor.
        y: Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self: SNMF
            Fitted model.
        """
        self.fit_transform(X, y)
        return self

    def transform(self, X):
        """Transform the data X according to the fitted SNMF model.

        Projects X through the pseudo-inverse of ``components_`` and clips
        negative entries to zero.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_features)
            Datapoints to transform.

        Returns
        -------
        W: ndarray of shape (n_samples, n_components)
            Nonnegative latent sources.
        """
        G = jnp.maximum(X @ jnp.linalg.pinv(self.components_), 0)
        return np.array(G)

    def inverse_transform(self, X):
        """Transform data back to its original space.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_components)
            Transformed data matrix.

        Returns
        -------
        X_original : ndarray of shape (n_samples, n_features)
            Returns a data matrix of the original shape.
        """
        return X @ self.components_

fit(X, y=None)

Learn an SNMF model for the data X.

Parameters:

Name Type Description Default
X

Datapoints to factor.

required
y

Not used, present for API consistency by convention.

None

Returns:

Name Type Description
self SNMF

Fitted model.

Source code in noloox/decomposition/snmf.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def fit(self, X, y=None):
    """Fit the SNMF model to X and hand back the estimator itself.

    All of the work is delegated to ``fit_transform``; the transformed
    data it produces is discarded.

    Parameters
    ----------
    X: array-like of shape (n_samples, n_features)
        Datapoints to factor.
    y: Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self: SNMF
        Fitted model.
    """
    # Only the fitting side effects are wanted here, not the return value.
    _ = self.fit_transform(X, y)
    return self

fit_transform(X, y=None)

Learn an SNMF model for the data X and returns the transformed data.

Parameters:

Name Type Description Default
X

Datapoints to factor.

required
y

Not used, present for API consistency by convention.

None

Returns:

Name Type Description
W ndarray of shape (n_samples, n_components)

Transformed data. Strictly nonnegative.

Source code in noloox/decomposition/snmf.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def fit_transform(self, X, y=None):
    """Learn an SNMF model for the data X and returns the transformed data.

    Sets ``components_``, ``reconstruction_err_`` and ``n_iter_`` on the
    estimator. Emits a warning if the stopping condition is not met
    within ``max_iter`` iterations.

    Parameters
    ----------
    X: array-like of shape (n_samples, n_features)
        Datapoints to factor.
    y: Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    W : ndarray of shape (n_samples, n_components)
        Transformed data. Strictly nonnegative.
    """
    G = init_G(X.T, self.n_components, random_state=self.random_state)
    F = update_F(X.T, G)
    error_at_init = rec_err(X.T, F, G)
    prev_error = error_at_init
    # Seed the loop-carried results so that max_iter=0 still leaves a
    # well-defined fitted state instead of raising NameError below.
    error = error_at_init
    n_iter = 0
    # NOTE(review): the helpers above receive X.T while step receives X
    # untransposed -- presumably step transposes internally; confirm.
    _step = partial(step, sparsity=self.sparsity, X=X)
    for i in trange(
        self.max_iter,
        desc="Iterative updates.",
        disable=not self.progress_bar,
    ):
        G, F, error = _step(G, F)
        # i is a 0-based index; the number of completed updates is i + 1.
        n_iter = i + 1
        difference = prev_error - error
        # Stop once the improvement, relative to the initial error, drops
        # below tol -- but only after the error has beaten the initial one.
        if (error < error_at_init) and (
            difference / error_at_init
        ) < self.tol:
            if self.verbose:
                print(f"Converged after {n_iter} iterations")
            break
        prev_error = error
        if self.verbose:
            print(
                f"Iteration: {i}, Error: {error}, init_error: {error_at_init}, difference from previous: {difference}"
            )
    else:
        # Reached only when the loop was never broken out of.
        warnings.warn("SNMF did not converge, try specifying a higher max_iter.")
    self.components_ = np.array(F.T)
    self.reconstruction_err_ = error
    self.n_iter_ = n_iter
    return np.array(G)

inverse_transform(X)

Transform data back to its original space.

Parameters:

Name Type Description Default
X ndarray of shape (n_samples, n_components)

Transformed data matrix.

required

Returns:

Name Type Description
X_original ndarray of shape (n_samples, n_features)

Returns a data matrix of the original shape.

Source code in noloox/decomposition/snmf.py
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def inverse_transform(self, X):
    """Map latent representations back to the original feature space.

    The result is the matrix product of X with the fitted ``components_``.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_components)
        Transformed data matrix.

    Returns
    -------
    X_original : ndarray of shape (n_samples, n_features)
        Returns a data matrix of the original shape.
    """
    dictionary = self.components_
    reconstruction = X @ dictionary
    return reconstruction

transform(X)

Transform the data X according to the fitted SNMF model.

Parameters:

Name Type Description Default
X

Datapoints to transform.

required

Returns:

Name Type Description
W ndarray of shape (n_samples, n_components)

Nonnegative latent sources.

Source code in noloox/decomposition/snmf.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
def transform(self, X):
    """Project X onto the latent space of the fitted SNMF model.

    X is multiplied by the pseudo-inverse of ``components_`` and every
    negative entry of the result is replaced by zero.

    Parameters
    ----------
    X: array-like of shape (n_samples, n_features)
        Datapoints to transform.

    Returns
    -------
    W: ndarray of shape (n_samples, n_components)
        Nonnegative latent sources.
    """
    pseudo_inverse = jnp.linalg.pinv(self.components_)
    projected = X @ pseudo_inverse
    # Clip at zero so the returned sources are nonnegative.
    nonnegative = jnp.maximum(projected, 0)
    return np.array(nonnegative)