Peax

`noloox.cluster.Peax`

Bases: ClusterMixin, BaseEstimator

Peax clustering model. The model estimates the number of clusters from density peaks, then uses Gaussian Mixtures with fixed means to estimate cluster probabilities.

Parameters:

Name	Type	Description	Default
`random_state`	`Optional[int]`	Random seed to use for fitting gaussian mixture to peaks.	`None`

Attributes:

Name	Type	Description
`labels_`	`ndarray of shape (n_samples,)`	Cluster labels for each point in `X`.
`gmm_`	`FixedMeanGaussianMixture`	The fitted Gaussian mixture model with fixed means.
`means_`	`ndarray of shape (n_components, 2)`	Coordinates of detected density peaks used as cluster means.
`weights_`	`ndarray of shape (n_components,)`	Final mixture component weights after refitting.
`classes_`	`ndarray of shape (n_components,)`	Sorted array of unique cluster labels.
`density`	`gaussian_kde`	Kernel density estimator fitted to the input data.

Source code in noloox/cluster/peax.py

class Peax(ClusterMixin, BaseEstimator):
    """Peax clustering model.

    The model estimates the number of clusters from density peaks,
    then uses Gaussian Mixtures with fixed means to estimate cluster
    probabilities.

    Parameters
    ----------
    random_state: int, default None
        Random seed to use for fitting gaussian mixture to peaks.

    Attributes
    ----------
    labels_ : ndarray of shape (n_samples,)
        Cluster labels for each point in `X`.
    gmm_ : FixedMeanGaussianMixture
        The fitted Gaussian mixture model with fixed means.
    means_ : ndarray of shape (n_components, 2)
        Coordinates of detected density peaks used as cluster means.
    weights_ : ndarray of shape (n_components,)
        Final mixture component weights after refitting.
    classes_ : ndarray of shape (n_components,)
        Sorted array of unique cluster labels.
    density : gaussian_kde
        Kernel density estimator fitted to the input data.
    X_range : tuple of (float, float)
        Minimum and maximum over all values of `X` (both features
        together); used as the bounds of the square peak-search grid.
    """

    def __init__(self, random_state: Optional[int] = None):
        self.random_state = random_state

    def fit_predict(self, X, y=None):
        """Fit Peax clustering model and cluster datapoints.

        Parameters
        ----------
        X: array-like of shape (n_samples, 2)
            List of 2-dimensional data points. Each row corresponds to a
            single data point.

        y: Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        labels: ndarray of shape (n_samples,)
            Cluster labels for each datapoint.

        Raises
        ------
        ValueError
            If `X` is not a 2-dimensional array with exactly two columns.
        """
        # Accept any array-like: everything below relies on ndarray
        # attributes such as `.shape` and `.T`.
        X = np.asarray(X)
        # Peaks are searched on a 2D grid, so exactly two features are
        # required; fewer than two would otherwise fail later with an
        # unrelated error raised from the density evaluation.
        if X.ndim != 2 or X.shape[1] != 2:
            raise ValueError(
                f"X has shape {X.shape}. "
                "Peax only accepts 2D data of shape (n_samples, 2)."
            )
        # A single global (min, max) is shared by both axes, which makes
        # the evaluation grid square.
        self.X_range = np.min(X), np.max(X)
        self.density = gaussian_kde(X.T, "scott")
        coord = np.linspace(*self.X_range, num=100)
        # z[i, j] is the estimated density at (x=coord[j], y=coord[i]).
        z = np.stack(
            [
                np.exp(
                    self.density.logpdf(
                        np.stack([coord, np.full(coord.shape, yval)])
                    )
                )
                for yval in coord
            ]
        )
        # Transposed so that the first axis indexes x and the second y.
        # NOTE(review): detect_peaks is defined elsewhere; presumably it
        # returns a mask marking local maxima -- confirm.
        peaks = detect_peaks(z.T)
        peak_ind = np.nonzero(peaks)
        peak_pos = np.stack([coord[peak_ind[0]], coord[peak_ind[1]]]).T
        # Initial component weights are proportional to the density at
        # each peak.
        weights = self.density.pdf(peak_pos.T)
        weights = weights / weights.sum()
        self.gmm_ = FixedMeanGaussianMixture(
            peak_pos.shape[0],
            means_init=peak_pos,
            weights_init=weights,
            random_state=self.random_state,
        )
        self.labels_ = self.gmm_.fit_predict(X)
        # Checking whether there are close to zero components
        is_zero = np.isclose(self.gmm_.weights_, 0)
        n_zero = np.sum(is_zero)
        if n_zero > 0:
            print(f"{n_zero} components have zero weight, removing them and refitting.")
        # The mixture is refitted on the surviving peaks, starting from
        # the renormalised weights of the first fit.
        peak_pos = peak_pos[~is_zero]
        weights = self.gmm_.weights_[~is_zero]
        weights = weights / weights.sum()
        self.gmm_ = FixedMeanGaussianMixture(
            peak_pos.shape[0],
            means_init=peak_pos,
            weights_init=weights,
            random_state=self.random_state,
        )
        self.labels_ = self.gmm_.fit_predict(X)
        self.classes_ = np.sort(np.unique(self.labels_))
        self.means_ = self.gmm_.means_
        self.weights_ = self.gmm_.weights_
        return self.labels_

    def fit(self, X, y=None):
        """Fits clustering model to data.

        Parameters
        ----------
        X: array-like of shape (n_samples, 2)
            List of 2-dimensional data points. Each row corresponds to a
            single data point.

        y: Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self: Peax
            Fitted clustering model.
        """
        self.fit_predict(X, y)
        return self

    @property
    def n_components(self) -> int:
        """Number of clusters found in the data."""
        return self.gmm_.n_components

    def predict_proba(self, X):
        """Evaluate the components' density for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        resp : array, shape (n_samples, n_components)
            Density of each Gaussian component for each sample in X.
        """
        return self.gmm_.predict_proba(X)

    def score_samples(self, X):
        """Evaluate the log-density of each sample under the fitted KDE.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data points to evaluate. Each row corresponds to a single
            data point.

        Returns
        -------
        log_density : ndarray of shape (n_samples,)
            Log of the kernel density estimate at each sample.
        """
        # gaussian_kde expects points laid out as (n_features, n_samples).
        return self.density.logpdf(np.asarray(X).T)

    def score(self, X, y=None):
        """Mean log-density of the samples under the fitted KDE.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data points to evaluate. Each row corresponds to a single
            data point.

        y: Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        score : float
            Average of `score_samples(X)`.
        """
        return np.mean(self.score_samples(X))

`n_components: int` `property`

Number of clusters found in the data.

`fit(X, y=None)`

Fits clustering model to data.

Parameters:

Name	Type	Description	Default
`X`	`array-like of shape (n_samples, n_features)`	List of n_features-dimensional data points. Each row corresponds to a single data point.	required
`y`		Not used, present for API consistency by convention.	`None`

Returns:

Name	Type	Description
`self`	`Peax`	Fitted clustering model.

Source code in noloox/cluster/peax.py

def fit(self, X, y=None):
    """Fit the clustering model and return the estimator itself.

    Parameters
    ----------
    X: array-like of shape (n_samples, n_features)
        Data points to cluster, one row per data point.

    y: Ignored
        Accepted only for API consistency; it is forwarded unchanged
        to `fit_predict`.

    Returns
    -------
    self: Peax
        The same instance, after `fit_predict` has run on `X`.
    """
    # All the work happens in fit_predict; its return value (the labels)
    # is intentionally discarded here.
    _ = self.fit_predict(X, y)
    return self

`fit_predict(X, y=None)`

Fit Peax clustering model and cluster datapoints.

Parameters:

Name	Type	Description	Default
`X`	`array-like of shape (n_samples, n_features)`	List of n_features-dimensional data points. Each row corresponds to a single data point.	required
`y`		Not used, present for API consistency by convention.	`None`

Returns:

Name	Type	Description
`labels`	`ndarray of shape (n_samples,)`	Cluster labels for each datapoint.

Source code in noloox/cluster/peax.py

def fit_predict(self, X, y=None):
    """Fit Peax clustering model and cluster datapoints.

    Parameters
    ----------
    X: array-like of shape (n_samples, 2)
        List of 2-dimensional data points. Each row corresponds to a
        single data point.

    y: Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    labels: ndarray of shape (n_samples,)
        Cluster labels for each datapoint.

    Raises
    ------
    ValueError
        If `X` is not a 2-dimensional array with exactly two columns.
    """
    # Accept any array-like: everything below relies on ndarray
    # attributes such as `.shape` and `.T`.
    X = np.asarray(X)
    # Peaks are searched on a 2D grid, so exactly two features are
    # required; fewer than two would otherwise fail later with an
    # unrelated error raised from the density evaluation.
    if X.ndim != 2 or X.shape[1] != 2:
        raise ValueError(
            f"X has shape {X.shape}. "
            "Peax only accepts 2D data of shape (n_samples, 2)."
        )
    # A single global (min, max) is shared by both axes, which makes
    # the evaluation grid square.
    self.X_range = np.min(X), np.max(X)
    self.density = gaussian_kde(X.T, "scott")
    coord = np.linspace(*self.X_range, num=100)
    # z[i, j] is the estimated density at (x=coord[j], y=coord[i]).
    z = np.stack(
        [
            np.exp(
                self.density.logpdf(
                    np.stack([coord, np.full(coord.shape, yval)])
                )
            )
            for yval in coord
        ]
    )
    # Transposed so that the first axis indexes x and the second y.
    # NOTE(review): detect_peaks is defined elsewhere; presumably it
    # returns a mask marking local maxima -- confirm.
    peaks = detect_peaks(z.T)
    peak_ind = np.nonzero(peaks)
    peak_pos = np.stack([coord[peak_ind[0]], coord[peak_ind[1]]]).T
    # Initial component weights are proportional to the density at
    # each peak.
    weights = self.density.pdf(peak_pos.T)
    weights = weights / weights.sum()
    self.gmm_ = FixedMeanGaussianMixture(
        peak_pos.shape[0],
        means_init=peak_pos,
        weights_init=weights,
        random_state=self.random_state,
    )
    self.labels_ = self.gmm_.fit_predict(X)
    # Checking whether there are close to zero components
    is_zero = np.isclose(self.gmm_.weights_, 0)
    n_zero = np.sum(is_zero)
    if n_zero > 0:
        print(f"{n_zero} components have zero weight, removing them and refitting.")
    # The mixture is refitted on the surviving peaks, starting from
    # the renormalised weights of the first fit.
    peak_pos = peak_pos[~is_zero]
    weights = self.gmm_.weights_[~is_zero]
    weights = weights / weights.sum()
    self.gmm_ = FixedMeanGaussianMixture(
        peak_pos.shape[0],
        means_init=peak_pos,
        weights_init=weights,
        random_state=self.random_state,
    )
    self.labels_ = self.gmm_.fit_predict(X)
    self.classes_ = np.sort(np.unique(self.labels_))
    self.means_ = self.gmm_.means_
    self.weights_ = self.gmm_.weights_
    return self.labels_

`predict_proba(X)`

Evaluate the components' density for each sample.

Parameters:

Name	Type	Description	Default
`X`	`array-like of shape (n_samples, n_features)`	List of n_features-dimensional data points. Each row corresponds to a single data point.	required

Returns:

Name	Type	Description
`resp`	`ndarray of shape (n_samples, n_components)`	Density of each Gaussian component for each sample in X.

Source code in noloox/cluster/peax.py

def predict_proba(self, X):
    """Return the fitted mixture model's `predict_proba` output for `X`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data points to evaluate, one row per data point.

    Returns
    -------
    resp : array, shape (n_samples, n_components)
        Result of `gmm_.predict_proba(X)`, passed through unchanged;
        one value per sample and Gaussian component.
    """
    # Pure delegation: the mixture stored in `gmm_` does all the work.
    mixture = self.gmm_
    return mixture.predict_proba(X)

Peax

noloox.cluster.Peax

n_components: int property

fit(X, y=None)

fit_predict(X, y=None)

predict_proba(X)

`noloox.cluster.Peax`

`n_components: int` `property`

`fit(X, y=None)`

`fit_predict(X, y=None)`

`predict_proba(X)`