Skip to content

Binary segmentation#

ruptures.detection.binseg.Binseg #

Binary segmentation.

__init__(self, model='l2', custom_cost=None, min_size=2, jump=5, params=None) special #

Initialize a Binseg instance.

Parameters:

Name Type Description Default
model str

segment model, ["l1", "l2", "rbf",...]. Not used if 'custom_cost' is not None.

'l2'
custom_cost BaseCost

custom cost function. Defaults to None.

None
min_size int

minimum segment length. Defaults to 2 samples.

2
jump int

subsample (one every jump points). Defaults to 5 samples.

5
params dict

a dictionary of parameters for the cost instance.

None
Source code in ruptures/detection/binseg.py
def __init__(self, model="l2", custom_cost=None, min_size=2, jump=5, params=None):
    """Set up a binary segmentation detector.

    Args:
        model (str, optional): name of the segment model passed to the cost
            factory, e.g. "l1", "l2", "rbf". Ignored when ``custom_cost`` is
            a ``BaseCost`` instance.
        custom_cost (BaseCost, optional): ready-made cost object to use
            instead of building one from ``model``. Defaults to None.
        min_size (int, optional): minimum segment length. The stored value is
            the larger of this argument and the cost's own ``min_size``.
            Defaults to 2 samples.
        jump (int, optional): subsample (one every *jump* points). Defaults
            to 5 samples.
        params (dict, optional): keyword arguments forwarded to the cost
            factory when the cost is built from ``model``.
    """
    if isinstance(custom_cost, BaseCost):
        # isinstance() is False for None, so no separate None check is needed.
        self.cost = custom_cost
    else:
        # Anything that is not a BaseCost falls back to the named model.
        factory_kwargs = {} if params is None else params
        self.cost = cost_factory(model=model, **factory_kwargs)

    # Never go below the minimum segment length the cost itself requires.
    self.min_size = max(min_size, self.cost.min_size)
    self.jump = jump

    # Filled in by fit().
    self.n_samples = None
    self.signal = None

    # Per-instance memoisation of the single-breakpoint search.
    self.single_bkp = lru_cache(maxsize=None)(self._single_bkp)

fit(self, signal) #

Compute params to segment signal.

Parameters:

Name Type Description Default
signal array

signal to segment. Shape (n_samples, n_features) or (n_samples,).

required

Returns:

Type Description
Binseg

self

Source code in ruptures/detection/binseg.py
def fit(self, signal) -> "Binseg":
    """Store the signal and prepare the cost function for segmentation.

    Args:
        signal (array): signal to segment. Shape (n_samples, n_features) or
            (n_samples,). A 1-D signal is stored as a single-column 2-D array.

    Returns:
        self
    """
    # Keep a 2-D view of the signal on the instance.
    is_univariate = signal.ndim == 1
    self.signal = signal.reshape(-1, 1) if is_univariate else signal

    # Two-value unpacking: the stored signal must be exactly 2-D.
    n_rows, _n_cols = self.signal.shape
    self.n_samples = n_rows

    # The cost is fitted on the signal exactly as it was passed in.
    self.cost.fit(signal)

    # Results cached for a previously fitted signal are no longer valid.
    self.single_bkp.cache_clear()

    return self

fit_predict(self, signal, n_bkps=None, pen=None, epsilon=None) #

Fit to the signal and return the optimal breakpoints.

Helper method that calls fit and then predict in a single step.

Parameters:

Name Type Description Default
signal array

signal. Shape (n_samples, n_features) or (n_samples,).

required
n_bkps int

number of breakpoints.

None
pen float

penalty value (>0)

None
epsilon float

reconstruction budget (>0)

None

Returns:

Type Description
list

sorted list of breakpoints

Source code in ruptures/detection/binseg.py
def fit_predict(self, signal, n_bkps=None, pen=None, epsilon=None):
    """Fit on the signal, then return the breakpoints in one call.

    Convenience wrapper that runs ``fit`` followed by ``predict``.

    Args:
        signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
        n_bkps (int): number of breakpoints.
        pen (float): penalty value (>0)
        epsilon (float): reconstruction budget (>0)

    Returns:
        list: sorted list of breakpoints
    """
    self.fit(signal)
    # Forward the stopping-rule arguments unchanged, by keyword.
    stopping_rule = {"n_bkps": n_bkps, "pen": pen, "epsilon": epsilon}
    return self.predict(**stopping_rule)

predict(self, n_bkps=None, pen=None, epsilon=None) #

Return the optimal breakpoints.

Must be called after the fit method. The breakpoints are associated with the signal passed to fit(). The stopping rule depends on the parameter passed to the function.

Parameters:

Name Type Description Default
n_bkps int

number of breakpoints to find before stopping.

None
pen float

penalty value (>0)

None
epsilon float

reconstruction budget (>0)

None

Exceptions:

Type Description
AssertionError

if none of n_bkps, pen, epsilon is set.

BadSegmentationParameters

in case of impossible segmentation configuration

Returns:

Type Description
list

sorted list of breakpoints

Source code in ruptures/detection/binseg.py
def predict(self, n_bkps=None, pen=None, epsilon=None):
    """Return the optimal breakpoints.

    Must be called after the fit method. The breakpoints are associated with the
    signal passed to [`fit()`][ruptures.detection.binseg.Binseg.fit].
    The stopping rule depends on the parameter passed to the function.

    Args:
        n_bkps (int): number of breakpoints to find before stopping.
        pen (float): penalty value (>0)
        epsilon (float): reconstruction budget (>0)

    Raises:
        AssertionError: if none of `n_bkps`, `pen`, `epsilon` is set.
        BadSegmentationParameters: in case of impossible segmentation
            configuration

    Returns:
        list: sorted list of breakpoints
    """
    # Raise explicitly rather than via an `assert` statement: asserts are
    # stripped under `python -O`, which would silently skip this validation.
    # The exception type and message are kept for backward compatibility.
    if n_bkps is None and pen is None and epsilon is None:
        raise AssertionError("Give a parameter.")

    # raise an exception in case of impossible segmentation configuration
    # NOTE(review): the check is always run with n_bkps=1, regardless of the
    # requested `n_bkps` -- confirm this is intended.
    if not sanity_check(
        n_samples=self.cost.signal.shape[0],
        n_bkps=1,
        jump=self.jump,
        min_size=self.min_size,
    ):
        raise BadSegmentationParameters

    # The partition is keyed by (start, end) pairs; the breakpoints are the
    # segment ends, returned in increasing order.
    partition = self._seg(n_bkps=n_bkps, pen=pen, epsilon=epsilon)
    bkps = sorted(e for s, e in partition.keys())
    return bkps