Source code for featuretools.primitives.standard.cum_transform_feature

import numpy as np
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import IntegerNullable

from featuretools.primitives.base import TransformPrimitive


class CumSum(TransformPrimitive):
    """Calculates the cumulative sum.

    Description:
        Given a list of values, return the cumulative sum
        (or running total). There is no set window, so the
        sum at each point is calculated over the current value
        and every value before it. A `NaN` input yields `NaN`
        at that position, but it is ignored by the running
        total carried forward to later positions.

    Examples:
        >>> cum_sum = CumSum()
        >>> cum_sum([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 3.0, 6.0, 10.0, nan, 15.0]
    """

    name = "cum_sum"
    # Accepts one numeric column and produces one numeric column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    # NOTE(review): presumably tells the framework that the result depends on
    # every row (not just the rows being computed) -- confirm against
    # TransformPrimitive.
    uses_full_dataframe = True
    description_template = "the cumulative sum of {}"

    def get_function(self):
        """Return the callable that computes the running total."""

        def cum_sum(values):
            # Delegates to the input's own ``cumsum`` method
            # (presumably a pandas Series -- confirm against caller).
            return values.cumsum()

        return cum_sum


class CumCount(TransformPrimitive):
    """Calculates the cumulative count.

    Description:
        Given a list of values, return the cumulative count
        (or running count). There is no set window, so the
        count at each point covers the current value and every
        value before it. `NaN` values are counted, because the
        result depends only on the number of inputs and not on
        their contents.

    Examples:
        >>> cum_count = CumCount()
        >>> cum_count([1, 2, 3, 4, None, 5]).tolist()
        [1, 2, 3, 4, 5, 6]
    """

    name = "cum_count"
    # NOTE(review): a list of lists -- presumably two alternative one-column
    # signatures (foreign key OR category); confirm against TransformPrimitive.
    input_types = [
        [ColumnSchema(semantic_tags={"foreign_key"})],
        [ColumnSchema(semantic_tags={"category"})],
    ]
    return_type = ColumnSchema(logical_type=IntegerNullable, semantic_tags={"numeric"})
    # NOTE(review): presumably tells the framework that the result depends on
    # every row (not just the rows being computed) -- confirm against
    # TransformPrimitive.
    uses_full_dataframe = True
    description_template = "the cumulative count of {}"

    def get_function(self):
        """Return the callable that computes the running count."""

        def cum_count(values):
            # Only the length of the input matters: position i (1-based)
            # has seen exactly i values, missing or not.
            return np.arange(1, len(values) + 1)

        return cum_count


class CumMean(TransformPrimitive):
    """Calculates the cumulative mean.

    Description:
        Given a list of values, return the cumulative mean
        (or running mean). There is no set window, so the
        mean at each point is calculated over the current value
        and every value before it. A `NaN` input yields `NaN`
        at that position, but in the window of a cumulative
        calculation it is treated as 0: it adds nothing to the
        running sum while still being counted in the divisor.

    Examples:
        >>> cum_mean = CumMean()
        >>> cum_mean([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 1.5, 2.0, 2.5, nan, 2.5]
    """

    name = "cum_mean"
    # Accepts one numeric column and produces one numeric column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    # NOTE(review): presumably tells the framework that the result depends on
    # every row (not just the rows being computed) -- confirm against
    # TransformPrimitive.
    uses_full_dataframe = True
    description_template = "the cumulative mean of {}"

    def get_function(self):
        """Return the callable that computes the running mean."""

        def cum_mean(values):
            # Running sum divided element-wise by the 1-based position.
            # The divisor is positional, so it advances on missing rows
            # too -- this is what makes NaN behave like 0 for later rows.
            return values.cumsum() / np.arange(1, len(values) + 1)

        return cum_mean


class CumMin(TransformPrimitive):
    """Calculates the cumulative minimum.

    Description:
        Given a list of values, return the cumulative min
        (or running min). There is no set window, so the min
        at each point is calculated over the current value and
        every value before it. A `NaN` input yields `NaN` at
        that position, but it is ignored by the running minimum
        carried forward to later positions.

    Examples:
        >>> cum_min = CumMin()
        >>> cum_min([1, 2, -3, 4, None, 5]).tolist()
        [1.0, 1.0, -3.0, -3.0, nan, -3.0]
    """

    name = "cum_min"
    # Accepts one numeric column and produces one numeric column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    # NOTE(review): presumably tells the framework that the result depends on
    # every row (not just the rows being computed) -- confirm against
    # TransformPrimitive.
    uses_full_dataframe = True
    description_template = "the cumulative minimum of {}"

    def get_function(self):
        """Return the callable that computes the running minimum."""

        def cum_min(values):
            # Delegates to the input's own ``cummin`` method
            # (presumably a pandas Series -- confirm against caller).
            return values.cummin()

        return cum_min


class CumMax(TransformPrimitive):
    """Calculates the cumulative maximum.

    Description:
        Given a list of values, return the cumulative max
        (or running max). There is no set window, so the max
        at each point is calculated over the current value and
        every value before it. A `NaN` input yields `NaN` at
        that position, but it is ignored by the running maximum
        carried forward to later positions.

    Examples:
        >>> cum_max = CumMax()
        >>> cum_max([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 2.0, 3.0, 4.0, nan, 5.0]
    """

    name = "cum_max"
    # Accepts one numeric column and produces one numeric column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    # NOTE(review): presumably tells the framework that the result depends on
    # every row (not just the rows being computed) -- confirm against
    # TransformPrimitive.
    uses_full_dataframe = True
    description_template = "the cumulative maximum of {}"

    def get_function(self):
        """Return the callable that computes the running maximum."""

        def cum_max(values):
            # Delegates to the input's own ``cummax`` method
            # (presumably a pandas Series -- confirm against caller).
            return values.cummax()

        return cum_max
Table of Contents

Quick search

Source code for featuretools.primitives.standard.cum_transform_feature