NOTICE

The upcoming release of Featuretools 1.0.0 contains several breaking changes. Users are encouraged to test this version prior to release by installing from GitHub:

pip install https://github.com/alteryx/featuretools/archive/woodwork-integration.zip

For details on migrating to the new version, refer to Transitioning to Featuretools Version 1.0. Please report any issues in the Featuretools GitHub repo or by posting a message in the Alteryx Open Source Slack.


Source code for featuretools.primitives.standard.cum_transform_feature

import numpy as np
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import IntegerNullable

from featuretools.primitives.base import TransformPrimitive


class CumSum(TransformPrimitive):
    """Computes a running total over a column.

    Description:
        For each position, returns the sum of every value seen
        up to and including that position. No window is applied.
        A `NaN` input produces `NaN` at its own position, but it
        is skipped when accumulating, so later totals are not
        affected by it.

    Examples:
        >>> cum_sum = CumSum()
        >>> cum_sum([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 3.0, 6.0, 10.0, nan, 15.0]
    """

    name = "cum_sum"
    description_template = "the cumulative sum of {}"

    # Accepts any numeric-tagged column and yields a numeric-tagged column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})

    # NOTE(review): presumably signals that the primitive must see the
    # whole column at once -- confirm against TransformPrimitive.
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative sum."""

        def cum_sum(values):
            # Delegates to the input's own ``cumsum`` method.
            running_total = values.cumsum()
            return running_total

        return cum_sum
class CumCount(TransformPrimitive):
    """Computes a running count over a column.

    Description:
        For each position, returns how many values have been
        seen up to and including that position. No window is
        applied. The count is based purely on position, so
        `NaN` values are counted like any other entry.

    Examples:
        >>> cum_count = CumCount()
        >>> cum_count([1, 2, 3, 4, None, 5]).tolist()
        [1, 2, 3, 4, 5, 6]
    """

    name = "cum_count"
    description_template = "the cumulative count of {}"

    # Two alternative single-column signatures: a foreign-key-tagged
    # column or a category-tagged column.
    # NOTE(review): nested lists presumably mean "any one of these
    # signatures" -- confirm against TransformPrimitive.
    input_types = [
        [ColumnSchema(semantic_tags={"foreign_key"})],
        [ColumnSchema(semantic_tags={"category"})],
    ]
    return_type = ColumnSchema(
        logical_type=IntegerNullable,
        semantic_tags={"numeric"},
    )

    # NOTE(review): presumably signals that the primitive must see the
    # whole column at once -- confirm against TransformPrimitive.
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative count."""

        def cum_count(values):
            # 1-based positions: 1, 2, ..., len(values).
            n_rows = len(values)
            return np.arange(1, n_rows + 1)

        return cum_count
class CumMean(TransformPrimitive):
    """Computes a running mean over a column.

    Description:
        For each position, returns the cumulative sum up to and
        including that position divided by the 1-based position.
        No window is applied. A `NaN` input produces `NaN` at
        its own position; because the divisor still advances
        past it, a `NaN` effectively contributes 0 to later
        means.

    Examples:
        >>> cum_mean = CumMean()
        >>> cum_mean([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 1.5, 2.0, 2.5, nan, 2.5]
    """

    name = "cum_mean"
    description_template = "the cumulative mean of {}"

    # Accepts any numeric-tagged column and yields a numeric-tagged column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})

    # NOTE(review): presumably signals that the primitive must see the
    # whole column at once -- confirm against TransformPrimitive.
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative mean."""

        def cum_mean(values):
            running_total = values.cumsum()
            # Divisor is the row position (1..n), not the count of
            # non-missing entries.
            positions = np.arange(1, len(values) + 1)
            return running_total / positions

        return cum_mean
class CumMin(TransformPrimitive):
    """Computes a running minimum over a column.

    Description:
        For each position, returns the smallest value seen up to
        and including that position. No window is applied. A
        `NaN` input produces `NaN` at its own position, but it
        is skipped when tracking the minimum, so later results
        are not affected by it.

    Examples:
        >>> cum_min = CumMin()
        >>> cum_min([1, 2, -3, 4, None, 5]).tolist()
        [1.0, 1.0, -3.0, -3.0, nan, -3.0]
    """

    name = "cum_min"
    description_template = "the cumulative minimum of {}"

    # Accepts any numeric-tagged column and yields a numeric-tagged column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})

    # NOTE(review): presumably signals that the primitive must see the
    # whole column at once -- confirm against TransformPrimitive.
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative minimum."""

        def cum_min(values):
            # Delegates to the input's own ``cummin`` method.
            running_min = values.cummin()
            return running_min

        return cum_min
class CumMax(TransformPrimitive):
    """Computes a running maximum over a column.

    Description:
        For each position, returns the largest value seen up to
        and including that position. No window is applied. A
        `NaN` input produces `NaN` at its own position, but it
        is skipped when tracking the maximum, so later results
        are not affected by it.

    Examples:
        >>> cum_max = CumMax()
        >>> cum_max([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 2.0, 3.0, 4.0, nan, 5.0]
    """

    name = "cum_max"
    description_template = "the cumulative maximum of {}"

    # Accepts any numeric-tagged column and yields a numeric-tagged column.
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})

    # NOTE(review): presumably signals that the primitive must see the
    # whole column at once -- confirm against TransformPrimitive.
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative maximum."""

        def cum_max(values):
            # Delegates to the input's own ``cummax`` method.
            running_max = values.cummax()
            return running_max

        return cum_max