# Source code for featuretools.primitives.standard.cum_transform_feature

import numpy as np
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import IntegerNullable

from featuretools.primitives.base import TransformPrimitive


class CumSum(TransformPrimitive):
    """Computes the running total of a numeric column.

    Description:
        For each position in the input, return the sum of the
        values up to and including that position. No window is
        applied, so every result accumulates over all earlier
        values. A `NaN` input produces `NaN` at its own position,
        but it is ignored when the running total is carried
        forward to later positions.

    Examples:
        >>> cum_sum = CumSum()
        >>> cum_sum([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 3.0, 6.0, 10.0, nan, 15.0]
    """

    name = "cum_sum"
    description_template = "the cumulative sum of {}"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative sum."""

        def cum_sum(values):
            # Delegates to the input's own ``cumsum`` method.
            running_total = values.cumsum()
            return running_total

        return cum_sum
class CumCount(TransformPrimitive):
    """Computes the running count of rows seen so far.

    Description:
        For each position in the input, return how many values
        have been seen up to and including that position. No
        window is applied, so the count keeps growing over all
        earlier values. `NaN` values are counted like any other
        value.

    Examples:
        >>> cum_count = CumCount()
        >>> cum_count([1, 2, 3, 4, None, 5]).tolist()
        [1, 2, 3, 4, 5, 6]
    """

    name = "cum_count"
    description_template = "the cumulative count of {}"
    input_types = [
        [ColumnSchema(semantic_tags={"foreign_key"})],
        [ColumnSchema(semantic_tags={"category"})],
    ]
    return_type = ColumnSchema(
        logical_type=IntegerNullable,
        semantic_tags={"numeric"},
    )
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative count."""

        def cum_count(values):
            # The count depends only on position, so the result is
            # simply 1..len(values); the values themselves are not read.
            n_rows = len(values)
            return np.arange(1, n_rows + 1)

        return cum_count
class CumMean(TransformPrimitive):
    """Computes the running mean of a numeric column.

    Description:
        For each position in the input, return the mean of the
        values up to and including that position. No window is
        applied, so every result averages over all earlier
        values. A `NaN` input produces `NaN` at its own position;
        for later positions it contributes 0 to the running sum
        while still being counted in the divisor.

    Examples:
        >>> cum_mean = CumMean()
        >>> cum_mean([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 1.5, 2.0, 2.5, nan, 2.5]
    """

    name = "cum_mean"
    description_template = "the cumulative mean of {}"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative mean."""

        def cum_mean(values):
            running_total = values.cumsum()
            # Divisor is the 1-based position of each row, so missing
            # rows still advance the denominator.
            positions = np.arange(1, len(values) + 1)
            return running_total / positions

        return cum_mean
class CumMin(TransformPrimitive):
    """Computes the running minimum of a numeric column.

    Description:
        For each position in the input, return the smallest value
        seen up to and including that position. No window is
        applied, so every result considers all earlier values.
        A `NaN` input produces `NaN` at its own position, but it
        is ignored when the running minimum is carried forward to
        later positions.

    Examples:
        >>> cum_min = CumMin()
        >>> cum_min([1, 2, -3, 4, None, 5]).tolist()
        [1.0, 1.0, -3.0, -3.0, nan, -3.0]
    """

    name = "cum_min"
    description_template = "the cumulative minimum of {}"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative minimum."""

        def cum_min(values):
            # Delegates to the input's own ``cummin`` method.
            running_min = values.cummin()
            return running_min

        return cum_min
class CumMax(TransformPrimitive):
    """Computes the running maximum of a numeric column.

    Description:
        For each position in the input, return the largest value
        seen up to and including that position. No window is
        applied, so every result considers all earlier values.
        A `NaN` input produces `NaN` at its own position, but it
        is ignored when the running maximum is carried forward to
        later positions.

    Examples:
        >>> cum_max = CumMax()
        >>> cum_max([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 2.0, 3.0, 4.0, nan, 5.0]
    """

    name = "cum_max"
    description_template = "the cumulative maximum of {}"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True

    def get_function(self):
        """Return the callable that performs the cumulative maximum."""

        def cum_max(values):
            # Delegates to the input's own ``cummax`` method.
            running_max = values.cummax()
            return running_max

        return cum_max