import numpy as np
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import IntegerNullable
from featuretools.primitives.base import TransformPrimitive
class CumSum(TransformPrimitive):
    """Computes a running total over the input values.

    Description:
        For each position in the input, return the sum of the
        values up to and including that position. No window is
        applied, so every earlier value contributes to each
        result. A `NaN` input yields `NaN` at its own position,
        but is ignored when accumulating the totals that follow.

    Examples:
        >>> cum_sum = CumSum()
        >>> cum_sum([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 3.0, 6.0, 10.0, nan, 15.0]
    """

    name = "cum_sum"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True
    description_template = "the cumulative sum of {}"

    def get_function(self):
        """Return the callable that performs the accumulation."""

        def cum_sum(values):
            # The accumulation is delegated to the input's own
            # ``cumsum`` method.
            running_total = values.cumsum()
            return running_total

        return cum_sum
class CumCount(TransformPrimitive):
    """Computes a running count over the input values.

    Description:
        For each position in the input, return how many values
        have been seen up to and including that position. No
        window is applied. The count depends only on the length
        of the input, so `NaN` values are counted like any other
        entry.

    Examples:
        >>> cum_count = CumCount()
        >>> cum_count([1, 2, 3, 4, None, 5]).tolist()
        [1, 2, 3, 4, 5, 6]
    """

    name = "cum_count"
    # Two alternative single-column signatures are accepted.
    input_types = [
        [ColumnSchema(semantic_tags={"foreign_key"})],
        [ColumnSchema(semantic_tags={"category"})],
    ]
    return_type = ColumnSchema(
        logical_type=IntegerNullable,
        semantic_tags={"numeric"},
    )
    uses_full_dataframe = True
    description_template = "the cumulative count of {}"

    def get_function(self):
        """Return the callable that produces the running count."""

        def cum_count(values):
            # 1-based positions: 1, 2, ..., len(values).
            n_rows = len(values)
            return np.arange(1, n_rows + 1)

        return cum_count
class CumMean(TransformPrimitive):
    """Computes a running mean over the input values.

    Description:
        For each position in the input, return the running sum
        divided by the 1-based position. No window is applied,
        so every earlier value contributes to each result. A
        `NaN` input yields `NaN` at its own position; for later
        positions it adds nothing to the sum but still counts
        toward the divisor, i.e. it is treated as 0.

    Examples:
        >>> cum_mean = CumMean()
        >>> cum_mean([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 1.5, 2.0, 2.5, nan, 2.5]
    """

    name = "cum_mean"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True
    description_template = "the cumulative mean of {}"

    def get_function(self):
        """Return the callable that produces the running mean."""

        def cum_mean(values):
            running_total = values.cumsum()
            # Divisor is the 1-based position, regardless of
            # whether the entry at that position is missing.
            positions = np.arange(1, len(values) + 1)
            return running_total / positions

        return cum_mean
class CumMin(TransformPrimitive):
    """Computes a running minimum over the input values.

    Description:
        For each position in the input, return the smallest
        value seen up to and including that position. No window
        is applied, so every earlier value is considered. A
        `NaN` input yields `NaN` at its own position, but is
        ignored when determining the minimums that follow.

    Examples:
        >>> cum_min = CumMin()
        >>> cum_min([1, 2, -3, 4, None, 5]).tolist()
        [1.0, 1.0, -3.0, -3.0, nan, -3.0]
    """

    name = "cum_min"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True
    description_template = "the cumulative minimum of {}"

    def get_function(self):
        """Return the callable that produces the running minimum."""

        def cum_min(values):
            # Delegated to the input's own ``cummin`` method.
            running_min = values.cummin()
            return running_min

        return cum_min
class CumMax(TransformPrimitive):
    """Computes a running maximum over the input values.

    Description:
        For each position in the input, return the largest
        value seen up to and including that position. No window
        is applied, so every earlier value is considered. A
        `NaN` input yields `NaN` at its own position, but is
        ignored when determining the maximums that follow.

    Examples:
        >>> cum_max = CumMax()
        >>> cum_max([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 2.0, 3.0, 4.0, nan, 5.0]
    """

    name = "cum_max"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    uses_full_dataframe = True
    description_template = "the cumulative maximum of {}"

    def get_function(self):
        """Return the callable that produces the running maximum."""

        def cum_max(values):
            # Delegated to the input's own ``cummax`` method.
            running_max = values.cummax()
            return running_max

        return cum_max