Source code for featuretools.primitives.standard.aggregation.entropy
from scipy import stats
from woodwork.column_schema import ColumnSchema
from featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive
from featuretools.utils.gen_utils import Library
class Entropy(AggregationPrimitive):
    """Calculates the entropy for a categorical column

    Description:
        Given a list of observations from a categorical
        column return the entropy of the distribution.
        NaN values can be treated as a category or
        dropped.

    Args:
        dropna (bool): Whether to drop NaN values before computing
            the distribution. If True, NaN values are excluded; if
            False, NaN is counted as a category of its own.
            Defaults to False.
        base (float): The logarithmic base to use.
            Defaults to None, which means e (natural logarithm).

    Examples:
        >>> pd_entropy = Entropy()
        >>> pd_entropy([1,2,3,4])
        1.3862943611198906
    """

    name = "entropy"
    input_types = [ColumnSchema(semantic_tags={"category"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    stack_on_self = False
    description_template = "the entropy of {}"

    def __init__(self, dropna=False, base=None):
        """Store the NaN-handling flag and the logarithm base.

        Args:
            dropna (bool): Forwarded to ``value_counts``; True drops NaN
                values, False keeps NaN as a category.
            base (float): Forwarded to ``scipy.stats.entropy``; None
                selects the natural logarithm.
        """
        self.dropna = dropna
        self.base = base

    def get_function(self, agg_type=Library.PANDAS):
        """Return the callable that computes entropy for one group of values.

        Args:
            agg_type: Not consulted by this implementation; the same
                pandas-based function is returned regardless of its value.

        Returns:
            callable: A function that takes a pandas Series and returns
            the entropy of its value distribution.
        """

        def pd_entropy(s):
            # Relative frequency of each distinct value; NaN is kept as a
            # category unless self.dropna is True.
            distribution = s.value_counts(normalize=True, dropna=self.dropna)
            return stats.entropy(distribution, base=self.base)

        return pd_entropy