Source code for featuretools.primitives.standard.aggregation.average_count_per_unique

from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double

from featuretools.primitives.base import AggregationPrimitive


[docs]class AverageCountPerUnique(AggregationPrimitive): """Determines the average count across all unique value. Args: skipna (bool): Determines if to use NA/null values. Defaults to True to skip NA/null. Examples: Determine the average count values for all unique items in the input >>> input = [1, 1, 2, 2, 3, 4, 5, 6, 7, 8] >>> avg_count_per_unique = AverageCountPerUnique() >>> avg_count_per_unique(input) 1.25 Determine the average count values for all unique items in the input with nan values ignored >>> input = [1, 1, 2, 2, 3, 4, 5, None, 6, 7, 8] >>> avg_count_per_unique = AverageCountPerUnique() >>> avg_count_per_unique(input) 1.25 Determine the average count values for all unique items in the input with nan values included >>> input = [1, 2, 2, 3, 4, 5, None, 6, 7, 8, 9] >>> avg_count_per_unique_skipna_false = AverageCountPerUnique(skipna=False) >>> avg_count_per_unique_skipna_false(input) 1.1 """ name = "average_count_per_unique" input_types = [ColumnSchema(semantic_tags={"category"})] return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"}) default_value = 0
[docs] def __init__(self, skipna=True): self.skipna = skipna
def get_function(self): def average_count_per_unique(x): return x.value_counts( dropna=self.skipna, ).mean(skipna=self.skipna) return average_count_per_unique