Source code for featuretools.primitives.standard.aggregation.percent_unique
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double
from featuretools.primitives.base import AggregationPrimitive
class PercentUnique(AggregationPrimitive):
    """Determines the percent of unique values.

    Description:
        Given a list of values, determine what fraction of the
        list is made up of unique values. The result is the number
        of distinct values divided by the total number of values,
        so it is expressed as a ratio (e.g. ``0.8``), not as a
        number out of 100.

        The denominator is always the full length of the input,
        including any `NaN` entries. `skipna` only controls whether
        `NaN` contributes to the count of distinct values; when it
        does, multiple `NaN` values are treated as one unique value.

        An empty input yields the primitive's ``default_value``
        (as a float) instead of raising a division-by-zero error.

    Args:
        skipna (bool): Determines whether to ignore `NaN` values
            when counting distinct values. Defaults to True.

    Examples:
        >>> percent_unique = PercentUnique()
        >>> percent_unique([1, 1, 2, 2, 3, 4, 5, 6, 7, 8])
        0.8

        We can control whether or not `NaN` values are ignored.

        >>> percent_unique = PercentUnique()
        >>> percent_unique([1, 1, 2, None])
        0.5
        >>> percent_unique_skipna = PercentUnique(skipna=False)
        >>> percent_unique_skipna([1, 1, 2, None])
        0.75
    """

    name = "percent_unique"
    input_types = [ColumnSchema(semantic_tags={"category"})]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    default_value = 0

    def __init__(self, skipna=True):
        # Stored on the instance; read by the closure built in get_function().
        self.skipna = skipna

    def get_function(self):
        """Return the callable that computes the unique-value ratio.

        Returns:
            callable: A function taking one argument ``x`` and returning
            a float. ``x`` must expose ``shape`` and
            ``nunique(dropna=...)`` (presumably a pandas Series --
            confirm against the caller).
        """

        def percent_unique(x):
            total = x.shape[0]
            if total == 0:
                # No rows: the ratio is undefined (0 / 0), so fall back to
                # the primitive's declared default instead of raising.
                return float(self.default_value)
            # `/` is true division, so no float coercion is required.
            return x.nunique(dropna=self.skipna) / total

        return percent_unique