Source code for featuretools.primitives.standard.aggregation.has_no_duplicates

from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import BooleanNullable

from featuretools.primitives.base import AggregationPrimitive


class HasNoDuplicates(AggregationPrimitive):
    """Determines if there are no duplicates in the input.

    The result is ``True`` when no value in the input is repeated and
    ``False`` as soon as at least one value occurs more than once.

    Args:
        skipna (bool): Determines if to use NA/null values.
            Defaults to True to skip NA/null. When False, NA/null
            values are kept and take part in the duplicate check.

    Examples:
        >>> has_no_duplicates = HasNoDuplicates()
        >>> has_no_duplicates([1, 1, 2])
        False
        >>> has_no_duplicates([1, 2, 3])
        True

        NaNs are skipped by default.

        >>> has_no_duplicates([1, 2, 3, None, None])
        True

        However, the way NaNs are treated can be controlled.

        >>> has_no_duplicates_skipna = HasNoDuplicates(skipna=False)
        >>> has_no_duplicates_skipna([1, 2, 3, None, None])
        False
        >>> has_no_duplicates_skipna([1, 2, 3, None])
        True
    """

    # Primitive metadata consumed by the featuretools framework.
    name = "has_no_duplicates"
    # Two alternative single-column signatures: a "category"-tagged
    # column or a "numeric"-tagged column.
    input_types = [
        [ColumnSchema(semantic_tags={"category"})],
        [ColumnSchema(semantic_tags={"numeric"})],
    ]
    return_type = ColumnSchema(logical_type=BooleanNullable)
    stack_on_self = False
    default_value = True

    def __init__(self, skipna=True):
        """Store whether NA/null values are dropped before checking."""
        self.skipna = skipna

    def get_function(self):
        """Return the callable that performs the duplicate check.

        The returned function expects an object exposing ``dropna()`` and
        ``duplicated()`` (presumably a pandas Series -- confirm against
        the caller) and returns a plain ``bool``.
        """

        def has_no_duplicates(data):
            # ``self.skipna`` is read at call time, not when the
            # function is created.
            values = data.dropna() if self.skipna else data
            any_repeated = values.duplicated().any()
            return not any_repeated

        return has_no_duplicates