# Source code for featuretools.primitives.standard.aggregation.percent_true

from dask import dataframe as dd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Boolean, BooleanNullable, Double

from featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive
from featuretools.utils.gen_utils import Library


class PercentTrue(AggregationPrimitive):
    """Determines the percent of `True` values.

    Description:
        Given a list of booleans, return the fraction of
        values that are `True`, expressed as a decimal.
        `NaN` values are counted as `False`, so they still
        contribute to the denominator.

    Examples:
        >>> percent_true = PercentTrue()
        >>> percent_true([True, False, True, True, None])
        0.6
    """

    name = "percent_true"
    # Either a nullable or a plain boolean column is accepted as input.
    input_types = [
        [ColumnSchema(logical_type=BooleanNullable)],
        [ColumnSchema(logical_type=Boolean)],
    ]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    stack_on = []
    stack_on_exclude = []
    default_value = 0
    compatibility = [Library.PANDAS, Library.DASK]
    description_template = "the percentage of true values in {}"

    def get_function(self, agg_type=Library.PANDAS):
        """Build the aggregation for the requested backend.

        Args:
            agg_type: Backend selector. ``Library.DASK`` yields a
                ``dd.Aggregation``; any other value yields a plain
                function of one argument.

        Returns:
            A ``dd.Aggregation`` named after this primitive, or a function
            ``percent_true(s)`` that evaluates ``s.fillna(False).mean()``.
        """

        def percent_true(s):
            # Missing entries become False first, so they lower the mean
            # instead of being skipped.
            return s.fillna(False).mean()

        def as_int_if_bool(counts):
            # A result that came back with bool dtype is converted to int64;
            # anything else is passed through unchanged.
            if counts.dtype == "bool":
                return counts.astype("int64")
            return counts

        def chunk(s):
            # NOTE(review): presumably ``s`` is the grouped column of a single
            # partition, per the dd.Aggregation contract -- confirm.
            def fill_missing(x):
                return x[:].fillna(False)

            true_counts = s.agg(lambda x: fill_missing(x).sum())
            row_counts = s.agg(lambda x: len(fill_missing(x)))
            return (as_int_if_bool(true_counts), as_int_if_bool(row_counts))

        def agg(true_counts, row_counts):
            # Combine the partial results by summing each of them.
            return (true_counts.sum(), row_counts.sum())

        def finalize(true_total, row_total):
            # Ratio of True values to all rows.
            return true_total / row_total

        if agg_type == Library.DASK:
            return dd.Aggregation(self.name, chunk=chunk, agg=agg, finalize=finalize)
        return percent_true