Source code for featuretools.primitives.standard.aggregation.n_most_common
import numpy as np
from woodwork.column_schema import ColumnSchema
from featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive
from featuretools.utils.gen_utils import Library
class NMostCommon(AggregationPrimitive):
    """Finds the `n` values that occur most often.

    Description:
        Given a list of values, return the `n` values
        with the highest frequency. When the input holds
        fewer than `n` distinct values, the remaining
        output slots are padded with `NaN`.

    Args:
        n (int): how many of the most common values to
            return. Defaults to 3.

    Examples:
        >>> n_most_common = NMostCommon(n=2)
        >>> x = ['orange', 'apple', 'orange', 'apple', 'orange', 'grapefruit']
        >>> n_most_common(x).tolist()
        ['orange', 'apple']
    """

    name = "n_most_common"
    input_types = [ColumnSchema(semantic_tags={"category"})]
    return_type = None

    def __init__(self, n=3):
        """Store `n` and derive the output count and description templates."""
        self.n = n
        # The primitive emits one output feature per requested value.
        self.number_output_features = n

        # The first two entries are fixed; the "{nth_slice}" entry is
        # repeated n - 1 times.
        nth_value_templates = ["the {nth_slice} most common value of {}"] * (n - 1)
        self.description_template = [
            "the {} most common values of {{}}".format(n),
            "the most common value of {}",
            *nth_value_templates,
        ]

    def get_function(self, agg_type=Library.PANDAS):
        """Return the callable that computes the `n` most common values.

        The returned function calls ``value_counts()`` on its argument and
        returns a NumPy array of length ``self.n``.
        """

        def n_most_common(x):
            limit = self.n

            # A categorical dtype keeps unobserved categories in the
            # value_counts output with a count of 0; drop those entries.
            frequencies = x.value_counts()
            observed = frequencies[frequencies > 0]

            top_values = np.array(observed.index[:limit])

            # Pad with NaN when fewer than `limit` distinct values exist.
            shortfall = limit - len(top_values)
            if shortfall > 0:
                padding = np.full(shortfall, np.nan)
                top_values = np.append(top_values, padding)
            return top_values

        return n_most_common