# Source code for featuretools.primitives.standard.aggregation.n_most_common
import numpy as np
from woodwork.column_schema import ColumnSchema
from featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive
class NMostCommon(AggregationPrimitive):
    """Determines the `n` most common elements.

    Description:
        Given a list of values, return the `n` values
        which appear the most frequently. If there are
        fewer than `n` unique values, the output will be
        filled with `NaN`.

    Args:
        n (int): defines "n" in "n most common." Defaults
            to 3.

    Examples:
        >>> n_most_common = NMostCommon(n=2)
        >>> x = ['orange', 'apple', 'orange', 'apple', 'orange', 'grapefruit']
        >>> n_most_common(x).tolist()
        ['orange', 'apple']
    """

    name = "n_most_common"
    input_types = [ColumnSchema(semantic_tags={"category"})]
    return_type = None

    def __init__(self, n=3):
        """Store `n` and size the outputs and description templates to match.

        Args:
            n (int): number of most common values to return. Defaults to 3.
        """
        self.n = n
        # One output feature per returned value.
        self.number_output_features = n
        # One template for the primitive as a whole, one for the top value,
        # and `n - 1` `{nth_slice}` templates for the remaining outputs.
        self.description_template = [
            "the {} most common values of {{}}".format(n),
            "the most common value of {}",
            *["the {nth_slice} most common value of {}"] * (n - 1),
        ]

    def get_function(self):
        """Return the callable that computes the `n` most common values.

        The returned function takes an object exposing `value_counts()`
        (presumably a pandas Series; confirm against the caller) and returns
        a NumPy array of length `self.n`, padded with `NaN` when fewer than
        `self.n` distinct values are present.
        """

        def n_most_common(x):
            # Counts of 0 remain in value_counts output if dtype is category
            # so we need to remove them
            counts = x.value_counts()
            counts = counts[counts > 0]
            array = np.array(counts.index[: self.n])
            # Pad with NaN so the output always has exactly `self.n` entries.
            if len(array) < self.n:
                filler = np.full(self.n - len(array), np.nan)
                array = np.append(array, filler)
            return array

        return n_most_common