Source code for featuretools.synthesis.get_valid_primitives

from featuretools.primitives import AggregationPrimitive, TransformPrimitive
from featuretools.primitives.utils import (
    get_aggregation_primitives,
    get_transform_primitives,
)
from featuretools.synthesis.deep_feature_synthesis import DeepFeatureSynthesis
from featuretools.synthesis.utils import _categorize_features, get_unused_primitives
from featuretools.utils.gen_utils import Library


[docs]def get_valid_primitives(
    entityset,
    target_dataframe_name,
    max_depth=2,
    selected_primitives=None,
    **dfs_kwargs,
):
    """
    Returns two lists of primitives (transform and aggregation) containing
    primitives that can be applied to the specific target dataframe to create
    features.  If the optional 'selected_primitives' parameter is not used,
    all discoverable primitives will be considered.

    Note:
        When using a ``max_depth`` greater than 1, some primitives returned by
        this function may not create any features if passed to DFS alone.  These
        primitives relied on features created by other primitives as input
        (primitive stacking).

    Args:
        entityset (EntitySet): An already initialized entityset
        target_dataframe_name (str): Name of dataframe to create features for.
        max_depth (int, optional): Maximum allowed depth of features.
        selected_primitives(list[str or AggregationPrimitive/TransformPrimitive], optional):
            list of primitives to consider when looking for valid primitives.
            If None, all primitives will be considered
        dfs_kwargs (keywords): Additional keyword arguments to pass as keyword arguments to
            the DeepFeatureSynthesis object. Should not include ``max_depth``, ``agg_primitives``,
            or ``trans_primitives``, as those are passed in explicity.
    Returns:
       list[AggregationPrimitive], list[TransformPrimitive]:
           The list of valid aggregation primitives and the list of valid
           transform primitives.
    """
    agg_primitives = []
    trans_primitives = []
    available_aggs = get_aggregation_primitives()
    available_trans = get_transform_primitives()

    for library in Library:
        if library.value == entityset.dataframe_type:
            df_library = library
            break

    if selected_primitives:
        for prim in selected_primitives:
            if not isinstance(prim, str):
                if issubclass(prim, AggregationPrimitive):
                    prim_list = agg_primitives
                elif issubclass(prim, TransformPrimitive):
                    prim_list = trans_primitives
                else:
                    raise ValueError(
                        f"Selected primitive {prim} is not an "
                        "AggregationPrimitive, TransformPrimitive, or str",
                    )
            elif prim in available_aggs:
                prim = available_aggs[prim]
                prim_list = agg_primitives
            elif prim in available_trans:
                prim = available_trans[prim]
                prim_list = trans_primitives
            else:
                raise ValueError(f"'{prim}' is not a recognized primitive name")
            if df_library in prim.compatibility:
                prim_list.append(prim)
    else:
        agg_primitives = [
            agg for agg in available_aggs.values() if df_library in agg.compatibility
        ]
        trans_primitives = [
            trans
            for trans in available_trans.values()
            if df_library in trans.compatibility
        ]

    dfs_object = DeepFeatureSynthesis(
        target_dataframe_name,
        entityset,
        agg_primitives=agg_primitives,
        trans_primitives=trans_primitives,
        max_depth=max_depth,
        **dfs_kwargs,
    )

    features = dfs_object.build_features()

    trans, agg, _, _ = _categorize_features(features)

    trans_unused = get_unused_primitives(trans_primitives, trans)
    agg_unused = get_unused_primitives(agg_primitives, agg)

    # switch from str to class
    agg_unused = [available_aggs[name] for name in agg_unused]
    trans_unused = [available_trans[name] for name in trans_unused]

    used_agg_prims = set(agg_primitives).difference(set(agg_unused))
    used_trans_prims = set(trans_primitives).difference(set(trans_unused))
    return list(used_agg_prims), list(used_trans_prims)
Table of Contents

Quick search

Source code for featuretools.synthesis.get_valid_primitives