NOTICE

The upcoming release of Featuretools 1.0.0 contains several breaking changes. Users are encouraged to test this version prior to release by installing from GitHub:

pip install https://github.com/alteryx/featuretools/archive/woodwork-integration.zip

For details on migrating to the new version, refer to "Transitioning to Featuretools Version 1.0". Please report any issues in the Featuretools GitHub repo or by posting a message in the Alteryx Open Source Slack.


Source code for featuretools.primitives.base.aggregation_primitive_base

import copy
import functools
import inspect

from featuretools.primitives.base.primitive_base import PrimitiveBase
from featuretools.primitives.base.utils import inspect_function_args


class AggregationPrimitive(PrimitiveBase):
    """Base class for aggregation primitives.

    Holds the class-level stacking configuration and builds the display
    names of the features produced by an aggregation.
    """

    # Primitives allowed to appear in this primitive's input_types
    # (whitelist).
    stack_on = None
    # Primitives that may not appear in this primitive's input_types
    # (blacklist).
    stack_on_exclude = None
    # Primitives that this primitive is allowed to be an input for
    # (whitelist).
    base_of = None
    # Primitives that this primitive can't be an input for.
    base_of_exclude = None
    # Whether this primitive can be in the input_types of itself.
    stack_on_self = True

    def generate_name(self, base_feature_names, relationship_path_name,
                      parent_dataframe_name, where_str, use_prev_str):
        """Build the name of the single feature produced by this primitive.

        The result has the form
        ``NAME(<relationship path>.<base features><where><use_prev><args>)``
        where ``NAME`` is the upper-cased primitive name and the base
        feature names are joined with ``", "``.

        ``parent_dataframe_name`` is accepted but not used here.
        """
        joined_bases = ", ".join(base_feature_names)
        pieces = (
            self.name.upper(),
            relationship_path_name,
            joined_bases,
            where_str,
            use_prev_str,
            self.get_args_string(),
        )
        return "%s(%s.%s%s%s%s)" % pieces

    def generate_names(self, base_feature_names, relationship_path_name,
                       parent_dataframe_name, where_str, use_prev_str):
        """Build one name per output feature.

        Each name is the value returned by ``generate_name`` followed by
        the output index in square brackets, for indices
        ``0 .. number_output_features - 1``.
        """
        output_count = self.number_output_features
        stem = self.generate_name(
            base_feature_names,
            relationship_path_name,
            parent_dataframe_name,
            where_str,
            use_prev_str,
        )
        return [stem + "[%s]" % index for index in range(output_count)]
def make_agg_primitive(function, input_types, return_type, name=None,
                       stack_on_self=True, stack_on=None,
                       stack_on_exclude=None, base_of=None,
                       base_of_exclude=None, description=None,
                       cls_attributes=None, uses_calc_time=False,
                       default_value=None, commutative=False,
                       number_output_features=1):
    """Returns a new aggregation primitive class.

    The primitive infers default values by passing in empty data.

    Args:
        function (function): Function that takes in a series and applies
            some transformation to it.

        input_types (list[ColumnSchema]): ColumnSchema of the inputs.

        return_type (ColumnSchema): ColumnSchema of returned feature.

        name (str): Name of the function. If no name is provided, the name
            of `function` will be used.

        stack_on_self (bool): Whether this primitive can be in input_types
            of self.

        stack_on (list[PrimitiveBase]): Whitelist of primitives that can be
            input_types.

        stack_on_exclude (list[PrimitiveBase]): Blacklist of primitives that
            cannot be input_types.

        base_of (list[PrimitiveBase]): Whitelist of primitives that can have
            this primitive in input_types.

        base_of_exclude (list[PrimitiveBase]): Blacklist of primitives that
            cannot have this primitive in input_types.

        description (str): Description of primitive. If not provided, the
            docstring of `function` is used, or 'A custom primitive' when
            `function` has no docstring.

        cls_attributes (dict[str -> anytype]): Custom attributes to be added
            to class. Key is attribute name, value is the attribute value.

        uses_calc_time (bool): If True, the cutoff time the feature is being
            calculated at will be passed to the function as the keyword
            argument 'time'.

        default_value (int, float): Default value when creating the
            primitive to avoid the inference step. If no default value is
            provided, the inference happens.

        commutative (bool): If True, will only make one feature per unique
            set of base features.

        number_output_features (int): The number of output features (columns
            in the matrix) associated with this feature.

    Returns:
        type: A new subclass of AggregationPrimitive wrapping `function`.

    Example:
        .. ipython :: python

            from featuretools.primitives import make_agg_primitive
            from woodwork.column_schema import ColumnSchema
            from woodwork.logical_types import Datetime

            def time_since_last(values, time=None):
                time_since = time - values.iloc[-1]
                return time_since.total_seconds()

            TimeSinceLast = make_agg_primitive(
                function=time_since_last,
                input_types=[ColumnSchema(logical_type=Datetime, semantic_tags={'time_index'})],
                return_type=ColumnSchema(semantic_tags={'numeric'}),
                description="Time since last related instance",
                uses_calc_time=True)
    """
    if description is None:
        # Prefer the wrapped function's own docstring over the stock label.
        default_description = 'A custom primitive'
        doc = inspect.getdoc(function)
        description = doc if doc is not None else default_description

    # Class namespace: user-supplied attributes are applied after __doc__,
    # so they take precedence on a key collision.
    cls = {"__doc__": description}
    if cls_attributes is not None:
        cls.update(cls_attributes)

    name = name or function.__name__
    new_class = type(name, (AggregationPrimitive,), cls)
    new_class.name = name
    new_class.input_types = input_types
    new_class.return_type = return_type
    new_class.stack_on = stack_on
    new_class.stack_on_exclude = stack_on_exclude
    new_class.stack_on_self = stack_on_self
    new_class.base_of = base_of
    new_class.base_of_exclude = base_of_exclude
    new_class.commutative = commutative
    new_class.number_output_features = number_output_features
    new_class, default_kwargs = inspect_function_args(new_class,
                                                      function,
                                                      uses_calc_time)

    if default_kwargs:
        new_class.default_kwargs = default_kwargs

        def new_class_init(self, **kwargs):
            # Deep-copy so per-instance overrides never mutate the defaults
            # stored on the class.
            self.kwargs = copy.deepcopy(self.default_kwargs)
            self.kwargs.update(kwargs)
            self.partial = functools.partial(function, **self.kwargs)
            self.partial.__name__ = name

        new_class.__init__ = new_class_init
        new_class.get_function = lambda self: self.partial
    else:
        # creates a lambda function that returns function every time
        new_class.get_function = lambda self, f=function: f

    if default_value is None:
        # Infer default_value by calling the function on empty data. Each
        # positional argument gets its own empty list, so the arguments do
        # not alias one shared object.
        empty_inputs = [[] for _ in range(len(input_types))]
        try:
            new_class.default_value = function(*empty_inputs)
        except Exception:
            # Inference is best-effort: many functions cannot handle empty
            # input. On failure nothing is assigned, so the class keeps
            # whatever default_value it inherits.
            pass
    else:
        # avoiding the inference step
        new_class.default_value = default_value

    return new_class