Source code for featuretools.primitives.base.transform_primitive_base

import copy
import functools
import inspect

from featuretools.primitives.base.primitive_base import PrimitiveBase
from featuretools.primitives.base.utils import inspect_function_args

class TransformPrimitive(PrimitiveBase):
    """Feature for a dataframe that is based off one or more other features
    in that dataframe."""

    # (bool) If True, feature function depends on all values of dataframe
    #   (and will receive these values as input, regardless of specified
    #   instance ids)
    uses_full_dataframe = False

    def generate_name(self, base_feature_names):
        """Build the display name of a feature that uses this primitive.

        Args:
            base_feature_names (list[str]): Names of the input features.

        Returns:
            str: The upper-cased primitive name followed by, in parentheses,
                the comma-separated base feature names and whatever
                ``self.get_args_string()`` returns.
        """
        # NOTE(review): relies on ``self.name`` being a string set on the
        # concrete primitive class -- confirm against PrimitiveBase.
        return u"%s(%s%s)" % (
            self.name.upper(),
            u", ".join(base_feature_names),
            self.get_args_string(),
        )

    def generate_names(self, base_feature_names):
        """Build one name per output column of a multi-output feature.

        Args:
            base_feature_names (list[str]): Names of the input features.

        Returns:
            list[str]: ``self.number_output_features`` names, each being the
                result of ``generate_name`` suffixed with ``[i]``.
        """
        n = self.number_output_features
        base_name = self.generate_name(base_feature_names)
        return [base_name + "[%s]" % i for i in range(n)]
def make_trans_primitive(function, input_types, return_type, name=None,
                         description=None, cls_attributes=None,
                         uses_calc_time=False, commutative=False,
                         number_output_features=1):
    """Returns a new transform primitive class

    Args:
        function (function): Function that takes in a series and applies some
            transformation to it.

        input_types (list[ColumnSchema]): ColumnSchema of the inputs.

        return_type (ColumnSchema): ColumnSchema of returned feature.

        name (str): Name of the primitive. If no name is provided, the name
            of `function` will be used.

        description (str): Description of primitive. If not provided, the
            docstring of `function` is used, falling back to a generic
            description when `function` has no docstring.

        cls_attributes (dict[str -> anytype]): Custom attributes to be added to
            class. Key is attribute name, value is the attribute value.

        uses_calc_time (bool): If True, the cutoff time the feature is being
            calculated at will be passed to the function as the keyword
            argument 'time'.

        commutative (bool): If True, will only make one feature per unique set
            of base features.

        number_output_features (int): The number of output features (columns in
            the matrix) associated with this feature.

    Returns:
        type: A new subclass of ``TransformPrimitive``.

    Example:
        .. ipython :: python

            from featuretools.primitives import make_trans_primitive
            from woodwork.column_schema import ColumnSchema
            from woodwork.logical_types import Boolean

            def pd_is_in(array, list_of_outputs=None):
                if list_of_outputs is None:
                    list_of_outputs = []
                return pd.Series(array).isin(list_of_outputs)

            def isin_generate_name(self):
                return u"%s.isin(%s)" % (self.base_features[0].get_name(),
                                         str(self.kwargs['list_of_outputs']))

            IsIn = make_trans_primitive(
                function=pd_is_in,
                input_types=[ColumnSchema()],
                return_type=ColumnSchema(logical_type=Boolean),
                name="is_in",
                description="For each value of the base feature, checks "
                "whether it is in a list that provided.",
                cls_attributes={"generate_name": isin_generate_name})
    """
    if description is None:
        default_description = 'A custom transform primitive'
        doc = inspect.getdoc(function)
        description = doc if doc is not None else default_description

    # dictionary that holds attributes for class; caller-supplied attributes
    # are applied last, so they may override "__doc__"
    class_attrs = {"__doc__": description}
    if cls_attributes is not None:
        class_attrs.update(cls_attributes)

    # creates the new class and set name and types
    name = name or function.__name__
    new_class = type(name, (TransformPrimitive,), class_attrs)
    new_class.name = name
    new_class.input_types = input_types
    new_class.return_type = return_type
    new_class.commutative = commutative
    new_class.number_output_features = number_output_features

    # NOTE(review): inspect_function_args is defined elsewhere; it appears to
    # return the (possibly modified) class together with the keyword
    # defaults of `function` -- confirm against its implementation.
    new_class, default_kwargs = inspect_function_args(new_class,
                                                      function,
                                                      uses_calc_time)

    if len(default_kwargs) > 0:
        new_class.default_kwargs = default_kwargs

        def new_class_init(self, *args, **kwargs):
            # Positional arguments are accepted but ignored; only keyword
            # arguments configure the primitive.
            # Deep-copy so instances never share (or mutate) the class-level
            # defaults.
            self.kwargs = copy.deepcopy(self.default_kwargs)
            self.kwargs.update(kwargs)
            # Bind the resolved keyword arguments once, at construction time.
            self.partial = functools.partial(function, **self.kwargs)
            # functools.partial objects have no __name__ of their own.
            self.partial.__name__ = name

        new_class.__init__ = new_class_init
        new_class.get_function = lambda self: self.partial
    else:
        # creates a lambda function that returns function every time
        new_class.get_function = lambda self, f=function: f

    return new_class