NOTICE

The upcoming release of Featuretools 1.0.0 contains several breaking changes. Users are encouraged to test this version prior to release by installing from GitHub:

pip install https://github.com/alteryx/featuretools/archive/woodwork-integration.zip

For details on migrating to the new version, refer to the "Transitioning to Featuretools Version 1.0" guide. Please report any issues in the Featuretools GitHub repo or by posting in the Alteryx Open Source Slack.


Source code for featuretools.feature_base.feature_descriptions

import json

import featuretools as ft


def describe_feature(feature, feature_descriptions=None, primitive_templates=None,
                     metadata_file=None):
    '''Generates an English language description of a feature.

    Custom descriptions and templates may be supplied directly, through a
    json metadata file, or both. When both are given, entries passed as
    arguments take precedence over entries with the same key loaded from
    the file.

    Args:
        feature (FeatureBase) : Feature to describe
        feature_descriptions (dict, optional) : dictionary mapping features or unique
            feature names to custom descriptions
        primitive_templates (dict, optional) : dictionary mapping primitives or
            primitive names to description templates
        metadata_file (str, optional) : path to json metadata file

    Returns:
        str : English description of the feature, with its first character
            upper-cased and a trailing period appended
    '''
    feature_descriptions = feature_descriptions or {}
    primitive_templates = primitive_templates or {}

    if metadata_file:
        file_feature_descriptions, file_primitive_templates = parse_json_metadata(metadata_file)
        # Later keys win in a dict merge, so explicitly passed entries
        # override the ones read from the metadata file.
        feature_descriptions = {**file_feature_descriptions, **feature_descriptions}
        primitive_templates = {**file_primitive_templates, **primitive_templates}

    description = generate_description(feature, feature_descriptions, primitive_templates)
    # Slicing (rather than indexing) keeps this safe for an empty description.
    return description[:1].upper() + description[1:] + '.'
def generate_description(feature, feature_descriptions, primitive_templates):
    '''Recursively builds the (un-capitalized, un-punctuated) description of a feature.

    The description is resolved in this order:
        1. a custom description keyed by the feature or its unique name
        2. for identity features, the column's schema description, or
           ``the "<column name>"`` when the schema has none
        3. for direct features, the description of the underlying base
           feature followed by the relationship phrase
        4. otherwise, the primitive's description of its inputs, followed by
           the optional dataframe, where, and groupby phrases

    Args:
        feature (FeatureBase) : Feature to describe
        feature_descriptions (dict) : dictionary mapping features or unique
            feature names to custom descriptions
        primitive_templates (dict) : dictionary mapping primitives or
            primitive names to description templates

    Returns:
        str : description of the feature
    '''
    # Check if feature has custom description
    if feature in feature_descriptions or feature.unique_name() in feature_descriptions:
        description = (feature_descriptions.get(feature) or
                       feature_descriptions.get(feature.unique_name()))
        return description

    # Check if identity feature:
    if isinstance(feature, ft.IdentityFeature):
        description = feature.column_schema.description
        if description is None:
            description = 'the "{}"'.format(feature.column_name)
        return description

    # Handle direct features
    if isinstance(feature, ft.DirectFeature):
        base_feature, direct_description = get_direct_description(feature)
        direct_base = generate_description(base_feature,
                                           feature_descriptions,
                                           primitive_templates)
        return direct_base + direct_description

    # Get input descriptions; an output slice is described through the
    # inputs of the feature it slices.
    input_columns = feature.base_features
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        input_columns = feature.base_feature.base_features

    input_descriptions = [
        generate_description(input_col, feature_descriptions, primitive_templates)
        for input_col in input_columns
    ]

    # Remove groupby description from input columns (it is the last input
    # and is rendered as a separate "for each ..." phrase below).
    groupby_description = None
    if isinstance(feature, ft.GroupByTransformFeature):
        groupby_description = input_descriptions.pop()

    # Generate primitive description
    template_override = None
    if feature.primitive in primitive_templates or feature.primitive.name in primitive_templates:
        template_override = (primitive_templates.get(feature.primitive) or
                             primitive_templates.get(feature.primitive.name))
    slice_num = getattr(feature, 'n', None)
    primitive_description = feature.primitive.get_description(input_descriptions,
                                                              slice_num=slice_num,
                                                              template_override=template_override)
    # From here on, the remaining phrases are derived from the sliced feature.
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        feature = feature.base_feature

    # Generate groupby phrase if applicable
    groupby = ''
    if isinstance(feature, ft.AggregationFeature):
        groupby_description = get_aggregation_groupby(feature, feature_descriptions)
    if groupby_description is not None:
        # Drop a leading article so the phrase reads "for each X".
        if groupby_description.startswith('the '):
            groupby_description = groupby_description[4:]
        groupby = "for each {}".format(groupby_description)

    # Generate aggregation dataframe phrase with use_previous
    dataframe_description = ''
    if isinstance(feature, ft.AggregationFeature):
        if feature.use_previous:
            dataframe_description = "of the previous {} of ".format(
                feature.use_previous.get_name().lower())
        else:
            dataframe_description = "of all instances of "

        dataframe_description += '"{}"'.format(
            feature.relationship_path[-1][1].child_dataframe.ww.name)

    # Generate where phrase
    where = ''
    if hasattr(feature, 'where') and feature.where:
        where_col = generate_description(feature.where.base_features[0],
                                         feature_descriptions,
                                         primitive_templates)
        where = 'where {} is {}'.format(where_col, feature.where.primitive.value)

    # Join all parts of template, skipping the phrases that do not apply
    description_template = [primitive_description, dataframe_description, where, groupby]
    description = " ".join(phrase for phrase in description_template if phrase != '')

    return description


def get_direct_description(feature):
    '''Builds the relationship phrase for a direct feature.

    Chains of stacked direct features are walked down to the first base
    feature that is not itself a direct feature, prepending one
    "the instance of ... associated with" clause per level.

    Args:
        feature (DirectFeature) : direct feature to describe

    Returns:
        tuple : the first non-direct base feature and the relationship
            phrase (str, starting with " for") to append to that base
            feature's description
    '''
    direct_description = ' the instance of "{}" associated with this instance of "{}"'.format(
        feature.relationship_path[-1][1].parent_dataframe.ww.name,
        feature.dataframe_name)
    base_features = feature.base_features
    # shortens stacked direct features to make it easier to understand
    while isinstance(base_features[0], ft.DirectFeature):
        base_feat = base_features[0]
        base_feat_description = ' the instance of "{}" associated with'.format(
            base_feat.relationship_path[-1][1].parent_dataframe.ww.name)
        direct_description = base_feat_description + direct_description
        base_features = base_feat.base_features
    direct_description = ' for' + direct_description

    return base_features[0], direct_description


def get_aggregation_groupby(feature, feature_descriptions=None):
    '''Describes the index column that an aggregation feature is grouped by.

    Args:
        feature (AggregationFeature) : aggregation feature being described
        feature_descriptions (dict, optional) : dictionary mapping features or
            unique feature names to custom descriptions

    Returns:
        str : the custom description of the index's identity feature when
            one is registered, otherwise ``"<index>" in "<dataframe name>"``
    '''
    if feature_descriptions is None:
        feature_descriptions = {}
    groupby_name = feature.dataframe.ww.index
    groupby = ft.IdentityFeature(feature.entityset[feature.dataframe_name].ww[groupby_name])
    if groupby in feature_descriptions or groupby.unique_name() in feature_descriptions:
        return (feature_descriptions.get(groupby) or
                feature_descriptions.get(groupby.unique_name()))
    return '"{}" in "{}"'.format(groupby_name, feature.dataframe_name)


def parse_json_metadata(file):
    '''Loads custom descriptions and templates from a json metadata file.

    Args:
        file (str) : path to json metadata file

    Returns:
        tuple : the values stored under the ``feature_descriptions`` and
            ``primitive_templates`` keys, each defaulting to an empty dict
            when the key is absent
    '''
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(file, encoding='utf-8') as f:
        json_metadata = json.load(f)

    return (json_metadata.get('feature_descriptions', {}),
            json_metadata.get('primitive_templates', {}))