Source code for featuretools.feature_base.feature_descriptions

import json

import featuretools as ft


def describe_feature(
    feature,
    feature_descriptions=None,
    primitive_templates=None,
    metadata_file=None,
):
    """Build a one-sentence English description of a feature.

    Args:
        feature (FeatureBase) : Feature to describe.
        feature_descriptions (dict, optional) : Custom descriptions, keyed
            by feature or by unique feature name.
        primitive_templates (dict, optional) : Description templates, keyed
            by primitive or by primitive name.
        metadata_file (str, optional) : Path to a json metadata file.
            Entries passed directly through ``feature_descriptions`` and
            ``primitive_templates`` take precedence over entries with the
            same key loaded from this file.

    Returns:
        str : The generated description with its first character
            upper-cased and a trailing period appended.
    """
    custom_descriptions = feature_descriptions if feature_descriptions else {}
    custom_templates = primitive_templates if primitive_templates else {}

    if metadata_file:
        loaded = parse_json_metadata(metadata_file)
        loaded_descriptions, loaded_templates = loaded
        # Later keys win, so explicitly passed entries override the file.
        custom_descriptions = {**loaded_descriptions, **custom_descriptions}
        custom_templates = {**loaded_templates, **custom_templates}

    sentence = generate_description(
        feature,
        custom_descriptions,
        custom_templates,
    )
    head, tail = sentence[:1], sentence[1:]
    return head.upper() + tail + "."
def _find_custom_description(feature, feature_descriptions):
    """Return the custom description registered for ``feature``, if any.

    The entry keyed by the feature object is preferred. When that entry is
    missing or falsy, the entry keyed by ``feature.unique_name()`` is used
    instead. If neither lookup yields a usable value, whatever was stored
    under the feature object (possibly an empty string) is returned, so a
    registered-but-empty description never turns into ``None``.

    Returns:
        str or None : the custom description, or None when nothing is
            registered (entries whose value is None count as unregistered).
    """
    by_feature = feature_descriptions.get(feature)
    if by_feature:
        return by_feature
    by_name = feature_descriptions.get(feature.unique_name())
    return by_name if by_name is not None else by_feature


def generate_description(feature, feature_descriptions, primitive_templates):
    """Recursively build the (uncapitalized) description of a feature.

    Args:
        feature (FeatureBase) : Feature to describe.
        feature_descriptions (dict) : Custom descriptions, keyed by feature
            or by unique feature name.
        primitive_templates (dict) : Description templates, keyed by
            primitive or by primitive name.

    Returns:
        str : Description assembled from the primitive description, the
            aggregation dataframe phrase, the where phrase and the groupby
            phrase (empty phrases are skipped). A custom description, when
            one is registered, is returned unchanged.
    """
    # Check if feature has custom description
    custom_description = _find_custom_description(feature, feature_descriptions)
    if custom_description is not None:
        return custom_description

    # Check if identity feature:
    if isinstance(feature, ft.IdentityFeature):
        description = feature.column_schema.description
        if description is None:
            description = 'the "{}"'.format(feature.column_name)
        return description

    # Handle direct features
    if isinstance(feature, ft.DirectFeature):
        base_feature, direct_description = get_direct_description(feature)
        direct_base = generate_description(
            base_feature,
            feature_descriptions,
            primitive_templates,
        )
        return direct_base + direct_description

    # Get input descriptions; an output slice is described through the
    # inputs of the feature it slices.
    input_columns = feature.base_features
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        input_columns = feature.base_feature.base_features
    input_descriptions = [
        generate_description(input_col, feature_descriptions, primitive_templates)
        for input_col in input_columns
    ]

    # Remove groupby description from input columns (it is the last input)
    groupby_description = None
    if isinstance(feature, ft.GroupByTransformFeature):
        groupby_description = input_descriptions.pop()

    # Generate primitive description. The template keyed by the primitive
    # itself is preferred over the one keyed by its name; None means "use
    # the primitive's own template".
    template_override = primitive_templates.get(
        feature.primitive,
    ) or primitive_templates.get(feature.primitive.name)
    slice_num = getattr(feature, "n", None)
    primitive_description = feature.primitive.get_description(
        input_descriptions,
        slice_num=slice_num,
        template_override=template_override,
    )
    # The remaining phrases are derived from the sliced feature itself.
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        feature = feature.base_feature

    # Generate groupby phrase if applicable
    groupby = ""
    if isinstance(feature, ft.AggregationFeature):
        groupby_description = get_aggregation_groupby(feature, feature_descriptions)
    if groupby_description is not None:
        # Drop a leading article so the phrase reads "for each <thing>".
        if groupby_description.startswith("the "):
            groupby_description = groupby_description[4:]
        groupby = "for each {}".format(groupby_description)

    # Generate aggregation dataframe phrase with use_previous
    dataframe_description = ""
    if isinstance(feature, ft.AggregationFeature):
        if feature.use_previous:
            dataframe_description = "of the previous {} of ".format(
                feature.use_previous.get_name().lower(),
            )
        else:
            dataframe_description = "of all instances of "
        dataframe_description += '"{}"'.format(
            feature.relationship_path[-1][1].child_dataframe.ww.name,
        )

    # Generate where phrase (only the first base feature of the where
    # clause is described)
    where = ""
    if hasattr(feature, "where") and feature.where:
        where_col = generate_description(
            feature.where.base_features[0],
            feature_descriptions,
            primitive_templates,
        )
        where = "where {} is {}".format(where_col, feature.where.primitive.value)

    # Join all parts of template, skipping the empty phrases
    description_template = [
        primitive_description,
        dataframe_description,
        where,
        groupby,
    ]
    description = " ".join([phrase for phrase in description_template if phrase != ""])

    return description


def get_direct_description(feature):
    """Describe how a direct feature reaches the feature it is based on.

    Args:
        feature (DirectFeature) : Direct feature to describe.

    Returns:
        tuple : ``(base_feature, direct_description)`` where
            ``base_feature`` is the first non-direct feature found by
            following ``base_features[0]``, and ``direct_description`` is
            the suffix (starting with " for") naming the dataframes
            traversed to reach it.
    """
    direct_description = (
        ' the instance of "{}" associated with this '
        'instance of "{}"'.format(
            feature.relationship_path[-1][1].parent_dataframe.ww.name,
            feature.dataframe_name,
        )
    )
    base_features = feature.base_features
    # shortens stacked direct features to make it easier to understand
    while isinstance(base_features[0], ft.DirectFeature):
        base_feat = base_features[0]
        base_feat_description = ' the instance of "{}" associated with'.format(
            base_feat.relationship_path[-1][1].parent_dataframe.ww.name,
        )
        direct_description = base_feat_description + direct_description
        base_features = base_feat.base_features
    direct_description = " for" + direct_description

    return base_features[0], direct_description


def get_aggregation_groupby(feature, feature_descriptions=None):
    """Describe the index column an aggregation feature is grouped by.

    Args:
        feature (AggregationFeature) : Aggregation feature being described.
        feature_descriptions (dict, optional) : Custom descriptions, keyed
            by feature or by unique feature name.

    Returns:
        str or None : The custom description registered for the index
            identity feature if there is one, otherwise
            ``'"<index>" in "<dataframe>"'``. None is returned when an
            entry is registered but no truthy value is found for it.
    """
    if feature_descriptions is None:
        feature_descriptions = {}
    groupby_name = feature.dataframe.ww.index
    groupby = ft.IdentityFeature(
        feature.entityset[feature.dataframe_name].ww[groupby_name],
    )
    if groupby in feature_descriptions or groupby.unique_name() in feature_descriptions:
        return feature_descriptions.get(groupby) or feature_descriptions.get(
            groupby.unique_name(),
        )
    return '"{}" in "{}"'.format(groupby_name, feature.dataframe_name)


def parse_json_metadata(file):
    """Load custom descriptions and templates from a json metadata file.

    Args:
        file (str) : Path to the json metadata file.

    Returns:
        tuple : ``(feature_descriptions, primitive_templates)`` taken from
            the keys of the same names; a missing key yields an empty dict.
    """
    # JSON is read as UTF-8 rather than with the platform default encoding,
    # so non-ASCII descriptions load the same way on every system.
    with open(file, encoding="utf-8") as f:
        json_metadata = json.load(f)

    return (
        json_metadata.get("feature_descriptions", {}),
        json_metadata.get("primitive_templates", {}),
    )