# Source code for featuretools.feature_base.feature_descriptions

import json

import featuretools as ft


def describe_feature(feature, feature_descriptions=None, primitive_templates=None, metadata_file=None):
    '''Generates an English language description of a feature.

    Args:
        feature (FeatureBase) : Feature to describe
        feature_descriptions (dict, optional) : dictionary mapping features or unique
            feature names to custom descriptions
        primitive_templates (dict, optional) : dictionary mapping primitives or
            primitive names to description templates
        metadata_file (str, optional) : path to json metadata file

    Returns:
        str : English description of the feature, with its first character
            upper-cased and a trailing period appended
    '''
    custom_descriptions = feature_descriptions or {}
    custom_templates = primitive_templates or {}

    if metadata_file:
        file_descriptions, file_templates = parse_json_metadata(metadata_file)
        # Unpack the file entries first so explicitly passed entries win on conflict.
        custom_descriptions = {**file_descriptions, **custom_descriptions}
        custom_templates = {**file_templates, **custom_templates}

    phrase = generate_description(feature, custom_descriptions, custom_templates)
    # Slicing (rather than indexing) keeps an empty phrase from raising.
    return phrase[:1].upper() + phrase[1:] + '.'
def generate_description(feature, feature_descriptions, primitive_templates):
    '''Builds the description phrase for a feature, recursing into its inputs.

    No capitalization or trailing punctuation is applied here.

    Args:
        feature (FeatureBase) : Feature to describe
        feature_descriptions (dict) : dictionary mapping features or unique
            feature names to custom descriptions
        primitive_templates (dict) : dictionary mapping primitives or
            primitive names to description templates

    Returns:
        str : description phrase for the feature (a custom description is
            returned exactly as stored in ``feature_descriptions``)
    '''
    # Check if feature has custom description
    if feature in feature_descriptions or feature.unique_name() in feature_descriptions:
        return (feature_descriptions.get(feature) or
                feature_descriptions.get(feature.unique_name()))

    # Check if identity feature:
    if isinstance(feature, ft.IdentityFeature):
        description = feature.column_schema.description
        if description is None:
            description = 'the "{}"'.format(feature.column_name)
        return description

    # Handle direct features
    if isinstance(feature, ft.DirectFeature):
        base_feature, direct_description = get_direct_description(feature)
        direct_base = generate_description(base_feature,
                                           feature_descriptions,
                                           primitive_templates)
        return direct_base + direct_description

    # Get input descriptions; an output slice is described through the
    # inputs of the feature it slices.
    input_columns = feature.base_features
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        input_columns = feature.base_feature.base_features
    input_descriptions = [
        generate_description(input_col, feature_descriptions, primitive_templates)
        for input_col in input_columns
    ]

    # Remove groupby description from input columns (it is the last input
    # of a groupby transform feature and is rendered as "for each ..." below)
    groupby_description = None
    if isinstance(feature, ft.GroupByTransformFeature):
        groupby_description = input_descriptions.pop()

    # Generate primitive description
    template_override = None
    if feature.primitive in primitive_templates or feature.primitive.name in primitive_templates:
        template_override = (primitive_templates.get(feature.primitive) or
                             primitive_templates.get(feature.primitive.name))
    slice_num = getattr(feature, 'n', None)
    primitive_description = feature.primitive.get_description(input_descriptions,
                                                              slice_num=slice_num,
                                                              template_override=template_override)

    # From here on, describe the sliced feature itself rather than the slice
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        feature = feature.base_feature

    # Generate groupby phrase if applicable
    groupby = ''
    if isinstance(feature, ft.AggregationFeature):
        groupby_description = get_aggregation_groupby(feature, feature_descriptions)
    if groupby_description is not None:
        # Drop a leading article so the phrase reads "for each X", not "for each the X"
        if groupby_description.startswith('the '):
            groupby_description = groupby_description[4:]
        groupby = "for each {}".format(groupby_description)

    # Generate aggregation dataframe phrase with use_previous
    dataframe_description = ''
    if isinstance(feature, ft.AggregationFeature):
        if feature.use_previous:
            dataframe_description = "of the previous {} of ".format(
                feature.use_previous.get_name().lower())
        else:
            dataframe_description = "of all instances of "
        dataframe_description += '"{}"'.format(
            feature.relationship_path[-1][1].child_dataframe.ww.name)

    # Generate where phrase
    where = ''
    if hasattr(feature, 'where') and feature.where:
        where_col = generate_description(feature.where.base_features[0],
                                         feature_descriptions,
                                         primitive_templates)
        where = 'where {} is {}'.format(where_col, feature.where.primitive.value)

    # Join all parts of template, skipping the phrases that do not apply
    description_template = [primitive_description, dataframe_description, where, groupby]
    return " ".join(phrase for phrase in description_template if phrase != '')


def get_direct_description(feature):
    '''Builds the relationship suffix for a direct feature.

    Chains of stacked direct features are collapsed into a single suffix.

    Args:
        feature (DirectFeature) : direct feature to describe

    Returns:
        tuple : ``(base_feature, direct_description)`` where ``base_feature``
            is the first base feature found that is not a DirectFeature and
            ``direct_description`` is the text to append to that feature's
            own description
    '''
    direct_description = (' the instance of "{}" associated with this '
                          'instance of "{}"').format(
        feature.relationship_path[-1][1].parent_dataframe.ww.name,
        feature.dataframe_name)
    base_features = feature.base_features
    # shortens stacked direct features to make it easier to understand
    while isinstance(base_features[0], ft.DirectFeature):
        base_feat = base_features[0]
        base_feat_description = (' the instance of "{}" associated '
                                 'with').format(
            base_feat.relationship_path[-1][1].parent_dataframe.ww.name)
        direct_description = base_feat_description + direct_description
        base_features = base_feat.base_features
    direct_description = ' for' + direct_description

    return base_features[0], direct_description


def get_aggregation_groupby(feature, feature_descriptions=None):
    '''Returns the text used in the "for each ..." phrase of an aggregation.

    The grouping column is the index of the feature's dataframe.

    Args:
        feature (AggregationFeature) : aggregation feature being described
        feature_descriptions (dict, optional) : dictionary mapping features or
            unique feature names to custom descriptions

    Returns:
        str : the custom description registered for the index column's
            identity feature if there is one, otherwise
            ``'"<index>" in "<dataframe name>"'``
    '''
    if feature_descriptions is None:
        feature_descriptions = {}
    groupby_name = feature.dataframe.ww.index
    groupby = ft.IdentityFeature(feature.entityset[feature.dataframe_name].ww[groupby_name])
    if groupby in feature_descriptions or groupby.unique_name() in feature_descriptions:
        return (feature_descriptions.get(groupby) or
                feature_descriptions.get(groupby.unique_name()))
    return '"{}" in "{}"'.format(groupby_name, feature.dataframe_name)


def parse_json_metadata(file):
    '''Loads custom descriptions and templates from a json metadata file.

    Args:
        file (str) : path to json metadata file

    Returns:
        tuple : ``(feature_descriptions, primitive_templates)`` taken from the
            keys of the same names; a key that is missing or set to ``null``
            yields an empty dict
    '''
    # Binary mode lets json.load detect the encoding (UTF-8/16/32, with or
    # without a BOM) instead of decoding with the platform's locale encoding.
    with open(file, 'rb') as f:
        json_metadata = json.load(f)

    # `or {}` also covers an explicit JSON null, which would otherwise break
    # callers that merge the result with `{**...}`.
    return (json_metadata.get('feature_descriptions') or {},
            json_metadata.get('primitive_templates') or {})