import json
import featuretools as ft
[docs]def describe_feature(
    feature, feature_descriptions=None, primitive_templates=None, metadata_file=None
):
    """Generates an English language description of a feature.
    Args:
        feature (FeatureBase) : Feature to describe
        feature_descriptions (dict, optional) : dictionary mapping features or unique
            feature names to custom descriptions
        primitive_templates (dict, optional) : dictionary mapping primitives or
            primitive names to description templates
        metadata_file (str, optional) : path to json metadata file
    Returns:
        str : English description of the feature
    """
    feature_descriptions = feature_descriptions or {}
    primitive_templates = primitive_templates or {}
    if metadata_file:
        file_feature_descriptions, file_primitive_templates = parse_json_metadata(
            metadata_file
        )
        feature_descriptions = {**file_feature_descriptions, **feature_descriptions}
        primitive_templates = {**file_primitive_templates, **primitive_templates}
    description = generate_description(
        feature, feature_descriptions, primitive_templates
    )
    return description[:1].upper() + description[1:] + "." 
def generate_description(feature, feature_descriptions, primitive_templates):
    # Check if feature has custom description
    if feature in feature_descriptions or feature.unique_name() in feature_descriptions:
        description = feature_descriptions.get(feature) or feature_descriptions.get(
            feature.unique_name()
        )
        return description
    # Check if identity feature:
    if isinstance(feature, ft.IdentityFeature):
        description = feature.column_schema.description
        if description is None:
            description = 'the "{}"'.format(feature.column_name)
        return description
    # Handle direct features
    if isinstance(feature, ft.DirectFeature):
        base_feature, direct_description = get_direct_description(feature)
        direct_base = generate_description(
            base_feature, feature_descriptions, primitive_templates
        )
        return direct_base + direct_description
    # Get input descriptions
    input_descriptions = []
    input_columns = feature.base_features
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        input_columns = feature.base_feature.base_features
    for input_col in input_columns:
        col_description = generate_description(
            input_col, feature_descriptions, primitive_templates
        )
        input_descriptions.append(col_description)
    # Remove groupby description from input columns
    groupby_description = None
    if isinstance(feature, ft.GroupByTransformFeature):
        groupby_description = input_descriptions.pop()
    # Generate primitive description
    template_override = None
    if (
        feature.primitive in primitive_templates
        or feature.primitive.name in primitive_templates
    ):
        template_override = primitive_templates.get(
            feature.primitive
        ) or primitive_templates.get(feature.primitive.name)
    slice_num = feature.n if hasattr(feature, "n") else None
    primitive_description = feature.primitive.get_description(
        input_descriptions, slice_num=slice_num, template_override=template_override
    )
    if isinstance(feature, ft.feature_base.FeatureOutputSlice):
        feature = feature.base_feature
    # Generate groupby phrase if applicable
    groupby = ""
    if isinstance(feature, ft.AggregationFeature):
        groupby_description = get_aggregation_groupby(feature, feature_descriptions)
    if groupby_description is not None:
        if groupby_description.startswith("the "):
            groupby_description = groupby_description[4:]
        groupby = "for each {}".format(groupby_description)
    # Generate aggregation dataframe phrase with use_previous
    dataframe_description = ""
    if isinstance(feature, ft.AggregationFeature):
        if feature.use_previous:
            dataframe_description = "of the previous {} of ".format(
                feature.use_previous.get_name().lower()
            )
        else:
            dataframe_description = "of all instances of "
        dataframe_description += '"{}"'.format(
            feature.relationship_path[-1][1].child_dataframe.ww.name
        )
    # Generate where phrase
    where = ""
    if hasattr(feature, "where") and feature.where:
        where_col = generate_description(
            feature.where.base_features[0], feature_descriptions, primitive_templates
        )
        where = "where {} is {}".format(where_col, feature.where.primitive.value)
    # Join all parts of template
    description_template = [
        primitive_description,
        dataframe_description,
        where,
        groupby,
    ]
    description = " ".join([phrase for phrase in description_template if phrase != ""])
    return description
def get_direct_description(feature):
    direct_description = (
        ' the instance of "{}" associated with this '
        'instance of "{}"'.format(
            feature.relationship_path[-1][1].parent_dataframe.ww.name,
            feature.dataframe_name,
        )
    )
    base_features = feature.base_features
    # shortens stacked direct features to make it easier to understand
    while isinstance(base_features[0], ft.DirectFeature):
        base_feat = base_features[0]
        base_feat_description = ' the instance of "{}" associated ' "with".format(
            base_feat.relationship_path[-1][1].parent_dataframe.ww.name
        )
        direct_description = base_feat_description + direct_description
        base_features = base_feat.base_features
    direct_description = " for" + direct_description
    return base_features[0], direct_description
def get_aggregation_groupby(feature, feature_descriptions=None):
    if feature_descriptions is None:
        feature_descriptions = {}
    groupby_name = feature.dataframe.ww.index
    groupby = ft.IdentityFeature(
        feature.entityset[feature.dataframe_name].ww[groupby_name]
    )
    if groupby in feature_descriptions or groupby.unique_name() in feature_descriptions:
        return feature_descriptions.get(groupby) or feature_descriptions.get(
            groupby.unique_name()
        )
    else:
        return '"{}" in "{}"'.format(groupby_name, feature.dataframe_name)
def parse_json_metadata(file):
    with open(file) as f:
        json_metadata = json.load(f)
    return (
        json_metadata.get("feature_descriptions", {}),
        json_metadata.get("primitive_templates", {}),
    )