Source code for featuretools.feature_base.features_serializer

import json

from featuretools.primitives.utils import serialize_primitive
from featuretools.utils.s3_utils import get_transport_params, use_smartopen_features
from featuretools.utils.wrangle import _is_s3, _is_url
from featuretools.version import __version__ as ft_version

SCHEMA_VERSION = "9.0.0"


[docs]def save_features(features, location=None, profile_name=None): """Saves the features list as JSON to a specified filepath/S3 path, writes to an open file, or returns the serialized features as a JSON string. If no file provided, returns a string. Args: features (list[:class:`.FeatureBase`]): List of Feature definitions. location (str or :class:`.FileObject`, optional): The location of where to save the features list which must include the name of the file, or a writeable file handle to write to. If location is None, will return a JSON string of the serialized features. Default: None profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials. Set to False to use an anonymous profile. Note: Features saved in one version of Featuretools are not guaranteed to work in another. After upgrading Featuretools, features may need to be generated again. Example: .. ipython:: python :suppress: from featuretools.tests.testing_utils import ( make_ecommerce_entityset) import featuretools as ft es = make_ecommerce_entityset() import os .. code-block:: python f1 = ft.Feature(es["log"].ww["product_id"]) f2 = ft.Feature(es["log"].ww["purchased"]) f3 = ft.Feature(es["log"].ww["value"]) features = [f1, f2, f3] # Option 1 filepath = os.path.join('/Home/features/', 'list.json') ft.save_features(features, filepath) # Option 2 filepath = os.path.join('/Home/features/', 'list.json') with open(filepath, 'w') as f: ft.save_features(features, f) # Option 3 features_string = ft.save_features(features) .. seealso:: :func:`.load_features` """ return FeaturesSerializer(features).save(location, profile_name=profile_name)
class FeaturesSerializer(object): def __init__(self, feature_list): self.feature_list = feature_list self._features_dict = None def to_dict(self): names_list = [feat.unique_name() for feat in self.feature_list] es = self.feature_list[0].entityset feature_defs, primitive_defs = self._feature_definitions() return { "schema_version": SCHEMA_VERSION, "ft_version": ft_version, "entityset": es.to_dictionary(), "feature_list": names_list, "feature_definitions": feature_defs, "primitive_definitions": primitive_defs, } def save(self, location, profile_name): features_dict = self.to_dict() if location is None: return json.dumps(features_dict) if isinstance(location, str): if _is_url(location): raise ValueError("Writing to URLs is not supported") if _is_s3(location): transport_params = get_transport_params(profile_name) use_smartopen_features( location, features_dict, transport_params, read=False, ) else: with open(location, "w") as f: json.dump(features_dict, f) else: json.dump(features_dict, location) def _feature_definitions(self): if not self._features_dict: self._features_dict = {} self._primitives_dict = {} for feature in self.feature_list: self._serialize_feature(feature) primitive_number = 0 primitive_id_to_key = {} for name, feature in self._features_dict.items(): primitive = feature["arguments"].get("primitive") if primitive: primitive_id = id(primitive) if primitive_id not in primitive_id_to_key.keys(): # Primitive we haven't seen before, add to dict and increment primitive_id counter # Always use string for keys because json conversion results in integer dict keys # being converted to strings, but integer dict values are not. primitives_dict_key = str(primitive_number) primitive_id_to_key[primitive_id] = primitives_dict_key self._primitives_dict[ primitives_dict_key ] = serialize_primitive(primitive) self._features_dict[name]["arguments"][ "primitive" ] = primitives_dict_key primitive_number += 1 else: # Primitive we have seen already - use existing primitive_id key key = primitive_id_to_key[primitive_id] self._features_dict[name]["arguments"]["primitive"] = key return self._features_dict, self._primitives_dict def _serialize_feature(self, feature): name = feature.unique_name() if name not in self._features_dict: self._features_dict[feature.unique_name()] = feature.to_dictionary() for dependency in feature.get_dependencies(deep=True): name = dependency.unique_name() if name not in self._features_dict: self._features_dict[name] = dependency.to_dictionary()