NOTICE

The upcoming release of Featuretools 1.0.0 contains several breaking changes. Users are encouraged to test this version prior to release by installing from GitHub:

pip install https://github.com/alteryx/featuretools/archive/woodwork-integration.zip

For details on migrating to the new version, refer to "Transitioning to Featuretools Version 1.0". Please report any issues in the Featuretools GitHub repo or by messaging in the Alteryx Open Source Slack.


Source code for featuretools.entityset.relationship

class Relationship(object):
    """Class to represent a relationship between dataframes

    A relationship links a column of a child dataframe to a column of a
    parent dataframe. Only the dataframe and column *names* are stored; the
    actual objects are looked up on the owning EntitySet on demand.

    See Also:
        :class:`.EntitySet`
    """

    def __init__(self, entityset, parent_dataframe_name, parent_column_name,
                 child_dataframe_name, child_column_name):
        """ Create a relationship

        Args:
            entityset (:class:`.EntitySet`): EntitySet to which the relationship belongs
            parent_dataframe_name (str): Name of the parent dataframe in the EntitySet
            parent_column_name (str): Name of the parent column
            child_dataframe_name (str): Name of the child dataframe in the EntitySet
            child_column_name (str): Name of the child column

        Raises:
            AttributeError: If the parent dataframe has an index set and the
                parent column is not that index.
        """
        self.entityset = entityset
        self._parent_dataframe_name = parent_dataframe_name
        self._child_dataframe_name = child_dataframe_name
        self._parent_column_name = parent_column_name
        self._child_column_name = child_column_name

        # Look the index up once; the check is skipped when no index is set.
        parent_index = self.parent_dataframe.ww.index
        if parent_index is not None and self._parent_column_name != parent_index:
            raise AttributeError(f"Parent column '{self._parent_column_name}' is not the index of "
                                 f"dataframe {self._parent_dataframe_name}")

    @classmethod
    def from_dictionary(cls, arguments, es):
        """Build a relationship from a dictionary as produced by ``to_dictionary``.

        Args:
            arguments (dict): Must contain the keys ``parent_dataframe_name``,
                ``child_dataframe_name``, ``parent_column_name`` and
                ``child_column_name``.
            es (:class:`.EntitySet`): EntitySet to which the relationship belongs
        """
        parent_dataframe = arguments['parent_dataframe_name']
        child_dataframe = arguments['child_dataframe_name']
        parent_column = arguments['parent_column_name']
        child_column = arguments['child_column_name']
        return cls(es, parent_dataframe, parent_column, child_dataframe, child_column)

    def __repr__(self):
        return (f"<Relationship: {self._child_dataframe_name}.{self._child_column_name}"
                f" -> {self._parent_dataframe_name}.{self._parent_column_name}>")

    def __eq__(self, other):
        # Equality is based on the four names only; the entityset is not compared.
        if not isinstance(other, self.__class__):
            return False

        return self._parent_dataframe_name == other._parent_dataframe_name and \
            self._child_dataframe_name == other._child_dataframe_name and \
            self._parent_column_name == other._parent_column_name and \
            self._child_column_name == other._child_column_name

    def __hash__(self):
        # Hash over the same four names that __eq__ compares.
        return hash((self._parent_dataframe_name,
                     self._child_dataframe_name,
                     self._parent_column_name,
                     self._child_column_name))

    @property
    def parent_dataframe(self):
        """Parent dataframe object"""
        return self.entityset[self._parent_dataframe_name]

    @property
    def child_dataframe(self):
        """Child dataframe object"""
        return self.entityset[self._child_dataframe_name]

    @property
    def parent_column(self):
        """Column in parent dataframe"""
        return self.parent_dataframe.ww[self._parent_column_name]

    @property
    def child_column(self):
        """Column in child dataframe"""
        return self.child_dataframe.ww[self._child_column_name]

    @property
    def parent_name(self):
        """The name of the parent, relative to the child."""
        if self._is_unique():
            return self._parent_dataframe_name
        # Several relationships link the same two dataframes, so the child
        # column is appended to tell them apart.
        return '%s[%s]' % (self._parent_dataframe_name, self._child_column_name)

    @property
    def child_name(self):
        """The name of the child, relative to the parent."""
        if self._is_unique():
            return self._child_dataframe_name
        # Several relationships link the same two dataframes, so the child
        # column is appended to tell them apart.
        return '%s[%s]' % (self._child_dataframe_name, self._child_column_name)

    def to_dictionary(self):
        """Return the four dataframe/column names as a plain dictionary."""
        return {
            'parent_dataframe_name': self._parent_dataframe_name,
            'child_dataframe_name': self._child_dataframe_name,
            'parent_column_name': self._parent_column_name,
            'child_column_name': self._child_column_name,
        }

    def _is_unique(self):
        """Return True if this is the only relationship between its child and parent dataframes.

        Counts the relationships that the entityset's
        ``get_forward_relationships`` returns for the child dataframe and
        whose parent dataframe name matches this relationship's parent.

        Raises:
            AssertionError: If no such relationship is found, i.e. this
                relationship is missing from the entityset.
        """
        es = self.entityset
        relationships = es.get_forward_relationships(self._child_dataframe_name)
        n = sum(1 for r in relationships
                if r._parent_dataframe_name == self._parent_dataframe_name)

        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O.
        if n == 0:
            raise AssertionError('This relationship is missing from the entityset')

        return n == 1
class RelationshipPath(object):
    """An ordered sequence of ``(is_forward, relationship)`` pairs.

    ``is_forward`` is True when the step goes from the relationship's child
    dataframe to its parent dataframe, and False for the opposite direction.
    """

    def __init__(self, relationships_with_direction):
        self._relationships_with_direction = relationships_with_direction

    @property
    def name(self):
        """Dot-joined names of every step, each taken in its travel direction."""
        return '.'.join(
            _direction_name(forward, rel)
            for forward, rel in self._relationships_with_direction
        )

    def dataframes(self):
        """Yield the name of each dataframe visited, starting with the origin."""
        if not self:
            return

        # The origin is the side of the first relationship we start from.
        first_forward, first_rel = self[0]
        yield (first_rel._child_dataframe_name if first_forward
               else first_rel._parent_dataframe_name)

        # After that, each step contributes the dataframe it points to.
        for forward, rel in self:
            yield (rel._parent_dataframe_name if forward
                   else rel._child_dataframe_name)

    def __add__(self, other):
        combined = self._relationships_with_direction + other._relationships_with_direction
        return RelationshipPath(combined)

    def __getitem__(self, index):
        return self._relationships_with_direction[index]

    def __iter__(self):
        for forward, rel in self._relationships_with_direction:
            yield forward, rel

    def __len__(self):
        return len(self._relationships_with_direction)

    def __eq__(self, other):
        if not isinstance(other, RelationshipPath):
            return False
        return self._relationships_with_direction == other._relationships_with_direction

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        has_steps = bool(self._relationships_with_direction)
        path = '%s.%s' % (next(self.dataframes()), self.name) if has_steps else '[]'
        return '<RelationshipPath %s>' % path


def _direction_name(is_forward, relationship):
    """Return the parent name for a forward step, otherwise the child name."""
    return relationship.parent_name if is_forward else relationship.child_name