Source code for featuretools.entityset.relationship

class Relationship(object):
    """Class to represent a relationship between dataframes

    A relationship links a column of a child dataframe to a column of a
    parent dataframe. Only the dataframe and column *names* are stored; the
    dataframes themselves are looked up in the owning EntitySet on demand.

    See Also:
        :class:`.EntitySet`
    """

    def __init__(self, entityset, parent_dataframe_name, parent_column_name,
                 child_dataframe_name, child_column_name):
        """Create a relationship

        Args:
            entityset (:class:`.EntitySet`): EntitySet to which the relationship belongs
            parent_dataframe_name (str): Name of the parent dataframe in the EntitySet
            parent_column_name (str): Name of the parent column
            child_dataframe_name (str): Name of the child dataframe in the EntitySet
            child_column_name (str): Name of the child column

        Raises:
            AttributeError: If the parent dataframe has an index set and
                ``parent_column_name`` is not that index.
        """
        self.entityset = entityset
        self._parent_dataframe_name = parent_dataframe_name
        self._child_dataframe_name = child_dataframe_name
        self._parent_column_name = parent_column_name
        self._child_column_name = child_column_name

        # Read the index once; the check only applies when an index is set.
        parent_index = self.parent_dataframe.ww.index
        if parent_index is not None and self._parent_column_name != parent_index:
            raise AttributeError(f"Parent column '{self._parent_column_name}' is not the index of "
                                 f"dataframe {self._parent_dataframe_name}")

    @classmethod
    def from_dictionary(cls, arguments, es):
        """Build a relationship from the mapping produced by :meth:`to_dictionary`.

        Args:
            arguments (dict): Mapping with the keys ``parent_dataframe_name``,
                ``child_dataframe_name``, ``parent_column_name`` and
                ``child_column_name``.
            es (:class:`.EntitySet`): EntitySet to which the relationship belongs

        Returns:
            Relationship: The reconstructed relationship.
        """
        parent_dataframe = arguments['parent_dataframe_name']
        child_dataframe = arguments['child_dataframe_name']
        parent_column = arguments['parent_column_name']
        child_column = arguments['child_column_name']
        return cls(es, parent_dataframe, parent_column, child_dataframe, child_column)

    def _identity(self):
        """Tuple of the four names that define this relationship.

        Shared by ``__eq__`` and ``__hash__`` so the two can never disagree.
        """
        return (self._parent_dataframe_name,
                self._child_dataframe_name,
                self._parent_column_name,
                self._child_column_name)

    def __repr__(self):
        return "<Relationship: %s.%s -> %s.%s>" % (
            self._child_dataframe_name, self._child_column_name,
            self._parent_dataframe_name, self._parent_column_name)

    def __eq__(self, other):
        # The entityset is deliberately not part of equality; only names are.
        if not isinstance(other, self.__class__):
            return False
        return self._identity() == other._identity()

    def __hash__(self):
        return hash(self._identity())

    @property
    def parent_dataframe(self):
        """Parent dataframe object"""
        return self.entityset[self._parent_dataframe_name]

    @property
    def child_dataframe(self):
        """Child dataframe object"""
        return self.entityset[self._child_dataframe_name]

    @property
    def parent_column(self):
        """Column in parent dataframe"""
        return self.parent_dataframe.ww[self._parent_column_name]

    @property
    def child_column(self):
        """Column in child dataframe"""
        return self.child_dataframe.ww[self._child_column_name]

    @property
    def parent_name(self):
        """The name of the parent, relative to the child."""
        if self._is_unique():
            return self._parent_dataframe_name
        # Several relationships join the same two dataframes: the child
        # column is what tells them apart.
        return '%s[%s]' % (self._parent_dataframe_name, self._child_column_name)

    @property
    def child_name(self):
        """The name of the child, relative to the parent."""
        if self._is_unique():
            return self._child_dataframe_name
        return '%s[%s]' % (self._child_dataframe_name, self._child_column_name)

    def to_dictionary(self):
        """Return the four defining names as a plain dict.

        The result can be passed back to :meth:`from_dictionary`.
        """
        return {
            'parent_dataframe_name': self._parent_dataframe_name,
            'child_dataframe_name': self._child_dataframe_name,
            'parent_column_name': self._parent_column_name,
            'child_column_name': self._child_column_name,
        }

    def _is_unique(self):
        """Is there any other relationship with same parent and child dataframes?

        Raises:
            AssertionError: If the entityset has no forward relationship from
                the child dataframe to the parent dataframe, i.e. this
                relationship has not been added to the entityset.
        """
        es = self.entityset
        relationships = es.get_forward_relationships(self._child_dataframe_name)
        n = sum(1 for r in relationships
                if r._parent_dataframe_name == self._parent_dataframe_name)
        # Raised explicitly (not via ``assert``) so the check still runs
        # under ``python -O``; the exception type and message are unchanged.
        if n < 1:
            raise AssertionError('This relationship is missing from the entityset')
        return n == 1
class RelationshipPath(object): def __init__(self, relationships_with_direction): self._relationships_with_direction = relationships_with_direction @property def name(self): relationship_names = [_direction_name(is_forward, r) for is_forward, r in self._relationships_with_direction] return '.'.join(relationship_names) def dataframes(self): if self: # Yield first dataframe. is_forward, relationship = self[0] if is_forward: yield relationship._child_dataframe_name else: yield relationship._parent_dataframe_name # Yield the dataframe pointed to by each relationship. for is_forward, relationship in self: if is_forward: yield relationship._parent_dataframe_name else: yield relationship._child_dataframe_name def __add__(self, other): return RelationshipPath(self._relationships_with_direction + other._relationships_with_direction) def __getitem__(self, index): return self._relationships_with_direction[index] def __iter__(self): for is_forward, relationship in self._relationships_with_direction: yield is_forward, relationship def __len__(self): return len(self._relationships_with_direction) def __eq__(self, other): return isinstance(other, RelationshipPath) and \ self._relationships_with_direction == other._relationships_with_direction def __ne__(self, other): return not self == other def __repr__(self): if self._relationships_with_direction: path = '%s.%s' % (next(self.dataframes()), self.name) else: path = '[]' return '<RelationshipPath %s>' % path def _direction_name(is_forward, relationship): if is_forward: return relationship.parent_name else: return relationship.child_name