# Source code for featuretools.primitives.standard.transform.time_series.lag

import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Boolean, BooleanNullable

from featuretools.primitives.base import TransformPrimitive


class Lag(TransformPrimitive):
    """Shifts an array of values by a specified number of periods.

    The input values are re-indexed by the accompanying time index, shifted
    with ``pandas.Series.shift``, and returned as a plain array. Positions
    left empty by the shift hold missing values.

    Args:
        periods (int): The number of periods by which to shift the input.
            Default is 1. Periods correspond to rows.

    Examples:
        >>> lag = Lag()
        >>> lag([1, 2, 3, 4, 5], pd.Series(pd.date_range(start="2020-01-01", periods=5, freq='D'))).tolist()
        [nan, 1.0, 2.0, 3.0, 4.0]

        You can specify the number of periods to shift the values

        >>> lag_periods = Lag(periods=3)
        >>> lag_periods([True, False, False, True, True], pd.Series(pd.date_range(start="2020-01-01", periods=5, freq='D'))).tolist()
        [nan, nan, nan, True, False]
    """

    # Note: with pandas 1.5.0, using Lag with a string input will result in
    # `None` values being introduced instead of the `nan` values that were
    # present in previous versions. All missing values will be replaced by
    # `np.nan` (for Double) or `pd.NA` (all other types) once Woodwork is
    # initialized on the feature matrix.
    name = "lag"

    # Each accepted signature pairs one value column with the time index.
    input_types = [
        # Categorical values.
        [
            ColumnSchema(semantic_tags={"category"}),
            ColumnSchema(semantic_tags={"time_index"}),
        ],
        # Numeric values.
        [
            ColumnSchema(semantic_tags={"numeric"}),
            ColumnSchema(semantic_tags={"time_index"}),
        ],
        # Non-nullable booleans.
        [
            ColumnSchema(logical_type=Boolean),
            ColumnSchema(semantic_tags={"time_index"}),
        ],
        # Nullable booleans.
        [
            ColumnSchema(logical_type=BooleanNullable),
            ColumnSchema(semantic_tags={"time_index"}),
        ],
    ]
    return_type = None
    uses_full_dataframe = True

    def __init__(self, periods=1):
        """Store the shift distance.

        Args:
            periods (int): Number of rows by which the input is shifted.
                Default is 1.
        """
        self.periods = periods

    def get_function(self):
        """Return the callable that applies the shift.

        Returns:
            callable: ``lag(input_col, time_index)``, which returns the
            values of ``input_col`` shifted by ``self.periods`` rows.
        """

        def lag(input_col, time_index):
            # Index the raw values by the raw time index, then shift.
            # `self.periods` is looked up on every call, so a change made to
            # the attribute after this function is built still takes effect.
            indexed_values = pd.Series(
                input_col.values,
                index=time_index.values,
            )
            shifted = indexed_values.shift(
                periods=self.periods,
                fill_value=None,
            )
            # Callers receive the bare array, without the time index.
            return shifted.values

        return lag