Source code for featuretools.primitives.standard.aggregation.avg_time_between
from datetime import datetime
import numpy as np
import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Datetime, Double
from featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive
from featuretools.utils import convert_time_units
[docs]class AvgTimeBetween(AggregationPrimitive):
"""Computes the average number of seconds between consecutive events.
Description:
Given a list of datetimes, return the average time (default in seconds)
elapsed between consecutive events. If there are fewer
than 2 non-null values, return `NaN`.
Args:
unit (str): Defines the unit of time.
Defaults to seconds. Acceptable values:
years, months, days, hours, minutes, seconds, milliseconds, nanoseconds
Examples:
>>> from datetime import datetime
>>> avg_time_between = AvgTimeBetween()
>>> times = [datetime(2010, 1, 1, 11, 45, 0),
... datetime(2010, 1, 1, 11, 55, 15),
... datetime(2010, 1, 1, 11, 57, 30)]
>>> avg_time_between(times)
375.0
>>> avg_time_between = AvgTimeBetween(unit="minutes")
>>> avg_time_between(times)
6.25
"""
name = "avg_time_between"
input_types = [ColumnSchema(logical_type=Datetime, semantic_tags={"time_index"})]
return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
description_template = "the average time between each of {}"
[docs] def __init__(self, unit="seconds"):
self.unit = unit.lower()
def get_function(self):
def pd_avg_time_between(x):
"""Assumes time scales are closer to order
of seconds than to nanoseconds
if times are much closer to nanoseconds
we could get some floating point errors
this can be fixed with another function
that calculates the mean before converting
to seconds
"""
x = x.dropna()
if x.shape[0] < 2:
return np.nan
if isinstance(x.iloc[0], (pd.Timestamp, datetime)):
x = x.view("int64")
# use len(x)-1 because we care about difference
# between values, len(x)-1 = len(diff(x))
avg = (x.max() - x.min()) / (len(x) - 1)
avg = avg * 1e-9
# long form:
# diff_in_ns = x.diff().iloc[1:].astype('int64')
# diff_in_seconds = diff_in_ns * 1e-9
# avg = diff_in_seconds.mean()
return convert_time_units(avg, self.unit)
return pd_avg_time_between