Source code for featuretools.primitives.standard.transform.datetime.distance_to_holiday
import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Datetime
from featuretools.primitives.base import TransformPrimitive
from featuretools.primitives.standard.transform.datetime.utils import HolidayUtil
class DistanceToHoliday(TransformPrimitive):
    """Determines how many days separate each date from a chosen holiday.

    Description:
        Given a list of dates, find the occurrence of the selected
        holiday that lies closest to each date and report the gap
        in whole days. The gap is computed as holiday minus date,
        so it is negative when the closest occurrence falls before
        the date and positive when it falls after. The time-of-day
        part of each input is discarded before comparing.

        Missing dates yield `NaN`.

        Currently only works with dates between 1950 and 2100.

    Args:
        holiday (str): Name of the holiday to measure against.
            Defaults to New Year's Day.
        country (str): Country whose holiday calendar is consulted.
            Default is `US`.

    Raises:
        ValueError: If `holiday` is not among the holidays known for
            the given country.

    Examples:
        >>> from datetime import datetime
        >>> distance_to_holiday = DistanceToHoliday("New Year's Day")
        >>> dates = [datetime(2010, 1, 1),
        ...          datetime(2012, 5, 31),
        ...          datetime(2017, 7, 31),
        ...          datetime(2020, 12, 31)]
        >>> distance_to_holiday(dates).tolist()
        [0, -151, 154, 1]

        We can also control the country in which we're searching for
        a holiday.

        >>> distance_to_holiday = DistanceToHoliday("Victoria Day", country='Canada')
        >>> dates = [datetime(2010, 1, 1),
        ...          datetime(2012, 5, 31),
        ...          datetime(2017, 7, 31),
        ...          datetime(2020, 12, 31)]
        >>> distance_to_holiday(dates).tolist()
        [143, -10, -70, 144]
    """

    name = "distance_to_holiday"
    input_types = [ColumnSchema(logical_type=Datetime)]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    default_value = 0

    def __init__(self, holiday="New Year's Day", country="US"):
        self.country = country
        self.holiday = holiday
        self.holidayUtil = HolidayUtil(country)

        # De-duplicate the holiday names; the same name recurs once per year.
        known_holidays = list(set(self.holidayUtil.federal_holidays.values()))
        if self.holiday not in known_holidays:
            raise ValueError(
                "must be one of the available holidays:\n%s" % known_holidays
            )

    def get_function(self):
        def distance_to_holiday(x):
            # Keep only the rows of the holiday table for the chosen holiday.
            calendar = self.holidayUtil.to_df()
            occurrences = calendar[calendar["names"] == self.holiday]

            # Remember each value's original index label so the result can
            # be put back in the caller's order after sorting.
            dates = pd.DataFrame({"date": x})
            dates["x_index"] = dates.index
            # merge_asof needs a sorted, NaN-free left key.
            dates = dates.dropna().sort_values("date")
            # Strip the time-of-day so differences are whole calendar days.
            dates["date"] = dates["date"].dt.date.astype("datetime64[ns]")

            # Pair every date with the nearest occurrence (before or after);
            # dates with no occurrence within 365 days get no match.
            nearest = pd.merge_asof(
                dates,
                occurrences,
                left_on="date",
                right_on="holiday_date",
                direction="nearest",
                tolerance=pd.Timedelta("365d"),
            ).set_index("x_index")
            nearest["days_diff"] = (
                nearest["holiday_date"] - nearest["date"]
            ).dt.days

            # Restore the input's index; rows dropped as missing become NaN.
            return nearest["days_diff"].reindex_like(x)

        return distance_to_holiday