Source code for featuretools.primitives.standard.transform.latlong.cityblock_distance
import numpy as np
import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double, LatLong
from featuretools.primitives.base import TransformPrimitive
from featuretools.primitives.standard.transform.latlong.utils import (
_haversine_calculate,
)
class CityblockDistance(TransformPrimitive):
    """Calculates the distance between points in a city road grid.

    Description:
        The distance is the sum of an east-west leg and a north-south
        leg, each measured with the haversine formula, which takes
        into account the curvature of the Earth.
        If either input data contains `NaN`s, the calculated
        distance will be `NaN`.
        This calculation is also known as the Manhattan distance.

    Args:
        unit (str): Determines the unit value to output. Could
            be miles or kilometers. Default is miles.

    Raises:
        ValueError: If `unit` is neither "miles" nor "kilometers".

    Examples:
        >>> cityblock_distance = CityblockDistance()
        >>> DC = (38, -77)
        >>> Boston = (43, -71)
        >>> NYC = (40, -74)
        >>> distances_mi = cityblock_distance([DC, DC], [NYC, Boston])
        >>> np.round(distances_mi, 3).tolist()
        [301.519, 672.089]

        We can also change the units in which the distance is calculated.

        >>> cityblock_distance_kilometers = CityblockDistance(unit='kilometers')
        >>> distances_km = cityblock_distance_kilometers([DC, DC], [NYC, Boston])
        >>> np.round(distances_km, 3).tolist()
        [485.248, 1081.622]
    """

    name = "cityblock_distance"
    # Two LatLong columns in, one numeric Double column out.
    input_types = [
        ColumnSchema(logical_type=LatLong),
        ColumnSchema(logical_type=LatLong),
    ]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    # NOTE(review): the east-west leg below is measured at the first input's
    # latitude, so swapping the inputs may not give exactly the same value
    # -- confirm that declaring the primitive commutative is intended.
    commutative = True

    def __init__(self, unit="miles"):
        """Store the output unit after checking that it is supported."""
        supported_units = ("miles", "kilometers")
        if unit not in supported_units:
            raise ValueError("Invalid unit given")
        self.unit = unit

    def get_function(self):
        """Return the callable that computes the cityblock distance."""

        def cityblock(latlong_1, latlong_2):
            # Turn each series of (lat, long) pairs into a 2-D array so the
            # two coordinates can be sliced out as columns.
            first = np.array(latlong_1.tolist())
            second = np.array(latlong_2.tolist())

            first_lat = first[:, 0]
            second_lat = second[:, 0]
            first_lon = first[:, 1]
            second_lon = second[:, 1]

            # East-west leg: latitude held at the first point's value,
            # only the longitude changes.
            east_west = _haversine_calculate(
                first_lat,
                first_lon,
                first_lat,
                second_lon,
                self.unit,
            )
            # North-south leg: longitude held at the first point's value,
            # only the latitude changes.
            north_south = _haversine_calculate(
                first_lat,
                first_lon,
                second_lat,
                first_lon,
                self.unit,
            )
            return pd.Series(east_west + north_south)

        return cityblock