Source code for featuretools.primitives.standard.transform.latlong.cityblock_distance

import numpy as np
import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double, LatLong

from featuretools.primitives.base import TransformPrimitive
from featuretools.primitives.standard.transform.latlong.utils import (
    _haversine_calculate,
)


class CityblockDistance(TransformPrimitive):
    """Calculates the distance between points in a city road grid.

    Description:
        This distance is calculated using the haversine formula, which
        takes into account the curvature of the Earth.
        If either input data contains `NaN`s, the calculated
        distance will be `NaN`.
        This calculation is also known as the Manhattan distance.

    Args:
        unit (str): Determines the unit value to output. Could
            be miles or kilometers. Default is miles.

    Raises:
        ValueError: If `unit` is neither "miles" nor "kilometers".

    Examples:
        >>> cityblock_distance = CityblockDistance()
        >>> DC = (38, -77)
        >>> Boston = (43, -71)
        >>> NYC = (40, -74)
        >>> distances_mi = cityblock_distance([DC, DC], [NYC, Boston])
        >>> np.round(distances_mi, 3).tolist()
        [301.519, 672.089]

        We can also change the units in which the distance is calculated.

        >>> cityblock_distance_kilometers = CityblockDistance(unit='kilometers')
        >>> distances_km = cityblock_distance_kilometers([DC, DC], [NYC, Boston])
        >>> np.round(distances_km, 3).tolist()
        [485.248, 1081.622]
    """

    name = "cityblock_distance"
    input_types = [
        ColumnSchema(logical_type=LatLong),
        ColumnSchema(logical_type=LatLong),
    ]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    # NOTE(review): the east-west leg below is measured along the FIRST
    # point's latitude, so swapping the inputs can change the result
    # slightly; confirm that treating this primitive as commutative is
    # intended.
    commutative = True

    def __init__(self, unit="miles"):
        """Store the output unit after validating it.

        Args:
            unit (str): Either "miles" or "kilometers".

        Raises:
            ValueError: If `unit` is not one of the two supported values.
        """
        if unit not in ["miles", "kilometers"]:
            raise ValueError("Invalid unit given")
        self.unit = unit

    def get_function(self):
        """Build the function that computes element-wise cityblock distances.

        Returns:
            callable: A function taking two series of (latitude, longitude)
            pairs and returning a ``pd.Series`` with one distance per row,
            expressed in ``self.unit``.
        """

        def to_pairs(latlongs):
            # Convert a series of (lat, lon) pairs into an (n, 2) float array.
            # A missing entry may arrive as a single scalar (e.g. NaN or None)
            # rather than a pair; expand it to (NaN, NaN) so the array stays
            # rectangular and the resulting distance is NaN, as the class
            # docstring promises.
            rows = [
                row
                if isinstance(row, (tuple, list, np.ndarray))
                else (np.nan, np.nan)
                for row in latlongs.tolist()
            ]
            if not rows:
                # np.array([]) would be 1-D and break the column slicing.
                return np.empty((0, 2), dtype=float)
            return np.array(rows, dtype=float)

        def cityblock(latlong_1, latlong_2):
            latlong_1 = to_pairs(latlong_1)
            latlong_2 = to_pairs(latlong_2)
            lat_1s = latlong_1[:, 0]
            lat_2s = latlong_2[:, 0]
            lon_1s = latlong_1[:, 1]
            lon_2s = latlong_2[:, 1]
            # East-west leg: hold latitude at the first point, vary longitude.
            lon_dis = _haversine_calculate(lat_1s, lon_1s, lat_1s, lon_2s, self.unit)
            # North-south leg: hold longitude at the first point, vary latitude.
            lat_dist = _haversine_calculate(lat_1s, lon_1s, lat_2s, lon_1s, self.unit)
            return pd.Series(lon_dis + lat_dist)

        return cityblock