Source code for featuretools.primitives.standard.latlong_transform_primitives

import numpy as np
import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import BooleanNullable, Double, LatLong

from featuretools.primitives.base import TransformPrimitive
from featuretools.primitives.utils import _haversine_calculate


class CityblockDistance(TransformPrimitive):
    """Calculates the distance between points in a city road grid.

    Description:
        This distance is calculated using the haversine formula, which
        takes into account the curvature of the Earth.
        If either input data contains `NaN`s, the calculated
        distance will be `NaN`.
        This calculation is also known as the Manhattan distance.

    Args:
        unit (str): Determines the unit value to output. Could
            be miles or kilometers. Default is miles.

    Raises:
        ValueError: If ``unit`` is neither ``"miles"`` nor ``"kilometers"``.

    Examples:
        >>> cityblock_distance = CityblockDistance()
        >>> DC = (38, -77)
        >>> Boston = (43, -71)
        >>> NYC = (40, -74)
        >>> distances_mi = cityblock_distance([DC, DC], [NYC, Boston])
        >>> np.round(distances_mi, 3).tolist()
        [301.519, 672.089]

        We can also change the units in which the distance is calculated.

        >>> cityblock_distance_kilometers = CityblockDistance(unit='kilometers')
        >>> distances_km = cityblock_distance_kilometers([DC, DC], [NYC, Boston])
        >>> np.round(distances_km, 3).tolist()
        [485.248, 1081.622]
    """

    name = "cityblock_distance"
    input_types = [
        ColumnSchema(logical_type=LatLong),
        ColumnSchema(logical_type=LatLong),
    ]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    # NOTE(review): the east-west leg below is measured along the FIRST
    # input's latitude, so swapping the inputs can change the result
    # (assuming _haversine_calculate is the standard haversine formula).
    # Confirm that declaring the primitive commutative is intended.
    commutative = True

    def __init__(self, unit="miles"):
        if unit not in ("miles", "kilometers"):
            raise ValueError("Invalid unit given")
        self.unit = unit

    def get_function(self):
        """Return the callable that computes the grid distance per row."""

        def cityblock(latlong_1, latlong_2):
            # Each input is turned into a 2-D array whose columns are
            # (latitude, longitude).
            origin = np.array(latlong_1.tolist())
            target = np.array(latlong_2.tolist())
            origin_lat, target_lat = origin[:, 0], target[:, 0]
            origin_lon, target_lon = origin[:, 1], target[:, 1]

            # Leg 1: hold the origin latitude fixed, move to the target
            # longitude.
            east_west = _haversine_calculate(
                origin_lat,
                origin_lon,
                origin_lat,
                target_lon,
                self.unit,
            )
            # Leg 2: hold the origin longitude fixed, move to the target
            # latitude.
            north_south = _haversine_calculate(
                origin_lat,
                origin_lon,
                target_lat,
                origin_lon,
                self.unit,
            )
            return pd.Series(east_west + north_south)

        return cityblock
class GeoMidpoint(TransformPrimitive):
    """Determines the geographic center of two coordinates.

    Description:
        The midpoint is computed as the element-wise arithmetic mean of
        the two latitudes and of the two longitudes.

    Examples:
        >>> geomidpoint = GeoMidpoint()
        >>> geomidpoint([(42.4, -71.1)], [(40.0, -122.4)])
        [(41.2, -96.75)]
    """

    name = "geomidpoint"
    input_types = [
        ColumnSchema(logical_type=LatLong),
        ColumnSchema(logical_type=LatLong),
    ]
    return_type = ColumnSchema(logical_type=LatLong)
    commutative = True

    def get_function(self):
        """Return the callable that averages the two coordinate columns."""

        def geomidpoint_func(latlong_1, latlong_2):
            # Columns of each array are (latitude, longitude).
            first = np.array(latlong_1.tolist())
            second = np.array(latlong_2.tolist())
            first_lat, second_lat = first[:, 0], second[:, 0]
            first_lon, second_lon = first[:, 1], second[:, 1]

            # Stack the paired values as two rows and average down the rows.
            mid_lat = np.mean(np.array([first_lat, second_lat]), axis=0)
            mid_lon = np.mean(np.array([first_lon, second_lon]), axis=0)
            return list(zip(mid_lat, mid_lon))

        return geomidpoint_func
class Haversine(TransformPrimitive):
    """Calculates the approximate haversine distance between two LatLong columns.

    Args:
        unit (str): Determines the unit value to output. Could
            be `miles` or `kilometers`. Default is `miles`.

    Raises:
        ValueError: If ``unit`` is not one of the supported units.

    Examples:
        >>> haversine = Haversine()
        >>> distances = haversine([(42.4, -71.1), (40.0, -122.4)],
        ...                       [(40.0, -122.4), (41.2, -96.75)])
        >>> np.round(distances, 3).tolist()
        [2631.231, 1343.289]

        Output units can be specified

        >>> haversine_km = Haversine(unit='kilometers')
        >>> distances_km = haversine_km([(42.4, -71.1), (40.0, -122.4)],
        ...                             [(40.0, -122.4), (41.2, -96.75)])
        >>> np.round(distances_km, 3).tolist()
        [4234.555, 2161.814]
    """

    name = "haversine"
    input_types = [
        ColumnSchema(logical_type=LatLong),
        ColumnSchema(logical_type=LatLong),
    ]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    commutative = True

    def __init__(self, unit="miles"):
        valid_units = ["miles", "kilometers"]
        if unit not in valid_units:
            raise ValueError(
                "Invalid unit %s provided. Must be one of %s"
                % (
                    unit,
                    valid_units,
                )
            )
        self.unit = unit
        # The doubled braces survive this format call as "{}" placeholders
        # in the resulting template.
        self.description_template = (
            "the haversine distance in {} between {{}} and {{}}".format(self.unit)
        )

    def get_function(self):
        """Return the callable that computes the row-wise distance."""

        def haversine(latlong_1, latlong_2):
            # Columns of each array are (latitude, longitude).
            start = np.array(latlong_1.tolist())
            end = np.array(latlong_2.tolist())
            start_lat, end_lat = start[:, 0], end[:, 0]
            start_lon, end_lon = start[:, 1], end[:, 1]
            return _haversine_calculate(
                start_lat,
                start_lon,
                end_lat,
                end_lon,
                self.unit,
            )

        return haversine

    def generate_name(self, base_feature_names):
        """Build the feature name, adding the unit only when it is not miles."""
        prefix = "{}(".format(self.name.upper())
        joined = ", ".join(base_feature_names)
        suffix = ", unit={}".format(self.unit) if self.unit != "miles" else ""
        return prefix + joined + suffix + ")"
class IsInGeoBox(TransformPrimitive):
    """Determines if coordinates are inside a box defined by two
    corner coordinate points.

    Description:
        Coordinate values should be specified as (latitude, longitude)
        tuples. This primitive is unable to handle coordinates and boxes
        at the poles, and near +/- 180 degrees longitude.
        Points lying exactly on the edge of the box count as inside.

    Args:
        point1 (tuple(float, float)): The coordinates
            of the first corner of the box. Defaults to (0, 0).
        point2 (tuple(float, float)): The coordinates
            of the diagonal corner of the box. Defaults to (0, 0).

    Example:
        >>> is_in_geobox = IsInGeoBox((40.7128, -74.0060), (42.2436, -71.1677))
        >>> is_in_geobox([(41.034, -72.254), (39.125, -87.345)]).tolist()
        [True, False]
    """

    name = "is_in_geobox"
    input_types = [ColumnSchema(logical_type=LatLong)]
    return_type = ColumnSchema(logical_type=BooleanNullable)

    def __init__(self, point1=(0, 0), point2=(0, 0)):
        self.point1 = point1
        self.point2 = point2
        # Sorting means index 0 is the lower bound and index 1 the upper
        # bound, whichever order the corners were given in.
        corner_lats = np.array([point1[0], point2[0]])
        corner_lons = np.array([point1[1], point2[1]])
        self.lats = np.sort(corner_lats)
        self.lons = np.sort(corner_lons)

    def get_function(self):
        """Return the callable that tests each coordinate against the box."""

        def geobox(latlongs):
            # After transposing, row 0 holds the latitudes and row 1 the
            # longitudes.
            coords = np.array(latlongs.tolist()).T
            point_lats = coords[0]
            point_lons = coords[1]

            lat_inside = (self.lats[0] <= point_lats) & (self.lats[1] >= point_lats)
            lon_inside = (self.lons[0] <= point_lons) & (self.lons[1] >= point_lons)
            return lat_inside & lon_inside

        return geobox
class Latitude(TransformPrimitive):
    """Returns the first tuple value in a list of LatLong tuples.
       For use with the LatLong logical type.

    Examples:
        >>> latitude = Latitude()
        >>> latitude([(42.4, -71.1),
        ...            (40.0, -122.4),
        ...            (41.2, -96.75)]).tolist()
        [42.4, 40.0, 41.2]
    """

    name = "latitude"
    input_types = [ColumnSchema(logical_type=LatLong)]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    description_template = "the latitude of {}"

    def get_function(self):
        """Return the callable that extracts the first value of each pair."""

        def latitude(latlong):
            # Column 0 of the converted array is the first tuple element.
            return np.array(latlong.tolist())[:, 0]

        return latitude
class Longitude(TransformPrimitive):
    """Returns the second tuple value in a list of LatLong tuples.
       For use with the LatLong logical type.

    Examples:
        >>> longitude = Longitude()
        >>> longitude([(42.4, -71.1),
        ...            (40.0, -122.4),
        ...            (41.2, -96.75)]).tolist()
        [-71.1, -122.4, -96.75]
    """

    name = "longitude"
    input_types = [ColumnSchema(logical_type=LatLong)]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    description_template = "the longitude of {}"

    def get_function(self):
        """Return the callable that extracts the second value of each pair."""

        def longitude(latlong):
            # Column 1 of the converted array is the second tuple element.
            return np.array(latlong.tolist())[:, 1]

        return longitude