Source code for featuretools.primitives.standard.transform.url.url_to_domain
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import URL, Categorical
from featuretools.primitives.base import TransformPrimitive
class URLToDomain(TransformPrimitive):
    """Determines the domain of a url.

    Description:
        Calculates the label to identify the network domain of a URL. Supports
        urls with or without protocol as well as international country domains.

        An optional ``http://`` / ``https://`` scheme, an optional
        ``user[:password]@`` prefix and an optional leading ``www.`` are
        skipped; the domain is whatever follows, up to the first port
        separator (``:``), path (``/``), query (``?``) or fragment (``#``).
        The scheme and ``www.`` prefixes are matched case-insensitively; the
        returned domain keeps the casing of the input.

    Note:
        Because the domain ends at the first ``:``, bracketed IPv6 hosts
        such as ``[::1]`` are not extracted correctly.

    Examples:
        >>> url_to_domain = URLToDomain()
        >>> urls = ['https://play.google.com',
        ...         'http://www.google.co.in',
        ...         'www.facebook.com']
        >>> url_to_domain(urls).tolist()
        ['play.google.com', 'google.co.in', 'facebook.com']
    """

    name = "url_to_domain"
    input_types = [ColumnSchema(logical_type=URL)]
    return_type = ColumnSchema(logical_type=Categorical, semantic_tags={"category"})

    def get_function(self):
        """Build the callable that extracts the domain from each URL.

        Returns:
            A function taking a string Series ``x`` and returning, for each
            entry, the first capture group of the domain pattern (as produced
            by ``x.str.extract(..., expand=False)``).
        """
        pattern = (
            # Optional scheme; URI schemes are case-insensitive.
            r"^(?i:https?:\/\/)?"
            # Optional user-info. It must not cross into the path, query or
            # fragment, otherwise an "@" inside e.g. "?email=a@b.com" would
            # be mistaken for the user-info delimiter.
            r"(?:[^@\/?#\n]+@)?"
            # Optional leading "www." label, in any casing.
            r"(?i:www\.)?"
            # The domain itself: everything up to the port, path, query or
            # fragment delimiter.
            r"([^:\/?#\n]+)"
        )

        def url_to_domain(x):
            return x.str.extract(pattern, expand=False)

        return url_to_domain