Source code for featuretools.primitives.standard.transform.full_name_to_last_name
import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Categorical, PersonFullName
from featuretools.primitives.base import TransformPrimitive
[docs]class FullNameToLastName(TransformPrimitive):
"""Determines the first name from a person's name.
Description:
Given a list of names, determines the last name. If
only a single name is provided, assume this is a first name, and
return `nan`. This assumes all titles will be followed by a period.
Examples:
>>> full_name_to_last_name = FullNameToLastName()
>>> names = ['Woolf Spector', 'Oliva y Ocana, Dona. Fermina',
... 'Ware, Mr. Frederick', 'Peter, Michael J', 'Mr. Brown']
>>> full_name_to_last_name(names).to_list()
['Spector', 'Oliva y Ocana', 'Ware', 'Peter', 'Brown']
"""
name = "full_name_to_last_name"
input_types = [ColumnSchema(logical_type=PersonFullName)]
return_type = ColumnSchema(logical_type=Categorical, semantic_tags={"category"})
def get_function(self):
def full_name_to_last_name(x):
titles_pattern = r"([A-Z][a-z]+)\. "
df = pd.DataFrame({"names": x})
# extract initial names
pattern = r"(^.+?,|^[A-Z][a-z]+\. [A-Z][a-z]+$| [A-Z][a-z]+$| [A-Z][a-z]+[/-][A-Z][a-z]+$)"
df["last_name"] = df["names"].str.extract(pattern)
# remove titles
df["last_name"] = df["last_name"].str.replace(
titles_pattern,
"",
regex=True,
)
# clean up white space and leftover commas
df["last_name"] = df["last_name"].str.replace(",", "").str.strip()
return df["last_name"]
return full_name_to_last_name