Source code for featuretools.primitives.standard.transform.natural_language.num_words

from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import NaturalLanguage

from featuretools.primitives.base import TransformPrimitive
from featuretools.utils.gen_utils import Library


[docs]class NumWords(TransformPrimitive): """Determines the number of words in a string by counting the spaces. Examples: >>> num_words = NumWords() >>> num_words(['This is a string', ... 'Two words', ... 'no-spaces', ... 'Also works with sentences. Second sentence!']).tolist() [4, 2, 1, 6] """ name = "num_words" input_types = [ColumnSchema(logical_type=NaturalLanguage)] return_type = ColumnSchema(semantic_tags={"numeric"}) compatibility = [Library.PANDAS, Library.DASK, Library.SPARK] description_template = "the number of words in {}" def get_function(self): def word_counter(array): return array.fillna("").str.count(" ") + 1 return word_counter