Source code for nlp_primitives.polarity_score

import numpy as np
import pandas as pd
from featuretools.primitives.base import TransformPrimitive
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize.treebank import TreebankWordDetokenizer
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double, NaturalLanguage

from nlp_primitives.utilities import clean_tokens


[docs]class PolarityScore(TransformPrimitive):
    """Calculates the polarity of a text on a scale from -1 (negative) to 1 (positive)

    Description:
        Given a list of strings assign a polarity score from -1 (negative text),
        to 0 (neutral text), to 1 (positive text). The functions returns a score
        for every given piece of text. If a string is missing, return 'NaN'

    Examples:
        >>> x = ['He loves dogs', 'She hates cats', 'There is a dog', '']
        >>> polarity_score = PolarityScore()
        >>> polarity_score(x).tolist()
        [0.808, -0.787, 0.0, 0.0]
    """

    name = "polarity_score"
    input_types = [ColumnSchema(logical_type=NaturalLanguage)]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    default_value = 0

    def get_function(self):
        dtk = TreebankWordDetokenizer()

        def polarity_score(x):
            vader = SentimentIntensityAnalyzer()
            li = []

            def vader_pol(sentence):
                return (
                    vader.polarity_scores(sentence)["pos"]
                    - vader.polarity_scores(sentence)["neg"]
                )

            for el in x:
                if pd.isnull(el):
                    li.append(np.nan)
                else:
                    el = clean_tokens(el)
                    if len(el) < 1:
                        li.append(0.0)
                    else:
                        li.append(vader_pol(dtk.detokenize(el)))
            return pd.Series(li)

        return polarity_score
Table of Contents

Quick search

Source code for nlp_primitives.polarity_score