Source code for nlp_primitives.tensorflow.elmo

from featuretools.primitives.base import TransformPrimitive
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double, NaturalLanguage


[docs]class Elmo(TransformPrimitive): """Transforms a sentence or short paragraph using deep contextualized langauge representations. Usese the following pre-trained model [tfhub model](https://tfhub.dev/google/elmo/2) Args: None Examples: >>> Elmo = Elmo() >>> words = ["I like to eat pizza", ... "The roller coaster was built in 1885.", ... "When will humans go to mars?"] >>> output = Elmo(words) >>> len(output) 1024 >>> len(output[0]) 3 >>> values = output[:3, 0] >>> [round(x, 4) for x in values] [-0.3457, -0.4546, 0.2538] """ name = "elmo" input_types = [ColumnSchema(logical_type=NaturalLanguage)] return_type = ColumnSchema(logical_type=Double, semantic_tags="numeric")
[docs] def __init__(self): self.handle = "https://tfhub.dev/google/elmo/2" self.number_output_features = 1024 self.n = 1024
def install(self): import tensorflow as tf import tensorflow_hub as hub with tf.compat.v1.Session(): tf.compat.v1.global_variables_initializer().run() self.embed = hub.Module(self.handle) def get_function(self): self.install() def elmo(col): import tensorflow as tf with tf.compat.v1.Session() as session: session.run( [ tf.compat.v1.global_variables_initializer(), tf.compat.v1.tables_initializer(), ] ) embeddings = session.run(self.embed(col.tolist())) return embeddings.transpose() return elmo