Add Tensorflow pre-processing to existing Keras model (for use in Tensorflow Serving)

I figured it out, so I'm going to answer my own question here.

Here's the gist:

First, (in separate code file) I trained the model using Keras only with my own pre-processing functions, exported the Keras model weights file and my token-to-index dictionary.

Then, I copied just the Keras model architecture, set the input as the pre-processed tensor output, loaded the weights file from the previously trained Keras model, and sandwiched it between the Tensorflow pre-processing operations and the Tensorflow exporter.

Final product:

import tensorflow as tf
from keras import backend as K
from keras.models import Sequential, Embedding, LSTM, Dense
from tensorflow.contrib.session_bundle import exporter
from tensorflow.contrib.lookup import HashTable, TextFileInitializer

# Initialize Keras with Tensorflow session
sess = tf.Session()
K.set_session(sess)

# Token to index lookup dictionary
token_to_idx_path = '...'
token_to_idx_dict = HashTable(TextFileInitializer(token_to_idx_path, tf.string, 0, tf.int64, 1, delimiter='\t'), 0)

maxlen = ...

# Pre-processing sub-graph using Tensorflow operations
input = tf.placeholder(tf.string, name='input')
sparse_tokenized_input = tf.string_split(input)
tokenized_input = tf.sparse_tensor_to_dense(sparse_tokenized_input, default_value='')
token_idxs = token_to_idx_dict.lookup(tokenized_input)
token_idxs_padded = tf.pad(token_idxs, [[0,0],[0,maxlen]])
token_idxs_embedding = tf.slice(token_idxs_padded, [0,0], [-1,maxlen])

# Initialize Keras model
model = Sequential()
e = Embedding(max_features, 128, input_length=maxlen)
e.set_input(token_idxs_embedding)
model.add(e)
model.add(LSTM(128, activation='sigmoid'))
model.add(Dense(num_classes, activation='softmax'))

# Load weights from previously trained Keras model
weights_path = '...'
model.load_weights(weights_path)

K.set_learning_phase(0)

# Export model in Tensorflow format
# (Official tutorial: https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/serving_basic.md)
saver = tf.train.Saver(sharded=True)
model_exporter = exporter.Exporter(saver)
signature = exporter.classification_signature(input_tensor=model.input, scores_tensor=model.output)
model_exporter.init(sess.graph.as_graph_def(), default_graph_signature=signature)
model_dir = '...'
model_version = 1
model_exporter.export(model_dir, tf.constant(model_version), sess)

# Input example
with sess.as_default():
    token_to_idx_dict.init.run()
    sess.run(model.output, feed_dict={input: ["this is a raw input example"]})

The accepted answer is super helpful, however it uses an outdated Keras API as @Qululu mentioned, and an outdated TF Serving API (Exporter), and it does not show how to export the model so that its input is the original tf placeholder (versus Keras model.input, which is post preprocessing). Following is a version that works well as of TF v1.4 and Keras 2.1.2:

sess = tf.Session()
K.set_session(sess)

K._LEARNING_PHASE = tf.constant(0)
K.set_learning_phase(0)

max_features = 5000
max_lens = 500

dict_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.TextFileInitializer("vocab.txt",tf.string, 0, tf.int64, TextFileIndex.LINE_NUMBER, vocab_size=max_features, delimiter=" "), 0)

x_input = tf.placeholder(tf.string, name='x_input', shape=(None,))
sparse_tokenized_input = tf.string_split(x_input)
tokenized_input = tf.sparse_tensor_to_dense(sparse_tokenized_input, default_value='')
token_idxs = dict_table.lookup(tokenized_input)
token_idxs_padded = tf.pad(token_idxs, [[0,0],[0, max_lens]])
token_idxs_embedding = tf.slice(token_idxs_padded, [0,0], [-1, max_lens])

model = Sequential()
model.add(InputLayer(input_tensor=token_idxs_embedding, input_shape=(None, max_lens)))

 ...REST OF MODEL...

model.load_weights("model.h5")

x_info = tf.saved_model.utils.build_tensor_info(x_input)
y_info = tf.saved_model.utils.build_tensor_info(model.output)

prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(inputs={"text": x_info}, outputs={"prediction":y_info}, method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

builder = saved_model_builder.SavedModelBuilder("/path/to/model")

legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init_op)


# Add the meta_graph and the variables to the builder
builder.add_meta_graph_and_variables(
  sess, [tag_constants.SERVING],
  signature_def_map={
       signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
           prediction_signature,
  },
  legacy_init_op=legacy_init_op)

builder.save()  

UPDATE Doing pre-processing for inference with Tensorflow is a CPU op, and is not carried out efficiently if the model is deployed on a GPU server. The GPU stalls really bad, and the throughput is very low. Therefore, we ditched this for efficient pre-processing in the client process, instead.