I've set up a toy example based on the TensorFlow linear classifier tutorial. In this example, the `fit` method is called with the `input_fn` parameter, to which I pass `train_input_fn`. This is how TensorFlow likes to receive the data. However, I really want to run mini-batches. Fortunately, `fit` has a `batch_size` parameter, but to use it I need to forgo `input_fn` and pass `x` and `y` instead. I've tried passing `ndarray`s and `DataFrame`s, as well as the output of the `train_input_fn` function. Nothing works. I need a working example that uses the `batch_size` parameter.
Here is the setup code, split into the part I have no problem with, followed by the problem portion.
import pandas as pd
import numpy as np
import tensorflow as tf
import tempfile
# Seed NumPy's global RNG so the synthetic data set is reproducible.
np.random.seed([3, 1415])

# Synthetic 100-row frame: one Yes/No categorical column, three uniform
# random value columns and a 0/1 label. The draws happen in the order
# cat1, val1, val2, val3, label.
df = pd.DataFrame({
    'cat1': np.random.choice(('Yes', 'No'), size=(100,)),
    'val1': np.random.rand(100),
    'val2': np.random.rand(100),
    'val3': np.random.rand(100),
    'label': np.random.choice((0, 1), size=(100,)),
})

# Column roles used when building the model inputs.
LABEL_COLUMN = "label"
CONTINUOUS_COLUMNS = ['val1', 'val2', 'val3']
CATEGORICAL_COLUMNS = ['cat1']

# Rows [0, 80) are the training split, rows [80, 100) the test split.
trainBegin, trainEnd = 0, 80
testBegin, testEnd = 80, 100
df_train = df.iloc[trainBegin:trainEnd]
df_test = df.iloc[testBegin:testEnd]
def input_fn(df):
    """Turn a DataFrame into the (features, label) pair returned by an input function.

    Args:
        df: pandas DataFrame containing every column named in
            CONTINUOUS_COLUMNS, CATEGORICAL_COLUMNS and LABEL_COLUMN.

    Returns:
        A tuple ``(feature_cols, label)``. ``feature_cols`` maps each feature
        column name to a constant Tensor (continuous columns) or a
        SparseTensor (categorical columns); ``label`` is a constant Tensor
        built from the label column.
    """
    # Each continuous column becomes a constant Tensor of its values.
    continuous_cols = {k: tf.constant(df[k].values)
                       for k in CONTINUOUS_COLUMNS}
    # Each categorical column becomes a SparseTensor of shape [n_rows, 1]
    # holding one value per row.
    # NOTE(review): later TensorFlow releases appear to name the `shape`
    # argument `dense_shape` -- confirm against the installed version.
    categorical_cols = {k: tf.SparseTensor(
                            indices=[[i, 0] for i in range(df[k].size)],
                            values=df[k].values,
                            shape=[df[k].size, 1])
                        for k in CATEGORICAL_COLUMNS}
    # Merge by copy-and-update rather than `items() + items()`: dict views
    # cannot be added on Python 3, while this form works on Python 2 and 3.
    feature_cols = dict(continuous_cols)
    feature_cols.update(categorical_cols)
    # The label column is passed as a constant Tensor as well.
    label = tf.constant(df[LABEL_COLUMN].values)
    return feature_cols, label
def train_input_fn():
    """Return the (features, label) pair built from the training split."""
    features, label = input_fn(df_train)
    return features, label
def eval_input_fn():
    """Return the (features, label) pair built from the test split."""
    features, label = input_fn(df_test)
    return features, label
# One real-valued feature column per continuous input; the individual
# val1/val2/val3 names are kept alongside the combined list.
wide_columns = [
    tf.contrib.layers.real_valued_column(column_name)
    for column_name in ("val1", "val2", "val3")
]
val1, val2, val3 = wide_columns

# Fit a linear classifier through input_fn, using a fresh temporary
# directory as the model directory.
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.LinearClassifier(
    feature_columns=wide_columns,
    model_dir=model_dir,
)
m.fit(input_fn=train_input_fn, steps=200)

# Evaluate on the test split and print every result entry, sorted by key.
results = m.evaluate(input_fn=eval_input_fn, steps=1)
for key, value in sorted(results.items()):
    print("%s: %s" % (key, value))
accuracy: 0.45
eval_auc: 0.459596
loss: 0.771354
# Problem portion: same model setup and input_fn-based training as the
# working example, using a fresh temporary model directory.
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.LinearClassifier(feature_columns=wide_columns, model_dir=model_dir)
m.fit(input_fn=train_input_fn, steps=200)
# 2 lines that are different ##########################
# Instead of handing evaluate() an input_fn, call train_input_fn() directly
# and pass its two return values (the feature dict and the label tensor) as
# x and y together with batch_size.
# NOTE(review): presumably this is the call that fails; the traceback quoted
# with the question shows a different invocation -- confirm.
x, y = train_input_fn()
results = m.evaluate(x=x, y=y, batch_size=100, steps=1)
#######################################################
# Print every entry of the results mapping, sorted by key.
for key in sorted(results):
    print("%s: %s" % (key, results[key]))
Below is the error I get, although the error changes depending on what I try. The documentation says `x` should be a matrix; I tried that too.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-135-5b53add19aac> in <module>()
12 # p.fit(input_fn=train_input_fn, steps=10)
13 x, y = train_input_fn()
---> 14 p.fit(x=df_train, y=df_train, steps=10, batch_size=100)
15 results = p.evaluate(input_fn=eval_input_fn, steps=1)
16 for key in sorted(results):
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in fit(self, x, y, input_fn, steps, batch_size, monitors)
171 if x is None:
172 raise ValueError('Either x or input_fn must be provided.')
--> 173 input_fn, feed_fn = _get_input_fn(x, y, batch_size)
174 elif (x is not None) or (y is not None):
175 raise ValueError('Can not provide both input_fn and either of x and y.')
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _get_input_fn(x, y, batch_size)
65 def _get_input_fn(x, y, batch_size):
66 df = data_feeder.setup_train_data_feeder(
---> 67 x, y, n_classes=None, batch_size=batch_size)
68 return df.input_builder, df.get_feed_dict_fn()
69
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/io/data_feeder.pyc in setup_train_data_feeder(X, y, n_classes, batch_size, shuffle, epochs)
97 ValueError: if one of `X` and `y` is iterable and the other is not.
98 """
---> 99 X, y = _data_type_filter(X, y)
100 if HAS_DASK:
101 # pylint: disable=g-import-not-at-top
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/io/data_feeder.pyc in _data_type_filter(X, y)
65 y = extract_dask_labels(y)
66 if HAS_PANDAS:
---> 67 X = extract_pandas_data(X)
68 if y is not None:
69 y = extract_pandas_labels(y)
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/io/pandas_io.pyc in extract_pandas_data(data)
51 return data.values.astype('float')
52 else:
---> 53 raise ValueError('Data types for data must be int, float, or bool.')
54
55
ValueError: Data types for data must be int, float, or bool.
It seems that the format expected when you pass `x` and `y` is not the same as the format used with `input_fn`. Quote from `fit`'s docstring:
x: matrix or tensor of shape [n_samples, n_features...]. Can be iterator that returns arrays of features. The training input samples for fitting the model. If set, input_fn must be None.
The example below works. Note that:
- I had to replace 'Yes' / 'No' with booleans (which is probably not equivalent, but illustrates the point), since it seems one cannot enter sparse data this way.
- I used `infer_real_valued_columns_from_input` to get the columns.
Revised version:
import pandas as pd
import numpy as np
import tensorflow as tf
import tempfile
# Seed NumPy's global RNG so the synthetic data set is reproducible.
np.random.seed([3, 1415])

# Features and labels live in separate frames so they can be passed as x and
# y. The categorical column holds booleans instead of 'Yes'/'No' strings.
# Draw order: cat1, val1, val2, val3, then label.
_x_df = pd.DataFrame({
    'cat1': np.random.choice((True, False), size=(100,)),
    'val1': np.random.rand(100),
    'val2': np.random.rand(100),
    'val3': np.random.rand(100),
})
_y_df = pd.DataFrame({'label': np.random.choice((0, 1), size=(100,))})

# Rows [0, 80) are the training split, rows [80, 100) the test split.
trainBegin, trainEnd = 0, 80
testBegin, testEnd = 80, 100

x_df_train = _x_df.iloc[trainBegin:trainEnd]
y_df_train = _y_df.iloc[trainBegin:trainEnd]
x_df_test = _x_df.iloc[testBegin:testEnd]
y_df_test = _y_df.iloc[testBegin:testEnd]
# Derive the feature columns directly from the training feature frame.
wide_columns = tf.contrib.learn.infer_real_valued_columns_from_input(x_df_train)

# Train and evaluate by passing the frames as x and y with batch_size=5,
# using a fresh temporary directory as the model directory.
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.LinearClassifier(
    feature_columns=wide_columns,
    model_dir=model_dir,
)
m.fit(x_df_train, y_df_train, batch_size=5, steps=200)
results = m.evaluate(x_df_test, y_df_test, batch_size=5, steps=1)

# Print every entry of the results mapping, sorted by key.
for key, value in sorted(results.items()):
    print("%s: %s" % (key, value))
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.