Getting an error: AttributeError: 'HashedCategoricalColumn' object has no attribute 'vocab_size'
Apologies, the code above contains an error: `HashedCategoricalColumn` does not have a `vocab_size` attribute. Here is the updated code.
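As a quick check (a minimal sketch; the column name and bucket size simply mirror the code below), the hashed column stores its size under `num_buckets`, which is what the corrected code reads instead of `vocab_size`:

```python
import tensorflow as tf

col = tf.feature_column.categorical_column_with_hash_bucket('userId', hash_bucket_size=1000)
print(col.num_buckets)   # 1000 -- the attribute that actually exists
# col.vocab_size         # would raise AttributeError on HashedCategoricalColumn
```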
```python
import pandas as pd
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
# Define feature columns and embedding dimensions
feature_columns = []
embedding_dims = {}
# userId feature
user_id = tf.feature_column.categorical_column_with_hash_bucket('userId', hash_bucket_size=1000)
user_id_embedding = tf.feature_column.embedding_column(user_id, dimension=10)
feature_columns.append(user_id_embedding)
embedding_dims['userId'] = 10
# movieId feature
movie_id = tf.feature_column.categorical_column_with_hash_bucket('movieId', hash_bucket_size=10000)
movie_id_embedding = tf.feature_column.embedding_column(movie_id, dimension=10)
feature_columns.append(movie_id_embedding)
embedding_dims['movieId'] = 10
# rating is the label (popped in input_fn below), so it is not added as a feature
# timestamp feature (optional)
timestamp = tf.feature_column.numeric_column('timestamp')
feature_columns.append(timestamp)
# Read the data
data = pd.read_csv('your_data.csv')  # replace with your data file path
# Split into training and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
# Build the input pipeline
def input_fn(df, num_epochs, shuffle=True, batch_size=32):
    df = df.copy()
    labels = df.pop('rating')
    dataset = tf.data.Dataset.from_tensor_slices((dict(df), labels))
    # The model has two heads, so feed the same label to both outputs
    dataset = dataset.map(lambda x, y: (x, {'wide_output': y, 'deep_output': y}))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(df))
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(num_epochs)
    return dataset
# Build the Wide & Deep model
def create_wide_deep_model(feature_columns, embedding_dims):
    inputs = {}
    features = []
    for feature_col in feature_columns:
        if hasattr(feature_col, 'categorical_column'):
            # An embedding_column wrapping a hashed categorical column.
            # HashedCategoricalColumn exposes num_buckets, not vocab_size.
            cat_col = feature_col.categorical_column
            feature_name = cat_col.key
            input_tensor = tf.keras.Input(shape=(1,), name=feature_name, dtype=tf.int64)
            inputs[feature_name] = input_tensor
            # Hash raw ids into [0, num_buckets), then look up their embeddings
            hashed = tf.keras.layers.Hashing(num_bins=cat_col.num_buckets)(input_tensor)
            embedding = tf.keras.layers.Embedding(input_dim=cat_col.num_buckets,
                                                  output_dim=embedding_dims[feature_name])(hashed)
            features.append(tf.keras.layers.Flatten()(embedding))
        else:
            # A plain numeric column (e.g. timestamp)
            feature_name = feature_col.key
            input_tensor = tf.keras.Input(shape=(1,), name=feature_name, dtype=tf.float32)
            inputs[feature_name] = input_tensor
            features.append(input_tensor)
    combined = tf.keras.layers.concatenate(features)
    # Wide head: a single linear unit over the concatenated features
    wide_output = tf.keras.layers.Dense(1, name='wide_output')(combined)
    # Deep head: a small DNN over the same features
    deep = tf.keras.layers.Dense(64, activation='relu')(combined)
    deep = tf.keras.layers.Dense(64, activation='relu')(deep)
    deep_output = tf.keras.layers.Dense(1, name='deep_output')(deep)
    model = tf.keras.Model(inputs=inputs, outputs=[wide_output, deep_output])
    return model
# Create the Wide & Deep model
model = create_wide_deep_model(feature_columns, embedding_dims)
# Compile: ratings are continuous, so both heads use a regression loss
model.compile(optimizer=Adam(learning_rate=0.001),
              loss={'wide_output': 'mean_squared_error', 'deep_output': 'mean_squared_error'},
              metrics={'wide_output': 'mae', 'deep_output': 'mae'})
# Train the model
model.fit(input_fn(train_data, num_epochs=10),
          steps_per_epoch=len(train_data) // 32,
          validation_data=input_fn(test_data, num_epochs=1, shuffle=False),
          validation_steps=len(test_data) // 32,
          epochs=10)
```
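Once training finishes, a quick way to sanity-check the model is to predict on a single hand-built example (the ids below are placeholders; the dict keys must match the `Input` names defined above):

```python
import numpy as np

# Hypothetical single example; keys must match the model's input names
sample = {'userId': np.array([[42]]),
          'movieId': np.array([[7]]),
          'timestamp': np.array([[0.0]], dtype='float32')}
wide_pred, deep_pred = model.predict(sample)
print('wide:', wide_pred[0, 0], 'deep:', deep_pred[0, 0])
```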
This version fixes the `Embedding` layer's input dimension by reading the hashed column's `num_buckets` (there is no `vocab_size` on `HashedCategoricalColumn`) and handles the numeric columns separately. Please try the updated code; hopefully it resolves the problem. If you have any other questions, feel free to ask.
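One further note: `tf.feature_column` is deprecated in recent TensorFlow releases in favor of Keras preprocessing layers, so if you are free to restructure, the same hash-and-embed pattern can be built directly (a minimal sketch using the bucket and dimension sizes from above):

```python
import tensorflow as tf

# Hash arbitrary ids into a fixed number of buckets, then embed each bucket
user_input = tf.keras.Input(shape=(1,), name='userId', dtype=tf.int64)
hashed = tf.keras.layers.Hashing(num_bins=1000)(user_input)
user_vec = tf.keras.layers.Embedding(input_dim=1000, output_dim=10)(hashed)
```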