LSTM: Input 0 of layer lstm_1 is incompatible with the layer: expected ndim=3, found ndim=2 (reshaping input)
I want to generate poetry based on Robert Frost's poems.
I have preprocessed my dataset:
```python
max_sentence_len = max(len(l) for l in corpus_int)
input_seq = np.array(tf.keras.preprocessing.sequence.pad_sequences(
    corpus_int, padding='pre', truncating='pre', maxlen=max_sentence_len))

# predictors: everything except the last token; label: only the last token
predictors, label = input_seq[:, :-1], input_seq[:, -1]
label = ku.to_categorical(label, num_classes=total_words, dtype='int32')
```

```
predictors
array([[   0,    0,    0, ...,   10,    5,  544],
       [   0,    0,    0, ...,   64,    8,  854],
       [   0,    0,    0, ...,  855,  174,    2],
       ...,
       [   0,    0,    0, ...,  129,   49,   94],
       [   0,    0,    0, ...,  183,  159,   60],
       [   0,    0,    3, ...,    3, 2157,    4]], dtype=int32)

label
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]], dtype=int32)
```
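For reference, here is a minimal sketch of what the padding and predictor/label split above produce, using a hypothetical two-sentence corpus (the token ids are made up):

```python
import tensorflow as tf

toy_corpus = [[10, 5, 544], [64, 8]]      # hypothetical tokenized sentences
padded = tf.keras.preprocessing.sequence.pad_sequences(
    toy_corpus, padding='pre', maxlen=4)  # pre-pad with zeros to equal length
# padded -> [[  0,  10,   5, 544],
#            [  0,   0,  64,   8]]

X, y = padded[:, :-1], padded[:, -1]      # X: all but the last token, y: the last token
# X -> [[ 0, 10,  5], [ 0,  0, 64]],  y -> [544, 8]
```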
After that, I built my model using an encoder-decoder architecture:
```python
class seq2seq(tf.keras.Model):
    def __init__(self, max_sequence_len, total_words):
        super(seq2seq, self).__init__()
        self.max_sequence_len = max_sequence_len
        self.total_words = total_words
        self.input_len = self.max_sequence_len - 1
        self.total_words = self.total_words

        # Encoder
        self.enc_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sentence_len - 1)
        self.enc_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh')
        self.enc_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh', return_state=True)

        # Decoder
        self.dec_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sentence_len - 1)
        self.dec_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh')
        self.dec_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_state=True, return_sequences=True)

        # Dense layer and output:
        self.dense = tf.keras.layers.Dense(total_words, activation='softmax')

    def call(self, inputs):
        # Encoding
        enc_x = self.enc_embedding(inputs)
        enc_x = self.enc_lstm_1(enc_x)
        enc_outputs, state_h, state_c = self.enc_lstm_2(enc_x)

        # Decoding:
        dec_x = self.dec_embedding(enc_outputs)
        dec_x = self.dec_lstm_1(dec_x, initial_state=[state_h, state_c])
        dec_outputs, _, _ = self.enc_lstm_2(dec_x)
        output_dense = self.dense(dec_outputs)

        return output_dense


model = seq2seq(max_sequence_len=max_sentence_len, total_words=total_words)
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),
              loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(predictors, label, epochs=5, batch_size=128)
```
But in the end I get the following error:
```
ValueError                                Traceback (most recent call last)
<ipython-input-4-1c349573302d> in <module>()
     37 model = seq2seq(max_sequence_len = max_sentence_len,total_words = total_words)
     38 model.compile(optimizer = tf.keras.optimizers.RMSprop(lr=0.0001),loss='categorical_crossentropy', metrics=['accuracy'])
---> 39 model.fit(predictors,label,epochs=5, batch_size=128)

8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    235     except Exception as e:  # pylint:disable=broad-except
    236       if hasattr(e, 'ag_error_metadata'):
--> 237         raise e.ag_error_metadata.to_exception(e)
    238       else:
    239         raise

ValueError: in converted code:

    <ipython-input-4-1c349573302d>:27 call  *
        enc_outputs, state_h, state_c = self.enc_lstm_2(enc_x)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/layers/recurrent.py:623 __call__
        return super(RNN, self).__call__(inputs, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py:812 __call__
        self.name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/input_spec.py:177 assert_input_compatibility
        str(x.shape.as_list()))

    ValueError: Input 0 of layer lstm_1 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 300]
```
I understand that the problem lies in the input shape (as answered in this post: expected ndim=3, found ndim=2).
But I don't know how I should reshape my data for TensorFlow 2.0.
Could you help me with this problem?
The root of the problem lies in the use of `return_sequences`:

- `return_sequences=True` --> returns the output for every input timestep. With an LSTM of 20 units and an input of shape (32, 100, 40), the output shape is (32, 100, 20) == (batch_size, timesteps, lstm_units).
- `return_sequences=False` --> returns only the output of the last timestep, computed using all timesteps: (32, 1, 20).

By default, the layer squeezes dimensions of size 1, so `return_sequences=False` actually yields a 2-D tensor of shape (32, 20). In your model, the first LSTM of the encoder (and of the decoder) uses this default, so the second, stacked LSTM receives a 2-D input where it expects a 3-D one; hence `expected ndim=3, found ndim=2`.
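To make the shape difference concrete, here is a minimal, self-contained sketch (the dimensions 32, 100, 40 and the 20 units are arbitrary, chosen only for illustration):

```python
import tensorflow as tf

x = tf.random.normal((32, 100, 40))  # (batch_size, timesteps, features)

# return_sequences=True: one output per timestep -> 3-D, safe to stack
print(tf.keras.layers.LSTM(20, return_sequences=True)(x).shape)  # (32, 100, 20)

# return_sequences=False (the default): last timestep only,
# the size-1 time dimension is squeezed away -> 2-D
print(tf.keras.layers.LSTM(20)(x).shape)                         # (32, 20)
```

Setting `return_sequences=True` on the first LSTM of both the encoder and the decoder fixes the error: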
```python
class seq2seq(tf.keras.Model):
    def __init__(self, max_sequence_len, total_words):
        super(seq2seq, self).__init__()
        self.max_sequence_len = max_sequence_len
        self.total_words = total_words
        self.input_len = self.max_sequence_len - 1

        # Encoder
        self.enc_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sequence_len - 1)
        # return_sequences=True so the stacked LSTM below receives 3-D input
        self.enc_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_sequences=True)
        self.enc_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh', return_state=True)

        # Decoder
        self.dec_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sequence_len - 1)
        self.dec_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_sequences=True)
        self.dec_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_state=True, return_sequences=False)

        # Dense layer and output:
        self.dense = tf.keras.layers.Dense(total_words, activation='softmax')

    def call(self, inputs):
        # Encoding
        enc_x = self.enc_embedding(inputs)
        enc_x = self.enc_lstm_1(enc_x)
        enc_outputs, state_h, state_c = self.enc_lstm_2(enc_x)

        # Decoding:
        dec_x = self.dec_embedding(enc_outputs)
        dec_x = self.dec_lstm_1(dec_x, initial_state=[state_h, state_c])
        # use the decoder's second LSTM here (the original code called enc_lstm_2 by mistake)
        dec_outputs, _, _ = self.dec_lstm_2(dec_x)
        output_dense = self.dense(dec_outputs)

        return output_dense
```
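Assuming the same `predictors`, `label`, `max_sentence_len`, and `total_words` as in the question, the model is then compiled and trained exactly as before (note that newer TF releases spell the optimizer's `lr` argument as `learning_rate`):

```python
model = seq2seq(max_sequence_len=max_sentence_len, total_words=total_words)
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001),
              loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(predictors, label, epochs=5, batch_size=128)
```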