LSTM: Input 0 of layer lstm_1 is incompatible with the layer: expected ndim=3, found ndim=2 (reshaping input)
I want to generate poetry based on Robert Frost's poems.
I have preprocessed my dataset:
```python
max_sentence_len = max(len(l) for l in corpus_int)
input_seq = np.array(tf.keras.preprocessing.sequence.pad_sequences(
    corpus_int, padding='pre', truncating='pre', maxlen=max_sentence_len))

# predictors: everything except the last token; label: only the last token
predictors, label = input_seq[:, :-1], input_seq[:, -1]
label = ku.to_categorical(label, num_classes=total_words, dtype='int32')
```

```
predictors
array([[   0,    0,    0, ...,   10,    5,  544],
       [   0,    0,    0, ...,   64,    8,  854],
       [   0,    0,    0, ...,  855,  174,    2],
       ...,
       [   0,    0,    0, ...,  129,   49,   94],
       [   0,    0,    0, ...,  183,  159,   60],
       [   0,    0,    3, ...,    3, 2157,    4]], dtype=int32)

label
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]], dtype=int32)
```
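For reference, here is a minimal sketch of what the padding and predictor/label split above produce, using a hypothetical two-sentence corpus (the token ids are made up):

```python
import tensorflow as tf

toy_corpus = [[10, 5, 544], [64, 8]]      # hypothetical tokenized sentences
padded = tf.keras.preprocessing.sequence.pad_sequences(
    toy_corpus, padding='pre', maxlen=4)  # pre-pad with zeros to equal length
# padded -> [[  0,  10,   5, 544],
#            [  0,   0,  64,   8]]

X, y = padded[:, :-1], padded[:, -1]      # X: all but the last token, y: the last token
# X -> [[ 0, 10,  5], [ 0,  0, 64]],  y -> [544, 8]
```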
After that, I built my model using an encoder-decoder architecture:
```python
class seq2seq(tf.keras.Model):
    def __init__(self, max_sequence_len, total_words):
        super(seq2seq, self).__init__()
        self.max_sequence_len = max_sequence_len
        self.total_words = total_words
        self.input_len = self.max_sequence_len - 1
        self.total_words = self.total_words

        # Encoder
        self.enc_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sentence_len - 1)
        self.enc_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh')
        self.enc_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh', return_state=True)

        # Decoder
        self.dec_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sentence_len - 1)
        self.dec_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh')
        self.dec_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_state=True, return_sequences=True)

        # Dense layer and output:
        self.dense = tf.keras.layers.Dense(total_words, activation='softmax')

    def call(self, inputs):
        # Encoding
        enc_x = self.enc_embedding(inputs)
        enc_x = self.enc_lstm_1(enc_x)
        enc_outputs, state_h, state_c = self.enc_lstm_2(enc_x)

        # Decoding:
        dec_x = self.dec_embedding(enc_outputs)
        dec_x = self.dec_lstm_1(dec_x, initial_state=[state_h, state_c])
        dec_outputs, _, _ = self.enc_lstm_2(dec_x)
        output_dense = self.dense(dec_outputs)

        return output_dense


model = seq2seq(max_sequence_len=max_sentence_len, total_words=total_words)
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),
              loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(predictors, label, epochs=5, batch_size=128)
```
But in the end I get the following error:
```
ValueError                                Traceback (most recent call last)
<ipython-input-4-1c349573302d> in <module>()
     37 model = seq2seq(max_sequence_len = max_sentence_len,total_words = total_words)
     38 model.compile(optimizer = tf.keras.optimizers.RMSprop(lr=0.0001),loss='categorical_crossentropy', metrics=['accuracy'])
---> 39 model.fit(predictors,label,epochs=5, batch_size=128)

8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    235     except Exception as e:  # pylint:disable=broad-except
    236       if hasattr(e, 'ag_error_metadata'):
--> 237         raise e.ag_error_metadata.to_exception(e)
    238       else:
    239         raise

ValueError: in converted code:

    <ipython-input-4-1c349573302d>:27 call  *
        enc_outputs, state_h, state_c = self.enc_lstm_2(enc_x)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/layers/recurrent.py:623 __call__
        return super(RNN, self).__call__(inputs, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py:812 __call__
        self.name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/input_spec.py:177 assert_input_compatibility
        str(x.shape.as_list()))

    ValueError: Input 0 of layer lstm_1 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 300]
```
I understand that the problem lies in the input shape (as answered in this post: expected ndim=3, found ndim=2).
But I don't know how I should reshape my data for TensorFlow 2.0.
Could you help me with this problem?
The root of the problem lies in the use of `return_sequences`:

- `return_sequences=True` --> returns the output for every input timestep. With an LSTM of 20 units and an input of shape (32, 100, 40), the output shape is (32, 100, 20) == (batch_size, timesteps, lstm_units).
- `return_sequences=False` --> returns only the output of the last timestep, computed using all timesteps: (32, 1, 20).

By default, the layer squeezes dimensions of size 1, so `return_sequences=False` actually yields a 2-D tensor of shape (32, 20). In your model, the first LSTM of the encoder (and of the decoder) uses this default, so the second, stacked LSTM receives a 2-D input where it expects a 3-D one; hence `expected ndim=3, found ndim=2`.
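To make the shape difference concrete, here is a minimal, self-contained sketch (the dimensions 32, 100, 40 and the 20 units are arbitrary, chosen only for illustration):

```python
import tensorflow as tf

x = tf.random.normal((32, 100, 40))  # (batch_size, timesteps, features)

# return_sequences=True: one output per timestep -> 3-D, safe to stack
print(tf.keras.layers.LSTM(20, return_sequences=True)(x).shape)  # (32, 100, 20)

# return_sequences=False (the default): last timestep only,
# the size-1 time dimension is squeezed away -> 2-D
print(tf.keras.layers.LSTM(20)(x).shape)                         # (32, 20)
```

Setting `return_sequences=True` on the first LSTM of both the encoder and the decoder fixes the error: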
```python
class seq2seq(tf.keras.Model):
    def __init__(self, max_sequence_len, total_words):
        super(seq2seq, self).__init__()
        self.max_sequence_len = max_sequence_len
        self.total_words = total_words
        self.input_len = self.max_sequence_len - 1

        # Encoder
        self.enc_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sequence_len - 1)
        # return_sequences=True so the stacked LSTM below receives 3-D input
        self.enc_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_sequences=True)
        self.enc_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh', return_state=True)

        # Decoder
        self.dec_embedding = tf.keras.layers.Embedding(input_dim=total_words, output_dim=300,
                                                       input_length=max_sequence_len - 1)
        self.dec_lstm_1 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_sequences=True)
        self.dec_lstm_2 = tf.keras.layers.LSTM(units=300, activation='tanh',
                                               return_state=True, return_sequences=False)

        # Dense layer and output:
        self.dense = tf.keras.layers.Dense(total_words, activation='softmax')

    def call(self, inputs):
        # Encoding
        enc_x = self.enc_embedding(inputs)
        enc_x = self.enc_lstm_1(enc_x)
        enc_outputs, state_h, state_c = self.enc_lstm_2(enc_x)

        # Decoding:
        dec_x = self.dec_embedding(enc_outputs)
        dec_x = self.dec_lstm_1(dec_x, initial_state=[state_h, state_c])
        # use the decoder's second LSTM here (the original code called enc_lstm_2 by mistake)
        dec_outputs, _, _ = self.dec_lstm_2(dec_x)
        output_dense = self.dense(dec_outputs)

        return output_dense
```
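Assuming the same `predictors`, `label`, `max_sentence_len`, and `total_words` as in the question, the model is then compiled and trained exactly as before (note that newer TF releases spell the optimizer's `lr` argument as `learning_rate`):

```python
model = seq2seq(max_sequence_len=max_sentence_len, total_words=total_words)
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001),
              loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(predictors, label, epochs=5, batch_size=128)
```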