Scikit Learn - ValueError: operands could not be broadcast together
我正试图在数据集上应用
runfile('D:/ROFI/ML/Heart Disease/prediction.py', wdir='D:/ROFI/ML/Heart Disease')
Traceback (most recent call last):File"", line 1, in
runfile('D:/ROFI/ML/Heart Disease/prediction.py', wdir='D:/ROFI/ML/Heart Disease')File"C:\Users\User\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)File"C:\Users\User\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)File"D:/ROFI/ML/Heart Disease/prediction.py", line 85, in
predict(x_train, y_train, x_test, y_test)File"D:/ROFI/ML/Heart Disease/prediction.py", line 73, in predict
predicted_data = model.predict(x_test)File"C:\Users\User\Anaconda3\lib\site-packages\sklearn
aive_bayes.py", line 65, in predict
jll = self._joint_log_likelihood(X)File"C:\Users\User\Anaconda3\lib\site-packages\sklearn
aive_bayes.py", line 429, in _joint_log_likelihood
n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) /ValueError: operands could not be broadcast together with shapes (294,14) (15,)
这里怎么了?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | import pandas from sklearn import metrics from sklearn.preprocessing import Imputer from sklearn.naive_bayes import GaussianNB def load_data(feature_columns, predicted_column): train_data_frame = pandas.read_excel("training_data.xlsx") test_data_frame = pandas.read_excel("testing_data.xlsx") data_frame = pandas.read_excel("data_set.xlsx") x_train = train_data_frame[feature_columns].values y_train = train_data_frame[predicted_column].values x_test = test_data_frame[feature_columns].values y_test = test_data_frame[predicted_column].values x_train, x_test = impute(x_train, x_test) return x_train, y_train, x_test, y_test def impute(x_train, x_test): fill_missing = Imputer(missing_values=-9, strategy="mean", axis=0) x_train = fill_missing.fit_transform(x_train) x_test = fill_missing.fit_transform(x_test) return x_train, x_test def predict(x_train, y_train, x_test, y_test): model = GaussianNB() model.fit(x_train, y_train.ravel()) predicted_data = model.predict(x_test) accuracy = metrics.accuracy_score(y_test, predicted_data) print("Accuracy of our naive bayes model is : %.2f"%(accuracy * 100)) return predicted_data feature_columns = ["age","sex","chol","cigs","years","fbs","trestbps","restecg","thalach","exang","oldpeak","slope","ca","thal","num"] predicted_column = ["cp"] x_train, y_train, x_test, y_test = load_data(feature_columns, predicted_column) predict(x_train, y_train, x_test, y_test) |
注意:两个文件的列数相同。
我找到了虫子。该错误是由于