Opening images with OpenCV 2.4.11 to use as a dataset for sklearn
我目前正在进行以下工作:
- Python2.7
- OpenCV 2.4.11
- sklearn(scikit-learn)0.16.1
我正在使用下面的教程。
我的目标是加载自己的数据集,而不是使用预定义的数据集。我正在尝试通过以下操作来实现这一点:
import os
from os import listdir
from os.path import isfile, join

import cv2
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn import svm

# Reference dataset, loaded only to compare its layout with the custom one.
digits = datasets.load_digits()

imageFolderPath = 'C:/PathToFolderContainingMyImages/'

# Getting all the paths for each image
individualImagePaths = [
    imageFolderPath + f
    for f in listdir(imageFolderPath)
    if isfile(join(imageFolderPath, f))
]
individualImagePaths = sorted(individualImagePaths)

logos = []
logoLabels = []
for x in individualImagePaths:
    # Files are named "<digit> (<n>).png", so the label is the text before
    # the first space. Splitting on an empty string raises
    # "ValueError: empty separator", hence the single-space separator.
    filename = os.path.basename(x).split(" ")
    filename = filename[0]
    # Flag 0 asks OpenCV for a single-channel greyscale image.
    logos.append(np.array(cv2.imread(x, 0)))
    logoLabels.append(filename)

logos = np.asarray(logos)
logoLabels = np.asarray(logoLabels)

# Compare the custom data with the bundled digits dataset.
print(type(logos))
print(type(logoLabels))
print(logos[0])
print(logoLabels[0])

print(type(digits.images))
print(type(digits.target))
print(digits.images[0])
print(digits.target[0])

clf = svm.SVC(gamma=0.001, C=100.)
# NOTE(review): this is presumably the call that raises the ValueError
# reported for this script. `logos` holds one 2-D image per sample, whereas
# the estimator appears to need one flat feature row per sample (and images
# of differing sizes would make `logos` an object array) -- confirm.
clf.fit(logos[:-1], logoLabels[:-1])
运行此脚本时出现以下错误:
array = np.array(array, dtype=dtype, order=order, copy=copy)
ValueError: setting an array element with a sequence.
我的图像命名为:
"1 (1).png" for an image of the digit 1
"2 (1).png" for an image of the digit 2
"2 (2).png" for another image of the digit 2
# Show the container types and the first sample/label of the custom image set.
print type(logos)
print type(logoLabels)
print logos[0]
print logoLabels[0]
返回:
<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
[[255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]
 ...,
 [255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]]
0
# Show the same information for the digits dataset loaded via sklearn, for comparison.
print type(digits.images)
print type(digits.target)
print digits.images[0]
print digits.target[0]
返回:
<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
[[  0.   0.   5.  13.   9.   1.   0.   0.]
 [  0.   0.  13.  15.  10.  15.   5.   0.]
 [  0.   3.  15.   2.   0.  11.   8.   0.]
 [  0.   4.  12.   0.   0.   8.   8.   0.]
 [  0.   5.   8.   0.   0.   9.   8.   0.]
 [  0.   4.  11.   0.   1.  12.   7.   0.]
 [  0.   2.  14.   5.  10.  12.   0.   0.]
 [  0.   0.   6.  13.  10.   0.   0.   0.]]
0
关于如何创建/加载我自己的数据集并将其用于 sklearn,有什么建议吗?
如果其他人看到这个问题并遇到同样的困难:我采用了逆向的做法——先把 digits 数据集保存为一张张单独的图像,再重新加载这些图像,训练分类器,最后对图像进行预测。
我最终没有使用opencv,如下面的代码所示:
import os
import uuid
from os import listdir
from os.path import isfile, join

import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.misc  # `import scipy` alone does not guarantee the submodule is loaded
from sklearn import datasets, svm, metrics


def scipySaveImage(path, name, image):
    """Write `image` to the file `name` inside the directory `path`."""
    # join() works whether or not `path` ends with a separator.
    scipy.misc.imsave(join(path, name), image)


def scipyLoadImage(path, flatten=0):
    """Read the image file at `path`, forwarding `flatten` to scipy's imread."""
    return scipy.misc.imread(path, flatten)


def saveAsUniqueImage(path, image, target):
    """Save `image` under `path` as "<target>-<uuid>.png".

    The label is encoded in the file name so that loadTestDataset() can
    recover it; the uuid keeps images with the same label from colliding.
    """
    name = str(target) + '-' + str(uuid.uuid1()) + '.png'
    scipySaveImage(path, name, image)


def saveDataSet(path, dataset):
    """Save every image of `dataset` (needs `.images` and `.target`) into `path`."""
    # Create the output folder on first use instead of failing inside imsave.
    if not os.path.isdir(path):
        os.makedirs(path)
    for image, target in zip(dataset.images, dataset.target):
        saveAsUniqueImage(path, image, target)


def predict(classifier, data):
    """Return the classifier's predictions for `data`.

    `data` may be a single flattened sample or an array with one sample per
    row; a single sample is promoted to a one-row 2-D array first.
    """
    return classifier.predict(np.atleast_2d(data))


def shape_data(data):
    """Flatten each sample so the result has shape (n_samples, n_features)."""
    data = np.asarray(data)
    return data.reshape((len(data), -1))


def train_classifer(data, targets=None):
    """Fit and return an SVC on `data` with the labels in `targets`.

    `targets` defaults to the module-level `digits.target`, which is what the
    original one-argument form relied on implicitly.
    """
    if targets is None:
        targets = digits.target
    n_samples = len(data)
    data = shape_data(data)
    classifier = svm.SVC(gamma=0.001)
    classifier.fit(data[:n_samples], targets[:n_samples])
    return classifier


# A Dataset Object
class dataset(object):
    """Minimal container exposing `images`, `target` and `filenames`."""

    def __init__(self):
        self.target = None
        self.images = None
        self.filenames = []


def loadTestDataset(path):
    """Load every file in `path` as a greyscale image.

    File names must look like "<label>-<anything>", with an integer label.

    Returns a `dataset` with `images`, `target` and `filenames` filled in.
    Raises ValueError if a label is not an integer or if the images do not
    all share the same dimensions.
    """
    imagePaths = sorted(
        join(path, f) for f in listdir(path) if isfile(join(path, f))
    )

    targets = []
    filenames = []
    images = []
    for imagePath in imagePaths:
        filename = os.path.basename(imagePath)
        # The label is the part of the file name before the first "-".
        targets.append(int(filename.split("-")[0]))
        filenames.append(filename)
        images.append(scipyLoadImage(imagePath, 1))

    # Mixed sizes would make np.asarray build an object array, which later
    # fails with "setting an array element with a sequence".
    shapes = set(image.shape for image in images)
    if len(shapes) > 1:
        raise ValueError(
            "all images must have the same dimensions, found: %s"
            % sorted(shapes)
        )

    data = dataset()
    data.target = np.asarray(targets)
    data.images = np.asarray(images)
    data.filenames = filenames
    return data


# The folder where my digit images will go in
training_path = 'Digits/'

# Saving the images in the digit dataset into the Digit folder
# Comment out this line if you already have the digits
saveDataSet(training_path, datasets.load_digits())

# Loading all the images from a folder into a dataset
digits = loadTestDataset(training_path)

# Reloads the images from our training folder
test_digits = loadTestDataset(training_path)

# Shaping the images: every 8x8 image becomes one row of 64 values
test_digits.images = shape_data(test_digits.images)

# Training our classifer so it knows how to classify digits
digits_model = train_classifer(digits.images, digits.target)

# The target that our model thinks is being represented
prediction = predict(digits_model, test_digits.images[300])

# Printing the filename ( which includes the target ) and our models prediction
print(test_digits.filenames[300])
print(prediction)