关于python:使用OpenCV 2.4.11打开图像以用作sklearn的数据集

Opening images with OpenCV 2.4.11 to use as a dataset for sklearn

我目前正在进行以下工作:

  • Python2.7
  • OpenCV 2.4.11
  • sklearn 0.16.1

我正在使用下面的教程。

我的目标是加载自己的数据集,而不是使用预定义的数据集。我正在尝试通过以下操作来实现这一点:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import numpy as np
import cv2
import os
from matplotlib import pyplot as plt
from os import listdir
from os.path import isfile, join
from sklearn import datasets
from sklearn import svm

# Reference dataset bundled with scikit-learn; it is only printed below so its
# structure can be compared with the hand-loaded images.
digits = datasets.load_digits()

imageFolderPath ='C:/PathToFolderContainingMyImages/'

# Getting all the paths for each image (regular files only, in sorted order)
individualImagePaths = [ imageFolderPath + f for f in listdir(imageFolderPath) if isfile(join(imageFolderPath,f))]
individualImagePaths = sorted(individualImagePaths)

logos = []
logoLabels = []

for x in individualImagePaths:
    # File names look like "2 (1).png": the text before the first space is the
    # label. Splitting on an empty separator would raise ValueError instead.
    filename = os.path.basename(x).split(" ")
    filename = filename[0]

    # The second argument 0 asks cv2.imread for a single-channel grayscale image
    logos.append(np.array(cv2.imread(x,0)))
    logoLabels.append(filename)

logos = np.asarray(logos)
logoLabels = np.asarray(logoLabels)

# Single-argument print(...) behaves the same on Python 2 and Python 3
print(type(logos))
print(type(logoLabels))
print(logos[0])
print(logoLabels[0])

print(type(digits.images))
print(type(digits.target))
print(digits.images[0])
print(digits.target[0])

clf = svm.SVC(gamma=0.001, C=100.)
# NOTE(review): this is presumably the call that raises the reported
# "setting an array element with a sequence" error -- each entry of `logos`
# is still a 2-D image here rather than a flat row of features.
clf.fit(logos[:-1], logoLabels[:-1])

运行此脚本时出现以下错误:

1
2
array = np.array(array, dtype=dtype, order=order, copy=copy)
ValueError: setting an array element with a sequence.

我的图像命名为:

"1 (1).png" for a image that is of a 1 digit

"2 (1).png" for a image that is of a 2 digit

"2 (2).png" for a image that is of a 2 digit

1
2
3
4
# Show the container types, then the first image and its label
print(type(logos))
print(type(logoLabels))
print(logos[0])
print(logoLabels[0])

返回:

1
2
3
4
5
6
7
8
9
10
<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
[[255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]
 ...,
 [255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]
 [255 255 255 ..., 255 255 255]]
0

1
2
3
4
# Show the container types, then the first image and its target
print(type(digits.images))
print(type(digits.target))
print(digits.images[0])
print(digits.target[0])

返回:

1
2
3
4
5
6
7
8
9
10
11
<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
[[  0.   0.   5.  13.   9.   1.   0.   0.]
 [  0.   0.  13.  15.  10.  15.   5.   0.]
 [  0.   3.  15.   2.   0.  11.   8.   0.]
 [  0.   4.  12.   0.   0.   8.   8.   0.]
 [  0.   5.   8.   0.   0.   9.   8.   0.]
 [  0.   4.  11.   0.   1.  12.   7.   0.]
 [  0.   2.  14.   5.  10.  12.   0.   0.]
 [  0.   0.   6.  13.  10.   0.   0.   0.]]
0

关于如何创建/加载我自己的数据集,并将其用于 sklearn 的 fit 函数,有什么想法吗?


如果其他人看到这个问题并遇到同样的情况:我采用了逆向的做法,先将 digits 数据集保存为一张张单独的图像,再重新加载这些图像,训练分类器,然后对图像进行预测。

我最终没有使用opencv,如下面的代码所示:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import matplotlib.pyplot as plt
import numpy as np
import scipy
from sklearn import datasets, svm, metrics
import uuid
import os
from os import listdir
from os.path import isfile, join

def scipySaveImage(path,name,image):
    """Save ``image`` as the file ``name`` inside the directory ``path``.

    Args:
        path: Directory to write into, with or without a trailing separator.
        name: File name, including its extension.
        image: Image data handed unchanged to ``scipy.misc.imsave``.
    """
    # os.path.join adds the separator only when ``path`` lacks one, so
    # 'Digits' and 'Digits/' both resolve to the same target file.
    scipy.misc.imsave(os.path.join(path, name),image)

def scipyLoadImage(path,flatten=0):
    """Read the image file at ``path`` through ``scipy.misc.imread``.

    ``flatten`` is forwarded unchanged as the second positional argument of
    ``scipy.misc.imread``; the loaded result is returned as-is.
    """
    loaded = scipy.misc.imread(path, flatten)
    return loaded

def saveAsUniqueImage(path,image,target):
    """Save ``image`` under ``path`` as ``<target>-<uuid1>.png``.

    The label is placed before the first '-' of the file name and a fresh
    ``uuid.uuid1()`` value follows it, so each call produces a new file name.
    """
    label = str(target)
    token = str(uuid.uuid1())
    file_name = '-'.join((label, token)) + '.png'
    scipySaveImage(path, file_name, image)

def saveDataSet(path,dataset):
    """Write every image of ``dataset`` into ``path`` as its own file.

    ``dataset.images`` and ``dataset.target`` are read in parallel by index;
    each image is saved through ``saveAsUniqueImage`` with its target.
    """
    for index, picture in enumerate(dataset.images):
        saveAsUniqueImage(path, picture, dataset.target[index])

def predict(classifier,data):
    """Return ``classifier.predict`` for ``data``, accepting a single sample.

    Args:
        classifier: Any object exposing a ``predict(samples)`` method.
        data: Either a 2-D collection with one row per sample, or one 1-D
            sample (for example a single flattened image).

    Returns:
        Whatever ``classifier.predict`` returns for the samples.
    """
    # A lone 1-D sample is promoted to a single row, because estimators
    # expect an (n_samples, n_features) layout; other inputs pass through.
    if np.ndim(data) == 1:
        data = np.reshape(data, (1, -1))
    return classifier.predict(data)

def shape_data(data):
    """Flatten every sample of ``data`` into one row.

    The first axis (one entry per sample) is kept and all remaining axes are
    collapsed, giving an array of shape ``(len(data), -1)``.
    """
    return data.reshape(len(data), -1)

def train_classifer(data, targets=None):
    """Fit an ``svm.SVC(gamma=0.001)`` on ``data`` and return it.

    Args:
        data: Array of samples; it is flattened to one row per sample with
            ``shape_data`` before fitting.
        targets: Labels aligned with ``data``. When omitted, the labels of
            the module-level ``digits`` dataset are used, which is what this
            function always did before the parameter existed.

    Returns:
        The fitted classifier.
    """
    if targets is None:
        # Backward-compatible default: fall back to the global dataset
        targets = digits.target
    n_samples = len(data)
    data = shape_data(data)
    classifier = svm.SVC(gamma=0.001)
    # Only the first n_samples labels are used, one per row of data
    classifier.fit(data, targets[:n_samples])
    return classifier

# A Dataset Object
class dataset(object):
    """Plain attribute container for a loaded image dataset.

    The attributes start as ``None`` and are filled in by the loader.
    """

    def __init__(self):
        # Labels, one per image
        self.target = None
        # Image data, one entry per image
        self.images = None
        # File names the images were read from
        self.filenames = None

def loadTestDataset(path):
    """Load every image file in ``path`` into a ``dataset`` object.

    File names must look like ``<target>-<anything>``: the text before the
    first '-' is parsed as the integer label.

    Args:
        path: Directory holding the images, with or without a trailing
            separator.

    Returns:
        A ``dataset`` whose ``target`` and ``images`` are numpy arrays and
        whose ``filenames`` is a list, all in sorted file-name order.

    Raises:
        ValueError: If a file name does not start with an integer label.
    """
    data = dataset()
    targets = []
    filenames = []
    images = []

    # Sorting the bare names gives the same order as sorting the full paths,
    # since every path shares the same directory prefix.
    for filename in sorted(listdir(path)):
        # join() is used for both the isfile check and the read, so a path
        # without a trailing separator still points at the right file.
        imagePath = join(path, filename)
        if not isfile(imagePath):
            continue

        target = int(filename.split("-")[0])

        targets.append(target)
        filenames.append(filename)
        # Second argument 1 is passed positionally to imread
        # (presumably its flatten flag -- confirm against the scipy version in use)
        images.append(scipy.misc.imread(imagePath,1))

    data.target = np.asarray(targets)
    data.images = np.asarray(images)
    data.filenames = filenames

    return data

# The folder where my digit images will go in
training_path = 'Digits/'

# The images are written into this folder below, so make sure it exists first
if not os.path.isdir(training_path):
    os.makedirs(training_path)

# Saving the images in the digit dataset into the Digit folder
# Comment out this line if you already have the digits
saveDataSet(training_path,datasets.load_digits())

# Loading all the images from a folder into a dataset
# (train_classifer reads its labels from this module-level `digits`)
digits = loadTestDataset(training_path)

# Reloads the images from our training folder
test_digits = loadTestDataset(training_path)

# Flatten every image into one row of pixel values (an 8x8 image becomes a
# row of 64 values), which is the layout the classifier is trained on
test_digits.images = shape_data(test_digits.images)

# Training our classifer so it knows how to classify digits
digits_model = train_classifer(digits.images)

# The target that our model thinks is being represented.
# The slice keeps the sample 2-D (one row), as estimators expect.
prediction = predict(digits_model,test_digits.images[300:301])

# Printing the filename ( which includes the target ) and our models prediction
print(test_digits.filenames[300])
print(prediction)