Python, OpenCV: classify gender using ORB features and KNN
任务:将人脸图像分为女性和男性。提供带标签的训练图像,从网络摄像机获取测试图像。
使用:python 2.7,opencv 2.4.4
我正在使用ORB从灰度图像中提取特征,我希望用它来训练K-最近邻(KNN)分类器。每个训练图像都是不同的人,因此每个图像的关键点和描述符的数量明显不同。我的问题是我无法理解OpenCV文档中关于KNN和ORB的部分。我看过其他关于ORB、KNN和FLANN的问题,但没什么帮助。
ORB给出的描述符的性质是什么?它与通过BRIEF、SURF、SIFT等获得的描述符有什么不同?
在KNN中,每个训练样本的特征描述符的大小应该相同。如何确保每个图像的描述符大小相同?更一般地说,应以何种格式向KNN提供特征,以便使用给定的数据和标签进行训练?数据应该是int还是float?它可以是char吗?
训练数据可以在这里找到。
我也在使用来自opencv样本的haarcascade_frontalface_alt.xml。
目前,KNN模型只用了10张图片进行训练,目的是检查我的程序能否无错误地运行,但事实上它并不能。
这是我的代码:
import cv2
import numpy as np

# Length in bytes of one ORB descriptor (one uint8 row per keypoint),
# as stated in the OpenCV ORB documentation.
ORB_DESCRIPTOR_BYTES = 32


class ObjectNotFoundError(TypeError):
    """Raised when the cascade detects no object in an image.

    Subclasses TypeError because that is the exception this condition used
    to surface as (indexing the empty detection result), so existing
    ``except TypeError`` handlers keep working.
    """


def chooseCascade():
    """Return the Haar cascade classifier used for frontal-face detection."""
    # TODO: Option for different cascades
    return cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')


def _cropFirstObject(cascade, image):
    """Crop ``image`` to the first rectangle detected by ``cascade``.

    Raises:
        ObjectNotFoundError: if the cascade detects nothing.
    """
    objRegion = cascade.detectMultiScale(image)
    if len(objRegion) == 0:
        raise ObjectNotFoundError('no object detected in image')
    # TODO: What if multiple objects in image?
    x1, y1, width, height = objRegion[0]
    return image[y1:y1 + height, x1:x1 + width]


def cropToObj(cascade, imageFile):
    """Load ``imageFile`` as grayscale and crop it to the first detected object.

    Args:
        cascade: a cv2.CascadeClassifier.
        imageFile: path of the image to load.

    Returns:
        The cropped single-channel image.

    Raises:
        ObjectNotFoundError: if the cascade detects nothing.
    """
    # Load as 1-channel grayscale image
    image = cv2.imread(imageFile, 0)
    return _cropFirstObject(cascade, image)


def _flattenDescriptors(faceDescriptors, featureLength):
    """Return the descriptors as a float32 vector of exactly ``featureLength``.

    ORB may return fewer keypoints than requested (or none, in which case
    the descriptors are None), but KNN needs every sample to have the same
    number of features. Short vectors are therefore zero-padded and long
    ones truncated.
    """
    flat = np.zeros(featureLength, dtype=np.float32)
    if faceDescriptors is not None:
        values = np.asarray(faceDescriptors, dtype=np.float32).ravel()
        values = values[:featureLength]
        flat[:values.size] = values
    return flat


def recognizer(fileNames):
    """Train a KNN gender classifier on ``fileNames`` and classify webcam faces.

    Files whose name starts with 'BF' are labelled female (0), all others
    male (1). The function then loops over webcam frames, printing the KNN
    result for the first detected face, until a key is pressed.

    Args:
        fileNames: paths of the labelled training images.
    """
    nFeatures = 100
    featureLength = nFeatures * ORB_DESCRIPTOR_BYTES
    # ORB constructor
    orb = cv2.ORB(nfeatures=nFeatures)
    # A cascade for face detection
    haarFaceCascade = chooseCascade()

    # One fixed-length float32 row per training image, as KNN requires.
    descriptors = np.zeros((len(fileNames), featureLength), dtype=np.float32)
    for row, imageFile in enumerate(fileNames):
        faceImage = cropToObj(haarFaceCascade, imageFile)
        _faceKeyPoints, faceDescriptors = orb.detectAndCompute(faceImage, None)
        descriptors[row] = _flattenDescriptors(faceDescriptors, featureLength)
    print(descriptors)

    # 0 = female, 1 = male; KNearest expects a numeric array, not a list.
    responses = np.asarray(
        [0 if name.startswith('BF') else 1 for name in fileNames],
        dtype=np.float32)

    knn = cv2.KNearest()
    # isRegression = False implies classification
    knn.train(descriptors, responses, isRegression=False)

    # Obtain test face images from the camera
    capture = cv2.VideoCapture(0)
    try:
        closeCamera = -1
        while closeCamera < 0:
            # read() grabs and decodes a frame; retrieve() alone does not grab.
            retval, camImage = capture.read()
            if not retval:
                print('could not read a frame from the camera')
                break
            # ORB needs a single-channel image; camera frames are BGR.
            grayImage = cv2.cvtColor(camImage, cv2.COLOR_BGR2GRAY)
            try:
                # TODO: What if multiple faces?
                testFaceImage = _cropFirstObject(haarFaceCascade, grayImage)
            except ObjectNotFoundError:
                print('check if front face is visible to camera')
            else:
                _testFaceKP, testFaceDesc = orb.detectAndCompute(
                    testFaceImage, None)
                flatTestFaceDesc = _flattenDescriptors(
                    testFaceDesc, featureLength).reshape(1, -1)
                # Args in knn.find_nearest: testData, neighborhood
                returnedValue, result, neighborResponse, distance = \
                    knn.find_nearest(flatTestFaceDesc, 3)
                print('%s %s %s %s' % (returnedValue, result,
                                       neighborResponse, distance))
            # Display results
            # TODO: Overlay classification text
            cv2.imshow("testImage", camImage)
            closeCamera = cv2.waitKey(1)
    finally:
        # Always free the camera and close the window, even on errors.
        capture.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    fileNames = ['BF09NES_gray.jpg', 'BF11NES_gray.jpg', 'BF13NES_gray.jpg',
                 'BF14NES_gray.jpg', 'BF18NES_gray.jpg', 'BM25NES_gray.jpg',
                 'BM26NES_gray.jpg', 'BM29NES_gray.jpg', 'BM31NES_gray.jpg',
                 'BM34NES_gray.jpg']
    recognizer(fileNames)
目前,我在使用
另外,这种方法是完全错误的吗?我应该用其他的方法来进行性别分类吗?我对opencv facerec演示中的fisherface和eigenface示例不满意,所以请不要直接告诉我这些。
任何其他帮助都非常感谢。谢谢。
---编辑---
我试过几件事,想出了一个答案。
我仍然希望社区中有人能通过提出一个想法来帮助我,这样我就不必把事情硬编码到我的解决方案中。我还怀疑knn.match_nearest()没有做我需要它做的事情。
正如预期的那样,识别器一点也不准确,而且由于旋转、照明等原因很容易出现错误分类。任何关于改进这种方法的建议都会受到赞赏。
我用来训练的数据库是:Karolinska Directed Emotional Faces(KDEF,卡罗林斯卡定向情绪面孔数据库)
以前,我一直在努力寻找ORB、SIFT、SURF等的技术差异,我发现这些帖子很有帮助:
- https://stackoverflow.com/a/10169025/1463143
- 对于尺度不变的特征提取,是否有SURF和SIFT的快速替代方案?
- OpenCV的ORB特征检测器是如何工作的?
需要注意的是,OpenCV中的这些特征检测算法需要一个单通道(通常为8位)灰度图像。
结果表明,
因此,在增加一个描述符列表之后,我将该列表转换为一个数组。
但是!在此之前,我将orb
下一个挑战是使用
1 | OpenCV Error: Bad argument (Input samples must be floating-point matrix (<num_samples>x<var_count>)) in find_nearest |
即使您有一个需要传递给
所以我不得不用一种粗略的方法来检查我的网络摄像头拍摄的图像是否在我解决这个问题的硬编码方法中可用。
现在代码如下:
import cv2
import numpy as np

# Length in bytes of one ORB descriptor (one uint8 row per keypoint),
# as stated in the OpenCV ORB documentation.
ORB_DESCRIPTOR_BYTES = 32


class ObjectNotFoundError(TypeError):
    """Raised when the cascade detects no object in an image.

    Subclasses TypeError because that is the exception this condition used
    to surface as (indexing the empty detection result), so existing
    ``except TypeError`` handlers keep working.
    """


def chooseCascade():
    """Return the Haar cascade classifier used for frontal-face detection."""
    # TODO: Option for different cascades
    return cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')


def cropToObj(cascade, imageFile, flag):
    """Crop an image to the first object detected by ``cascade``.

    Args:
        cascade: a cv2.CascadeClassifier.
        imageFile: a file path (flag 0, 1 or -1) or an already loaded image
            array such as a camera frame (flag 2).
        flag: 0 loads as 1-channel grayscale, 1 as 3-channel colour,
            -1 loads the image as is, 2 means ``imageFile`` is the image.

    Returns:
        The cropped image region.

    Raises:
        ValueError: if ``flag`` is not one of 0, 1, -1, 2.
        ObjectNotFoundError: if the cascade detects nothing.
    """
    if flag == 2:
        # Image is from camera
        image = imageFile
    elif flag in (0, 1, -1):
        # The flag values are passed straight to cv2.imread
        image = cv2.imread(imageFile, flag)
    else:
        raise ValueError('improper arguments passed to cropToObj')

    # Crop to the object of interest in the image
    objRegion = cascade.detectMultiScale(image)
    if len(objRegion) == 0:
        raise ObjectNotFoundError('no object detected in image')
    # TODO: What if multiple objects in image?
    x1, y1, width, height = objRegion[0]
    return image[y1:y1 + height, x1:x1 + width]


def _flattenDescriptors(faceDescriptors, featureLength):
    """Return the descriptors as a float32 vector of exactly ``featureLength``.

    ORB may return fewer keypoints than requested (or none, in which case
    the descriptors are None), but KNN needs every sample to have the same
    number of features. Short vectors are therefore zero-padded and long
    ones truncated, which replaces the former hard-coded size check.
    """
    flat = np.zeros(featureLength, dtype=np.float32)
    if faceDescriptors is not None:
        values = np.asarray(faceDescriptors, dtype=np.float32).ravel()
        values = values[:featureLength]
        flat[:values.size] = values
    return flat


def recognizer(fileNames):
    """Train a KNN gender classifier on ``fileNames`` and classify webcam faces.

    Files whose name starts with 'BF' are labelled female (0), all others
    male (1). The function then loops over webcam frames, printing
    'Female' or 'Male' for the first detected face, until a key is pressed.

    Args:
        fileNames: paths of the labelled training images.
    """
    nFeatures = 25
    # Derived instead of hard-coding 800 (= 25 keypoints * 32 bytes).
    featureLength = nFeatures * ORB_DESCRIPTOR_BYTES
    # ORB constructor
    orb = cv2.ORB(nfeatures=nFeatures)
    # A cascade for face detection
    haarFaceCascade = chooseCascade()

    # One fixed-length float32 row per training image, as KNN requires.
    descriptors = np.zeros((len(fileNames), featureLength), dtype=np.float32)
    for row, imageFile in enumerate(fileNames):
        # Training images are loaded as grayscale: ORB needs one channel.
        faceImage = cropToObj(haarFaceCascade, imageFile, 0)
        _faceKeyPoints, faceDescriptors = orb.detectAndCompute(faceImage, None)
        descriptors[row] = _flattenDescriptors(faceDescriptors, featureLength)

    # 0 = female, 1 = male
    responses = np.asarray(
        [0 if name.startswith('BF') else 1 for name in fileNames],
        dtype=np.float32)

    knn = cv2.KNearest()
    # isRegression = False implies classification
    knn.train(descriptors, responses, isRegression=False)

    # Obtain test face images from the camera
    capture = cv2.VideoCapture(0)
    try:
        closeCamera = -1
        while closeCamera < 0:
            retval, camImage = capture.read()
            if not retval:
                print('could not read a frame from the camera')
                break
            try:
                # TODO: What if multiple faces?
                testFaceImage = cropToObj(haarFaceCascade, camImage, 2)
            except ObjectNotFoundError:
                # Skip classification for this frame instead of reusing a
                # stale (or undefined) face image.
                print('check if front face is visible to camera')
            else:
                testFaceImage = cv2.cvtColor(testFaceImage,
                                             cv2.COLOR_BGR2GRAY)
                # Keypoints and descriptors of test face image
                _testFaceKP, testFaceDesc = orb.detectAndCompute(
                    testFaceImage, None)
                if testFaceDesc is None:
                    print('insufficient size of image')
                else:
                    flatTestFaceDesc = _flattenDescriptors(
                        testFaceDesc, featureLength).reshape(1, -1)
                    # Args in knn.find_nearest: testData, neighborhood
                    returnedValue, _result, _neighborResponse, _distance = \
                        knn.find_nearest(flatTestFaceDesc, 5)
                    if returnedValue == 0.0:
                        print('Female')
                    else:
                        print('Male')
            # Display results
            # TODO: Overlay classification text
            cv2.imshow("testImage", camImage)
            closeCamera = cv2.waitKey(1)
    finally:
        # Always free the camera and close the window, even on errors.
        capture.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    fileNames = ['BF09NES_gray.jpg', 'BF11NES_gray.jpg', 'BF13NES_gray.jpg',
                 'BF14NES_gray.jpg', 'BF18NES_gray.jpg', 'BM25NES_gray.jpg',
                 'BM26NES_gray.jpg', 'BM29NES_gray.jpg', 'BM31NES_gray.jpg',
                 'BM34NES_gray.jpg']
    recognizer(fileNames)
我仍然希望社区中有人能通过提出一个想法来帮助我,这样我就不必把事情硬编码到我的解决方案中。我还怀疑knn.match_nearest()没有做我需要它做的事情。
正如预期的那样,识别器一点也不准确,而且很容易由于旋转、照明等原因导致错误分类。任何关于改进这种方法的建议都会受到赞赏。
我对所述方法的有效性/可操作性有一些疑问。这是另一种你可能需要考虑的方法。
import os

import cv2
import numpy as np


def feaCnt():
    """Return the length of the feature vector produced by extr().

    The length is measured by running extr() on a blank 400x400 BGR image,
    so it always matches whatever extr() currently computes.
    """
    mat = np.zeros((400, 400, 3), dtype=np.uint8)
    return len(extr(mat))


def extr(img):
    """Extract the feature vector of a BGR image (delegates to sobel())."""
    return sobel(img)


def sobel(img):
    """Return directional-gradient features of a BGR image as a flat vector.

    The image is converted to grayscale, resized to 40x40 and filtered with
    eight 3x3 Sobel-style kernels (horizontal, vertical and diagonal, each
    in both directions). Every response is shrunk to 15x15 and all eight
    are concatenated into one float32 vector.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    klr = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
    kbt = [[1, 2, 1], [0, 0, 0], [-1, -2, -1]]
    ktb = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
    krl = [[1, 0, -1], [2, 0, -2], [1, 0, -1]]
    kd1 = [[0, 1, 2], [-1, 0, 1], [-2, -1, 0]]
    kd2 = [[-2, -1, 0], [-1, 0, 1], [0, 1, 2]]
    kd3 = [[0, -1, -2], [1, 0, -1], [2, 1, 0]]
    kd4 = [[2, 1, 0], [1, 0, -1], [0, -1, -2]]
    # float32 kernels: the default integer dtype is platform dependent and
    # filter2D documents a floating-point kernel.
    karr = np.asarray([klr, kbt, ktb, krl, kd1, kd2, kd3, kd4],
                      dtype=np.float32)
    gray = cv2.resize(gray, (40, 40))
    res = np.float32([cv2.resize(cv2.filter2D(gray, -1, k), (15, 15))
                      for k in karr])
    return res.flatten()


root = 'C:/data/gen'
model = 'c:/data/models/svm/gen.xml'

# Collect (file path, label) pairs; the label is the name of the first
# directory below root, e.g. root/0/a.jpg -> 0.
imgs = []
for path, subdirs, files in os.walk(root):
    relative = os.path.relpath(path, root)
    if relative == os.curdir:
        # Files directly under root have no label directory.
        continue
    lbl = relative.split(os.sep)[0]
    for name in files:
        imgs.append((os.path.join(path, name), int(lbl)))

if not imgs:
    raise SystemExit('no labelled training images found under %s' % root)

samples = np.zeros((len(imgs), feaCnt()), dtype=np.float32)
labels = np.zeros(len(imgs), dtype=np.float32)

# enumerate() yields integer row indices (a float counter is not a valid
# array index).
for i, (f, l) in enumerate(imgs):
    print(i)
    img = cv2.imread(f)
    if img is None:
        raise IOError('cannot read image: %s' % f)
    samples[i] = extr(img)
    labels[i] = l

svm = cv2.SVM()
svmparams = dict(
    kernel_type=cv2.SVM_POLY,
    svm_type=cv2.SVM_C_SVC,
    degree=3.43,
    gamma=1.5e-4,
    coef0=1e-1,
)

print('svm train')
svm.train(samples, labels, params=svmparams)
svm.save(model)
print('done')

# Accuracy on the training set itself (not a held-out test set).
result = np.float32([svm.predict(s) for s in samples])
correct = float(np.count_nonzero(result == labels))
total = float(len(labels))
print('%f' % (correct / total))