网上没找到DBSCAN的C语言实现,基本都是Python或者C++实现。由于项目需要用C实现,而自己对C也不太熟悉,所以先简单写了一个,暂时能跑通,但还不确定是否有bug,等后面再优化。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | #include <stdio.h> #include <stdlib.h> #include <math.h> #include <string.h> void normalization_points(float point[][3], int rows) { //聚类前对数据做最大最小值归一化 保证每个维度量纲一致 // points -- 原始数据点云 // rows -- 传入参数的行数 // 聚类 只关注[x, y, v] //归一化最值范围 float x_min = -20; //单位 m float x_max = 20; float y_min = 0; float y_max = 50; float v_max = 20; //单位 m/s for (int i=0; i<rows; i++) { point[i][0] = (point[i][0] - x_min) / (x_max - x_min); point[i][1] = (point[i][1] - y_min) / (y_max - y_min); point[i][2] = fabsf(point[i][2]/ v_max); } } float cal_distance(float point[][3], int firstPoint, int secondPoint) { float temp = sqrt( pow((double)(point[firstPoint][0] - point[secondPoint][0]), 2) + pow((double)(point[firstPoint][1] - point[secondPoint][1]), 2) + pow((double)(point[firstPoint][2] - point[secondPoint][2]), 2)); return temp; } void dbscan_cluster(float point[][3], int point_num, int MinPts, float neighborhood) { //预分配内存 int *clusterIndex = (int*)malloc(sizeof(int) * point_num); //聚类蔟标签 int *dataType = (int*)malloc(sizeof(int) * point_num); //区分是否为核心点 int *visited_data = (int*)malloc(sizeof(int) * point_num); //访问标志,访问过的为1,未访问的为0 memset(clusterIndex, 0, sizeof(int) * point_num); memset(dataType, 0, sizeof(int) * point_num); memset(visited_data, 0, sizeof(int) * point_num); /* 求核心点 */ for(int i = 0; i<point_num; i++) { int inter_point_num = 0; for(int j = 0; j<point_num; j++) { float distance = cal_distance(point, i, j); if(distance <= neighborhood) { inter_point_num++; } } if(inter_point_num >=MinPts) //点数大于MinPts 为核心点 { dataType[i] = 2; //核心点的标签为2 } } /* 由核心点依次找密度可达点 */ int cluster_num = 0; for(int i=0; 
i<point_num; i++) { if((dataType[i]==2) && (!visited_data[i]))//若为核心点且还未分配类别,则由核心点查找密度可达点 { visited_data[1] = 0;// int *record_single_cluster = (int*)malloc(sizeof(int) * point_num); //存储可达点索引 int length = 0; memset(record_single_cluster, 0, sizeof(int) * point_num); //遍历查找密度可达点 for(int j=0; j<point_num; j++) { if(!visited_data[j]) { float temp = cal_distance(point, i, j); if(temp < neighborhood) { record_single_cluster[length] = j;//记录当前序号 length++; } } } //若length > minPts,存为一个类别 if(length > MinPts) { cluster_num++;//类别+1 for(int j=0; j<length; j++) { visited_data[record_single_cluster[j]] = 1; clusterIndex[record_single_cluster[j]] = cluster_num; } } free(record_single_cluster); record_single_cluster = NULL; } } for(int i=0; i<point_num; i++) { printf("%d\n", clusterIndex[i]); } /* 聚类完毕 */ /* cluster_num为类别总数,clusterIndex存储的为每个点的聚类类别,0为噪声 */ free(clusterIndex); clusterIndex = NULL; free(dataType); dataType = NULL; free(visited_data); visited_data = NULL; } int main() { float point[13][3] = {<!-- -->{1,1,1},{1.1,1.1,1.1},{1,1.2,1.1},{15,15.1,18.1},{15.2,15.5,17.9},{1.3,1.1,1},{1.01,1.02,1.03},{5,5,5},{10.1,9.1,11.1},{5.01,5.02,5.03},{5.02,5.05,5.01},{5.02,5.01,5.05},{10,9,11}}; int point_num = 13; normalization_points(point, point_num); int MinPts = 3; float neighborhood = 0.1; dbscan_cluster(point, point_num, MinPts, neighborhood); return 0; } |