How to recursively list directories in C on Linux?
我需要用C语言递归列出所有目录和文件。 我研究过FTW(ftw.h),但我正在使用的两个操作系统(Fedora和Minix)都没有自带它。 过去几个小时里读了各种各样的资料,我已经开始头疼了。
如果有人知道可以供我参考的代码片段,那就太好了;或者如果有人能给我指个明确的方向,我将非常感激。
为什么每个人都坚持一次又一次地重新发明轮子?
POSIX.1-2008标准化了 nftw() 函数(在 <ftw.h> 中声明)。
基于朴素(naïve)的 opendir()/readdir()/closedir() 的做法几乎从不处理遍历过程中目录或文件被移动、重命名或删除的情况,而 nftw() 应当能妥善处理这些情况。
例如,请考虑以下C程序,该程序列出从当前工作目录开始的目录树,或命令行中命名的每个目录,或者只是命令行中命名的文件:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | /* We want POSIX.1-2008 + XSI, i.e. SuSv4, features */ #define _XOPEN_SOURCE 700 /* Added on 2017-06-25: If the C library can support 64-bit file sizes and offsets, using the standard names, these defines tell the C library to do so. */ #define _LARGEFILE64_SOURCE #define _FILE_OFFSET_BITS 64 #include <stdlib.h> #include <unistd.h> #include <ftw.h> #include <time.h> #include <stdio.h> #include <string.h> #include <errno.h> /* POSIX.1 says each process has at least 20 file descriptors. * Three of those belong to the standard streams. * Here, we use a conservative estimate of 15 available; * assuming we use at most two for other uses in this program, * we should never run into any problems. * Most trees are shallower than that, so it is efficient. * Deeper trees are traversed fine, just a bit slower. * (Linux allows typically hundreds to thousands of open files, * so you'll probably never see any issues even if you used * a much higher value, say a couple of hundred, but * 15 is a safe, reasonable value.) */ #ifndef USE_FDS #define USE_FDS 15 #endif int print_entry(const char *filepath, const struct stat *info, const int typeflag, struct FTW *pathinfo) { /* const char *const filename = filepath + pathinfo->base; */ const double bytes = (double)info->st_size; /* Not exact if large! 
*/ struct tm mtime; localtime_r(&(info->st_mtime), &mtime); printf("%04d-%02d-%02d %02d:%02d:%02d", mtime.tm_year+1900, mtime.tm_mon+1, mtime.tm_mday, mtime.tm_hour, mtime.tm_min, mtime.tm_sec); if (bytes >= 1099511627776.0) printf(" %9.3f TiB", bytes / 1099511627776.0); else if (bytes >= 1073741824.0) printf(" %9.3f GiB", bytes / 1073741824.0); else if (bytes >= 1048576.0) printf(" %9.3f MiB", bytes / 1048576.0); else if (bytes >= 1024.0) printf(" %9.3f KiB", bytes / 1024.0); else printf(" %9.0f B ", bytes); if (typeflag == FTW_SL) { char *target; size_t maxlen = 1023; ssize_t len; while (1) { target = malloc(maxlen + 1); if (target == NULL) return ENOMEM; len = readlink(filepath, target, maxlen); if (len == (ssize_t)-1) { const int saved_errno = errno; free(target); return saved_errno; } if (len >= (ssize_t)maxlen) { free(target); maxlen += 1024; continue; } target[len] = '\0'; break; } printf(" %s -> %s ", filepath, target); free(target); } else if (typeflag == FTW_SLN) printf(" %s (dangling symlink) ", filepath); else if (typeflag == FTW_F) printf(" %s ", filepath); else if (typeflag == FTW_D || typeflag == FTW_DP) printf(" %s/ ", filepath); else if (typeflag == FTW_DNR) printf(" %s/ (unreadable) ", filepath); else printf(" %s (unknown) ", filepath); return 0; } int print_directory_tree(const char *const dirpath) { int result; /* Invalid directory path? */ if (dirpath == NULL || *dirpath == '\0') return errno = EINVAL; result = nftw(dirpath, print_entry, USE_FDS, FTW_PHYS); if (result >= 0) errno = result; return errno; } int main(int argc, char *argv[]) { int arg; if (argc < 2) { if (print_directory_tree(".")) { fprintf(stderr,"%s. ", strerror(errno)); return EXIT_FAILURE; } } else { for (arg = 1; arg < argc; arg++) { if (print_directory_tree(argv[arg])) { fprintf(stderr,"%s. ", strerror(errno)); return EXIT_FAILURE; } } } return EXIT_SUCCESS; } |
上面的大部分代码都在
上面唯一比较含糊的细节是:决定应该让 nftw() 使用多少个文件描述符。
在Linux中,您可以使用
在评论中,Ruslan提到他们必须切换到
这是一个递归版本:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | #include <unistd.h> #include <sys/types.h> #include <dirent.h> #include <stdio.h> #include <string.h> void listdir(const char *name, int indent) { DIR *dir; struct dirent *entry; if (!(dir = opendir(name))) return; while ((entry = readdir(dir)) != NULL) { if (entry->d_type == DT_DIR) { char path[1024]; if (strcmp(entry->d_name,".") == 0 || strcmp(entry->d_name,"..") == 0) continue; snprintf(path, sizeof(path),"%s/%s", name, entry->d_name); printf("%*s[%s] ", indent,"", entry->d_name); listdir(path, indent + 2); } else { printf("%*s- %s ", indent,"", entry->d_name); } } closedir(dir); } int main(void) { listdir(".", 0); return 0; } |
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

/*
 * Builds "parent/name" in freshly allocated memory.
 * Returns NULL if allocation fails; the caller must free() the result.
 */
static char *join_path(const char *parent, const char *name)
{
    size_t size = strlen(parent) + strlen(name) + 2; /* '/' and NUL */
    char *path = malloc(size);

    if (path != NULL)
        snprintf(path, size, "%s/%s", parent, name);
    return path;
}

/*
 * Returns nonzero if parent/name is a directory that should be descended
 * into, i.e. it is a directory and is neither "." nor "..".
 * Returns 0 if the path cannot be examined.
 *
 * Note: stat() follows symbolic links, so a link pointing at a directory
 * counts as a directory here.
 */
int is_directory_we_want_to_list(const char *parent, char *name)
{
    struct stat st_buf;
    char *path;
    int is_dir;

    if (!strcmp(".", name) || !strcmp("..", name))
        return 0;

    path = join_path(parent, name);
    if (path == NULL)
        return 0;

    /* Only look at st_mode when stat() actually filled the buffer in. */
    is_dir = (stat(path, &st_buf) == 0 && S_ISDIR(st_buf.st_mode));
    free(path);
    return is_dir;
}

/*
 * Prints the name of every entry in directory `name`, one per line, and
 * recurses into each subdirectory.
 *
 * Returns 0 on success, or -1 if `name` cannot be opened. Failures in
 * subdirectories are skipped and do not affect the return value.
 */
int list(const char *name)
{
    DIR *dir = opendir(name);
    struct dirent *ent;

    if (dir == NULL)
        return -1;

    while ((ent = readdir(dir)) != NULL) {
        char *entry_name = ent->d_name;

        printf("%s\n", entry_name);

        if (is_directory_we_want_to_list(name, entry_name)) {
            char *next = join_path(name, entry_name);

            if (next != NULL) {
                list(next);
                free(next);
            }
        }
    }

    closedir(dir);
    return 0;
}
在这种情况下值得浏览的头文件:stat.h,dirent.h。请记住,上面的代码不会检查可能发生的任何错误。
ftw.h中定义的
正如我在评论中提到的,我认为用递归方法处理这种任务有两个固有缺陷。
第一个缺陷是打开文件的限制。此限制对深度遍历施加了限制。如果有足够的子文件夹,递归方法将会中断。 (请参阅有关堆栈溢出的编辑)
第二个缺陷更微妙。递归方法使得测试硬链接变得非常困难。如果文件夹树是循环的(由于硬链接),递归方法将中断(希望没有堆栈溢出)。 (请参阅关于硬链接的编辑)
但是,通过使用单个文件描述符和链接列表替换递归来避免这些问题非常简单。
我假设这不是一个学校项目,递归是可选的。
这是一个示例应用程序。
使用
我为宏和东西道歉...我通常使用内联函数,但我认为如果它只是在一个函数中,它将更容易遵循代码。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | #include <dirent.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> int main(int argc, char const *argv[]) { /* print use instruction unless a folder name was given */ if (argc < 2) fprintf(stderr, " use: " " %s <directory> " "for example: " " %s ./ ", argv[0], argv[0]), exit(0); /*************** a small linked list macro implementation ***************/ typedef struct list_s { struct list_s *next; struct list_s *prev; } list_s; #define LIST_INIT(name) \ { .next = &name, .prev = &name } #define LIST_PUSH(dest, node) \ do { \ (node)->next = (dest)->next; \ (node)->prev = (dest); \ (node)->next->prev = (node); \ (dest)->next = (node); \ } while (0); #define LIST_POP(list, var) \ if ((list)->next == (list)) { \ var = NULL; \ } else { \ var = (list)->next; \ (list)->next = var->next; \ var->next->prev = var->prev; \ } /*************** a record (file / folder) item type ***************/ typedef struct record_s { /* this is a flat processing queue. 
*/ list_s queue; /* this will list all queued and processed folders (cyclic protection) */ list_s folders; /* this will list all the completed items (siblings and such) */ list_s list; /* unique ID */ ino_t ino; /* name length */ size_t len; /* name string */ char name[]; } record_s; /* take a list_s pointer and convert it to the record_s pointer */ #define NODE2RECORD(node, list_name) \ ((record_s *)(((uintptr_t)(node)) - \ ((uintptr_t) & ((record_s *)0)->list_name))) /* initializes a new record */ #define RECORD_INIT(name) \ (record_s){.queue = LIST_INIT((name).queue), \ .folders = LIST_INIT((name).folders), \ .list = LIST_INIT((name).list)} /*************** the actual code ***************/ record_s records = RECORD_INIT(records); record_s *pos, *item; list_s *tmp; DIR *dir; struct dirent *entry; /* initialize the root folder record and add it to the queue */ pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2); *pos = RECORD_INIT(*pos); pos->len = strlen(argv[1]); memcpy(pos->name, argv[1], pos->len); if (pos->name[pos->len - 1] != '/') pos->name[pos->len++] = '/'; pos->name[pos->len] = 0; /* push to queue, but also push to list (first item processed) */ LIST_PUSH(&records.queue, &pos->queue); LIST_PUSH(&records.list, &pos->list); /* as long as the queue has items to be processed, do so */ while (records.queue.next != &records.queue) { /* pop queued item */ LIST_POP(&records.queue, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, queue); /* add record to the processed folder list */ LIST_PUSH(&records.folders, &pos->folders); /* process the folder and add all folder data to current list */ dir = opendir(pos->name); if (!dir) continue; while ((entry = readdir(dir)) != NULL) { /* create new item, copying it's path data and unique ID */ item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2); *item = RECORD_INIT(*item); item->len = pos->len + entry->d_namlen; memcpy(item->name, pos->name, pos->len); memcpy(item->name + pos->len, entry->d_name, 
entry->d_namlen); item->name[item->len] = 0; item->ino = entry->d_ino; /* add item to the list, right after the `pos` item */ LIST_PUSH(&pos->list, &item->list); /* unless it's a folder, we're done. */ if (entry->d_type != DT_DIR) continue; /* test for '.' and '..' */ if (entry->d_name[0] == '.' && (entry->d_name[1] == 0 || (entry->d_name[1] == '.' && entry->d_name[2] == 0))) continue; /* add folder marker */ item->name[item->len++] = '/'; item->name[item->len] = 0; /* test for cyclic processing */ list_s *t = records.folders.next; while (t != &records.folders) { if (NODE2RECORD(t, folders)->ino == item->ino) { /* we already processed this folder! */ break; /* this breaks from the small loop... */ } t = t->next; } if (t != &records.folders) continue; /* if we broke from the small loop, entry is done */ /* item is a new folder, add to queue */ LIST_PUSH(&records.queue, &item->queue); } closedir(dir); } /*************** Printing the results and cleaning up ***************/ while (records.list.next != &records.list) { /* pop list item */ LIST_POP(&records.list, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, list); /* prepare for next iteration */ LIST_POP(&records.list, tmp); fwrite(pos->name, pos->len, 1, stderr); fwrite(" ", 1, 1, stderr); free(pos); } return 0; } |
编辑
@Stargateur在评论中提到递归代码可能会在达到打开文件限制之前溢出堆栈。
虽然我没有看到堆栈溢出是如何更好的,但只要进程在调用时没有接近文件限制,这种评估可能是正确的。
@Stargateur在评论中提到的另一点是,递归代码的深度受到子目录最大数量(ext4文件系统上为64000)的限制,并且出现硬链接循环的可能性极小(因为Linux / Unix上不允许对文件夹创建硬链接)。
如果代码在Linux上运行(根据问题,确实如此),这是个好消息,因此这个问题并不是真正的隐患(除非在macOS或Windows上运行代码)……不过,递归深入64K层子文件夹仍然可能把堆栈撑爆。
话虽如此,无递归选项仍然具有优势,例如能够轻松地对处理的项目数量添加限制以及能够缓存结果。
附:
根据评论,这里是一个非递归版本的代码,不检查循环层次结构。它更快,应该足够安全,可以在不允许使用文件夹硬链接的Linux机器上使用。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | #include <dirent.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> int main(int argc, char const *argv[]) { /* print use instruction unless a folder name was given */ if (argc < 2) fprintf(stderr, " use: " " %s <directory> " "for example: " " %s ./ ", argv[0], argv[0]), exit(0); /*************** a small linked list macro implementation ***************/ typedef struct list_s { struct list_s *next; struct list_s *prev; } list_s; #define LIST_INIT(name) \ { .next = &name, .prev = &name } #define LIST_PUSH(dest, node) \ do { \ (node)->next = (dest)->next; \ (node)->prev = (dest); \ (node)->next->prev = (node); \ (dest)->next = (node); \ } while (0); #define LIST_POP(list, var) \ if ((list)->next == (list)) { \ var = NULL; \ } else { \ var = (list)->next; \ (list)->next = var->next; \ var->next->prev = var->prev; \ } /*************** a record (file / folder) item type ***************/ typedef struct record_s { /* this is a flat processing queue. 
*/ list_s queue; /* this will list all the completed items (siblings and such) */ list_s list; /* unique ID */ ino_t ino; /* name length */ size_t len; /* name string */ char name[]; } record_s; /* take a list_s pointer and convert it to the record_s pointer */ #define NODE2RECORD(node, list_name) \ ((record_s *)(((uintptr_t)(node)) - \ ((uintptr_t) & ((record_s *)0)->list_name))) /* initializes a new record */ #define RECORD_INIT(name) \ (record_s){.queue = LIST_INIT((name).queue), .list = LIST_INIT((name).list)} /*************** the actual code ***************/ record_s records = RECORD_INIT(records); record_s *pos, *item; list_s *tmp; DIR *dir; struct dirent *entry; /* initialize the root folder record and add it to the queue */ pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2); *pos = RECORD_INIT(*pos); pos->len = strlen(argv[1]); memcpy(pos->name, argv[1], pos->len); if (pos->name[pos->len - 1] != '/') pos->name[pos->len++] = '/'; pos->name[pos->len] = 0; /* push to queue, but also push to list (first item processed) */ LIST_PUSH(&records.queue, &pos->queue); LIST_PUSH(&records.list, &pos->list); /* as long as the queue has items to be processed, do so */ while (records.queue.next != &records.queue) { /* pop queued item */ LIST_POP(&records.queue, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, queue); /* process the folder and add all folder data to current list */ dir = opendir(pos->name); if (!dir) continue; while ((entry = readdir(dir)) != NULL) { /* create new item, copying it's path data and unique ID */ item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2); *item = RECORD_INIT(*item); item->len = pos->len + entry->d_namlen; memcpy(item->name, pos->name, pos->len); memcpy(item->name + pos->len, entry->d_name, entry->d_namlen); item->name[item->len] = 0; item->ino = entry->d_ino; /* add item to the list, right after the `pos` item */ LIST_PUSH(&pos->list, &item->list); /* unless it's a folder, we're done. 
*/ if (entry->d_type != DT_DIR) continue; /* test for '.' and '..' */ if (entry->d_name[0] == '.' && (entry->d_name[1] == 0 || (entry->d_name[1] == '.' && entry->d_name[2] == 0))) continue; /* add folder marker */ item->name[item->len++] = '/'; item->name[item->len] = 0; /* item is a new folder, add to queue */ LIST_PUSH(&records.queue, &item->queue); } closedir(dir); } /*************** Printing the results and cleaning up ***************/ while (records.list.next != &records.list) { /* pop list item */ LIST_POP(&records.list, tmp); /* collect record to process */ pos = NODE2RECORD(tmp, list); /* prepare for next iteration */ LIST_POP(&records.list, tmp); fwrite(pos->name, pos->len, 1, stderr); fwrite(" ", 1, 1, stderr); free(pos); } return 0; } |
这是一个递归的简化版本,但使用的堆栈空间更少:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | #include <errno.h> #include <stdio.h> #include <string.h> #include <sys/types.h> #include <unistd.h> #include <dirent.h> void listdir(char *path, size_t size) { DIR *dir; struct dirent *entry; size_t len = strlen(path); if (!(dir = opendir(path))) { fprintf(stderr,"path not found: %s: %s ", path, strerror(errno)); return; } puts(path); while ((entry = readdir(dir)) != NULL) { char *name = entry->d_name; if (entry->d_type == DT_DIR) { if (!strcmp(name,".") || !strcmp(name,"..")) continue; if (len + strlen(name) + 2 > size) { fprintf(stderr,"path too long: %s/%s ", path, name); } else { path[len] = '/'; strcpy(path + len + 1, name); listdir(path, size); path[len] = '\0'; } } else { printf("%s/%s ", path, name); } } closedir(dir); } int main(void) { char path[1024] ="."; listdir(path, sizeof path); return 0; } |
在我的系统上,它的输出与