关于zip:在python中解压缩文件

Unzipping files in python

我通读了zipfile模块的文档,但只弄清楚了如何压缩文件,没弄清楚如何解压缩文件。如何将zip文件的所有内容解压缩到同一目录中?


1
2
3
4
import zipfile
# Open the archive read-only.
zip_ref = zipfile.ZipFile(path_to_zip_file, 'r')
try:
    # Unpack every member of the archive into the target directory.
    zip_ref.extractall(directory_to_extract_to)
finally:
    # Close the archive even when extraction fails, so the file handle
    # is not leaked.
    zip_ref.close()

差不多就是这样!


如果您使用的是python 3.2或更高版本:

1
2
3
import zipfile
# The context manager closes the archive when the block is left,
# so no explicit close() call is needed.
with zipfile.ZipFile("file.zip", mode="r") as archive:
    # Unpack every member into the "targetdir" directory.
    archive.extractall(path="targetdir")

您不需要调用close(),也不需要使用try/except,因为这里使用了上下文管理器(with语句)。


如果您使用的是Python 2.6或更高版本(2.6+),请使用extractall方法。

1
2
from zipfile import ZipFile

# Named "archive" rather than "zip" so the built-in zip() is not shadowed.
archive = ZipFile('file.zip')
try:
    # Without an argument the members go into the current working directory.
    archive.extractall()
finally:
    # ZipFile only became a context manager in Python 2.7, so on 2.6 the
    # archive has to be closed explicitly.
    archive.close()


这是针对zip和rar的递归解决方案:

  • 只需使用下面给出的python代码创建一个文件。
  • 在命令行中运行该代码,例如 python filename.py
  • 它将要求您提供zip或rar文件的绝对路径。
  • 所有文件都会被提取到与zip文件同名的文件夹中。
  • 这与Winrar的"在这里提取"功能相同。
  • 提供了一个额外的功能,即递归提取。例如,如果您的文件"a.zip"包含其他.zip文件,如"b.zip"、"c.zip"等,那么这些文件也将以嵌套方式提取。
  • 对于rar支持,您需要安装unrar和rarfile python包。

    1
    2
    pip install unrar
    pip install rarfile
  • 现在还没有完成,您还必须手动安装Windows和Linux的UNRAR。

    Linux:

    1
    sudo apt-get install unrar

    对于Windows:

    单击此处下载unrar.exe文件

  • 安装它。

  • 现在从Program Files目录中找到unrar.exe文件。
  • 通常位置是:

    1
    C:\Program Files (x86)\GnuWin32\bin\unrar.exe
  • 在Windows的PATH环境变量中添加此路径,因为提取RAR文件时会把它用作UNRAR工具的路径。

    1
    rarfile.UNRAR_TOOL = C:\Program Files (x86)\GnuWin32\bin\unrar.exe

    如果一切都设置好了,就可以部署了。

-----------------

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#import zip file.
import zipfile
# import rarfile
import rarfile
# for path checking.
import os.path
# deleting directory.
import shutil

def check_archrive_file(loc):
    '''
    Extract the archive at ``loc`` when it is an existing .zip or .rar file.

    The contents go into a directory next to the archive, named after the
    archive path with its final extension removed. If that directory already
    exists it is deleted first and the archive is then extracted again.

    loc: path of the candidate archive file.

    Returns None. A wrong extension, a missing file, or an OSError while
    replacing an existing output directory is reported with print()
    instead of being raised.
    '''
    # Only the file name is inspected here (case-sensitively); the content
    # is left to the extractor that gets called.
    if not loc.endswith(('.zip', '.rar')):
        print("File do not have any archrive structure.")
        return
    if not os.path.isfile(loc):
        print("File not located to this path")
        return

    # splitext() removes only the final extension. Splitting on the first
    # '.' truncated every path that had another dot in it, e.g.
    # "/data/v1.2/a.zip" became "/data/v1" instead of "/data/v1.2/a".
    output_directory_location = os.path.splitext(loc)[0]

    if os.path.exists(output_directory_location):
        # A previous extraction (or an unrelated directory) is in the way.
        print("Otput Directory" , output_directory_location)
        print("Deleting old Otput Directory")
        try:
            shutil.rmtree(output_directory_location)
            print("Delete success now extracting")
            # The directory is not recreated here; the extractors are
            # given the path and write into it.
            if loc.endswith('.zip'):
                extractzip(loc, output_directory_location)
            else:
                extractrar(loc, output_directory_location)
        except OSError as e:
            print ("Error: %s - %s." % (e.filename, e.strerror))
        return

    # Fresh extraction: create the output directory, then unpack into it.
    os.mkdir(output_directory_location)
    print(" Otput Directory" , output_directory_location)
    if loc.endswith('.zip'):
        extractzip(loc, output_directory_location)
    else:
        extractrar(loc, output_directory_location)


def extractzip(loc,outloc):
    '''
    Extract the zip archive ``loc`` into ``outloc``, then unpack nested zips.

    loc: path of the zip archive to read.
    outloc: directory that receives the extracted members.

    Every member whose name ends with '.zip' is passed to
    check_archrive_file() after extraction, so zip files inside the archive
    are unpacked in turn.

    Returns None. Errors raised by zipfile while opening or extracting the
    archive propagate to the caller.
    '''
    with zipfile.ZipFile(loc,"r") as zip_ref:
        # Write every member of the archive below outloc.
        zip_ref.extractall(outloc)
        # Remember the nested zip members while the archive is still open.
        nested_zip_names = [
            name for name in zip_ref.namelist() if name.endswith('.zip')
        ]
    # The with-block has already closed the archive (no explicit close()
    # is needed), so the outer file is not held open during the recursion.
    for name in nested_zip_names:
        # Path of the nested archive as it was just written to disk.
        check_archrive_file(os.path.join(outloc, name))


def extractrar(loc,outloc):
    '''
    Extract the rar archive ``loc`` into ``outloc``, then unpack nested rars.

    loc: path of the rar archive to read.
    outloc: directory that receives the extracted members.

    If rarfile does not recognise ``loc`` as a rar archive, a message is
    printed and nothing is extracted. Every member whose name ends with
    '.rar' is passed to check_archrive_file() after extraction.

    Returns None.
    '''
    # Let rarfile decide whether this really is a rar archive.
    if not rarfile.is_rarfile(loc):
        print("File"+loc+" is not a rar file")
        return

    with rarfile.RarFile(loc,"r") as rar_ref:
        # Write every member of the archive below outloc.
        rar_ref.extractall(outloc)
        # Remember the nested rar members while the archive is still open.
        nested_rar_names = [
            info.filename for info in rar_ref.infolist()
            if info.filename.endswith('.rar')
        ]
    # The with-block has already closed the archive (no explicit close()
    # is needed), so the outer file is not held open during the recursion.
    for name in nested_rar_names:
        # Path of the nested archive as it was just written to disk.
        check_archrive_file(os.path.join(outloc, name))




def checkpathVariables():
    '''
    Report whether the unrar tool can be found and point rarfile at it.

    The PYTHONPATH entries and then the PATH entries are searched for one
    whose text contains "unrar.exe"; a PATH match is stored in
    rarfile.UNRAR_TOOL. If neither matches, shutil.which("unrar") is tried,
    so that a PATH listing only directories is searched for the executable
    as well. When nothing is found, the download link is printed.

    Returns None; every result is reported with print().
    '''
    # An unset or empty PYTHONPATH yields no entries.
    user_paths = [
        entry
        for entry in os.environ.get('PYTHONPATH', '').split(os.pathsep)
        if entry
    ]
    for item in user_paths:
        # Print the entry itself: concatenating the whole list to a str
        # raised TypeError as soon as PYTHONPATH was set.
        print("User path python variables :"+item)
    # check rar tool exe present or not.
    for item in user_paths:
        if "unrar.exe" in item:
            print("Unrar tool setup found PYTHONPATH")
            return
    print("Unrar tool setup not found in  PYTHONPATH")

    # os.pathsep is the platform's separator (';' on Windows, ':' on POSIX);
    # a missing PATH is treated as empty instead of raising KeyError.
    os_paths_list = os.environ.get('PATH', '').split(os.pathsep)
    for item in os_paths_list:
        # Matches only when the PATH entry itself names unrar.exe.
        if "unrar.exe" in item:
            print("Unrar tool setup found in PATH")
            rarfile.UNRAR_TOOL = item
            print("Unrar tool path set up complete ."+item)
            return

    # PATH entries are normally directories, so also look the executable
    # up inside them.
    unrar_tool = shutil.which("unrar")
    if unrar_tool:
        print("Unrar tool setup found in PATH")
        rarfile.UNRAR_TOOL = unrar_tool
        print("Unrar tool path set up complete ."+unrar_tool)
        return

    print("Unrar tool setup not found in PATH")
    print("RAR TOOL WILL NOT WORK FOR YOU.")
    downloadlocation ="https://www.rarlab.com/rar/unrarw32.exe"
    print("install unrar form the link"+downloadlocation)




# run the main function
if __name__ == '__main__':
    # Prerequisites for rar archives: the "unrar" and "rarfile" packages
    # (pip install unrar / pip install rarfile) and the unrar tool itself.
    # Zip archives need nothing extra: zipfile is in the standard library.

    # Report whether the unrar tool can be found before asking for input.
    checkpathVariables()
    # Ask for the archive path, then hand it to the extractor.
    location = input('Please provide the absolute path of the zip/rar file-----> ')
    check_archrive_file(location)

-----------------

不要惊慌,它主要分为四个部分。

第1部分

检查您是否正确安装了path变量。如果不想使用rar文件,则不需要此部分。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def checkpathVariables():
    '''
    Report whether the unrar tool can be found and point rarfile at it.

    The PYTHONPATH entries and then the PATH entries are searched for one
    whose text contains "unrar.exe"; a PATH match is stored in
    rarfile.UNRAR_TOOL. If neither matches, shutil.which("unrar") is tried,
    so that a PATH listing only directories is searched for the executable
    as well. When nothing is found, the download link is printed.

    Returns None; every result is reported with print().
    '''
    # An unset or empty PYTHONPATH yields no entries.
    user_paths = [
        entry
        for entry in os.environ.get('PYTHONPATH', '').split(os.pathsep)
        if entry
    ]
    for item in user_paths:
        # Print the entry itself: concatenating the whole list to a str
        # raised TypeError as soon as PYTHONPATH was set.
        print("User path python variables :"+item)
    # check rar tool exe present or not.
    for item in user_paths:
        if "unrar.exe" in item:
            print("Unrar tool setup found PYTHONPATH")
            return
    print("Unrar tool setup not found in  PYTHONPATH")

    # os.pathsep is the platform's separator (';' on Windows, ':' on POSIX);
    # a missing PATH is treated as empty instead of raising KeyError.
    os_paths_list = os.environ.get('PATH', '').split(os.pathsep)
    for item in os_paths_list:
        # Matches only when the PATH entry itself names unrar.exe.
        if "unrar.exe" in item:
            print("Unrar tool setup found in PATH")
            rarfile.UNRAR_TOOL = item
            print("Unrar tool path set up complete ."+item)
            return

    # PATH entries are normally directories, so also look the executable
    # up inside them.
    unrar_tool = shutil.which("unrar")
    if unrar_tool:
        print("Unrar tool setup found in PATH")
        rarfile.UNRAR_TOOL = unrar_tool
        print("Unrar tool path set up complete ."+unrar_tool)
        return

    print("Unrar tool setup not found in PATH")
    print("RAR TOOL WILL NOT WORK FOR YOU.")
    downloadlocation ="https://www.rarlab.com/rar/unrarw32.exe"
    print("install unrar form the link"+downloadlocation)

第2部分

此函数提取zip文件。接受两个参数loc和outloc。loc="带绝对路径的文件名"。outloc="文件将被提取到的位置"。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def extractzip(loc,outloc):
    '''
    Extract the zip archive ``loc`` into ``outloc``, then unpack nested zips.

    loc: path of the zip archive to read.
    outloc: directory that receives the extracted members.

    Every member whose name ends with '.zip' is passed to
    check_archrive_file() after extraction, so zip files inside the archive
    are unpacked in turn.

    Returns None. Errors raised by zipfile while opening or extracting the
    archive propagate to the caller.
    '''
    with zipfile.ZipFile(loc,"r") as zip_ref:
        # Write every member of the archive below outloc.
        zip_ref.extractall(outloc)
        # Remember the nested zip members while the archive is still open.
        nested_zip_names = [
            name for name in zip_ref.namelist() if name.endswith('.zip')
        ]
    # The with-block has already closed the archive (no explicit close()
    # is needed), so the outer file is not held open during the recursion.
    for name in nested_zip_names:
        # Path of the nested archive as it was just written to disk.
        check_archrive_file(os.path.join(outloc, name))

第3部分

此函数提取一个rar文件。几乎和Zip一样。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def extractrar(loc,outloc):
    '''
    Extract the rar archive ``loc`` into ``outloc``, then unpack nested rars.

    loc: path of the rar archive to read.
    outloc: directory that receives the extracted members.

    If rarfile does not recognise ``loc`` as a rar archive, a message is
    printed and nothing is extracted. Every member whose name ends with
    '.rar' is passed to check_archrive_file() after extraction.

    Returns None.
    '''
    # Let rarfile decide whether this really is a rar archive.
    if not rarfile.is_rarfile(loc):
        print("File"+loc+" is not a rar file")
        return

    with rarfile.RarFile(loc,"r") as rar_ref:
        # Write every member of the archive below outloc.
        rar_ref.extractall(outloc)
        # Remember the nested rar members while the archive is still open.
        nested_rar_names = [
            info.filename for info in rar_ref.infolist()
            if info.filename.endswith('.rar')
        ]
    # The with-block has already closed the archive (no explicit close()
    # is needed), so the outer file is not held open during the recursion.
    for name in nested_rar_names:
        # Path of the nested archive as it was just written to disk.
        check_archrive_file(os.path.join(outloc, name))

第4部分

主函数要求用户提供绝对路径。您也可以直接给location变量赋值,将其改为预定义的路径,并把调用input函数的那一行注释掉。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
if __name__ == '__main__':
    # Prerequisites for rar archives: the "unrar" and "rarfile" packages
    # (pip install unrar / pip install rarfile) and the unrar tool itself.
    # Zip archives need nothing extra: zipfile is in the standard library.

    # Report whether the unrar tool can be found before asking for input.
    checkpathVariables()
    # Ask for the archive path, then hand it to the extractor.
    location = input('Please provide the absolute path of the zip/rar file-----> ')
    check_archrive_file(location)

仍然存在的问题。

  • 此解决方案无法提取所有类型的rar文件。
  • 虽然通过了检查
  • 我检查了由winrar创建的rar,它提供了一个警告,不提取文件。

    [如果您能帮助解决此警告和问题,请发表评论]

    1
    2
    3
    4
    5
    6
    rarfile.RarWarning: Non-fatal error [1]: b'

    D:\\Kiosk\\Download\\Tutorial\
    eezoo\\a.rar is not RAR archive

    No files to extract
  • 但能很容易地提取RAR4型。


您也可以只导入ZipFile

1
2
3
4
from zipfile import ZipFile
# Open the archive read-only.
zf = ZipFile('path_to_file/file.zip', 'r')
try:
    # Unpack every member into the target folder.
    zf.extractall('path_to_extract_folder')
finally:
    # Close the archive even when extraction fails, so the file handle
    # is not leaked.
    zf.close()

在python 2和python 3中工作。


1
2
3
4
5
6
7
8
9
10
11
import os
import zipfile

# Folder that holds the zip files; the archives are extracted into it too.
# Raw string: the backslashes are literal path separators, not escapes.
zip_file_path = r"C:\AA\BB"
# List the folder named above (the original referred to an undefined "path").
file_list = os.listdir(zip_file_path)
abs_path = []
for a in file_list:
    # os.path.join replaces the manual '\' concatenation, which was an
    # unterminated string literal.
    x = os.path.join(zip_file_path, a)
    print(x)
    abs_path.append(x)
# NOTE: entries are not checked with zipfile.is_zipfile(), so any non-zip
# entry in the folder makes this loop fail.
for f in abs_path:
    # "archive" rather than "zip" so the built-in zip() is not shadowed;
    # the with-block closes each archive after extraction.
    with zipfile.ZipFile(f) as archive:
        archive.extractall(zip_file_path)

这段代码不会验证文件是否为zip文件;如果文件夹中包含非.zip文件,它将失败。