UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 7240: character maps to <undefined>
我是一名正在做硕士论文的学生。作为论文的一部分,我正在使用Python。我正在读取一个CSV日志文件,但在读取时出现了以下错误:
Traceback (most recent call last):
  File "C:\Users\SGADI\workspace\DAB_Trace\my_code\trace_parcer.py", line 19, in <module>
    for row in reader:
  File "C:\Users\SGADI\Desktop\Python-32bit-3.4.3.2\python-3.4.3\lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 7240: character maps to <undefined>
"""Extract signal fields from a DAB trace log (CSV) into a tab-separated file.

Each input row is ';'-delimited. The first column is taken as the timestamp
and the last column as a free-text details field that may contain
``SNRLevel=<decimal>``, ``Frequency=<hex>``, ``RSSI=<decimal>`` and
``DABSignalPresent=<decimal>`` tokens. A token that is absent from a row is
recorded as 0.
"""
import csv
import datetime  # noqa: F401 -- imported by the original script; kept in scope
import re

INPUT_PATH = 'test_log_20150325_gps.csv'
OUTPUT_PATH = 'output.txt'
# cp1252 is the codec the failing run used implicitly (see the traceback).
# It is now named explicitly and combined with errors='replace', so bytes it
# does not define (such as 0x8d) become U+FFFD instead of aborting the run.
INPUT_ENCODING = 'cp1252'

# Compiled once instead of on every row; raw strings keep '\d' a regex escape.
SNR_RE = re.compile(r'SNRLevel=(\d+)')
FREQ_RE = re.compile(r'Frequency=([0-9a-fA-F]+)')
RSSI_RE = re.compile(r'RSSI=(\d+)')
DAB_PRESENT_RE = re.compile(r'DABSignalPresent=(\d+)')


def _first_int(pattern, text, base=10):
    """Return the first capture of *pattern* in *text* as an int, else 0."""
    match = pattern.search(text)
    if match is None:
        return 0
    return int(match.group(1), base)


def parse_csv_file(input_path=INPUT_PATH, output_path=OUTPUT_PATH,
                   encoding=INPUT_ENCODING):
    """Parse the trace log and write one tab-separated record per row.

    Args:
        input_path: ';'-delimited log file to read.
        output_path: text file to (over)write with
            ``timestamp<TAB>freq<TAB>snr<TAB>rssi<TAB>dab_present`` lines.
        encoding: codec used to decode the log; undecodable bytes are
            replaced rather than raising ``UnicodeDecodeError``.

    Returns:
        A dict with the keys ``timestamp``, ``freq``, ``snr``, ``rssi`` and
        ``dab_present``, each mapping to the list of per-row values.
    """
    timestamp_list = []
    snr_list = []
    freq_list = []
    rssi_list = []
    dab_present_list = []
    counter = 0

    # newline='' is what the csv module expects from its input file.
    # The output is UTF-8 so a replacement character that ends up in the
    # timestamp column can still be written.
    with open(input_path, encoding=encoding, errors='replace',
              newline='') as csvfile, \
            open(output_path, 'w', encoding='utf-8') as out:
        reader = csv.reader(csvfile, delimiter=';')
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; row[0] would fail.
                continue

            timestamp = row[0]
            details = row[-1]
            counter += 1
            print(counter)

            snr = _first_int(SNR_RE, details)
            freq = _first_int(FREQ_RE, details, 16)  # frequency is hex
            rssi = _first_int(RSSI_RE, details)
            dab_present = _first_int(DAB_PRESENT_RE, details)

            timestamp_list.append(timestamp)
            snr_list.append(snr)
            freq_list.append(freq)
            rssi_list.append(rssi)
            dab_present_list.append(dab_present)

            # One record per line; fields are tab-separated.
            fields = (timestamp, freq, snr, rssi, dab_present)
            out.write('\t'.join(str(value) for value in fields) + '\n')
            print(timestamp, freq, snr, rssi, dab_present)

    return {
        'timestamp': timestamp_list,
        'freq': freq_list,
        'snr': snr_list,
        'rssi': rssi_list,
        'dab_present': dab_present_list,
    }


if __name__ == '__main__':
    parse_csv_file()
我查找了那个特殊字符,但没有找到。我在网上搜索,看到有人建议更改编码格式:我尝试了UTF-8、Latin-1和其他一些编码,但仍然出现这个错误。你能帮我解决这个问题吗?
请帮我找到解决办法,谢谢。
我已经解决了这个问题。我们可以用这个代码
import codecs  # noqa: F401 -- kept from the original snippet; open() decodes now

# Candidate encodings, tried in order. The last one doubles as the lossy
# fallback when no candidate decodes the file cleanly.
types_of_encoding = ["utf8", "cp1252"]


def open_with_fallback(filename, encodings=None):
    """Open *filename* as text using the first encoding that decodes it.

    Each candidate is probed with strict error handling over the whole file.
    The first one that succeeds is used to reopen the file for the caller.
    If none succeeds, the file is opened with the last candidate and
    ``errors='replace'`` so undecodable bytes become U+FFFD.

    Passing ``errors='replace'`` to every candidate would make the first one
    always "succeed", and looping over the candidates would then process the
    file once per encoding.

    Args:
        filename: path of the file to open.
        encodings: sequence of codec names; defaults to ``types_of_encoding``.

    Returns:
        An open text file object (use it in a ``with`` block), suitable for
        ``csv.reader`` because it is opened with ``newline=''``.

    Raises:
        ValueError: if *encodings* is empty.

    Example::

        with open_with_fallback(filename) as csvfile:
            ...  # your code
    """
    if encodings is None:
        encodings = types_of_encoding
    if not encodings:
        raise ValueError("at least one encoding is required")

    for encoding_type in encodings:
        try:
            with open(filename, encoding=encoding_type, newline='') as probe:
                for _ in probe:  # force a strict decode of every line
                    pass
        except UnicodeDecodeError:
            continue
        return open(filename, encoding=encoding_type, newline='')

    return open(filename, encoding=encodings[-1], errors='replace',
                newline='')
def decode_line(ln, encodings=('utf-8', 'cp1252', 'cp850')):
    """Decode one raw line with the first codec that accepts it.

    UTF-8 is tried first because it is the strictest of the three. The
    single-byte code pages accept nearly any byte sequence, so trying them
    first would turn valid UTF-8 into mojibake and never reach UTF-8.

    Args:
        ln: the undecoded line as ``bytes``.
        encodings: codec names to try, in order.

    Returns:
        The decoded ``str``, or ``None`` if every codec raised
        ``UnicodeDecodeError``.
    """
    for cp in encodings:
        try:
            return ln.decode(cp)
        except UnicodeDecodeError:
            pass
    return None


def iter_decoded_lines(path='input.tsv'):
    """Yield each line of *path* decoded by ``decode_line``.

    The file is read in binary mode so one bad byte cannot abort the whole
    read. Lines that no codec can decode are skipped.
    """
    with open(path, 'rb') as f:
        for ln in f:
            line = decode_line(ln)
            if line is not None:
                yield line


if __name__ == '__main__':
    for line in iter_decoded_lines('input.tsv'):
        pass  # use 'line'