forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathremove_dulipcates_file.py
65 lines (59 loc) · 2.29 KB
/
remove_dulipcates_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import sys
import hashlib
from os.path import getsize
class ClearRepeat(object):
    """Find and delete duplicate files below a directory.

    Files are compared by a ``(size, MD5 digest of the raw bytes)`` key. The
    first file seen with a given key is kept; any other file with the same
    key is deleted from disk. Typical use is ``getSource(path)`` followed by
    ``findRepeat()``.

    WARNING: ``findRepeat`` permanently removes files with ``os.remove``.
    """

    def __init__(self):
        # Root path accepted by the last successful getSource call.
        self.file_path = None
        # Paths of every file found below file_path (None until collected).
        self.file_collection = None
        # Maps (size, digest) -> path of the file kept for that key.
        self.file_origin = {}
        # Not written by any method of this class; kept for compatibility.
        self.file_repeat = {}

    def getSource(self, file_path):
        """Collect the paths of all files below *file_path*.

        On success ``self.file_path`` and ``self.file_collection`` are set.
        If the path does not exist or an error occurs, both are reset to
        ``None`` so a later ``findRepeat`` cannot act on the results of an
        earlier call.
        """
        # Drop state from any earlier call before trying the new path.
        self.file_path = None
        self.file_collection = None
        try:
            if not os.path.exists(file_path):
                return
            collected = []
            for dirpath, _dirnames, filenames in os.walk(file_path):
                for name in filenames:
                    collected.append(os.path.join(dirpath, name))
                    # '\r' keeps the running total on a single console line.
                    print('File Collection Success.Total File:%d\r' % len(collected), end='')
            print('\n')
            self.file_path = file_path
            self.file_collection = collected
        except (OSError, TypeError, ValueError) as error:
            print(error)

    def findRepeat(self):
        """Delete every collected file that duplicates an earlier one.

        Errors on an individual file (unreadable, vanished, not removable)
        are printed and that file is skipped; processing continues.
        """
        if self.file_path is None or not self.file_collection:
            print("\nPlease Check File Path Is Correctly!")
            return

        file_count = 0
        for file in self.file_collection:
            try:
                compound_key = (getsize(file), self.createChecksum(file))
                kept = self.file_origin.get(compound_key)
                if kept is None or not os.path.exists(kept):
                    # First sighting, or the recorded original is gone:
                    # this file becomes the one to keep.
                    self.file_origin[compound_key] = file
                elif not os.path.samefile(kept, file):
                    print("\nDelete Repete File %s" % file)
                    os.remove(file)
                # Otherwise `file` IS the kept file (second pass over the
                # same tree, or a symlink/hard link to it); removing it
                # could destroy the only copy, so leave it alone.
            except OSError as error:
                print(error)
            file_count += 1
            print("Check File Count:%d\r" % file_count, end='')
        print("\nDelete Repeat File Success!")

    def createChecksum(self, path):
        """Return the MD5 digest (``bytes``) of the file at *path*.

        The file is read in binary mode so every byte contributes to the
        digest. Reading it as text would drop undecodable bytes and
        translate line endings, which lets files with different content
        share a digest and be deleted as duplicates.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        checksum = hashlib.md5()
        # The context manager closes the handle even if a read fails.
        with open(path, 'rb') as fp:
            while True:
                buffer = fp.read(8192)
                if not buffer:
                    break
                checksum.update(buffer)
        return checksum.digest()
if __name__ == '__main__':
    # WARNING: this script permanently deletes duplicate files below the
    # chosen directory.
    # The directory is taken from the first command-line argument; with no
    # argument the previously hard-coded location is used, so existing
    # invocations behave as before.
    default_path = '/Users/weidongc/Downloads/20220910中三班中秋节做月饼'
    file_path = sys.argv[1] if len(sys.argv) > 1 else default_path
    obj_clear = ClearRepeat()
    obj_clear.getSource(file_path)
    obj_clear.findRepeat()