-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclone.py
More file actions
executable file
·450 lines (390 loc) · 13.9 KB
/
clone.py
File metadata and controls
executable file
·450 lines (390 loc) · 13.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
#!/usr/bin/python
import sys, os, re, json, hashlib
from subprocess import check_output
from subprocess import CalledProcessError
from datetime import datetime
from time import ctime
import argparse
from argparse import RawTextHelpFormatter
##################################################
#
# Configuration variables
#
##################################################
# Home directory of the current user. expanduser() returns $HOME when it is
# set and falls back to the platform's user database otherwise, so a missing
# HOME variable no longer aborts the script with a KeyError.
_homeDir = os.path.expanduser("~")

# Directory holding the state files and the error log.
configDir = _homeDir + "/.config/gclone"
# JSON snapshots of the remote and the local tree.
remoteDataFileName = configDir + "/remote-data"
localDataFileName = configDir + "/local-data"
# Path of the rclone executable and name of the rclone remote to talk to.
rclone = "/usr/sbin/rclone"
remoteName = "remote"
# Local directory that is compared against the remote.
localDir = _homeDir + "/GDrive"
# Runtime switches; these are only the defaults, the main program overwrites
# them with the values parsed from the command line.
verbose = False
debug = False
useMd5 = True
fastRemote = False
dryRun = False
#
# Action enum
#
ADD = "add"
DEL = "remove"
MOD = "modify"
#
# Conflict enum
#
CONFLICT_ADD = "add_conflict"
CONFLICT_MOD = "mod_conflict"
CONFLICT_DEL = "del_conflict"
CONFLICT_ERR = "error"
# The error log lives inside configDir, so the directory must exist before
# the log is opened; on a fresh installation it does not exist yet.
if not os.path.isdir(configDir):
    os.makedirs(configDir)
# Receives the stderr output of every rclone invocation.
stdErrLogFile = open(configDir + "/error.log", 'w')
##################################################
#
# Configuration variables - End
#
##################################################
##################################################
#
# Main commands
#
##################################################
def init():
    """Create the configuration directory and record the initial state.

    Takes a snapshot of the local tree and of the remote tree and writes each
    one as JSON to ``localDataFileName`` and ``remoteDataFileName``.
    """
    print("Initializing...")
    if not os.path.exists(configDir):
        os.makedirs(configDir)

    # Local files
    localData = readLocalTree()
    # "with" closes the state file even when serialization fails half way.
    with open(localDataFileName, 'w') as localDataFile:
        json.dump(localData, localDataFile, default=dateTimeSerializer)

    # Remote files: directories first, then the files are merged into the
    # same dictionary by readRemoteFiles().
    dirs = {}
    verbosePrint("Fetching remote folders data...")
    readRemoteTree("/", dirs)
    verbosePrint("Done.\n")
    readRemoteFiles(dirs)
    debugPrint("Final remote data structure:\n" + str(dirs) + "\n")
    with open(remoteDataFileName, 'w') as remoteDataFile:
        json.dump(dirs, remoteDataFile, default=dateTimeSerializer)
def clone():
    """Work out what changed on both sides since the saved state.

    Loads the stored remote and local snapshots, reads the current remote and
    local trees, derives the incoming and outgoing action lists and the
    conflicts between them. With ``dryRun`` set the result is printed;
    otherwise only a placeholder message is printed.
    """
    verbosePrint("Reading state data...")
    with open(remoteDataFileName) as remoteStateFile:
        savedRemote = json.load(remoteStateFile, object_pairs_hook=dateTimeDeserailizer)
    with open(localDataFileName) as localStateFile:
        savedLocal = json.load(localStateFile, object_pairs_hook=dateTimeDeserailizer)

    # Current remote state
    remoteEntries = {}
    verbosePrint("Fetching remote folders data...")
    readRemoteTree("/", remoteEntries, fastRemoteHandling=fastRemote)
    verbosePrint("Done.\n")
    currentRemote = readRemoteFiles(remoteEntries, fastRemoteHandling=fastRemote)

    # Current local state
    currentLocal = readLocalTree()

    verbosePrint("Calculating action lists...")
    incoming = compareStates(savedRemote, currentRemote)
    outgoing = compareStates(savedLocal, currentLocal)
    if fastRemote:
        incoming = checkIncomingFolderDeletes(incoming)
    conflicts = checkForConflicts(incoming, outgoing)

    if not dryRun:
        print("implement action reply")
        return

    printActionList("INCOMING", incoming)
    print("")
    printActionList("OUTGOING", outgoing)
    print("")
    print("CONFLICTS")
    print(conflicts)
def config():
    """Print the program configuration, one setting per line."""
    settings = (
        ("Configuration directory: ", configDir),
        ("Remote state file name: ", remoteDataFileName),
        ("Local state file name: ", localDataFileName),
        ("Path to rclone: ", rclone),
        ("The name of the remote: ", remoteName),
        ("Local directory: ", localDir),
    )
    for label, value in settings:
        print(label + value)
##################################################
#
# Building remote file information
#
##################################################
def readRemoteTree(dirName, dirs, fastRemoteHandling=False):
    """Gather data on all remote dirs.

    Runs ``rclone lsd`` on ``dirName`` and recurses into every directory it
    lists, storing one record per directory in ``dirs`` under the key
    ``/<path>``.

    dirName            -- remote directory to list, "/" for the root
    dirs               -- dictionary that receives the records (modified in
                          place)
    fastRemoteHandling -- when true nothing is listed and ``dirs`` is returned
                          untouched

    Returns ``dirs``.
    """
    if fastRemoteHandling:
        return dirs
    remoteDirs = check_output([rclone, "lsd", remoteName + ":" + dirName], stderr=stdErrLogFile)
    for line in remoteDirs.splitlines():
        if not line.strip():
            continue  # nothing to parse on an empty line
        # Strip the padding first so the field positions do not depend on
        # whether the first column is preceded by blanks. "\s+" is used because
        # a pattern that can match the empty string ("\s*") is split at every
        # character by newer versions of re. The fifth field keeps the rest of
        # the line, i.e. the name including any blanks in it.
        dirRaw = re.split(r"\s+", line.lstrip(), maxsplit=4)
        if dirName == "/":
            newDirName = dirName + dirRaw[4]
        else:
            newDirName = dirName + "/" + dirRaw[4]
        dirData = {
            'size': 0,
            'date': datetime.strptime(dirRaw[1] + " " + dirRaw[2], "%Y-%m-%d %H:%M:%S"),
            'name': newDirName,
            'md5': "0",
            'type': "dir"
        }
        debugPrint(str(dirData))
        dirs[newDirName] = dirData
        readRemoteTree(newDirName, dirs, fastRemoteHandling)
    return dirs
def readRemoteFiles(dirs, fastRemoteHandling=False):
    """Add a record for every remote file to ``dirs``.

    The files are taken from ``rclone lsl``; when ``useMd5`` is set their
    checksums are taken from ``rclone md5sum``.

    dirs               -- dictionary that receives the records, keyed by the
                          file name as printed by rclone (modified in place)
    fastRemoteHandling -- when true the parent directories of every file are
                          added as well, via deduceDirName()

    Returns ``dirs``.
    """
    verbosePrint("Fetching remote files...")
    md5sums = {}
    if useMd5:
        remoteMd5Files = check_output([rclone, "md5sum", remoteName + ":/"], stderr=stdErrLogFile)
        for line in remoteMd5Files.splitlines():
            # "<checksum> <name>"; the name keeps its embedded blanks.
            fileLine = re.split(r"\s+", line, maxsplit=1)
            if len(fileLine) < 2:
                continue  # empty line, no name to record
            md5sums[fileLine[1]] = fileLine[0]
        debugPrint("Remote file md5 checsums:\n" + str(md5sums) + "\n")
    debugPrint("Remote files data:")
    remoteFiles = check_output([rclone, "lsl", remoteName + ":/"], stderr=stdErrLogFile)
    for line in remoteFiles.splitlines():
        debugPrint(line)
        if not line.strip():
            continue
        # A line may or may not start with blanks in front of the size.
        # Removing them up front makes the layout uniform, so three splits
        # always leave the complete file name (blanks included) in the last
        # field.
        size, fileDate, fileTime, fileName = re.split(r"\s+", line.lstrip(), maxsplit=3)
        fileData = {
            'size': int(size),
            # [0:15] cuts the time down to "HH:MM:SS.ffffff" because %f
            # accepts at most six fractional digits.
            'date': datetime.strptime(fileDate + " " + fileTime[0:15], "%Y-%m-%d %H:%M:%S.%f"),
            'name': fileName,
            # "0" stands for "no checksum"; it is also used for a file that is
            # missing from the md5sum listing.
            'md5': md5sums.get(fileName, "0") if useMd5 else "0",
            'type': "file"
        }
        debugPrint(str(fileData))
        dirs[fileName] = fileData
        if fastRemoteHandling:
            deduceDirName(dirs, fileName)
    verbosePrint("Done.\n")
    return dirs
def deduceDirName(dirs, fileName):
    """Adds the file's parent dirs as an entry.

    Every ancestor directory of ``fileName`` gets a record in ``dirs`` under
    the key ``/<path>``, deepest directory first. The real modification time
    is not known here, so the current time is stored instead. An existing
    record for the same directory is overwritten.

    Returns ``dirs``.
    """
    parent = fileName
    # Files in the root dir contain no "/" and therefore add nothing.
    while "/" in parent:
        parent = parent[:parent.rindex("/")]
        dirName = "/" + parent
        dirData = {
            'size': 0,
            'date': datetime.now(),
            'name': dirName,
            'md5': "0",
            'type': "dir"
        }
        dirs[dirName] = dirData
        debugPrint(str(dirData))
    return dirs
##################################################
#
# Building local file information
#
##################################################
def readLocalTree():
    """Collect a record for every directory and file below ``localDir``.

    Directories are keyed by their path relative to ``localDir`` with a
    leading "/", files by their relative path without it. ``localDir`` itself
    gets no record.
    """
    verbosePrint("Inspecting local files and folders.")
    entries = {}
    prefixLength = len(localDir)
    for root, _subDirs, files in os.walk(localDir):
        relativeDir = root[prefixLength:]
        if relativeDir:
            entries[relativeDir] = localDirData(root, relativeDir)
            fileBase = relativeDir[1:] + "/"
        else:
            # Files directly inside localDir carry no directory prefix.
            fileBase = ""
        for fileName in files:
            relativeName = fileBase + fileName
            entries[relativeName] = localFileData(root + "/" + fileName, relativeName)
    debugPrint("Local file and folder data:\n" + str(entries))
    return entries
def localDirData(dirName, relativeDir):
    """Build the record of a local directory.

    dirName     -- path used to stat the directory
    relativeDir -- name stored in the record

    The modification time is stored with the microseconds cut off.
    """
    modified = datetime.fromtimestamp(os.lstat(dirName).st_mtime)
    return {
        'size': 0,
        'date': modified.replace(microsecond=0),
        'name': relativeDir,
        'md5': "0",
        'type': "dir"
    }
def localFileData(fileName, relativeFileName, calcMd5=True):
    """Build the record of a local file.

    fileName         -- path used to stat (and, if requested, hash) the file
    relativeFileName -- name stored in the record
    calcMd5          -- when false the checksum is not computed and "0" is
                        stored instead
    """
    status = os.lstat(fileName)
    record = {
        'size': status.st_size,
        'date': datetime.fromtimestamp(status.st_mtime),
        'name': relativeFileName,
    }
    if calcMd5:
        record['md5'] = md5(fileName)
    else:
        record['md5'] = "0"
    record['type'] = "file"
    return record
##################################################
#
# Helper functions
#
##################################################
def md5(fname):
    """Return the hexadecimal MD5 digest of the file ``fname``.

    The file is read in blocks of 102400 bytes so it never has to fit into
    memory as a whole.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        block = stream.read(102400)
        while block:
            digest.update(block)
            block = stream.read(102400)
    return digest.hexdigest()
def verbosePrint(str):
    """Print ``str``, but only when ``verbose`` is set."""
    if not verbose:
        return
    print(str)
def debugPrint(str):
    """Print ``str``, but only when both ``verbose`` and ``debug`` are set."""
    if not verbose or not debug:
        return
    print(str)
##################################################
#
# Datetime serializer/deserializer
#
##################################################
def dateTimeSerializer(obj):
    """JSON serializer for datetime objects.

    Intended as the ``default`` hook of ``json.dump``: datetime objects are
    returned in ISO 8601 form.

    Raises TypeError for any other object. Handing an unsupported object back
    unchanged would make the encoder call this hook on it again and fail with
    a misleading "Circular reference detected" error.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError("Object of type " + type(obj).__name__ + " is not JSON serializable")
def dateTimeDeserailizer(pairs):
    """JSON deserializer for datetime objects.

    Intended as the ``object_pairs_hook`` of ``json.load``. ``pairs`` is the
    list of (key, value) pairs of one JSON object. Every string value that
    matches the ISO format written by dateTimeSerializer(), with or without
    fractional seconds, is turned back into a datetime; all other values are
    kept as they are.

    Returns the pairs as a dictionary.
    """
    try:
        stringTypes = basestring  # Python 2: covers str and unicode
    except NameError:
        stringTypes = str  # Python 3 has a single text type
    d = {}
    for k, v in pairs:
        d[k] = v
        if not isinstance(v, stringTypes):
            continue
        for dateFormat in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
            try:
                d[k] = datetime.strptime(v, dateFormat)
            except ValueError:
                continue  # not in this format, try the next one
            break
    return d
##################################################
#
# Calculating sync actions
#
##################################################
def compareStates(oldState, newState):
    """Derive the list of actions that turn ``oldState`` into ``newState``.

    Both arguments map an entry name to its record. The result is a list of
    dictionaries with the keys 'object' (the record) and 'actionType':

    ADD -- the entry exists only in newState
    MOD -- the entry exists in both and isStateEqual() reports a difference
    DEL -- the entry exists only in oldState

    Exceptions raised by isStateEqual() are passed on to the caller.
    """
    actionList = []
    # Entries of oldState that no entry of newState has been matched to yet.
    unmatched = dict(oldState)
    # find incoming add or modify changes
    for k, v in newState.items():
        # Byte-string names are decoded before the lookup in oldState; names
        # that are already text are used as they are, since decoding those
        # again would fail.
        key = k.decode('utf-8') if isinstance(k, bytes) else k
        if key not in oldState:
            # newState contains file that is not present in old state -> ADD
            actionList.append({'object': v, 'actionType': ADD})
            continue
        if not isStateEqual(oldState[key], v):
            actionList.append({'object': v, 'actionType': MOD})
        unmatched.pop(key, None)
    # whatever is left over from oldState is deleted in newState
    for v in unmatched.values():
        actionList.append({'object': v, 'actionType': DEL})
    return actionList
def isStateEqual(oldState, newState):
    """Tell whether two records of the same entry describe the same state.

    Directories always count as equal. Files are equal when size and date
    match and, with ``useMd5`` set, their checksums do not contradict each
    other; a checksum of "0" on either side is not compared.

    Raises Exception when the entry changed its type.
    """
    oldType = oldState['type']
    newType = newState['type']
    if oldType != newType:
        raise Exception("Type of entry changed: [old]:" + oldType + " [new]:" + newType,
                        oldState['name'])
    if oldType == "dir":
        return True
    if oldState['size'] != newState['size'] or oldState['date'] != newState['date']:
        return False
    if not useMd5:
        return True
    return oldState['md5'] == "0" or newState['md5'] == "0" or oldState['md5'] == newState['md5']
def printActionList(name, list):
    """Print a heading containing ``name`` followed by one line per action.

    Each line shows the action type, padded to ten characters, and the record
    the action refers to.
    """
    print("Action list " + name + ":")
    for entry in list:
        label = entry['actionType'].ljust(10)
        print(label + ":: " + str(entry['object']))
def remoteDirExists(dirName):
    """Tell whether ``rclone lsd`` succeeds for the remote directory.

    Returns False when rclone exits with an error status, True otherwise.
    """
    target = remoteName + ":" + dirName
    try:
        # Only the exit status matters, the listing itself is discarded.
        check_output([rclone, "lsd", target], stderr=stdErrLogFile)
    except CalledProcessError:
        return False
    else:
        return True
def checkIncomingFolderDeletes(incoming):
    """Return ``incoming`` without the actions that isDelFalse() rejects."""
    verbosePrint("Checking whether folder deletes are true positives...")
    confirmed = []
    for action in incoming:
        if isDelFalse(action):
            continue
        confirmed.append(action)
    return confirmed
def isDelFalse(action):
    """Tell whether ``action`` deletes a directory that still exists remotely.

    The remote is only queried for delete actions on directories.
    """
    kind = action['actionType']
    target = action['object']
    targetName = target['name']
    targetType = target['type']
    if kind != DEL or targetType != "dir":
        return False
    return remoteDirExists(targetName)
def checkForConflicts(incoming, outgoing):
    """Find the entries that are changed by both action lists.

    incoming -- actions derived from the remote side
    outgoing -- actions derived from the local side

    Returns one dictionary per pair of actions that refer to the same entry
    name, with the keys 'conflictType', 'inAction', 'outAction', 'inObject'
    and 'outObject'. Pairs are reported in the order of ``incoming`` and,
    within one incoming action, in the order of ``outgoing``.
    """
    # Index the outgoing actions by name once, so every incoming action is
    # matched by a dictionary lookup rather than a scan of the whole list.
    outgoingByName = {}
    for outAction in outgoing:
        outgoingByName.setdefault(outAction['object']['name'], []).append(outAction)

    conflictList = []
    for inAction in incoming:
        for outAction in outgoingByName.get(inAction['object']['name'], ()):
            cType = conflictType(inAction['actionType'], outAction['actionType'])
            # delete from both sides is OK. The end result is the same.
            if cType == CONFLICT_DEL:
                continue
            conflictList.append({
                'conflictType': cType,
                'inAction': inAction['actionType'],
                'outAction': outAction['actionType'],
                'inObject': inAction['object'],
                'outObject': outAction['object'],
            })
    return conflictList
def conflictType(inActionType, outActionType):
    """Classify the conflict between an incoming and an outgoing action.

    Two actions of the same kind give the matching CONFLICT_ADD, CONFLICT_MOD
    or CONFLICT_DEL value; every other combination gives CONFLICT_ERR.
    """
    rules = (
        (ADD, CONFLICT_ADD),
        (MOD, CONFLICT_MOD),
        (DEL, CONFLICT_DEL),
    )
    for actionType, conflict in rules:
        if inActionType == actionType and outActionType == actionType:
            return conflict
    return CONFLICT_ERR
##################################################
#
# main program
#
##################################################
argParser = argparse.ArgumentParser(description="Utility to clone Google drive using rclone.",
                                    formatter_class=RawTextHelpFormatter)
argParser.add_argument("cmd", choices=["init", "clone", "config"], help=
"""init - Initialize the application and create state
description files.
clone - Synchronize with Google drive.
config - Print the program configuration.""")
argParser.add_argument("-v", "--verbose", help="Print progress.", default=False, action="store_true")
argParser.add_argument("-d", "--debug", help="Print debugging information.", default=False, action="store_true")
argParser.add_argument("--no-md5", help="Don't use MD5 checksums when synchronizing.", default=True,
                       action="store_false", dest="md5")
argParser.add_argument("--fast-remote", help="Deduce dir names from remote file listing.", default=False,
                       action="store_true", dest="fastRemote")
argParser.add_argument("--dry-run", help="Only print the resulting actions, do not execute them.", default=False,
                       action="store_true", dest="dryRun")
args = argParser.parse_args()

# Replace the configuration defaults with the command line values; the
# functions above read these module-level names.
verbose = args.verbose
debug = args.debug
useMd5 = args.md5
fastRemote = args.fastRemote
dryRun = args.dryRun

# try/finally makes sure the error log is closed even when the selected
# command raises.
try:
    if args.cmd == "init":
        init()
    elif args.cmd == "clone":
        clone()
    elif args.cmd == "config":
        config()
    else:
        # "choices" above already rejects any other command; kept as a
        # fallback.
        argParser.print_help()
finally:
    stdErrLogFile.close()