"""
This is my version of this script - https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper
So the original script copied all the photos to a new place and then modified the metadata. I didn't want
that. I wanted the photos to stay in the folders they were already in and just be edited there. So I removed
a bunch of stuff, like the output folder options, and added things like deleting each json file once the
data from it has been used. I tested it a few times, then ran it on the actual data, and it worked out pretty well.
I'm pretty happy with it.
This is the actual original script I copied and worked off of:
https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/blob/14140d4f1c5d8b55b65f4bbd293c9a791bfb7635/google_photos_takeout_helper/__main__.py
"""
from tqdm import tqdm as _tqdm
from loguru import logger
logger.remove() # removes the default console logger provided by Loguru.
# I find it to be too noisy with details more appropriate for file logging.
# Only INFO and higher-priority messages are shown on the console.
logger.add(lambda msg: _tqdm.write(msg, end=""), format="{message}", level="INFO")
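# (routing console logs through tqdm.write() keeps messages from garbling an active progress bar)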
# This creates a logging sink and handler that puts all messages at or above the TRACE level into a logfile for each run.
logger.add("file_{time}.log", level="TRACE", encoding="utf8") # Unicode instructions needed to avoid file write errors.
@logger.catch(
message=
"WHHoopssiee! Looks like script crashed! This shouldn't happen, although it often does haha :P\n"
"Most of the times, you should cut out the last printed file (it should be down there somehwere) "
"to some other folder, and continue\n"
"\n"
"If this doesn't help, and it keeps doing this after many cut-outs, you can check out issues tab:\n"
"https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/issues \n"
"to see if anyone has similar issue, or contact me other way:\n"
"https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/blob/master/README.md#contacterrors \n"
)  # wraps the entire function in a trap that logs an enhanced traceback when an exception occurs.
def main():
import argparse as _argparse
import json as _json
import os as _os
import re as _re
import shutil as _shutil
import hashlib as _hashlib
import functools as _functools
from collections import defaultdict as _defaultdict
from datetime import datetime as _datetime
from pathlib import Path as Path
try:
from google_photos_takeout_helper.__version__ import __version__
except ModuleNotFoundError:
from __version__ import __version__
import piexif as _piexif
from fractions import Fraction # piexif requires some values to be stored as rationals
import math
if _os.name == 'nt':
import win32_setctime as _windoza_setctime
parser = _argparse.ArgumentParser(
prog='Google Photos Takeout Helper',
        usage='google-photos-takeout-helper -i [INPUT TAKEOUT FOLDER]',
        description=
        """This script takes all of your photos from a Google Photos takeout,
        fixes their exif DateTime data (when they were taken) and file creation dates,
        and edits the files in place (this fork no longer copies them to an output folder).
        """,
)
parser.add_argument('--version', action='version', version=f"%(prog)s {__version__}")
parser.add_argument(
'-i', '--input-folder',
type=str,
required=True,
        help='Input folder with all the stuff from your Google Photos takeout zip(s)'
)
parser.add_argument(
'-o', '--output-folder',
type=str,
required=False,
default='ALL_PHOTOS',
        help='Output folder (kept from the original script; unused in this fork, which edits photos in place)'
)
parser.add_argument(
'--skip-extras',
action='store_true',
        help='EXPERIMENTAL: Skips extra photos, like ones whose names end in "edited" or "EFFECTS".'
)
parser.add_argument(
'--skip-extras-harder', # Oh yeah, skip my extras harder daddy
action='store_true',
        help='EXPERIMENTAL: Skips extra photos, like duplicates named pic(1). Also implies --skip-extras.'
)
parser.add_argument(
"--divide-to-dates",
action='store_true',
help="Create folders and subfolders based on the date the photos were taken"
)
parser.add_argument(
'--albums',
type=str,
help="EXPERIMENTAL, MAY NOT WORK FOR EVERYONE: What kind of 'albums solution' you would like:\n"
"'json' - written in a json file\n"
)
args = parser.parse_args()
logger.info('Heeeere we go!')
PHOTOS_DIR = Path(args.input_folder)
FIXED_DIR = Path(args.output_folder)
TAG_DATE_TIME_ORIGINAL = _piexif.ExifIFD.DateTimeOriginal
TAG_DATE_TIME_DIGITIZED = _piexif.ExifIFD.DateTimeDigitized
TAG_DATE_TIME = 306
TAG_PREVIEW_DATE_TIME = 50971
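    # 306 is the 0th-IFD DateTime tag (piexif.ImageIFD.DateTime); 50971 is the
    # PreviewDateTime tag from the DNG spec - both referenced here by raw tag ID.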
photo_formats = ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tif', '.tiff', '.svg', '.heic']
video_formats = ['.mp4', '.gif', '.mov', '.webm', '.avi', '.wmv', '.rm', '.mpg', '.mpe', '.mpeg', '.mkv', '.m4v',
'.mts', '.m2ts']
extra_formats = [
'-edited', '-effects', '-smile', '-mix', # EN/US
'-edytowane', # PL
# Add more "edited" flags in more languages if you want. They need to be lowercase.
]
# Album Multimap
album_mmap = _defaultdict(list)
# Duplicate by full hash multimap
files_by_full_hash = _defaultdict(list)
    # maps each file's original path to the new name it got when its name clashed
rename_map = dict()
_all_jsons_dict = _defaultdict(dict)
# Statistics:
s_removed_duplicates_count = 0
s_copied_files = 0
s_cant_insert_exif_files = [] # List of files where inserting exif failed
s_date_from_folder_files = [] # List of files where date was set from folder name
s_skipped_extra_files = [] # List of extra files ("-edited" etc) which were skipped
s_no_json_found = [] # List of files where we couldn't find json
s_no_date_at_all = [] # List of files where there was absolutely no option to set correct date
def for_all_files_recursive(
dir: Path,
file_function=lambda fi: True,
folder_function=lambda fo: True,
filter_fun=lambda file: True
):
for file in dir.rglob("*"):
if file.is_dir():
folder_function(file)
continue
elif file.is_file():
if filter_fun(file):
file_function(file)
else:
logger.debug(f'Found something weird... {file}')
def is_photo(file: Path):
if file.suffix.lower() not in photo_formats:
return False
        # skip the extra photo files, like "edited" or "effects" versions. They're kinda useless.
nonlocal s_skipped_extra_files
if args.skip_extras or args.skip_extras_harder: # if the file name includes something under the extra_formats, it skips it.
for extra in extra_formats:
if extra in file.name.lower():
s_skipped_extra_files.append(str(file.resolve()))
return False
if args.skip_extras_harder:
search = r"\(\d+\)\." # we leave the period in so it doesn't catch folders.
if bool(_re.search(search, file.name)):
# PICT0003(5).jpg -> PICT0003.jpg The regex would match "(5).", and replace it with a "."
plain_file = file.with_name(_re.sub(search, '.', str(file)))
# if the original exists, it will ignore the (1) file, ensuring there is only one copy of each file.
if plain_file.is_file():
s_skipped_extra_files.append(str(file.resolve()))
return False
return True
def is_video(file: Path):
if file.suffix.lower() not in video_formats:
return False
return True
def chunk_reader(fobj, chunk_size=1024):
""" Generator that reads a file in chunks of bytes """
while True:
chunk = fobj.read(chunk_size)
if not chunk:
return
yield chunk
def get_hash(file: Path, first_chunk_only=False, hash_algo=_hashlib.sha1):
hashobj = hash_algo()
with open(file, "rb") as f:
if first_chunk_only:
hashobj.update(f.read(1024))
else:
for chunk in chunk_reader(f):
hashobj.update(chunk)
return hashobj.digest()
# PART 3: removing duplicates
# THIS IS PARTLY COPIED FROM STACKOVERFLOW
# https://stackoverflow.com/questions/748675/finding-duplicate-files-and-removing-them
#
# We now use an optimized version linked from tfeldmann
# https://gist.github.com/tfeldmann/fc875e6630d11f2256e746f67a09c1ae
#
# THANK YOU Todor Minakov (https://github.com/tminakov) and Thomas Feldmann (https://github.com/tfeldmann)
#
# NOTE: defaultdict(list) is a multimap, all init array handling is done internally
# See: https://en.wikipedia.org/wiki/Multimap#Python
#
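    # The funnel below: group files by size, then by a hash of their first 1 KiB, then by a
    # full-file hash; only groups that still hold >= 2 candidates advance to the next
    # (more expensive) stage.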
def find_duplicates(path: Path, filter_fun=lambda file: True):
files_by_size = _defaultdict(list)
files_by_small_hash = _defaultdict(list)
for file in path.rglob("*"):
if file.is_file() and filter_fun(file):
try:
file_size = file.stat().st_size
except (OSError, FileNotFoundError):
# not accessible (permissions, etc) - pass on
continue
files_by_size[file_size].append(file)
# For all files with the same file size, get their hash on the first 1024 bytes
logger.info('Calculating small hashes...')
for file_size, files in _tqdm(files_by_size.items(), unit='files-by-size'):
if len(files) < 2:
continue # this file size is unique, no need to spend cpu cycles on it
for file in files:
try:
small_hash = get_hash(file, first_chunk_only=True)
except OSError:
                    # the file might have become inaccessible by the time execution got here
continue
files_by_small_hash[(file_size, small_hash)].append(file)
        # For all files with the same first-1k hash, get the hash of the full
        # file - if more than one file lands on a full hash here, they are certainly duplicates
logger.info('Calculating full hashes...')
for files in _tqdm(files_by_small_hash.values(), unit='files-by-small-hash'):
if len(files) < 2:
                # the hash of the first 1k bytes is unique -> no duplicates in this group
continue
for file in files:
try:
full_hash = get_hash(file, first_chunk_only=False)
except OSError:
                    # the file might have become inaccessible by the time execution got here
continue
files_by_full_hash[full_hash].append(file)
# Removes all duplicates in folder
# ONLY RUN AFTER RUNNING find_duplicates()
def remove_duplicates():
nonlocal s_removed_duplicates_count
        # Now that we have populated the final multimap of absolute dups, we can pick an
        # original in each group and remove all the other duplicates
        for files in _tqdm(files_by_full_hash.values(), unit='duplicates'):
            if len(files) < 2:
                continue  # this hash is unique, nothing to remove
            s_removed_duplicates_count += len(files) - 1
            # TODO reconsider which dup we keep now that we're searching globally
            # keep the first file, delete the rest (the old loop mutated the list while
            # iterating over it, which kept exactly one copy only by accident)
            for file in files[1:]:
                file.unlink()
            del files[1:]
        return True
# PART 1: Fixing metadata and date-related stuff
    # Returns the path to the matching json file
def find_json_for_file(file: Path):
parenthesis_regexp = r'\([0-9]+\)'
parenthesis = _re.findall(parenthesis_regexp, file.name)
if len(parenthesis) == 1:
# Fix for files that have as image/video IMG_1234(1).JPG with a json IMG_1234.JPG(1).json
stripped_filename = _re.sub(parenthesis_regexp, '', file.name)
potential_json = file.with_name(stripped_filename + parenthesis[0] + '.json')
else:
potential_json = file.with_name(file.name + '.json')
if potential_json.is_file():
return potential_json
        nonlocal _all_jsons_dict
        # Check if we need to load this folder
        if file.parent not in _all_jsons_dict:
            for json_file in file.parent.rglob("*.json"):
                try:
                    with json_file.open('r') as f:
                        json_dict = _json.load(f)
                    if "title" in json_dict:
                        # We found a JSON file with a proper title - remember its path,
                        # keyed by the title (the caller opens and later deletes the file,
                        # so we must hand back a path, not the parsed dict)
                        _all_jsons_dict[file.parent][json_dict["title"]] = json_file
                except Exception:
                    logger.debug(f"Couldn't open json file {json_file}")
        # Check if we have found the JSON file among all the loaded ones in the folder
        if file.parent in _all_jsons_dict and file.name in _all_jsons_dict[file.parent]:
            # Great, we found a valid JSON file in this folder corresponding to this file
            return _all_jsons_dict[file.parent][file.name]
else:
nonlocal s_no_json_found
s_no_json_found.append(str(file.resolve()))
raise FileNotFoundError(f"Couldn't find json for file: {file}")
# Returns date in 2019:01:01 23:59:59 format
def get_date_from_folder_meta(dir: Path):
file = find_album_meta_json_file(dir)
if not file:
logger.debug("Couldn't pull datetime from album meta")
return None
try:
with open(str(file), 'r') as fi:
album_dict = _json.load(fi)
# find_album_meta_json_file *should* give us "safe" file
time = int(album_dict["albumData"]["date"]["timestamp"])
return _datetime.fromtimestamp(time).strftime('%Y:%m:%d %H:%M:%S')
except KeyError:
logger.error(
"get_date_from_folder_meta - json doesn't have required stuff "
"- that probably means that either google fucked us again, or find_album_meta_json_file"
"is seriously broken"
)
return None
@_functools.lru_cache(maxsize=None)
def find_album_meta_json_file(dir: Path):
for file in dir.rglob("*.json"):
try:
with open(str(file), 'r') as f:
                    album_dict = _json.load(f)  # 'album_dict' rather than 'dict', to avoid shadowing the builtin
                if "albumData" in album_dict:
                    return file
except Exception as e:
logger.debug(e)
logger.debug(f"find_album_meta_json_file - Error opening file: {file}")
return None
def set_creation_date_from_str(file: Path, str_datetime):
try:
# Turns out exif can have different formats - YYYY:MM:DD, YYYY/..., YYYY-... etc
            # God, let's hope americans don't use something like MM-DD-YYYY here
# The replace ': ' to ':0' fixes issues when it reads the string as 2006:11:09 10:54: 1.
# It replaces the extra whitespace with a 0 for proper parsing
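            # e.g. '2006/11/09 10:54: 1' -> '2006:11:09 10:54:01' after the replace chain below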
str_datetime = str_datetime.replace('-', ':').replace('/', ':').replace('.', ':').replace('\\', ':').replace(': ', ':0')[:19]
timestamp = _datetime.strptime(
str_datetime,
'%Y:%m:%d %H:%M:%S'
).timestamp()
_os.utime(file, (timestamp, timestamp))
if _os.name == 'nt':
_windoza_setctime.setctime(str(file), timestamp)
except Exception as e:
logger.debug('Error setting creation date from string:')
logger.debug(e)
raise ValueError(f"Error setting creation date from string: {str_datetime}")
def set_creation_date_from_exif(file: Path):
try:
# Why do you need to be like that, Piexif...
exif_dict = _piexif.load(str(file))
        except Exception as e:
            raise IOError("Can't read file's exif!") from e
tags = [['0th', TAG_DATE_TIME], ['Exif', TAG_DATE_TIME_ORIGINAL], ['Exif', TAG_DATE_TIME_DIGITIZED]]
datetime_str = ''
date_set_success = False
for tag in tags:
try:
datetime_str = exif_dict[tag[0]][tag[1]].decode('UTF-8')
set_creation_date_from_str(file, datetime_str)
date_set_success = True
break
except KeyError:
pass # No such tag - continue searching :/
except ValueError:
logger.debug("Wrong date format in exif!")
logger.debug(datetime_str)
logger.debug("does not match '%Y:%m:%d %H:%M:%S'")
if not date_set_success:
raise IOError('No correct DateTime in given exif')
def set_file_exif_date(file: Path, creation_date):
try:
exif_dict = _piexif.load(str(file))
        except Exception:  # Sorry, but Piexif is too unpredictable
exif_dict = {'0th': {}, 'Exif': {}}
creation_date = creation_date.encode('UTF-8')
exif_dict['0th'][TAG_DATE_TIME] = creation_date
exif_dict['Exif'][TAG_DATE_TIME_ORIGINAL] = creation_date
exif_dict['Exif'][TAG_DATE_TIME_DIGITIZED] = creation_date
try:
_piexif.insert(_piexif.dump(exif_dict), str(file))
except Exception as e:
logger.debug("Couldn't insert exif!")
logger.debug(e)
nonlocal s_cant_insert_exif_files
s_cant_insert_exif_files.append(str(file.resolve()))
def get_date_str_from_json(json):
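        # Google's json stores the taken time as a Unix-epoch string, e.g.
        # {"photoTakenTime": {"timestamp": "1599078832"}} - fromtimestamp()
        # renders it in the machine's local timezone.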
return _datetime.fromtimestamp(
int(json['photoTakenTime']['timestamp'])
).strftime('%Y:%m:%d %H:%M:%S')
# ========= THIS IS ALL GPS STUFF =========
def change_to_rational(number):
"""convert a number to rantional
Keyword arguments: number
return: tuple like (1, 2), (numerator, denominator)
"""
f = Fraction(str(number))
return f.numerator, f.denominator
# got this here https://github.com/hMatoba/piexifjs/issues/1#issuecomment-260176317
def degToDmsRational(degFloat):
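        # e.g. 12.582 degrees -> [(12, 1), (34, 1), (5520, 100)], i.e. 12deg 34' 55.2"
        # (seconds are stored times 100 so the rational keeps two decimal places)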
min_float = degFloat % 1 * 60
sec_float = min_float % 1 * 60
deg = math.floor(degFloat)
deg_min = math.floor(min_float)
sec = round(sec_float * 100)
return [(deg, 1), (deg_min, 1), (sec, 100)]
def set_file_geo_data(file: Path, json):
"""
        Reads the geoData from google's json and saves it to the EXIF. This assumes the geodata looks something like -100.12093, 50.213143.
Written by DalenW.
:param file:
:param json:
:return:
"""
# prevents crashes
try:
exif_dict = _piexif.load(str(file))
        except Exception:
exif_dict = {'0th': {}, 'Exif': {}}
        # Google sometimes exports a coordinate as a string; treat those as 0.0
        # ("unset") and convert everything else to a float.
        def _str_to_float(num):
            if isinstance(num, str):
                return 0.0
            else:
                return float(num)
        # falls back to the geoDataExif values if geoData wasn't set in the photos editor.
# https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/pull/5#discussion_r531792314
longitude = _str_to_float(json['geoData']['longitude'])
latitude = _str_to_float(json['geoData']['latitude'])
altitude = _str_to_float(json['geoData']['altitude'])
# Prioritise geoData set from GPhotos editor. If it's blank, fall back to geoDataExif
if longitude == 0 and latitude == 0:
longitude = _str_to_float(json['geoDataExif']['longitude'])
latitude = _str_to_float(json['geoDataExif']['latitude'])
altitude = _str_to_float(json['geoDataExif']['altitude'])
# latitude >= 0: North latitude -> "N"
# latitude < 0: South latitude -> "S"
# longitude >= 0: East longitude -> "E"
# longitude < 0: West longitude -> "W"
if longitude >= 0:
longitude_ref = 'E'
else:
longitude_ref = 'W'
longitude = longitude * -1
if latitude >= 0:
latitude_ref = 'N'
else:
latitude_ref = 'S'
latitude = latitude * -1
# referenced from https://gist.github.com/c060604/8a51f8999be12fc2be498e9ca56adc72
gps_ifd = {
_piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0)
}
# skips it if it's empty
if latitude != 0 or longitude != 0:
gps_ifd.update({
_piexif.GPSIFD.GPSLatitudeRef: latitude_ref,
_piexif.GPSIFD.GPSLatitude: degToDmsRational(latitude),
_piexif.GPSIFD.GPSLongitudeRef: longitude_ref,
_piexif.GPSIFD.GPSLongitude: degToDmsRational(longitude)
})
        if altitude != 0:
            gps_ifd.update({
                # EXIF: GPSAltitudeRef 0 = above sea level, 1 = below; the original
                # hardcoded 1, which mislabels ordinary positive altitudes
                _piexif.GPSIFD.GPSAltitudeRef: 0 if altitude >= 0 else 1,
                _piexif.GPSIFD.GPSAltitude: change_to_rational(round(abs(altitude)))
            })
gps_exif = {"GPS": gps_ifd}
exif_dict.update(gps_exif)
try:
_piexif.insert(_piexif.dump(exif_dict), str(file))
except Exception as e:
logger.debug("Couldn't insert geo exif!")
# local variable 'new_value' referenced before assignment means that one of the GPS values is incorrect
logger.debug(e)
# ============ END OF GPS STUFF ============
# Fixes ALL metadata, takes just file and dir and figures it out
def fix_metadata(file: Path):
# logger.info(file)
has_nice_date = False
try:
set_creation_date_from_exif(file)
has_nice_date = True
        except (_piexif.InvalidImageDataError, ValueError, IOError) as e:
            # IOError also covers 'no usable DateTime found in exif', so the old
            # separate 'except IOError' branch here was unreachable and has been merged
            logger.debug(e)
            logger.debug(f'No exif date for {file}')
try:
google_json_file = find_json_for_file(file)
try:
with open(google_json_file, 'r') as f:
google_json = _json.load(f)
date = get_date_str_from_json(google_json)
set_file_geo_data(file, google_json)
set_file_exif_date(file, date)
set_creation_date_from_str(file, date)
_os.remove(google_json_file)
has_nice_date = True
            except Exception as e:
                logger.debug(e)  # log the underlying cause before masking it
                raise FileNotFoundError(f"Couldn't use json for file: {file}")
            return True
except FileNotFoundError as e:
logger.debug(e)
if has_nice_date:
return True
logger.debug(f'Last option, copying folder meta as date for {file}')
date = get_date_from_folder_meta(file.parent)
if date is not None:
set_file_exif_date(file, date)
set_creation_date_from_str(file, date)
nonlocal s_date_from_folder_files
s_date_from_folder_files.append(str(file.resolve()))
return True
else:
logger.warning(f'There was literally no option to set date on {file}')
nonlocal s_no_date_at_all
s_no_date_at_all.append(str(file.resolve()))
return False
# PART 2: Copy all photos and videos to target folder
# Makes a new name like 'photo(1).jpg'
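    # e.g. photo.jpg -> photo(1).jpg; if that exists too, photo(2).jpg, and so on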
def new_name_if_exists(file: Path):
new_name = file
i = 1
while True:
if not new_name.is_file():
return new_name
else:
new_name = file.with_name(f"{file.stem}({i}){file.suffix}")
rename_map[str(file)] = new_name
i += 1
    # xD python lambdas are limited to a single expression - so we sneak the bar.update() in as an argument
def _walk_with_tqdm(res, bar: _tqdm):
bar.update()
return res
# Count *all* photo and video files - this is hacky, and we should use .rglob altogether instead of is_photo
logger.info("Counting how many input files we have ahead...")
_input_files_count = 0
for ext in _tqdm(photo_formats + video_formats, unit='formats'):
        _input_files_count += len(list(PHOTOS_DIR.rglob(f'*{ext}')))  # rglob already recurses; an extra '**/' is redundant
logger.info(f'Input files: {_input_files_count}')
logger.info('=====================')
logger.info('Fixing files metadata and creation dates...')
# tqdm progress bar stuff
_metadata_bar = _tqdm(total=_input_files_count, unit='files')
for_all_files_recursive(
dir=PHOTOS_DIR,
file_function=lambda f: _walk_with_tqdm(fix_metadata(f), _metadata_bar),
# TODO (probably never, but should): Change this maybe to path.rglob
filter_fun=lambda f: (is_photo(f) or is_video(f))
)
_metadata_bar.close()
logger.info('=====================')
logger.info('=====================')
    # The copy step from the original script was removed in this fork - files are
    # fixed in place, so there is nothing to copy here.
logger.info('=====================')
logger.info('=====================')
# logger.info('Finding duplicates...')
# find_duplicates(FIXED_DIR, lambda f: (is_photo(f) or is_video(f)))
# logger.info('Removing duplicates...')
# remove_duplicates()
# logger.info('=====================')
# if args.albums is not None:
# if args.albums.lower() == 'json':
# logger.info('=====================')
# logger.info('Populate json file with albums...')
# logger.info('=====================')
# for_all_files_recursive(
# dir=PHOTOS_DIR,
# folder_function=populate_album_map
# )
# file = PHOTOS_DIR / 'albums.json'
# with open(file, 'w', encoding="utf-8") as outfile:
# _json.dump(album_mmap, outfile)
# logger.info(str(file))
logger.info('')
logger.info('DONE! FREEEEEDOOOOM!!!')
logger.info('')
logger.info("Final statistics:")
logger.info(f"Files copied to target folder: {s_copied_files}")
logger.info(f"Removed duplicates: {s_removed_duplicates_count}")
logger.info(f"Files for which we couldn't find json: {len(s_no_json_found)}")
if len(s_no_json_found) > 0:
with open(PHOTOS_DIR / 'no_json_found.txt', 'w', encoding="utf-8") as f:
f.write("# This file contains list of files for which there was no corresponding .json file found\n")
f.write("# You might find it useful, but you can safely delete this :)\n")
f.write("\n".join(s_no_json_found))
logger.info(f" - you have full list in {f.name}")
logger.info(f"Files where inserting new exif failed: {len(s_cant_insert_exif_files)}")
if len(s_cant_insert_exif_files) > 0:
logger.info("(This is not necessary bad thing - pretty much all videos fail, "
"and your photos probably have their original exif already")
with open(PHOTOS_DIR / 'failed_inserting_exif.txt', 'w', encoding="utf-8") as f:
f.write("# This file contains list of files where setting right exif date failed\n")
f.write("# You might find it useful, but you can safely delete this :)\n")
f.write("\n".join(s_cant_insert_exif_files))
logger.info(f" - you have full list in {f.name}")
logger.info(f"Files where date was set from name of the folder: {len(s_date_from_folder_files)}")
if len(s_date_from_folder_files) > 0:
        with open(PHOTOS_DIR / 'date_from_folder_name.txt', 'w', encoding="utf-8") as f:
f.write("# This file contains list of files where date was set from name of the folder\n")
f.write("# You might find it useful, but you can safely delete this :)\n")
f.write("\n".join(s_date_from_folder_files))
logger.info(f" - you have full list in {f.name}")
if args.skip_extras or args.skip_extras_harder:
# Remove duplicates: https://www.w3schools.com/python/python_howto_remove_duplicates.asp
s_skipped_extra_files = list(dict.fromkeys(s_skipped_extra_files))
logger.info(f"Extra files that were skipped: {len(s_skipped_extra_files)}")
with open(PHOTOS_DIR / 'skipped_extra_files.txt', 'w', encoding="utf-8") as f:
f.write("# This file contains list of extra files (ending with '-edited' etc) which were skipped because "
"you've used either --skip-extras or --skip-extras-harder\n")
f.write("# You might find it useful, but you can safely delete this :)\n")
f.write("\n".join(s_skipped_extra_files))
logger.info(f" - you have full list in {f.name}")
if len(s_no_date_at_all) > 0:
logger.info('')
logger.info(f"!!! There were {len(s_no_date_at_all)} files where there was absolutely no way to set "
f"a correct date! They will probably appear at the top of the others, as their 'last modified' "
f"value is set to moment of downloading your takeout :/")
with open(PHOTOS_DIR / 'unsorted.txt', 'w', encoding="utf-8") as f:
f.write("# This file contains list of files where there was no way to set correct date!\n")
f.write("# You probably want to set their dates manually - but you can delete this if you want\n")
f.write("\n".join(s_no_date_at_all))
logger.info(f" - you have full list in {f.name}")
logger.info('')
    logger.info('Sooo... what now? See README.md for the nice G Photos alternatives I found and recommend')
logger.info('')
    logger.info('If I helped you, you can consider donating to me: https://www.paypal.me/TheLastGimbus')
logger.info('Have a nice day!')
if __name__ == '__main__':
main()