DeepFake_Detection_ScatterNet/originalff.py at master · Sharvil97/DeepFake_Detection_ScatterNet · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python
""" Downloads FaceForensics public data release
Example usage:
    # source-to-target
    python download.py -d <'compressed' or 'raw'> <output folder>
    # self-reenactment
    python download.py -d <'selfreenactment_compressed' or 'selfreenactment_raw'> <output folder>
    # cropped self-reenactment images
    python download.py -d selfreenactment_images <output folder>
    # only original videos
    python download.py -d original_videos <output folder>
"""
# -*- coding: utf-8 -*-
import argparse
import os
import urllib
import urllib.request
import tempfile
from os.path import join


# Root of the FaceForensics server; every other URL below is derived from it.
SERVER_URL = 'http://kaldir.vc.in.tum.de/FaceForensics/'

# Terms-of-use document whose address is printed before any download starts.
TOS_URL = SERVER_URL + 'webpage/FaceForensics_TOS.pdf'

# Base of the data release; file lists and videos are addressed relative to it.
BASE_URL = SERVER_URL + 'v1_cargo/'

# Single archive fetched for the "original_videos" dataset type.
ORIGINAL_VIDEOS_URL = BASE_URL + 'original_videos.tar.gz'

# Approximate sizes quoted in the pre-download warning, keyed by the dataset
# type with its "selfreenactment_" / "source_to_target_" prefix removed.
RELEASE_DATASET_SIZE = {
    'raw': '~3.5TB',
    'compressed': '~130GB',
    'images': '~135GB',
}

# Values accepted for the -d/--dataset_type command line option.
DATASET_TYPES = [
    "raw",
    "compressed",
    "selfreenactment_raw",
    "selfreenactment_compressed",
    "original_videos",
    "selfreenactment_images",
    "source_to_target_images",
]

# Number of files fetched per subfolder when --sample_only is given.
NUM_SAMPLES = 5


def get_filelist(filelist_url):
    """Fetch a file list and return the filenames it contains.

    The resource at ``filelist_url`` is read line by line and decoded as
    UTF-8, one filename per line.

    Args:
        filelist_url: URL of the text file to read.

    Returns:
        list of str: The non-empty lines in their original order, with the
        trailing line ending (``\\n`` or ``\\r\\n``) removed.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    # The context manager closes the connection even when decoding fails.
    with urllib.request.urlopen(filelist_url) as response:
        decoded = (raw.decode('utf-8').rstrip('\r\n') for raw in response)
        # Drop blank lines: an empty filename would later be appended to a
        # base URL and requested as if it were a file.
        return [name for name in decoded if name]


def download_files(filenames, base_url, output_path, sample_only=False):
    """Download a list of files from ``base_url`` into ``output_path``.

    ``output_path`` is created if it does not exist. Progress is printed as
    ``done/total`` before every tenth file and once more at the end.

    Args:
        filenames: Names of the files to fetch; each is appended to
            ``base_url`` and joined onto ``output_path``.
        base_url: URL prefix the filenames are appended to.
        output_path: Directory the files are written to.
        sample_only: If true, only the first ``NUM_SAMPLES`` files are
            downloaded.
    """
    os.makedirs(output_path, exist_ok=True)
    if sample_only:
        # Slicing (instead of breaking out of the loop on a modulo test)
        # keeps the reported total correct when fewer than NUM_SAMPLES files
        # exist and cannot divide by zero when NUM_SAMPLES is 1.
        filenames = list(filenames)[:NUM_SAMPLES]
    num_filenames = len(filenames)
    for i, filename in enumerate(filenames):
        if i % 10 == 0:
            print("{}/{}".format(i, num_filenames))
        download_file(base_url + filename, join(output_path, filename))
    print("{}/{}".format(num_filenames, num_filenames))


def download_file(url, out_file):
    """Download ``url`` to ``out_file`` unless that file already exists.

    The data is first written to a temporary file in the destination
    directory and only moved to ``out_file`` once the transfer has finished,
    so an interrupted download never leaves a partial ``out_file`` behind.

    Args:
        url: Address of the resource to fetch.
        out_file: Path the downloaded data is stored at. Missing parent
            directories are created.

    Raises:
        urllib.error.URLError: If the resource cannot be retrieved.
        OSError: If the destination cannot be created or written.
    """
    if os.path.isfile(out_file):
        print('WARNING: skipping download of existing file ' + out_file)
        return

    out_dir = os.path.dirname(out_file)
    if out_dir:
        # mkstemp needs an existing directory to create the temp file in.
        os.makedirs(out_dir, exist_ok=True)

    # The temp file lives next to the target so the final move stays on one
    # filesystem.
    fh, out_file_tmp = tempfile.mkstemp(dir=out_dir or os.curdir)
    os.close(fh)  # urlretrieve reopens the path itself
    try:
        urllib.request.urlretrieve(url, out_file_tmp)
        os.replace(out_file_tmp, out_file)
    finally:
        # After a successful move the temp path is gone; it only still exists
        # when the download failed or was interrupted.
        if os.path.exists(out_file_tmp):
            os.remove(out_file_tmp)


def main():
    """Parse the command line and download the requested FaceForensics data.

    The user has to confirm the terms of use (and, for everything except
    ``original_videos``, a size warning) on stdin before anything is fetched.
    ``original_videos`` is downloaded as a single archive into the output
    directory; every other dataset type is downloaded file by file, per
    selected split folder (test/train/val) and video type
    (altered/original/mask), using the file lists published on the server.

    Raises:
        Exception: If ``--dataset_type`` is not one of ``DATASET_TYPES``.
    """
    parser = argparse.ArgumentParser(
        description='Downloads FaceForensics public data release.')
    parser.add_argument('output_path', help='directory in which to download')
    parser.add_argument('-d', '--dataset_type', default='compressed',
                        help='Enter which dataset you want to download: '
                             '"raw", "compressed", "selfreenactment_raw", '
                             '"selfreenactment_compressed", '
                             '"original_videos", '
                             '"source_to_target_images" or '
                             '"selfreenactment_images".')
    parser.add_argument('--not_altered', action='store_true',
                        help="don't download face2face altered videos")
    parser.add_argument('--not_original', action='store_true',
                        help="don't download original videos")
    parser.add_argument('--not_mask', action='store_true',
                        help="don't download face2face mask videos")
    parser.add_argument('--not_test', action='store_true',
                        help="don't download videos of the test set")
    parser.add_argument('--not_train', action='store_true',
                        help="don't download videos of the training set")
    parser.add_argument('--not_val', action='store_true',
                        help="don't download videos of the validation set")
    parser.add_argument('--sample_only', action='store_true',
                        help='activate this, if you only want to download 5 '
                             'files per subfolder')
    args = parser.parse_args()

    # Check for dataset type
    if args.dataset_type not in DATASET_TYPES:
        raise Exception('Wrong dataset type. Please consult "-h" for possible '
                        'options.')

    # TOS
    print('By pressing any key to continue you confirm that you have agreed '
          'to the FaceForensics terms of use as described at:')
    print(TOS_URL)
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    input('')  # the entered text is irrelevant, only the confirmation counts

    # Check which videos to download
    downloaded_video_types = []
    if not args.not_altered:
        downloaded_video_types.append('altered')
    if not args.not_original:
        downloaded_video_types.append('original')
    # Mask videos are never requested for the image datasets.
    if 'images' not in args.dataset_type and not args.not_mask:
        downloaded_video_types.append('mask')

    # Check which folders to download
    downloaded_folders = []
    if not args.not_test:
        downloaded_folders.append('test')
    if not args.not_train:
        downloaded_folders.append('train')
    if not args.not_val:
        downloaded_folders.append('val')

    # Split the dataset type into the server-side dataset name and the
    # variant ("raw", "compressed" or "images") used in URLs and size lookup.
    if 'selfreenactment' in args.dataset_type:
        dataset = 'selfreenactment'
        dataset_type = args.dataset_type.replace('selfreenactment_', '')
    else:
        dataset = 'source_to_target'
        dataset_type = args.dataset_type.replace('source_to_target_', '')

    # Warning
    if args.dataset_type != 'original_videos':
        dataset_filesize = RELEASE_DATASET_SIZE[dataset_type]
        print('***')
        if not args.sample_only:
            print('WARNING: You are downloading the FaceForensics dataset {} of'
                  ' size {}'.format(args.dataset_type, dataset_filesize))
        print(
            'Note that existing scan directories will be skipped. Delete '
            'partially downloaded directories to re-download.')
        print('***')
        print('Press any key to continue, or CTRL-C to exit.')
        input('')

    # Download
    print('\nDownloading dataset: {}'.format(args.dataset_type))
    if args.dataset_type == 'original_videos':
        print('Please be patient, this may take a while (~2gb)')
        download_file(ORIGINAL_VIDEOS_URL,
                      out_file=join(args.output_path,
                                    'faceforensics_original_videos.tar.gz'))
        return

    for folder in downloaded_folders:
        # Image datasets publish their file lists under an "images_" prefix.
        if 'images' in args.dataset_type:
            filelist_folder = 'images_' + folder
        else:
            filelist_folder = folder
        filelist_url = BASE_URL + '{}/filelists/{}.txt'.format(
            dataset, filelist_folder)
        filenames = get_filelist(filelist_url)
        print('\nDownloading {}'.format(folder))
        # The same file list is used for every video type of the folder.
        for video_type in downloaded_video_types:
            output_path = join(args.output_path,
                               'FaceForensics_{}'.format(args.dataset_type),
                               folder, video_type)
            print('{}/{} > {}'.format(folder, video_type, output_path))
            base_url = BASE_URL + '{}/{}/{}/{}/'.format(
                dataset, dataset_type, folder, video_type)
            download_files(filenames, base_url, output_path=output_path,
                           sample_only=args.sample_only)


# Start the downloader only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()