|
| 1 | +""" |
| 2 | +<Program Name> |
| 3 | + hash.py |
| 4 | +
|
| 5 | +<Author> |
| 6 | + |
| 7 | +
|
| 8 | +<Started> |
| 9 | + February 28, 2012. Based on a previous version of this module. |
| 10 | +
|
| 11 | +<Copyright> |
| 12 | + See LICENSE for licensing information. |
| 13 | +
|
| 14 | +<Purpose> |
| 15 | + Support secure hashing and message digests. Any hash-related routines that |
| 16 | + securesystemslib requires should be located in this module. Simplifying the |
| 17 | + creation of digest objects, and providing a central location for hash |
| 18 | + routines are the main goals of this module. Support routines implemented |
| 19 | + include functions to create digest objects given a filename or file object. |
| 20 | + The standard hashlib library is always supported; pyca/cryptography |
| 21 | + ('pyca_crypto') is additionally supported when it is installed. |
| 22 | +""" |
| 23 | + |
| 24 | +import hashlib |
| 25 | + |
| 26 | +from securesystemslib import exceptions |
| 27 | +from securesystemslib.storage import FilesystemBackend |
| 28 | + |
| 29 | +DEFAULT_CHUNK_SIZE = 4096 |
| 30 | +DEFAULT_HASH_ALGORITHM = "sha256" |
| 31 | +DEFAULT_HASH_LIBRARY = "hashlib" |
| 32 | +SUPPORTED_LIBRARIES = ["hashlib"] |
| 33 | + |
| 34 | + |
| 35 | +# If `pyca_crypto` is installed, add it to supported libraries |
try:
    import binascii

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import hashes as _pyca_hashes

    # Hash algorithm classes of `pyca/cryptography`, keyed by algorithm name.
    PYCA_DIGEST_OBJECTS_CACHE = dict(
        sha224=_pyca_hashes.SHA224,
        sha256=_pyca_hashes.SHA256,
        sha384=_pyca_hashes.SHA384,
        sha512=_pyca_hashes.SHA512,
    )

    # Reaching this point means the imports above succeeded, so the library
    # can be offered to callers.
    SUPPORTED_LIBRARIES.append("pyca_crypto")

    class PycaDiggestWrapper:
        """
        <Purpose>
          Adapt a `cryptography.hazmat.primitives.hashes.Hash` object to the
          interface this module expects from digest objects:

            digest_object.digest_size
            digest_object.hexdigest()
            digest_object.update('data')
            digest_object.digest()

        <Properties>
          algorithm:
            The `algorithm` attribute of the wrapped object.

          digest_size:
            The `digest_size` of the wrapped object's algorithm.

        <Methods>
          digest(self) -> bytes:
            Finalizes the wrapped object and returns the digest. The wrapped
            object is swapped for a copy taken before finalizing, so the
            wrapper can be updated and digested again afterwards.

          hexdigest(self) -> str:
            Returns the digest as a hexadecimal string.

          update(self, data) -> None:
            Passes 'data' to the wrapped object's `update` method.
        """

        def __init__(self, digest_obj):
            self._digest_obj = digest_obj

        @property
        def algorithm(self):
            return self._digest_obj.algorithm

        @property
        def digest_size(self):
            return self.algorithm.digest_size

        def digest(self):
            # Take the copy *before* finalizing: the copy keeps the
            # accumulated state and becomes the new wrapped object once the
            # current one has been consumed.
            current = self._digest_obj
            spare = current.copy()
            raw_digest = current.finalize()
            self._digest_obj = spare
            return raw_digest

        def hexdigest(self):
            raw_digest = self.digest()
            return binascii.hexlify(raw_digest).decode("utf-8")

        def update(self, data):
            self._digest_obj.update(data)

except ImportError:  # pragma: no cover
    # pyca/cryptography is optional; without it only hashlib is offered.
    pass
| 110 | + |
| 111 | + |
def digest(algorithm=DEFAULT_HASH_ALGORITHM, hash_library=DEFAULT_HASH_LIBRARY):
    """
    <Purpose>
      Provide the caller with the ability to create digest objects without
      having to worry about crypto library availability or which library to
      use. The caller also has the option of specifying which hash algorithm
      and/or library to use.

        # Creation of a digest object using defaults or by specifying hash
        # algorithm and library.
        digest_object = securesystemslib.hash.digest()
        digest_object = securesystemslib.hash.digest('sha384')
        digest_object = securesystemslib.hash.digest('sha256', 'hashlib')

        # The expected interface for digest objects.
        digest_object.digest_size
        digest_object.hexdigest()
        digest_object.update('data')
        digest_object.digest()

        # Added hash routines by this module.
        digest_object = securesystemslib.hash.digest_fileobject(file_object)
        digest_object = securesystemslib.hash.digest_filename(filename)

    <Arguments>
      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512'). With 'hashlib', the
        special name 'blake2b-256' selects blake2b with a 32-byte digest.

      hash_library:
        The crypto library to use for the given hash algorithm (e.g.,
        'hashlib'). Must be one of SUPPORTED_LIBRARIES.

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if an unsupported
      hashing algorithm is specified, or a digest could not be generated with
      the given algorithm. The underlying error is attached as the cause.

      securesystemslib.exceptions.UnsupportedLibraryError, if an unsupported
      library was requested via 'hash_library'.

    <Side Effects>
      None.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """

    # Was a hashlib digest object requested and is it supported?
    # If so, return the digest object.
    if hash_library == "hashlib" and hash_library in SUPPORTED_LIBRARIES:
        try:
            if algorithm == "blake2b-256":
                return hashlib.new("blake2b", digest_size=32)

            return hashlib.new(algorithm)

        except (ValueError, TypeError) as error:
            # ValueError: the algorithm value was unknown
            # TypeError: unexpected argument digest_size (on old python), or
            #            'algorithm' is not a string
            raise exceptions.UnsupportedAlgorithmError(algorithm) from error

    # Was a pyca_crypto digest object requested and is it supported?
    if hash_library == "pyca_crypto" and hash_library in SUPPORTED_LIBRARIES:
        # Only the table lookup is guarded, so errors raised while building
        # the hash object are not mistaken for an unknown algorithm.
        try:
            algorithm_class = PYCA_DIGEST_OBJECTS_CACHE[algorithm]

        except (KeyError, TypeError) as error:
            # KeyError: the algorithm name is not in the table
            # TypeError: 'algorithm' is unhashable (e.g. a list)
            raise exceptions.UnsupportedAlgorithmError(algorithm) from error

        return PycaDiggestWrapper(
            _pyca_hashes.Hash(algorithm_class(), default_backend())
        )

    # The requested hash library is not supported.
    raise exceptions.UnsupportedLibraryError(
        "Unsupported"
        " library requested.  Supported hash"
        " libraries: " + repr(SUPPORTED_LIBRARIES)
    )
| 194 | + |
| 195 | + |
def digest_fileobject(
    file_object,
    algorithm=DEFAULT_HASH_ALGORITHM,
    hash_library=DEFAULT_HASH_LIBRARY,
    normalize_line_endings=False,
):
    """
    <Purpose>
      Generate a digest object given a file object. The new digest object
      is updated with the contents of 'file_object' prior to returning the
      object to the caller.

    <Arguments>
      file_object:
        File object whose contents will be used as the data to update the
        hash of a digest object to be returned. It must support 'seek(0)'
        and 'read(size)'. Reads that return 'str' (text mode) are hashed as
        their UTF-8 encoding.

      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512').

      hash_library:
        The library providing the hash algorithms (e.g., 'hashlib').

      normalize_line_endings: (default False)
        Whether or not to normalize line endings for cross-platform support.
        Both '\\r\\n' and a lone '\\r' are hashed as '\\n'. Note that this
        results in ambiguous hashes (e.g. 'abc\\n' and 'abc\\r\\n' will
        produce the same hash), so be careful to only apply this to text
        files (not binary), when that equivalence is desirable and cannot
        result in easily-maliciously-corrupted files producing the same hash
        as a valid file.

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if an unsupported
      hashing algorithm was specified via 'algorithm'.

      securesystemslib.exceptions.UnsupportedLibraryError, if an unsupported
      crypto library was specified via 'hash_library'.

    <Side Effects>
      'file_object' is rewound to its beginning and then read to its end.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """
    # Digest object returned whose hash will be updated using 'file_object'.
    # digest() raises:
    # securesystemslib.exceptions.UnsupportedAlgorithmError
    # securesystemslib.exceptions.UnsupportedLibraryError
    digest_object = digest(algorithm, hash_library)

    # Defensively seek to beginning, as there's no case where we don't
    # intend to start from the beginning of the file.
    file_object.seek(0)

    # Read the contents of the file object in chunks of at most
    # DEFAULT_CHUNK_SIZE. Update the hash with the data read from each chunk
    # and return after the entire file is processed.
    while True:
        data = file_object.read(DEFAULT_CHUNK_SIZE)
        if not data:
            break

        # Work on bytes from here on, so that line-ending normalization
        # behaves the same for text-mode and binary-mode file objects.
        if not isinstance(data, bytes):
            data = data.encode("utf-8")

        if normalize_line_endings:
            # A chunk ending in '\r' may be the first half of a '\r\n' pair
            # split across two reads: pull in one more unit until the chunk
            # no longer ends in '\r' (or the file is exhausted).
            while data[-1:] == b"\r":
                c = file_object.read(1)
                if not c:
                    break

                if not isinstance(c, bytes):
                    c = c.encode("utf-8")

                data += c

            data = (
                data
                # First Windows
                .replace(b"\r\n", b"\n")
                # Then Mac
                .replace(b"\r", b"\n")
            )

        digest_object.update(data)

    return digest_object
| 288 | + |
| 289 | + |
def digest_filename(
    filename,
    algorithm=DEFAULT_HASH_ALGORITHM,
    hash_library=DEFAULT_HASH_LIBRARY,
    normalize_line_endings=False,
    storage_backend=None,
):
    """
    <Purpose>
      Obtain a file object for 'filename' from a storage backend, hash its
      contents with digest_fileobject(), and return the resulting digest
      object to the caller.

    <Arguments>
      filename:
        The name of the file whose contents are hashed.

      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512').

      hash_library:
        The library providing the hash algorithms (e.g., 'hashlib').

      normalize_line_endings:
        Whether or not to normalize line endings for cross-platform support.
        Passed through to digest_fileobject().

      storage_backend:
        An object which implements
        securesystemslib.storage.StorageBackendInterface. When no object is
        passed a FilesystemBackend will be instantiated and used.

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if the given
      'algorithm' is unsupported.

      securesystemslib.exceptions.UnsupportedLibraryError, if the given
      'hash_library' is unsupported.

      securesystemslib.exceptions.StorageError, if the file cannot be opened
      (raised by the storage backend, not by this function itself).

    <Side Effects>
      None.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """
    # Fall back to the local filesystem when the caller supplied no backend.
    backend = FilesystemBackend() if storage_backend is None else storage_backend

    # The backend's context manager owns the file object's lifetime.
    with backend.get(filename) as opened_file:
        # digest_fileobject() raises:
        # securesystemslib.exceptions.UnsupportedAlgorithmError
        # securesystemslib.exceptions.UnsupportedLibraryError
        return digest_fileobject(
            opened_file, algorithm, hash_library, normalize_line_endings
        )
0 commit comments