Source code for tonic.prototype.datasets.utils._utils
import hashlib
from pathlib import Path
from typing import Optional, Union
[docs]def check_sha256(
fpath: Union[str, Path],
sha256_provided: str,
chunk_size: Optional[int] = 1024 * 1024,
) -> str:
"""Function that checks the SHA256 of the archive/dataset. In torchvision they strongly
recommend to switch to SHA256 from MD5. This function is inspired by
torchvision.prototype.datasets.utils._resource.
Parameters:
fpath: path to the archive/dataset.
sha256_provided: the SHA256 sum to be checked.
chunk_size: the file is binary read in chunks to not load it fully to memory. This is the size of each chunk.
"""
sha256_computed = hashlib.sha256()
with open(fpath, "rb") as fp:
chunk_reader = lambda: fp.read(chunk_size)
for chunk in iter(chunk_reader, b""):
sha256_computed.update(chunk)
# Converting to hex format for comparison.
sha256_computed = sha256_computed.hexdigest()
if sha256_computed != sha256_provided:
raise RuntimeError(
f"The SHA256 provided does not match the actual one. \nComputed: {sha256_computed}.\nProvided: {sha256_provided}."
)
return sha256_computed