Source code for fury.data.fetcher

"""Fetcher based on dipy."""

import os
import sys
import contextlib

from os.path import join as pjoin
from hashlib import sha256
from shutil import copyfileobj

import tarfile
import zipfile

from urllib.request import urlopen

# Set a user-writeable file-system location to put files:
if 'FURY_HOME' in os.environ:
    fury_home = os.environ['FURY_HOME']
else:
    fury_home = pjoin(os.path.expanduser('~'), '.fury')
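
# Usage sketch (not part of the original module): the cache location can be
# redirected by setting FURY_HOME before this module is imported. The path
# below is hypothetical.
#
#   >>> import os
#   >>> os.environ['FURY_HOME'] = '/tmp/fury_cache'
#   >>> import fury.data.fetcher as fetcher   # fetcher.fury_home now points there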

# The URL to the University of Washington Researchworks repository:
UW_RW_URL = \
    "https://digital.lib.washington.edu/researchworks/bitstream/handle/"

FURY_DATA_URL = \
    "https://raw.githubusercontent.com/fury-gl/fury-data/master/examples/"

MODEL_DATA_URL = \
    "https://raw.githubusercontent.com/fury-gl/fury-data/master/models/"


class FetcherError(Exception):
    pass


def update_progressbar(progress, total_length):
    """Show progressbar.

    Takes a number between 0 and 1 to indicate progress from 0 to 100%.
    """
    # Try to set the bar_length according to the console size
    try:
        columns = os.popen('tput cols', 'r').read()
        bar_length = int(columns) - 46
        if bar_length < 1:
            bar_length = 20
    except Exception:
        # Default value if determination of console size fails
        bar_length = 20
    block = int(round(bar_length * progress))
    size_string = "{0:.2f} MB".format(float(total_length) / (1024 * 1024))
    text = "\rDownload Progress: [{0}] {1:.2f}% of {2}".format(
        "#" * block + "-" * (bar_length - block), progress * 100, size_string)
    sys.stdout.write(text)
    sys.stdout.flush()

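
# Usage sketch (not part of the original module): driving the progress bar by
# hand for a simulated 10 MB transfer. The values are illustrative only.
#
#   >>> total = 10 * 1024 * 1024
#   >>> for done in (0, total // 2, total):
#   ...     update_progressbar(done / total, total)
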
def copyfileobj_withprogress(fsrc, fdst, total_length, length=16 * 1024):
    """Copy data from file-like object `fsrc` to file-like object `fdst`,
    updating the progress bar after each chunk of `length` bytes."""
    copied = 0
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)
        copied += len(buf)
        progress = float(copied) / float(total_length)
        update_progressbar(progress, total_length)

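
# Usage sketch (not part of the original module): copying a local file while
# showing the progress bar. The paths are hypothetical.
#
#   >>> total = os.path.getsize('/tmp/src.bin')
#   >>> with open('/tmp/src.bin', 'rb') as fsrc:
#   ...     with open('/tmp/dst.bin', 'wb') as fdst:
#   ...         copyfileobj_withprogress(fsrc, fdst, total)
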
def _already_there_msg(folder):
    """Print a message indicating that dataset is already in place."""
    msg = 'Dataset is already in place. If you want to fetch it again '
    msg += 'please first remove the folder %s ' % folder
    print(msg)


def _get_file_sha(filename):
    """Generate the SHA-256 checksum of an entire file, read in chunks of
    256 * block_size bytes.

    Parameters
    ----------
    filename : str
        The path to the file whose sha checksum is to be generated.

    Returns
    -------
    sha256_data : str
        The computed sha hash from the input file.

    """
    sha256_data = sha256()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(256 * sha256_data.block_size), b''):
            sha256_data.update(chunk)
    return sha256_data.hexdigest()

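
# Usage sketch (not part of the original module): _get_file_sha returns a
# lowercase hex digest, so it can be compared directly against a stored
# checksum. The path and the expected_sha variable are hypothetical.
#
#   >>> digest = _get_file_sha('/tmp/icomoon.tar.gz')
#   >>> matches = (digest == expected_sha.lower())
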
def check_sha(filename, stored_sha256=None):
    """Check the generated sha checksum.

    Parameters
    ----------
    filename : str
        The path to the file whose checksum is to be compared.
    stored_sha256 : str, optional
        Used to verify the generated SHA checksum.
        Default: None, checking is skipped.

    """
    if stored_sha256 is not None:
        computed_sha256 = _get_file_sha(filename)
        if stored_sha256.lower() != computed_sha256:
            msg = """The downloaded file, %s, does not have the expected sha
checksum of "%s". Instead, the sha checksum was: "%s". This could mean that
something is wrong with the file or that the upstream file has been updated.
You can try downloading the file again or updating to the newest version of
Fury.""" % (filename, stored_sha256, computed_sha256)
            raise FetcherError(msg)

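
# Usage sketch (not part of the original module): check_sha is silent when the
# checksum matches (or when stored_sha256 is None) and raises FetcherError on
# a mismatch. The path and the all-zero checksum are hypothetical.
#
#   >>> check_sha('/tmp/icomoon.tar.gz')                           # no check performed
#   >>> check_sha('/tmp/icomoon.tar.gz', stored_sha256='0' * 64)   # raises FetcherError
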
def _get_file_data(fname, url):
    with contextlib.closing(urlopen(url)) as opener:
        try:
            response_size = opener.headers['content-length']
        except KeyError:
            response_size = None

        with open(fname, 'wb') as data:
            if response_size is None:
                copyfileobj(opener, data)
            else:
                copyfileobj_withprogress(opener, data, response_size)

def fetch_data(files, folder, data_size=None):
    """Download files to folder and check their sha checksums.

    Parameters
    ----------
    files : dictionary
        For each file in `files` the value should be (url, sha). The file will
        be downloaded from url if the file does not already exist or if the
        file exists but the sha checksum does not match.
    folder : str
        The directory where to save the file, the directory will be created if
        it does not already exist.
    data_size : str, optional
        A string describing the size of the data (e.g. "91 MB") to be logged
        to the screen. Default does not produce any information about data
        size.

    Raises
    ------
    FetcherError
        Raises if the sha checksum of the file does not match the expected
        value. The downloaded file is not deleted when this error is raised.

    """
    if not os.path.exists(folder):
        print("Creating new folder %s" % (folder))
        os.makedirs(folder)

    if data_size is not None:
        print('Data size is approximately %s' % data_size)

    all_skip = True
    for f in files:
        url, sha = files[f]
        fullpath = pjoin(folder, f)
        if os.path.exists(fullpath) and \
                (_get_file_sha(fullpath) == sha.lower()):
            continue
        all_skip = False
        print('Downloading "%s" to %s' % (f, folder))
        _get_file_data(fullpath, url)
        check_sha(fullpath, sha)
    if all_skip:
        _already_there_msg(folder)
    else:
        print("Files successfully downloaded to %s" % (folder))

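
# Usage sketch (not part of the original module): fetching a single file with
# fetch_data, reusing the icon archive entry that fetch_viz_icons (defined
# below) is built from.
#
#   >>> files = {'icomoon.tar.gz': (
#   ...     UW_RW_URL + '1773/38478/icomoon.tar.gz',
#   ...     'BC1FEEA6F58BA3601D6A0B029EB8DFC5F352E21F2A16BA41099A96AA3F5A4735')}
#   >>> fetch_data(files, pjoin(fury_home, 'icons'), data_size='12KB')
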
def _make_fetcher(name, folder, baseurl, remote_fnames, local_fnames,
                  sha_list=None, doc="", data_size=None, msg=None,
                  unzip=False):
    """Create a new fetcher.

    Parameters
    ----------
    name : str
        The name of the fetcher function.
    folder : str
        The full path to the folder in which the files would be placed
        locally. Typically, this is something like 'pjoin(fury_home, 'foo')'
    baseurl : str
        The URL from which this fetcher reads files.
    remote_fnames : list of strings
        The names of the files in the baseurl location.
    local_fnames : list of strings
        The names of the files to be saved on the local filesystem.
    sha_list : list of strings, optional
        The sha checksums of the files. Used to verify the content of the
        files. Default: None, skipping checking sha.
    doc : str, optional
        Documentation of the fetcher.
    data_size : str, optional
        If provided, is sent as a message to the user before downloading
        starts.
    msg : str, optional
        A message to print to screen when fetching takes place. Default
        (None) is to print nothing.
    unzip : bool, optional
        Whether to unzip the file(s) after downloading them. Supports zip, gz,
        and tar.gz files.

    Returns
    -------
    fetcher : function
        A function that, when called, fetches data according to the designated
        inputs.

    """
    def fetcher():
        files = {}
        for i, (f, n) in enumerate(zip(remote_fnames, local_fnames)):
            files[n] = (baseurl + f,
                        sha_list[i] if sha_list is not None else None)
        fetch_data(files, folder, data_size)

        if msg is not None:
            print(msg)
        if unzip:
            for f in local_fnames:
                split_ext = os.path.splitext(f)
                if split_ext[-1] == '.gz' or split_ext[-1] == '.bz2':
                    if os.path.splitext(split_ext[0])[-1] == '.tar':
                        ar = tarfile.open(pjoin(folder, f))
                        ar.extractall(path=folder)
                        ar.close()
                    else:
                        raise ValueError('File extension is not recognized')
                elif split_ext[-1] == '.zip':
                    z = zipfile.ZipFile(pjoin(folder, f), 'r')
                    z.extractall(folder)
                    z.close()
                else:
                    raise ValueError('File extension is not recognized')

        return files, folder

    fetcher.__name__ = name
    fetcher.__doc__ = doc
    return fetcher


fetch_viz_icons = _make_fetcher(
    "fetch_viz_icons",
    pjoin(fury_home, "icons"),
    UW_RW_URL + "1773/38478/",
    ['icomoon.tar.gz'],
    ['icomoon.tar.gz'],
    ['BC1FEEA6F58BA3601D6A0B029EB8DFC5F352E21F2A16BA41099A96AA3F5A4735'],
    data_size="12KB",
    doc="Download icons for fury",
    unzip=True)

fetch_viz_wiki_nw = _make_fetcher(
    "fetch_viz_wiki_nw",
    pjoin(fury_home, "examples", "wiki_nw"),
    FURY_DATA_URL,
    ['wiki_categories.txt', 'wiki_edges.txt', 'wiki_positions.txt'],
    ['wiki_categories.txt', 'wiki_edges.txt', 'wiki_positions.txt'],
    ['1679241B13D2FD01209160F0C186E14AB55855478300B713D5369C12854CFF82',
     '702EE8713994243C8619A29C9ECE32F95305737F583B747C307500F3EC4A6B56',
     '044917A8FBD0EB980D93B6C406A577BEA416FA934E897C26C87E91C218EF4432'],
    doc="Download the following wiki information: "
        "Interdisciplinary map of the journals",
    msg=("More information about complex "
         "networks can be found in this paper: "
         "https://arxiv.org/abs/0711.3199"))

fetch_viz_models = _make_fetcher(
    "fetch_viz_models",
    pjoin(fury_home, "models"),
    MODEL_DATA_URL,
    ['utah.obj', 'suzanne.obj'],
    ['utah.obj', 'suzanne.obj'],
    ['0B50F12CEDCDC27377AC702B1EE331223BECEC59593B3F00A9E06B57A9C1B7C3',
     'BB4FF4E65D65D71D53000E06D2DC7BF89B702223657C1F64748811A3A6C8D621'],
    doc="Download the models for the shader tutorial")

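
# Usage sketch (not part of the original module): each _make_fetcher result is
# a plain function; calling it downloads the data if needed and returns the
# file map and the destination folder.
#
#   >>> files, folder = fetch_viz_icons()   # downloads and unpacks icomoon.tar.gz
#   >>> list(files)
#   ['icomoon.tar.gz']
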
def read_viz_icons(style='icomoon', fname='infinity.png'):
    """Read specific icon from specific style.

    Parameters
    ----------
    style : str
        Current icon style. Default is icomoon.
    fname : str
        Filename of icon. This should be found in folder HOME/.fury/style/.
        Default is infinity.png.

    Returns
    -------
    path : str
        Complete path of icon.

    """
    folder = pjoin(fury_home, 'icons', style)
    return pjoin(folder, fname)

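
# Usage sketch (not part of the original module): read_viz_icons only builds a
# path; the icons must have been fetched first, and the icomoon archive is
# assumed to unpack into an 'icomoon' subfolder of the icons directory.
#
#   >>> fetch_viz_icons()
#   >>> icon_path = read_viz_icons(style='icomoon', fname='infinity.png')
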
def read_viz_models(fname):
    """Read specific model.

    Parameters
    ----------
    fname : str
        Filename of the model. This should be found in folder
        HOME/.fury/models/.

    Returns
    -------
    path : str
        Complete path of the model.

    """
    folder = pjoin(fury_home, 'models')
    return pjoin(folder, fname)

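
# Usage sketch (not part of the original module): as with the icons, fetch the
# models first, then resolve a model path by filename.
#
#   >>> fetch_viz_models()
#   >>> model_path = read_viz_models('utah.obj')
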