Source code for datopy.workflow

"""
Tools for data I/O (saving figures and Jupyter runtime environment files,
manually downloading modules) and other data workflows.
"""

import os
import doctest
import requests
import importlib
import urllib.request
# from collections.abc import Callable
from typing import Dict, List, Any, Callable

### Save figs

### Save drive

### Download Github modules



[docs]
def git_module_loader(modules: Dict[str, List[str]],
                      save_dir: str = f"{os.path.dirname(os.path.abspath(__file__))}",
                      run_tests: bool = False,
                      run_download: bool = False) -> None:
    """
    Securely downloads collections of modules directly from their Git repo and stores in the current directory.

    Parameters
    ----------
    modules : Dict[str, List[str]]
        Keys are relative branch paths '{git-user}/{repo-name}/{branch-name}'.
        Values are lists of module filenames relative to their parent branch.

    run_tests : bool, default=False
        Whether or not to run doctests for successful downloads.

    run_download : bool, default=False
        Additional safeguard to ensure no modules are accidentally downloaded.

    Examples
    --------
    >>> from datopy.workflow import git_module_loader

    >>> modules = {'gitusername/repo/branch': ['module1.py', 'module2.py']}
    >>> git_module_loader(modules, run_tests=True, run_download=True)
    Module gitusername/repo/branch/module1.py does not exist.
    Module gitusername/repo/branch/module2.py does not exist.

    >>> modules = {"HIPS/autograd/master":
    ...     ['autograd/tracer.py', 'autograd/util.py']}
    >>> git_module_loader(modules, run_tests=False, run_download=False)
    Skipping download.
    Skipping download.
    """

    for repo in modules:
        for module in modules[repo]:
            module_url = f"https://raw.githubusercontent.com/{repo}/{module}"
            exists = requests.head(
                module_url, allow_redirects=False).status_code == 200

            if not exists:
                print(f"Module {repo}/{module} does not exist.")
                continue

            filename = os.path.join(save_dir, os.path.basename(module))
            if os.path.isfile(filename):
                print(f"Module {repo}/{module} already downloaded.")
                continue

            if not run_download:
                print("Skipping download.")
                continue

            print(f"Downloading {repo}/{module}.")
            os.makedirs(save_dir, exist_ok=True)
            urllib.request.urlretrieve(url=module_url, filename=filename)

            if run_tests:
                print('Running tests:\n')
                module_name = module.split('/')[-1].split('.')[0]
                mod = importlib.import_module(module_name)
                doctest.testmod(mod, verbose=True)



### Efficient testing


[docs]
def doctest_function(object: Callable[..., Any], globs: dict[str, Any],
                     verbose=True) -> None:
    """
    Convenience wrapper to run doctests for a specific function or class.

    Parameters
    ----------
    object : Callable[..., Any]
        Class, function, or other object with doctests to be run.
    globs : dict[str, Any]
        Global variables from module of interest.

    See Also
    --------
    datopy.run_doctests.run_doctest_suite :
        Simultaneously run all doctests across modules.
    """
    print('-------------------------------------------------------')
    finder = doctest.DocTestFinder(verbose=verbose, recurse=False)
    runner = doctest.DocTestRunner(verbose=verbose)
    for test in finder.find(obj=object, globs=globs):
        results = runner.run(test)
    print('-------------------------------------------------------')
    print(results)




[docs]
def main():
    import doctest

    # Comment out (2) to run all tests in script; (1) to run specific tests
    doctest.testmod(verbose=True)

    # doctest_function(git_module_loader, globs=globals())


if __name__ == "__main__":
    main()