# Source code for datopy.workflow

"""
Tools for data input/output.

Included:

- Saving figures and Jupyter runtime environment files
- Manually downloading modules
"""

import os
import sys
import doctest
import requests
import importlib
import urllib.request
from typing import Dict, List, Any, Callable

from datopy.util._numpydoc_validate import numpydoc_validate_module

# TODO: rename this io.py?

# TODO: save_figs

# TODO: save_drive


[docs] def git_module_loader( modules: Dict[str, List[str]], save_dir: str | None = None, run_tests: bool = False, run_download: bool = False ) -> None: """ Download collections of modules directly from their Git repo. Retrieved files are stored in the current directory. Parameters ---------- modules : Dict[str, List[str]] Keys are relative branch paths '{git-user}/{repo-name}/{branch-name}'. Values are lists of module filenames relative to their parent branch. run_tests : bool, default=False Whether or not to run doctests for successful downloads. run_download : bool, default=False Additional safeguard to ensure no modules are accidentally downloaded. Examples -------- >>> from datopy.workflow import git_module_loader >>> modules = {'gitusername/repo/branch': ['module1.py', 'module2.py']} >>> git_module_loader(modules, run_tests=True, run_download=True) Module gitusername/repo/branch/module1.py does not exist. Module gitusername/repo/branch/module2.py does not exist. >>> modules = {"HIPS/autograd/master": ... ['autograd/tracer.py', 'autograd/util.py']} >>> git_module_loader(modules, run_tests=False, run_download=False) Skipping download. Skipping download. 
""" if not save_dir: save_dir = f"{os.path.dirname(os.path.abspath(__file__))}" else: pass for repo in modules: for module in modules[repo]: module_url = f"https://raw.githubusercontent.com/{repo}/{module}" exists = requests.head( module_url, allow_redirects=False).status_code == 200 if not exists: print(f"Module {repo}/{module} does not exist.") continue filename = os.path.join(save_dir, os.path.basename(module)) if os.path.isfile(filename): print(f"Module {repo}/{module} already downloaded.") continue if not run_download: print("Skipping download.") continue print(f"Downloading {repo}/{module}.") os.makedirs(save_dir, exist_ok=True) urllib.request.urlretrieve(url=module_url, filename=filename) if run_tests: print('Running tests:\n') module_name = module.split('/')[-1].split('.')[0] mod = importlib.import_module(module_name) doctest.testmod(mod, verbose=True)
# -- Efficient testing -------------------------------------------------------
def doctest_function(object: Callable[..., Any],
                     globs: dict[str, Any],
                     verbose: bool = True) -> None:
    """
    Run doctests for a specific function or class.

    Only the docstring of `object` itself is searched; contained objects
    (e.g. the methods of a class) are not visited. The test results are
    printed between two separator lines.

    Parameters
    ----------
    object : Callable[..., Any]
        Class, function, or other object with doctests to be run.
    globs : dict[str, Any]
        Global variables from module of interest.
    verbose : bool, default=True
        Passed to both the doctest finder and runner to control how much
        detail they print.

    See Also
    --------
    datopy.run_doctests.run_doctest_suite : Simultaneously run all doctests
        across modules.
    """
    print('-------------------------------------------------------')
    finder = doctest.DocTestFinder(verbose=verbose, recurse=False)
    runner = doctest.DocTestRunner(verbose=verbose)

    # The finder yields nothing for an object without doctest examples, so
    # start from an empty result instead of leaving `results` unbound.
    results = doctest.TestResults(failed=0, attempted=0)
    for test in finder.find(obj=object, globs=globs):
        results = runner.run(test)

    print('-------------------------------------------------------')
    print(results)


def main() -> None:
    """
    Script entry point: pass the running module to numpydoc_validate_module.

    The two commented-out lines are alternative doctest runs that can be
    enabled by hand:

    (1) run every doctest in this script;
    (2) run the doctests of a single function.
    """
    # (1) doctest.testmod(verbose=True)
    # (2) doctest_function(git_module_loader, globs=globals())

    numpydoc_validate_module(sys.modules['__main__'])


if __name__ == "__main__":
    main()