"""
Tools for visualizing matrix and dataframe operations.
Notes
-----
Adapted from [1]_ to support NumPy arrays, in addition to Pandas
data frames.
References
----------
.. [1] VanderPlas, J. (2016). Python data science handbook: Essential tools
for working with data. O'Reilly Media, Inc.
----
"""
import sys
import doctest
import numpy as np
import pandas as pd
from typing import Any, Iterable
from datopy.workflow import doctest_function
from datopy.util._numpydoc_validate import numpydoc_validate_module
def make_df(cols: Iterable[Any], ind: Iterable[Any]) -> pd.DataFrame:
    """
    Generate a data frame with a simple structure for conducting tests.

    Each cell holds the column label followed by the row label, both
    converted with ``str()``.

    Parameters
    ----------
    cols : Iterable[Any]
        An iterable with items representing column names.
    ind : Iterable[Any]
        An iterable with items representing row names. One-shot iterators
        (e.g., generators) and plain strings are accepted; they are
        expanded into a list of labels before use.

    Returns
    -------
    pd.DataFrame
        A pandas data frame with one row per item of `ind` and one column
        per distinct item of `cols`.

    Examples
    --------
    >>> from datopy.inspection import make_df
    >>> import pandas as pd

    >>> make_df("ABC", [1,2,3])
        A   B   C
    1  A1  B1  C1
    2  A2  B2  C2
    3  A3  B3  C3

    >>> make_df([1,2,3], ("A", "B", "C"))
        1   2   3
    A  1A  2A  3A
    B  1B  2B  3B
    C  1C  2C  3C

    Row labels may also come from a generator:

    >>> make_df("AB", (i for i in range(2)))
        A   B
    0  A0  B0
    1  A1  B1
    """
    # `ind` is traversed once per column and once more to build the index.
    # A one-shot iterator would be exhausted after the first column, and
    # `pd.Index` rejects a bare string, so expand those two cases into a
    # list. Re-iterable inputs (lists, tuples, ranges, existing Index
    # objects) are passed through untouched.
    if isinstance(ind, str) or iter(ind) is ind:
        ind = list(ind)
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    return pd.DataFrame(data=data, index=pd.Index(ind))
[docs]
def display(
*args,
globs: dict[str, Any] | None = None,
bold: bool = True
) -> None:
"""
Display an informative representation of multiple objects side-by-side.
Parameters
----------
*args : tuple
Tuple of expressions to evaluate and display.
globs : dict[str, Any], default=None
Global namespace, to give eval() access to nonlocals passed by name.
bold : bool, default=True
Option to enable/disable string styling.
Warnings
--------
This function uses `eval()` to render expressions it receives
as strings. Access to variables in the global namespace is controlled
by `globs`. Take care to only pass trusted expressions to the function.
Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> from datopy.inspection import display, make_df
Data frame example
>>> df1 = make_df('AB', [1, 2]); df2 = make_df('AB', [3, 4])
>>> display('df1', 'df2', 'pd.concat([df1, df2])', globs=globals(), bold=False)
<BLANKLINE>
df1
--- (2, 2) ---
A B
1 A1 B1
2 A2 B2
<BLANKLINE>
<BLANKLINE>
df2
--- (2, 2) ---
A B
3 A3 B3
4 A4 B4
<BLANKLINE>
<BLANKLINE>
pd.concat([df1, df2])
--- (4, 2) ---
A B
1 A1 B1
2 A2 B2
3 A3 B3
4 A4 B4
<BLANKLINE>
<BLANKLINE>
Matrix example
>>> A = np.array([[1, 3], [2, 4]]); x = np.array([[0, 1]])
>>> display("A", "x.T", "np.dot(A, x.T)", globs=globals(), bold=False)
<BLANKLINE>
A
--- (2, 2) ---
array([[1, 3],
[2, 4]])
<BLANKLINE>
<BLANKLINE>
x.T
--- (2, 1) ---
array([[0],
[1]])
<BLANKLINE>
<BLANKLINE>
np.dot(A, x.T)
--- (2, 1) ---
array([[3],
[4]])
<BLANKLINE>
<BLANKLINE>
"""
if globs is None:
globs = {}
output = ""
for arg in args:
name = '\033[1m' + arg + '\033[0m' if bold else arg
value = np.round(eval(arg, globs), 2)
shape = np.shape(value)
output += f"\n{name}\n--- {repr(shape)} ---\n{repr(value)}\n\n"
print(output)
return None
def main():
    """Run the module's doctests, then pass `__main__` to the numpydoc check."""
    # Two ways to exercise the docstring examples:
    #   (1) every doctest in this script (active by default), or
    #   (2) the doctests of one specific function.
    # Comment out (2) to run all tests in the script; comment out (1) to
    # run specific tests only.
    doctest.testmod(verbose=True)  # (1)
    # doctest_function(display, globs=globals())  # (2)

    this_module = sys.modules['__main__']
    numpydoc_validate_module(this_module)


if __name__ == "__main__":
    main()