"""Summary table dataframe helpers.
You can annotate the format of different values.
"""
import datetime
import enum
from dataclasses import dataclass
import pandas as pd
# Display template for each Format member. The templates use named
# replacement fields in str.format syntax: ``v`` for plain values and
# ``days`` / ``hours`` / ``minutes`` for durations.
# NOTE(review): presumably filled in by format_value(), which is not visible
# in this part of the file -- confirm there how the duration fields are derived.
FORMATTERS = {
    # Fixed-point with zero decimals.
    Format.integer: "{v:.0f}",
    # Percent presentation: value is multiplied by 100, two decimals, "%" suffix.
    Format.percent: "{v:.2%}",
    # "$" prefix, thousands separators, two decimals.
    Format.dollar: "${v:,.2f}",
    # Durations have two templates; as_duration() picks one based on v.days.
    Format.duration_days_hours: "{days} days {hours} hours",
    Format.duration_hours_minutes: "{hours} hours {minutes} minutes",
    # Fixed-point with zero decimals followed by the word "bars".
    Format.num_bars: "{v:.0f} bars",
    # Constant placeholder; the template has no replacement fields.
    Format.missing: "-",
    # Fixed-point with two decimals.
    Format.decimal: "{v:.2f}"
}
@dataclass(slots=True)
class Value:
    """A raw value paired with the :py:class:`Format` used to display it.

    Instances are normally created through the ``as_*`` helper functions
    of this module rather than directly.
    """

    # The underlying value. Any object is accepted; as_missing() stores None.
    v: object

    # The Format member describing how ``v`` should be rendered.
    format: Format

    def __str__(self) -> str:
        # Rendering is delegated to the module-level format_value() helper so
        # that str(value) and the summary table agree on the output.
        return format_value(self)
def as_dollar(v) -> Value:
    """Tag a value for display as US dollars.

    :param v: The amount to display
    :return: A :py:class:`Value` carrying ``v`` with ``Format.dollar``
    """
    return Value(v=v, format=Format.dollar)
def as_integer(v) -> Value:
    """Tag a value for display as an integer.

    :param v: The number to display
    :return: A :py:class:`Value` carrying ``v`` with ``Format.integer``
    """
    return Value(v=v, format=Format.integer)
def as_percent(v) -> Value:
    """Tag a value for display as a percent.

    :param v: The ratio to display
    :return: A :py:class:`Value` carrying ``v`` with ``Format.percent``
    """
    return Value(v=v, format=Format.percent)
def as_duration(v: datetime.timedelta) -> Value:
    """Tag a timedelta for display as a duration.

    The format is chosen from the ``days`` component of the timedelta:
    ``Format.duration_days_hours`` when it is positive, otherwise
    ``Format.duration_hours_minutes``.

    :param v: The duration to display
    :return: A :py:class:`Value` carrying ``v`` with the chosen duration format
    """
    spans_full_day = v.days > 0
    chosen_format = (
        Format.duration_days_hours
        if spans_full_day
        else Format.duration_hours_minutes
    )
    return Value(v, chosen_format)
def as_bars(v: float) -> Value:
    """Tag a value for display as a number of bars.

    Only fully completed bars are counted: the fractional part is dropped
    with ``int()``, which truncates toward zero (i.e. rounds down for
    non-negative input).

    :param v: Number of bars, possibly fractional
    :return: A :py:class:`Value` carrying the whole bar count with ``Format.num_bars``
    """
    whole_bars = int(v)
    return Value(v=whole_bars, format=Format.num_bars)
def as_missing() -> Value:
    """Create a placeholder for a missing value, e.g. because of division by zero.

    :return: A :py:class:`Value` carrying ``None`` with ``Format.missing``
    """
    return Value(v=None, format=Format.missing)
def as_decimal(v: float) -> Value:
    """Tag a value for display as a decimal number.

    :param v: The number to display
    :return: A :py:class:`Value` carrying ``v`` with ``Format.decimal``
    """
    return Value(v=v, format=Format.decimal)
[docs]def create_summary_table(data: dict, column_names: list[str] | str | None = None, index_name: str | None = None) -> pd.DataFrame:
"""Create a summary table from a human readable data.
* Keys are human readable labels
* Values are instances of :py:class:`Value`
TODO: If column_names is not provided, we get column header "zero" that needs to be hidden.
:param data: Human readable data in the form of a dict
:param column_names: Column names for the dataframe. If None, no column names are used.
:param index_name: Name of the index column. If None, no index name is used.
:return: A styled pandas dataframe
"""
formatted_data = {}
counter = 0
list_length = 0
for k, v in data.items():
if isinstance(v, Value):
formatted_data[k] = format_value(v)
elif isinstance(v, list):
if counter == 0:
list_length = len(v)
else:
assert len(v) == list_length, f"If one value in the dict is a list, all values must be lists of the same length. Expected list of length {list_length}, got {v}"
formatted_data[k] = format_values(v)
counter += 1
df = pd.DataFrame.from_dict(formatted_data, orient="index")
if column_names is not None:
if isinstance(column_names, str):
column_names = [column_names]
df.columns = column_names
if index_name is not None:
df.index.name = index_name
# https://pandas.pydata.org/docs/dev/reference/api/pandas.io.formats.style.Styler.hide.html
df.style.hide(axis="index", names=True)
df.style.hide(axis="columns", names=False)
# df.style.hide_columns()
df.style.set_table_styles([
{'selector': 'thead', 'props': [('display', 'none')]}
])
return df