Source code for anim.data

from dataclasses import dataclass, field

import numpy as np
import pandas
import xarray as xr
import zarr

from anim.tools import Timing


[docs] @dataclass class Stats: img_name: str = None # filled in `process` img_building: float = np.nan # filled in `process` img_saving: float = np.nan # filled in `process` time_data_compress: float = np.nan # filled in `dump_data` time_data_uncompress: float = np.nan # filled in `load_data` time_data_computation: float = np.nan # filled in `animate` size_data_uncompressed: float = np.nan # filled in `dump_data` size_data_compressed: float = np.nan # filled in `dump_data` def __or__(self, other): stat = Stats() stat.img_name = other.img_name if other.img_name is not None else self.img_name for k, v in other.__dict__.items(): if k == "img_name": continue if not np.isnan(v): setattr(stat, k, v) else: setattr(stat, k, getattr(self, k)) return stat def to_dict(self): return self.__dict__ def __str__(self): msg = [] data = [] # data=(compute=2500ms, size=3Mo->2Mo(250.02ms)|image=(build=250ms,save=350ms) # normally, this one is always setup if not np.isnan(self.time_data_computation): # is not None: # msg = msg + f", compute={self.time_data_computation*1e3:.2f}ms" data.append(f"compute={self.time_data_computation*1e3:6.2f}ms") # normally, this one is always setup if not np.isnan(self.size_data_uncompressed): # is not None: # msg = msg + f"size {self.size_data_uncompressed/1e6:.2f}Mo" data.append(f"size={self.size_data_uncompressed/1e3:6.2f}Ko") if not np.isnan(self.size_data_compressed): # is not None: # msg = msg + f"->{self.size_data_compressed/1e6:.2f}Mo" data[-1] += f"->{self.size_data_compressed/1e3:6.2f}Ko" if not np.isnan(self.time_data_compress): # is not None: # msg = msg + f", compress={self.time_data_compress*1e3:.2f}ms" data[-1] += f"[{self.time_data_compress*1e3:6.2f}ms]" msg_data = ",".join(data) msg_data = f"data=({msg_data})" msg.append(msg_data) img = [] if not np.isnan(self.img_building): # is not None: img.append(f"build={self.img_building*1e3:6.2f}ms") if not np.isnan(self.img_saving): # is not None: img.append(f"save={self.img_saving*1e3:6.2f}ms") 
if len(img) > 0: msg_img = ",".join(img) msg_img = f"image=({msg_img})" msg.append(msg_img) msg = "|".join(msg) return msg
class StatStorage:
    """Accumulate one merged stats record per image name."""

    def __init__(self):
        """Create an empty storage."""
        # Maps img_name -> merged record for that image.
        self.data = {}

    def __call__(self, stat: "Stats"):
        """Record ``stat``, merging it with any record stored under the same ``img_name``."""
        img_name = stat.img_name
        if img_name in self.data:
            # Merge through the record's `|` operator and store the result.
            self.data[img_name] |= stat
        else:
            self.data[img_name] = stat

    def __getitem__(self, stat):
        """Return the stored record whose name is ``stat.img_name``.

        Raises:
            KeyError: if nothing was recorded under that name.
        """
        return self.data[stat.img_name]

    def build_dataframe(self):
        """Return the stored records as a ``pandas.DataFrame``, one row per image.

        The ``img_name`` column is dropped.  ``size_*`` columns are divided
        by 1e6 and labelled ``(Mo)``, ``time_*`` columns are multiplied by
        1e3 and labelled ``(ms)``, and ``img_*`` columns are left unscaled
        and labelled ``(s)``.  An empty storage yields an empty frame that
        still carries the labelled columns.
        """
        img_keys = ("img_building", "img_saving")
        time_keys = ("time_data_compress", "time_data_uncompress", "time_data_computation")
        size_keys = ("size_data_uncompressed", "size_data_compressed")

        # Column label for every numeric field: the "img_" / "time_" / "size_"
        # prefix is stripped and the unit appended.
        units = {k: f"{k[4:]} (s)" for k in img_keys}
        units.update((k, f"{k[5:]} (ms)") for k in time_keys)
        units.update((k, f"{k[5:]} (Mo)") for k in size_keys)

        if not self.data:
            # No record yet: there is no "img_name" column to drop and
            # nothing to rescale.
            return pandas.DataFrame(columns=list(units.values()))

        df = pandas.DataFrame([stat.to_dict() for stat in self.data.values()])
        del df["img_name"]
        for k in size_keys:
            df[k] = df[k] / 1e6
        for k in time_keys:
            df[k] = df[k] * 1e3
        df.columns = [units[k] for k in df.columns]
        return df

    @property
    def size(self):
        """Number of distinct image names recorded."""
        return len(self.data)
@dataclass
class AnimationInfo:
    """Bundle of options describing how animation images are handled.

    The code consuming these options is not part of this module, so the
    per-field notes below are assumptions to confirm against the callers.
    """

    # NOTE(review): presumably the naming pattern of the produced images — confirm.
    imagePatern: str
    # NOTE(review): presumably toggles a check for an already existing image — confirm.
    checkIfImageExist: bool = False
    # NOTE(review): presumably restricts the run to the computation step — confirm.
    onlyCompute: bool = False
    # Each instance gets its own empty dict by default.
    # NOTE(review): presumably forwarded to a `savefig` call — confirm.
    savefig_kwargs: dict = field(default_factory=dict)
def zarr_weight(group):
    """Return the summed ``nbytes_stored`` of every array listed by ``group.array_keys()``.

    An empty group weighs 0.
    """
    total = 0
    for name in group.array_keys():
        total += group[name].nbytes_stored
    return total
def load_data(raw: xr.Dataset | zarr.hierarchy.Group):
    """Return ``raw`` as a loaded dataset together with the matching ``Stats``.

    Anything that is not a ``zarr.hierarchy.Group`` is handed back
    untouched with an empty ``Stats``.  A zarr group is opened from its
    store with ``xr.open_zarr(..., chunks=None)``, loaded, and given the
    group's attributes; ``time_data_uncompress`` is set from the ``Timing``
    context (``timing.dt``).

    Returns:
        A ``(dataset, stats)`` tuple.
    """
    if not isinstance(raw, zarr.hierarchy.Group):
        return raw, Stats()

    with Timing() as timer:
        dataset = xr.open_zarr(raw.store, chunks=None).load()
        # Copy the attributes held by the group onto the loaded dataset.
        dataset.attrs.update(raw.attrs)

    stats = Stats(time_data_uncompress=timer.dt)
    return dataset, stats
def dump_data(
    ds: xr.Dataset | zarr.hierarchy.Group,
    max_size=1e6,
    encoding: dict | None = None,
):
    """Store ``ds`` in a zarr group when it is larger than ``max_size``.

    Args:
        ds: the dataset to dump, or a zarr group holding data that is
            already compressed (returned unchanged).
        max_size: threshold compared with ``ds.nbytes``; a dataset that is
            not strictly larger is returned as is.
        encoding: forwarded to ``Dataset.to_zarr``.

    Returns:
        A ``(data, stats)`` tuple.  ``data`` is ``ds`` itself, or the new
        zarr group it was written to.  ``stats`` always carries
        ``size_data_uncompressed``; ``size_data_compressed`` is set whenever
        ``data`` is a zarr group, and ``time_data_compress`` only when the
        compression happened in this call.
    """
    if isinstance(ds, zarr.hierarchy.Group):
        # Data is already compressed.  Sizes are summed over the arrays:
        # the per-array `nbytes` is used because the group object itself is
        # not relied upon to expose an `nbytes` attribute.
        stats = Stats(
            size_data_uncompressed=sum(ds[name].nbytes for name in ds.array_keys()),
            size_data_compressed=zarr_weight(ds),
        )
        return ds, stats

    stats = Stats(size_data_uncompressed=ds.nbytes)
    if ds.nbytes > max_size:
        zg = zarr.group()
        with Timing() as timing:
            ds.to_zarr(zg.store, mode="w", encoding=encoding)
        # Read after the `with` block, once the timing context has exited.
        stats.size_data_compressed = zarr_weight(zg)
        stats.time_data_compress = timing.dt
        return zg, stats

    return ds, stats