
"""Provides ArchiveDataFrame."""
import re

import pandas as pd

# Developer Notes:
# - The documentation for this class is hacked -- to add new methods, manually
#   modify the template in docs/_templates/autosummary/class.rst
# - See here for info on extending DataFrame:
#   https://pandas.pydata.org/pandas-docs/stable/development/extending.html


class ArchiveDataFrame(pd.DataFrame):
    """A modified :class:`~pandas.DataFrame` for archive data.

    As this class inherits from :class:`~pandas.DataFrame`, it has the same
    methods, attributes, and arguments (even though the arguments shown here
    are ``*args`` and ``**kwargs``). However, this class adds methods that
    make it convenient to work with elites. This documentation only lists
    these additional methods and attributes.

    Example:

        This object is created by :meth:`~ArchiveBase.data` (i.e. users
        typically do not create it on their own)::

            df = archive.data(..., return_type="pandas")

        To iterate through every elite as a dict, use::

            for elite in df.iterelites():
                elite["solution"]  # Shape: (solution_dim,)
                elite["objective"]
                ...

        Arrays corresponding to individual fields can be accessed with
        :meth:`get_field`. For instance, the following is an array where entry
        ``i`` contains the measures of the ``i``'th elite in the DataFrame::

            df.get_field("measures")

        .. warning::

            Calling :meth:`get_field` always creates a copy, so the following
            will copy the measures 3 times::

                df.get_field("measures")[0]
                df.get_field("measures").mean()
                df.get_field("measures").median()

            **Thus, if you need to use the method several times, we recommend
            storing it first, like so**::

                measures = df.get_field("measures")
                measures[0]
                measures.mean()
                measures.median()

    .. note::

        After saving an ArchiveDataFrame to a CSV, loading it with
        :func:`pandas.read_csv` will load a :class:`~pandas.DataFrame`. To
        load a CSV as an ArchiveDataFrame, pass the ``DataFrame`` from
        ``read_csv`` to ArchiveDataFrame::

            df = ArchiveDataFrame(pd.read_csv("file.csv"))

    .. note::

        Results of :meth:`get_field` "align" with each other -- e.g.
        ``get_field("measures")[i]`` corresponds to ``get_field("index")[i]``,
        ``get_field("objective")[i]``, and ``get_field("solution")[i]``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def _constructor(self):
        return ArchiveDataFrame
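
    # Note: ``_constructor`` above tells pandas which class to use when an
    # operation such as slicing builds a new frame, so results stay
    # ArchiveDataFrame instead of reverting to a plain DataFrame (see the
    # extending guide linked in the developer notes).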

    def iterelites(self):
        """Iterator that outputs every elite in the ArchiveDataFrame as a
        dict."""
        # Identify fields in the data frame. There are some edge cases here,
        # such as if someone purposely names their field with an underscore
        # and a number at the end like "foobar_0", but it covers most cases.
        fields = {}
        for col in self:
            split = col.split("_")
            if len(split) == 1:
                # Single column.
                fields[col] = None
            elif split[-1].isdigit():
                # If the last item in the split is numerical, this should
                # match vector fields like "measures_0".

                # Exclude last val and underscore - note negative sign.
                field_name = col[:-(len(split[-1]) + 1)]
                fields[field_name] = None
            else:
                fields[col] = None

        # Retrieve field data.
        for name in fields:
            fields[name] = self.get_field(name)

        n_elites = len(self)

        return map(
            lambda i: {name: arr[i] for name, arr in fields.items()},
            range(n_elites),
        )
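
    # For illustration (hypothetical column names): a frame with columns
    # ["objective", "measures_0", "measures_1"] produces the field names
    # "objective" and "measures", so ``iterelites()`` yields dicts such as
    # {"objective": 1.2, "measures": array([0.5, 0.3])}.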

    def get_field(self, field):
        """Array holding the data for the given field.

        None if there is no data for the field.
        """
        # Note: The column names cannot be pre-computed because the DataFrame
        # columns might change in-place, e.g., when a column is deleted.
        if field in self:
            # Scalar field -- e.g., "objective"
            return self[field].to_numpy(copy=True)
        else:
            # Vector field -- e.g., field="measures" and we want columns like
            # "measures_0" and "measures_1"
            field_re = f"{field}_\\d+"
            cols = [c for c in self if re.fullmatch(field_re, c)]
            return self[cols].to_numpy(copy=True) if cols else None
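

# A minimal usage sketch (not part of the library API): the data and column
# names below ("objective", "measures_0", "measures_1") are hypothetical and
# simply follow the scalar/vector naming convention that ``get_field``
# expects.
if __name__ == "__main__":
    df = ArchiveDataFrame({
        "objective": [1.0, 2.0],
        "measures_0": [0.1, 0.2],
        "measures_1": [0.3, 0.4],
    })

    # Scalar field: a 1D array of shape (2,).
    print(df.get_field("objective"))

    # Vector field: a 2D array of shape (2, 2) assembled from the
    # "measures_0" and "measures_1" columns.
    print(df.get_field("measures"))

    # Each elite is a dict mapping field name to that row's value(s).
    for elite in df.iterelites():
        print(elite["objective"], elite["measures"])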