# Source code for ribs.archives._archive_base

"""Provides ArchiveBase."""

from __future__ import annotations

from abc import ABC
from collections.abc import Collection, Iterator
from typing import Literal, overload

import numpy as np
from numpy.typing import ArrayLike

from ribs.archives._archive_data_frame import ArchiveDataFrame
from ribs.archives._archive_stats import ArchiveStats
from ribs.typing import BatchData, Int, SingleData


class ArchiveBase(ABC):
    """Base class for archives.

    An archive stores *elites*. Each elite consists of several data *fields*: at a
    minimum, the elite has a *solution* and the evaluated *objective* and *measures* of
    the solution. The elite may also include additional data fields. Besides elites,
    archives can store components like k-D trees and density estimators.

    The primary method of an archive is to write new solutions to it with :meth:`add`.
    There are also methods to read from the archive, such as :meth:`retrieve` and
    :meth:`data`. These methods typically operate over batches of inputs (e.g., adding
    multiple solutions at once with :meth:`add`), but methods such as
    :meth:`add_single` and :meth:`retrieve_single` support single inputs.

    Due to the flexibility of workflows available in pyribs, it is possible to design
    archives that require only a small subset of the methods in this base class. As
    such, none of the methods listed here are required to be implemented in child
    classes, although by default they will raise :class:`NotImplementedError` when
    called.

    Args:
        solution_dim: Dimensionality of the solution space. Scalar or
            multi-dimensional solution shapes are allowed by passing an empty tuple or
            tuple of integers, respectively.
        objective_dim: Dimensionality of the objective space. For single-objective
            optimization problems where the objective is a scalar, this argument
            should be an empty tuple ``()``. In multi-objective optimization problems,
            this argument should be an integer indicating the number of objectives.
        measure_dim: Dimensionality of the measure space.
    """

    def __init__(
        self,
        *,
        solution_dim: Int | tuple[Int, ...],
        objective_dim: tuple[()] | Int,
        measure_dim: Int,
    ) -> None:
        # The dimensions are stored as given (no validation or conversion) and are
        # exposed read-only through the properties below.
        self._solution_dim = solution_dim
        self._objective_dim = objective_dim
        self._measure_dim = measure_dim

    ## Properties of the archive ##

    @property
    def solution_dim(self) -> Int | tuple[Int, ...]:
        """Dimensionality of the solution space."""
        return self._solution_dim

    @property
    def objective_dim(self) -> tuple[()] | Int:
        """Dimensionality of the objective space.

        The empty tuple ``()`` indicates a scalar objective.
        """
        return self._objective_dim

    @property
    def measure_dim(self) -> Int:
        """Dimensionality of the measure space."""
        return self._measure_dim

    @property
    def field_list(self) -> list[str]:
        """List of data fields in the archive."""
        raise NotImplementedError(
            "`field_list` has not been implemented in this archive"
        )

    @property
    def dtypes(self) -> dict[str, np.dtype]:
        """Mapping from field name to dtype for all fields in the archive."""
        raise NotImplementedError("`dtypes` has not been implemented in this archive")

    @property
    def stats(self) -> ArchiveStats:
        """Statistics about the archive.

        See :class:`~ribs.archives.ArchiveStats` for more info.
        """
        raise NotImplementedError("`stats` has not been implemented in this archive")

    @property
    def empty(self) -> bool:
        """Whether the archive is empty."""
        raise NotImplementedError("`empty` has not been implemented in this archive")

    ## dunder methods ##

    def __len__(self) -> int:
        """Number of elites in the archive."""
        raise NotImplementedError("`__len__` has not been implemented in this archive")

    def __iter__(self) -> Iterator[SingleData]:
        """Creates an iterator over the elites in the archive.

        Example:

            ::

                for elite in archive:
                    elite["solution"]
                    elite["objective"]
                    elite["measures"]
                    ...
        """
        raise NotImplementedError("`__iter__` has not been implemented in this archive")

    ## Methods for writing to the archive ##

    def add(
        self,
        solution: ArrayLike,
        objective: ArrayLike,
        measures: ArrayLike,
        **fields: ArrayLike,
    ) -> BatchData:
        """Inserts a batch of solutions and their data into the archive.

        The indices of all arguments should "correspond" to each other, i.e.,
        ``solution[i]``, ``objective[i]``, and ``measures[i]`` should be the solution
        parameters, objective, and measures for solution ``i``.

        For API consistency, all child classes should take in ``solution``,
        ``objective``, and ``measures``. There may be cases where one of these
        parameters is not necessary, e.g., ``objective`` is not required in diversity
        optimization settings. In such cases, it should be possible to pass in ``None``
        as the argument.

        Args:
            solution: (batch_size, :attr:`solution_dim`) array of solution parameters.
            objective: (batch_size, :attr:`objective_dim`) array with objective
                function evaluations of the solutions.
            measures: (batch_size, :attr:`measure_dim`) array with measure space
                coordinates of all the solutions.
            fields: Additional data for each solution. Each argument should be an
                array with ``batch_size`` as the first dimension.

        Returns:
            Dict describing the result of the add operation. Each entry should be an
            array that provides the information for each solution, e.g., one entry
            might be a "status" array of shape ``(batch_size,)`` that provides the
            status of each solution. The exact keys and values are determined by child
            classes.
        """
        raise NotImplementedError("`add` has not been implemented in this archive")

    def add_single(
        self,
        solution: ArrayLike,
        objective: ArrayLike,
        measures: ArrayLike,
        **fields: ArrayLike,
    ) -> SingleData:
        """Inserts a single solution and its data into the archive.

        Args:
            solution: Parameters of the solution.
            objective: Objective function evaluation of the solution.
            measures: Coordinates in measure space of the solution.
            fields: Additional data for the solution.

        Returns:
            Information describing the result of the add operation. As in
            :meth:`add`, the content of this dict is determined by child classes.
        """
        raise NotImplementedError(
            "`add_single` has not been implemented in this archive"
        )

    def clear(self) -> None:
        """Resets the archive, e.g., by removing all elites in it.

        After calling this method, the archive should be :attr:`empty`.
        """
        raise NotImplementedError("`clear` has not been implemented in this archive")

    ## Methods for reading from the archive ##

    def retrieve(self, measures: ArrayLike) -> tuple[np.ndarray, BatchData]:
        """Queries the archive for elites with the given batch of measures.

        This method operates in batch. It takes in a batch of measures and outputs the
        batched data for the elites::

            occupied, elites = archive.retrieve(...)
            occupied  # Shape: (batch_size,)
            elites["solution"]  # Shape: (batch_size, solution_dim)
            elites["objective"]  # Shape: (batch_size, objective_dim)
            elites["measures"]  # Shape: (batch_size, measure_dim)
            ...

        ``occupied`` indicates whether an elite was found for each measure, i.e.,
        whether the archive was *occupied* at each queried measure. If ``occupied[i]``
        is True, then ``elites["solution"][i]``, ``elites["objective"][i]``,
        ``elites["measures"][i]``, and other fields will contain the data of the elite
        for the input ``measures[i]``. If ``occupied[i]`` is False, then those fields
        will instead have arbitrary values, e.g., ``elites["solution"][i]`` may be set
        to all NaN.

        Args:
            measures: (batch_size, :attr:`measure_dim`) array of measure space points
                at which to retrieve solutions.

        Returns:
            2-element tuple of (boolean ``occupied`` array, dict of elite data). See
            above for description.

        Raises:
            ValueError: ``measures`` is not of shape (batch_size,
                :attr:`measure_dim`).
            ValueError: ``measures`` has non-finite values (inf or NaN).
        """
        raise NotImplementedError("`retrieve` has not been implemented in this archive")

    def retrieve_single(self, measures: ArrayLike) -> tuple[bool, SingleData]:
        """Queries the archive for an elite with the given measures.

        While :meth:`retrieve` takes in a *batch* of measures, this method takes in the
        measures for only *one* solution and returns a single bool and a dict with
        single entries::

            occupied, elite = archive.retrieve_single(...)
            occupied  # Bool
            elite["solution"]  # Shape: (solution_dim,)
            elite["objective"]  # Shape: (objective_dim,)
            elite["measures"]  # Shape: (measure_dim,)
            ...

        Args:
            measures: (:attr:`measure_dim`,) array of measures.

        Returns:
            2-element tuple of (boolean, dict of data for one elite)

        Raises:
            ValueError: ``measures`` is not of shape (:attr:`measure_dim`,).
            ValueError: ``measures`` has non-finite values (inf or NaN).
        """
        raise NotImplementedError(
            "`retrieve_single` has not been implemented in this archive"
        )

    # The overloads below narrow the return type of `data` for type checkers: a single
    # str field yields one array regardless of `return_type`; otherwise the return
    # type follows `return_type`.

    @overload
    def data(
        self,
        fields: str,
        return_type: Literal["dict", "tuple", "pandas"] = "dict",
    ) -> np.ndarray: ...

    @overload
    def data(
        self,
        fields: None | Collection[str] = None,
        return_type: Literal["dict"] = "dict",
    ) -> BatchData: ...

    # The tuple holds one array per requested field, so it is variable-length
    # (`tuple[np.ndarray, ...]`) rather than a 1-tuple (`tuple[np.ndarray]`).
    @overload
    def data(
        self,
        fields: None | Collection[str] = None,
        return_type: Literal["tuple"] = "tuple",
    ) -> tuple[np.ndarray, ...]: ...

    @overload
    def data(
        self,
        fields: None | Collection[str] = None,
        return_type: Literal["pandas"] = "pandas",
    ) -> ArchiveDataFrame: ...

    def data(
        self,
        fields: None | Collection[str] | str = None,
        return_type: Literal["dict", "tuple", "pandas"] = "dict",
    ) -> np.ndarray | BatchData | tuple[np.ndarray, ...] | ArchiveDataFrame:
        """Returns data of the elites in the archive.

        Args:
            fields: List of fields to include, such as ``"solution"``,
                ``"objective"``, ``"measures"``, and other fields in the archive. This
                can also be a single str indicating a field name.
            return_type: Data to return; see below. Ignored if ``fields`` is a str.

        Returns:
            The data for all elites in the archive. All data returned by this method
            will be a copy, i.e., the data will not update as the archive changes.

            If ``fields`` was a single str, the returned data will just be an array
            holding data for the given field, such as::

                measures = archive.data("measures")

            Otherwise, the returned data can take the following forms, depending on
            the ``return_type`` argument:

            - ``return_type="dict"``: Dict mapping from the field name to the field
              data for all elites in the archive. An example is::

                  {
                    "solution": [[1.0, 1.0, ...], ...],
                    "objective": [1.5, ...],
                    "measures": [[1.0, 2.0], ...],
                    ...
                  }

              The keys in this dict can be modified with the ``fields`` arg; duplicate
              fields will be ignored since the dict stores unique keys.

            - ``return_type="tuple"``: Tuple of arrays matching the field order in
              ``fields``. For instance, if ``fields`` is ``["objective",
              "measures"]``, this method would return a tuple of ``(objective_arr,
              measures_arr)`` that could be unpacked as::

                  objective, measures = archive.data(["objective", "measures"],
                                                     return_type="tuple")

              Unlike with the ``dict`` return type, duplicate fields will show up as
              duplicate entries in the tuple, e.g., ``fields=["objective",
              "objective"]`` will result in two objective arrays being returned.

              When ``fields=None`` (the default case), the fields in the tuple will be
              ordered according to the :attr:`field_list`.

            - ``return_type="pandas"``: An :class:`~ribs.archives.ArchiveDataFrame`
              with the following columns:

              - For fields that are scalars, a single column with the field name. For
                example, ``objective`` would have a single column called
                ``objective``.
              - For fields that are 1D arrays, multiple columns with the name suffixed
                by its index. To illustrate, for a ``measures`` field of length 10,
                the dataframe would contain 10 columns with names ``measures_0``,
                ``measures_1``, ..., ``measures_9``. **The output format for fields
                with >1D data is currently not defined.**

              In short, the dataframe might look like this by default:

              +------------+------+-----------+------------+------+
              | solution_0 | ...  | objective | measures_0 | ...  |
              +============+======+===========+============+======+
              |            | ...  |           |            | ...  |
              +------------+------+-----------+------------+------+

            Like the other return types, the columns returned can be adjusted with the
            ``fields`` parameter.

        Raises:
            ValueError: Invalid field name provided.
            ValueError: Invalid return_type provided.
            ValueError: Passed ``return_type="pandas"`` when one of the fields has >1D
                data.
        """
        raise NotImplementedError("`data` has not been implemented in this archive")

    def sample_elites(self, n: Int, replace: bool = True) -> BatchData:
        """Randomly samples elites from the archive.

        Currently, this sampling is done uniformly at random, either with or without
        replacement. Additional sampling methods may be supported in the future.

        Example:

            ::

                elites = archive.sample_elites(16)
                elites["solution"]  # Shape: (16, solution_dim)
                elites["objective"]
                elites["measures"]
                ...

        Args:
            n: Number of elites to sample.
            replace: Whether to replace the elites when sampling. If True, the elites
                will be replaced and thus will be sampled independently.

        Returns:
            A batch of elites randomly selected from the archive.

        Raises:
            IndexError: The archive is empty.
            ValueError: ``n`` was greater than the number of elites in the archive
                when ``replace=False``.
        """
        raise NotImplementedError(
            "`sample_elites` has not been implemented in this archive"
        )