"""Kernel ridge regression: Model class and utilities
Public classes:
KRR Kernel Ridge Regression model (sparse GPR only).
Public functions:
compute_KNM Compute GAP kernel of a set of structures
train_gap_model Train a GAP model given a kernel matrix and sparse points
"""
from ..utils import BaseIO
from ..lib import compute_sparse_kernel_gradients, compute_sparse_kernel_neg_stress
import numpy as np
import ase
class KRR(BaseIO):
    """Kernel Ridge Regression model. Only supports sparse GPR
    training for the moment.

    Parameters
    ----------
    weights : np.array
        weights of the model
    kernel : Kernel
        kernel class used to train the model
    X_train : SparsePoints
        reference samples used for the training
    self_contributions : dictionary
        map atomic number to the property baseline, e.g. isolated atoms
        energies when the model has been trained on total energies.
    description : string
        User-defined string used to describe the model for future reference
    units : dict
        Energy and length units used by the model (default: eV and Å (aka AA),
        same as used in ASE)
    """

    def __init__(
        self,
        weights,
        kernel,
        X_train,
        self_contributions,
        description="KRR potential model",
        units=None,
    ):
        # Weights of the krr model
        self.weights = weights
        self.kernel = kernel
        self.X_train = X_train
        self.self_contributions = self_contributions
        self.target_type = kernel.target_type
        self.description = description
        # Work on a copy so filling in default units does not mutate the
        # caller's dictionary.
        units = dict() if units is None else dict(units)
        units.setdefault("energy", "eV")
        units.setdefault("length", "AA")
        self.units = units

    def _get_property_baseline(self, managers):
        """Build the total baseline (self-contribution) for each prediction.

        For "Structure" targets the baseline of a structure is the sum of the
        per-species self-contributions of its atoms; for "Atom" targets each
        center gets its own species baseline.

        Parameters
        ----------
        managers : AtomsList or list(ase.Atoms)
            structures to compute the baseline for

        Returns
        -------
        np.array
            baseline value per structure ("Structure") or per center ("Atom")

        Raises
        ------
        ValueError
            if ``self.target_type`` is neither "Structure" nor "Atom"
        """
        if self.target_type == "Structure":
            Y0 = np.zeros(len(managers))
            for i_manager, manager in enumerate(managers):
                # managers may hold either raw ase.Atoms or structure managers
                if isinstance(manager, ase.Atoms):
                    numbers = manager.get_atomic_numbers()
                    for sp in numbers:
                        Y0[i_manager] += self.self_contributions[sp]
                else:
                    for at in manager:
                        Y0[i_manager] += self.self_contributions[at.atom_type]
        elif self.target_type == "Atom":
            n_centers = 0
            for manager in managers:
                n_centers += len(manager)
            Y0 = np.zeros(n_centers)
            i_center = 0
            for manager in managers:
                for center in manager:
                    Y0[i_center] = self.self_contributions[center.atom_type]
                    i_center += 1
        else:
            # previously this fell through and raised an opaque
            # UnboundLocalError on the return statement
            raise ValueError(
                "unknown target_type '{}'; expected 'Structure' or 'Atom'".format(
                    self.target_type
                )
            )
        return Y0

    def predict(self, managers, KNM=None):
        """Predict properties associated with the atomic structures in managers.

        Parameters
        ----------
        managers : AtomsList
            list of atomic structures with already computed features compatible
            with representation in kernel
        KNM : np.array, optional
            precomputed sparse kernel matrix

        Returns
        -------
        np.array
            predictions

        Raises
        ------
        ValueError
            if the shape of the provided KNM does not match managers/X_train
        """
        if KNM is None:
            kernel = self.kernel(managers, self.X_train, (False, False))
        else:
            if len(managers) != KNM.shape[0]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(len(managers), KNM.shape[0])
                )
            elif self.X_train.size() != KNM.shape[1]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(self.X_train.size(), KNM.shape[1])
                )
            kernel = KNM
        Y0 = self._get_property_baseline(managers)
        return Y0 + np.dot(kernel, self.weights).reshape((-1))

    def predict_forces(self, managers, KNM=None):
        """Predict negative gradients w.r.t atomic positions, e.g. forces,
        associated with the atomic structures in managers.

        Parameters
        ----------
        managers : AtomsList
            list of atomic structures with already computed features compatible
            with representation in kernel
        KNM : np.array, optional
            precomputed sparse kernel matrix (3 * n_atoms rows)

        Returns
        -------
        np.array
            predictions, shape (n_atoms, 3)

        Raises
        ------
        NotImplementedError
            if the kernel is not a sparse kernel
        ValueError
            if the shape of the provided KNM does not match managers/X_train
        """
        if self.kernel.kernel_type != "Sparse":
            raise NotImplementedError(
                "force prediction only implemented for kernels with kernel_type=='Sparse'"
            )
        if KNM is None:
            rep = self.kernel._representation
            gradients = compute_sparse_kernel_gradients(
                rep,
                self.kernel._kernel,
                managers.managers,
                self.X_train._sparse_points,
                self.weights.reshape((1, -1)),
            )
        else:
            n_atoms = 0
            for manager in managers:
                n_atoms += len(manager)
            if 3 * n_atoms != KNM.shape[0]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(3 * n_atoms, KNM.shape[0])
                )
            elif self.X_train.size() != KNM.shape[1]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(self.X_train.size(), KNM.shape[1])
                )
            gradients = np.dot(KNM, self.weights).reshape((-1, 3))
        # forces are the negative position gradients
        return -gradients

    def predict_stress(self, managers, KNM=None):
        """Predict gradients w.r.t cell parameters, e.g. stress, associated
        with the atomic structures in managers.

        The stress is returned using the Voigt order: xx, yy, zz, yz, xz, xy.

        Parameters
        ----------
        managers : AtomsList
            list of atomic structures with already computed features compatible
            with representation in kernel
        KNM : np.array, optional
            precomputed sparse kernel matrix (6 * n_structures rows)

        Returns
        -------
        np.array
            predictions, shape (n_structures, 6)

        Raises
        ------
        NotImplementedError
            if the kernel is not a sparse kernel
        ValueError
            if the shape of the provided KNM does not match managers/X_train
        """
        if self.kernel.kernel_type != "Sparse":
            raise NotImplementedError(
                "stress prediction only implemented for kernels with kernel_type=='Sparse'"
            )
        if KNM is None:
            rep = self.kernel._representation
            neg_stress = compute_sparse_kernel_neg_stress(
                rep,
                self.kernel._kernel,
                managers.managers,
                self.X_train._sparse_points,
                self.weights.reshape((1, -1)),
            )
        else:
            if 6 * len(managers) != KNM.shape[0]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(6 * len(managers), KNM.shape[0])
                )
            elif self.X_train.size() != KNM.shape[1]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(self.X_train.size(), KNM.shape[1])
                )
            neg_stress = np.dot(KNM, self.weights).reshape((len(managers), 6))
        return -neg_stress

    def get_weights(self):
        """Return the weights of the fitted model."""
        return self.weights

    def _get_init_params(self):
        """Return constructor arguments for (de)serialization via BaseIO."""
        init_params = dict(
            weights=self.weights,
            kernel=self.kernel,
            X_train=self.X_train,
            self_contributions=self.self_contributions,
            description=self.description,
            units=self.units.copy(),
        )
        return init_params

    def _set_data(self, data):
        super()._set_data(data)

    def _get_data(self):
        return super()._get_data()

    def get_representation_calculator(self):
        """Return the representation calculator used by the model's kernel."""
        # NOTE(review): predict_forces/predict_stress read
        # self.kernel._representation while this reads self.kernel._rep;
        # confirm the kernel object exposes both attributes.
        return self.kernel._rep
def _get_kernel_strides(frames):
"""Get strides for total-energy/gradient kernels of the given structures
Parameters
----------
frames
List of structures each indicating the number of atoms
This assumes the final kernel will be stored in GAP energy-force
format, i.e. Nstructures rows for total-energy (summed) kernels and
3*Natoms_total rows for gradients w.r.t. atomic positions.
Returns
-------
int
the number of structures
int
the number of gradient entries (== 3 * the total number of atoms)
np.array(int)
strides for assigning the gradient entries for each structure
"""
Nstructures = len(frames)
Ngrad_stride = [0]
Ngrads = 0
for frame in frames:
n_at = len(frame)
Ngrad_stride.append(n_at * 3)
Ngrads += n_at * 3
Ngrad_stride = np.cumsum(Ngrad_stride) + Nstructures
return Nstructures, Ngrads, Ngrad_stride
def _compute_kernel_single(i_frame, frame, representation, X_sparse, kernel):
"""Compute GAP kernel of the (new) structure against the sparse points
Parameters
----------
i_frame
frame index (ignored???)
frame
New structure to compute kernel for
representation
RepresentationCalculator to use for the structures
X_sparse
Sparse points to compute kernels against
kernel
Kernel object to use
Returns
-------
en_row
Energy kernel row
grad_rows
Gradient of the kernel
"""
feat = representation.transform([frame])
en_row = kernel(feat, X_sparse)
grad_rows = kernel(feat, X_sparse, grad=(True, False))
return en_row, grad_rows
def compute_KNM(frames, X_sparse, kernel, soap):
    """Compute GAP kernel of the (new) structures against the sparse points

    Parameters
    ----------
    frames
        New structures to compute kernel for
    X_sparse
        Sparse points to compute kernels against
    kernel
        Kernel object to use
    soap
        RepresentationCalculator to use for the structures

    Returns
    -------
    K_NM: np.array
        Summed total-energy kernel stacked with the atom-position gradient
        of the kernel

    Notes
    -----
    This function can take quite a long time to run. To get a progress bar,
    you can wrap the `frames` parameter in a [tqdm]_ object like this:

    .. code-block:: python

        from tqdm.notebook import tqdm  # for Jupyter
        # from tqdm import tqdm  # on the command line
        K_NM = compute_KNM(
            tqdm(frames, desc="compute KNM", leave=False),
            X_sparse,
            kernel,
            soap
        )

    .. [tqdm] https://github.com/tqdm/tqdm
    """
    # If frames has been wrapped in a tqdm, use the underlying iterable
    # so as not to "use up" the progress bar prematurely
    if hasattr(frames, "iterable"):
        Nstructures, Ngrads, Ngrad_stride = _get_kernel_strides(frames.iterable)
    else:
        Nstructures, Ngrads, Ngrad_stride = _get_kernel_strides(frames)
    # energy rows first (Nstructures), then all gradient rows (Ngrads)
    KNM = np.zeros((Nstructures + Ngrads, X_sparse.size()))
    for i_frame, frame in enumerate(frames):
        en_row, grad_rows = _compute_kernel_single(
            i_frame, frame, soap, X_sparse, kernel
        )
        KNM[Ngrad_stride[i_frame] : Ngrad_stride[i_frame + 1]] = grad_rows
        KNM[i_frame] = en_row
    return KNM
def train_gap_model(
    kernel,
    frames,
    KNM_,
    X_sparse,
    y_train,
    self_contributions,
    grad_train=None,
    lambdas=None,
    jitter=1e-8,
):
    r"""Defines the procedure to train a SOAP-GAP model [1]_:

    .. math::

        Y(A) = \sum_{i \in A} y_{a_i}(X_i),

    where :math:`Y(A)` is the predicted property function associated with the
    atomic structure :math:`A`, :math:`i` and :math:`a_i` are the index and
    species of the atoms in structure :math:`A` and :math:`y_a(X_i)` is the
    atom-centered model that depends on the central atomic species.
    The individual predictions are given by:

    .. math::

        y_{a_i}(X_i) = \sum_m^{M} \alpha_m \delta_{b_m a_i} k(X_i, T_m),

    where :math:`k(\cdot,\cdot)` is a kernel function, :math:`\alpha_m` are
    the weights of the model and :math:`b_m` is the atom type associated with
    the sparse point :math:`T_m`.
    Hence a kernel element for the target property :math:`Y(A)` is given by:

    .. math::

        KNM_{Am} = \sum_{i \in A} \delta_{b_m a_i} k(X_i, T_m)

    and for :math:`\vec{\nabla}_i Y(A)`:

    .. math::

        KNM_{A_{i}m} = \delta_{b_m a_i} \sum_{j \in A_i} \vec{\nabla}_i k(X_j, T_m)

    The training is given by:

    .. math::

        \boldsymbol{\alpha} = K^{-1} \boldsymbol{Y},

    where :math:`K` is given by:

    .. math::

        K = K_{MM} + K_{MN} \Lambda^{-2} K_{NM},

    :math:`\boldsymbol{Y} = K_{MN} \Lambda^{-2} \boldsymbol{y}`,
    :math:`\boldsymbol{y}` the training targets and :math:`\Lambda` the
    regularization matrix. The regularization matrix is chosen diagonal:

    .. math::

        \Lambda^{-1}_{nn} = \delta_{nn} / (lambdas[0] / \sigma_{\boldsymbol{y}}
            \sqrt{N_{atoms}})

    for the targets and

    .. math::

        \Lambda^{-1}_{nn} = \delta_{nn} / (lambdas[1] / \sigma_{\boldsymbol{y}}),

    for the derivatives of the targets w.r.t. the atomic positions, where
    :math:`\sigma_{\boldsymbol{y}}` is the standard deviation of the target
    property (not derivatives).

    Parameters
    ----------
    kernel : Kernel
        SparseKernel to compute KMM and initialize the model. It was used to
        build KNM_.
    frames : list(ase.Atoms)
        Training structures
    KNM_ : np.array
        kernel matrix to use in the training, typically computed with:

            KNM = kernel(managers, X_sparse)
            KNM_down = kernel(managers, X_sparse, grad=(True, False))
            KNM = np.vstack([KNM, KNM_down])

        when training with derivatives.
    X_sparse : SparsePoints
        basis samples to use in the model's interpolation
    y_train : np.array
        reference property
    self_contributions : dictionary
        map atomic number to the property baseline, e.g. training on
        total energies is not very recommended so one would provide
        the corresponding isolated atom energies so that the model
        can be trained on the corresponding formation energies and
        still predict total energies.
    grad_train : np.array, optional
        derivatives of y_train w.r.t. to the atomic motion, e.g.
        minus interatomic forces, by default None
    lambdas : list/tuple
        regularisation parameter for the training, i.e. lambdas[0] -> property
        and lambdas[1] -> gradients of the property
    jitter : double, optional
        small jitter for the numerical stability of solving the linear system,
        by default 1e-8

    Returns
    -------
    KRR
        a trained model that can predict the property and its gradients

    Raises
    ------
    ValueError
        if no regularization parameters are provided

    .. [1] Ceriotti, M., Willatt, M. J., & Csányi, G. (2018).
        Machine Learning of Atomic-Scale Properties Based on Physical
        Principles. In Handbook of Materials Modeling (pp. 1–27).
        Springer, Cham. https://doi.org/10.1007/978-3-319-42913-7_68-1
    """
    # previously a missing `lambdas` surfaced as an opaque TypeError below
    if lambdas is None:
        raise ValueError(
            "lambdas must provide the regularization strength(s), e.g. [1e-3, 1e-2]"
        )
    KMM = kernel(X_sparse)
    Y = y_train.reshape((-1, 1)).copy()
    KNM = KNM_.copy()
    n_centers = Y.shape[0]
    Natoms = np.zeros(n_centers)
    Y0 = np.zeros((n_centers, 1))
    # remove the per-species baseline (self contributions) from the targets
    for iframe, frame in enumerate(frames):
        Natoms[iframe] = len(frame)
        for sp in frame.get_atomic_numbers():
            Y0[iframe] += self_contributions[sp]
    Y = Y - Y0
    # standard deviation of the baselined targets used to scale the regularizers
    delta = np.std(Y)
    # lambdas[0] is provided per atom hence the '* np.sqrt(Natoms)'
    # the first n_centers rows of KNM are expected to refer to the
    # property
    KNM[:n_centers] /= lambdas[0] / delta * np.sqrt(Natoms)[:, None]
    Y /= lambdas[0] / delta * np.sqrt(Natoms)[:, None]
    if grad_train is not None:
        KNM[n_centers:] /= lambdas[1] / delta
        F = grad_train.reshape((-1, 1)).copy()
        F /= lambdas[1] / delta
        Y = np.vstack([Y, F])
    # jitter on the diagonal for numerical stability of the solve
    KMM[np.diag_indices_from(KMM)] += jitter
    K = KMM + np.dot(KNM.T, KNM)
    Y = np.dot(KNM.T, Y)
    # lstsq is more robust than solve() when K is close to singular
    weights = np.linalg.lstsq(K, Y, rcond=None)[0]
    model = KRR(weights, kernel, X_sparse, self_contributions)
    # release the large intermediate matrices before returning
    del K, KMM
    return model