"""Kernel ridge regression: Model class and utilities
Public classes:
KRR Kernel Ridge Regression model (sparse GPR only).
Public functions:
compute_KNM Compute GAP kernel of a set of structures
train_gap_model Train a GAP model given a kernel matrix and sparse points
"""
from ..utils import BaseIO
from ..lib import compute_sparse_kernel_gradients, compute_sparse_kernel_neg_stress
import numpy as np
import ase
class KRR(BaseIO):
    """Kernel Ridge Regression model. Only supports sparse GPR
    training for the moment.

    Parameters
    ----------
    weights : np.array
        weights of the model
    kernel : Kernel
        kernel class used to train the model
    X_train : SparsePoints
        reference samples used for the training
    self_contributions : dictionary
        map atomic number to the property baseline, e.g. isolated atoms
        energies when the model has been trained on total energies.
    description : string
        User-defined string used to describe the model for future reference
    units : dict
        Energy and length units used by the model (default: eV and Å (aka AA),
        same as used in ASE)
    """

    def __init__(
        self,
        weights,
        kernel,
        X_train,
        self_contributions,
        description="KRR potential model",
        units=None,
    ):
        # Weights of the krr model
        self.weights = weights
        self.kernel = kernel
        self.X_train = X_train
        self.self_contributions = self_contributions
        self.target_type = kernel.target_type
        self.description = description
        # Work on a copy so filling in default units does not mutate the
        # caller's dictionary.
        units = dict() if units is None else dict(units)
        units.setdefault("energy", "eV")
        units.setdefault("length", "AA")
        self.units = units

    def _get_property_baseline(self, managers):
        """Build the total baseline (self-contribution) for each prediction.

        For "Structure" targets the baseline of a structure is the sum of the
        per-species self-contributions of its atoms; for "Atom" targets each
        center gets its own species baseline.

        Parameters
        ----------
        managers : AtomsList or list(ase.Atoms)
            structures to compute the baseline for

        Returns
        -------
        np.array
            baseline value per structure ("Structure") or per center ("Atom")

        Raises
        ------
        ValueError
            if ``self.target_type`` is neither "Structure" nor "Atom"
        """
        if self.target_type == "Structure":
            Y0 = np.zeros(len(managers))
            for i_manager, manager in enumerate(managers):
                # managers may hold either raw ase.Atoms or structure managers
                if isinstance(manager, ase.Atoms):
                    numbers = manager.get_atomic_numbers()
                    for sp in numbers:
                        Y0[i_manager] += self.self_contributions[sp]
                else:
                    for at in manager:
                        Y0[i_manager] += self.self_contributions[at.atom_type]
        elif self.target_type == "Atom":
            n_centers = 0
            for manager in managers:
                n_centers += len(manager)
            Y0 = np.zeros(n_centers)
            i_center = 0
            for manager in managers:
                for center in manager:
                    Y0[i_center] = self.self_contributions[center.atom_type]
                    i_center += 1
        else:
            # previously this fell through and raised an opaque
            # UnboundLocalError on the return statement
            raise ValueError(
                "unknown target_type '{}'; expected 'Structure' or 'Atom'".format(
                    self.target_type
                )
            )
        return Y0

    def predict(self, managers, KNM=None):
        """Predict properties associated with the atomic structures in managers.

        Parameters
        ----------
        managers : AtomsList
            list of atomic structures with already computed features compatible
            with representation in kernel
        KNM : np.array, optional
            precomputed sparse kernel matrix

        Returns
        -------
        np.array
            predictions

        Raises
        ------
        ValueError
            if the shape of the provided KNM does not match managers/X_train
        """
        if KNM is None:
            kernel = self.kernel(managers, self.X_train, (False, False))
        else:
            if len(managers) != KNM.shape[0]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(len(managers), KNM.shape[0])
                )
            elif self.X_train.size() != KNM.shape[1]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(self.X_train.size(), KNM.shape[1])
                )
            kernel = KNM
        Y0 = self._get_property_baseline(managers)
        return Y0 + np.dot(kernel, self.weights).reshape((-1))

    def predict_forces(self, managers, KNM=None):
        """Predict negative gradients w.r.t atomic positions, e.g. forces,
        associated with the atomic structures in managers.

        Parameters
        ----------
        managers : AtomsList
            list of atomic structures with already computed features compatible
            with representation in kernel
        KNM : np.array, optional
            precomputed sparse kernel matrix (3 * n_atoms rows)

        Returns
        -------
        np.array
            predictions, shape (n_atoms, 3)

        Raises
        ------
        NotImplementedError
            if the kernel is not a sparse kernel
        ValueError
            if the shape of the provided KNM does not match managers/X_train
        """
        if self.kernel.kernel_type != "Sparse":
            raise NotImplementedError(
                "force prediction only implemented for kernels with kernel_type=='Sparse'"
            )
        if KNM is None:
            rep = self.kernel._representation
            gradients = compute_sparse_kernel_gradients(
                rep,
                self.kernel._kernel,
                managers.managers,
                self.X_train._sparse_points,
                self.weights.reshape((1, -1)),
            )
        else:
            n_atoms = 0
            for manager in managers:
                n_atoms += len(manager)
            if 3 * n_atoms != KNM.shape[0]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(3 * n_atoms, KNM.shape[0])
                )
            elif self.X_train.size() != KNM.shape[1]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(self.X_train.size(), KNM.shape[1])
                )
            gradients = np.dot(KNM, self.weights).reshape((-1, 3))
        # forces are the negative position gradients
        return -gradients

    def predict_stress(self, managers, KNM=None):
        """Predict gradients w.r.t cell parameters, e.g. stress, associated
        with the atomic structures in managers.

        The stress is returned using the Voigt order: xx, yy, zz, yz, xz, xy.

        Parameters
        ----------
        managers : AtomsList
            list of atomic structures with already computed features compatible
            with representation in kernel
        KNM : np.array, optional
            precomputed sparse kernel matrix (6 * n_structures rows)

        Returns
        -------
        np.array
            predictions, shape (n_structures, 6)

        Raises
        ------
        NotImplementedError
            if the kernel is not a sparse kernel
        ValueError
            if the shape of the provided KNM does not match managers/X_train
        """
        if self.kernel.kernel_type != "Sparse":
            raise NotImplementedError(
                "stress prediction only implemented for kernels with kernel_type=='Sparse'"
            )
        if KNM is None:
            rep = self.kernel._representation
            neg_stress = compute_sparse_kernel_neg_stress(
                rep,
                self.kernel._kernel,
                managers.managers,
                self.X_train._sparse_points,
                self.weights.reshape((1, -1)),
            )
        else:
            if 6 * len(managers) != KNM.shape[0]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(6 * len(managers), KNM.shape[0])
                )
            elif self.X_train.size() != KNM.shape[1]:
                raise ValueError(
                    "KNM size mismatch {}!={}".format(self.X_train.size(), KNM.shape[1])
                )
            neg_stress = np.dot(KNM, self.weights).reshape((len(managers), 6))
        return -neg_stress

    def get_weights(self):
        """Return the weights of the fitted model."""
        return self.weights

    def _get_init_params(self):
        """Return constructor arguments for (de)serialization via BaseIO."""
        init_params = dict(
            weights=self.weights,
            kernel=self.kernel,
            X_train=self.X_train,
            self_contributions=self.self_contributions,
            description=self.description,
            units=self.units.copy(),
        )
        return init_params

    def _set_data(self, data):
        super()._set_data(data)

    def _get_data(self):
        return super()._get_data()

    def get_representation_calculator(self):
        """Return the representation calculator used by the model's kernel."""
        # NOTE(review): predict_forces/predict_stress read
        # self.kernel._representation while this reads self.kernel._rep;
        # confirm the kernel object exposes both attributes.
        return self.kernel._rep
def _get_kernel_strides(frames):
"""Get strides for total-energy/gradient kernels of the given structures
Parameters
----------
frames
List of structures each indicating the number of atoms
This assumes the final kernel will be stored in GAP energy-force
format, i.e. Nstructures rows for total-energy (summed) kernels and
3*Natoms_total rows for gradients w.r.t. atomic positions.
Returns
-------
int
the number of structures
int
the number of gradient entries (== 3 * the total number of atoms)
np.array(int)
strides for assigning the gradient entries for each structure
"""
Nstructures = len(frames)
Ngrad_stride = [0]
Ngrads = 0
for frame in frames:
n_at = len(frame)
Ngrad_stride.append(n_at * 3)
Ngrads += n_at * 3
Ngrad_stride = np.cumsum(Ngrad_stride) + Nstructures
return Nstructures, Ngrads, Ngrad_stride
def _compute_kernel_single(i_frame, frame, representation, X_sparse, kernel):
"""Compute GAP kernel of the (new) structure against the sparse points
Parameters
----------
i_frame
frame index (ignored???)
frame
New structure to compute kernel for
representation
RepresentationCalculator to use for the structures
X_sparse
Sparse points to compute kernels against
kernel
Kernel object to use
Returns
-------
en_row
Energy kernel row
grad_rows
Gradient of the kernel
"""
feat = representation.transform([frame])
en_row = kernel(feat, X_sparse)
grad_rows = kernel(feat, X_sparse, grad=(True, False))
return en_row, grad_rows
def compute_KNM(frames, X_sparse, kernel, soap):
    """Compute GAP kernel of the (new) structures against the sparse points

    Parameters
    ----------
    frames
        New structures to compute kernel for
    X_sparse
        Sparse points to compute kernels against
    kernel
        Kernel object to use
    soap
        RepresentationCalculator to use for the structures

    Returns
    -------
    K_NM: np.array
        Summed total-energy kernel stacked with the atom-position gradient
        of the kernel

    Notes
    -----
    This function can take quite a long time to run. To get a progress bar,
    you can wrap the `frames` parameter in a [tqdm]_ object like this:

    .. code-block:: python

        from tqdm.notebook import tqdm  # for Jupyter
        # from tqdm import tqdm  # on the command line
        K_NM = compute_KNM(
            tqdm(frames, desc="compute KNM", leave=False),
            X_sparse,
            kernel,
            soap
        )

    .. [tqdm] https://github.com/tqdm/tqdm
    """
    # If frames has been wrapped in a tqdm, use the underlying iterable
    # so as not to "use up" the progress bar prematurely
    if hasattr(frames, "iterable"):
        Nstructures, Ngrads, Ngrad_stride = _get_kernel_strides(frames.iterable)
    else:
        Nstructures, Ngrads, Ngrad_stride = _get_kernel_strides(frames)
    # energy rows first (Nstructures), then all gradient rows (Ngrads)
    KNM = np.zeros((Nstructures + Ngrads, X_sparse.size()))
    for i_frame, frame in enumerate(frames):
        en_row, grad_rows = _compute_kernel_single(
            i_frame, frame, soap, X_sparse, kernel
        )
        KNM[Ngrad_stride[i_frame] : Ngrad_stride[i_frame + 1]] = grad_rows
        KNM[i_frame] = en_row
    return KNM
def train_gap_model(
    kernel,
    frames,
    KNM_,
    X_sparse,
    y_train,
    self_contributions,
    grad_train=None,
    lambdas=None,
    jitter=1e-8,
):
    r"""Defines the procedure to train a SOAP-GAP model [1]_:

    .. math::

        Y(A) = \sum_{i \in A} y_{a_i}(X_i),

    where :math:`Y(A)` is the predicted property function associated with the
    atomic structure :math:`A`, :math:`i` and :math:`a_i` are the index and
    species of the atoms in structure :math:`A` and :math:`y_a(X_i)` is the
    atom-centered model that depends on the central atomic species.
    The individual predictions are given by:

    .. math::

        y_{a_i}(X_i) = \sum_m^{M} \alpha_m \delta_{b_m a_i} k(X_i, T_m),

    where :math:`k(\cdot,\cdot)` is a kernel function, :math:`\alpha_m` are
    the weights of the model and :math:`b_m` is the atom type associated with
    the sparse point :math:`T_m`.
    Hence a kernel element for the target property :math:`Y(A)` is given by:

    .. math::

        KNM_{Am} = \sum_{i \in A} \delta_{b_m a_i} k(X_i, T_m)

    and for :math:`\vec{\nabla}_i Y(A)`:

    .. math::

        KNM_{A_{i}m} = \delta_{b_m a_i} \sum_{j \in A_i} \vec{\nabla}_i k(X_j, T_m)

    The training is given by:

    .. math::

        \boldsymbol{\alpha} = K^{-1} \boldsymbol{Y},

    where :math:`K` is given by:

    .. math::

        K = K_{MM} + K_{MN} \Lambda^{-2} K_{NM},

    :math:`\boldsymbol{Y} = K_{MN} \Lambda^{-2} \boldsymbol{y}`,
    :math:`\boldsymbol{y}` the training targets and :math:`\Lambda` the
    regularization matrix. The regularization matrix is chosen diagonal:

    .. math::

        \Lambda^{-1}_{nn} = \delta_{nn} / (lambdas[0] / \sigma_{\boldsymbol{y}}
            \sqrt{N_{atoms}})

    for the targets and

    .. math::

        \Lambda^{-1}_{nn} = \delta_{nn} / (lambdas[1] / \sigma_{\boldsymbol{y}}),

    for the derivatives of the targets w.r.t. the atomic positions, where
    :math:`\sigma_{\boldsymbol{y}}` is the standard deviation of the target
    property (not derivatives).

    Parameters
    ----------
    kernel : Kernel
        SparseKernel to compute KMM and initialize the model. It was used to
        build KNM_.
    frames : list(ase.Atoms)
        Training structures
    KNM_ : np.array
        kernel matrix to use in the training, typically computed with:

            KNM = kernel(managers, X_sparse)
            KNM_down = kernel(managers, X_sparse, grad=(True, False))
            KNM = np.vstack([KNM, KNM_down])

        when training with derivatives.
    X_sparse : SparsePoints
        basis samples to use in the model's interpolation
    y_train : np.array
        reference property
    self_contributions : dictionary
        map atomic number to the property baseline, e.g. training on
        total energies is not very recommended so one would provide
        the corresponding isolated atom energies so that the model
        can be trained on the corresponding formation energies and
        still predict total energies.
    grad_train : np.array, optional
        derivatives of y_train w.r.t. to the atomic motion, e.g.
        minus interatomic forces, by default None
    lambdas : list/tuple
        regularisation parameter for the training, i.e. lambdas[0] -> property
        and lambdas[1] -> gradients of the property
    jitter : double, optional
        small jitter for the numerical stability of solving the linear system,
        by default 1e-8

    Returns
    -------
    KRR
        a trained model that can predict the property and its gradients

    Raises
    ------
    ValueError
        if no regularization parameters are provided

    .. [1] Ceriotti, M., Willatt, M. J., & Csányi, G. (2018).
        Machine Learning of Atomic-Scale Properties Based on Physical
        Principles. In Handbook of Materials Modeling (pp. 1–27).
        Springer, Cham. https://doi.org/10.1007/978-3-319-42913-7_68-1
    """
    # previously a missing `lambdas` surfaced as an opaque TypeError below
    if lambdas is None:
        raise ValueError(
            "lambdas must provide the regularization strength(s), e.g. [1e-3, 1e-2]"
        )
    KMM = kernel(X_sparse)
    Y = y_train.reshape((-1, 1)).copy()
    KNM = KNM_.copy()
    n_centers = Y.shape[0]
    Natoms = np.zeros(n_centers)
    Y0 = np.zeros((n_centers, 1))
    # remove the per-species baseline (self contributions) from the targets
    for iframe, frame in enumerate(frames):
        Natoms[iframe] = len(frame)
        for sp in frame.get_atomic_numbers():
            Y0[iframe] += self_contributions[sp]
    Y = Y - Y0
    # standard deviation of the baselined targets used to scale the regularizers
    delta = np.std(Y)
    # lambdas[0] is provided per atom hence the '* np.sqrt(Natoms)'
    # the first n_centers rows of KNM are expected to refer to the
    # property
    KNM[:n_centers] /= lambdas[0] / delta * np.sqrt(Natoms)[:, None]
    Y /= lambdas[0] / delta * np.sqrt(Natoms)[:, None]
    if grad_train is not None:
        KNM[n_centers:] /= lambdas[1] / delta
        F = grad_train.reshape((-1, 1)).copy()
        F /= lambdas[1] / delta
        Y = np.vstack([Y, F])
    # jitter on the diagonal for numerical stability of the solve
    KMM[np.diag_indices_from(KMM)] += jitter
    K = KMM + np.dot(KNM.T, KNM)
    Y = np.dot(KNM.T, Y)
    # lstsq is more robust than solve() when K is close to singular
    weights = np.linalg.lstsq(K, Y, rcond=None)[0]
    model = KRR(weights, kernel, X_sparse, self_contributions)
    # release the large intermediate matrices before returning
    del K, KMM
    return model