"""
This module provides functions for computing three-atom neighbor list tuples
and fitting/evaluating 3D tensor product BSplines.
"""
from typing import List, Dict, Tuple
import numpy as np
from numba import jit
import ase
from uf3.data import composition
from uf3.representation import bspline
from uf3.representation import distances
from scipy.spatial import distance
[docs]def featurize_energy_3b(geom: ase.Atoms,
knot_sets: List[List[np.ndarray]],
basis_functions: List,
hashes: List,
supercell: ase.Atoms = None,
trailing_trim: int = 0,
) -> List[np.ndarray]:
"""
Args:
geom (ase.Atoms): configuration of interest
knot_sets (np.ndarray): list of lists of knot sequences per interaction
basis_functions (list): list of lists of callable basis functions
for each interaction
hashes (list): list of three-body hashes.
supercell (ase.Atoms): optional supercell.
trailing_trim (int): number of basis functions at trailing edge
to suppress. Useful for ensuring smooth cutoffs.
Returns:
grid_3b (List of np.ndarray): feature grid per interaction.
"""
if supercell is None:
supercell = geom
n_interactions = len(hashes)
sup_comp = supercell.get_atomic_numbers()
L, M, N = coefficient_counts_from_knots(knot_sets)
# identify pairs
dist_matrix, i_where, j_where = identify_ij(geom,
knot_sets,
supercell)
grids = [np.zeros((L[i], M[i], N[i]))
for i in range(n_interactions)]
if len(i_where) == 0:
return grids
triplet_generator = generate_triplets(
i_where, j_where, sup_comp, hashes, dist_matrix, knot_sets)
for triplet_batch in triplet_generator:
for interaction_idx in range(n_interactions):
interaction_data = triplet_batch[interaction_idx]
if interaction_data is None:
continue
i, r_l, r_m, r_n, ituples = interaction_data
tuples_3b, idx_lmn = evaluate_triplet_distances(
r_l,
r_m,
r_n,
basis_functions[interaction_idx],
knot_sets[interaction_idx],
trailing_trim=trailing_trim)
grids[interaction_idx] += arrange_3b(tuples_3b,
idx_lmn,
L[interaction_idx],
M[interaction_idx],
N[interaction_idx])
return grids
[docs]def coefficient_counts_from_knots(knot_sets: List[List[np.ndarray]],
) -> Tuple[List[int], List[int], List[int]]:
"""
Count number of basis functions per dimension from knot sequences.
Args:
knot_sets (list): list of three-body knot sequences
per chemical interation
Returns:
L, M, N (list): lists of number of basis functions per dimension (3).
"""
L = []
M = []
N = []
for knot_sequences in knot_sets:
l_space, m_space, n_space = knot_sequences
L.append(len(l_space) - 4)
M.append(len(m_space) - 4)
N.append(len(n_space) - 4)
return L, M, N
[docs]@jit(nopython=True, nogil=True)
def arrange_3b(triangle_values: np.ndarray,
idx_lmn: np.ndarray,
L: int,
M: int,
N: int
) -> np.ndarray:
"""
Arrange contributions per basis function in L x M x N grid.
Args:
triangle_values (np.ndarray): array of shape (n_triangles * 4, 3)
idx_lmn (np.ndarray): array of shape (n_triangles * 4, 3)
L (int): number of basis functions in first dimension of tensor.
M (int): number of basis functions in second dimension of tensor.
N (int): number of basis functions in third dimension of tensor.
Returns:
grid (np.ndarray)
"""
# multiply spline values and arrange in 3D grid
grid = np.zeros((L, M, N))
n_values = len(triangle_values)
n_triangles = int(n_values / 4)
for triangle_idx in range(n_triangles):
idx_l = idx_lmn[triangle_idx * 4, 0]
idx_m = idx_lmn[triangle_idx * 4, 1]
idx_n = idx_lmn[triangle_idx * 4, 2]
values = triangle_values[triangle_idx * 4: (triangle_idx + 1) * 4, :]
# each triangle influences 4 x 4 x 4 = 64 basis functions
for i in range(4):
for j in range(4):
for k in range(4):
value = values[i, 0] * values[j, 1] * values[k, 2]
grid[idx_l + i, idx_m + j, idx_n + k] += value
return grid
[docs]def featurize_force_3b(geom: ase.Atoms,
knot_sets: List[List[np.ndarray]],
basis_functions: List[List],
trio_hashes: Dict[int, np.ndarray],
supercell: ase.Atoms = None,
trailing_trim: int = 0,
) -> List[List[List[np.ndarray]]]:
"""
Generate features per force component per atom for a configuration.
Args:
geom (ase.Atoms): configuration of interest.
knot_sets (np.ndarray): list of lists of knot sequences per interaction
basis_functions (list): list of lists of callable basis functions
for each chemical interaction
trio_hashes (dict): map of interaction to integer hashes.
supercell (ase.Atoms): optional supercell.
trailing_trim (int): number of
Returns:
grid_3b (list): array-like list of shape
(n_atoms, 3, n_basis_functions) where 3 refers to the three
cartesian directions x, y, and z.
"""
if supercell is None:
supercell = geom
n_interactions = len(trio_hashes)
sup_comp = supercell.get_atomic_numbers()
n_atoms = len(geom)
L, M, N = coefficient_counts_from_knots(knot_sets)
coords, matrix, x_where, y_where = identify_ij(geom,
knot_sets,
supercell,
square=True)
if len(x_where) == 0:
return np.zeros((n_atoms, 3, L, M, N))
force_grids = [[[np.zeros((L[i], M[i], N[i])),
np.zeros((L[i], M[i], N[i])),
np.zeros((L[i], M[i], N[i]))]
for _ in range(n_atoms)]
for i in range(n_interactions)]
# process each atom's neighbors to limit memory requirement
triplet_generator = generate_triplets(
x_where, y_where, sup_comp, trio_hashes, matrix, knot_sets)
for triplet_batch in triplet_generator:
for interaction_idx in range(n_interactions):
interaction_data = triplet_batch[interaction_idx]
if interaction_data is None:
continue
i, r_l, r_m, r_n, ituples = interaction_data
tuples_3b, idx_lmn = evaluate_triplet_derivatives(
r_l,
r_m,
r_n,
basis_functions[interaction_idx],
knot_sets[interaction_idx],
trailing_trim=trailing_trim)
drij_dr = distances.compute_direction_cosines(coords,
matrix,
ituples[:, 0],
ituples[:, 1],
n_atoms)
drik_dr = distances.compute_direction_cosines(coords,
matrix,
ituples[:, 0],
ituples[:, 2],
n_atoms)
drjk_dr = distances.compute_direction_cosines(coords,
matrix,
ituples[:, 1],
ituples[:, 2],
n_atoms)
grids = arrange_deriv_3b(tuples_3b,
idx_lmn,
drij_dr,
drik_dr,
drjk_dr,
L[interaction_idx],
M[interaction_idx],
N[interaction_idx])
for a in range(n_atoms):
for c in range(3):
force_grids[interaction_idx][a][c] -= grids[a][c]
return force_grids
[docs]@jit(nopython=True, nogil=True)
def arrange_deriv_3b(triangle_values: np.ndarray,
idx_lmn: np.ndarray,
drij_dr: np.ndarray,
drik_dr: np.ndarray,
drjk_dr: np.ndarray,
L: int,
M: int,
N: int
) -> List[List[np.ndarray]]:
"""
Args:
triangle_values (np.ndarray): array of shape (n_triangles * 4, 3)
idx_lmn (np.ndarray): array of shape (n_triangles * 4, 3)
dr{ij, ik, jk}_dr (np.ndarray): direction-cosine arrays of shape
(n_atoms, 3, n_triangles).
L (int): number of basis functions.
Returns:
force_grids (list): array-like list of shape
(n_atoms, 3, n_basis_functions) where 3 refers to the three
cartesian directions x, y, and z.
"""
# multiply spline values and arrange in 3D grid
n_atoms = len(drij_dr)
n_values = len(triangle_values)
n_triangles = int(n_values / 4)
force_grids = [(np.zeros((L, M, N)),
np.zeros((L, M, N)),
np.zeros((L, M, N)))
for _ in range(n_atoms)]
for a in range(n_atoms): # atom index
for c in range(3): # cartesian directions
for tri_idx in range(n_triangles):
dij = drij_dr[a, c, tri_idx]
dik = drik_dr[a, c, tri_idx]
djk = drjk_dr[a, c, tri_idx]
if dij == 0 and dik == 0 and djk == 0:
continue
idx_l = idx_lmn[tri_idx * 4, 0]
idx_m = idx_lmn[tri_idx * 4, 1]
idx_n = idx_lmn[tri_idx * 4, 2]
v = triangle_values[tri_idx * 4: (tri_idx + 1) * 4, :]
# each triangle influences 4 x 4 x 4 = 64 basis functions
for i in range(4):
for j in range(4):
for k in range(4):
val = (v[i, 3] * v[j, 1] * v[k, 2] * dij +
v[i, 0] * v[j, 4] * v[k, 2] * dik +
v[i, 0] * v[j, 1] * v[k, 5] * djk)
force_grids[a][c][idx_l+i, idx_m+j, idx_n+k] += val
return force_grids
[docs]def identify_ij(geom: ase.Atoms,
knot_sets: List[List[np.ndarray]],
supercell: ase.Atoms = None,
square: bool = False):
"""
Args:
geom (ase.Atoms)
knot_sets (np.ndarray): list of lists of knot sequences per interaction
supercell (ase.Atoms)
square (bool): whether to return square distance matrix (True)
or truncate to atoms in unit cell (False).
TODO: refactor to break up into smaller, reusable functions
Returns:
dist_matrix (np.ndarray): rectangular matrix of shape
(n_atoms, n_supercell) where n_supercell is the number of
atoms within the cutoff distance of any in-unit-cell atom.
{i, j}_where (np.ndarray): unsorted list of atom indices
over which to loop to obtain valid pair distances.
"""
if supercell is None:
supercell = geom
knots_flat = np.concatenate([sequence for set_ in knot_sets
for sequence in set_])
r_min = np.min(knots_flat)
r_max = np.max(knots_flat)
sup_positions = supercell.get_positions()
geo_positions = geom.get_positions()
# mask atoms that aren't close to any unit-cell atom
dist_matrix = distance.cdist(geo_positions, sup_positions)
cutoff_mask = dist_matrix < r_max # ignore atoms without in-cell neighbors
cutoff_mask = np.any(cutoff_mask, axis=0)
sup_positions = sup_positions[cutoff_mask, :] # reduced distance matrix
# enforce distance cutoffs
n_geo = len(geo_positions)
dist_matrix = distance.cdist(sup_positions, sup_positions)
if square is False:
cut_matrix = dist_matrix[:n_geo, :]
dist_mask = (cut_matrix >= r_min) & (cut_matrix <= r_max)
i_where, j_where = np.where(dist_mask)
return dist_matrix, i_where, j_where
else:
dist_mask = (dist_matrix >= r_min) & (dist_matrix <= r_max)
i_where, j_where = np.where(dist_mask)
return sup_positions, dist_matrix, i_where, j_where
[docs]def legacy_generate_triplets(i_where:np.ndarray,
j_where: np.ndarray,
distance_matrix: np.ndarray,
knot_sequences: List[np.ndarray],
) -> Tuple:
"""
Identify unique "i-j-j'" tuples by combining provided i-j pairs, then
compute i-j, i-k, and j-k pair distances from i-j-k tuples,
distance matrix, and knot sequence for cutoffs.
Args:
i_where (np.ndarray): sorted "i" indices
j_where (np.ndarray): sorted "j" indices
distance_matrix (np.ndarray)
knot_sequences (list of np.ndarray)
Returns:
tuples_idx (np.ndarray): array of shape (n_triangles, 3)
"""
# find unique values of i (sorted such that i < j)
i_values, group_sizes = np.unique(i_where, return_counts=True)
# group j by values of i
i_groups = np.array_split(j_where, np.cumsum(group_sizes)[:-1])
# generate j-k combinations
for i in range(len(i_groups)):
tuples = np.array(np.meshgrid(i_groups[i],
i_groups[i])).T.reshape(-1, 2)
tuples = np.insert(tuples, 0, i_values[i], axis=1)
# atoms j and k are interchangable; filter
comparison_mask = (tuples[:, 1] < tuples[:, 2])
tuples = tuples[comparison_mask]
# extract distance tuples
r_l = distance_matrix[tuples[:, 0], tuples[:, 1]]
r_m = distance_matrix[tuples[:, 0], tuples[:, 2]]
r_n = distance_matrix[tuples[:, 1], tuples[:, 2]]
# mask by longest distance
l_mask = (r_l >= knot_sequences[0][0]) & (
r_l <= knot_sequences[0][-1])
m_mask = (r_m >= knot_sequences[1][0]) & (
r_m <= knot_sequences[1][-1])
n_mask = (r_n >= knot_sequences[2][0]) & (
r_n <= knot_sequences[2][-1])
dist_mask = np.logical_and.reduce([l_mask, m_mask, n_mask])
r_l = r_l[dist_mask]
r_m = r_m[dist_mask]
r_n = r_n[dist_mask]
tuples = tuples[dist_mask]
yield i, r_l, r_m, r_n, tuples
[docs]def generate_triplets(i_where: np.ndarray,
j_where: np.ndarray,
sup_composition: np.ndarray,
hashes: np.ndarray,
distance_matrix: np.ndarray,
knot_sets: List[List[np.ndarray]]
) -> List[Tuple]:
"""
Identify unique "i-j-j'" tuples by combining provided i-j pairs, then
compute i-j, i-k, and j-k pair distances from i-j-k tuples,
distance matrix, and knot sequence for cutoffs.
TODO: refactor to break up into smaller, reusable functions
Args:
i_where (np.ndarray): sorted "i" indices
j_where (np.ndarray): sorted "j" indices
sup_composition (np.ndarray): composition given by atomic numbers.
hashes (np.ndarray): array of unique integer hashes for interactions.
distance_matrix (np.ndarray): pair distance matrix.
knot_sets (np.ndarray): list of lists of knot sequences per interaction
Returns:
tuples_idx (np.ndarray): array of shape (n_triangles, 3)
"""
n_hashes = len(hashes)
# find unique values of i (sorted such that i < j)
i_values, group_sizes = np.unique(i_where, return_counts=True)
# group j by values of i
i_groups = np.array_split(j_where, np.cumsum(group_sizes)[:-1])
# generate j-k combinations
for i in range(len(i_groups)):
tuples = np.array(np.meshgrid(i_groups[i],
i_groups[i])).T.reshape(-1, 2)
tuples = np.insert(tuples, 0, i_values[i], axis=1)
comp_tuples = sup_composition[tuples]
comp_tuples[:, 1:] = np.sort(comp_tuples[:, 1:], axis=1)
ijk_hash = composition.get_szudzik_hash(comp_tuples)
grouped_triplets = [None] * n_hashes
for j, hash_ in enumerate(hashes):
ituples = tuples[ijk_hash == hash_]
if len(ituples) == 0:
grouped_triplets[j] = None
continue
# atoms j and k are interchangable; filter
comparison_mask = (ituples[:, 1] < ituples[:, 2])
ituples = ituples[comparison_mask]
# extract distance tuples
r_l = distance_matrix[ituples[:, 0], ituples[:, 1]]
r_m = distance_matrix[ituples[:, 0], ituples[:, 2]]
r_n = distance_matrix[ituples[:, 1], ituples[:, 2]]
# mask by longest distance
l_mask = ((r_l >= knot_sets[j][0][0])
& (r_l <= knot_sets[j][0][-1]))
m_mask = ((r_m >= knot_sets[j][1][0])
& (r_m <= knot_sets[j][1][-1]))
n_mask = ((r_n >= knot_sets[j][2][0])
& (r_n <= knot_sets[j][2][-1]))
dist_mask = np.logical_and.reduce([l_mask, m_mask, n_mask])
r_l = r_l[dist_mask]
r_m = r_m[dist_mask]
r_n = r_n[dist_mask]
ituples = ituples[dist_mask]
grouped_triplets[j] = i, r_l, r_m, r_n, ituples
yield grouped_triplets
[docs]def evaluate_triplet_distances(r_l: np.ndarray,
r_m: np.ndarray,
r_n: np.ndarray,
basis_functions: List[List],
knot_sequences: List[np.ndarray],
trailing_trim: int = 0,
):
"""
Identify non-zero basis functions for each point and call functions.
TODO: refactor to break up into smaller, reusable functions
Args:
r_l (np.ndarray): vector of i-j distances.
r_m (np.ndarray): vector of i-k distances.
r_n (np.ndarray): vector of j-k distances.
basis_functions (list): list of lists of of callable basis functions.
knot_sequences (list of np.ndarray): list of knot sequences.
trailing_trim (int): number of basis functions at trailing edge
to suppress. Useful for ensuring smooth cutoffs.
Returns:
tuples_3b (np.ndarray):
idx_lmn (np.ndarray):
"""
l_knots, m_knots, n_knots = knot_sequences
# identify non-zero splines (tiling each distance x 4)
r_l, idx_rl = bspline.find_spline_indices(r_l, l_knots)
r_m, idx_rm = bspline.find_spline_indices(r_m, m_knots)
r_n, idx_rn = bspline.find_spline_indices(r_n, n_knots)
# evaluate splines per dimension
L = len(l_knots) - 4 - trailing_trim
M = len(m_knots) - 4 - trailing_trim
N = len(n_knots) - 4 - trailing_trim
n_tuples = len(r_l)
values_3b = np.zeros((n_tuples, 3)) # array of basis-function values
for l_idx in range(L):
mask = (idx_rl == l_idx)
points = r_l[mask]
values_3b[mask, 0] = basis_functions[0][l_idx](points)
for m_idx in range(M):
mask = (idx_rm == m_idx)
points = r_m[mask]
values_3b[mask, 1] = basis_functions[1][m_idx](points)
for n_idx in range(N):
mask = (idx_rn == n_idx)
points = r_n[mask]
values_3b[mask, 2] = basis_functions[2][n_idx](points)
idx_lmn = np.vstack([idx_rl, idx_rm, idx_rn]).T
# if trailing_trim > 0:
# trim_mask = np.logical_or.reduce([idx_rl <= L,
# idx_rm <= M,
# idx_rn <= N])
# print("E", np.sum(trim_mask), len(values_3b))
# values_3b = values_3b[trim_mask]
# idx_lmn = idx_lmn[trim_mask]
return values_3b, idx_lmn
[docs]def evaluate_triplet_derivatives(r_l: np.ndarray,
r_m: np.ndarray,
r_n: np.ndarray,
basis_functions: List[List],
knot_sequences: List[np.ndarray],
trailing_trim: int = 0,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Identify non-zero basis functions for each point and call functions.
TODO: refactor to break up into smaller, reusable functions
Args:
r_l (np.ndarray): vector of i-j distances.
r_m (np.ndarray): vector of i-k distances.
r_n (np.ndarray): vector of j-k distances.
basis_functions (list): list of lists of of callable basis functions.
knot_sequences (list of np.ndarray)
trailing_trim (int): number of basis functions at trailing edge
to suppress. Useful for ensuring smooth cutoffs.
Returns:
tuples_3b (np.ndarray): tuples of spline derivative values
per dimension per triangle.
idx_lmn (np.ndarray): corresponding indices of relevant basis functions
to increment with derivative values.
"""
l_knots, m_knots, n_knots = knot_sequences
# identify non-zero splines (tiling each distance x 4)
r_l, idx_rl = bspline.find_spline_indices(r_l, l_knots)
r_m, idx_rm = bspline.find_spline_indices(r_m, m_knots)
r_n, idx_rn = bspline.find_spline_indices(r_n, n_knots)
# evaluate splines per dimension
L = len(l_knots) - 4 - trailing_trim
M = len(m_knots) - 4 - trailing_trim
N = len(n_knots) - 4 - trailing_trim
n_tuples = len(r_l)
values_3b = np.zeros((n_tuples, 6)) # array of basis-function values
for l_idx in range(L):
mask = (idx_rl == l_idx)
points = r_l[mask]
values_3b[mask, 0] = basis_functions[0][l_idx](points)
values_3b[mask, 3] = basis_functions[0][l_idx](points, nu=1)
for m_idx in range(M):
mask = (idx_rm == m_idx)
points = r_m[mask]
values_3b[mask, 1] = basis_functions[1][m_idx](points)
values_3b[mask, 4] = basis_functions[1][m_idx](points, nu=1)
for n_idx in range(N):
mask = (idx_rn == n_idx)
points = r_n[mask]
values_3b[mask, 2] = basis_functions[2][n_idx](points)
values_3b[mask, 5] = basis_functions[2][n_idx](points, nu=1)
idx_lmn = np.vstack([idx_rl, idx_rm, idx_rn]).T
return values_3b, idx_lmn
# if trailing_trim > 0:
# trim_mask = np.logical_or.reduce([idx_rl <= L,
# idx_rm <= M,
# idx_rn <= N])
# print("F", np.sum(trim_mask), len(values_3b))
# values_3b = values_3b[trim_mask]
# idx_lmn = idx_lmn[trim_mask]
# else:
# trim_mask = None
# return values_3b, idx_lmn, trim_mask
[docs]def symmetrize_3B(grid_3b: np.ndarray, symmetry: int = 2) -> np.ndarray:
"""
Symmetrize 3D array with mirror plane(s), enforcing permutational
invariance with respect to j and k indices.
This allows us to avoid sorting, which is slow.
"""
template = np.ones_like(grid_3b)
for i, j, k in np.ndindex(*template.shape):
if symmetry == 2:
if (i == j):
template[i, j, k] = 0.5
elif symmetry == 3:
if (i == j and i == k):
template[i, j, k] = 1 / 6
elif (i == k) or (i == j) or (j == k):
template[i, j, k] = 0.5
grid_3b = grid_3b * template
if symmetry == 2:
images = [grid_3b, grid_3b.transpose(1, 0, 2)]
elif symmetry == 3:
images = [grid_3b,
grid_3b.transpose(0, 2, 1),
grid_3b.transpose(1, 0, 2),
grid_3b.transpose(1, 2, 0),
grid_3b.transpose(2, 0, 1),
grid_3b.transpose(2, 1, 0)]
else:
images = [grid_3b]
grid_3b = np.sum(images, axis=0)
return grid_3b
[docs]def get_symmetry_weights(symmetry: int,
l_space: np.ndarray,
m_space: np.ndarray,
n_space: np.ndarray,
trailing_trim: int = 3,
) -> np.ndarray:
"""
Args:
symmetry (int): Symmetry considered in system. Default is 2, resulting
in one mirror plane along the l=m plane.
{l, m, n}_space (np.ndarray): knot sequences along three dimensions.
trailing_trim (int): number of basis functions at trailing edge
to suppress. Useful for ensuring smooth cutoffs.
Returns:
template (np.ndarray): array of weights for basis functions,
shaped in three dimensions.
"""
L = len(l_space) - 4
M = len(m_space) - 4
N = len(n_space) - 4
template = np.ones((L, M, N))
if symmetry == 2: # one mirror plane (j and k interchangeable)
for i, j, k in np.ndindex(*template.shape):
if (i > j):
template[i, j, k] = 0
elif (i == j):
template[i, j, k] = 0.5
elif symmetry == 3: # three mirror planes (i, j, k intercheangable)
for i, j, k in np.ndindex(*template.shape):
if i == j and i == k:
template[i, j, k] = 1 / 6
elif i > j or j > k:
template[i, j, k] = 0
elif (i == k) or (i == j) or (j == k):
template[i, j, k] = 0.5
# triangle distance restriction
for i, j, k in np.ndindex(*template.shape):
if l_space[i + 4] + m_space[j + 4] <= n_space[k]:
template[i, j, k] = 0
elif l_space[i + 4] + n_space[k + 4] <= m_space[j]:
template[i, j, k] = 0
elif m_space[j + 4] + n_space[k + 4] <= l_space[i]:
template[i, j, k] = 0
if trailing_trim > 0:
for trim_idx in range(1, trailing_trim + 1):
template[-trim_idx, :, :] = 0
template[:, -trim_idx, :] = 0
template[:, :, -trim_idx] = 0
return template