Source code for uf3.representation.distances

"""
This module provides functions for computing neighbor lists, evaluating
pair distances, computing direction cosines for force components, and
fitting/evaluating one-dimensional BSplines.
"""

from typing import List, Dict, Tuple, Union, Any
import numpy as np
import numba as nb
from scipy import spatial
from scipy import signal
import ase
from ase import symbols as ase_symbols
from uf3.data import geometry
from uf3.data import composition
from uf3.util import parallel


[docs]def distances_by_interaction(geom: ase.Atoms, pair_tuples: List[Tuple[str]], r_min_map: Dict[Tuple[str], float], r_max_map: Dict[Tuple[str], float], supercell: ase.Atoms = None, atomic: bool = False, ) -> Dict[Tuple[str], np.ndarray]: """ Identify pair distances within an entry (or between an entry and its supercell), subject to lower and upper bounds given by r_min_map and r_max_map, per pair interaction e.g. A-A, A-B, B-B, etc. Args: geom (ase.Atoms): configuration of interest. pair_tuples (list): list of pair interactions e.g. [(A-A), (A-B), (A-C), (B-B), ...)] r_min_map (dict): map of minimum pair distance per interaction e.g. {(A-A): 2.0, (A-B): 3.0, (B-B): 4.0} r_max_map (dict): map of maximum pair distance per interaction supercell (ase.Atoms): ase.Atoms output of get_supercell used to account for atoms in periodic images. atomic (bool): whether to split array into lists of vectors corresponding to each atom's atomic environment. Returns: distances_map (dict): for each interaction key (A-A, A-B, ...), flattened np.ndarray of pair distances within range or list of flattened np.ndarray if atomic is True. """ distance_matrix = get_distance_matrix(geom, supercell) if supercell is None: supercell = geom geo_composition = np.array(geom.get_atomic_numbers()) sup_composition = np.array(supercell.get_atomic_numbers()) s_geo = len(geom) # loop through interactions if atomic: distances_map = {tuple_: [] for tuple_ in pair_tuples} else: distances_map = {} for pair in pair_tuples: r_min = r_min_map[pair] r_max = r_max_map[pair] pair_numbers = ase_symbols.symbols2numbers(pair) comp_mask = mask_matrix_by_pair_interaction(pair_numbers, geo_composition, sup_composition) cut_mask = (distance_matrix > r_min) & (distance_matrix < r_max) if not atomic: # valid distances across configuration interaction_mask = comp_mask & cut_mask distances_map[pair] = distance_matrix[interaction_mask] else: # valid distances per atom for i in range(s_geo): atom_slice = distance_matrix[i] interaction_mask = comp_mask[i] & cut_mask[i] distances_map[pair].append(atom_slice[interaction_mask]) return distances_map
[docs]def derivatives_by_interaction(geom: ase.Atoms, pair_tuples: List[Tuple[str]], r_cut: float, r_min_map: Dict[Tuple[str], float], r_max_map: Dict[Tuple[str], float], supercell: ase.Atoms = None, ) -> Tuple[Dict, Dict]: """ Identify pair distances within a supercell and derivatives for evaluating forces, subject to lower and upper bounds given by r_min_map and r_max_map, per pair interaction e.g. A-A, A-B, B-B, etc. Args: geom (ase.Atoms): unit cell ase.Atoms. pair_tuples (list): list of pair interactions. e.g. [(A-A), (A-B), (A-C), (B-B), ...)] r_cut (float): cutoff radius (angstroms). r_min_map (dict): map of minimum pair distance per interaction. e.g. {(A-A): 2.0, (A-B): 3.0, (B-B): 4.0} r_max_map (dict): map of maximum pair distance per interaction. supercell (ase.Atoms): ase.Atoms output of get_supercell used to account for atoms in periodic images. Returns: distances: flattened np.ndarray of pair distances across supercell within range. drij_dR: np.ndarray of shape (n_atoms, 3, n_distances) and the second dimension corresponds to x, y, and z directions. Used to evaluate forces. """ if supercell is None: supercell = geom n_atoms = len(geom) # extract atoms from supercell that are within the maximum # cutoff distance of atoms in the unit cell. supercell = mask_supercell_with_radius(geom, supercell, r_cut) distance_matrix = get_distance_matrix(supercell, supercell) sup_positions = supercell.get_positions() sup_composition = np.array(supercell.get_atomic_numbers()) # loop through interactions distance_map = {} derivative_map = {} for pair in pair_tuples: pair_numbers = ase_symbols.symbols2numbers(pair) r_min = r_min_map[pair] r_max = r_max_map[pair] comp_mask = mask_matrix_by_pair_interaction(pair_numbers, sup_composition, sup_composition) cut_mask = (distance_matrix > r_min) & (distance_matrix < r_max) # valid distances across configuration interaction_mask = comp_mask & cut_mask distance_map[pair] = distance_matrix[interaction_mask] # corresponding derivatives, flattened x_where, y_where = np.where(interaction_mask) drij_dr = compute_direction_cosines(sup_positions, distance_matrix, x_where, y_where, n_atoms) derivative_map[pair] = drij_dr return distance_map, derivative_map
[docs]def mask_supercell_with_radius(geom: ase.Atoms, supercell: ase.Atoms, r_max: float, ) -> ase.Atoms: """ Makes a copy of supercell and deletes atoms that are further than r_max away from any atom in the unit cell geometry. Args: geom (ase.Atoms): unit cell of interest supercell (ase.Atoms): supercell, centered on unitcell. r_max (float): maximum radius. Returns: supercell (ase.Atoms): copy of supercell with subset of atoms within distance. """ supercell = supercell.copy() distance_matrix = get_distance_matrix(geom, supercell) weight_mask = distance_matrix <= r_max # mask distance matrix by r_max valids_mask = np.any(weight_mask, axis=0) reject_idx = np.where(valids_mask == False)[0] del supercell[reject_idx] return supercell
[docs]def mask_matrix_by_pair_interaction(pair: Union[List, Tuple], geo_composition: np.ndarray, sup_composition: np.ndarray = None ) -> np.ndarray: """ Generates boolean mask for the distance matrix based on composition vectors i.e. from ase.Atoms.get_atomic_numbers() e.g. identifying A-B interactions: supercell A B A A B B --------------------- geometry A | - X - - X X B | X - X X - - Args: pair (tuple): pair interaction of interest e.g. (A-B) geo_composition (list, np.ndarray): list of elements for each atom in geometry. sup_composition (list, np.ndarray): optional list of elements for each atom in supercell. Returns: comp_mask (np.ndarray): Boolean mask of dimension (n x m) corresponding to pair interactions of the specified type, where n and m are the number of atoms in the geometry and its supercell, respectively. """ if sup_composition is None: sup_composition = geo_composition s_geo = len(geo_composition) s_sup = len(sup_composition) comp_mask = np.zeros((s_geo, s_sup), dtype=bool) for i, j in (pair, pair[::-1]): geo_match = np.where(geo_composition == j)[0] sup_match = np.where(sup_composition == i)[0] comp_mask[geo_match[None, :], sup_match[:, None]] = True return comp_mask
[docs]def get_distance_matrix(geom: ase.Atoms, supercell: ase.Atoms = None, ) -> np.ndarray: """ Get distance matrix from geometry and, optionally, supercell including atoms from adjacent images. Args: geom (ase.Atoms): unit cell of interest. supercell (ase.Atoms): ase.Atoms output of get_supercell used to account for atoms in periodic images. Returns: distance_matrix (np.ndarray): square (n x n) if supercell is not provided, rectangular (n x m) otherwise, where n and m are the number of atoms in the geometry and its supercell, respectively. """ if supercell is None: supercell = geom sup_positions = supercell.get_positions() geo_positions = geom.get_positions() distance_matrix = spatial.distance.cdist(geo_positions, sup_positions) return distance_matrix
[docs]def get_distance_derivatives(geom: ase.Atoms, supercell: ase.Atoms, r_min: float = 0.0, r_max: float = 10.0, ) -> Tuple[np.ndarray, np.ndarray]: """ Identify pair distances within a supercell, subject to r_min < r < r_max, along with derivatives for evaluating forces. Legacy function for unary systems. Args: supercell (ase.Atoms): output of get_supercell used to account for atoms in periodic images. geom (ase.Atoms): unit cell ase.Atoms. r_min (float): minimum pair distance to consider. r_max (float): maximum pair distance to consider. Returns: distances: flattened np.ndarray of pair distances across supercell within range. drij_dR: np.ndarray of shape (n_atoms, 3, n_distances) and the second dimension corresponds to x, y, and z directions. Used to evaluate forces. """ sup_positions = supercell.get_positions() geo_positions = geom.get_positions() distance_matrix = spatial.distance.cdist(geo_positions, sup_positions) weight_mask = distance_matrix <= r_max # mask distance matrix by r_max valids_mask = np.any(weight_mask, axis=0) sup_positions = sup_positions[valids_mask, :] distance_matrix = spatial.distance.cdist(sup_positions, sup_positions) n_atoms = len(geo_positions) cut_mask = (distance_matrix >= r_min) & (distance_matrix <= r_max) i_where, j_where = np.where(cut_mask) drij_dr = compute_direction_cosines(sup_positions, distance_matrix, i_where, j_where, n_atoms) distances = distance_matrix[cut_mask] return distances, drij_dr
[docs]def distances_from_geometry(geom: ase.Atoms, supercell: ase.Atoms = None, r_min: float = 0.0, r_max: float = 10.0, ) -> np.ndarray: """ Identify pair distances within a geometry (or between a geometry and its supercell), subject to r_min < r < r_max. Legacy function for unary systems. Args: geom (ase.Atoms): unit cell of interest. supercell (ase.Atoms): output of get_supercell used to account for atoms in periodic images. r_min (float): minimum pair distance; default = 0. r_max (float): maximum pair distance; default = 10. Returns: flattened np.ndarray of pair distances within range. """ distance_matrix = get_distance_matrix(geom, supercell) cut_mask = (distance_matrix > r_min) & (distance_matrix < r_max) distances = distance_matrix[cut_mask] return distances
[docs]@nb.njit(nb.float64[:, :](nb.int32[:], nb.int64[:], nb.int64[:])) def kronecker_delta(m_range, i_where, j_where): n_atoms = len(m_range) n_pairs = len(i_where) kronecker = np.zeros((n_atoms, n_pairs), dtype=nb.float64) for m in m_range: for idx in range(n_pairs): i = i_where[idx] j = j_where[idx] kronecker[m, idx] = (m == j) - (m == i) return kronecker
[docs]def kronecker_vectorized(n_atoms: int, i_where: np.ndarray, j_where: np.ndarray ) -> np.ndarray: m_range = np.arange(n_atoms) im = m_range[:, None] == i_where[None, :] jm = m_range[:, None] == j_where[None, :] kronecker = jm.astype(int) - im.astype(int) return kronecker
[docs]def compute_direction_cosines(sup_positions: np.ndarray, distance_matrix: np.ndarray, i_where: np.ndarray, j_where: np.ndarray, n_atoms: int ) -> np.ndarray: """ Intermediate function for computing derivatives for forces. Args: sup_positions: atom positions in supercell. distance_matrix: output of spatial.distance.cdist(sup_positions, sup_positions). i_where: indices of i-th atom based on distance mask. j_where: indices of j-th atom based on distance mask. n_atoms: number of atoms in original unit cell. Returns: drij_dR: np.ndarray of shape (n_atoms, 3, n_distances) and the second dimension corresponds to x, y, and z directions. Used to evaluate forces. """ # n_atoms x n_distances kronecker = kronecker_delta(np.arange(n_atoms, dtype=np.int32), i_where, j_where) # n_distances x 3 delta_r = sup_positions[j_where, :] - sup_positions[i_where, :] # n_distances rij = distance_matrix[i_where, j_where] drij_dr = (kronecker[:, None, :] * delta_r.T[None, :, :] / rij[None, None, :]) return drij_dr
[docs]def summarize_distances(geometries: List[ase.Atoms], chemical_system: composition.ChemicalSystem, r_cut: float = 12.0, n_bins: int = 100, print_stats: bool = True, min_peak_width: float = 0.5, progress: Any = "bar", ) -> Tuple[Dict, np.ndarray, Dict]: """ Construct histogram of distances per pair interaction across list of geometries. Useful for optimizing the lower- and upper- bounds of knot sequences. TODO: refactor to break up into smaller, reusable functions Args: geometries (list): list of ase.Atoms configuration. chemical_system (uf3.composition.ChemicalSystem) r_cut (float): cutoff distance in angstroms. n_bins (int): number of bins in histogram. print_stats (bool): print minimum distance and identified peaks. min_peak_width (float): minimum peak with in angstroms for peak-finding algorithm. progress: style of progress bar. Returns: histogram_map (dict): for each interaction key (A-A, A-B, ...), vector of histogram frequencies of length n_bins. Frequency values are divided by r^2, evaluated at the middle of each bin. bin_edges (np.ndarray): bin edges of histogram. """ pair_tuples = chemical_system.interactions_map[2] bin_edges = np.linspace(0, r_cut, n_bins + 1) histogram_values = {pair: np.zeros(n_bins) for pair in pair_tuples} n_entries = len(geometries) iterable = parallel.progress_iter(geometries, style=progress) for geom in iterable: if any(geom.pbc): supercell = geometry.get_supercell(geom, r_cut=r_cut) density = len(geom) / geom.get_volume() else: supercell = geom density = 1 distance_matrix = get_distance_matrix(geom, supercell) geo_composition = np.array(geom.get_atomic_numbers()) sup_composition = np.array(supercell.get_atomic_numbers()) # loop through interactions for pair in pair_tuples: pair_numbers = ase_symbols.symbols2numbers(pair) comp_mask = mask_matrix_by_pair_interaction(pair_numbers, geo_composition, sup_composition) cut_mask = (distance_matrix > 0) & (distance_matrix < r_cut) interaction_mask = comp_mask & cut_mask distances_vector = distance_matrix[interaction_mask] frequencies, _ = np.histogram(distances_vector, bin_edges) frequencies = frequencies / density / n_entries / 2 if pair[0] != pair[1]: frequencies /= 2 histogram_values[pair] += frequencies bin_centers = 0.5 * np.add(bin_edges[:-1], bin_edges[1:]) bin_span = int(np.ceil(min_peak_width / (bin_edges[1] - bin_edges[0]))) lower_bounds = {} for pair in pair_tuples: histogram_values[pair] /= bin_centers ** 2 * 4 * np.pi lower_bound = bin_edges[np.nonzero(histogram_values[pair])[0][0]] lower_bounds[pair] = lower_bound if print_stats: peaks = bin_centers[signal.find_peaks(histogram_values[pair], width=bin_span)[0]] print(pair, 'Lower bound: {0:.3f} angstroms'.format(lower_bound)) print(pair, 'Peaks (min width {} angstroms):'.format(min_peak_width), peaks) return histogram_values, bin_edges, lower_bounds