# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
#
"""Fast distance array computation --- :mod:`MDAnalysis.lib.distances`
===================================================================
Fast C-routines to calculate distance arrays from coordinate
arrays. Many of the functions also exist in parallel versions, that
typically provide higher performance than the serial code.
The boolean attribute MDAnalysis.lib.distances.USED_OPENMP can be
checked to see if OpenMP was used in the compilation of MDAnalysis.
Selection of acceleration ("backend")
-------------------------------------
All functions take the optional keyword *backend*, which determines
the type of acceleration. Currently, the following choices are
implemented (*backend* is case-insensitive):
.. Table:: Available *backends* for accelerated distance functions.
========== ========================= ======================================
*backend* module description
========== ========================= ======================================
"serial" :mod:`c_distances` serial implementation in C/Cython
"OpenMP" :mod:`c_distances_openmp` parallel implementation in C/Cython
with OpenMP
========== ========================= ======================================
.. versionadded:: 0.13.0
Functions
---------
.. autofunction:: distance_array(reference, configuration [, box [, result [, backend]]])
.. autofunction:: self_distance_array(reference [, box [,result [, backend]]])
.. autofunction:: calc_bonds(atom1, atom2 [, box, [, result [, backend]]])
.. autofunction:: calc_angles(atom1, atom2, atom3 [,box [, result [, backend]]])
.. autofunction:: calc_dihedrals(atom1, atom2, atom3, atom4 [,box [, result [, backend]]])
.. autofunction:: apply_PBC(coordinates, box [, backend])
.. autofunction:: transform_RtoS(coordinates, box [, backend])
.. autofunction:: transform_StoR(coordinates, box [,backend])
"""
from __future__ import division, absolute_import
from six.moves import range
import numpy as np
from numpy.lib.utils import deprecate
from .mdamath import triclinic_vectors, triclinic_box
# hack to select backend with backend=<backend> kwarg. Note that
# the cython parallel code (prange) in parallel.distances is
# independent from the OpenMP code
import importlib
_distances = {}
_distances['serial'] = importlib.import_module(".c_distances",
package="MDAnalysis.lib")
try:
_distances['openmp'] = importlib.import_module(".c_distances_openmp",
package="MDAnalysis.lib")
except ImportError:
pass
del importlib
def _run(funcname, args=None, kwargs=None, backend="serial"):
"""Helper function to select a backend function *funcname*."""
args = args if args is not None else tuple()
kwargs = kwargs if kwargs is not None else dict()
backend = backend.lower()
try:
func = getattr(_distances[backend], funcname)
except KeyError:
raise ValueError("Function {0} not available with backend {1}; try one of: {2}".format(
funcname, backend, ", ".join(_distances.keys())))
return func(*args, **kwargs)
# serial versions are always available (and are typically used within
# the core and topology modules)
from .c_distances import (calc_distance_array,
calc_distance_array_ortho,
calc_distance_array_triclinic,
calc_self_distance_array,
calc_self_distance_array_ortho,
calc_self_distance_array_triclinic,
coord_transform,
calc_bond_distance,
calc_bond_distance_ortho,
calc_bond_distance_triclinic,
calc_angle,
calc_angle_ortho,
calc_angle_triclinic,
calc_dihedral,
calc_dihedral_ortho,
calc_dihedral_triclinic,
ortho_pbc,
triclinic_pbc)
from .c_distances_openmp import OPENMP_ENABLED as USED_OPENMP
def _box_check(box):
"""Take a box input and deduce what type of system it represents based
on the shape of the array and whether all angles are 90.
Parameters
----------
box : array
Box information of unknown format.
Returns
-------
boxtype : str
* ``ortho`` orthogonal box
* ``tri_vecs`` triclinic box vectors
* ``tri_box`` triclinic box lengths and angles
Raises
------
TypeError
If box is not float32.
ValueError
If box type not detected.
"""
if box.dtype != np.float32:
raise TypeError("Box must be of type float32")
boxtype = 'unknown'
if box.shape == (3,):
boxtype = 'ortho'
elif box.shape == (3, 3):
if np.all([box[0][1] == 0.0, # Checks that tri box is properly formatted
box[0][2] == 0.0,
box[1][2] == 0.0]):
boxtype = 'tri_vecs'
else:
boxtype = 'tri_vecs_bad'
elif box.shape == (6,):
if np.all(box[3:] == 90.):
boxtype = 'ortho'
else:
boxtype = 'tri_box'
if boxtype == 'unknown':
raise ValueError("box input not recognised"
", must be an array of box dimensions")
return boxtype
def _check_array(coords, desc):
"""Check an array is a valid array of coordinates
Must be:
(n,3) in shape
float32 data
"""
if (coords.ndim != 2 or coords.shape[1] != 3):
raise ValueError("{0} must be a sequence of 3 dimensional coordinates"
"".format(desc))
_check_array_dtype(coords, desc)
def _check_array_dtype(coords, desc):
"""Check whether an array contains values of dtype: np.float32 or not"""
if coords.dtype != np.float32:
raise TypeError("{0} must be of type float32".format(desc))
def _check_results_array(results, size):
"""Check the results array is ok to use
Must be:
same shape as size
float64
"""
if results.shape != size:
raise ValueError("Result array has incorrect size,"
"should be {0}, got {1}".format(size, results.shape))
if results.dtype != np.float64:
raise TypeError("Results array must be of type float64")
def _check_lengths_match(*arrays):
"""Check all arrays are same shape"""
ref = arrays[0].shape
if not all( a.shape == ref for a in arrays):
raise ValueError("Input arrays must all be same shape"
"Got {0}".format([a.shape for a in arrays]))
[docs]def distance_array(reference, configuration, box=None, result=None, backend="serial"):
"""Calculate all distances between a reference set and another configuration.
If there are *i* positions in reference, and *j* positions in configuration,
will calculate a *i* x *j* array of distances
If an *box* is supplied then a minimum image convention is used when
calculating distances.
If a 2D numpy array of dtype ``numpy.float64`` with the shape ``(len(reference),
len(configuration))`` is provided in *result* then this preallocated array is
filled. This can speed up calculations.
Parameters
----------
reference : numpy.array of numpy.float32
Reference coordinate array.
configuration : numpy.array of numpy.float32
Configuration coordinate array.
box : numpy.array or None
Dimensions of the cell; if provided, the minimum image convention is
applied. The dimensions must be provided in the same format as returned
by by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx,
ly, lz, alpha, beta, gamma]``.
result : numpy.array of numpy.float64, optional
Preallocated result array which must have the
shape ``(len(ref), len(conf))`` and ``dtype=numpy.float64``.
Avoids creating the array which saves time when the function
is called repeatedly. [``None``]
backend
Select the type of acceleration; "serial" is always available. Other
possibilities are "OpenMP" (OpenMP).
Returns
-------
d : numpy.array
``(len(reference),len(configuration))`` numpy array with the distances
``d[i,j]`` between reference coordinates `i` and configuration
coordinates `j`.
Note
----
This method is slower than it could be because internally we need to make
copies of the ref and conf arrays.
.. versionchanged:: 0.13.0
Added *backend* keyword.
"""
ref = reference.copy('C')
conf = configuration.copy('C')
_check_array(conf, 'conf')
_check_array(ref, 'ref')
if box is not None:
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
confnum = conf.shape[0]
refnum = ref.shape[0]
if result is not None:
_check_results_array(result, (refnum, confnum))
distances = np.asarray(result)
else:
distances = np.zeros((refnum, confnum), np.float64)
if box is not None:
if boxtype == 'ortho':
_run("calc_distance_array_ortho",
args=(ref, conf, box, distances),
backend=backend)
else:
_run("calc_distance_array_triclinic",
args=(ref, conf, box, distances),
backend=backend)
else:
_run("calc_distance_array",
args=(ref, conf, distances),
backend=backend)
return distances
[docs]def self_distance_array(reference, box=None, result=None, backend="serial"):
"""Calculate all distances within a configuration *reference*.
If a *box* is supplied then a minimum image convention is used before
calculating distances.
If a 1D numpy array of dtype ``numpy.float64`` with the shape
``(N*(N-1)/2)`` is provided in *result* then this preallocated array
is filled. This can speed up calculations.
Parameters
----------
reference : array
Reference coordinate array with ``N=len(ref)`` coordinates.
box : array or None
Dimensions of the cell; if provided, the minimum image convention is
applied. The dimensions must be provided in the same format as returned
by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx,
ly, lz, alpha, beta, gamma]``.
result : array, optional
Preallocated result array which must have the shape
``(N*(N-1)/2,)`` and dtype ``numpy.float64``. Avoids creating
the array which saves time when the function is called
repeatedly. [``None``]
backend
Select the type of acceleration; "serial" is always available. Other
possibilities are "OpenMP" (OpenMP).
Returns
-------
d : array
``N*(N-1)/2`` numpy 1D array with the distances dist[i,j] between ref
coordinates i and j at position d[k]. Loop through d:
.. code-block:: python
for i in range(N):
for j in range(i+1, N):
k += 1
dist[i,j] = d[k]
Note
----
This method is slower than it could be because internally we need to make
copies of the coordinate arrays.
.. versionchanged:: 0.13.0
Added *backend* keyword.
"""
ref = reference.copy('C')
_check_array(ref, 'ref')
with_PBC = (box is not None)
if box is not None:
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
refnum = ref.shape[0]
distnum = refnum * (refnum - 1) // 2
if result is not None:
_check_results_array(result, (distnum,))
distances = np.asarray(result)
else:
distances = np.zeros((distnum,), np.float64)
if box is not None:
if boxtype == 'ortho':
_run("calc_self_distance_array_ortho",
args=(ref, box, distances),
backend=backend)
else:
_run("calc_self_distance_array_triclinic",
args=(ref, box, distances),
backend=backend)
else:
_run("calc_self_distance_array",
args=(ref, distances),
backend=backend)
return distances
return coords
return coords
[docs]def calc_bonds(coords1, coords2, box=None, result=None, backend="serial"):
"""
Calculate all distances between a pair of atoms. *atom1* and *atom2* are both
arrays of coordinates, where atom1[i] and atom2[i] represent a bond.
In comparison to distance_array and self_distance_array which calculate distances
between all combinations of coordinates, calc_bonds can be used to calculate distance
between pairs of objects, similar to::
numpy.linalg.norm(a - b) for a, b in zip(coords1, coords2)
The optional argument *box* applies minimum image convention if supplied.
*box* can be either orthogonal or triclinic
If a 1D numpy array of dtype ``numpy.float64`` with ``len(atom1)`` elements is
provided in *result* then this preallocated array is filled. This can speed
up calculations.
bondlengths = calc_bonds(coords1, coords2 [, box [,result=bondlengths]])
Parameters
----------
coords1 : array
An array of coordinates for one half of the bond.
coords2 : array
An array of coordinates for the other half of bond
box : array
The unitcell dimesions for this system.
The dimensions must be provided in the same format as returned
by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx,
ly, lz, alpha, beta, gamma]``.
result : array, optional
Preallocated result array which must be same length as coord
arrays and ``dtype=numpy.float64``. Avoids creating the
array which saves time when the function is called repeatedly. [None]
backend
Select the type of acceleration; "serial" is always available. Other
possibilities are "OpenMP" (OpenMP).
Returns
-------
bondlengths : array
The length between each pair in coords1 and coords2
.. versionadded:: 0.8
.. versionchanged:: 0.13.0
Added *backend* keyword.
"""
atom1 = coords1.copy('C')
atom2 = coords2.copy('C')
_check_array(atom1, 'atom1')
_check_array(atom2, 'atom2')
_check_lengths_match(atom1, atom2)
if box is not None:
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
numatom = atom1.shape[0]
if result is not None:
_check_results_array(result, (numatom,))
distances = np.asarray(result)
else:
distances = np.zeros((numatom,), np.float64)
if box is not None:
if boxtype == 'ortho':
_run("calc_bond_distance_ortho",
args=(atom1, atom2, box, distances),
backend=backend)
else:
_run("calc_bond_distance_triclinic",
args=(atom1, atom2, box, distances),
backend=backend)
else:
_run("calc_bond_distance",
args=(atom1, atom2, distances),
backend=backend)
return distances
[docs]def calc_angles(coords1, coords2, coords3, box=None, result=None, backend="serial"):
"""
Calculates the angle formed between three atoms, over a list of coordinates.
All *atom* inputs are lists of coordinates of equal length, with *atom2*
representing the apex of the angle.
If a 1D numpy array of dtype ``numpy.float64`` with ``len(atom1)`` elements is
provided in *result* then this preallocated array is filled. This can speed
up calculations.
The optional argument ``box`` ensures that periodic boundaries are taken into account when
constructing the connecting vectors between atoms, ie that the vector between atoms 1 & 2
goes between coordinates in the same image.
angles = calc_angles(coords1, coords2, coords3, [[box=None],result=angles])
Parameters
----------
coords1 : array
Coordinate array of one side of angles.
coords2 : array
Coordinate array of apex of angles.
coords3 : array
Coordinate array of other side of angles.
box : array
The unitcell dimesions for this system.
The dimensions must be provided in the same format as returned
by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx,
ly, lz, alpha, beta, gamma]``.
result : array, optional
Preallocated result array which must be same length as coord
arrays and ``dtype=numpy.float64``. Avoids creating the
array which saves time when the function is called repeatedly. [None]
backend
Select the type of acceleration; "serial" is always available. Other
possibilities are "OpenMP" (OpenMP).
Returns
-------
angles : array
A numpy.array of angles in radians.
.. versionadded:: 0.8
.. versionchanged:: 0.9.0
Added optional box argument to account for periodic boundaries in calculation
.. versionchanged:: 0.13.0
Added *backend* keyword.
"""
atom1 = coords1.copy('C')
atom2 = coords2.copy('C')
atom3 = coords3.copy('C')
numatom = atom1.shape[0]
_check_array(atom1, 'coords1')
_check_array(atom2, 'coords2')
_check_array(atom3, 'coords3')
_check_lengths_match(atom1, atom2, atom3)
if box is not None:
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
if result is not None:
_check_results_array(result, (numatom,))
angles = np.asarray(result)
else:
angles = np.zeros((numatom,), np.float64)
if box is not None:
if boxtype == 'ortho':
_run("calc_angle_ortho",
args=(atom1, atom2, atom3, box, angles),
backend=backend)
else:
_run("calc_angle_triclinic",
args=(atom1, atom2, atom3, box, angles),
backend=backend)
else:
_run("calc_angle",
args=(atom1, atom2, atom3, angles),
backend=backend)
return angles
[docs]def calc_dihedrals(coords1, coords2, coords3, coords4, box=None, result=None,
backend="serial"):
"""
Calculate the dihedral angle formed by four atoms, over a list of coordinates.
Dihedral angle around axis connecting atoms 1 and 2 (i.e. the angle
between the planes spanned by atoms (0,1,2) and (1,2,3))::
3
|
1-----2
/
0
If a 1D numpy array of dtype ``numpy.float64`` with ``len(atom1)`` elements
is provided in *result* then this preallocated array is filled. This can
speed up calculations.
The optional argument ``box`` ensures that periodic boundaries are taken
into account when constructing the connecting vectors between atoms, ie
that the vector between atoms 1 & 2 goes between coordinates in the same
image::
angles = calc_dihedrals(coords1, coords2, coords3, coords4 [,box=box, result=angles])
Parameters
----------
coords1 : array
Coordinate array of 1st atom in dihedrals.
coords2 : array
Coordinate array of 2nd atom in dihedrals.
coords3 : array
Coordinate array of 3rd atom in dihedrals.
coords4 : array
Coordinate array of 4th atom in dihedrals.
box : array
The unitcell dimesions for this system.
The dimensions must be provided in the same format as returned
by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx,
ly, lz, alpha, beta, gamma]``.
result : array, optional
Preallocated result array which must be same length as coord
arrays and ``dtype=numpy.float64``. Avoids creating the
array which saves time when the function is called repeatedly. [None]
backend
Select the type of acceleration; "serial" is always available. Other
possibilities are "OpenMP" (OpenMP).
Returns
-------
angles : array
A numpy.array of angles in radians.
.. versionadded:: 0.8
.. versionchanged:: 0.9.0
Added optional box argument to account for periodic boundaries in calculation
.. versionchanged:: 0.11.0
Renamed from calc_torsions to calc_dihedrals
.. versionchanged:: 0.13.0
Added *backend* keyword.
"""
atom1 = coords1.copy('C')
atom2 = coords2.copy('C')
atom3 = coords3.copy('C')
atom4 = coords4.copy('C')
_check_array(atom1, 'atom1')
_check_array(atom2, 'atom2')
_check_array(atom3, 'atom3')
_check_array(atom4, 'atom4')
_check_lengths_match(atom1, atom2, atom3, atom4)
numatom = atom1.shape[0]
if box is not None:
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
if result is not None:
_check_results_array(result, (numatom,))
angles = np.asarray(result)
else:
angles = np.zeros((numatom,), np.float64)
if box is not None:
if boxtype == 'ortho':
_run("calc_dihedral_ortho",
args=(atom1, atom2, atom3, atom4, box, angles),
backend=backend)
else:
_run("calc_dihedral_triclinic",
args=(atom1, atom2, atom3, atom4, box, angles),
backend=backend)
else:
_run("calc_dihedral",
args=(atom1, atom2, atom3, atom4, angles),
backend=backend)
return angles
[docs]def apply_PBC(incoords, box, backend="serial"):
"""Moves a set of coordinates to all be within the primary unit cell
newcoords = apply_PBC(coords, box)
Parameters
----------
coords : array
Coordinate array (of type numpy.float32).
box : array
The unitcell dimesions for this system; can be either orthogonal or
triclinic information. The dimensions must be provided in the same
format as returned by
:attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz,
alpha, beta, gamma]``.
backend
Select the type of acceleration; "serial" is always available. Other
possibilities are "OpenMP" (OpenMP).
Returns
-------
newcoords : array
Coordinates that are now all within the primary unit cell, as defined
by box.
.. versionadded:: 0.8
.. versionchanged:: 0.13.0
Added *backend* keyword.
"""
coords = incoords.copy('C')
_check_array(coords, 'coords')
coordnum = coords.shape[0]
# determine boxtype
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if boxtype == 'tri_box':
box = triclinic_vectors(box)
if boxtype == 'tri_vecs_bad':
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
box_inv = np.zeros((3), dtype=np.float32)
if boxtype == 'ortho':
box_inv[0] = 1.0 / box[0]
box_inv[1] = 1.0 / box[1]
box_inv[2] = 1.0 / box[2]
_run("ortho_pbc",
args=(coords, box, box_inv),
backend=backend)
else:
box_inv[0] = 1.0 / box[0][0]
box_inv[1] = 1.0 / box[1][1]
box_inv[2] = 1.0 / box[2][2]
_run("triclinic_pbc",
args=(coords, box, box_inv),
backend=backend)
return coords