###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Various utilities to deal with StructureData instances or create new ones
(e.g. convert format to/from SPGLIB, create a StructureData from a different
format, ...)
"""
import copy
import re
import numpy as np
from aiida.common.constants import elements
from aiida.engine import calcfunction
from aiida.orm.nodes.data.structure import Kind, Site, StructureData
__all__ = ('structure_to_spglib_tuple', 'spglib_tuple_to_structure')
[docs]
@calcfunction
def _get_cif_ase_inline(struct, parameters):
    """Build a :py:class:`aiida.orm.nodes.data.cif.CifData` from a structure via ASE.

    The structure is converted with ``struct.get_ase`` (forwarding the content
    of ``parameters`` as keyword arguments, if given) and every data block of
    the resulting CIF is tagged as space group ``P 1`` with ``Z = 1`` and the
    Hill formula of the structure.

    .. note:: requires ASE module.

    :param struct: the structure to convert; must provide ``get_ase`` and ``get_formula``
    :param parameters: ``None`` or a node whose ``get_dict()`` returns the keyword
        arguments to pass to ``struct.get_ase``
    :return: a dictionary ``{'cif': <CifData>}``
    """
    from aiida.orm import CifData

    ase_kwargs = parameters.get_dict() if parameters is not None else {}
    cif = CifData(ase=struct.get_ase(**ase_kwargs))

    # Tags written into every data block, in this order.
    block_tags = {
        '_symmetry_space_group_name_H-M': 'P 1',
        '_symmetry_space_group_name_Hall': 'P 1',
        '_symmetry_Int_Tables_number': 1,
        '_cell_formula_units_Z': 1,
        '_chemical_formula_sum': struct.get_formula(mode='hill', separator=' '),
    }
    for block_name in cif.values.keys():
        for tag, value in block_tags.items():
            cif.values[block_name][tag] = value

    return {'cif': cif}
[docs]
def structure_to_spglib_tuple(structure):
    """Convert an AiiDA structure to a tuple of the format (cell, scaled_positions, element_numbers).

    :param structure: the AiiDA structure
    :return: (structure_tuple, kind_info, kinds) where structure_tuple
        is a tuple of format (cell, scaled_positions, element_numbers);
        kind_info is a dictionary mapping the kind_names to
        the numbers used in element_numbers. When possible, it uses
        the Z number of the element, otherwise it uses numbers > 1000;
        kinds is a list of the kinds of the structure.
    """

    def first_free_number(taken, start_from):
        """Return the first integer >= start_from that is not yet in ``taken``."""
        candidate = start_from
        # Walk the used numbers in increasing order; the first gap is the answer.
        for used in sorted(value for value in taken if value >= start_from):
            if used != candidate:
                break
            candidate += 1
        return candidate

    symbol_to_z = {info['symbol']: z_number for z_number, info in elements.items()}

    cell = np.array(structure.cell)
    cartesian = np.array([site.position for site in structure.sites])
    # Scaled (fractional) coordinates: cartesian positions times the inverse cell.
    scaled = np.dot(cartesian, np.linalg.inv(cell))

    kind_numbers: dict = {}
    for kind in structure.kinds:
        taken = list(kind_numbers.values())
        if len(kind.symbols) != 1:
            # Alloys / multi-symbol kinds have no single Z: use a dedicated high range.
            number = first_free_number(taken, start_from=200000)
        else:
            z_number = symbol_to_z[kind.symbols[0]]
            if z_number in taken:
                # Z already used by another kind of the same element: pick
                # a free number in the range starting at Z * 1000.
                number = first_free_number(taken, start_from=z_number * 1000)
            else:
                number = z_number
        kind_numbers[kind.name] = number

    site_numbers = [kind_numbers[site.kind_name] for site in structure.sites]
    structure_tuple = (cell, scaled, site_numbers)
    return (structure_tuple, kind_numbers, list(structure.kinds))
[docs]
def spglib_tuple_to_structure(structure_tuple, kind_info=None, kinds=None):
    """Convert a tuple of the format (cell, scaled_positions, element_numbers) to an AiiDA structure.

    Unless the element_numbers are identical to the Z number of the atoms,
    you should pass both kind_info and kinds, with the same format as returned
    by structure_to_spglib_tuple.

    :param structure_tuple: the structure in format (cell, scaled_positions, element_numbers)
    :param kind_info: a dictionary mapping the kind_names to
        the numbers used in element_numbers. If not provided, assumes {element_name: element_Z}
    :param kinds: a list of the kinds of the structure.
    :return: a new ``StructureData`` with the given cell, kinds and sites
    :raises ValueError: if only one of ``kind_info``/``kinds`` is passed, if a number is not a
        valid Z number (when ``kind_info`` is not given), if ``kind_info`` maps two kind names
        to the same number, if a kind name or a number cannot be resolved, or if the number
        of positions differs from the number of element numbers.
    """
    # kind_info and kinds only make sense together: report which one is missing.
    if kind_info is None and kinds is not None:
        raise ValueError('If you pass kinds, you should also pass kind_info')
    if kinds is None and kind_info is not None:
        raise ValueError('If you pass kind_info, you should also pass kinds')

    cell, rel_pos, numbers = structure_tuple

    if kind_info:
        # Work on shallow copies so the caller's containers are never modified.
        _kind_info = copy.copy(kind_info)
        _kinds = copy.copy(kinds)
    else:
        # No (or empty) kind_info: interpret each number as an atomic number Z.
        try:
            symbols = [elements[num]['symbol'] for num in numbers]
        except KeyError as exc:
            raise ValueError(
                f'You did not pass kind_info, but at least one number is not a valid Z number: {exc.args[0]}'
            ) from exc
        _kind_info = {elements[num]['symbol']: num for num in set(numbers)}
        # Get the default kinds, one per distinct element symbol
        _kinds = [Kind(symbols=sym) for sym in set(symbols)]

    _kinds_dict = {k.name: k for k in _kinds}

    # From here on only _kinds and _kind_info are used.
    # The name -> number mapping must be invertible.
    if len(_kind_info) != len(set(_kind_info.values())):
        raise ValueError('There is at least a number repeated twice in kind_info!')

    # Invert the mapping: number -> kind name
    mapping_num_kindname = {v: k for k, v in _kind_info.items()}

    # Create the actual mapping: number -> Kind instance
    try:
        mapping_to_kinds = {num: _kinds_dict[kindname] for num, kindname in mapping_num_kindname.items()}
    except KeyError as exc:
        raise ValueError(f"Unable to find '{exc.args[0]}' in the kinds list") from exc

    try:
        site_kinds = [mapping_to_kinds[num] for num in numbers]
    except KeyError as exc:
        raise ValueError(f'Unable to find kind in kind_info for number {exc.args[0]}') from exc

    structure = StructureData(cell=cell)
    for k in _kinds:
        structure.append_kind(k)

    # Scaled -> cartesian coordinates
    abs_pos = np.dot(rel_pos, cell)
    if len(abs_pos) != len(site_kinds):
        raise ValueError('The length of the positions array is different from the length of the element numbers')

    for kind, pos in zip(site_kinds, abs_pos):
        structure.append_site(Site(kind_name=kind.name, position=pos))

    return structure
[docs]
def xyz_parser_iterator(xyz_string):
    """Yield a tuple `(natoms, comment, atomiter)` for each frame in an XYZ file.

    `natoms` is the integer declared on the first line of the frame, `comment`
    is the content of its second line and `atomiter` is an iterator yielding a
    nested tuple `(symbol, (x, y, z))` for each entry, with the coordinates
    converted to floats.

    A final line that is not terminated by a newline is parsed like any other line.

    :param xyz_string: a string containing XYZ-structured text
    :raises TypeError: while consuming `atomiter`, if the number of atom
        entries found in the frame differs from the declared `natoms`
    """

    class BlockIterator:
        """An iterator for wrapping the iterator returned by `match.finditer`
        to extract the required fields directly from the match object
        """

        def __init__(self, iterator, natoms):
            self._it = iterator
            self._natoms = natoms
            self._catom = 0  # number of atom entries returned so far

        def __iter__(self):
            return self

        def __next__(self):
            try:
                match = next(self._it)
            except StopIteration:
                # if we reached the number of atoms declared, everything is well
                # and we re-raise the StopIteration exception
                if self._catom == self._natoms:
                    raise
                # otherwise we got too few entries
                raise TypeError(
                    f'Number of atom entries ({self._catom}) is smaller than the number of atoms ({self._natoms})'
                )

            self._catom += 1

            if self._catom > self._natoms:
                raise TypeError(
                    f'Number of atom entries ({self._catom}) is larger than the number of atoms ({self._natoms})'
                )

            return (match.group('sym'), (float(match.group('x')), float(match.group('y')), float(match.group('z'))))

    # Matches a single atom line and captures the symbol and the three coordinates
    pos_regex = re.compile(
        r"""
^                                                                             # Linestart
[ \t]*                                                                        # Optional white space
(?P<sym>[A-Za-z]+[A-Za-z0-9]*)\s+                                             # get the symbol
(?P<x> [\+\-]?  ( \d*[\.]\d+  | \d+[\.]?\d* )  ([Ee][\+\-]?\d+)? ) [ \t]+     # Get x
(?P<y> [\+\-]?  ( \d*[\.]\d+  | \d+[\.]?\d* )  ([Ee][\+\-]?\d+)? ) [ \t]+     # Get y
(?P<z> [\+\-]?  ( \d*[\.]\d+  | \d+[\.]?\d* )  ([Ee][\+\-]?\d+)? )            # Get z
""",
        re.X | re.M,
    )

    # Matches a whole frame: atom-count line, comment line and the block of positions
    pos_block_regex = re.compile(
        r"""
                                                            # First line contains an integer
                                                            # and only an integer: the number of atoms
^[ \t]* (?P<natoms> [0-9]+) [ \t]*[\n]                      # End first line
(?P<comment>.*) [\n]                                        # The second line is a comment
(?P<positions>                                              # This is the block of positions
    (
        (
            \s*                                             # White space in front of the element spec is ok
            (
                [A-Za-z]+[A-Za-z0-9]*                       # Element spec
                (
                   \s+                                      # White space in front of the number
                   [\+\-]?                                  # Plus or minus in front of the number (optional)
                    (
                        (
                            \d*                             # optional decimal in the beginning .0001 is ok, for example
                            [\.]                            # There has to be a dot followed by
                            \d+                             # at least one decimal
                        )
                        |                                   # OR
                        (
                            \d+                             # at least one decimal, followed by
                            [\.]?                           # an optional dot
                            \d*                             # followed by optional decimals
                        )
                    )
                    ([Ee][\+\-]?\d+)?                       # optional exponents E+03, e-05
                ){3}                                        # I expect three float values
                |
                \#                                          # If a line is commented out, that is also ok
            )
            .*                                              # ignore what is after the comment or the position spec
            |                                               # OR
            \s*                                             # A line only containing white space
         )
        [\n]                                                # line break at the end
    )+
)                                                           # A positions block should be one or more lines
""",
        re.X | re.M,
    )

    # The block regex requires every line to end with a line break: without this,
    # the last atom of a string lacking a trailing newline would be dropped.
    if xyz_string and not xyz_string.endswith('\n'):
        xyz_string += '\n'

    for block in pos_block_regex.finditer(xyz_string):
        natoms = int(block.group('natoms'))
        yield (natoms, block.group('comment'), BlockIterator(pos_regex.finditer(block.group('positions')), natoms))