Source code for aiida.tools.data.structure

###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
"""Various utilities to deal with StructureData instances or create new ones
(e.g. convert format to/from SPGLIB, create a StructureData from a different
format, ...)

"""

import copy
import re

import numpy as np

from aiida.common.constants import elements
from aiida.engine import calcfunction
from aiida.orm.nodes.data.structure import Kind, Site, StructureData

# Public API of this module, i.e. the names exported by a star-import.
# The other functions defined below are not listed here and have to be imported by name.
__all__ = ('structure_to_spglib_tuple', 'spglib_tuple_to_structure')



[docs]
@calcfunction
def _get_cif_ase_inline(struct, parameters):
    """Build a :py:class:`aiida.orm.nodes.data.cif.CifData` from a structure by going through ASE.

    The structure is converted with ``struct.get_ase`` and every data block of the
    resulting CIF is tagged as space group ``P 1`` with one formula unit per cell
    and the Hill formula of the structure.

    :param struct: the structure to convert; it has to provide ``get_ase`` and ``get_formula``
    :param parameters: ``None``, or a node whose ``get_dict()`` result is forwarded
        as keyword arguments to ``struct.get_ase``
    :return: a dictionary with the single key ``'cif'`` holding the new ``CifData``

    .. note:: requires ASE module.
    """
    from aiida.orm import CifData

    ase_kwargs = parameters.get_dict() if parameters is not None else {}
    cif = CifData(ase=struct.get_ase(**ase_kwargs))
    hill_formula = struct.get_formula(mode='hill', separator=' ')

    # Tags written into every data block, in this order.
    p1_tags = {
        '_symmetry_space_group_name_H-M': 'P 1',
        '_symmetry_space_group_name_Hall': 'P 1',
        '_symmetry_Int_Tables_number': 1,
        '_cell_formula_units_Z': 1,
        '_chemical_formula_sum': hill_formula,
    }
    for block_name in cif.values.keys():
        for tag, value in p1_tags.items():
            cif.values[block_name][tag] = value

    return {'cif': cif}




[docs]
def structure_to_spglib_tuple(structure):
    """Translate an AiiDA structure into the tuple representation (cell, scaled_positions, element_numbers).

    :param structure: the AiiDA structure
    :return: a triple ``(structure_tuple, kind_info, kinds)`` where ``structure_tuple``
        is a tuple of format (cell, scaled_positions, element_numbers);
        ``kind_info`` is a dictionary mapping each kind name to the integer used for it
        in element_numbers. A kind with exactly one symbol gets the Z number of that
        element if this number is still free, otherwise the first free integer starting
        from ``1000 * Z``; any other kind gets the first free integer starting from 200000;
        ``kinds`` is a list of the kinds of the structure.
    """

    def first_unused(taken, start_from):
        """Return the smallest integer >= start_from that is not contained in `taken`."""
        candidate = start_from
        while candidate in taken:
            candidate += 1
        return candidate

    # Reverse lookup: chemical symbol -> atomic number
    symbol_to_z = {info['symbol']: z for z, info in elements.items()}

    lattice = np.array(structure.cell)
    cartesian = np.array([site.position for site in structure.sites])
    # Cartesian -> scaled coordinates: multiply by the inverse of the cell matrix
    scaled = np.dot(cartesian, np.linalg.inv(lattice))

    kind_numbers: dict = {}
    for kind in structure.kinds:
        taken = set(kind_numbers.values())
        if len(kind.symbols) == 1:
            atomic_number = symbol_to_z[kind.symbols[0]]
            # A second kind of the same element cannot reuse Z, so it is moved
            # to the range starting at 1000 * Z.
            if atomic_number in taken:
                assigned = first_unused(taken, start_from=atomic_number * 1000)
            else:
                assigned = atomic_number
        else:
            # Kinds without a single well-defined element live in a separate range.
            assigned = first_unused(taken, start_from=200000)
        kind_numbers[kind.name] = assigned

    element_numbers = [kind_numbers[site.kind_name] for site in structure.sites]

    return ((lattice, scaled, element_numbers), kind_numbers, list(structure.kinds))




[docs]
def spglib_tuple_to_structure(structure_tuple, kind_info=None, kinds=None):
    """Convert a tuple of the format (cell, scaled_positions, element_numbers) to an AiiDA structure.

    Unless the element_numbers are identical to the Z number of the atoms,
    you should pass both kind_info and kinds, with the same format as returned
    by :func:`structure_to_spglib_tuple`.

    :param structure_tuple: the structure in format (cell, scaled_positions, element_numbers)
    :param kind_info: a dictionary mapping the kind_names to
       the numbers used in element_numbers. If not provided, assumes {element_name: element_Z}
    :param kinds: a list of the kinds of the structure.
    :return: a new ``StructureData`` holding the given kinds (or one default kind per
        element when they are not given) and one site per entry of element_numbers
    :raises ValueError: if only one of ``kind_info`` and ``kinds`` is passed; if neither is
        passed and a number is not a valid Z number; if ``kind_info`` maps two kind names to
        the same number or names a kind missing from ``kinds``; if a number in
        element_numbers is not in ``kind_info``; or if the number of positions differs
        from the number of element numbers
    """
    # Each message names the argument that WAS passed and the one that is missing.
    if kind_info is None and kinds is not None:
        raise ValueError('If you pass kinds, you should also pass kind_info')
    if kinds is None and kind_info is not None:
        raise ValueError('If you pass kind_info, you should also pass kinds')

    cell, rel_pos, numbers = structure_tuple
    if kind_info:
        # Shallow copies, so that the caller's containers are never touched
        _kind_info = copy.copy(kind_info)
        _kinds = copy.copy(kinds)
    else:
        try:
            # One symbol for each site
            symbols = [elements[num]['symbol'] for num in numbers]
        except KeyError as exc:
            raise ValueError(
                f'You did not pass kind_info, but at least one number is not a valid Z number: {exc.args[0]}'
            ) from exc

        _kind_info = {elements[num]['symbol']: num for num in set(numbers)}
        # Default kinds, one per element, in order of first appearance among the sites:
        # dict.fromkeys de-duplicates while keeping a reproducible order, unlike a set of strings.
        _kinds = [Kind(symbols=sym) for sym in dict.fromkeys(symbols)]

    _kinds_dict = {k.name: k for k in _kinds}
    # From here on only _kinds and _kind_info are used, whichever branch produced them.
    # The mapping is inverted below, so the numbers must be unique.
    if len(_kind_info) != len(set(_kind_info.values())):
        raise ValueError('There is at least a number repeated twice in kind_info!')
    # Invert the mapping: number -> kind name
    mapping_num_kindname = {v: k for k, v in _kind_info.items()}
    # Resolve the kind names: number -> kind
    try:
        mapping_to_kinds = {num: _kinds_dict[kindname] for num, kindname in mapping_num_kindname.items()}
    except KeyError as exc:
        raise ValueError(f"Unable to find '{exc.args[0]}' in the kinds list") from exc

    try:
        site_kinds = [mapping_to_kinds[num] for num in numbers]
    except KeyError as exc:
        raise ValueError(f'Unable to find kind in kind_info for number {exc.args[0]}') from exc

    structure = StructureData(cell=cell)
    for k in _kinds:
        structure.append_kind(k)
    # Scaled -> cartesian coordinates
    abs_pos = np.dot(rel_pos, cell)
    if len(abs_pos) != len(site_kinds):
        raise ValueError('The length of the positions array is different from the length of the element numbers')

    for kind, pos in zip(site_kinds, abs_pos):
        structure.append_site(Site(kind_name=kind.name, position=pos))

    return structure




[docs]
def xyz_parser_iterator(xyz_string):
    """Iterate over the frames contained in XYZ-formatted text.

    For each frame a tuple ``(natoms, comment, atomiter)`` is yielded, where ``natoms``
    is the number of atoms declared on the first line of the frame, ``comment`` is the
    content of its second line and ``atomiter`` is an iterator yielding a nested tuple
    ``(symbol, (x, y, z))`` for each atom entry.

    Consuming ``atomiter`` raises a ``TypeError`` if the number of atom entries found
    in the frame is smaller or larger than ``natoms``.

    :param xyz_string: a string containing XYZ-structured text
    """
    # Matches a single atom entry: a symbol followed by three floating point numbers.
    atom_line_regex = re.compile(
        r"""
^                                                                             # Linestart
[ \t]*                                                                        # Optional white space
(?P<sym>[A-Za-z]+[A-Za-z0-9]*)\s+                                             # get the symbol
(?P<x> [\+\-]?  ( \d*[\.]\d+  | \d+[\.]?\d* )  ([Ee][\+\-]?\d+)? ) [ \t]+     # Get x
(?P<y> [\+\-]?  ( \d*[\.]\d+  | \d+[\.]?\d* )  ([Ee][\+\-]?\d+)? ) [ \t]+     # Get y
(?P<z> [\+\-]?  ( \d*[\.]\d+  | \d+[\.]?\d* )  ([Ee][\+\-]?\d+)? )            # Get z
""",
        re.X | re.M,
    )
    # Matches a whole frame: atom count line, comment line and the block of positions.
    frame_regex = re.compile(
        r"""
                                                            # First line contains an integer
                                                            # and only an integer: the number of atoms
^[ \t]* (?P<natoms> [0-9]+) [ \t]*[\n]                      # End first line
(?P<comment>.*) [\n]                                        # The second line is a comment
(?P<positions>                                              # This is the block of positions
    (
        (
            \s*                                             # White space in front of the element spec is ok
            (
                [A-Za-z]+[A-Za-z0-9]*                       # Element spec
                (
                    \s+                                     # White space in front of the number
                    [\+\-]?                                 # Plus or minus in front of the number (optional)
                    (
                        (
                            \d*                             # optional decimal in the beginning .0001 is ok, for example
                            [\.]                            # There has to be a dot followed by
                            \d+                             # at least one decimal
                        )
                        |                                   # OR
                        (
                            \d+                             # at least one decimal, followed by
                            [\.]?                           # an optional dot
                            \d*                             # followed by optional decimals
                        )
                    )
                    ([Ee][\+\-]?\d+)?                       # optional exponents E+03, e-05
                ){3}                                        # I expect three float values
                |
                \#                                          # If a line is commented out, that is also ok
            )
            .*                                              # ignore what is after the comment or the position spec
            |                                               # OR
            \s*                                             # A line only containing white space
         )
        [\n]                                                # line break at the end
    )+
)                                                           # A positions block should be one or more lines
                    """,
        re.X | re.M,
    )

    class BlockIterator:
        """Adapter around the iterator returned by ``finditer`` that converts each match
        object into ``(symbol, (x, y, z))`` and checks the declared number of atoms.
        """

        def __init__(self, iterator, natoms):
            self._matches = iterator
            self._expected = natoms
            self._seen = 0

        def __iter__(self):
            return self

        def __next__(self):
            try:
                match = next(self._matches)
            except StopIteration:
                if self._seen != self._expected:
                    # The block ended before the declared number of atoms was reached
                    raise TypeError(
                        f'Number of atom entries ({self._seen}) is smaller than the number of atoms ({self._expected})'
                    )
                # Exactly the declared number of atoms was produced: let the iteration end
                raise

            self._seen += 1
            if self._seen > self._expected:
                raise TypeError(
                    f'Number of atom entries ({self._seen}) is larger than the number of atoms ({self._expected})'
                )

            symbol = match.group('sym')
            x, y, z = (float(match.group(axis)) for axis in ('x', 'y', 'z'))
            return (symbol, (x, y, z))

    for frame in frame_regex.finditer(xyz_string):
        declared_atoms = int(frame.group('natoms'))
        atom_matches = atom_line_regex.finditer(frame.group('positions'))
        yield (declared_atoms, frame.group('comment'), BlockIterator(atom_matches, declared_atoms))