Source code for aiida.parsers.plugins.quantumespresso.cp

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
from aiida.orm.calculation.job.quantumespresso.cp import CpCalculation
from aiida.parsers.plugins.quantumespresso.raw_parser_cp import (
    QEOutputParsingError, parse_cp_traj_stanzas, parse_cp_raw_output)
from aiida.parsers.plugins.quantumespresso.constants import (bohr_to_ang,
                                                             timeau_to_sec, hartree_to_ev)
from aiida.orm.data.parameter import ParameterData
from aiida.orm.data.structure import StructureData
from aiida.orm.data.folder import FolderData
from aiida.parsers.parser import Parser
from aiida.common.datastructures import calc_states
from aiida.orm.data.array.trajectory import TrajectoryData
import numpy



[docs]class CpParser(Parser):
    """
    This class is the implementation of the Parser class for Cp.
    """

    def __init__(self, calc):
        """
        Initialize the instance of CpParser
        
        :param calculation: calculation object.
        """
        # check for valid input
        if not isinstance(calc, CpCalculation):
            raise QEOutputParsingError("Input calc must be a CpCalculation")

        super(CpParser, self).__init__(calc)

[docs]    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os, numpy
        from distutils.version import LooseVersion
        
        successful = True

        # check if I'm not to overwrite anything
        state = self._calc.get_state()
        if state != calc_states.PARSING:
            raise InvalidOperation("Calculation not in {} state"
                                   .format(calc_states.PARSING))

        # get the input structure
        input_structure = self._calc.inp.structure

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.            
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there 
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            successful = False
            new_nodes_tuple = ()
            self.logger.error("Standard output not found")
            return successful, new_nodes_tuple

        # if there is something more, I note it down, so to call the raw parser
        # with the right options
        # look for xml
        out_file = out_folder.get_abs_path(self._calc._OUTPUT_FILE_NAME)

        xml_file = None
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            xml_file = out_folder.get_abs_path(self._calc._DATAFILE_XML_BASENAME)

        xml_counter_file = None
        if self._calc._FILE_XML_PRINT_COUNTER in list_of_files:
            xml_counter_file = out_folder.get_abs_path(
                self._calc._FILE_XML_PRINT_COUNTER)

        parsing_args = [out_file, xml_file, xml_counter_file]

        # call the raw parsing function
        out_dict, raw_successful = parse_cp_raw_output(*parsing_args)

        successful = True if raw_successful else False

        # parse the trajectory. Units in Angstrom, picoseconds and eV.
        # append everthing in the temporary dictionary raw_trajectory
        expected_configs = None
        raw_trajectory = {}
        evp_keys = ['electronic_kinetic_energy', 'cell_temperature', 'ionic_temperature',
                    'scf_total_energy', 'enthalpy', 'enthalpy_plus_kinetic',
                    'energy_constant_motion', 'volume', 'pressure']
        pos_vel_keys = ['cells', 'positions', 'times', 'velocities']
        # set a default null values

        # Now prepare the reordering, as filex in the xml are  ordered
        reordering = self._generate_sites_ordering(out_dict['species'],
                                                   out_dict['atoms'])

        # =============== POSITIONS trajectory ============================
        try:
            with open(out_folder.get_abs_path(
                    '{}.pos'.format(self._calc._PREFIX))) as posfile:
                pos_data = [l.split() for l in posfile]
                # POSITIONS stored in angstrom
            traj_data = parse_cp_traj_stanzas(num_elements=out_dict['number_of_atoms'],
                                              splitlines=pos_data,
                                              prepend_name='positions_traj',
                                              rescale=bohr_to_ang)

            # here initialize the dictionary. If the parsing of positions fails, though, I don't have anything
            # out of the CP dynamics. Therefore, the calculation status is set to FAILED.
            raw_trajectory['positions_ordered'] = self._get_reordered_array(traj_data['positions_traj_data'],
                                                                            reordering)
            raw_trajectory['times'] = numpy.array(traj_data['positions_traj_times'])
        except IOError:
            out_dict['warnings'].append("Unable to open the POS file... skipping.")
            successful = False
        except Exception as e:
            out_dict['warnings'].append("Error parsing POS file ({}). Skipping file."
                                        .format(e.message))
            successful = False

        # =============== CELL trajectory ============================
        try:
            with open(os.path.join(out_folder.get_abs_path('.'),
                                   '{}.cel'.format(self._calc._PREFIX))) as celfile:
                cel_data = [l.split() for l in celfile]
            traj_data = parse_cp_traj_stanzas(num_elements=3,
                                              splitlines=cel_data,
                                              prepend_name='cell_traj',
                                              rescale=bohr_to_ang)
            raw_trajectory['cells'] = numpy.array(traj_data['cell_traj_data'])
        except IOError:
            out_dict['warnings'].append("Unable to open the CEL file... skipping.")
        except Exception as e:
            out_dict['warnings'].append("Error parsing CEL file ({}). Skipping file."
                                        .format(e.message))

        # =============== VELOCITIES trajectory ============================
        try:
            with open(os.path.join(out_folder.get_abs_path('.'),
                                   '{}.vel'.format(self._calc._PREFIX))) as velfile:
                vel_data = [l.split() for l in velfile]
            traj_data = parse_cp_traj_stanzas(num_elements=out_dict['number_of_atoms'],
                                              splitlines=vel_data,
                                              prepend_name='velocities_traj',
                                              rescale=bohr_to_ang / timeau_to_sec * 10 ** 12)  # velocities in ang/ps,
            raw_trajectory['velocities_ordered'] = self._get_reordered_array(traj_data['velocities_traj_data'],
                                                                             reordering)
        except IOError:
            out_dict['warnings'].append("Unable to open the VEL file... skipping.")
        except Exception as e:
            out_dict['warnings'].append("Error parsing VEL file ({}). Skipping file."
                                        .format(e.message))

        # =============== EVP trajectory ============================
        try:
            matrix = numpy.genfromtxt(os.path.join(out_folder.get_abs_path('.'),
                                                   '{}.evp'.format(self._calc._PREFIX)))
            # there might be a different format if the matrix has one row only
            try:
                matrix.shape[1]
            except IndexError:
                matrix = numpy.array(numpy.matrix(matrix))
            
            if LooseVersion(out_dict['creator_version']) > LooseVersion("5.1"):
                # Between version 5.1 and 5.1.1, someone decided to change
                # the .evp output format, without any way to know that this 
                # happened... SVN commit 11158.
                # I here use the version number to parse, plus some
                # heuristics to check that I'm doing the right thing
                #print "New version"
                raw_trajectory['steps'] = numpy.array(matrix[:,0],dtype=int)
                raw_trajectory['evp_times']                 = matrix[:,1]                    # TPS, ps
                raw_trajectory['electronic_kinetic_energy'] = matrix[:,2] * hartree_to_ev    # EKINC, eV
                raw_trajectory['cell_temperature']          = matrix[:,3]                    # TEMPH, K
                raw_trajectory['ionic_temperature']         = matrix[:,4]                    # TEMPP, K
                raw_trajectory['scf_total_energy']          = matrix[:,5] * hartree_to_ev    # ETOT, eV
                raw_trajectory['enthalpy']                  = matrix[:,6] * hartree_to_ev    # ENTHAL, eV
                raw_trajectory['enthalpy_plus_kinetic']     = matrix[:,7] * hartree_to_ev    # ECONS, eV
                raw_trajectory['energy_constant_motion']    = matrix[:,8] * hartree_to_ev    # ECONT, eV
                raw_trajectory['volume']                    = matrix[:,9] * (bohr_to_ang**3) # volume, angstrom^3
                raw_trajectory['pressure']                  = matrix[:,10]                    # out_press, GPa
            else:
                #print "Old version"    
                raw_trajectory['steps'] = numpy.array(matrix[:,0],dtype=int)
                raw_trajectory['electronic_kinetic_energy'] = matrix[:,1] * hartree_to_ev    # EKINC, eV
                raw_trajectory['cell_temperature']          = matrix[:,2]                    # TEMPH, K
                raw_trajectory['ionic_temperature']         = matrix[:,3]                    # TEMPP, K
                raw_trajectory['scf_total_energy']          = matrix[:,4] * hartree_to_ev    # ETOT, eV
                raw_trajectory['enthalpy']                  = matrix[:,5] * hartree_to_ev    # ENTHAL, eV
                raw_trajectory['enthalpy_plus_kinetic']     = matrix[:,6] * hartree_to_ev    # ECONS, eV
                raw_trajectory['energy_constant_motion']    = matrix[:,7] * hartree_to_ev    # ECONT, eV
                raw_trajectory['volume']                    = matrix[:,8] * (bohr_to_ang**3) # volume, angstrom^3
                raw_trajectory['pressure']                  = matrix[:,9]                    # out_press, GPa
                raw_trajectory['evp_times']                  = matrix[:,10]                    # TPS, ps

            # Huristics to understand if it's correct.
            # A better heuristics could also try to fix possible issues
            # (in new versions of QE, it's possible to recompile it with
            # the __OLD_FORMAT flag to get back the old version format...)
            # but I won't do it, as there may be also other columns swapped.
            # Better to stop and ask the user to check what's going on.
            max_time_difference = abs(
                numpy.array(raw_trajectory['times']) - 
                numpy.array(raw_trajectory['evp_times'])).max()
            if max_time_difference > 1.e-4: # It is typically ~1.e-7 due to roundoff errors
                # If there is a large discrepancy, I set successful = False,
                # it means there is something very weird going on...
                out_dict['warnings'].append("Error parsing EVP file ({}). Skipping file."
                                            .format(e.message))
                successful = False

                # In this case, remove all what has been parsed to avoid users
                # using the wrong data
                for k in evp_keys:
                    try:
                        del raw_trajectory[k]
                    except KeyError:
                        # If for some reason a key is not there, ignore
                        pass
            # Delete evp_times in any case, it's a duplicate of 'times'
            del raw_trajectory['evp_times']
            
        except Exception as e:
            out_dict['warnings'].append("Error parsing EVP file ({}). Skipping file.".format(e.message))
        except IOError:
            out_dict['warnings'].append("Unable to open the EVP file... skipping.")

        # get the symbols from the input        
        # TODO: I should have kinds in TrajectoryData
        raw_trajectory['symbols'] = numpy.array([str(i.kind_name) for i in input_structure.sites])

        traj = TrajectoryData()
        traj.set_trajectory(stepids=raw_trajectory['steps'],
                            cells=raw_trajectory['cells'],
                            symbols=raw_trajectory['symbols'],
                            positions=raw_trajectory['positions_ordered'],
                            times=raw_trajectory['times'],
                            velocities=raw_trajectory['velocities_ordered'],
        )

        for this_name in evp_keys:
            try:
                traj.set_array(this_name,raw_trajectory[this_name])
            except KeyError:
                # Some columns may have not been parsed, skip
                pass
        new_nodes_list = [(self.get_linkname_trajectory(),traj)]
        
        # Remove big dictionaries that would be redundant
        # For atoms and cell, there is a small possibility that nothing is parsed
        # but then probably nothing moved.
        try:
            del out_dict['atoms']
        except KeyError:
            pass
        try:
            del out_dict['cell']
        except KeyError:
            pass
        try:
            del out_dict['ions_positions_stau']
        except KeyError:
            pass
        try:
            del out_dict['ions_positions_svel']
        except KeyError:
            pass
        try:
            del out_dict['ions_positions_taui']
        except KeyError:
            pass
        # This should not be needed
        try:
            del out_dict['atoms_index_list']
        except KeyError:
            pass
        # This should be already in the input
        try:
            del out_dict['atoms_if_pos_list']
        except KeyError:
            pass
        # 
        try:
            del out_dict['ions_positions_force']
        except KeyError:
            pass

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # save it into db
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        return successful, new_nodes_list

[docs]    def get_linkname_trajectory(self):
        """
        Returns the name of the link to the output_structure (None if not present)
        """
        return 'output_trajectory'

    def _generate_sites_ordering(self, raw_species, raw_atoms):
        """
        take the positions of xml and from file.pos of the LAST step and compare them 
        """
        # Examples in the comments are for species [Ba, O, Ti]
        # and atoms [Ba, Ti, O, O, O]

        # Dictionary to associate the species name to the idx
        # Example: {'Ba': 1, 'O': 2, 'Ti': 3}
        species_dict = {name: idx for idx, name in zip(raw_species['index'],
                                                       raw_species['type'])}
        # List of the indices of the specie associated to each atom,
        # in the order specified in input
        # Example: (1,3,2,2,2)
        atoms_species_idx = [species_dict[a[0]] for a in raw_atoms]
        # I also attach the current position; important to convert to a list
        # Otherwise the iterator can be looped on only once!
        # Example: ((0,1),(1,3),(2,2),(3,2),(4,2))
        ref_atom_list = list(enumerate(atoms_species_idx))
        new_order_tmp = []
        # I reorder the atoms, first by specie, then in their order
        # This is the order used in output by CP!!
        # Example: ((0,1),(2,2),(3,2),(4,2),(1,3))
        for specie_idx in sorted(raw_species['index']):
            for elem in ref_atom_list:
                if elem[1] == specie_idx:
                    new_order_tmp.append(elem)
        # This is the new order that is printed in CP:
        # e.g. reordering[2] is the index of the atom, in the input
        # list of atoms, that is printed in position 2 (0-based, so the
        # third atom) in the CP output files.
        # Example: [0,2,3,4,1]
        reordering = [_[0] for _ in new_order_tmp]
        # I now need the inverse reordering, to put back in place 
        # from the output ordering to the input one!
        # Example: [0,4,1,2,3]
        # Because in the final list (Ba, O, O, O, Ti)
        # the first atom Ba in the input is atom 0 in the CP output (the first),
        # the second atom Ti in the input is atom 4 (the fifth) in the CP output,
        # and so on
        sorted_indexed_reordering = sorted([(_[1], _[0]) for _ in
                                            enumerate(reordering)])
        reordering_inverse = [_[1] for _ in sorted_indexed_reordering]
        return reordering_inverse

    def _get_reordered_list(self, origlist, reordering):
        """
        Given a list to reorder, a list of integer positions with the new
        order, return the reordered list.
        """
        return [origlist[e] for e in reordering]

    def _get_reordered_array(self, input, reordering):
        return numpy.array([self._get_reordered_list(i, reordering) for i in input])