# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""
A collection of function that are used to parse the output of Quantum Espresso PW.
The function that needs to be called from outside is parse_raw_output().
The functions mostly work without aiida specific functionalities.
The parsing will try to convert whatever it can in some dictionary, which
by operative decision doesn't have much structure encoded, [the values are simple ]
"""
import xml.dom.minidom
import os
import string
import re
from aiida.parsers.plugins.quantumespresso.constants import ry_to_ev,hartree_to_ev,bohr_to_ang,ry_si,bohr_si
from aiida.parsers.plugins.quantumespresso import QEOutputParsingError
# TODO: it could be possible to use info of the input file to parse output.
# but atm the output has all the informations needed for the parsing.
# parameter that will be used later for comparisons
lattice_tolerance = 1.e-5
default_energy_units = 'eV'
units_suffix = '_units'
k_points_default_units = '2 pi / Angstrom'
default_length_units = 'Angstrom'
default_dipole_units = 'Debye'
default_magnetization_units = 'Bohrmag / cell'
default_charge_units = 'e'
default_force_units = 'ev / angstrom'
default_stress_units = 'GPascal'
default_polarization_units = 'C / m^2'
[docs]def parse_raw_output(out_file, input_dict, parser_opts=None, xml_file=None, dir_with_bands=None):
"""
Parses the output of a calculation
Receives in input the paths to the output file and the xml file.
:param out_file: path to pw std output
:param input_dict: dictionary with the input parameters
:param parser_opts: not used
:param dir_with_bands: path to directory with all k-points (Kxxxxx) folders
:param xml_file: path to QE data-file.xml
:return parameter_data: a dictionary with parsed parameters
:return trajectory_data: a dictionary with arrays (for relax & md calcs.)
:return structure_data: a dictionary with data for the output structure
:return bands_data: a dictionary with data for bands (for bands calcs.)
:return job_successful: a boolean that is False in case of failed calculations
:raises QEOutputParsingError: for errors in the parsing,
:raises AssertionError: if two keys in the parsed dicts are found to be qual
3 different keys to check in output: parser_warnings, xml_warnings and warnings.
On an upper level, these flags MUST be checked.
The first two are expected to be empty unless QE failures or unfinished jobs.
"""
import copy
# TODO: a lot of ifs could be cleaned out
# TODO: input_dict should be used as well
job_successful = True
parser_version = '0.1'
parser_info = {}
parser_info['parser_warnings'] = []
parser_info['parser_info'] = 'AiiDA QE Parser v{}'.format(parser_version)
# if xml_file is not given in input, skip its parsing
if xml_file is not None:
try:
with open(xml_file,'r') as f:
xml_lines = f.read() # Note: read() and not readlines()
except IOError:
raise QEOutputParsingError("Failed to open xml file: {}.".format(xml_file))
xml_data,structure_data,bands_data = parse_pw_xml_output(xml_lines,dir_with_bands)
# Note the xml file should always be consistent.
else:
parser_info['parser_warnings'].append('Skipping the parsing of the xml file.')
xml_data = {}
bands_data = {}
structure_data = {}
# load QE out file
try:
with open(out_file,'r') as f:
out_lines = f.read()
except IOError: # non existing output file -> job crashed
raise QEOutputParsingError("Failed to open output file: {}.".format(out_file))
if not out_lines: # there is an output file, but it's empty -> crash
job_successful = False
# check if the job has finished (that doesn't mean without errors)
finished_run = False
for line in out_lines.split('\n')[::-1]:
if 'JOB DONE' in line:
finished_run = True
break
if not finished_run: # error if the job has not finished
warning = 'QE pw run did not reach the end of the execution.'
parser_info['parser_warnings'].append(warning)
job_successful = False
# parse
try:
out_data,trajectory_data,critical_messages = parse_pw_text_output(out_lines,xml_data,structure_data,input_dict)
except QEOutputParsingError as e:
if not finished_run: # I try to parse it as much as possible
parser_info['parser_warnings'].append('Error while parsing the output file')
out_data = {}
trajectory_data = {}
critical_messages = []
else: # if it was finished and I got error, it's a mistake of the parser
raise QEOutputParsingError('Error while parsing QE output. Exception message: {}'.format(e.message))
# I add in the out_data all the last elements of trajectory_data values.
# Safe for some large arrays, that I will likely never query.
skip_keys = ['forces','atomic_magnetic_moments','atomic_charges',
'lattice_vectors_relax','atomic_positions_relax',
'atomic_species_name']
tmp_trajectory_data = copy.copy(trajectory_data)
for x in tmp_trajectory_data.iteritems():
if x[0] in skip_keys:
continue
out_data[x[0]] = x[1][-1]
if len(x[1])==1: # delete eventual keys that are not arrays (scf cycles)
trajectory_data.pop(x[0])
# note: if an array is empty, there will be KeyError
for key in ['k_points','k_points_weights']:
try:
trajectory_data[key] = xml_data.pop(key)
except KeyError:
pass
# As the k points are an array that is rather large, and again it's not something I'm going to parse likely
# since it's an info mainly contained in the input file, I move it to the trajectory data
# if there is a severe error, the calculation is FAILED
if any([x in out_data['warnings'] for x in critical_messages]):
job_successful = False
for key in out_data.keys():
if key in xml_data.keys():
if key=='fermi_energy' or key=='fermi_energy_units': # an exception for the (only?) key that may be found on both
del out_data[key]
else:
raise AssertionError('{} found in both dictionaries, '
'values: {} vs. {}'.format(
key, out_data[key], xml_data[key])) # this shouldn't happen!
# out_data keys take precedence and overwrite xml_data keys,
# if the same key name is shared by both
# dictionaries (but this should not happen!)
parameter_data = dict(xml_data.items() + out_data.items() + parser_info.items())
# return various data.
# parameter data will be mapped in ParameterData
# trajectory_data in ArrayData
# structure_data in a Structure
# bands_data should probably be merged in ArrayData
return parameter_data,trajectory_data,structure_data,bands_data,job_successful
[docs]def cell_volume(a1,a2,a3):
"""
returns the volume of the primitive cell: ``|a1.(a2xa3)|``
"""
a_mid_0 = a2[1]*a3[2] - a2[2]*a3[1]
a_mid_1 = a2[2]*a3[0] - a2[0]*a3[2]
a_mid_2 = a2[0]*a3[1] - a2[1]*a3[0]
return abs(float(a1[0]*a_mid_0 + a1[1]*a_mid_1 + a1[2]*a_mid_2))
# In the following, some functions that helps the parsing of
# the xml file of QE v5.0.x (version below not tested)
def read_xml_card(dom,cardname):
try:
root_node = [_ for _ in dom.childNodes if
isinstance(_, xml.dom.minidom.Element)
and _.nodeName == "Root"][0]
the_card = [_ for _ in root_node.childNodes if _.nodeName == cardname][0]
#the_card = dom.getElementsByTagName(cardname)[0]
return the_card
except Exception as e:
print e
raise QEOutputParsingError('Error parsing tag {}'.format(cardname) )
def parse_xml_child_integer(tagname,target_tags):
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b=a.childNodes[0]
return int(b.data)
except Exception:
raise QEOutputParsingError('Error parsing tag {} inside {}'
.format(tagname,target_tags.tagName) )
def parse_xml_child_float(tagname,target_tags):
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b=a.childNodes[0]
return float(b.data)
except Exception:
raise QEOutputParsingError('Error parsing tag {} inside {}'\
.format(tagname, target_tags.tagName ) )
def parse_xml_child_bool(tagname,target_tags):
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b=a.childNodes[0]
return str2bool(b.data)
except Exception:
raise QEOutputParsingError('Error parsing tag {} inside {}'\
.format(tagname, target_tags.tagName) )
def str2bool(string):
try:
false_items=["f","0","false","no"]
true_items=["t","1","true","yes"]
string=str(string.lower().strip())
if string in false_items:
return False
if string in true_items:
return True
else:
raise QEOutputParsingError('Error converting string '
'{} to boolean value.'.format(string) )
except Exception:
raise QEOutputParsingError('Error converting string to boolean.')
def parse_xml_child_str(tagname,target_tags):
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b=a.childNodes[0]
return str(b.data).rstrip().replace('\n','')
except Exception:
raise QEOutputParsingError('Error parsing tag {} inside {}'\
.format(tagname, target_tags.tagName) )
def parse_xml_child_attribute_str(tagname,attributename,target_tags):
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
value=str(a.getAttribute(attributename))
return value.rstrip().replace('\n','').lower()
except Exception:
raise QEOutputParsingError('Error parsing attribute {}, tag {} inside {}'
.format(attributename,tagname,target_tags.tagName) )
def parse_xml_child_attribute_int(tagname,attributename,target_tags):
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
value=int(a.getAttribute(attributename))
return value
except Exception:
raise QEOutputParsingError('Error parsing attribute {}, tag {} inside {}'
.format(attributename,tagname,target_tags.tagName) )
def grep_energy_from_line(line):
try:
return float(line.split('=')[1].split('Ry')[0])*ry_to_ev
except Exception:
raise QEOutputParsingError('Error while parsing energy')
[docs]def convert_qe_time_to_sec(timestr):
"""
Given the walltime string of Quantum Espresso, converts it in a number of
seconds (float).
"""
rest = timestr.strip()
if 'd' in rest:
days, rest = rest.split('d')
else:
days = '0'
if 'h' in rest:
hours, rest = rest.split('h')
else:
hours = '0'
if 'm' in rest:
minutes, rest = rest.split('m')
else:
minutes = '0'
if 's' in rest:
seconds, rest = rest.split('s')
else:
seconds = '0.'
if rest.strip():
raise ValueError("Something remained at the end of the string '{}': '{}'"
.format(timestr, rest))
num_seconds = (
float(seconds) + float(minutes) * 60. +
float(hours) * 3600. + float(days) * 86400.)
return num_seconds
[docs]def convert_list_to_matrix(in_matrix,n_rows,n_columns):
"""
converts a list into a list of lists (a matrix like) with n_rows and n_columns
"""
return [ in_matrix[j:j+n_rows] for j in range(0,n_rows*n_columns,n_rows) ]
def xml_card_cell(parsed_data,dom):
#CARD CELL of QE output
cardname = 'CELL'
target_tags = read_xml_card(dom,cardname)
for tagname in ['NON-PERIODIC_CELL_CORRECTION','BRAVAIS_LATTICE']:
parsed_data[tagname.replace('-','_').lower()] = parse_xml_child_str(tagname,target_tags)
tagname = 'LATTICE_PARAMETER'
value = parse_xml_child_float(tagname,target_tags)
parsed_data[tagname.replace('-','_').lower()+'_xml'] = value
attrname = 'UNITS'
metric = parse_xml_child_attribute_str(tagname,attrname,target_tags)
if metric not in ['bohr','angstrom']:
raise QEOutputParsingError('Error parsing attribute {}, tag {} inside {}, units not found'
.format(attrname,tagname,target_tags.tagName) )
if metric == 'bohr':
value *= bohr_to_ang
parsed_data[tagname.replace('-','_').lower()] = value
tagname='CELL_DIMENSIONS'
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b=a.childNodes[0]
c=b.data.replace('\n','').split()
value=[ float(i) for i in c ]
parsed_data[tagname.replace('-','_').lower()]=value
except Exception:
raise QEOutputParsingError('Error parsing tag {} inside {}.'.format(tagname,target_tags.tagName) )
tagname = 'DIRECT_LATTICE_VECTORS'
lattice_vectors = []
try:
second_tagname='UNITS_FOR_DIRECT_LATTICE_VECTORS'
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b=a.getElementsByTagName('UNITS_FOR_DIRECT_LATTICE_VECTORS')[0]
value=str(b.getAttribute('UNITS')).lower()
parsed_data[second_tagname.replace('-','_').lower()]=value
metric = value
if metric not in ['bohr','angstroms']: # REMEMBER TO CHECK THE UNITS AT THE END OF THE FUNCTION
raise QEOutputParsingError('Error parsing tag {} inside {}: units not supported: {}'
.format(tagname,target_tags.tagName,metric) )
lattice_vectors = []
for second_tagname in ['a1','a2','a3']:
#b = a.getElementsByTagName(second_tagname)[0]
b=[_ for _ in a.childNodes if _.nodeName == second_tagname][0]
c = b.childNodes[0]
d = c.data.replace('\n','').split()
value = [ float(i) for i in d ]
if metric=='bohr':
value = [ bohr_to_ang*float(s) for s in value ]
lattice_vectors.append(value)
volume = cell_volume(lattice_vectors[0],lattice_vectors[1],lattice_vectors[2])
except Exception:
raise QEOutputParsingError('Error parsing tag {} inside {} inside {}.'
.format(tagname,target_tags.tagName,cardname) )
# NOTE: lattice_vectors will be saved later together with card IONS.atom
tagname = 'RECIPROCAL_LATTICE_VECTORS'
try:
#a = target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
second_tagname = 'UNITS_FOR_RECIPROCAL_LATTICE_VECTORS'
b = a.getElementsByTagName(second_tagname)[0]
value = str(b.getAttribute('UNITS')).lower()
parsed_data[second_tagname.replace('-','_').lower()]=value
metric=value
# NOTE: output is given in 2 pi / a [ang ^ -1]
if metric not in ['2 pi / a']:
raise QEOutputParsingError('Error parsing tag {} inside {}: units {} not supported'
.format(tagname,target_tags.tagName,metric) )
# reciprocal_lattice_vectors
this_matrix = []
for second_tagname in ['b1','b2','b3']:
b = a.getElementsByTagName(second_tagname)[0]
c = b.childNodes[0]
d = c.data.replace('\n','').split()
value = [ float(i) for i in d ]
if metric == '2 pi / a':
value=[ float(s)/parsed_data['lattice_parameter'] for s in value ]
this_matrix.append(value)
parsed_data['reciprocal_lattice_vectors'] = this_matrix
except Exception:
raise QEOutputParsingError('Error parsing tag {} inside {}.'
.format(tagname,target_tags.tagName) )
return parsed_data,lattice_vectors,volume
def xml_card_ions(parsed_data,dom,lattice_vectors,volume):
cardname='IONS'
target_tags=read_xml_card(dom,cardname)
for tagname in ['NUMBER_OF_ATOMS','NUMBER_OF_SPECIES']:
parsed_data[tagname.lower()]=parse_xml_child_integer(tagname,target_tags)
tagname='UNITS_FOR_ATOMIC_MASSES'
attrname='UNITS'
parsed_data[tagname.lower()]=parse_xml_child_attribute_str(tagname,attrname,target_tags)
try:
parsed_data['species']={}
parsed_data['species']['index'] =[]
parsed_data['species']['type'] =[]
parsed_data['species']['mass'] =[]
parsed_data['species']['pseudo']=[]
for i in range(parsed_data['number_of_species']):
tagname='SPECIE.'+str(i+1)
parsed_data['species']['index'].append(i+1)
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
tagname2='ATOM_TYPE'
parsed_data['species']['type'].append(parse_xml_child_str(tagname2,a))
tagname2='MASS'
parsed_data['species']['mass'].append(parse_xml_child_float(tagname2,a))
tagname2='PSEUDO'
parsed_data['species']['pseudo'].append(parse_xml_child_str(tagname2,a))
tagname='UNITS_FOR_ATOMIC_POSITIONS'
attrname='UNITS'
parsed_data[tagname.lower()]=parse_xml_child_attribute_str(tagname,attrname,target_tags)
except:
raise QEOutputParsingError('Error parsing tag SPECIE.# inside %s.'% (target_tags.tagName ) )
# TODO convert the units
# if parsed_data['units_for_atomic_positions'] not in ['alat','bohr','angstrom']:
try:
atomlist=[]
atoms_index_list=[]
atoms_if_pos_list=[]
tagslist=[]
for i in range(parsed_data['number_of_atoms']):
tagname='ATOM.'+str(i+1)
# USELESS AT THE MOMENT, I DON'T SAVE IT
# parsed_data['atoms']['list_index']=i
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
tagname2='INDEX'
b=int(a.getAttribute(tagname2))
atoms_index_list.append(b)
tagname2='SPECIES'
chem_symbol=str(a.getAttribute(tagname2)).rstrip().replace("\n","")
# I check if it is a subspecie
chem_symbol_digits = "".join([i for i in chem_symbol if i in string.digits])
try:
tagslist.append(int(chem_symbol_digits))
except ValueError:
# If I can't parse the digit, it is probably not there: I add a None to the tagslist
tagslist.append(None)
# I remove the symbols
chem_symbol = chem_symbol.translate(None, string.digits)
tagname2='tau'
b = a.getAttribute(tagname2)
tau = [float(s) for s in b.rstrip().replace("\n","").split()]
metric = parsed_data['units_for_atomic_positions']
if metric not in ['alat','bohr','angstrom']: # REMEMBER TO CONVERT AT THE END
raise QEOutputParsingError('Error parsing tag %s inside %s'% (tagname, target_tags.tagName ) )
if metric=='alat':
tau=[ parsed_data['lattice_parameter_xml']*float(s) for s in tau ]
elif metric=='bohr':
tau=[ bohr_to_ang*float(s) for s in tau ]
atomlist.append([chem_symbol,tau])
tagname2='if_pos'
b=a.getAttribute(tagname2)
if_pos=[int(s) for s in b.rstrip().replace("\n","").split()]
atoms_if_pos_list.append(if_pos)
parsed_data['atoms']=atomlist
parsed_data['atoms_index_list']=atoms_index_list
parsed_data['atoms_if_pos_list']=atoms_if_pos_list
cell={}
cell['lattice_vectors']=lattice_vectors
cell['volume']=volume
cell['atoms']=atomlist
cell['tagslist'] = tagslist
parsed_data['cell']=cell
except Exception:
raise QEOutputParsingError('Error parsing tag ATOM.# inside %s.'% (target_tags.tagName ) )
# saving data together with cell parameters. Did so for better compatibility with ASE.
# correct some units that have been converted in
parsed_data['atomic_positions'+units_suffix] = default_length_units
parsed_data['direct_lattice_vectors'+units_suffix] = default_length_units
return parsed_data
def xml_card_spin(parsed_data,dom):
cardname='SPIN'
target_tags=read_xml_card(dom,cardname)
for tagname in ['LSDA','NON-COLINEAR_CALCULATION',
'SPIN-ORBIT_CALCULATION','SPIN-ORBIT_DOMAG']:
parsed_data[tagname.replace('-','_').lower()
] = parse_xml_child_bool(tagname,target_tags)
return parsed_data
def xml_card_header(parsed_data,dom):
cardname='HEADER'
target_tags=read_xml_card(dom,cardname)
for tagname in ['FORMAT','CREATOR']:
for attrname in ['NAME','VERSION']:
parsed_data[(tagname+'_'+attrname).lower()
] = parse_xml_child_attribute_str(tagname,attrname,target_tags)
return parsed_data
def xml_card_planewaves(parsed_data,dom,calctype):
if calctype not in ['pw','cp']:
raise ValueError("Input flag not accepted, must be 'cp' or 'pw'")
cardname='PLANE_WAVES'
target_tags=read_xml_card(dom,cardname)
tagname = 'UNITS_FOR_CUTOFF'
attrname = 'UNITS'
units = parse_xml_child_attribute_str(tagname,attrname,target_tags).lower()
if 'hartree' not in units:
if 'rydberg' not in units:
raise QEOutputParsingError("Units {} are not supported by parser".format(units))
else:
if 'hartree' in units:
conv_fac = hartree_to_ev
else:
conv_fac = ry_to_ev
tagname='WFC_CUTOFF'
parsed_data[tagname.lower()] = parse_xml_child_float(tagname,target_tags)*conv_fac
parsed_data[tagname.lower()+units_suffix] = default_energy_units
tagname='RHO_CUTOFF'
parsed_data[tagname.lower()] = parse_xml_child_float(tagname,target_tags)*conv_fac
parsed_data[tagname.lower()+units_suffix] = default_energy_units
for tagname in [ 'FFT_GRID','SMOOTH_FFT_GRID' ]:
grid = []
for attrname in ['nr1','nr2','nr3']:
if 'SMOOTH' in tagname:
attrname += 's'
grid.append(parse_xml_child_attribute_int(tagname,attrname,target_tags))
parsed_data[tagname.lower()] = grid
if calctype == 'cp':
for tagname in ['MAX_NUMBER_OF_GK-VECTORS','GVECT_NUMBER','SMOOTH_GVECT_NUMBER' ]:
parsed_data[tagname.lower()] = parse_xml_child_integer(tagname,target_tags)
tagname='GAMMA_ONLY'
parsed_data[tagname.lower()] = parse_xml_child_bool(tagname,target_tags)
tagname='SMALLBOX_FFT_GRID'
fft_grid = []
for attrname in ['nr1b','nr2b','nr3b']:
fft_grid.append(parse_xml_child_attribute_int(tagname,attrname,target_tags))
parsed_data[tagname.lower()] = fft_grid
return parsed_data
def xml_card_symmetries(parsed_data,dom):
cardname='SYMMETRIES'
target_tags=read_xml_card(dom,cardname)
for tagname in ['NUMBER_OF_SYMMETRIES','NUMBER_OF_BRAVAIS_SYMMETRIES']:
parsed_data[tagname.replace('-','_').lower()] = \
parse_xml_child_integer(tagname,target_tags)
for tagname in ['INVERSION_SYMMETRY','DO_NOT_USE_TIME_REVERSAL',
'TIME_REVERSAL_FLAG','NO_TIME_REV_OPERATIONS']:
parsed_data[tagname.lower()]=parse_xml_child_bool(tagname,target_tags)
tagname='UNITS_FOR_SYMMETRIES'
attrname='UNITS'
metric=parse_xml_child_attribute_str(tagname,attrname,target_tags)
if metric not in ['crystal']:
raise QEOutputParsingError('Error parsing attribute {},'.format(attrname) + \
' tag {} inside '.format(tagname) + \
'{}, units unknown'.format(target_tags.tagName ) )
parsed_data['symmetries'+units_suffix] = metric
# parse the symmetry matrices
parsed_data['symmetries']=[]
find_sym=True
i=0
while find_sym:
try:
i+=1
current_sym={}
tagname='SYMM.'+str(i)
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
tagname2='INFO'
b=a.getElementsByTagName(tagname2)[0]
attrname='NAME'
value=str(b.getAttribute(attrname)).rstrip().replace('\n','')
current_sym['name']=value
try:
attrname='T_REV'
value=str(b.getAttribute(attrname)).rstrip().replace('\n','')
current_sym[attrname.lower()]=value
except Exception:
pass
tagname2='ROTATION'
b=a.getElementsByTagName(tagname2)[0]
c=[ int(s) for s in b.childNodes[0].data.split() ]
current_sym[tagname2.lower()] = convert_list_to_matrix(c,3,3)
for tagname2 in ['FRACTIONAL_TRANSLATION','EQUIVALENT_IONS']: # not always present
try:
b = a.getElementsByTagName(tagname2)[0]
if tagname2 == 'FRACTIONAL_TRANSLATION':
value = [ float(s) for s in b.childNodes[0].data.split() ]
else:
value = [ int(s) for s in b.childNodes[0].data.split() ]
current_sym[tagname2.lower()] = value
except Exception:
raise
parsed_data['symmetries'].append(current_sym)
except IndexError: # SYMM.i out of index
find_sym=False
return parsed_data
def xml_card_exchangecorrelation(parsed_data,dom):
cardname='EXCHANGE_CORRELATION'
target_tags=read_xml_card(dom,cardname)
tagname='DFT'
parsed_data[(tagname+'_exchange_correlation').lower()] = \
parse_xml_child_str(tagname,target_tags)
tagname='LDA_PLUS_U_CALCULATION'
try:
parsed_data[tagname.lower()] = parse_xml_child_bool(tagname,target_tags)
except Exception:
parsed_data[tagname.lower()] = False
if parsed_data[tagname.lower()]: # if it is a plus U calculation, I expect more infos
tagname = 'HUBBARD_L'
try:
#a = target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b = a.childNodes[0]
c = b.data.replace('\n','').split()
value = [int(i) for i in c]
parsed_data[tagname.lower()] = value
except Exception:
raise QEOutputParsingError('Error parsing tag '+\
'{} inside {}.'.format(tagname, target_tags.tagName) )
for tagname in ['HUBBARD_U','HUBBARD_ALPHA','HUBBARD_BETA','HUBBARD_J0']:
try:
#a = target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b = a.childNodes[0]
c = b.data.replace('\n',' ').split() # note the need of a white space!
value = [float(i)*ry_to_ev for i in c]
parsed_data[tagname.lower()] = value
except Exception:
raise QEOutputParsingError('Error parsing tag '+\
'{} inside {}.'.format(tagname, target_tags.tagName))
tagname = 'LDA_PLUS_U_KIND'
try:
parsed_data[tagname.lower()] = parse_xml_child_integer(tagname,target_tags)
except Exception:
pass
tagname = 'U_PROJECTION_TYPE'
try:
parsed_data[tagname.lower()] = parse_xml_child_str(tagname,target_tags)
except Exception:
pass
tagname = 'HUBBARD_J'
try:
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
b=a.childNodes[0]
c=b.data.replace('\n','').split()
parsed_data[tagname.lower()] = convert_list_to_matrix(c,3,3)
except Exception:
pass
try:
tagname='NON_LOCAL_DF'
parsed_data[tagname.lower()] = parse_xml_child_integer(tagname,target_tags)
except Exception:
pass
try:
tagname='VDW_KERNEL_NAME'
parsed_data[tagname.lower()] = parse_xml_child_str(tagname,target_tags)
except Exception:
pass
return parsed_data
[docs]def parse_pw_xml_output(data,dir_with_bands=None):
"""
Parse the xml data of QE v5.0.x
Input data must be a single string, as returned by file.read()
Returns a dictionary with parsed values
"""
import copy
from xml.parsers.expat import ExpatError
# NOTE : I often assume that if the xml file has been written, it has no
# internal errors.
try:
dom = xml.dom.minidom.parseString(data)
except ExpatError:
return {'xml_warnings':"Error in XML parseString: bad format"},{},{}
parsed_data = {}
parsed_data['xml_warnings'] = []
structure_dict = {}
# CARD CELL
structure_dict,lattice_vectors,volume = copy.deepcopy(xml_card_cell(structure_dict,dom))
# CARD IONS
structure_dict = copy.deepcopy(xml_card_ions(structure_dict,dom,lattice_vectors,volume))
#CARD HEADER
parsed_data = copy.deepcopy(xml_card_header(parsed_data,dom))
# CARD CONTROL
cardname='CONTROL'
target_tags=read_xml_card(dom,cardname)
for tagname in ['PP_CHECK_FLAG','LKPOINT_DIR',
'Q_REAL_SPACE','BETA_REAL_SPACE']:
parsed_data[tagname.lower()] = parse_xml_child_bool(tagname,target_tags)
# TODO: why this one isn't working? What is it actually?
# # CARD MOVING_CELL
#
# try:
# target_tags = dom.getElementsByTagName('MOVING_CELL')[0]
# except:
# raise IOError
#
# tagname='CELL_FACTOR'
# parsed_data[tagname.lower()]=parse_xml_child_float(tagname,target_tags)
# CARD ELECTRIC_FIELD
cardname='ELECTRIC_FIELD'
target_tags=read_xml_card(dom,cardname)
for tagname in ['HAS_ELECTRIC_FIELD','HAS_DIPOLE_CORRECTION']:
parsed_data[tagname.lower()]=parse_xml_child_bool(tagname,target_tags)
if parsed_data['has_electric_field'] or parsed_data['has_dipole_correction']:
tagname='FIELD_DIRECTION'
parsed_data[tagname.lower()]=parse_xml_child_integer(tagname,target_tags)
for tagname in ['MAXIMUM_POSITION','INVERSE_REGION','FIELD_AMPLITUDE']:
parsed_data[tagname.lower()]=parse_xml_child_float(tagname,target_tags)
# CARD PLANE_WAVES
parsed_data = copy.deepcopy(xml_card_planewaves(parsed_data,dom,'pw'))
# CARD SPIN
parsed_data = copy.deepcopy(xml_card_spin(parsed_data,dom))
# CARD BRILLOUIN ZONE
cardname='BRILLOUIN_ZONE'
target_tags=read_xml_card(dom,cardname)
tagname='NUMBER_OF_K-POINTS'
parsed_data[tagname.replace('-','_').lower()] = parse_xml_child_integer(tagname,target_tags)
tagname = 'UNITS_FOR_K-POINTS'
attrname = 'UNITS'
metric = parse_xml_child_attribute_str(tagname,attrname,target_tags)
if metric not in ['2 pi / a']:
raise QEOutputParsingError('Error parsing attribute {},'.format(attrname) + \
' tag {} inside {}, units unknown'.format(tagname, target_tags.tagName) )
k_points_units = metric
for tagname, param in [ ['MONKHORST_PACK_GRID','nk'],['MONKHORST_PACK_OFFSET','k'] ]:
try:
#a = target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
value = [int(a.getAttribute(param+str(i+1))) for i in range(3)]
parsed_data[tagname.replace('-','_').lower()] = value
except Exception: # I might not use the monkhorst pack grid
pass
kpoints = []
kpoints_weights = []
tagname_prefix = 'K-POINT.'
a_dict={_.nodeName: _ for _ in target_tags.childNodes
if _.nodeName.startswith(tagname_prefix)}
try:
import numpy
for i in range(parsed_data['number_of_k_points']):
tagname = "{}{}".format(tagname_prefix,i+1)
#a = target_tags.getElementsByTagName(tagname)[0]
a=a_dict[tagname]
b = a.getAttribute('XYZ').replace('\n','').rsplit()
value = [ float(s) for s in b ]
metric = k_points_units
if metric=='2 pi / a':
value = [ 2.*numpy.pi*float(s)/structure_dict['lattice_parameter'] for s in value ]
weight = float(a.getAttribute('WEIGHT'))
kpoints.append(value)
kpoints_weights.append(weight)
parsed_data['k_points']=kpoints
parsed_data['k_points'+units_suffix] = k_points_default_units
parsed_data['k_points_weights'] = kpoints_weights
except Exception:
raise QEOutputParsingError('Error parsing tag K-POINT.{} '
'inside {}.'.format(i+1,target_tags.tagName) )
# I skip this card until someone will have a need for this.
# try:
# tagname='STARTING_K-POINTS'
# num_starting_k_points=parse_xml_child_integer(tagname,target_tags)
# # raise exception if there is no such a key
# parsed_data[tagname.replace('-','_').lower()]=num_starting_k_points
#
# if parsed_data.get('starting_k_points'):
# try:
# kpoints=[]
# for i in range(parsed_data['starting_k_points']):
# tagname='K-POINT_START.'+str(i+1)
# a=target_tags.getElementsByTagName(tagname)[0]
# b=a.getAttribute('XYZ').replace('\n','').rsplit()
# value=[ float(s) for s in b ]
# metric=parsed_data['k_points_units']
# if metric=='2 pi / a':
# value=[ float(s)/parsed_data['lattice_parameter'] for s in value ]
#
# weight=float(a.getAttribute('WEIGHT'))
#
# kpoints.append([value,weight])
#
# parsed_data['k_point_start']=kpoints
# except Exception:
# raise QEOutputParsingError('Error parsing tag {}'.format(tagname)+\
# ' inside {}.'.format(target_tags.tagName ) )
# except Exception:
# if not parsed_data.get('starting_k_points'):
# pass
# else:
# parsed_data['xml_warnings'].append("Warning: could not parse {}".format(tagname))
# tagname='NORM-OF-Q'
# TODO: decide if save this parameter
# parsed_data[tagname.replace('-','_').lower()]=parse_xml_child_float(tagname,target_tags)
# CARD BAND STRUCTURE INFO
cardname = 'BAND_STRUCTURE_INFO'
target_tags = read_xml_card(dom,cardname)
for tagname in ['NUMBER_OF_SPIN_COMPONENTS','NUMBER_OF_ATOMIC_WFC','NUMBER_OF_BANDS']:
parsed_data[tagname.replace('-','_').lower()] = \
parse_xml_child_integer(tagname,target_tags)
tagname='NON-COLINEAR_CALCULATION'
parsed_data[tagname.replace('-','_').lower()] = \
parse_xml_child_bool(tagname,target_tags)
tagname='NUMBER_OF_ELECTRONS'
parsed_data[tagname.replace('-','_').lower()] = \
parse_xml_child_float(tagname,target_tags)
tagname = 'UNITS_FOR_ENERGIES'
attrname = 'UNITS'
units = parse_xml_child_attribute_str(tagname,attrname,target_tags)
if units not in ['hartree']:
raise QEOutputParsingError('Expected energy units in Hartree.' + \
'Got instead {}'.format(parsed_data['energy_units']) )
try:
tagname='TWO_FERMI_ENERGIES'
parsed_data[tagname.lower()] = parse_xml_child_bool(tagname,target_tags)
except Exception:
pass
if parsed_data.get('two_fermi_energies',False):
tagname = 'FERMI_ENERGY_UP'
parsed_data[tagname.replace('-','_').lower()] = \
parse_xml_child_float(tagname,target_tags) * hartree_to_ev
parsed_data[tagname.lower()+units_suffix] = default_energy_units
tagname = 'FERMI_ENERGY_DOWN'
parsed_data[tagname.replace('-','_').lower()] = \
parse_xml_child_float(tagname,target_tags) * hartree_to_ev
parsed_data[tagname.lower()+units_suffix] = default_energy_units
else:
tagname = 'FERMI_ENERGY'
parsed_data[tagname.replace('-','_').lower()] = \
parse_xml_child_float(tagname,target_tags) * hartree_to_ev
parsed_data[tagname.lower()+units_suffix] = default_energy_units
#CARD MAGNETIZATION_INIT
cardname = 'MAGNETIZATION_INIT'
target_tags = read_xml_card(dom,cardname)
# 0 if false
tagname='CONSTRAINT_MAG'
parsed_data[tagname.lower()] = parse_xml_child_integer(tagname,target_tags)
vec1 = []
vec2 = []
vec3 = []
for i in range(structure_dict['number_of_species']):
tagname='SPECIE.'+str(i+1)
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
tagname2='STARTING_MAGNETIZATION'
vec1.append(parse_xml_child_float(tagname2,a))
tagname2='ANGLE1'
vec2.append(parse_xml_child_float(tagname2,a))
tagname2='ANGLE2'
vec3.append(parse_xml_child_float(tagname2,a))
parsed_data['starting_magnetization'] = vec1
parsed_data['magnetization_angle1'] = vec2
parsed_data['magnetization_angle2'] = vec3
#CARD OCCUPATIONS
cardname = 'OCCUPATIONS'
target_tags = read_xml_card(dom,cardname)
for tagname in ['SMEARING_METHOD','TETRAHEDRON_METHOD','FIXED_OCCUPATIONS']:
parsed_data[tagname.lower()] = parse_xml_child_bool(tagname,target_tags)
#CARD CHARGE-DENSITY
cardname='CHARGE-DENSITY'
target_tags=read_xml_card(dom,cardname)
try:
attrname='iotk_link'
value=str(target_tags.getAttribute(attrname)).rstrip().replace('\n','').lower()
parsed_data[cardname.lower().rstrip().replace('-','_')]=value
except Exception:
raise QEOutputParsingError('Error parsing attribute {},'.format(attrname) + \
' card {}.'.format(cardname))
#CARD EIGENVALUES
# Note: if this card is parsed, the dimension of the database grows very much!
cardname='EIGENVALUES'
target_tags=read_xml_card(dom,cardname)
bands_dict = {}
if dir_with_bands:
try:
occupations1 = []
occupations2 = []
bands1 = []
bands2 = []
for i in range(parsed_data['number_of_k_points']):
tagname='K-POINT.'+str(i+1)
#a=target_tags.getElementsByTagName(tagname)[0]
a=[_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
def read_bands_and_occupations(eigenval_n):
# load the eigenval.xml file
with open(eigenval_n,'r') as eigenval_f:
f = eigenval_f.read()
eig_dom = xml.dom.minidom.parseString(f)
tagname = 'UNITS_FOR_ENERGIES'
a = eig_dom.getElementsByTagName(tagname)[0]
attrname = 'UNITS'
metric = str(a.getAttribute(attrname))
if metric not in ['Hartree']:
raise QEOutputParsingError('Error parsing eigenvalues xml file, ' + \
'units {} not implemented.'.format(metric))
tagname='EIGENVALUES'
a=eig_dom.getElementsByTagName(tagname)[0]
b=a.childNodes[0]
value_e = [ float(s)*hartree_to_ev for s in b.data.split() ]
tagname='OCCUPATIONS'
a = eig_dom.getElementsByTagName(tagname)[0]
b = a.childNodes[0]
value_o = [ float(s) for s in b.data.split() ]
return value_e,value_o
# two cases: in cases of magnetic calculations, I have both spins
try:
tagname2 = 'DATAFILE'
b = a.getElementsByTagName(tagname2)[0]
attrname = 'iotk_link'
value = str(b.getAttribute(attrname)).rstrip().replace('\n','')
eigenval_n = os.path.join(dir_with_bands,value)
value_e,value_o = read_bands_and_occupations(eigenval_n)
bands1.append(value_e)
occupations1.append(value_o)
except IndexError:
tagname2='DATAFILE.1'
b1 = a.getElementsByTagName(tagname2)[0]
tagname2='DATAFILE.2'
b2 = a.getElementsByTagName(tagname2)[0]
attrname = 'iotk_link'
value1 = str(b1.getAttribute(attrname)).rstrip().replace('\n','')
value2 = str(b2.getAttribute(attrname)).rstrip().replace('\n','')
eigenval_n = os.path.join(dir_with_bands,value1)
value_e,value_o = read_bands_and_occupations(eigenval_n)
bands1.append(value_e)
occupations1.append(value_o)
eigenval_n = os.path.join(dir_with_bands,value2)
value_e,value_o = read_bands_and_occupations(eigenval_n)
bands2.append(value_e)
occupations2.append(value_o)
occupations = [occupations1]
bands = [bands1]
if occupations2:
occupations.append(occupations2)
if bands2:
bands.append(bands2)
bands_dict['occupations'] = occupations
bands_dict['bands'] = bands
bands_dict['bands'+units_suffix] = default_energy_units
except Exception:
raise QEOutputParsingError('Error parsing card {}'.format(tagname))
# if dir_with_bands:
# # if there is at least an empty band:
# if parsed_data['smearing_method'] or \
# parsed_data['number_of_electrons']/2. < parsed_data['number_of_bands']:
#
# #TODO: currently I do it only for non magnetic systems
# if len(bands_dict['occupations'])==1:
# # initialize lumo
# lumo = parsed_data['homo']+10000.0
# for list_bands in bands_dict['bands']:
# for value in list_bands:
# if (value > parsed_data['fermi_energy']) and (value<lumo):
# lumo=value
# if (lumo==parsed_data['homo']+10000.0) or lumo<=parsed_data['fermi_energy']:
# #might be an error for bandgap larger than 10000 eV...
# raise QEOutputParsingError('Error while searching for LUMO.')
# parsed_data['lumo']=lumo
# parsed_data['lumo'+units_suffix] = default_energy_units
# CARD symmetries
parsed_data = copy.deepcopy(xml_card_symmetries(parsed_data,dom))
# CARD EXCHANGE_CORRELATION
parsed_data = copy.deepcopy(xml_card_exchangecorrelation(parsed_data,dom))
return parsed_data,structure_dict,bands_dict
[docs]def parse_pw_text_output(data, xml_data={}, structure_data={}, input_dict={}):
"""
Parses the text output of QE-PWscf.
:param data: a string, the file as read by read()
:param xml_data: the dictionary with the keys read from xml.
:param structure_data: dictionary, coming from the xml, with info on the structure
:param input_dict: dictionary with the input parameters
:return parsed_data: dictionary with key values, referring to quantities
at the last scf step.
:return trajectory_data: key,values referring to intermediate scf steps,
as in the case of vc-relax. Empty dictionary if no
value is present.
:return critical_messages: a list with critical messages. If any is found in
parsed_data['warnings'], the calculation is FAILED!
"""
# Separate the input string into separate lines
data_lines = data.split('\n')
parsed_data = {}
parsed_data['warnings'] = []
vdw_correction = False
trajectory_data = {}
# critical warnings: if any is found, the calculation status is FAILED
critical_warnings = {'The maximum number of steps has been reached.':"The maximum step of the ionic/electronic relaxation has been reached.",
'convergence NOT achieved after':"The scf cycle did not reach convergence.",
#'eigenvalues not converged':None, # special treatment
'iterations completed, stopping':'Maximum number of iterations reached in Wentzcovitch Damped Dynamics.',
'Maximum CPU time exceeded':'Maximum CPU time exceeded',
'%%%%%%%%%%%%%%':None,
}
minor_warnings = {'Warning:':None,
'DEPRECATED:':None,
'incommensurate with FFT grid':'The FFT is incommensurate: some symmetries may be lost.',
'SCF correction compared to forces is too large, reduce conv_thr':"Forces are inaccurate (SCF correction is large): reduce conv_thr.",
}
all_warnings = dict(critical_warnings.items() + minor_warnings.items())
# Find some useful quantities.
if not xml_data.get('number_of_bands',None) and not structure_data:
try:
for line in data.split('\n'):
if 'lattice parameter (alat)' in line:
alat = float(line.split('=')[1].split('a.u')[0])
elif 'number of atoms/cell' in line:
nat = int(line.split('=')[1])
elif 'number of atomic types' in line:
ntyp = int(line.split('=')[1])
elif 'unit-cell volume' in line:
volume = float(line.split('=')[1].split('(a.u.)^3')[0])
elif 'number of Kohn-Sham states' in line:
nbnd = int(line.split('=')[1])
elif "number of k points" in line:
nk = int(line.split('=')[1].split()[0])
if input_dict.get('SYSTEM',{}).get('nspin',1) > 1:
# QE counts twice each k-point in spin-polarized calculations
nk /= 2
elif "Dense grid" in line:
FFT_grid = [int(g) for g in
line.split('(')[1].split(')')[0].split(',')]
elif "Smooth grid" in line:
smooth_FFT_grid = [int(g) for g in
line.split('(')[1].split(')')[0].split(',')]
break
alat *= bohr_to_ang
volume *= bohr_to_ang**3
parsed_data['lattice_parameter_initial'] = alat
parsed_data['warnings'].append('Xml data not found: parsing only the text output')
parsed_data['number_of_bands'] = nbnd
try:
parsed_data['number_of_k_points'] = nk
parsed_data['fft_grid'] = FFT_grid
parsed_data['smooth_fft_grid'] = smooth_FFT_grid
except NameError:
# these are not crucial, so parsing does not fail if they are not found
pass
except NameError: # nat or other variables where not found, and thus not initialized
# try to get some error message
for count,line in enumerate(data.split('\n')):
if any( i in line for i in all_warnings):
messages = [ all_warnings[i] if all_warnings[i] is not None
else line for i in all_warnings.keys()
if i in line]
if '%%%%%%%%%%%%%%' in line:
messages = parse_QE_errors(data.split('\n'),count,
parsed_data['warnings'])
# if it found something, add to log
if len(messages)>0:
parsed_data['warnings'].extend(messages)
if len(parsed_data['warnings'])>0:
return parsed_data, trajectory_data, critical_warnings.values()
else:
# did not find any error message -> raise an Error and do not
# return anything
raise QEOutputParsingError("Parser can't load basic info.")
else:
nat = structure_data['number_of_atoms']
ntyp = structure_data['number_of_species']
nbnd = xml_data['number_of_bands']
alat = structure_data['lattice_parameter_xml']
volume = structure_data['cell']['volume']
# NOTE: lattice_parameter_xml is the lattice parameter of the xml file
# in the units used by the code. lattice_parameter instead in angstroms.
# Save these two quantities in the parsed_data, because they will be
# useful for queries (maybe), and structure_data will not be stored as a ParameterData
parsed_data['number_of_atoms'] = nat
parsed_data['number_of_species'] = ntyp
times_strings = ['h_psi','s_psi','cdiaghg']
times_strings += ['fft','fft_scatter', 'init_run']
times_strings += ['wfcinit','c_bands']
parsed_data['volume'] = volume
c_bands_error = False
# now grep quantities that can be considered isolated informations.
for count,line in enumerate(data_lines):
bool_time = [any(re.findall('\\b{}\\b'.format(i), line)) for i in times_strings]
# to be used for later
if 'Carrying out vdW-DF run using the following parameters:' in line:
vdw_correction=True
# # single information only. To check that is not an info of the input already.
# elif 'EXX-fraction' in line:
# parsed_data['exx_fraction'] = float( line.split()[-1] )
elif 'Cartesian axes' in line:
# this is the part when initial positions and chemical
# symbols are printed (they do not change during a run)
i = count+1
while i<count+10 and not('site n.' in data_lines[i] and
'atom' in data_lines[i]):
i += 1
if 'site n.' in data_lines[i] and 'atom' in data_lines[i]:
trajectory_data['atomic_species_name'] = \
[data_lines[i+1+j].split()[1] for j in range(nat)]
# parse the initialization time (take only first occurence)
elif ('init_wall_time_seconds' not in parsed_data
and "total cpu time spent up to now is" in line):
init_time = float(line.split("total cpu time spent up to now is"
)[1].split('secs')[0])
parsed_data['init_wall_time_seconds'] = init_time
# parse the global file, for informations that are written only once
elif 'PWSCF' in line and 'WALL' in line:
try:
time = line.split('CPU')[1].split('WALL')[0]
parsed_data['wall_time'] = time
except Exception:
parsed_data['warnings'].append('Error while parsing wall time.')
try:
parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time)
except ValueError:
raise QEOutputParsingError("Unable to convert wall_time in seconds.")
elif any(bool_time) and 'WALL' in line:
bindex = bool_time.index(True)
bstring = times_strings[bindex] + '_time'
bstring_seconds= times_strings[bindex] + '_time_seconds'
try:
time = line.split('CPU')[1].split('WALL')[0]
parsed_data[bstring] = time
except Exception:
error_string = 'Error while parsing ' + times_strings[bindex]
parsed_data['warnings'].append(error_string)
try:
parsed_data[bstring_seconds] = convert_qe_time_to_sec(time)
except ValueError:
error_string = 'Unable to convert ' + times_strings[bindex] + ' seconds'
parsed_data['warnings'].append(error_string)
elif 'SUMMARY OF PHASES' in line:
try:
j = 0
while True:
j+=1
if 'Ionic Phase' in data_lines[count+j]:
value = float(data_lines[count+j].split(':')[1].split('(')[0])
mod = int(data_lines[count+j].split('(mod')[1].split(')')[0])
if mod != 2:
raise QEOutputParsingError("Units for polarization phase not supported")
parsed_data['ionic_phase'] = value
parsed_data['ionic_phase'+units_suffix] = '2pi'
if 'Electronic Phase' in data_lines[count+j]:
value = float(data_lines[count+j].split(':')[1].split('(')[0])
mod = int(data_lines[count+j].split('(mod')[1].split(')')[0])
if mod != 2:
raise QEOutputParsingError("Units for polarization phase not supported")
parsed_data['electronic_phase'] = value
parsed_data['electronic_phase'+units_suffix] = '2pi'
if 'Total Phase' in data_lines[count+j]:
value = float(data_lines[count+j].split(':')[1].split('(')[0])
mod = int(data_lines[count+j].split('(mod')[1].split(')')[0])
if mod != 2:
raise QEOutputParsingError("Units for polarization phase not supported")
parsed_data['total_phase'] = value
parsed_data['total_phase'+units_suffix] = '2pi'
# TODO: decide a standard unit for e charge
if "C/m^2" in data_lines[count+j]:
value = float(data_lines[count+j].split('=')[1].split('(')[0])
mod = float(data_lines[count+j].split('mod')[1].split(')')[0])
units = data_lines[count+j].split(')')[1].strip()
parsed_data['polarization'] = value
parsed_data['polarization_module'] = mod
parsed_data['polarization'+units_suffix] = default_polarization_units
if 'C / m^2' not in default_polarization_units:
raise QEOutputParsingError("Units for polarization phase not supported")
if 'polarization direction' in data_lines[count+j]:
vec = [ float(s) for s in \
data_lines[count+j].split('(')[1].split(')')[0].split(',') ]
parsed_data['polarization_direction'] = vec
except Exception:
warning = 'Error while parsing polarization.'
parsed_data['warnings'].append(warning)
# for later control on relaxation-dynamics convergence
elif 'nstep' in line and '=' in line:
max_dynamic_iterations = int(line.split()[2])
elif 'point group' in line:
if 'k-point group' not in line:
try:
# Split line in components delimited by either space(s) or
# parenthesis and filter out empty strings
line_elems = filter(None, re.split(' +|\(|\)', line))
pg_international = line_elems[-1]
pg_schoenflies = line_elems[-2]
parsed_data['pointgroup_international'] = pg_international
parsed_data['pointgroup_schoenflies'] = pg_schoenflies
except Exception:
warning = "Problem parsing point group, I found: {}".format(line.strip())
parsed_data['warnings'].append(warning)
# special parsing of c_bands error
elif 'c_bands' in line and 'eigenvalues not converged' in line:
c_bands_error = True
elif "iteration #" in line:
if ( ("Calculation restarted" not in line) and
("Calculation stopped" not in line) ):
try:
parsed_data['total_number_of_scf_iterations'] += 1
except KeyError:
parsed_data['total_number_of_scf_iterations'] = 1
if c_bands_error:
# if there is another iteration, c_bands is not necessarily a problem
# I put a warning only if c_bands error appears in the last iteration
c_bands_error = False
# Parsing of errors
elif any( i in line for i in all_warnings):
message = [ all_warnings[i] for i in all_warnings.keys() if i in line][0]
if message is None:
message = line
# if the run is a molecular dynamics, I ignore that I reached the
# last iteration step.
if ('The maximum number of steps has been reached.' in line and
'md' in input_dict.get('CONTROL',{}).get('calculation','')):
message = None
if 'iterations completed, stopping' in line:
value = message
message = None
if 'Wentzcovitch Damped Dynamics:' in line:
dynamic_iterations = int(line.split()[3])
if max_dynamic_iterations == dynamic_iterations:
message = value
# if 'c_bands' and 'eigenvalues not converged' in line:
# # even if some bands are not converged, this is a problem only
# # if it happens at the last scf step
# # start a loop to find the end of scf step
# # if another iteration is found, no warning is needed
# doloop = True
# j = 0
# while doloop:
# line2 = data.split('\n')[count+j]
# if "iteration #" in line2:
# doloop = False
# message = None
# if "End of self-consistent calculation" in line2:
# doloop = False # to prevent going until the end of file
# j += 1
# print count+j
if '%%%%%%%%%%%%%%' in line:
message = None
messages = parse_QE_errors(data_lines,count,parsed_data['warnings'])
# if it found something, add to log
try:
parsed_data['warnings'].extend(messages)
except UnboundLocalError:
pass
if message is not None:
parsed_data['warnings'].append(message)
if c_bands_error:
parsed_data['warnings'].append("c_bands: at least 1 eigenvalues not converged")
# I split the output text in the atomic SCF calculations.
# the initial part should be things already contained in the xml.
# (cell, initial positions, kpoints, ...) and I skip them.
# In case, parse for them before this point.
# Put everything in a trajectory_data dictionary
relax_steps = data.split('Self-consistent Calculation')[1:]
relax_steps = [ i.split('\n') for i in relax_steps]
# now I create a bunch of arrays for every step.
for data_step in relax_steps:
for count,line in enumerate(data_step):
if 'CELL_PARAMETERS' in line:
try:
a1 = [float(s) for s in data_step[count+1].split()]
a2 = [float(s) for s in data_step[count+2].split()]
a3 = [float(s) for s in data_step[count+3].split()]
# try except indexerror for not enough lines
lattice = line.split('(')[1].split(')')[0].split('=')
if lattice[0].lower() not in ['alat','bohr','angstrom']:
raise QEOutputParsingError('Error while parsing cell_parameters: '+\
'unsupported units {}'.format(lattice[0]) )
if 'alat' in lattice[0].lower():
a1 = [ alat*bohr_to_ang*float(s) for s in a1 ]
a2 = [ alat*bohr_to_ang*float(s) for s in a2 ]
a3 = [ alat*bohr_to_ang*float(s) for s in a3 ]
lattice_parameter_b = float(lattice[1])
if abs(lattice_parameter_b - alat) > lattice_tolerance:
raise QEOutputParsingError("Lattice parameters mismatch! " + \
"{} vs {}".format(lattice_parameter_b, alat))
elif 'bohr' in lattice[0].lower():
lattice_parameter_b*=bohr_to_ang
a1 = [ bohr_to_ang*float(s) for s in a1 ]
a2 = [ bohr_to_ang*float(s) for s in a2 ]
a3 = [ bohr_to_ang*float(s) for s in a3 ]
try:
trajectory_data['lattice_vectors_relax'].append([a1,a2,a3])
except KeyError:
trajectory_data['lattice_vectors_relax'] = [[a1,a2,a3]]
except Exception:
parsed_data['warnings'].append('Error while parsing relaxation cell parameters.')
elif 'ATOMIC_POSITIONS' in line:
try:
this_key = 'atomic_positions_relax'
#this_key_2 = 'atomic_species_name'
# the inizialization of tau prevent parsed_data to be associated
# to the pointer of the previous iteration
metric = line.split('(')[1].split(')')[0]
if metric not in ['alat','bohr','angstrom']:
raise QEOutputParsingError('Error while parsing atomic_positions:'
' units not supported.')
# TODO: check how to map the atoms in the original scheme
positions = []
#chem_symbols = []
for i in range(nat):
line2 = data_step[count+1+i].split()
tau = [float(s) for s in line2[1:4]]
#chem_symbol = str(line2[0]).rstrip()
if metric == 'alat':
tau = [ alat*float(s) for s in tau ]
elif metric == 'bohr':
tau = [ bohr_to_ang*float(s) for s in tau ]
positions.append(tau)
#chem_symbols.append(chem_symbol)
try:
trajectory_data[this_key].append(positions)
except KeyError:
trajectory_data[this_key] = [positions]
#trajectory_data[this_key_2] = chem_symbols # the symbols do not change during a run
except Exception:
parsed_data['warnings'].append('Error while parsing relaxation atomic positions.')
# NOTE: in the above, the chemical symbols are not those of AiiDA
# since the AiiDA structure is different. So, I assume now that the
# order of atoms is the same of the input atomic structure.
# Computed dipole correction in slab geometries.
# save dipole in debye units, only at last iteration of scf cycle
elif 'Computed dipole along edir' in line:
j = count+3
line2 = data_step[j]
try:
units = line2.split()[-1]
if default_dipole_units.lower() not in units.lower(): # only debye
raise QEOutputParsingError("Error parsing the dipole correction."
" Units {} are not supported.".format(units))
value = float(line2.split()[-2])
except IndexError: # on units
pass
# save only the last dipole correction
while 'Computed dipole along edir' not in line2:
j += 1
try:
line2 = data_step[j]
except IndexError: # The dipole is also written at the beginning of a new bfgs iteration
break
if 'End of self-consistent calculation' in line2:
try:
trajectory_data['dipole'].append( value )
except KeyError:
trajectory_data['dipole'] = [value]
parsed_data['dipole'+units_suffix] = default_dipole_units
break
elif ('convergence has been achieved in' in line
or 'convergence NOT achieved after' in line):
try:
scf_iterations = int(line.split("iterations")[0].split()[-1])
try:
trajectory_data['scf_iterations'].append(scf_iterations)
except KeyError:
trajectory_data['scf_iterations'] = [scf_iterations]
except Exception:
parsed_data['warnings'].append('Error while parsing scf iterations.')
elif 'End of self-consistent calculation' in line:
# parse energy threshold for diagonalization algorithm
try:
j = 0
while True:
j -= 1
line2 = data_step[count+j]
if 'ethr' in line2:
value = float(line2.split('=')[1].split(',')[0])
break
try:
trajectory_data['energy_threshold'].append(value)
except KeyError:
trajectory_data['energy_threshold'] = [value]
except Exception:
parsed_data['warnings'].append('Error while parsing ethr.')
# parse final magnetic moments, if present
try:
j = 0
while True:
j -= 1
line2 = data_step[count+j]
if 'Magnetic moment per site' in line2:
break
if 'iteration' in line2:
raise QEOutputParsingError
mag_moments = []
charges = []
while True:
j += 1
line2 = data_step[count+j]
if 'atom:' in line2:
mag_moments.append(float(line2.split('magn:')[1].split()[0]))
charges.append(float(line2.split('charge:')[1].split()[0]))
if len(mag_moments)==nat:
break
try:
trajectory_data['atomic_magnetic_moments'].append(mag_moments)
trajectory_data['atomic_charges'].append(charges)
except KeyError:
trajectory_data['atomic_magnetic_moments'] = [mag_moments]
trajectory_data['atomic_charges'] = [charges]
parsed_data['atomic_magnetic_moments'+units_suffix] = default_magnetization_units
parsed_data['atomic_charges'+units_suffix] = default_charge_units
except QEOutputParsingError:
pass
# grep energy and possibly, magnetization
elif '!' in line:
try:
for key in ['energy','energy_accuracy']:
if key not in trajectory_data:
trajectory_data[key] = []
En = float(line.split('=')[1].split('Ry')[0])*ry_to_ev
E_acc = float(data_step[count+2].split('<')[1].split('Ry')[0])*ry_to_ev
for key,value in [['energy',En],['energy_accuracy',E_acc]]:
trajectory_data[key].append(value)
parsed_data[key+units_suffix] = default_energy_units
# TODO: decide units for magnetization. now bohr mag/cell
j = 0
while True:
j+=1
line2 = data_step[count+j]
for string,key in [
['one-electron contribution','energy_one_electron'],
['hartree contribution','energy_hartree'],
['xc contribution','energy_xc'],
['ewald contribution','energy_ewald'],
['smearing contrib.','energy_smearing'],
['one-center paw contrib.','energy_one_center_paw'],
['est. exchange err','energy_est_exchange'],
['Fock energy','energy_fock'],
# Add also ENVIRON specific contribution to the total energy
['solvation energy','energy_solvation'],
['cavitation energy','energy_cavitation'],
['PV energy', 'energy_pv'],
['periodic energy correct.','energy_pbc_correction'],
['ionic charge energy','energy_ionic_charge'],
['external charges energy','energy_external_charges']
]:
if string in line2:
value = grep_energy_from_line(line2)
try:
trajectory_data[key].append(value)
except KeyError:
trajectory_data[key] = [value]
parsed_data[key+units_suffix] = default_energy_units
# magnetizations
if 'total magnetization' in line2:
this_m = line2.split('=')[1].split('Bohr')[0]
try: # magnetization might be a scalar
value = float(this_m)
except ValueError: # but can also be a three vector component in non-collinear calcs
value = [ float(i) for i in this_m.split() ]
try:
trajectory_data['total_magnetization'].append(value)
except KeyError:
trajectory_data['total_magnetization'] = [value]
parsed_data['total_magnetization'+units_suffix] = default_magnetization_units
elif 'absolute magnetization' in line2:
value=float(line2.split('=')[1].split('Bohr')[0])
try:
trajectory_data['absolute_magnetization'].append(value)
except KeyError:
trajectory_data['absolute_magnetization'] = [value]
parsed_data['absolute_magnetization'+units_suffix] = default_magnetization_units
# exit loop
elif 'convergence' in line2:
break
if vdw_correction:
j=0
while True:
j+=-1
line2 = data_step[count+j]
if 'Non-local correlation energy' in line2:
value = grep_energy_from_line(line2)
try:
trajectory_data['energy_vdw'].append(value)
except KeyError:
trajectory_data['energy_vdw'] = [value]
break
parsed_data['energy_vdw'+units_suffix] = default_energy_units
except Exception:
parsed_data['warnings'].append('Error while parsing for energy terms.')
elif 'the Fermi energy is' in line:
try:
value = float(line.split('is')[1].split('ev')[0])
try:
trajectory_data['fermi_energy'].append(value)
except KeyError:
trajectory_data['fermi_energy'] = [value]
parsed_data['fermi_energy'+units_suffix] = default_energy_units
except Exception:
parsed_data['warnings'].append('Error while parsing Fermi energy from the output file.')
elif 'Forces acting on atoms (Ry/au):' in line:
try:
forces = []
j = 0
while True:
j+=1
line2 = data_step[count+j]
if 'atom ' in line2:
line2 = line2.split('=')[1].split()
# CONVERT FORCES IN eV/Ang
vec = [ float(s)*ry_to_ev / \
bohr_to_ang for s in line2 ]
forces.append(vec)
if len(forces)==nat:
break
try:
trajectory_data['forces'].append(forces)
except KeyError:
trajectory_data['forces'] = [forces]
parsed_data['forces'+units_suffix] = default_force_units
except Exception:
parsed_data['warnings'].append('Error while parsing forces.')
# TODO: adding the parsing support for the decomposition of the forces
elif 'Total force =' in line:
try: # note that I can't check the units: not written in output!
value = float(line.split('=')[1].split('Total')[0])*ry_to_ev/bohr_to_ang
try:
trajectory_data['total_force'].append(value)
except KeyError:
trajectory_data['total_force'] = [value]
parsed_data['total_force'+units_suffix] = default_force_units
except Exception:
parsed_data['warnings'].append('Error while parsing total force.')
elif 'entering subroutine stress ...' in line:
try:
stress = []
for k in range (10+5*vdw_correction):
if "P=" in data_step[count+k+1]:
count2 = count+k+1
if '(Ry/bohr**3)' not in data_step[count2]:
raise QEOutputParsingError('Error while parsing stress: unexpected units.')
for k in range(3):
line2 = data_step[count2+k+1].split()
vec = [ float(s)*10**(-9)*ry_si/(bohr_si)**3 for s in line2[0:3] ]
stress.append(vec)
try:
trajectory_data['stress'].append(stress)
except KeyError:
trajectory_data['stress'] = [stress]
parsed_data['stress'+units_suffix] = default_stress_units
except Exception:
parsed_data['warnings'].append('Error while parsing stress tensor.')
return parsed_data, trajectory_data, critical_warnings.values()
[docs]def parse_QE_errors(lines,count,warnings):
"""
Parse QE errors messages (those appearing between some lines with
``%%%%%%%%``)
:param lines: list of strings, the output text file as read by readlines()
or as obtained by data.split('\\n') when data is the text file read by read()
:param count: the line at which we identified some ``%%%%%%%%``
:param warnings: the warnings already parsed in the file
:return messages: a list of QE error messages
"""
# PREVIOUS VERSION
# TODO: suppress it when we are sure the new one is working in all cases
# # find the indices of the lines with problems
# found_endpoint = False
# init_problem = count
# for count2,line2 in enumerate(lines[count+1:]):
# end_problem = count + count2 + 1
# if "%%%%%%%%%%%%" in line2:
# found_endpoint = True
# break
# if not found_endpoint:
# message = None
# else:
# # build a dictionary with the lines
# prob_list = lines[init_problem:end_problem+1]
# irred_list = list(set(prob_list))
# message = ""
# for v in prob_list:
# if v in irred_list:
# #irred_list.pop(v)
# message += irred_list.pop(irred_list.index(v)) + '\n'
# return [message]
# find the indices of the lines with problems
found_endpoint = False
init_problem = count
for count2,line2 in enumerate(lines[count+1:]):
end_problem = count + count2 + 1
if "%%%%%%%%%%%%" in line2:
found_endpoint = True
break
messages = []
if found_endpoint:
# build a dictionary with the lines
prob_list = lines[init_problem:end_problem+1]
irred_list = list(set(prob_list))
for v in prob_list:
if ( len(v)>0 and (v in irred_list and v not in warnings) ):
messages.append(irred_list.pop(irred_list.index(v)))
return messages