Source code for aiida.common.utils

# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
"""Miscellaneous generic utility functions and classes."""

import filecmp
import inspect
import io
import os
import re
import sys
from datetime import datetime
from typing import Any, Dict
from uuid import UUID

from .lang import classproperty

def get_new_uuid() -> str:
    """Generate a fresh random (version 4) UUID and return its string form.

    :return: the canonical textual representation of a newly generated UUID
    """
    from uuid import uuid4

    fresh_uuid = uuid4()
    return str(fresh_uuid)
def validate_uuid(given_uuid: str) -> bool:
    """Check whether ``given_uuid`` is the canonical string form of a version 4 UUID.

    :param given_uuid: the value to check
    :return: True if ``given_uuid`` is a string that parses as a UUID and is left
        unchanged by a round trip through ``UUID(..., version=4)``; False otherwise,
        including when ``given_uuid`` is not a string at all
    """
    # ``UUID`` raises TypeError/AttributeError (not ValueError) for non-string input such
    # as ``None`` or an integer; such values are simply not valid UUID strings.
    if not isinstance(given_uuid, str):
        return False

    try:
        parsed_uuid = UUID(given_uuid, version=4)
    except ValueError:
        # The string cannot be parsed as a UUID
        return False

    # Passing ``version=4`` forces the version and variant bits, and ``str`` normalises
    # the formatting (lower case, dashes). Any difference from the input therefore means
    # the input was not a canonical version 4 UUID string.
    return str(parsed_uuid) == given_uuid
def validate_list_of_string_tuples(val, tuple_length):
    """Validate that ``val`` is a sequence of fixed-length sequences of strings.

    The value is accepted when:

    1. ``val`` is a list or a tuple
    2. every element of ``val``

       a. is itself a list or a tuple
       b. has exactly ``tuple_length`` items
       c. contains only strings

    :param val: the value to validate
    :param tuple_length: the required length of each inner list/tuple
    :return: True when the value is valid
    :raises aiida.common.exceptions.ValidationError: when the value is invalid
    """
    from aiida.common.exceptions import ValidationError

    sequence_types = (list, tuple)
    err_msg = (
        'the value must be a list (or tuple) '
        'of length-N list (or tuples), whose elements are strings; '
        'N={}'.format(tuple_length)
    )

    def _is_string_sequence(candidate):
        """Tell whether ``candidate`` is a list/tuple of ``tuple_length`` strings."""
        return (
            isinstance(candidate, sequence_types)
            and len(candidate) == tuple_length
            and all(isinstance(item, str) for item in candidate)
        )

    if not isinstance(val, sequence_types):
        raise ValidationError(err_msg)

    if not all(_is_string_sequence(entry) for entry in val):
        raise ValidationError(err_msg)

    return True
def get_unique_filename(filename, list_of_filenames):
    """Find a name derived from ``filename`` that does not clash with existing names.

    When ``filename`` is not yet contained in ``list_of_filenames`` it is returned as is.
    Otherwise ``-1``, ``-2``, ... is inserted between the base name and the extension,
    and the first candidate that is not in ``list_of_filenames`` is returned.

    :param filename: the filename to add
    :param list_of_filenames: the collection of names that are already taken
    :returns: either ``filename`` itself or a variant with a number inserted before the extension
    """
    if filename not in list_of_filenames:
        return filename

    stem, extension = os.path.splitext(filename)

    # Linear search over the suffixes; expected to be fast enough for typical usage
    suffix = 1
    candidate = f'{stem:s}-{suffix:d}{extension:s}'
    while candidate in list_of_filenames:
        suffix += 1
        candidate = f'{stem:s}-{suffix:d}{extension:s}'

    return candidate
def str_timedelta(dt, max_num_fields=3, short=False, negative_to_zero=False):
    """Format a time difference as a string such as ``1D:02h:03m ago``.

    :param dt: a ``timedelta`` object (anything exposing ``total_seconds()``)
    :param max_num_fields: maximum number of fields to show, starting from the first
        non-zero one (for instance, if the number of days is non-zero, only days,
        hours and minutes are shown, but not the seconds)
    :param short: if False, always print ``max_num_fields`` fields even when the leading
        ones are zero. If True, leading fields that are zero are dropped (at least one
        field is always kept).
    :param negative_to_zero: if True, a negative ``dt`` is treated as zero.
    :return: the formatted string, followed by ``' ago'`` for non-negative values and
        by ``' in the future'`` for negative ones
    :raises ValueError: if ``max_num_fields`` is not positive
    """
    if max_num_fields <= 0:
        raise ValueError('max_num_fields must be > 0')

    # ``total_seconds()`` (rather than ``dt.seconds``) is required to handle correctly
    # both intervals longer than one day and negative intervals.
    total = int(dt.total_seconds())
    if negative_to_zero:
        total = max(total, 0)

    suffix = ' in the future' if total < 0 else ' ago'
    total = abs(total)

    # Months and years are deliberately not supported: their length is not fixed.
    days, leftover = divmod(total, 3600 * 24)
    hours, leftover = divmod(leftover, 3600)
    minutes, seconds = divmod(leftover, 60)
    units = [(days, 'D'), (hours, 'h'), (minutes, 'm'), (seconds, 's')]

    selected = []
    collecting = False
    for remaining, unit in zip(range(len(units), 0, -1), units):
        # Start collecting at the first non-zero field, or as soon as the number of
        # fields still to visit fits within ``max_num_fields``.
        collecting = collecting or unit[0] != 0 or remaining <= max_num_fields
        if not collecting:
            continue
        if len(selected) >= max_num_fields:
            break
        selected.append(unit)

    if short:
        # Drop leading zero-valued fields, always keeping at least one field
        while len(selected) > 1 and selected[0][0] == 0:
            del selected[0]

    pieces = ['{:02d}{}'.format(value, symbol) for value, symbol in selected]
    text = ':'.join(pieces)
    if text.startswith('0'):
        # Remove the zero-padding of the very first field only
        text = text[1:]

    return f'{text}{suffix}'
def get_class_string(obj):
    """Build the dotted identifier (module name + class name) of a class.

    ``obj`` may be either a class or an instance; for an instance the name of its
    class is used.

    :param obj: a class or an instance of a class
    :return: a string of the form ``<module>.<class name>``
    """
    if not inspect.isclass(obj):
        class_name = obj.__class__.__name__
    else:
        class_name = obj.__name__

    return f'{obj.__module__}.{class_name}'
def get_object_from_string(class_string):
    """Load and return the object identified by a dotted string.

    The string is split at its last dot: the part before it is imported as a module
    and the part after it is looked up as an attribute of that module. This is the
    inverse of ``get_class_string``.

    :param class_string: a string of the form ``<module>.<object name>``
    :return: the object with that name in the given module
    """
    from importlib import import_module

    module_path, _, attribute_name = class_string.rpartition('.')
    module = import_module(module_path)
    return getattr(module, attribute_name)
def grouper(n, iterable):
    """Split an iterable into consecutive tuples of at most ``n`` elements.

    Every yielded tuple has length ``n``, except possibly the last one, which contains
    whatever is left once the iterable is exhausted (no fill value is applied).

    :param n: length of each tuple (the last one may be shorter)
    :param iterable: the iterable to divide in groups
    :return: a generator of tuples
    """
    from itertools import islice

    source = iter(iterable)

    def _next_chunk():
        """Consume and return the next (possibly shorter or empty) chunk."""
        return tuple(islice(source, n))

    # The two-argument form of ``iter`` keeps calling ``_next_chunk`` until it returns
    # the sentinel, i.e. an empty tuple, which signals that the source is exhausted.
    yield from iter(_next_chunk, ())
class ArrayCounter:
    """Hold an integer counter together with a method that increments and returns it.

    It is used in various tests.
    """

    # Class-level placeholder; every instance gets its own counter in ``__init__``
    seq = None

    def __init__(self):
        """Start the counter so that the first returned value is 0."""
        self.seq = -1

    def array_counter(self):
        """Increment the counter by one and return its new value."""
        self.seq = self.seq + 1
        return self.seq
def are_dir_trees_equal(dir1, dir2):
    """Recursively compare the content of two directories.

    Two trees are considered equal when they contain entries with the same names and
    the files with the same name have the same content.

    :param dir1: first directory path
    :param dir2: second directory path
    :return: a tuple ``(equal, message)``; ``equal`` is True if the directory trees are
        the same and no file was impossible to compare, False otherwise, and ``message``
        is a human-readable description of the outcome
    """
    comparison = filecmp.dircmp(dir1, dir2)
    only_left = comparison.left_only
    only_right = comparison.right_only
    not_comparable = comparison.funny_files

    # The sets of entry names must coincide and all entries must be comparable
    if only_left or only_right or not_comparable:
        report = (
            'Left directory: {}, right directory: {}, files only '
            'in left directory: {}, files only in right directory: '
            '{}, not comparable files: {}'.format(dir1, dir2, only_left, only_right, not_comparable)
        )
        return (False, report)

    # Same names on both sides: now compare the actual content of the common files
    _, differing, failed = filecmp.cmpfiles(dir1, dir2, comparison.common_files, shallow=False)

    if differing:
        return (False, f"The following files in the directories {dir1} and {dir2} don't match: {differing}")

    if failed:
        return (False, f"The following files in the directories {dir1} and {dir2} aren't regular: {failed}")

    # Finally descend into the subdirectories, stopping at the first difference
    for subdirectory in comparison.common_dirs:
        equal, message = are_dir_trees_equal(os.path.join(dir1, subdirectory), os.path.join(dir2, subdirectory))
        if not equal:
            return False, message

    return True, f'The given directories ({dir1} and {dir2}) are equal'
class Prettifier:
    """Convert labels (typically k-point labels in band plots) to the markup of a plotting format."""

    @classmethod
    def _prettify_label_pass(cls, label):
        """Return the label unchanged (no-op prettifier).

        :param label: a string to prettify
        """
        return label

    @classmethod
    def _prettify_label_agr(cls, label):
        """Prettify a label for XMGrace.

        :param label: a string to prettify
        """
        greek_letters = (
            ('GAMMA', r'\xG\f{}'),
            ('DELTA', r'\xD\f{}'),
            ('LAMBDA', r'\xL\f{}'),
            ('SIGMA', r'\xS\f{}'),
        )
        for spelled_out, markup in greek_letters:
            label = label.replace(spelled_out, markup)

        # The character following an underscore becomes a subscript
        return re.sub(r'_(.?)', r'\\s\1\\N', label)

    @classmethod
    def _prettify_label_agr_simple(cls, label):
        """Prettify a label for XMGrace (for old label names).

        :param label: a string to prettify
        """
        return r'\xG' if label == 'G' else re.sub(r'(\d+)', r'\\s\1\\N', label)

    @classmethod
    def _prettify_label_gnuplot(cls, label):
        """Prettify a label for Gnuplot.

        :note: the returned string may contain non-ASCII (unicode) characters
        :param label: a string to prettify
        """
        greek_letters = (('GAMMA', 'Γ'), ('DELTA', 'Δ'), ('LAMBDA', 'Λ'), ('SIGMA', 'Σ'))
        for spelled_out, symbol in greek_letters:
            label = label.replace(spelled_out, symbol)

        # The character following an underscore becomes a subscript
        return re.sub(r'_(.?)', r'_{\1}', label)

    @classmethod
    def _prettify_label_gnuplot_simple(cls, label):
        """Prettify a label for Gnuplot (for old label names).

        :note: the returned string may contain non-ASCII (unicode) characters
        :param label: a string to prettify
        """
        return 'Γ' if label == 'G' else re.sub(r'(\d+)', r'_{\1}', label)

    @classmethod
    def _prettify_label_latex(cls, label):
        """Prettify a label for matplotlib, using LaTeX syntax.

        :param label: a string to prettify
        """
        greek_letters = (
            ('GAMMA', r'$\Gamma$'),
            ('DELTA', r'$\Delta$'),
            ('LAMBDA', r'$\Lambda$'),
            ('SIGMA', r'$\Sigma$'),
        )
        for spelled_out, markup in greek_letters:
            label = label.replace(spelled_out, markup)

        # The character following an underscore becomes a subscript
        return re.sub(r'_(.?)', r'$_{\1}$', label)

    @classmethod
    def _prettify_label_latex_simple(cls, label):
        """Prettify a label for matplotlib, using LaTeX syntax (for old label names).

        :param label: a string to prettify
        """
        return r'$\Gamma$' if label == 'G' else re.sub(r'(\d+)', r'$_{\1}$', label)

    @classproperty
    def prettifiers(cls) -> Dict[str, Any]:  # noqa: N805
        """Map every supported format name to the function that prettifies a label for it.

        :return: a dictionary where keys are strings and values are functions
        """
        mapping = {}
        mapping['agr_seekpath'] = cls._prettify_label_agr
        mapping['agr_simple'] = cls._prettify_label_agr_simple
        mapping['latex_simple'] = cls._prettify_label_latex_simple
        mapping['latex_seekpath'] = cls._prettify_label_latex
        mapping['gnuplot_simple'] = cls._prettify_label_gnuplot_simple
        mapping['gnuplot_seekpath'] = cls._prettify_label_gnuplot
        mapping['pass'] = cls._prettify_label_pass
        return mapping

    @classmethod
    def get_prettifiers(cls):
        """Return the sorted names of all the valid prettifier formats.

        :return: a list of strings
        """
        return sorted(cls.prettifiers)

    def __init__(self, format):
        """Select the prettifier function to use for a given format.

        :param format: a string with the format to use to prettify; ``None`` selects the
            no-op ``'pass'`` format. Valid formats are the keys of ``self.prettifiers``.
        :raises ValueError: if the format is not one of the valid ones
        """
        format = 'pass' if format is None else format

        try:
            self._prettifier_f = self.prettifiers[format]
        except KeyError:
            valid_formats = ', '.join(self.get_prettifiers())
            raise ValueError(f'Unknown prettifier format {format}; valid formats: {valid_formats}')

    def prettify(self, label):
        """Prettify a label with the format chosen when the instance was created.

        :param label: the string to prettify
        :return: a prettified string
        """
        return self._prettifier_f(label)
def prettify_labels(labels, format=None):
    """Prettify the labels of a list of ``(position, label)`` pairs for a given format.

    :param labels: a list of length-2 tuples, in the format ``(position, label)``
    :param format: a string with the format for the prettifier, i.e. one of the names
        accepted by ``Prettifier`` (e.g. ``'agr_seekpath'``, ``'latex_simple'``, ...);
        ``None`` leaves the labels unchanged
    :return: a new list with the same positions, where each label is replaced by its
        prettified version for the selected format
    """
    prettify = Prettifier(format).prettify
    return [(position, prettify(text)) for position, text in labels]
def join_labels(labels, join_symbol='|', threshold=1.0e-6):
    """Merge the labels of consecutive entries whose positions (almost) coincide.

    :param labels: a list of length-2 tuples, in the format ``(position, label)``
    :param join_symbol: the string placed between labels that are merged. By default, a pipe
    :param threshold: two consecutive positions closer than this value are considered
        to be the same position
    :return: a list of ``[position, label]`` lists, where entries at (almost) the same
        position as their predecessor have been merged into a single joined label
    """
    if not labels:
        return []

    merged = [list(labels[0])]

    for previous, current in zip(labels, labels[1:]):
        if abs(current[0] - previous[0]) < threshold:
            # Overlapping position: extend the label of the most recent output entry
            merged[-1][1] += join_symbol + current[1]
        else:
            merged.append(list(current))

    return merged
def strip_prefix(full_string, prefix):
    """Strip the prefix from the given string and return it.

    If the prefix is not present, the original string is returned unaltered. Only the
    leading occurrence of the prefix is removed: further occurrences later in the
    string are left in place.

    :param full_string: the string from which to remove the prefix
    :param prefix: the prefix to remove
    :return: the string with prefix removed
    """
    if full_string.startswith(prefix):
        # Slice instead of splitting on the prefix: splitting cuts the string at *every*
        # occurrence of the prefix (truncating the result when the prefix recurs) and
        # fails with a ValueError for an empty prefix.
        return full_string[len(prefix):]

    return full_string
class Capturing:
    """Context manager that captures what is written to stdout (and optionally stderr).

    The captured output is stored as a list of lines in ``obj.stdout_lines``; iterating
    over the object iterates over these lines.

    Note: if you raise a SystemExit, you have to catch it outside.
    E.g., in our tests, this works::

        import sys
        with self.assertRaises(SystemExit):
            with Capturing() as output:
                sys.exit()

    But out of the testing environment, the code instead just exits.

    :param capture_stderr: if True, also captures sys.stderr. To access the
        lines, use obj.stderr_lines. If False, obj.stderr_lines is None.
    """

    def __init__(self, capture_stderr=False):
        """Construct a new instance."""
        self.stdout_lines = []
        super().__init__()
        self._capture_stderr = capture_stderr
        self.stderr_lines = [] if self._capture_stderr else None

    def __enter__(self):
        """Enter the context where all output is captured."""
        # Remember the current stream so that it can be restored on exit
        self._stdout = sys.stdout
        self._stringioout = io.StringIO()
        sys.stdout = self._stringioout

        if not self._capture_stderr:
            return self

        self._stderr = sys.stderr
        self._stringioerr = io.StringIO()
        sys.stderr = self._stringioerr
        return self

    def __exit__(self, *args):
        """Exit the context where all output is captured."""
        captured_out = self._stringioout.getvalue()
        self.stdout_lines.extend(captured_out.splitlines())
        sys.stdout = self._stdout
        del self._stringioout  # free up some memory

        if not self._capture_stderr:
            return

        captured_err = self._stringioerr.getvalue()
        self.stderr_lines.extend(captured_err.splitlines())
        sys.stderr = self._stderr
        del self._stringioerr  # free up some memory

    def __str__(self):
        """Return the string representation of the list of captured stdout lines."""
        return str(self.stdout_lines)

    def __iter__(self):
        """Iterate over the captured stdout lines."""
        return iter(self.stdout_lines)
class ErrorAccumulator:
    """Run a number of functions and collect all the errors they raise.

    This allows to validate multiple things and tell the user about all the
    errors encountered at once. Works best if the individual functions do not depend on each other.

    Does not allow to trace the stack of each error, therefore do not use for debugging, but for
    semantical checking with user friendly error messages.
    """

    def __init__(self, *error_cls):
        """Construct a new accumulator.

        :param error_cls: the exception classes to collect; any other exception raised
            by a function passed to ``run`` propagates to the caller
        """
        self.error_cls = error_cls
        # Collected exception instances, grouped by their class
        self.errors = {k: [] for k in self.error_cls}

    def run(self, function, *args, **kwargs):
        """Call ``function(*args, **kwargs)`` and record the error it raises, if any.

        The return value of ``function`` is discarded.

        :param function: the callable to run
        """
        try:
            function(*args, **kwargs)
        except self.error_cls as err:
            # ``except`` also matches subclasses of the registered classes, which have
            # no pre-created entry in ``self.errors``: create the entry on demand rather
            # than failing with a KeyError while handling the collected error.
            self.errors.setdefault(err.__class__, []).append(err)

    def success(self):
        """Return True if no error has been collected so far, False otherwise."""
        return not any(self.errors.values())

    def result(self, raise_error=Exception):
        """Return the outcome of the runs, optionally raising if any error was collected.

        :param raise_error: the exception class to raise when errors were collected;
            pass a false value (e.g. ``None``) to never raise
        :return: a tuple ``(success, errors)`` where ``errors`` is the dictionary mapping
            each exception class to the list of collected instances
        """
        if raise_error:
            self.raise_errors(raise_error)
        return self.success(), self.errors

    def raise_errors(self, raise_cls):
        """Raise ``raise_cls`` with a summary of the collected errors, if there are any.

        :param raise_cls: the exception class to raise
        """
        if not self.success():
            raise raise_cls(f'The following errors were encountered: {self.errors}')
class DatetimePrecision:
    """Store a datetime object together with its precision.

    The value of the precision is not checked internally. Its intended meaning is:

    * 1: only the full date
    * 2: date plus hour
    * 3: date + hour + minute
    * 4: date + hour + minute + second
    """

    def __init__(self, dtobj, precision):
        """Store the datetime and its precision, after checking their types.

        :param dtobj: a ``datetime`` object
        :param precision: an integer describing the precision of ``dtobj``
        :raises TypeError: if ``dtobj`` is not a datetime or ``precision`` is not an integer
        """
        type_checks = (
            (dtobj, datetime, 'dtobj argument has to be a datetime object'),
            (precision, int, 'precision argument has to be an integer'),
        )
        for value, expected_type, message in type_checks:
            if not isinstance(value, expected_type):
                raise TypeError(message)

        self.dtobj = dtobj
        self.precision = precision