Source code for aiida.orm.data

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
from aiida.orm.node import Node
from aiida.common.links import LinkType
from aiida.common.lang import override
from aiida.common.exceptions import ModificationNotAllowed



class Data(Node):
    """
    Base class for all data objects.

    Specifications of the Data class:

    * AiiDA Data objects are subclasses of Node.
    * Multiple inheritance must be supported, i.e. Data should have methods
      for querying and be able to inherit other library objects such as ASE
      for structures.

    Architecture note:
    The code plugin is responsible for converting a raw data object produced
    by code to AiiDA standard object format. The data object then validates
    itself according to its method. This is done independently in order to
    allow cross-validation of plugins.
    """
    # Keys accepted in the dictionary handled by the ``source`` property.
    _source_attributes = ['db_name', 'db_uri', 'uri', 'id', 'version',
                          'extras', 'source_md5', 'description', 'license']

    # Replace this with a dictionary in each subclass that, given a file
    # extension, returns the corresponding fileformat string.
    #
    # This is used in the self.export() method.
    # By default, if not found here,
    # The fileformat string is assumed to match the extension.
    # Example: {'dat': 'dat_multicolumn'}
    _custom_export_format_replacements = {}

    @property
    def source(self):
        """
        Gets the dictionary describing the source of Data object. Possible
        fields:

        * **db_name**: name of the source database.
        * **db_uri**: URI of the source database.
        * **uri**: URI of the object's source. Should be a permanent link.
        * **id**: object's source identifier in the source database.
        * **version**: version of the object's source.
        * **extras**: a dictionary with other fields for source description.
        * **source_md5**: MD5 checksum of object's source.
        * **description**: human-readable free form description of the
          object's source.
        * **license**: a string with a type of license.

        .. note:: some limitations for setting the data source exist, see
            ``_validate`` method.

        :return: dictionary describing the source of Data object, or None
            if the ``source`` attribute is not set.
        """
        return self.get_attr('source', None)

    @source.setter
    def source(self, source):
        """
        Sets the dictionary describing the source of Data object.

        :param source: dictionary whose keys are all listed in
            ``_source_attributes``.
        :raise KeyError: if dictionary contains unknown field.
        :raise ValueError: if supplied source description is not a
            dictionary.
        """
        if not isinstance(source, dict):
            raise ValueError("Source must be supplied as a dictionary")
        # Keep the dictionary's own iteration order so that the error
        # message does not depend on set ordering.
        unknown_attrs = [key for key in source
                         if key not in self._source_attributes]
        if unknown_attrs:
            raise KeyError("Unknown source parameters: "
                           "{}".format(", ".join(unknown_attrs)))

        self._set_attr('source', source)

    def set_source(self, source):
        """
        Sets the dictionary describing the source of Data object.

        Method-style equivalent of assigning to the ``source`` property;
        the same validation and exceptions apply.
        """
        self.source = source

    @override
    def _set_attr(self, key, value):
        """
        Set a new attribute to the Node (in the DbAttribute table).

        :param str key: key name
        :param value: its value
        :raise ModificationNotAllowed: if such attribute cannot be added
            (e.g. because the node was already stored)
        :raise ValidationError: if the key is not valid (e.g. it contains
            the separator symbol).
        """
        # Data nodes are immutable once stored.
        if self.is_stored:
            raise ModificationNotAllowed(
                "Cannot change the attributes of a stored data node.")
        super(Data, self)._set_attr(key, value)

    @override
    def _del_attr(self, key):
        """
        Delete an attribute.

        :param key: attribute to delete.
        :raise AttributeError: if key does not exist.
        :raise ModificationNotAllowed: if the Node was already stored.
        """
        # Data nodes are immutable once stored.
        if self.is_stored:
            raise ModificationNotAllowed(
                "Cannot delete the attributes of a stored data node.")
        super(Data, self)._del_attr(key)

    @override
    def add_link_from(self, src, label=None, link_type=LinkType.UNSPECIFIED):
        """
        Add a link entering this data node from ``src``.

        :param src: the source node; must be a Calculation.
        :param label: label of the link, passed on to the parent class.
        :param link_type: type of the link, passed on to the parent class.
        :raise ValueError: if a second CREATE link would enter this node, or
            if ``src`` is not a Calculation.
        """
        # Imported here rather than at module level, as in the original
        # code (presumably to avoid a circular import - TODO confirm).
        from aiida.orm.calculation import Calculation

        if link_type is LinkType.CREATE and \
                len(self.get_inputs(link_type=LinkType.CREATE)) > 0:
            raise ValueError("At most one CREATE node can enter a data node")

        if not isinstance(src, Calculation):
            raise ValueError(
                "Links entering a data object can only be of type calculation")

        return super(Data, self).add_link_from(src, label, link_type)

    @override
    def _linking_as_output(self, dest, link_type):
        """
        Raise a ValueError if a link from self to dest is not allowed.

        An output of a data can only be a calculation

        :param dest: the destination node of the link.
        :param link_type: type of the link, passed on to the parent class.
        :raise ValueError: if ``dest`` is not a Calculation.
        """
        from aiida.orm.calculation import Calculation
        if not isinstance(dest, Calculation):
            raise ValueError(
                "The output of a data node can only be a calculation")

        return super(Data, self)._linking_as_output(dest, link_type)

    def _format_not_implemented_error(self, fileformat, implemented):
        """
        Build the error raised when no handler exists for ``fileformat``.

        :param fileformat: the format string that was requested.
        :param implemented: dictionary of the available handlers, keyed by
            format name.
        :return: a ValueError instance (not raised here).
        """
        if implemented:
            return ValueError("The format {} is not implemented for {}. "
                              "Currently implemented are: {}.".format(
                                  fileformat, self.__class__.__name__,
                                  ",".join(implemented)))
        return ValueError("The format {} is not implemented for {}. "
                          "No formats are implemented yet.".format(
                              fileformat, self.__class__.__name__))

    def _get_prefixed_methods(self, prefix):
        """
        Collect the attributes of this object whose name starts with
        ``prefix``.

        :param prefix: the name prefix to look for.
        :return: a dictionary mapping the name stripped of ``prefix`` to the
            corresponding attribute, as obtained with ``getattr`` on self.
        """
        return {name[len(prefix):]: getattr(self, name)
                for name in dir(self) if name.startswith(prefix)}

    @override
    def _exportstring(self, fileformat, main_file_name="", **kwargs):
        """
        Converts a Data object to other text format.

        :param fileformat: a string (the extension) to describe the file
            format.
        :param main_file_name: empty by default, contains the (full) path
            and filename of the main file, if any. This is not used
            directly, but is used to infer useful unique names for the
            additional files, if any. For instance, if the main file for
            gnuplot is '../myplot.gnu', the plugin could decide to store the
            dat file under '../myplot_data.dat'. It is up to the plugin to
            properly deal with this filename (or ignore it, if not relevant,
            e.g. if no additional files need to be created)
        :param kwargs: any other parameter is passed down to the specific
            plugin
        :returns: a tuple of length 2. The first element is the content of
            the output file. The second is a dictionary (possibly empty) in
            the format {filename: filecontent} for any additional file that
            should be produced.
        :raise ValueError: if ``fileformat`` is not implemented.
        """
        exporters = self._get_exporters()

        try:
            func = exporters[fileformat]
        except KeyError:
            raise self._format_not_implemented_error(fileformat, exporters)

        return func(main_file_name=main_file_name, **kwargs)

    @override
    def export(self, path, fileformat=None, overwrite=False, **kwargs):
        """
        Save a Data object to a file.

        :param path: string with file name. Can be an absolute or relative
            path.
        :param fileformat: kind of format to use for the export. If not
            present, it will try to use the extension of the file name.
        :param overwrite: if set to True, overwrites file found at path.
            Default=False
        :param kwargs: additional parameters to be passed to the
            _exportstring method
        :return: the list of files created
        :raise ValueError: if ``path`` is empty, if the format cannot be
            inferred from the extension, or if the format is not
            implemented.
        :raise OSError: if ``overwrite`` is False and ``path`` or one of the
            additional files already exists.
        """
        import os

        if not path:
            raise ValueError("Path not recognized")

        # Fail early, before any (possibly expensive) conversion is done.
        if os.path.exists(path) and not overwrite:
            raise OSError("A file was already found at {}".format(path))

        if fileformat is None:
            extension = os.path.splitext(path)[1]
            if extension.startswith(os.path.extsep):
                extension = extension[len(os.path.extsep):]
            if not extension:
                raise ValueError("Cannot recognized the fileformat from the "
                                 "extension")

            # Replace the fileformat using the replacements specified in the
            # _custom_export_format_replacements dictionary. If not found
            # there, by default assume the fileformat string is identical to
            # the extension
            fileformat = self._custom_export_format_replacements.get(
                extension, extension)

        filetext, extra_files = self._exportstring(
            fileformat, main_file_name=path, **kwargs)

        # The names of the additional files are only known at this point, so
        # all existence checks are done before anything is written.
        if not overwrite:
            for fname in extra_files:
                if os.path.exists(fname):
                    raise OSError(
                        "The file {} already exists, stopping.".format(fname))
            if os.path.exists(path):
                raise OSError(
                    "The file {} already exists, stopping.".format(path))

        retlist = []
        # items() rather than iteritems(): works on both Python 2 and 3.
        for additional_fname, additional_fcontent in extra_files.items():
            retlist.append(additional_fname)
            with open(additional_fname, 'wb') as f:
                # Encoding (e.g. to utf-8) is up to each specific plugin
                f.write(additional_fcontent)

        retlist.append(path)
        with open(path, 'wb') as f:
            f.write(filetext)

        return retlist

    def _get_exporters(self):
        """
        Get all implemented export formats.
        The convention is to find all _prepare_... methods.

        :return: a dictionary mapping each format name to the corresponding
            ``_prepare_<format>`` attribute of self.
        """
        # NOTE: To add support for a new format, write a new function called
        # _prepare_<format>, where <format> is the name of the new format
        exporter_prefix = '_prepare_'
        return {name: getattr(self, exporter_prefix + name)
                for name in self.get_export_formats()}

    @classmethod
    def get_export_formats(cls):
        """
        Get the list of valid export format strings

        :return: a sorted list of valid formats, i.e. the names of all
            attributes of the class starting with ``_prepare_``, stripped of
            that prefix.
        """
        exporter_prefix = '_prepare_'
        return sorted(name[len(exporter_prefix):] for name in dir(cls)
                      if name.startswith(exporter_prefix))

    def importstring(self, inputstring, fileformat, **kwargs):
        """
        Pass a string to the ``_parse_<fileformat>`` method of this object.

        :param inputstring: the string with the content to import.
        :param fileformat: a string (the extension) to describe the file
            format.
        :param kwargs: any other parameter is passed down to the specific
            parser.
        :raise ValueError: if ``fileformat`` is not implemented.
        """
        importers = self._get_importers()

        try:
            func = importers[fileformat]
        except KeyError:
            raise self._format_not_implemented_error(fileformat, importers)

        # func is bound to self by getattr in _get_importers(); its return
        # value is deliberately not propagated.
        func(inputstring, **kwargs)

    def importfile(self, fname, fileformat=None, **kwargs):
        """
        Populate a Data object from a file.

        :param fname: string with file name. Can be an absolute or relative
            path.
        :param fileformat: kind of format to use for the import. If not
            present, it will try to use the extension of the file name.
        :param kwargs: any other parameter is passed down to importstring.
        :raise ValueError: if the format cannot be inferred from the
            extension, or if the format is not implemented.
        """
        import os

        if fileformat is None:
            # Only look at the last path component: a dot in a directory
            # name (e.g. './file' or '/path.d/file') is not an extension.
            fileformat = os.path.basename(fname).rpartition('.')[2] \
                if '.' in os.path.basename(fname) else ''
            if not fileformat:
                raise ValueError("Cannot recognize the fileformat from the "
                                 "extension of {}".format(fname))

        with open(fname, 'r') as f:  # reads in cwd, if fname is not absolute
            self.importstring(f.read(), fileformat, **kwargs)

    def _get_importers(self):
        """
        Get all implemented import formats.
        The convention is to find all _parse_... methods.

        :return: a dictionary mapping each format name to the corresponding
            ``_parse_<format>`` attribute of self.
        """
        # NOTE: To add support for a new format, write a new function called
        # _parse_<format>, where <format> is the name of the new format
        return self._get_prefixed_methods('_parse_')

    def convert(self, object_format=None, *args):
        """
        Convert this Data object into another python object

        :param object_format: Specify the output format
        :param args: positional arguments passed down to the specific
            converter.
        :return: whatever the ``_get_object_<object_format>`` method returns.
        :raise ValueError: if ``object_format`` is missing, is not a string,
            or is not implemented.
        """
        if object_format is None:
            raise ValueError("object_format must be provided")

        # 'basestring' only exists on Python 2; fall back to 'str' elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if not isinstance(object_format, string_types):
            raise ValueError('object_format should be a string')

        converters = self._get_converters()

        try:
            func = converters[object_format]
        except KeyError:
            raise self._format_not_implemented_error(object_format, converters)

        return func(*args)

    def _get_converters(self):
        """
        Get all implemented converter formats.
        The convention is to find all _get_object_... methods.

        :return: a dictionary mapping each format name to the corresponding
            ``_get_object_<format>`` attribute of self.
        """
        # NOTE: To add support for a new format, write a new function called
        # _get_object_<format>, where <format> is the name of the new format
        return self._get_prefixed_methods('_get_object_')

    def _validate(self):
        """
        Perform validation of the Data object.

        Currently this only calls the validation of the parent class.

        .. note:: validation of data source used to check the license and
          require attribution to be provided in field 'description' of
          source in the case of any CC-BY* license. That check is commented
          out below.
        """
        super(Data, self)._validate()

        ## Validation of ``source`` is commented out due to Issue #9
        ## (https://bitbucket.org/epfl_theos/aiida_epfl/issues/9/)
        ##
        ## if self.source is not None and \
        ##    self.source.get('license', None) and \
        ##    self.source['license'].startswith('CC-BY') and \
        ##    self.source.get('description', None) is None:
        ##     raise ValidationError("License of the object ({}) requires "
        ##                           "attribution, while none is given in the "
        ##                           "description".format(self.source['license']))