Source code for aiida.tools.dbimporters.plugins.cod

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
# pylint: disable=no-self-use
""""Implementation of `DbImporter` for the COD database."""
from aiida.tools.dbimporters.baseclasses import CifEntry, DbImporter, DbSearchResults


[docs]class CodDbImporter(DbImporter): """ Database importer for Crystallography Open Database. """
[docs] def _int_clause(self, key, alias, values): """ Returns SQL query predicate for querying integer fields. """ for value in values: if not isinstance(value, int) and not isinstance(value, str): raise ValueError(f"incorrect value for keyword '{alias}' only integers and strings are accepted") return f"{key} IN ({', '.join(str(int(i)) for i in values)})"
[docs] def _str_exact_clause(self, key, alias, values): """ Returns SQL query predicate for querying string fields. """ clause_parts = [] for value in values: if not isinstance(value, int) and not isinstance(value, str): raise ValueError(f"incorrect value for keyword '{alias}' only integers and strings are accepted") if isinstance(value, int): value = str(value) clause_parts.append(f"'{value}'") return f"{key} IN ({', '.join(clause_parts)})"
[docs] def _str_exact_or_none_clause(self, key, alias, values): """ Returns SQL query predicate for querying string fields, allowing to use Python's "None" in addition. """ if None in values: values_now = [] for value in values: if value is not None: values_now.append(value) if values_now: clause = self._str_exact_clause(key, alias, values_now) return f'{clause} OR {key} IS NULL' return f'{key} IS NULL' return self._str_exact_clause(key, alias, values)
[docs] def _formula_clause(self, key, alias, values): """ Returns SQL query predicate for querying formula fields. """ for value in values: if not isinstance(value, str): raise ValueError(f"incorrect value for keyword '{alias}' only strings are accepted") return self._str_exact_clause(key, alias, [f'- {f} -' for f in values])
[docs] def _str_fuzzy_clause(self, key, alias, values): """ Returns SQL query predicate for fuzzy querying of string fields. """ clause_parts = [] for value in values: if not isinstance(value, int) and not isinstance(value, str): raise ValueError(f"incorrect value for keyword '{alias}' only integers and strings are accepted") if isinstance(value, int): value = str(value) clause_parts.append(f"{key} LIKE '%{value}%'") return ' OR '.join(clause_parts)
[docs] def _composition_clause(self, _, alias, values): """ Returns SQL query predicate for querying elements in formula fields. """ clause_parts = [] for value in values: if not isinstance(value, str): raise ValueError(f"incorrect value for keyword '{alias}' only strings are accepted") clause_parts.append(f"formula REGEXP ' {value}[0-9 ]'") return ' AND '.join(clause_parts)
[docs] def _double_clause(self, key, alias, values, precision): """ Returns SQL query predicate for querying double-valued fields. """ for value in values: if not isinstance(value, int) and not isinstance(value, float): raise ValueError(f"incorrect value for keyword '{alias}' only integers and floats are accepted") return ' OR '.join(f'{key} BETWEEN {d - precision} AND {d + precision}' for d in values)
length_precision = 0.001 angle_precision = 0.001 volume_precision = 0.001 temperature_precision = 0.001 pressure_precision = 1
[docs] def _length_clause(self, key, alias, values): """ Returns SQL query predicate for querying lattice vector lengths. """ return self._double_clause(key, alias, values, self.length_precision)
[docs] def _angle_clause(self, key, alias, values): """ Returns SQL query predicate for querying lattice angles. """ return self._double_clause(key, alias, values, self.angle_precision)
[docs] def _volume_clause(self, key, alias, values): """ Returns SQL query predicate for querying unit cell volume. """ return self._double_clause(key, alias, values, self.volume_precision)
[docs] def _temperature_clause(self, key, alias, values): """ Returns SQL query predicate for querying temperature. """ return self._double_clause(key, alias, values, self.temperature_precision)
[docs] def _pressure_clause(self, key, alias, values): """ Returns SQL query predicate for querying pressure. """ return self._double_clause(key, alias, values, self.pressure_precision)
_keywords = { 'id': ['file', _int_clause], 'element': ['element', _composition_clause], 'number_of_elements': ['nel', _int_clause], 'mineral_name': ['mineral', _str_fuzzy_clause], 'chemical_name': ['chemname', _str_fuzzy_clause], 'formula': ['formula', _formula_clause], 'volume': ['vol', _volume_clause], 'spacegroup': ['sg', _str_exact_clause], 'spacegroup_hall': ['sgHall', _str_exact_clause], 'a': ['a', _length_clause], 'b': ['b', _length_clause], 'c': ['c', _length_clause], 'alpha': ['alpha', _angle_clause], 'beta': ['beta', _angle_clause], 'gamma': ['gamma', _angle_clause], 'z': ['Z', _int_clause], 'measurement_temp': ['celltemp', _temperature_clause], 'diffraction_temp': ['diffrtemp', _temperature_clause], 'measurement_pressure': ['cellpressure', _pressure_clause], 'diffraction_pressure': ['diffrpressure', _pressure_clause], 'authors': ['authors', _str_fuzzy_clause], 'journal': ['journal', _str_fuzzy_clause], 'title': ['title', _str_fuzzy_clause], 'year': ['year', _int_clause], 'journal_volume': ['volume', _int_clause], 'journal_issue': ['issue', _str_exact_clause], 'first_page': ['firstpage', _str_exact_clause], 'last_page': ['lastpage', _str_exact_clause], 'doi': ['doi', _str_exact_clause], 'determination_method': ['method', _str_exact_or_none_clause] }
[docs] def __init__(self, **kwargs): self._db = None self._cursor = None self._db_parameters = {'host': 'www.crystallography.net', 'user': 'cod_reader', 'passwd': '', 'db': 'cod'} self.setup_db(**kwargs)
[docs] def query_sql(self, **kwargs): """ Forms a SQL query for querying the COD database using ``keyword = value`` pairs, specified in ``kwargs``. :return: string containing a SQL statement. """ sql_parts = ["(status IS NULL OR status != 'retracted')"] for key in sorted(self._keywords.keys()): if key in kwargs: values = kwargs.pop(key) if not isinstance(values, list): values = [values] sql_parts.append(f'({self._keywords[key][1](self, self._keywords[key][0], key, values)})') if kwargs: raise NotImplementedError(f"following keyword(s) are not implemented: {', '.join(kwargs.keys())}") return f"SELECT file, svnrevision FROM data WHERE {' AND '.join(sql_parts)}"
[docs] def query(self, **kwargs): """ Performs a query on the COD database using ``keyword = value`` pairs, specified in ``kwargs``. :return: an instance of :py:class:`aiida.tools.dbimporters.plugins.cod.CodSearchResults`. """ query_statement = self.query_sql(**kwargs) self._connect_db() results = [] try: self._cursor.execute(query_statement) self._db.commit() for row in self._cursor.fetchall(): results.append({'id': str(row[0]), 'svnrevision': str(row[1])}) finally: self._disconnect_db() return CodSearchResults(results)
[docs] def setup_db(self, **kwargs): """ Changes the database connection details. """ for key in self._db_parameters: if key in kwargs: self._db_parameters[key] = kwargs.pop(key) if len(kwargs.keys()) > 0: raise NotImplementedError( "unknown database connection parameter(s): '" + "', '".join(kwargs.keys()) + "', available parameters: '" + "', '".join(self._db_parameters.keys()) + "'" )
[docs] def get_supported_keywords(self): """ Returns the list of all supported query keywords. :return: list of strings """ return self._keywords.keys()
[docs] def _connect_db(self): """ Connects to the MySQL database for performing searches. """ try: import MySQLdb except ImportError: import pymysql as MySQLdb self._db = MySQLdb.connect( host=self._db_parameters['host'], user=self._db_parameters['user'], passwd=self._db_parameters['passwd'], db=self._db_parameters['db'] ) self._cursor = self._db.cursor()
[docs] def _disconnect_db(self): """ Closes connection to the MySQL database. """ self._db.close()
[docs]class CodSearchResults(DbSearchResults): # pylint: disable=abstract-method """ Results of the search, performed on COD. """ _base_url = 'http://www.crystallography.net/cod/'
[docs] def __init__(self, results): super().__init__(results) self._return_class = CodEntry
[docs] def __len__(self): return len(self._results)
[docs] def _get_source_dict(self, result_dict): """ Returns a dictionary, which is passed as kwargs to the created DbEntry instance, describing the source of the entry. :param result_dict: dictionary, describing an entry in the results. """ source_dict = {'id': result_dict['id']} if 'svnrevision' in result_dict and \ result_dict['svnrevision'] is not None: source_dict['version'] = result_dict['svnrevision'] return source_dict
[docs] def _get_url(self, result_dict): """ Returns an URL of an entry CIF file. :param result_dict: dictionary, describing an entry in the results. """ url = f"{self._base_url + result_dict['id']}.cif" if 'svnrevision' in result_dict and \ result_dict['svnrevision'] is not None: return f"{url}@{result_dict['svnrevision']}" return url
[docs]class CodEntry(CifEntry): # pylint: disable=abstract-method """ Represents an entry from COD. """ _license = 'CC0'
[docs] def __init__( self, uri, db_name='Crystallography Open Database', db_uri='http://www.crystallography.net/cod', **kwargs ): """ Creates an instance of :py:class:`aiida.tools.dbimporters.plugins.cod.CodEntry`, related to the supplied URI. """ super().__init__(db_name=db_name, db_uri=db_uri, uri=uri, **kwargs)