Source code for aiida.backends.djsite.queries

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
from aiida.backends.general.abstractqueries import AbstractQueryManager


class QueryManagerDjango(AbstractQueryManager):
    """
    Query manager for the Django backend.

    Collects backend-specific queries (job calculations by state, node
    creation statistics, bands/structure listings, ancestor lookups).
    """

    def query_jobcalculations_by_computer_user_state(
            self, state, computer=None, user=None,
            only_computer_user_pairs=False,
            only_enabled=True, limit=None):
        """
        Filter all calculations with a given state.

        :param state: The state to be used to filter (should be a string among
            those defined in aiida.common.datastructures.calc_states)
        :type state: str
        :param computer: a Django DbComputer entry, or a Computer object, of a
            computer in the DbComputer table.
            A string for the hostname is also valid.
        :param user: a Django entry (or its pk) of a user in the DbUser table;
            if present, the results are restricted to calculations of that
            specific user
        :param bool only_computer_user_pairs: if False (default) return a
            queryset where each element is a suitable instance of Node (it
            should be an instance of Calculation, if everything goes right!)
            If True, return only a list of tuples, one per distinct
            ('dbcomputer__id', 'user__id') pair found, where each tuple holds
            the corresponding objects ``(Computer, user)`` rather than the
            bare IDs.
        :param bool only_enabled: if True (default), add the filter
            ``dbcomputer__enabled=True`` to the query.
        :param limit: if not None, the queryset is sliced to its first
            ``limit`` elements. Ignored when ``only_computer_user_pairs`` is
            True.

        :raises InputValidationError: if ``state`` is not one of the valid
            calculation states.
        :return: the calculations matching the filters (or the list of
            ``(Computer, user)`` pairs, see ``only_computer_user_pairs``).
        """
        # Here I am overriding the implementation using the QueryBuilder.
        # I assume that calc_states are strings. If this changes in the future,
        # update the filter below from dbattributes__tval to the correct field.
        from aiida.orm import Computer, User
        from aiida.common.exceptions import InputValidationError
        from aiida.orm.implementation.django.calculation.job import JobCalculation
        from aiida.common.datastructures import calc_states
        from aiida.backends.djsite.db.models import DbUser

        if state not in calc_states:
            raise InputValidationError("querying for calculation state='{}', but it "
                                       "is not a valid calculation state".format(state))

        kwargs = {}
        if computer is not None:
            # I convert it from various type of inputs
            # (string, DbComputer, Computer)
            # to a DbComputer type
            kwargs['dbcomputer'] = Computer.get(computer).dbcomputer
        if user is not None:
            kwargs['user'] = user
        if only_enabled:
            kwargs['dbcomputer__enabled'] = True

        queryresults = JobCalculation.query(
            dbattributes__key='state',
            dbattributes__tval=state,
            **kwargs)

        if only_computer_user_pairs:
            computer_users_ids = queryresults.values_list(
                'dbcomputer__id', 'user__id').distinct()
            # Turn every distinct (computer id, user id) pair into the
            # corresponding pair of objects.
            return [
                (Computer.get(computer_id),
                 DbUser.objects.get(pk=user_id).get_aiida_class())
                for computer_id, user_id in computer_users_ids
            ]

        if limit is not None:
            return queryresults[:limit]

        return queryresults

    def get_creation_statistics(self, user_email=None):
        """
        Return a dictionary with the statistics of node creation, summarized
        by day, optimized for the Django backend.

        :note: Days when no nodes were created are not present in the returned
            `ctime_by_day` dictionary.

        :param user_email: If None (default), return statistics for all users.
            If an email is specified, return only the statistics for the given
            user.

        :return: a dictionary as follows::

                {
                   "total": TOTAL_NUM_OF_NODES,
                   "types": {TYPESTRING1: count, TYPESTRING2: count, ...},
                   "ctime_by_day": {'YYYY-MM-DD': count, ...}
                }

            where in `ctime_by_day` the key is a string in the format
            'YYYY-MM-DD' and the value is an integer with the number of nodes
            created that day.
        """
        import sqlalchemy as sa
        from aiida.backends.djsite.querybuilder_django import dummy_model

        # Get the session (uses internally aldjemy - so, sqlalchemy) also
        # for the Djsite backend
        session = dummy_model.get_aldjemy_session()
        node = dummy_model.DbNode

        total_query = session.query(node)
        types_query = session.query(
            node.type.label('typestring'),
            sa.func.count(node.id))
        days_query = session.query(
            sa.func.date_trunc('day', node.ctime).label('cday'),
            sa.func.count(node.id))

        if user_email is not None:
            # The parameter used to be accepted but silently ignored; apply
            # the same user restriction to all three queries.
            # NOTE(review): assumes dummy_model exposes DbUser (with an
            # `email` column) and DbNode.user_id -- confirm against
            # querybuilder_django.dummy_model.
            user = dummy_model.DbUser

            def only_this_user(query):
                return query.join(user, node.user_id == user.id).filter(
                    user.email == user_email)

            total_query = only_this_user(total_query)
            types_query = only_this_user(types_query)
            days_query = only_this_user(days_query)

        retdict = {}

        # Total number of nodes
        retdict["total"] = total_query.count()

        # Nodes per type
        retdict["types"] = dict(types_query.group_by('typestring').all())

        # Nodes created per day
        stat = days_query.group_by('cday').order_by('cday').all()
        retdict["ctime_by_day"] = {
            day.strftime('%Y-%m-%d'): count for day, count in stat
        }

        # Still not containing all dates
        return retdict

    # temporary fix only for DJANGO backend
    # Will be useless when the _join_ancestors method of the QueryBuilder
    # will be re-implemented without using the DbPath
    def query_past_days(self, q_object, args):
        """
        Subselect to filter data nodes by their age.

        If ``args.past_days`` is not None, a ``ctime >= now - past_days``
        condition is AND-ed into ``q_object`` in place.

        :param q_object: a query object
        :param args: a namespace with parsed command line parameters.
        """
        from aiida.utils import timezone
        from django.db.models import Q
        import datetime

        if args.past_days is not None:
            now = timezone.now()
            n_days_ago = now - datetime.timedelta(days=args.past_days)
            q_object.add(Q(ctime__gte=n_days_ago), Q.AND)

    def query_group(self, q_object, args):
        """
        Subselect to filter data nodes by their group.

        Conditions on ``args.group_name`` and ``args.group_pk`` (when not
        None) are AND-ed into ``q_object`` in place.

        :param q_object: a query object
        :param args: a namespace with parsed command line parameters.
        """
        from django.db.models import Q

        if args.group_name is not None:
            q_object.add(Q(dbgroups__name__in=args.group_name), Q.AND)
        if args.group_pk is not None:
            q_object.add(Q(dbgroups__pk__in=args.group_pk), Q.AND)

    def get_bands_and_parents_structure(self, args):
        """
        Returns bands and closest parent structure

        :param args: a namespace with parsed command line parameters; the
            attributes read here are ``all_users``, ``past_days``,
            ``group_name``, ``group_pk``, ``element``, ``element_only`` and
            ``formulamode``.
        :return: a list with one entry per selected BandsData node, each of
            the form ``[pk (as string), formula, creation date, label]``.
            The formula is ``"<<UNKNOWN>>"`` when no parent structure was
            found or the formula could not be built.
        """
        from collections import defaultdict
        from django.db.models import Q
        from aiida.common.utils import grouper
        from aiida.backends.djsite.db import models
        from aiida.backends.utils import get_automatic_user
        from aiida.orm.data.structure import (get_formula, get_symbols_string)
        from aiida.orm.data.array.bands import BandsData

        query_group_size = 100

        if args.all_users is False:
            q_object = Q(user=get_automatic_user())
        else:
            q_object = Q()

        self.query_past_days(q_object, args)
        self.query_group(q_object, args)

        bands_list = BandsData.query(q_object).distinct().order_by('ctime')
        bands_list_data = bands_list.values_list('pk', 'label', 'ctime')

        # split data in chunks
        grouped_bands_list_data = grouper(
            query_group_size,
            [(pk, label, ctime) for pk, label, ctime in bands_list_data])

        filter_on_elements = (args.element is not None or
                              args.element_only is not None)

        entry_list = []

        for this_chunk in grouped_bands_list_data:
            # gather all banddata pks
            pks = [row[0] for row in this_chunk]

            # get the closest structures (WITHOUT DbPath)
            structure_filter = Q(type='data.structure.StructureData.')
            structure_dict = get_closest_parents(
                pks, structure_filter, chunk_size=1)

            # The returned dictionary only has entries for the bands for which
            # a structure was found: use None for the others, so that they
            # are listed with an unknown formula instead of raising KeyError.
            struc_pks = [structure_dict.get(pk) for pk in pks]
            found_struc_pks = [pk for pk in struc_pks if pk is not None]

            # query for the attributes needed for the structure formula
            attr_query = Q(key__startswith='kinds') | Q(key__startswith='sites')
            attrs = models.DbAttribute.objects.filter(
                attr_query, dbnode__in=found_struc_pks).values_list(
                'dbnode__pk', 'key', 'datatype', 'tval', 'fval',
                'ival', 'bval', 'dval')

            results = defaultdict(dict)
            for attr in attrs:
                results[attr[0]][attr[1]] = {
                    "datatype": attr[2],
                    "tval": attr[3],
                    "fval": attr[4],
                    "ival": attr[5],
                    "bval": attr[6],
                    "dval": attr[7],
                }

            # organize all of it in a dictionary
            deser_data = {}
            for k in results:
                deser_data[k] = models.deserialize_attributes(
                    results[k], sep=models.DbAttribute._sep)

            # prepare the printout
            for ((bid, blabel, bdate), struc_pk) in zip(this_chunk, struc_pks):
                if struc_pk is not None:
                    kinds = deser_data[struc_pk]['kinds']

                    # Exclude structures by the elements
                    if filter_on_elements:
                        all_symbols = [symbol for kind in kinds
                                       for symbol in kind['symbols']]
                        if args.element is not None:
                            if not any([s in args.element
                                        for s in all_symbols]):
                                continue
                        if args.element_only is not None:
                            if not all([s in all_symbols
                                        for s in args.element_only]):
                                continue

                    # build the formula
                    symbol_dict = {
                        k['name']: get_symbols_string(k['symbols'],
                                                      k['weights'])
                        for k in kinds}
                    try:
                        symbol_list = [symbol_dict[s['kind_name']]
                                       for s in deser_data[struc_pk]['sites']]
                        formula = get_formula(symbol_list,
                                              mode=args.formulamode)
                    # If for some reason there is no kind with the name
                    # referenced by the site
                    except KeyError:
                        formula = "<<UNKNOWN>>"
                        # cycle if we imposed the filter on elements
                        if filter_on_elements:
                            continue
                else:
                    formula = "<<UNKNOWN>>"

                entry_list.append([str(bid), str(formula),
                                   bdate.strftime('%d %b %Y'), blabel])

        return entry_list

    def get_all_parents(self, node_pks, return_values=('id',)):
        """
        Get all the parents of given nodes

        The graph is walked upwards, one level at a time, following only
        CREATE and INPUT links, until no further input is found.

        :param node_pks: one node pk or an iterable of node pks
        :param return_values: the field names passed to ``values_list`` to
            select what is returned for every parent (default: only 'id').
        :return: the ``values_list`` of the requested fields for all the
            parents of the nodes
        """
        from aiida.backends.djsite.db import models
        from aiida.common.links import LinkType

        try:
            the_node_pks = list(node_pks)
        except TypeError:
            # A single pk was passed
            the_node_pks = [node_pks]

        followed_links = (LinkType.CREATE.value, LinkType.INPUT.value)

        parents = models.DbNode.objects.none()
        q_inputs = models.DbNode.aiidaobjects.filter(
            outputs__pk__in=the_node_pks,
            output_links__type__in=followed_links).distinct()

        while q_inputs.exists():
            inputs = list(q_inputs)
            parents = q_inputs | parents.all()
            # Move one level up: the inputs of the nodes just found
            q_inputs = models.DbNode.aiidaobjects.filter(
                outputs__in=inputs,
                output_links__type__in=followed_links).distinct()

        return parents.values_list(*return_values)
def get_closest_parents(pks, *args, **kwargs):
    """
    Get the closest parents dbnodes of a set of nodes.

    Starting from the direct inputs of the given nodes, the graph is walked
    upwards one level at a time; at each level the candidate parents are
    filtered with the additional query parameters, and the first match found
    for a node is kept.

    :param pks: one pk or an iterable of pks of nodes
    :param chunk_size: we chunk the pks into groups of this size, to
        optimize the speed (default=50)
    :param print_progress: print the progression if True (default=False).
    :param args: additional query parameters
    :param kwargs: additional query parameters
    :returns: a dictionary of the form
        pk1: pk of closest parent of node with pk1,
        pk2: pk of closest parent of node with pk2
        Nodes for which no matching parent is found have no entry.

    .. note:: It works also if pks is a list of nodes rather than their pks

    .. todo:: find a way to always get a parent (when there is one) from
        each pk. Now, when the same parent has several children in pks,
        only one of them is kept. This is a BUG, related to the use of a
        dictionary (children_dict, see below...).
        For now a work around is to use chunk_size=1.
    """
    from aiida.orm import Node
    from aiida.backends.djsite.db import models
    from aiida.common.utils import grouper

    try:
        the_pks = list(pks)
    except TypeError:
        # A single pk was passed
        the_pks = [pks]

    chunk_size = kwargs.pop('chunk_size', 50)
    print_progress = kwargs.pop('print_progress', False)

    result_dict = {}

    if print_progress:
        # Single-argument print: same output on Python 2 and Python 3
        print("Chunk size: {}".format(chunk_size))

    for i, chunk_pks in enumerate(grouper(chunk_size, the_pks)):
        if print_progress:
            print("Dealing with chunk # {}".format(i))

        result_chunk_dict = {}

        q_pks = Node.query(pk__in=chunk_pks).values_list('pk', flat=True)

        # Now I am looking for the direct parents of the nodes in the chunk:
        q_inputs = models.DbNode.objects.filter(
            outputs__pk__in=q_pks).distinct()

        # Maps the pk of every candidate ancestor to the pk of the node of
        # the chunk it descends to (only one child per ancestor is kept,
        # see the todo in the docstring).
        children_dict = {
            parent_pk: child_pk
            for parent_pk, child_pk
            in q_inputs.values_list('pk', 'outputs__pk')
            if child_pk in q_pks}

        # While I haven't found a closest ancestor for every member
        # of chunk_pks:
        while (q_inputs.count() > 0 and
               len(result_chunk_dict) < len(chunk_pks)):
            matching = q_inputs.filter(*args, **kwargs)
            # Keep only the nodes of the chunk that have no closest parent
            # yet: an ancestor found at a lower level takes precedence.
            result_chunk_dict.update({
                children_dict[parent_pk]: parent_pk
                for parent_pk in matching.values_list('pk', flat=True)
                if children_dict[parent_pk] not in result_chunk_dict})

            # Move one level up: the inputs of the current candidates
            inputs = list(q_inputs.values_list('pk', flat=True))
            inputs_set = set(inputs)
            q_inputs = models.DbNode.objects.filter(
                outputs__pk__in=inputs).distinct()

            # Propagate the original node of the chunk to the new level
            children_dict = {
                parent_pk: children_dict[child_pk]
                for parent_pk, child_pk
                in q_inputs.values_list('pk', 'outputs__pk')
                if child_pk in inputs_set}

        result_dict.update(result_chunk_dict)

    return result_dict