Source code for aiida.manage.database.delete.nodes

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
"""Function to delete nodes from the database."""
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function

from six.moves import zip

import click

from aiida.cmdline.utils import echo


def delete_nodes(pks,
                 follow_calls=False,
                 follow_returns=False,
                 dry_run=False,
                 force=False,
                 disable_checks=False,
                 verbosity=0):
    """
    Delete nodes by a list of pks.

    Starting from the given pks, the provenance graph is traversed along outgoing ``CREATE``, ``INPUT_CALC`` and
    ``INPUT_WORK`` links (and optionally call and return links), and every node reached this way is deleted together
    with the starting nodes. The repository folders of the deleted nodes are erased only after the database deletion
    has completed.

    :note: The script will also delete all children calculations generated from the specified nodes.

    :param pks: a list of the PKs of the nodes to delete. PKs that cannot be loaded are skipped with a warning.
    :param bool follow_calls: Follow calls, i.e. also traverse ``CALL_CALC`` and ``CALL_WORK`` links.
    :param bool follow_returns:
        Follow returns. This is a very dangerous option, since anything returned by a workflow might have
        been used as input in many other calculations. Use with care, and never combine with force.
    :param bool dry_run: Do not delete, a dry run, with statistics printed according to verbosity levels.
    :param bool force: Do not ask for confirmation to delete nodes.
    :param bool disable_checks:
        If True, will not check whether calculations are losing created data or called instances.
        If checks are disabled, also logging is disabled.
    :param int verbosity:
        The verbosity levels, 0 prints nothing, 1 prints just sums and total, 2 prints individual nodes.
    :return: None. The function returns early, without deleting, if no valid pk is given, on a dry run, or if the
        user declines the confirmation prompt.
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements
    from aiida.backends.utils import delete_nodes_and_connections
    from aiida.common import exceptions
    from aiida.common.links import LinkType
    from aiida.orm import User, Node, ProcessNode, Data, QueryBuilder, load_node

    user_email = User.objects.get_default().email

    # Used in every report line: a dry run only describes what "would" happen.
    verb = 'would' if dry_run else 'will'

    # Keep only the pks that can actually be loaded.
    starting_pks = []
    for pk in pks:
        try:
            load_node(pk)
        except exceptions.NotExistent:
            echo.echo_warning('warning: node with pk<{}> does not exist, skipping'.format(pk))
        else:
            starting_pks.append(pk)

    if not starting_pks:
        # Checked explicitly: an empty set might be problematic for the queries done below.
        if verbosity:
            echo.echo("Nothing to delete")
        return

    # The following code is just for the querying of downwards provenance.
    # Ideally, there should be a module to interface with, but this is the solution for now.
    # By only dealing with ids, and keeping track of what has been already
    # visited in the query, there's good performance and no infinite loops.
    link_types_to_follow = [LinkType.CREATE.value, LinkType.INPUT_CALC.value, LinkType.INPUT_WORK.value]
    if follow_calls:
        link_types_to_follow.append(LinkType.CALL_CALC.value)
        link_types_to_follow.append(LinkType.CALL_WORK.value)
    if follow_returns:
        link_types_to_follow.append(LinkType.RETURN.value)

    edge_filters = {'type': {'in': link_types_to_follow}}

    # The operational set always holds the nodes added in the last iteration; both sets start as
    # independent copies of the starting pks.
    operational_set = set(starting_pks)
    pks_set_to_delete = set(starting_pks)
    while operational_set:
        # new_pks_set are the pks of all nodes that are connected to the operational node set
        # with the links specified.
        traversal_qb = QueryBuilder()
        traversal_qb.append(Node, filters={'id': {'in': operational_set}})
        traversal_qb.append(Node, project='id', edge_filters=edge_filters)
        new_pks_set = {found_pk for found_pk, in traversal_qb.iterall()}

        # The operational set is only those pks that haven't been yet put into the pks_set_to_delete.
        operational_set = new_pks_set.difference(pks_set_to_delete)

        # Add the newly found pks to the set of nodes to delete.
        pks_set_to_delete = pks_set_to_delete.union(new_pks_set)

    if verbosity > 0:
        echo.echo("I {} delete {} node{}".format(verb, len(pks_set_to_delete), 's'
                                                 if len(pks_set_to_delete) > 1 else ''))
        if verbosity > 1:
            builder = QueryBuilder().append(
                Node, filters={'id': {
                    'in': pks_set_to_delete
                }}, project=('uuid', 'id', 'node_type', 'label'))
            echo.echo("The nodes I {} delete:".format(verb))
            for uuid, pk, type_string, label in builder.iterall():
                # Show the second-to-last dotted component of the type string when there is one,
                # otherwise fall back to the full type string.
                try:
                    short_type_string = type_string.split('.')[-2]
                except IndexError:
                    short_type_string = type_string
                echo.echo(" {} {} {} {}".format(uuid, pk, short_type_string, label))

    # Here I am checking whether I am deleting
    # A data instance without also deleting the creator, which breaks the relationship between a calculation
    # and its data
    # A calculation instance that was called, without also deleting the caller.
    if not disable_checks:
        # Process nodes that are kept but called a process node that is going to be deleted.
        called_qb = QueryBuilder()
        called_qb.append(ProcessNode, filters={'id': {'!in': pks_set_to_delete}}, project='id')
        called_qb.append(
            ProcessNode,
            project='node_type',
            edge_project='label',
            filters={'id': {
                'in': pks_set_to_delete
            }},
            edge_filters={'type': {
                'in': [LinkType.CALL_CALC.value, LinkType.CALL_WORK.value]
            }})
        caller_to_called2delete = called_qb.all()

        if verbosity > 0 and caller_to_called2delete:
            # First column of the result rows: the pks of the callers that are kept.
            calculation_pks_losing_called = set(next(zip(*caller_to_called2delete)))
            echo.echo("\n{} calculation{} {} lose at least one called instance".format(
                len(calculation_pks_losing_called), 's' if len(calculation_pks_losing_called) > 1 else '', verb))
            if verbosity > 1:
                echo.echo("These are the calculations that {} lose a called instance:".format(verb))
                for calc_losing_called_pk in calculation_pks_losing_called:
                    # The node has to be formatted into the message itself: `echo.echo` is not `print`,
                    # extra positional arguments are not appended to the output.
                    echo.echo(' {}'.format(load_node(calc_losing_called_pk)))

        # Process nodes that are kept but created a data node that is going to be deleted.
        created_qb = QueryBuilder()
        created_qb.append(ProcessNode, filters={'id': {'!in': pks_set_to_delete}}, project='id')
        created_qb.append(
            Data,
            project='node_type',
            edge_project='label',
            filters={'id': {
                'in': pks_set_to_delete
            }},
            edge_filters={'type': {
                '==': LinkType.CREATE.value
            }})
        creator_to_created2delete = created_qb.all()

        if verbosity > 0 and creator_to_created2delete:
            # First column of the result rows: the pks of the creators that are kept.
            calculation_pks_losing_created = set(next(zip(*creator_to_created2delete)))
            echo.echo("\n{} calculation{} {} lose at least one created data-instance".format(
                len(calculation_pks_losing_created), 's' if len(calculation_pks_losing_created) > 1 else '', verb))
            if verbosity > 1:
                echo.echo("These are the calculations that {} lose a created data-instance:".format(verb))
                for calc_losing_created_pk in calculation_pks_losing_created:
                    # Same as above: format the node into the message string.
                    echo.echo(' {}'.format(load_node(calc_losing_created_pk)))

    if dry_run:
        if verbosity > 0:
            echo.echo("\nThis was a dry run, exiting without deleting anything")
        return

    # Asking for user confirmation here
    if not force:
        echo.echo_warning("YOU ARE ABOUT TO DELETE {} NODES! THIS CANNOT BE UNDONE!".format(len(pks_set_to_delete)))
        if not click.confirm("Shall I continue?"):
            echo.echo("Exiting without deleting")
            return

    # Recover the list of folders to delete before actually deleting the nodes. I will delete the folders only later,
    # so that if there is a problem during the deletion of the nodes in the DB, I don't delete the folders
    repositories = [load_node(pk)._repository for pk in pks_set_to_delete]  # pylint: disable=protected-access

    delete_nodes_and_connections(pks_set_to_delete)

    if not disable_checks:
        # I pass now to the log the information for calculations losing created data or called instances
        for calc_pk, calc_type_string, link_label in caller_to_called2delete:
            calc = load_node(calc_pk)
            calc.logger.warning("User {} deleted "
                                "an instance of type {} "
                                "called with the label {} "
                                "by this calculation".format(user_email, calc_type_string, link_label))

        for calc_pk, data_type_string, link_label in creator_to_created2delete:
            calc = load_node(calc_pk)
            calc.logger.warning("User {} deleted "
                                "an instance of type {} "
                                "created with the label {} "
                                "by this calculation".format(user_email, data_type_string, link_label))

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for repository in repositories:
        repository.erase(force=True)