Source code for aiida.common.escaping

# -*- coding: utf-8 -*-
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
"""Miscellaneous functions for escaping strings."""

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import re

def escape_for_bash(str_to_escape):
    """
    Escape a value so that bash interprets it as one single, literal string.

    The result is enclosed in single quotes. The only character that then
    needs special treatment is the single quote itself: every ``'`` in the
    input is replaced by the five characters ``'"'"'``, which read as:

    - first ``'``: leave the enclosing single-quoted string;
    - ``"'"``: a single quote character, protected by double quotes;
    - last ``'``: reopen the single-quoted string and continue.

    In the Python source this replacement text has to be written inside
    triple quotes, hence the odd-looking literal below.

    :param str_to_escape: the value to escape. ``None`` gives an empty string
        (no quotes at all). Values that are not strings (numbers, path
        objects, ...) are converted with ``str`` before escaping.
    :return: the escaped string, or ``''`` if the input is ``None``
    """
    if str_to_escape is None:
        return ''

    # (bytes, str, unicode) spelled so that it works on both Python 2 and 3.
    # bytes is listed on purpose: on Python 3 it is left untouched here rather
    # than being turned into its "b'...'" repr by str().
    string_types = (bytes, str, type(u''))
    if not isinstance(str_to_escape, string_types):
        str_to_escape = str(str_to_escape)

    escaped_quotes = str_to_escape.replace("'", """'"'"'""")
    return "'{}'".format(escaped_quotes)
# Ordered translation table: SQL LIKE token -> equivalent regex fragment.
# The SQL tokens are written as raw strings, so each backslash shown is one
# literal backslash in the token. The escaped forms are listed before the bare
# wildcards; get_regex_pattern_from_sql depends on this order.
SQL_TO_REGEX_TOKENS = [
    (r'\\', re.escape('\\')),  # two backslashes -> one literal backslash
    (r'\%', re.escape('%')),  # backslash + percent -> literal percent sign
    (r'\_', re.escape('_')),  # backslash + underscore -> literal underscore
    ('%', '.*'),  # bare percent -> any number of characters
    ('_', '.'),  # bare underscore -> exactly one character
]
def escape_for_sql_like(string):
    r"""
    Escape a user-provided string so that SQL ``LIKE`` matches it literally.

    SQL LIKE syntax summary:

    - ``%`` -> match any number of characters
    - ``_`` -> match exactly one character
    - ``\`` -> escape character: ``\%``, ``\_`` and ``\\`` stand for a literal
      ``%``, ``_`` and ``\`` respectively

    All three special characters are escaped. The backslash has to be escaped
    too: otherwise an input ending in backslash + ``%`` would come out as an
    escaped backslash followed by an active ``%`` wildcard.

    :param string: the string to escape
    :return: the string with ``\``, ``%`` and ``_`` each preceded by a backslash
    """
    # Backslashes must go first, so that the backslashes introduced for
    # % and _ below are not themselves doubled.
    return string.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
def get_regex_pattern_from_sql(sql_pattern):
    r"""
    Convert a pattern in SQL ``LIKE`` syntax into a regex string that performs the same match.

    SQL LIKE syntax summary:

    - ``%`` -> match any number of characters
    - ``_`` -> match exactly one character

    Moreover, ``\`` is the escape character (by default), so:

    - ``\\`` -> single backslash
    - ``\%`` -> literal % symbol
    - ``\_`` -> literal _ symbol

    The returned regex is anchored with ``^`` at the start and ``$`` at the end.

    The pattern is split in a single left-to-right pass, without recursion,
    so the number of tokens in the pattern is not limited by the interpreter
    recursion limit.

    :param sql_pattern: the string with the pattern in SQL syntax
    :return: a string with the pattern in regex syntax
    """
    replacements = dict(SQL_TO_REGEX_TOKENS)

    # A single alternation of all SQL tokens, in the order of the table. Regex
    # alternation is ordered, so at any position the escaped forms
    # (\\, \%, \_) are tried before the bare wildcards (%, _).
    token_regex = re.compile('({})'.format('|'.join(re.escape(token) for token, _ in SQL_TO_REGEX_TOKENS)))

    # Because the pattern has one capturing group, re.split returns the text
    # between tokens at even indices and the matched tokens at odd indices.
    pieces = token_regex.split(sql_pattern)

    translated = [
        # Tokens are mapped to their regex equivalent. Everything else is
        # literal text and is escaped, so that regex metacharacters such as
        # $ ^ [ ] typed by the user are matched literally.
        replacements[piece] if index % 2 else re.escape(piece)
        for index, piece in enumerate(pieces)
    ]

    return '^{}$'.format(''.join(translated))
def sql_string_match(string, pattern):
    r"""
    Check if the string matches the provided pattern, specified using SQL syntax.

    See documentation of :py:func:`~aiida.common.escaping.get_regex_pattern_from_sql`
    for an explanation of the syntax.

    The whole string has to match, and the wildcards ``%`` and ``_`` also
    match newline characters.

    :param string: the string to check
    :param pattern: the SQL pattern
    :return: True if the string matches, False otherwise
    """
    regex = get_regex_pattern_from_sql(pattern)

    # `$` alone also matches just before a trailing newline, which would let
    # 'abc\n' match the pattern 'abc'. `\Z` only matches at the very end.
    anchored = r'(?:{})\Z'.format(regex)

    # DOTALL so that the `.` generated for % and _ also matches newlines.
    return bool(re.match(anchored, string, flags=re.DOTALL))