From e06f8a485122d96b9b25dbf3b313a223701bd5f5 Mon Sep 17 00:00:00 2001 From: Lorenz Herzberger <64837895+LaurentDeMontBlanc@users.noreply.github.com> Date: Tue, 21 Jun 2022 10:52:20 +0200 Subject: [PATCH] Planetmint tarantool (#152) * added indexed_pattern_search to basic lua, implemented text_search Signed-off-by: Lorenz Herzberger * adjusted some queries, wip print statements to be removed Signed-off-by: Lorenz Herzberger * fixed get_metadata Signed-off-by: Lorenz Herzberger * added some prints and simplified code Signed-off-by: Lorenz Herzberger * fixed __asset_check Signed-off-by: Lorenz Herzberger * removed print statements Signed-off-by: Lorenz Herzberger * added limit to text_Search Signed-off-by: Lorenz Herzberger --- planetmint/backend/tarantool/basic.lua | 77 +++++++++++++++++++ planetmint/backend/tarantool/query.py | 69 +++++++---------- planetmint/backend/tarantool/schema.py | 10 ++- .../backend/tarantool/transaction/tools.py | 9 ++- planetmint/web/views/metadata.py | 2 +- 5 files changed, 118 insertions(+), 49 deletions(-) diff --git a/planetmint/backend/tarantool/basic.lua b/planetmint/backend/tarantool/basic.lua index bdea22b..fcc46eb 100644 --- a/planetmint/backend/tarantool/basic.lua +++ b/planetmint/backend/tarantool/basic.lua @@ -1 +1,78 @@ box.cfg{listen = 3303} + +function indexed_pattern_search(space_name, field_no, pattern) + if (box.space[space_name] == nil) then + print("Error: Failed to find the specified space") + return nil + end + local index_no = -1 + for i=0,box.schema.INDEX_MAX,1 do + if (box.space[space_name].index[i] == nil) then break end + if (box.space[space_name].index[i].type == "TREE" + and box.space[space_name].index[i].parts[1].fieldno == field_no + and (box.space[space_name].index[i].parts[1].type == "scalar" + or box.space[space_name].index[i].parts[1].type == "string")) then + index_no = i + break + end + end + if (index_no == -1) then + print("Error: Failed to find an appropriate index") + return nil + end + local 
index_search_key = "" + local index_search_key_length = 0 + local last_character = "" + local c = "" + local c2 = "" + for i=1,string.len(pattern),1 do + c = string.sub(pattern, i, i) + if (last_character ~= "%") then + if (c == '^' or c == "$" or c == "(" or c == ")" or c == "." + or c == "[" or c == "]" or c == "*" or c == "+" + or c == "-" or c == "?") then + break + end + if (c == "%") then + c2 = string.sub(pattern, i + 1, i + 1) + if (string.match(c2, "%p") == nil) then break end + index_search_key = index_search_key .. c2 + else + index_search_key = index_search_key .. c + end + end + last_character = c + end + index_search_key_length = string.len(index_search_key) + local result_set = {} + local number_of_tuples_in_result_set = 0 + local previous_tuple_field = "" + while true do + local number_of_tuples_since_last_yield = 0 + local is_time_for_a_yield = false + for _,tuple in box.space[space_name].index[index_no]: + pairs(index_search_key,{iterator = box.index.GE}) do + if (string.sub(tuple[field_no], 1, index_search_key_length) + > index_search_key) then + break + end + number_of_tuples_since_last_yield = number_of_tuples_since_last_yield + 1 + if (number_of_tuples_since_last_yield >= 10 + and tuple[field_no] ~= previous_tuple_field) then + index_search_key = tuple[field_no] + is_time_for_a_yield = true + break + end + previous_tuple_field = tuple[field_no] + if (string.match(tuple[field_no], pattern) ~= nil) then + number_of_tuples_in_result_set = number_of_tuples_in_result_set + 1 + result_set[number_of_tuples_in_result_set] = tuple + end + end + if (is_time_for_a_yield ~= true) then + break + end + require('fiber').yield() + end + return result_set +end \ No newline at end of file diff --git a/planetmint/backend/tarantool/query.py b/planetmint/backend/tarantool/query.py index 6a1b5b0..784a17c 100644 --- a/planetmint/backend/tarantool/query.py +++ b/planetmint/backend/tarantool/query.py @@ -9,6 +9,7 @@ from hashlib import sha256 from operator import 
itemgetter import tarantool.error +import json from planetmint.backend import query from planetmint.backend.utils import module_dispatch_registrar @@ -109,7 +110,7 @@ def store_metadatas(connection, metadata: list): for meta in metadata: connection.run( connection.space("meta_data").insert( - (meta["id"], meta["data"] if not "metadata" in meta else meta["metadata"])) + (meta["id"], json.dumps(meta["data"] if not "metadata" in meta else meta["metadata"]))) ) @@ -122,33 +123,33 @@ def get_metadata(connection, transaction_ids: list): ) if metadata is not None: if len(metadata) > 0: + metadata[0] = list(metadata[0]) + metadata[0][1] = json.loads(metadata[0][1]) + metadata[0] = tuple(metadata[0]) _returned_data.append(metadata) - return _returned_data if len(_returned_data) > 0 else None + return _returned_data @register_query(TarantoolDBConnection) def store_asset(connection, asset): - convert = lambda obj: obj if isinstance(obj, tuple) else (obj, obj["id"], obj["id"]) - try: - return connection.run( - connection.space("assets").insert(convert(asset)), - only_data=False - ) - except tarantool.error.DatabaseError: - pass + def convert(obj): + if isinstance(obj, tuple): + obj = list(obj) + obj[0] = json.dumps(obj[0]) + return tuple(obj) + else: + return (json.dumps(obj), obj["id"], obj["id"]) + + return connection.run( + connection.space("assets").insert(convert(asset)), + only_data=False + ) @register_query(TarantoolDBConnection) def store_assets(connection, assets: list): - convert = lambda obj: obj if isinstance(obj, tuple) else (obj, obj["id"], obj["id"]) for asset in assets: - try: - connection.run( - connection.space("assets").insert(convert(asset)), - only_data=False - ) - except tarantool.error.DatabaseError: - pass + store_asset(connection, asset) @register_query(TarantoolDBConnection) @@ -156,7 +157,8 @@ def get_asset(connection, asset_id: str): _data = connection.run( connection.space("assets").select(asset_id, index="txid_search") ) - return _data[0][0] if 
len(_data) > 0 else [] + + return json.loads(_data[0][0]) if len(_data) > 0 else [] @register_query(TarantoolDBConnection) @@ -165,6 +167,7 @@ def get_assets(connection, assets_ids: list) -> list: for _id in list(set(assets_ids)): asset = get_asset(connection, _id) _returned_data.append(asset) + return sorted(_returned_data, key=lambda k: k["id"], reverse=False) @@ -254,27 +257,13 @@ def get_txids_filtered(connection, asset_id: str, operation: str = None, return tuple([elem[0] for elem in _transactions]) - -# @register_query(TarantoolDB) -# def text_search(conn, search, *, language='english', case_sensitive=False, -# # TODO review text search in tarantool (maybe, remove) -# diacritic_sensitive=False, text_score=False, limit=0, table='assets'): -# cursor = conn.run( -# conn.collection(table) -# .find({'$text': { -# '$search': search, -# '$language': language, -# '$caseSensitive': case_sensitive, -# '$diacriticSensitive': diacritic_sensitive}}, -# {'score': {'$meta': 'textScore'}, '_id': False}) -# .sort([('score', {'$meta': 'textScore'})]) -# .limit(limit)) -# -# if text_score: -# return cursor -# -# return (_remove_text_score(obj) for obj in cursor) - +@register_query(TarantoolDBConnection) +def text_search(conn, search, table='assets', limit=0): + pattern = ".{}.".format(search) + res = conn.run( + conn.space(table).call('indexed_pattern_search', (table, 1 if table == 'assets' else 2, pattern)) + ) + return res[0] if limit == 0 else res[0][:limit] def _remove_text_score(asset): asset.pop('score', None) diff --git a/planetmint/backend/tarantool/schema.py b/planetmint/backend/tarantool/schema.py index 73b13dc..bf7017b 100644 --- a/planetmint/backend/tarantool/schema.py +++ b/planetmint/backend/tarantool/schema.py @@ -39,7 +39,8 @@ INDEX_COMMANDS = { { "txid_search": "assets:create_index('txid_search', {type='hash', parts={'tx_id'}})", "assetid_search": "assets:create_index('assetid_search', {type='tree',unique=false, parts={'asset_id', 'tx_id'}})", - "only_asset_search": 
"assets:create_index('only_asset_search', {type='tree', unique=false, parts={'asset_id'}})" + "only_asset_search": "assets:create_index('only_asset_search', {type='tree', unique=false, parts={'asset_id'}})", + "text_search": "assets:create_index('secondary', {unique=false,parts={1,'string'}})" }, "blocks": { @@ -60,7 +61,8 @@ INDEX_COMMANDS = { }, "meta_data": { - "id_search": "meta_datas:create_index('id_search', { type='hash' , parts={'transaction_id'}})" + "id_search": "meta_datas:create_index('id_search', { type='hash' , parts={'transaction_id'}})", + "text_search": "meta_datas:create_index('secondary', {unique=false,parts={2,'string'}})" }, "pre_commits": { @@ -107,13 +109,13 @@ SCHEMA_COMMANDS = { "abci_chains": "abci_chains:format({{name='height' , type='integer'},{name='is_synched' , type='boolean'},{name='chain_id',type='string'}, {name='id', type='string'}})", "assets": - "assets:format({{name='data' , type='any'}, {name='tx_id', type='string'}, {name='asset_id', type='string'}})", + "assets:format({{name='data' , type='string'}, {name='tx_id', type='string'}, {name='asset_id', type='string'}})", "blocks": "blocks:format{{name='app_hash',type='string'},{name='height' , type='integer'},{name='block_id' , type='string'}}", "blocks_tx": "blocks_tx:format{{name='transaction_id', type = 'string'}, {name = 'block_id', type = 'string'}}", "elections": "elections:format({{name='election_id' , type='string'},{name='height' , type='integer'}, {name='is_concluded' , type='boolean'}})", - "meta_data": "meta_datas:format({{name='transaction_id' , type='string'}, {name='meta_data' , type='any'}})", + "meta_data": "meta_datas:format({{name='transaction_id' , type='string'}, {name='meta_data' , type='string'}})", "pre_commits": "pre_commits:format({{name='commit_id', type='string'}, {name='height',type='integer'}, {name='transactions',type=any}})", "validators": diff --git a/planetmint/backend/tarantool/transaction/tools.py 
b/planetmint/backend/tarantool/transaction/tools.py index 28fea32..998a742 100644 --- a/planetmint/backend/tarantool/transaction/tools.py +++ b/planetmint/backend/tarantool/transaction/tools.py @@ -1,5 +1,6 @@ from secrets import token_hex import copy +import json from planetmint.transactions.common.memoize import HDict @@ -56,14 +57,14 @@ class TransactionDecompose: if metadata is None: return - self._tuple_transaction["metadata"] = (self._transaction["id"], metadata) + self._tuple_transaction["metadata"] = (self._transaction["id"], json.dumps(metadata)) def __asset_check(self): _asset = self._transaction.get("asset") if _asset is None: return asset_id = _asset["id"] if _asset.get("id") is not None else self._transaction["id"] - self._tuple_transaction["asset"] = (_asset, self._transaction["id"], asset_id) + self._tuple_transaction["asset"] = (json.dumps(_asset), self._transaction["id"], asset_id) def __prepare_inputs(self): _inputs = [] @@ -154,10 +155,10 @@ class TransactionCompose: def _get_asset(self): _asset = iter(self.db_results["asset"]) _res_asset = next(iter(next(_asset, iter([]))), None) - return _res_asset + return json.loads(_res_asset) if _res_asset is not None else None def _get_metadata(self): - return self.db_results["metadata"][0][1] if len(self.db_results["metadata"]) == 1 else None + return json.loads(self.db_results["metadata"][0][1]) if len(self.db_results["metadata"]) == 1 else None def _get_inputs(self): _inputs = [] diff --git a/planetmint/web/views/metadata.py b/planetmint/web/views/metadata.py index cf1a61d..d8faf19 100644 --- a/planetmint/web/views/metadata.py +++ b/planetmint/web/views/metadata.py @@ -42,7 +42,7 @@ class MetadataApi(Resource): pool = current_app.config['bigchain_pool'] with pool() as planet: - args['table'] = 'metadata' + args['table'] = 'meta_data' metadata = planet.text_search(**args) try: