mirror of
https://github.com/bigchaindb/bigchaindb.git
synced 2024-10-13 13:34:05 +00:00
Added full text search support for mongodb.
- Create wildcard text index for the assets collection. - Created backend query to do text search on the assets collection. - Added and updated tests.
This commit is contained in:
parent
ce414e46f3
commit
ccaae91601
@ -327,3 +327,28 @@ def get_unvoted_blocks(conn, node_pubkey):
|
||||
'votes': False, '_id': False
|
||||
}}
|
||||
]))
|
||||
|
||||
|
||||
@register_query(MongoDBConnection)
def text_search(conn, search, language='english', case_sensitive=False,
                diacritic_sensitive=False, text_score=False, limit=0):
    """Run a MongoDB ``$text`` query against the ``assets`` collection.

    Matching documents are projected without ``_id`` and with a ``score``
    field holding the ``textScore`` metadata, and are sorted by that score.

    Args:
        conn: Connection providing ``collection()`` and ``run()``.
        search: Value sent as ``$search``.
        language: Value sent as ``$language``.
        case_sensitive: Value sent as ``$caseSensitive``.
        diacritic_sensitive: Value sent as ``$diacriticSensitive``.
        text_score: When truthy, the cursor is returned as is, so every
            document keeps its ``score`` field.
        limit: Value handed to the cursor's ``limit()``.

    Returns:
        The cursor itself when ``text_score`` is truthy, otherwise a
        generator yielding each document with ``score`` removed.
    """
    text_filter = {'$text': {
        '$search': search,
        '$language': language,
        '$caseSensitive': case_sensitive,
        '$diacriticSensitive': diacritic_sensitive,
    }}
    # The score has to be projected for the sort on it to be possible.
    projection = {'score': {'$meta': 'textScore'}, '_id': False}
    ordering = [('score', {'$meta': 'textScore'})]

    cursor = conn.run(
        conn.collection('assets')
        .find(text_filter, projection)
        .sort(ordering)
        .limit(limit))

    if not text_score:
        # Caller did not ask for scores: strip them lazily while iterating.
        return (_remove_text_score(asset) for asset in cursor)
    return cursor
|
||||
|
||||
|
||||
def _remove_text_score(asset):
|
||||
asset.pop('score', None)
|
||||
return asset
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
import logging
|
||||
|
||||
from pymongo import ASCENDING, DESCENDING
|
||||
from pymongo import ASCENDING, DESCENDING, TEXT
|
||||
|
||||
from bigchaindb import backend
|
||||
from bigchaindb.common import exceptions
|
||||
@ -113,3 +113,6 @@ def create_assets_secondary_index(conn, dbname):
|
||||
conn.conn[dbname]['assets'].create_index('id',
|
||||
name='asset_id',
|
||||
unique=True)
|
||||
|
||||
# full text search index
|
||||
conn.conn[dbname]['assets'].create_index([('$**', TEXT)], name='text')
|
||||
|
@ -325,3 +325,11 @@ def get_txids_filtered(connection, asset_id, operation=None):
|
||||
"""
|
||||
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@singledispatch
def text_search(conn, search, language='english', case_sensitive=False,
                diacritic_sensitive=False, text_score=False, limit=0):
    """Return the assets that match a full text search.

    This is the generic entry point: ``singledispatch`` selects the
    implementation from the type of ``conn``. A backend that has not
    registered an implementation falls through to this default, which
    always raises.

    Args:
        conn: Backend connection; its type selects the implementation.
        search (str): Text search string.
        language (str, optional): Language used for the search.
        case_sensitive (bool, optional): Enable case sensitive search.
        diacritic_sensitive (bool, optional): Enable diacritic sensitive
            search.
        text_score (bool, optional): If ``True``, return the text score
            together with each asset.
        limit (int, optional): Limit on the number of returned assets.

    Returns:
        An iterable of asset documents (backend specific).

    Raises:
        NotImplementedError: If no implementation is registered for the
            type of ``conn``.
    """

    raise NotImplementedError
|
||||
|
@ -464,3 +464,87 @@ def test_get_assets():
|
||||
|
||||
assert cursor.count() == 2
|
||||
assert list(cursor.sort('id', pymongo.ASCENDING)) == assets[::2]
|
||||
|
||||
|
||||
def test_text_search():
    """Check ``query.text_search`` against a small set of stored assets.

    Covers single and multiple terms, phrases, term exclusion, language
    selection, case/diacritic sensitivity, score output and ``limit``.
    """
    from bigchaindb.backend import connect, query
    conn = connect()

    # Example data and tests cases taken from the mongodb documentation
    # https://docs.mongodb.com/manual/reference/operator/query/text/
    assets = [
        {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
        {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
        {'id': 3, 'subject': 'Baking a cake', 'author': 'abc', 'views': 90},
        {'id': 4, 'subject': 'baking', 'author': 'xyz', 'views': 100},
        {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200},
        {'id': 6, 'subject': 'Сырники', 'author': 'jkl', 'views': 80},
        {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10},
        {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10}
    ]
    # Name each fixture by its id so the expectations below read directly.
    (asset_1, asset_2, asset_3, asset_4,
     asset_5, asset_6, asset_7, asset_8) = assets

    def run_search(*args, **kwargs):
        # Materialise the result so it can be compared with a plain list.
        return list(query.text_search(conn, *args, **kwargs))

    # Insert copies so the expected documents above stay exactly as
    # written (presumably the driver adds ``_id`` to what it inserts).
    conn.db.assets.insert_many(deepcopy(assets), ordered=False)

    # test search single word
    assert run_search('coffee') == [asset_1, asset_2, asset_7]

    # match any of the search terms
    assert run_search('bake coffee cake') == [
        asset_3, asset_1, asset_4, asset_2, asset_7,
    ]

    # search for a phrase
    assert run_search('\"coffee shop\"') == [asset_2]

    # exclude documents that contain a term
    assert run_search('coffee -shop') == [asset_1, asset_7]

    # search different language
    assert run_search('leche', language='es') == [asset_5, asset_8]

    # case and diacritic insensitive search
    assert run_search('сы́рники CAFÉS') == [asset_6, asset_5, asset_8]

    # case sensitive search
    assert run_search('Coffee', case_sensitive=True) == [asset_2]

    # diacritic sensitive search
    assert run_search('CAFÉ', diacritic_sensitive=True) == [asset_5]

    # return text score
    assert run_search('coffee', text_score=True) == [
        dict(asset_1, score=1.0),
        dict(asset_2, score=0.75),
        dict(asset_7, score=0.75),
    ]

    # limit search result
    assert run_search('coffee', limit=2) == [asset_1, asset_2]
|
||||
|
@ -33,7 +33,7 @@ def test_init_creates_db_tables_and_indexes():
|
||||
assert sorted(indexes) == ['_id_', 'block_and_voter']
|
||||
|
||||
indexes = conn.conn[dbname]['assets'].index_information().keys()
|
||||
assert sorted(indexes) == ['_id_', 'asset_id']
|
||||
assert sorted(indexes) == ['_id_', 'asset_id', 'text']
|
||||
|
||||
|
||||
def test_init_database_fails_if_db_exists():
|
||||
|
Loading…
x
Reference in New Issue
Block a user