Skip to content

Commit fbe076b

Browse files
committed
Add ability to filter by a list of filters with an ANY match
1 parent e4f007a commit fbe076b

4 files changed

Lines changed: 146 additions & 82 deletions

File tree

README.md

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,34 @@ Now you can query for similar items:
7878
await vec.search([1.0, 9.0])
7979
```
8080

81-
[<Record id=UUID('0c2aceca-9375-4c04-b5b4-01db19eea3c8') metadata='{"action": "jump", "animal": "fox"}' contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>,
82-
<Record id=UUID('4954646e-2677-49c3-91e8-fdc38b8eee75') metadata='{"animal": "fox"}' contents='the brown fox' embedding=array([1. , 1.3], dtype=float32) distance=0.14489260377438218>]
81+
[<Record id=UUID('0313cfac-07e4-4c01-9651-0917ecb1991c') metadata='{"action": "jump", "animal": "fox"}' contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>,
82+
<Record id=UUID('3bde3dd3-9445-4d9e-b72e-f329d19c380d') metadata='{"animal": "fox"}' contents='the brown fox' embedding=array([1. , 1.3], dtype=float32) distance=0.14489260377438218>]
8383

8484
You can specify the number of records to return.
8585

8686
``` python
8787
await vec.search([1.0, 9.0], k=1)
8888
```
8989

90-
[<Record id=UUID('0c2aceca-9375-4c04-b5b4-01db19eea3c8') metadata='{"action": "jump", "animal": "fox"}' contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
90+
[<Record id=UUID('0313cfac-07e4-4c01-9651-0917ecb1991c') metadata='{"action": "jump", "animal": "fox"}' contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
9191

9292
You can also specify a filter on the metadata as a simple dictionary
9393

9494
``` python
9595
await vec.search([1.0, 9.0], k=1, filter={"action": "jump"})
9696
```
9797

98-
[<Record id=UUID('0c2aceca-9375-4c04-b5b4-01db19eea3c8') metadata='{"action": "jump", "animal": "fox"}' contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
98+
[<Record id=UUID('0313cfac-07e4-4c01-9651-0917ecb1991c') metadata='{"action": "jump", "animal": "fox"}' contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
99+
100+
You can also specify a list of filter dictionaries, where an item is
101+
returned if it matches any of the dictionaries:
102+
103+
``` python
104+
await vec.search([1.0, 9.0], k=2, filter=[{"action": "jump"}, {"animal": "fox"}])
105+
```
106+
107+
[<Record id=UUID('0313cfac-07e4-4c01-9651-0917ecb1991c') metadata='{"action": "jump", "animal": "fox"}' contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>,
108+
<Record id=UUID('3bde3dd3-9445-4d9e-b72e-f329d19c380d') metadata='{"animal": "fox"}' contents='the brown fox' embedding=array([1. , 1.3], dtype=float32) distance=0.14489260377438218>]
99109

100110
You can access the fields as follows
101111

@@ -104,7 +114,7 @@ records = await vec.search([1.0, 9.0], k=1, filter={"action": "jump"})
104114
records[0][client.SEARCH_RESULT_ID_IDX]
105115
```
106116

107-
UUID('0c2aceca-9375-4c04-b5b4-01db19eea3c8')
117+
UUID('d282ad19-1a69-4a9d-8a15-6f06262e109a')
108118

109119
``` python
110120
records[0][client.SEARCH_RESULT_METADATA_IDX]
@@ -130,6 +140,12 @@ records[0][client.SEARCH_RESULT_DISTANCE_IDX]
130140

131141
0.00016793422934946456
132142

143+
To delete all records use:
144+
145+
``` python
146+
await vec.delete_all()
147+
```
148+
133149
## Advanced Usage
134150

135151
### Indexing

nbs/00_vector.ipynb

Lines changed: 49 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
"import asyncpg\n",
6767
"import uuid\n",
6868
"from pgvector.asyncpg import register_vector\n",
69-
"from typing import (List, Optional)\n",
69+
"from typing import (List, Optional, Union, Dict, Tuple)\n",
7070
"import json "
7171
]
7272
},
@@ -217,7 +217,7 @@
217217
" return \"CREATE INDEX {index_name} ON {table_name} USING ivfflat ({column_name} {index_method}) WITH (lists = {num_lists});\"\\\n",
218218
" .format(index_name=self._get_embedding_index_name(), table_name=self._quote_ident(self.table_name), column_name=self._quote_ident(column_name), index_method=index_method, num_lists=num_lists)\n",
219219
"\n",
220-
" def search_query(self, query_embedding: List[float], k: int=10, filter: Optional[dict] = None):\n",
220+
" def search_query(self, query_embedding: List[float], k: int=10, filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None) -> Tuple[str, List]:\n",
221221
" \"\"\"\n",
222222
" Generates a similarity query.\n",
223223
"\n",
@@ -232,12 +232,20 @@
232232
" params = []\n",
233233
" distance = \"embedding {op} ${index}\".format(op=self.distance_type, index=len(params)+1)\n",
234234
" params = params + [query_embedding]\n",
235-
" \n",
236-
" where = \"TRUE\"\n",
237-
" if filter != None:\n",
235+
"\n",
236+
" if isinstance(filter, dict):\n",
238237
" where = \"metadata @> ${index}\".format(index=len(params)+1)\n",
239238
" json_object = json.dumps(filter)\n",
240239
" params = params + [json_object]\n",
240+
" elif isinstance(filter, list):\n",
241+
" any_params = []\n",
242+
" for idx, filter_dict in enumerate(filter, start=len(params) + 1):\n",
243+
" any_params.append(json.dumps(filter_dict))\n",
244+
" where = \"metadata @> ANY(${index}::jsonb[])\".format(index=len(params) + 1)\n",
245+
" params = params + [any_params]\n",
246+
" else:\n",
247+
" where = \"TRUE\"\n",
248+
" \n",
241249
" query = '''\n",
242250
" SELECT\n",
243251
" id, metadata, contents, embedding, {distance} as distance\n",
@@ -261,7 +269,7 @@
261269
"text/markdown": [
262270
"---\n",
263271
"\n",
264-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L79){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
272+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L87){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
265273
"\n",
266274
"### QueryBuilder.get_create_query\n",
267275
"\n",
@@ -275,7 +283,7 @@
275283
"text/plain": [
276284
"---\n",
277285
"\n",
278-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L79){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
286+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L87){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
279287
"\n",
280288
"### QueryBuilder.get_create_query\n",
281289
"\n",
@@ -443,7 +451,7 @@
443451
" async def search(self, \n",
444452
" query_embedding: List[float], # vector to search for\n",
445453
" k: int=10, # The number of nearest neighbors to retrieve. Default is 10.\n",
446-
" filter: Optional[dict] = None): # A filter for metadata. Default is None.\n",
454+
" filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None): # A filter for metadata. Default is None.\n",
447455
" \"\"\"\n",
448456
" Retrieves similar records using a similarity query.\n",
449457
"\n",
@@ -465,7 +473,7 @@
465473
"text/markdown": [
466474
"---\n",
467475
"\n",
468-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L229){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
476+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L248){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
469477
"\n",
470478
"### Async.create_tables\n",
471479
"\n",
@@ -479,7 +487,7 @@
479487
"text/plain": [
480488
"---\n",
481489
"\n",
482-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L229){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
490+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L248){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
483491
"\n",
484492
"### Async.create_tables\n",
485493
"\n",
@@ -510,7 +518,7 @@
510518
"text/markdown": [
511519
"---\n",
512520
"\n",
513-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L229){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
521+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L248){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
514522
"\n",
515523
"### Async.create_tables\n",
516524
"\n",
@@ -524,7 +532,7 @@
524532
"text/plain": [
525533
"---\n",
526534
"\n",
527-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L229){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
535+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L248){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
528536
"\n",
529537
"### Async.create_tables\n",
530538
"\n",
@@ -555,12 +563,13 @@
555563
"text/markdown": [
556564
"---\n",
557565
"\n",
558-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L279){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
566+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L311){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
559567
"\n",
560568
"### Async.search\n",
561569
"\n",
562570
"> Async.search (query_embedding:List[float], k:int=10,\n",
563-
"> filter:Optional[dict]=None)\n",
571+
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=No\n",
572+
"> ne)\n",
564573
"\n",
565574
"Retrieves similar records using a similarity query.\n",
566575
"\n",
@@ -570,12 +579,13 @@
570579
"text/plain": [
571580
"---\n",
572581
"\n",
573-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L279){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
582+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L311){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
574583
"\n",
575584
"### Async.search\n",
576585
"\n",
577586
"> Async.search (query_embedding:List[float], k:int=10,\n",
578-
"> filter:Optional[dict]=None)\n",
587+
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=No\n",
588+
"> ne)\n",
579589
"\n",
580590
"Retrieves similar records using a similarity query.\n",
581591
"\n",
@@ -660,6 +670,12 @@
660670
"rec = await vec.search([1.0, 2.0], k=4, filter={\"key_1\":\"val_1\", \"key_2\":\"val_3\"})\n",
661671
"assert len(rec) == 0\n",
662672
"\n",
673+
"rec = await vec.search([1.0, 2.0], k=4, filter=[{\"key_1\":\"val_1\"}, {\"key2\":\"val2\"}])\n",
674+
"assert len(rec) == 2\n",
675+
"\n",
676+
"rec = await vec.search([1.0, 2.0], k=4, filter=[{\"key_1\":\"val_1\"}, {\"key2\":\"val2\"}, {\"no such key\": \"no such val\"}])\n",
677+
"assert len(rec) == 2\n",
678+
"\n",
663679
"try:\n",
664680
" # can't upsert using both keys and dictionaries\n",
665681
" await vec.upsert([ \\\n",
@@ -887,7 +903,7 @@
887903
" with conn.cursor() as cur:\n",
888904
" cur.execute(query)\n",
889905
"\n",
890-
" def search(self, query_embedding: List[float], k: int=10, filter: Optional[dict] = None):\n",
906+
" def search(self, query_embedding: List[float], k: int=10, filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None):\n",
891907
" \"\"\"\n",
892908
" Retrieves similar records using a similarity query.\n",
893909
"\n",
@@ -917,7 +933,7 @@
917933
"text/markdown": [
918934
"---\n",
919935
"\n",
920-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L398){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
936+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L438){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
921937
"\n",
922938
"### Sync.create_tables\n",
923939
"\n",
@@ -931,7 +947,7 @@
931947
"text/plain": [
932948
"---\n",
933949
"\n",
934-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L398){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
950+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L438){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
935951
"\n",
936952
"### Sync.create_tables\n",
937953
"\n",
@@ -962,7 +978,7 @@
962978
"text/markdown": [
963979
"---\n",
964980
"\n",
965-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L382){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
981+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L419){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
966982
"\n",
967983
"### Sync.upsert\n",
968984
"\n",
@@ -979,7 +995,7 @@
979995
"text/plain": [
980996
"---\n",
981997
"\n",
982-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L382){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
998+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L419){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
983999
"\n",
9841000
"### Sync.upsert\n",
9851001
"\n",
@@ -1013,12 +1029,13 @@
10131029
"text/markdown": [
10141030
"---\n",
10151031
"\n",
1016-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L453){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1032+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L507){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
10171033
"\n",
10181034
"### Sync.search\n",
10191035
"\n",
10201036
"> Sync.search (query_embedding:List[float], k:int=10,\n",
1021-
"> filter:Optional[dict]=None)\n",
1037+
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=Non\n",
1038+
"> e)\n",
10221039
"\n",
10231040
"Retrieves similar records using a similarity query.\n",
10241041
"\n",
@@ -1033,12 +1050,13 @@
10331050
"text/plain": [
10341051
"---\n",
10351052
"\n",
1036-
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L453){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
1053+
"[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L507){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
10371054
"\n",
10381055
"### Sync.search\n",
10391056
"\n",
10401057
"> Sync.search (query_embedding:List[float], k:int=10,\n",
1041-
"> filter:Optional[dict]=None)\n",
1058+
"> filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=Non\n",
1059+
"> e)\n",
10421060
"\n",
10431061
"Retrieves similar records using a similarity query.\n",
10441062
"\n",
@@ -1131,6 +1149,12 @@
11311149
"rec = vec.search([1.0, 2.0], k=4, filter={\"key_1\":\"val_1\", \"key_2\":\"val_3\"})\n",
11321150
"assert len(rec) == 0\n",
11331151
"\n",
1152+
"rec = vec.search([1.0, 2.0], k=4, filter=[{\"key_1\":\"val_1\"}, {\"key2\":\"val2\"}])\n",
1153+
"assert len(rec) == 2\n",
1154+
"\n",
1155+
"rec = vec.search([1.0, 2.0], k=4, filter=[{\"key_1\":\"val_1\"}, {\"key2\":\"val2\"}, {\"no such key\": \"no such val\"}])\n",
1156+
"assert len(rec) == 2\n",
1157+
"\n",
11341158
"try:\n",
11351159
" # can't upsert using both keys and dictionaries\n",
11361160
" await vec.upsert([ \\\n",

0 commit comments

Comments
 (0)