@@ -40,7 +40,7 @@ import os
4040```
4141
4242``` python
43- _ = load_dotenv(find_dotenv())
43+ _ = load_dotenv(find_dotenv(), override = True )
4444service_url = os.environ[' TIMESCALE_SERVICE_URL' ]
4545```
4646
@@ -97,24 +97,24 @@ Now you can query for similar items:
9797await vec.search([1.0 , 9.0 ])
9898```
9999
100- [<Record id=UUID('e5dbaa7c-081b-4131-be18-c81ce47fc864 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>,
101- <Record id=UUID('2cdb8cbd-5dd7-4555-926a-5efafb4b1cf0 ') metadata={'animal': 'fox'} contents='the brown fox' embedding=array([1. , 1.3], dtype=float32) distance=0.14489260377438218>]
100+ [<Record id=UUID('d10dc66f-92d5-4296-a702-1690860bbe55 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>,
101+ <Record id=UUID('06153343-9085-4844-ad7a-b5cbed912053 ') metadata={'animal': 'fox'} contents='the brown fox' embedding=array([1. , 1.3], dtype=float32) distance=0.14489260377438218>]
102102
103103You can specify the number of records to return.
104104
105105``` python
106106await vec.search([1.0 , 9.0 ], limit = 1 )
107107```
108108
109- [<Record id=UUID('e5dbaa7c-081b-4131-be18-c81ce47fc864 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
109+ [<Record id=UUID('d10dc66f-92d5-4296-a702-1690860bbe55 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
110110
111111You can also specify a filter on the metadata as a simple dictionary
112112
113113``` python
114114await vec.search([1.0 , 9.0 ], limit = 1 , filter = {" action" : " jump" })
115115```
116116
117- [<Record id=UUID('e5dbaa7c-081b-4131-be18-c81ce47fc864 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
117+ [<Record id=UUID('d10dc66f-92d5-4296-a702-1690860bbe55 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>]
118118
119119You can also specify a list of filter dictionaries, where an item is
120120returned if it matches any dict
@@ -123,8 +123,8 @@ returned if it matches any dict
123123await vec.search([1.0 , 9.0 ], limit = 2 , filter = [{" action" : " jump" }, {" animal" : " fox" }])
124124```
125125
126- [<Record id=UUID('e5dbaa7c-081b-4131-be18-c81ce47fc864 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>,
127- <Record id=UUID('2cdb8cbd-5dd7-4555-926a-5efafb4b1cf0 ') metadata={'animal': 'fox'} contents='the brown fox' embedding=array([1. , 1.3], dtype=float32) distance=0.14489260377438218>]
126+ [<Record id=UUID('d10dc66f-92d5-4296-a702-1690860bbe55 ') metadata={'action': 'jump', 'animal': 'fox'} contents='jumped over the' embedding=array([ 1. , 10.8], dtype=float32) distance=0.00016793422934946456>,
127+ <Record id=UUID('06153343-9085-4844-ad7a-b5cbed912053 ') metadata={'animal': 'fox'} contents='the brown fox' embedding=array([1. , 1.3], dtype=float32) distance=0.14489260377438218>]
128128
129129You can access the fields as follows
130130
@@ -133,7 +133,7 @@ records = await vec.search([1.0, 9.0], limit=1, filter={"action": "jump"})
133133records[0 ][client.SEARCH_RESULT_ID_IDX ]
134134```
135135
136- UUID('e5dbaa7c-081b-4131-be18-c81ce47fc864 ')
136+ UUID('d10dc66f-92d5-4296-a702-1690860bbe55 ')
137137
138138``` python
139139records[0 ][client.SEARCH_RESULT_METADATA_IDX ]
@@ -293,6 +293,134 @@ search call:
293293rec = await vec.search([1.0 , 2.0 ], limit = 4 , uuid_time_filter = client.UUIDTimeRange(specific_datetime- timedelta(days = 7 ), specific_datetime+ timedelta(days = 7 )))
294294```
295295
296+ # PgVectorize
297+
298+ PgVectorize enables you to create vector embeddings from any data that
299+ you already have stored in Postgres. Simply attach PgVectorize to any
300+ Postgres table, and it will automatically sync that table’s data with a
301+ set of embeddings stored in Timescale Vector. For example, let’s say you
302+ have a blog table defined in the following way:
303+
304+ ``` python
305+ import psycopg2
306+ from langchain.docstore.document import Document
307+ from langchain.text_splitter import CharacterTextSplitter
308+ from timescale_vector import client, pgvectorizer
309+ from langchain.embeddings.openai import OpenAIEmbeddings
310+ from langchain.vectorstores.timescalevector import TimescaleVector
311+ from datetime import timedelta
312+ ```
313+
314+ ``` python
315+ with psycopg2.connect(service_url) as conn:
316+ with conn.cursor() as cursor:
317+ cursor.execute('''
318+ CREATE TABLE IF NOT EXISTS blog (
319+ id SERIAL PRIMARY KEY NOT NULL,
320+ title TEXT NOT NULL,
321+ author TEXT NOT NULL,
322+ contents TEXT NOT NULL,
323+ category TEXT NOT NULL,
324+ published_time TIMESTAMPTZ NULL --NULL if not yet published
325+ );
326+ ''' )
327+ ```
328+
329+ You can insert some data as follows:
330+
331+ ``` python
332+ with psycopg2.connect(service_url) as conn:
333+ with conn.cursor() as cursor:
334+ cursor.execute('''
335+ INSERT INTO blog (title, author, contents, category, published_time) VALUES ('First Post', 'Matvey Arye', 'some super interesting content about cats.', 'AI', '2021-01-01');
336+ ''' )
337+ ```
338+
339+ Now, say you want to embed these blogs in Timescale Vector. First you
340+ need to define an ` embed_and_write ` function, that takes a set of blog
341+ posts, creates the embeddings, and writes them into TimescaleVector. For
342+ example, if using LangChain, it could look something like the following.
343+
344+ ``` python
345+ def get_document (blog ):
346+ text_splitter = CharacterTextSplitter(
347+ chunk_size = 1000 ,
348+ chunk_overlap = 200 ,
349+ )
350+ docs = []
351+ for chunk in text_splitter.split_text(blog[' contents' ]):
352+ content = f " Author { blog[' author' ]} , title: { blog[' title' ]} , contents: { chunk} "
353+ metadata = {
354+ " id" : str (client.uuid_from_time(blog[' published_time' ])),
355+ " blog_id" : blog[' id' ],
356+ " author" : blog[' author' ],
357+ " category" : blog[' category' ],
358+ " published_time" : blog[' published_time' ].isoformat(),
359+ }
360+ docs.append(Document(page_content = content, metadata = metadata))
361+ return docs
362+
363+ def embed_and_write (blog_instances , vectorizer ):
364+ embedding = OpenAIEmbeddings()
365+ vector_store = TimescaleVector(
366+ collection_name = " blog_embedding" ,
367+ service_url = service_url,
368+ embedding = embedding,
369+ time_partition_interval = timedelta(days = 30 ),
370+ )
371+
372+ # delete old embeddings for all ids in the work queue. locked_id is a special column that is set to the primary key of the table being
373+ # embedded. For items that are deleted, it is the only key that is set.
374+ metadata_for_delete = [{" blog_id" : blog[' locked_id' ]} for blog in blog_instances]
375+ vector_store.delete_by_metadata(metadata_for_delete)
376+
377+ documents = []
378+ for blog in blog_instances:
379+ # skip blogs that are not published yet, or are deleted (in which case it will be NULL)
380+ if blog[' published_time' ] != None :
381+ documents.extend(get_document(blog))
382+
383+ if len (documents) == 0 :
384+ return
385+
386+ texts = [d.page_content for d in documents]
387+ metadatas = [d.metadata for d in documents]
388+ ids = [d.metadata[" id" ] for d in documents]
389+ vector_store.add_texts(texts, metadatas, ids)
390+ ```
391+
392+ Then, all you have to do is run the following code in a scheduled job
393+ (cron job, lambda job, etc):
394+
395+ ``` python
396+ vectorizer = pgvectorizer.Vectorize(service_url, ' blog' )
397+ while vectorizer.process(embed_and_write) > 0 :
398+ pass
399+ ```
400+
401+ Every time that job runs it will sync the table with your embeddings. It
402+ will sync all inserts, updates, and deletes to an embeddings table called
403+ ` blog_embedding ` .
404+
405+ Now, you can simply search the embeddings as follows (again, using
406+ LangChain in the examples):
407+
408+ ``` python
409+ embedding = OpenAIEmbeddings()
410+ vector_store = TimescaleVector(
411+ collection_name = " blog_embedding" ,
412+ service_url = service_url,
413+ embedding = embedding,
414+ time_partition_interval = timedelta(days = 30 ),
415+ )
416+
417+ res = vector_store.similarity_search_with_score(" Blogs about cats" )
418+ res
419+ ```
420+
421+ [(Document(page_content='Author Matvey Arye, title: First Post, contents:some super interesting content about cats.', metadata={'id': '4a784000-4bc4-11eb-9140-78a539e57b40', 'author': 'Matvey Arye', 'blog_id': 1, 'category': 'AI', 'published_time': '2021-01-01T00:00:00+00:00'}),
422+ 0.12605134378941762)]
423+
296424## Development
297425
298426This project is developed with [ nbdev] ( https://nbdev.fast.ai/ ) . Please
0 commit comments