We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c1408e1, commit 9d3f489 (Copy full SHA for 9d3f489)
5 files changed
rag_documents_ingestion.py
@@ -44,7 +44,7 @@
44
45
# Split the text into smaller chunks
46
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
47
- model_name="gpt-4o", chunk_size=500, chunk_overlap=0
+ model_name="gpt-4o", chunk_size=500, chunk_overlap=125
48
)
49
texts = text_splitter.create_documents([md_text])
50
file_chunks = [{"id": f"{filename}-{(i + 1)}", "text": text.page_content} for i, text in enumerate(texts)]
0 commit comments