We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents c1408e1 + 9d3f489, commit 0a4e74f (Copy full SHA for 0a4e74f)
5 files changed
rag_documents_ingestion.py
@@ -44,7 +44,7 @@
44
45
# Split the text into smaller chunks
46
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
47
- model_name="gpt-4o", chunk_size=500, chunk_overlap=0
+ model_name="gpt-4o", chunk_size=500, chunk_overlap=125
48
)
49
texts = text_splitter.create_documents([md_text])
50
file_chunks = [{"id": f"{filename}-{(i + 1)}", "text": text.page_content} for i, text in enumerate(texts)]
0 commit comments