1- # pip install sentence-transformers
21import json
32import os
43
@@ -72,24 +71,25 @@ def cosine_similarity(a, b):
7271 return retrieved_documents
7372
7473
def reciprocal_rank_fusion(text_results, vector_results, k=60):
    """
    Merge full-text and vector search results with Reciprocal Rank Fusion (RRF).

    Each document receives ``1 / (rank + k)`` for every result list it appears
    in, where ``rank`` is its 1-based position in that list; the per-list
    contributions are summed and documents are returned from highest to lowest
    fused score. Algorithm reference:
    https://learn.microsoft.com/azure/search/hybrid-search-ranking#how-rrf-ranking-works

    Args:
        text_results: Ordered full-text search hits; each item must expose an
            ``"id"`` key.
        vector_results: Ordered vector search hits; each item must expose an
            ``"id"`` key.
        k: Smoothing constant added to every rank. Larger values flatten the
            difference between top and lower positions.

    Returns:
        A list of documents looked up in ``documents_by_id``, ordered by
        descending fused score. Documents with equal scores keep the order in
        which they were first seen (text results first, then vector results).

    Raises:
        KeyError: If a result has no ``"id"`` key or its id is missing from
            ``documents_by_id``.
    """
    scores = {}
    for results in (text_results, vector_results):
        # Ranks are 1-based, as in the RRF formula; starting at 1 also keeps
        # k=0 from dividing by zero on the first hit.
        for rank, doc in enumerate(results, start=1):
            doc_id = doc["id"]
            scores[doc_id] = scores.get(doc_id, 0) + 1 / (rank + k)

    # sorted() is stable, so ties retain first-seen (insertion) order.
    ranked_ids = sorted(scores, key=scores.get, reverse=True)
    # NOTE(review): documents_by_id is a module-level lookup defined elsewhere
    # in this file; presumably it maps every indexed id to its document.
    return [documents_by_id[doc_id] for doc_id in ranked_ids]
9393
9494
9595def rerank (query , retrieved_documents ):
@@ -108,13 +108,13 @@ def hybrid_search(query, limit):
108108 """
109109 text_results = full_text_search (query , limit * 2 )
110110 vector_results = vector_search (query , limit * 2 )
111- combined_results = reciprocal_rank_fusion (text_results , vector_results )
112- combined_results = rerank (query , combined_results )
113- return combined_results [:limit ]
111+ fused_results = reciprocal_rank_fusion (text_results , vector_results )
112+ reranked_results = rerank (query , fused_results )
113+ return reranked_results [:limit ]
114114
115115
116116# Obtener la pregunta del usuario
117- user_question = "gris y solitario "
117+ user_question = "cual insecta es gris y velloso? "
118118
119119# Buscar la pregunta del usuario en el índice
120120retrieved_documents = hybrid_search (user_question , limit = 5 )
0 commit comments