This commit is contained in:
letai2001 2025-10-07 00:18:35 +07:00
parent 2b4836ba70
commit 3fd5370049
9 changed files with 16 additions and 4 deletions

4
.vscode/launch.json vendored
View File

@ -2,10 +2,10 @@
"version": "0.2.0",
"configurations": [
{
"name": "Debug ingest_semantic (as module)",
"name": "Debug rag_pipeline (as module)",
"type": "python",
"request": "launch",
"module": "src.ingest.ingest_semantic", // chạy theo module
"module": "src.chatbot.rag_pipeline", // chạy theo module
"cwd": "${workspaceFolder}", // thư mục gốc project
"envFile": "${workspaceFolder}/.env", // load biến môi trường
"env": { "PYTHONPATH": "${workspaceFolder}" }, // đảm bảo 'src' là package gốc

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -22,7 +22,13 @@ class RAGPipeline:
self.llm = LLMClient()
def run(self, user_query: str) -> str:
docs = self.retriever.retrieve(user_query)
prompt = self.prompt_builder.build(user_query, docs)
docs = self.retriever.search(user_query)
prompt = self.prompt_builder.build_prompt(user_query, docs)
answer = self.llm.generate(prompt)
return answer
if __name__ == "__main__":
pipeline = RAGPipeline()
query = "Bạn biết Mahola là ai không?"
response = pipeline.run(query)
print("Câu hỏi:", query)
print("Trả lời:", response)

View File

@ -1,3 +1,7 @@
# -*- coding: utf-8 -*-
"""
Retriever: Tìm kiếm các đoạn văn bản liên quan trong Qdrant.
"""
from typing import List, Dict, Any
from sentence_transformers import SentenceTransformer

View File

@ -46,6 +46,8 @@ load_dotenv()
# ==== Đường dẫn dữ liệu ====
# Thư mục chứa các file .txt nguồn
DATA_RAW = Path(os.getenv("DATA_RAW", "./data/data_raw10k")).resolve()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "models/gemini-2.0-flash-001")
# ==== Embedding model (SentenceTransformers) ====
EMBED_MODEL = os.getenv("EMBED_MODEL", "Alibaba-NLP/gte-multilingual-base")