diff --git a/.vscode/launch.json b/.vscode/launch.json
index 4a4d236..7bf1ba8 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -2,10 +2,10 @@
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Debug ingest_semantic (as module)",
+            "name": "Debug rag_pipeline (as module)",
             "type": "python",
             "request": "launch",
-            "module": "src.ingest.ingest_semantic", // run as a module
+            "module": "src.chatbot.rag_pipeline", // run as a module
             "cwd": "${workspaceFolder}", // project root directory
             "envFile": "${workspaceFolder}/.env", // load environment variables
             "env": { "PYTHONPATH": "${workspaceFolder}" }, // ensure 'src' is the root package
diff --git a/src/chatbot/__pycache__/llm_client.cpython-313.pyc b/src/chatbot/__pycache__/llm_client.cpython-313.pyc
new file mode 100644
index 0000000..94e0163
Binary files /dev/null and b/src/chatbot/__pycache__/llm_client.cpython-313.pyc differ
diff --git a/src/chatbot/__pycache__/prompt_builder.cpython-313.pyc b/src/chatbot/__pycache__/prompt_builder.cpython-313.pyc
new file mode 100644
index 0000000..61914a9
Binary files /dev/null and b/src/chatbot/__pycache__/prompt_builder.cpython-313.pyc differ
diff --git a/src/chatbot/__pycache__/rag_pipeline.cpython-313.pyc b/src/chatbot/__pycache__/rag_pipeline.cpython-313.pyc
new file mode 100644
index 0000000..0c523b7
Binary files /dev/null and b/src/chatbot/__pycache__/rag_pipeline.cpython-313.pyc differ
diff --git a/src/chatbot/__pycache__/retriever.cpython-313.pyc b/src/chatbot/__pycache__/retriever.cpython-313.pyc
index 376eff9..a375d33 100644
Binary files a/src/chatbot/__pycache__/retriever.cpython-313.pyc and b/src/chatbot/__pycache__/retriever.cpython-313.pyc differ
diff --git a/src/chatbot/rag_pipeline.py b/src/chatbot/rag_pipeline.py
index 94b2cd8..8990921 100644
--- a/src/chatbot/rag_pipeline.py
+++ b/src/chatbot/rag_pipeline.py
@@ -22,7 +22,13 @@ class RAGPipeline:
         self.llm = LLMClient()
 
     def run(self, user_query: str) -> str:
-        docs = self.retriever.retrieve(user_query)
-        prompt = self.prompt_builder.build(user_query, docs)
+        docs = self.retriever.search(user_query)
+        prompt = self.prompt_builder.build_prompt(user_query, docs)
         answer = self.llm.generate(prompt)
         return answer
+if __name__ == "__main__":
+    pipeline = RAGPipeline()
+    query = "Do you know who Mahola is?"
+    response = pipeline.run(query)
+    print("Question:", query)
+    print("Answer:", response)
\ No newline at end of file
diff --git a/src/chatbot/retriever.py b/src/chatbot/retriever.py
index c966378..3ac6fb2 100644
--- a/src/chatbot/retriever.py
+++ b/src/chatbot/retriever.py
@@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+"""
+Retriever: search Qdrant for text passages relevant to a query.
+"""
 from typing import List, Dict, Any
 
 from sentence_transformers import SentenceTransformer
diff --git a/src/core/__pycache__/config.cpython-313.pyc b/src/core/__pycache__/config.cpython-313.pyc
index 5a907c7..9b01531 100644
Binary files a/src/core/__pycache__/config.cpython-313.pyc and b/src/core/__pycache__/config.cpython-313.pyc differ
diff --git a/src/core/config.py b/src/core/config.py
index 6e7dda3..d9105d7 100644
--- a/src/core/config.py
+++ b/src/core/config.py
@@ -46,6 +46,8 @@ load_dotenv()
 # ==== Data paths ====
 # Directory containing the source .txt files
 DATA_RAW = Path(os.getenv("DATA_RAW", "./data/data_raw10k")).resolve()
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
+GEMINI_MODEL = os.getenv("GEMINI_MODEL", "models/gemini-2.0-flash-001")
 
 # ==== Embedding model (SentenceTransformers) ====
 EMBED_MODEL = os.getenv("EMBED_MODEL", "Alibaba-NLP/gte-multilingual-base")