From 3fd5370049b1bfdd0816ed424ee29ac7387d2a96 Mon Sep 17 00:00:00 2001 From: letai2001 Date: Tue, 7 Oct 2025 00:18:35 +0700 Subject: [PATCH] s4 --- .vscode/launch.json | 4 ++-- .../__pycache__/llm_client.cpython-313.pyc | Bin 0 -> 1709 bytes .../prompt_builder.cpython-313.pyc | Bin 0 -> 3278 bytes .../__pycache__/rag_pipeline.cpython-313.pyc | Bin 0 -> 1898 bytes .../__pycache__/retriever.cpython-313.pyc | Bin 3460 -> 3460 bytes src/chatbot/rag_pipeline.py | 10 ++++++++-- src/chatbot/retriever.py | 4 ++++ src/core/__pycache__/config.cpython-313.pyc | Bin 3043 -> 3224 bytes src/core/config.py | 2 ++ 9 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 src/chatbot/__pycache__/llm_client.cpython-313.pyc create mode 100644 src/chatbot/__pycache__/prompt_builder.cpython-313.pyc create mode 100644 src/chatbot/__pycache__/rag_pipeline.cpython-313.pyc diff --git a/.vscode/launch.json b/.vscode/launch.json index 4a4d236..7bf1ba8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,10 +2,10 @@ "version": "0.2.0", "configurations": [ { - "name": "Debug ingest_semantic (as module)", + "name": "Debug rag_pipeline (as module)", "type": "python", "request": "launch", - "module": "src.ingest.ingest_semantic", // chạy theo module + "module": "src.chatbot.rag_pipeline", // chạy theo module "cwd": "${workspaceFolder}", // thư mục gốc project "envFile": "${workspaceFolder}/.env", // load biến môi trường "env": { "PYTHONPATH": "${workspaceFolder}" }, // đảm bảo 'src' là package gốc diff --git a/src/chatbot/__pycache__/llm_client.cpython-313.pyc b/src/chatbot/__pycache__/llm_client.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94e0163e914f441364d589f42ec89dcc36f33b9a GIT binary patch literal 1709 zcmZuxO>7%Q6n?ww^&d{$#BmY@XqUE1;wl(5BvpwLgh_+RkDV%ukx&+`TxaZT>+G7@ z-Kg!UaHuMz3VMhFw@6&zfXa<3Igp4Gx3ob@G=xf4kx*|23l+hIH~wp=`VQ}B=DoN3 zzWHXVZEZn7`Kj_~`J4pshzCvaG@R-g1n)(VtOn%IRbWCLJ$*I(IdRk9hoYrTjK476noSMHdHJ7WG)V16Qs&ApQ z1{zR71uCinl{AkgkBF+Mc~wdCj(Aj0A6)ia7N}2^am`y>^YJz6SN$|l2*iRcFgKT) zEE>dg9{aE(^JPnheuZ3P@`|-YiaDIVC46!oS8t+PgN!iP*jjo4U{PoUh{tHdC&FT5 zUVw!rZp}2bC$Wbrz9A@ZB+q*!q2Xel*zB2sVyjaDVp7_D4YS@`Hp-hnxXS-=R2*uV z1tn*hd842#TjkB~b4r|;t1*H4u_`9B;QQI4OQtBbD3c4s%o;3E&vz+d;UqB$%{s;^ zNpU!6fLBS(W0PXO)}ppzb5s;XVH1;MTE-^S*3`MAa&c0-VCCEu1lQJRwxDNAC4E@4 zX->;6XPrxyqZNxQdaee4ymXEEb=@!xN7q?r>saE=HU7gkpY;Yj40j)PzO?&tdarYS zH#Gm7zwNW|r{N9X1OMP*v~NFp=0Wt#*Q0yUv$s6AgH+%c>QCd*sj z8cH1#W%b7mOG%Oy#Gd=en)2pX%4&J*W5q0QeQ795F0#lWSDko}fg8sLi}m!@DSfES zmABRn<;t?56v|t-jb=Z*(PK(F=B@FB9Ycn3J`o)`<{&Sahqq<2?NFn{0#jFWq~sWu z$>b6>Oef}{Vf-5HLdE=)*d@!f2@{CT?*KARp~q@l&Avy8(_G^nYz@^7sQBeT_d(B# z>(}pG-|rc|-!uH(xxJpTTkjqWjoeM$O?`fTed5l-afm1YPa*9pF_#RNc7)|j2r*h$MFN7B;-U8cKia@!C+!Y+UaUf3`jcz zGWvMw`1MKStAj*0vnvFN9@KSwXxt+AL%RNvn=LkW!n&R}sO=ODlbDvSQ%*n1=VlVZ zMLT$Sf`?IEw;jBsIX&wjj+b1A*t-4=&&&3_7iUEop_U&pNW>uA_dDT6iK{+F}@G@aDmVEONCM-P*lYZs`67I93>CNRvV;^A|7{TDsM-9=!(ANrCRE=y!PA~?|SW| z>a~3L+*IB9TAD`l|__4J3lj85ju{h&erDzwYR{j7o;FU0Gf*@r*Z#HM1~pU`4B} z{Myj)SGOuFH*vPIa$l$N@|OlafBsFpXZ*xyA}M@BO($5FKd)Q1%b(H}Tk&`E`_RT< zoq%u~r4eDkSyp5SC$fa^NAyo@7Ws5Qcu}AlB8W}!)mB|ljC#5zF-D?UF45w~wuJf(oFsDIhEAAhjCl$IvoVBxZw6=e z%E|*7E2d$qvo_vCRGaAPH8q1Xrs7PihHWMAwZc4p*HOtFHnLwYR#twm<4ob9kxdF? zZp@mqY;{`l&(m$0mS?36Yf?@%+*o}ePpUbkcA4N5qF2Si|N1fLZle_QDzgV9+A^TS zaLp-{^CzIcgmX-7aOps9Sp0nX9DobOs~3?Mm};7Aybkx+0D znso`@j~|56XQn{UzMH4r`IyT*s>fc(vRhj2&Vqb#TBU8m$MjG(d#HtPV05XPL`& zP^ExXfI~QuKgt>Geq?ZBz~EDCJ9=XpAtN9LQ-N`6XXdfyb-)yBNLPg9@_BG2wfU|8CIpxIE`sr8~zE+}#$S^!fa{;C(h1`P#BF-G1bt=n$uz{0T{)m5;A+iCo29&Jf$(*T7)rcu6i~_jd z*MYTYiqpio3mi1>(KJxDO$rO}%{9-o4P;eVz_vN1dip1}w!S>L)0{xXi3YKf{C`jT=6elcy&$@^N=)D~3Is*GbRwJ9!Memvh!} z5d4pR1STwcZ8y-;!KI7K!=x1k6HrdLDRs^b5g=UxNk)BhnaghEik?#?L!MUMkYh|4 z<}Zxv>^s=yDNkQ#1;J&6MA!{lCb89w%Vkv?TnF-=aXH{!FTLF$h%58-B7! z2o0L-pxZS0AN#3OCu+KZmSXD($kA#z-bifz5(Fv8rnv9aP7dbz6a#%= zCRZDxK5@TL~P3%bF|oYw1|dbYQ&q0-iU%6W2!Ae z8=ntWD)6P40dL(YABw4m>>xv}XKtGd?ty+>qy67rOaUK(pAeCrGS;C@^x?iGa zEE@<{qiA=c{KBqs--Tz9SYYqpyL#_T+`3*3fDGr?+gA-yDU7;z{+sHn{qexj#eqfp zvzMv}#7FJl(x2*q9fA0~RYmZ#bm~#Z!?T|Fc<4Eu@g1Stj{#0b;MWb>b2GY;Z3xM8 sajVQ)Bim9Dii+~g3jCMzb<>OR{#J2 literal 0 HcmV?d00001 diff --git a/src/chatbot/__pycache__/rag_pipeline.cpython-313.pyc b/src/chatbot/__pycache__/rag_pipeline.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c523b7ec208f5e1115fca97686dd55be55867b6 GIT binary patch literal 1898 zcmZ8h%}*Ow5P$1^Huhp;422S>3R?%MI0-0;LJw}$C?O~j5JbI2ipr`_%d#=snzcK- zn~ExpI8~0?ibfSGQdEhwhc>q!+6x>imHq>WRFD-MtekQq}x9hL+4)fmp=FR(< z-^?5(5)#4j=f5u&jzvOVaHT$IZE>`Li&df!Ac~-bCWP@2gvLb>$0<;mW8y@3JOUAa zO()vMqY(Ai;fdI|1d>epNQNkpG*Q|{LcyufuEv#U?hYZ^5Fu)NhK$s%gM<=;4mFYQ zuK_KS+A$*3{5#bYo+%+xJ5eS+(yn$kkwUx>&nDd8(MQT(yE$!^jiPB8(ol0q6V;7B z9NDUFJ~Cx}>xnKemge1}k&|Q`Lvr45z%=d|AP>Gj51Qp923A8$Y}9y{+|9PIher*r%=mOY)$N$61AJeFJDYy!A&{cA&cPlU%HB z{$MsYz2u+GiXNRbZO5ZGOx^LsYt~X$@M4X-xYBqfDvAHSOb_A}$OfesP+uh~0Rd?Z zDFTQ}C{2JWq8e7H8ool6aGHEfA)-W}tRyyD3GVZR2CnFo4gp**?}n6JIX#OY%X90A{-Qp z#YGGYwTd`Y*xNU+h1XqNpMGR6~u7n=+`2R#v7%S*O$fIuHw>@gI} zWG~sh^5Efv-Q>V_a$x<#o#e#|-AncCrUtfC1G}k9+o?;xp4&-{Rw9Ug-}k6*H20S-B5!(Rp2IuuKW;>c=>P&O6HO#q=VKerW__z9h1m2>3!GCP0QG7}M35ntU zLy3la*Yrbz)4G1lr@)PS@YgMBrp1_>LG^ld`Jl^$(hDDL0kv^4b_fTJV<K|-w%UI>}fSNOV=;ubgS(|8S@+rqVOiaBU-OSFYGtfAdpco>~hJn4KG~A7VE%U z+>3J;WJ>hifT8!`GGe?B*mEcjML`f=(U?Gwdx>y%n`92ig#&W_fLuAIlF<8#pg8O$ zG+z09M?CdBnf~gFVk2Z2w@&W)fYXuzu diff --git a/src/chatbot/rag_pipeline.py b/src/chatbot/rag_pipeline.py index 94b2cd8..8990921 100644 --- a/src/chatbot/rag_pipeline.py +++ b/src/chatbot/rag_pipeline.py @@ -22,7 +22,13 @@ class RAGPipeline: self.llm = LLMClient() def run(self, user_query: str) -> str: - docs = self.retriever.retrieve(user_query) - prompt = self.prompt_builder.build(user_query, docs) + docs = self.retriever.search(user_query) + prompt = self.prompt_builder.build_prompt(user_query, docs) answer = self.llm.generate(prompt) return answer +if __name__ == "__main__": + pipeline = RAGPipeline() + query = "Bạn biết Mahola là ai không?" + response = pipeline.run(query) + print("Câu hỏi:", query) + print("Trả lời:", response) \ No newline at end of file diff --git a/src/chatbot/retriever.py b/src/chatbot/retriever.py index c966378..3ac6fb2 100644 --- a/src/chatbot/retriever.py +++ b/src/chatbot/retriever.py @@ -1,3 +1,7 @@ +# -*- coding: utf-8 -*- +""" +Retriever: Tìm kiếm các đoạn văn bản liên quan trong Qdrant. +""" from typing import List, Dict, Any from sentence_transformers import SentenceTransformer diff --git a/src/core/__pycache__/config.cpython-313.pyc b/src/core/__pycache__/config.cpython-313.pyc index 5a907c7260a836a5ebf7287a5278b6dacb431420..9b01531e1b2043cf0ccf2f215e92657bde88e100 100644 GIT binary patch delta 547 zcmaDXK0}iCGcPX}0}v>Fdz^7;BCjOlsfp@qCMPmVOnlGHRKztofl+d@EF%vycMQ+O z1nJ4{jAB4ZWtJko7@o-!81V3ObD)BT%RFWA&{|1B3Q9V zGFYidDp|qQ?pHA1J_zct8Xnh!6k~ zf*?W|M2Lb2aS$OnxtJr7RRzfOn|zbw9-9)7r71UgFQ>Ct5eG;C$h2F`nRz8>M&4pg z%gIkHK{r_hqzB1(5LaQc1DCZ1$c`dK5CO8INEt}n;;;ewrZgwjuE-F`1;tHq)a1on xj!HKKg{MnSlDr_Ty&~wcpg{xQ4SvD?%C5>8au+n+F7vxL@JxQf^_vA`5&&bjbngHF delta 418 zcmbOs`B4~qoxl9>BatsVb+>-+sCHc97 zc}$t0B0Mo%lNA^Z8To)pCucHp^T5^e$AGj?p3Er1C;-yND;z8W*Cq%O;TDA}76S2^ z#e&5rYcMJEO9V^8Rf<52$!Fr$fvXnPX9$)FWGoU3mMszwmMfA7mM@YFRw$ARR+M5W zl1^vRRGR#a>EC8Umbol?Y#@iTg9uKr7eE9rh~NVe0w6*NM2LV0vB`fp5?K|2OrOa~ zocGw|fhC?rEcT-nK&xvVunCKt(p2$0D| l3P9o(hfQvNN@-52U6CG;3kvFDi^*o(j+19{|7HOx0|2&VOg;br diff --git a/src/core/config.py b/src/core/config.py index 6e7dda3..d9105d7 100644 --- a/src/core/config.py +++ b/src/core/config.py @@ -46,6 +46,8 @@ load_dotenv() # ==== Đường dẫn dữ liệu ==== # Thư mục chứa các file .txt nguồn DATA_RAW = Path(os.getenv("DATA_RAW", "./data/data_raw10k")).resolve() +GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "") +GEMINI_MODEL = os.getenv("GEMINI_MODEL", "models/gemini-2.0-flash-001") # ==== Embedding model (SentenceTransformers) ==== EMBED_MODEL = os.getenv("EMBED_MODEL", "Alibaba-NLP/gte-multilingual-base")