From 5c05617e79184ad3406c75cd1038cb7c6ea063de Mon Sep 17 00:00:00 2001
From: letai2001
Date: Mon, 6 Oct 2025 23:37:59 +0700
Subject: [PATCH] s2

---
 utils/api_llm.py         | 12 ++++++++
 utils/llm_client_vllm.py | 66 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 utils/api_llm.py
 create mode 100644 utils/llm_client_vllm.py

diff --git a/utils/api_llm.py b/utils/api_llm.py
new file mode 100644
index 0000000..366915b
--- /dev/null
+++ b/utils/api_llm.py
@@ -0,0 +1,12 @@
+import google.generativeai as genai
+import time
+genai.configure(api_key="AIzaSyDWqNUBKhaZjbFI8CW52_hKr46JtWABkGU")
+
+model = genai.GenerativeModel("models/gemini-2.0-flash-001")
+start_time = time.time()
+response = model.generate_content("Hello, who are you?")
+print(response.text)
+print("Execution time: %.2f seconds" % (time.time() - start_time))
+# for m in genai.list_models():
+#     if "generateContent" in m.supported_generation_methods:
+#         print(m.name)
diff --git a/utils/llm_client_vllm.py b/utils/llm_client_vllm.py
new file mode 100644
index 0000000..b071e85
--- /dev/null
+++ b/utils/llm_client_vllm.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+"""
+LLM Client - talks to a vLLM server
+-----------------------------------
+- Uses vLLM's OpenAI-compatible HTTP endpoint.
+- Takes a prompt already built by PromptBuilder.
+- Calls the model to generate an answer.
+"""
+
+import requests
+from typing import Optional, Dict, Any
+from src.core.config import VLLM_URL, VLLM_MODEL, LLM_TEMPERATURE, LLM_MAX_TOKENS
+
+
+class LLMClient:
+    """
+    Calls a vLLM model via the OpenAI-compatible protocol.
+    """
+    def __init__(
+        self,
+        base_url: str = VLLM_URL,
+        model: str = VLLM_MODEL,
+        temperature: float = LLM_TEMPERATURE,
+        max_tokens: int = LLM_MAX_TOKENS
+    ):
+        self.base_url = base_url.rstrip("/")
+        self.model = model
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+
+    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
+        """
+        Send the prompt to the vLLM API and return the answer.
+        """
+        url = f"{self.base_url}/v1/chat/completions"
+
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
+        payload: Dict[str, Any] = {
+            "model": self.model,
+            "messages": messages,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "stream": False,
+        }
+
+        try:
+            response = requests.post(url, json=payload, timeout=60)
+            response.raise_for_status()
+            data = response.json()
+            return data["choices"][0]["message"]["content"].strip()
+        except Exception as e:
+            print(f"[LLMClient] ❌ Error calling vLLM: {e}")
+            return f"[Error] {e}"
+
+
+# ---- Quick test ----
+if __name__ == "__main__":
+    client = LLMClient()
+    prompt = "Write a short description of the planet Mars."
+    answer = client.generate(prompt)
+    print("🪐 Result from the LLM:")
+    print(answer)
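
For reviewers, a minimal usage sketch of the new LLMClient follows. It assumes that src.core.config exposes the four constants the module imports (VLLM_URL, VLLM_MODEL, LLM_TEMPERATURE, LLM_MAX_TOKENS) and that an OpenAI-compatible vLLM server is already listening at VLLM_URL; the concrete config values, the model name, and the server launch command shown here are illustrative assumptions, not part of this patch.

# Hypothetical src/core/config.py -- illustrative values only, not included in this patch.
VLLM_URL = "http://localhost:8000"        # base URL of the vLLM OpenAI-compatible server
VLLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"   # whichever model the server was launched with
LLM_TEMPERATURE = 0.2
LLM_MAX_TOKENS = 512

# Example call site, mirroring the __main__ block in llm_client_vllm.py.
# Assumes a server was started beforehand, e.g. with:
#   python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-7B-Instruct --port 8000
from utils.llm_client_vllm import LLMClient

client = LLMClient()  # picks up the defaults from src.core.config
answer = client.generate(
    "Write a short description of the planet Mars.",
    system_prompt="Answer concisely in one paragraph.",
)
print(answer)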