From 0e4e438cbc8a08881ca35d7cc86914b33557f734 Mon Sep 17 00:00:00 2001
From: y9938
Date: Tue, 30 Dec 2025 02:30:08 +0300
Subject: [PATCH] feat: system/user prompts in .env

---
 .env.example | 10 ++++++++++
 main.py      | 21 ++++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/.env.example b/.env.example
index a2caab6..af6f481 100644
--- a/.env.example
+++ b/.env.example
@@ -1,3 +1,13 @@
 MD_FOLDER=my_docs
 EMBEDDING_MODEL=mxbai-embed-large:latest
 LLM_MODEL=qwen2.5:7b-instruct-q8_0
+
+SYSTEM_PROMPT="You are a precise technical assistant. Cite sources using [filename]. Be concise."
+
+USER_PROMPT_TEMPLATE="Previous Conversation:
+{history}
+
+Context from Docs:
+{context}
+
+Current Question: {question}"
diff --git a/main.py b/main.py
index 19873be..3c169dc 100644
--- a/main.py
+++ b/main.py
@@ -36,6 +36,10 @@ load_dotenv()
 
 style = Style.from_dict({"prompt": "bold #6a0dad"})
 
+SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a precise technical assistant. Cite sources using [filename]. Be concise.")
+USER_PROMPT_TEMPLATE = os.getenv("USER_PROMPT_TEMPLATE",
+    "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+
 MD_DIRECTORY = os.getenv("MD_FOLDER")
 EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
 LLM_MODEL = os.getenv("LLM_MODEL")
@@ -58,6 +62,18 @@ MAX_PARALLEL_FILES = 3
 def setup_gpu():
     if torch.cuda.is_available():
         torch.cuda.set_per_process_memory_fraction(0.95)
+
+        device_id = torch.cuda.current_device()
+        device_name = torch.cuda.get_device_name(device_id)
+
+        # VRAM info (in GB)
+        total_vram = torch.cuda.get_device_properties(device_id).total_memory / (1024**3)
+        allocated = torch.cuda.memory_allocated(device_id) / (1024**3)
+        reserved = torch.cuda.memory_reserved(device_id) / (1024**3)
+        free = total_vram - reserved
+
+        console.print(f"[green]✓ GPU: {device_name}[/green]")
+        console.print(f"[blue]  VRAM: {total_vram:.1f}GB total | {free:.1f}GB free | {allocated:.1f}GB allocated[/blue]")
     else:
         console.print("[yellow]⚠ CPU mode[/yellow]")
 
@@ -260,10 +276,9 @@ class ConversationMemory:
 def get_rag_components(retriever):
     llm = ChatOllama(model=LLM_MODEL, temperature=0.1)
 
-    # FIX 1: Added {history} to the prompt
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a precise technical assistant. Cite sources using [filename]. Be concise."),
-        ("human", "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+        ("system", SYSTEM_PROMPT),
+        ("human", USER_PROMPT_TEMPLATE)
     ])
 
     return prompt | llm | StrOutputParser()