feat: system/user prompts in .env

2025-12-30 02:30:08 +03:00
parent 0599dbcdf4
commit 0e4e438cbc
2 changed files with 28 additions and 3 deletions

main.py (21 changed lines)

@@ -36,6 +36,10 @@ load_dotenv()
 style = Style.from_dict({"prompt": "bold #6a0dad"})
+SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a precise technical assistant. Cite sources using [filename]. Be concise.")
+USER_PROMPT_TEMPLATE = os.getenv("USER_PROMPT_TEMPLATE",
+    "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
 MD_DIRECTORY = os.getenv("MD_FOLDER")
 EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
 LLM_MODEL = os.getenv("LLM_MODEL")
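
With these defaults in place, both prompts can be overridden from .env without touching main.py. A hypothetical override might look like the following (assuming python-dotenv, which load_dotenv() implies; it expands \n escapes inside double-quoted values, so a multi-line template fits on one line):

# .env — hypothetical values, not part of this commit
SYSTEM_PROMPT="You are a terse code reviewer. Cite sources using [filename]."
USER_PROMPT_TEMPLATE="History:\n{history}\n\nDocs:\n{context}\n\nQuestion: {question}"

Any override must keep the {history}, {context}, and {question} placeholders, since the chain below fills exactly those variables.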
@@ -58,6 +62,18 @@ MAX_PARALLEL_FILES = 3
 def setup_gpu():
     if torch.cuda.is_available():
         torch.cuda.set_per_process_memory_fraction(0.95)
+        device_id = torch.cuda.current_device()
+        device_name = torch.cuda.get_device_name(device_id)
+        # VRAM info (in GB)
+        total_vram = torch.cuda.get_device_properties(device_id).total_memory / (1024**3)
+        allocated = torch.cuda.memory_allocated(device_id) / (1024**3)
+        reserved = torch.cuda.memory_reserved(device_id) / (1024**3)
+        free = total_vram - reserved
+        console.print(f"[green]✓ GPU: {device_name}[/green]")
+        console.print(f"[blue]  VRAM: {total_vram:.1f}GB total | {free:.1f}GB free | {allocated:.1f}GB allocated[/blue]")
     else:
         console.print("[yellow]⚠ CPU mode[/yellow]")
@@ -260,10 +276,9 @@ class ConversationMemory:
 def get_rag_components(retriever):
     llm = ChatOllama(model=LLM_MODEL, temperature=0.1)
-    # FIX 1: Added {history} to the prompt
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a precise technical assistant. Cite sources using [filename]. Be concise."),
-        ("human", "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+        ("system", SYSTEM_PROMPT),
+        ("human", USER_PROMPT_TEMPLATE)
     ])
     return prompt | llm | StrOutputParser()
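
The returned runnable is unchanged from the caller's point of view: it still expects the same three input variables, now routed through whatever templates the environment supplies. A usage sketch (the values are invented; get_rag_components and the variable names come from the diff above):

chain = get_rag_components(retriever)
answer = chain.invoke({
    "history": "User: What does setup_gpu() print?\nAssistant: The GPU name and VRAM stats.",
    "context": "[main.py] def setup_gpu(): ...",
    "question": "Where do SYSTEM_PROMPT and USER_PROMPT_TEMPLATE come from?",
})
print(answer)  # StrOutputParser yields a plain string

Note that ChatPromptTemplate treats every {name} in a template as an input variable, so an env override that introduces a new placeholder, or a literal brace, will fail at invoke time unless the braces are doubled ({{ and }}).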