feat: system/user prompts in .env

2025-12-30 02:30:08 +03:00
parent 0599dbcdf4
commit 0e4e438cbc
2 changed files with 28 additions and 3 deletions

main.py (21 changed lines)

@@ -36,6 +36,10 @@ load_dotenv()
 style = Style.from_dict({"prompt": "bold #6a0dad"})
+SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a precise technical assistant. Cite sources using [filename]. Be concise.")
+USER_PROMPT_TEMPLATE = os.getenv("USER_PROMPT_TEMPLATE",
+    "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
 MD_DIRECTORY = os.getenv("MD_FOLDER")
 EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
 LLM_MODEL = os.getenv("LLM_MODEL")
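
With these defaults in place, both prompts can be overridden from .env without touching main.py. A hypothetical override might look like the following (assuming python-dotenv, which load_dotenv() implies; it expands \n escapes inside double-quoted values, so a multi-line template fits on one line):

# .env — hypothetical values, not part of this commit
SYSTEM_PROMPT="You are a terse code reviewer. Cite sources using [filename]."
USER_PROMPT_TEMPLATE="History:\n{history}\n\nDocs:\n{context}\n\nQuestion: {question}"

Any override must keep the {history}, {context}, and {question} placeholders, since the chain below fills exactly those variables.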
@@ -58,6 +62,18 @@ MAX_PARALLEL_FILES = 3
 def setup_gpu():
     if torch.cuda.is_available():
         torch.cuda.set_per_process_memory_fraction(0.95)
+        device_id = torch.cuda.current_device()
+        device_name = torch.cuda.get_device_name(device_id)
+        # VRAM info (in GB)
+        total_vram = torch.cuda.get_device_properties(device_id).total_memory / (1024**3)
+        allocated = torch.cuda.memory_allocated(device_id) / (1024**3)
+        reserved = torch.cuda.memory_reserved(device_id) / (1024**3)
+        free = total_vram - reserved
+        console.print(f"[green]✓ GPU: {device_name}[/green]")
+        console.print(f"[blue]  VRAM: {total_vram:.1f}GB total | {free:.1f}GB free | {allocated:.1f}GB allocated[/blue]")
     else:
         console.print("[yellow]⚠ CPU mode[/yellow]")
@@ -260,10 +276,9 @@ class ConversationMemory:
 def get_rag_components(retriever):
     llm = ChatOllama(model=LLM_MODEL, temperature=0.1)
-    # FIX 1: Added {history} to the prompt
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a precise technical assistant. Cite sources using [filename]. Be concise."),
-        ("human", "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+        ("system", SYSTEM_PROMPT),
+        ("human", USER_PROMPT_TEMPLATE)
     ])
     return prompt | llm | StrOutputParser()
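
The returned runnable is unchanged from the caller's point of view: it still expects the same three input variables, now routed through whatever templates the environment supplies. A usage sketch (the values are invented; get_rag_components and the variable names come from the diff above):

chain = get_rag_components(retriever)
answer = chain.invoke({
    "history": "User: What does setup_gpu() print?\nAssistant: The GPU name and VRAM stats.",
    "context": "[main.py] def setup_gpu(): ...",
    "question": "Where do SYSTEM_PROMPT and USER_PROMPT_TEMPLATE come from?",
})
print(answer)  # StrOutputParser yields a plain string

Note that ChatPromptTemplate treats every {name} in a template as an input variable, so an env override that introduces a new placeholder, or a literal brace, will fail at invoke time unless the braces are doubled ({{ and }}).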