feat: system/user prompts in .env

.env.example (10 changed lines)
@@ -1,3 +1,13 @@
 MD_FOLDER=my_docs
 EMBEDDING_MODEL=mxbai-embed-large:latest
 LLM_MODEL=qwen2.5:7b-instruct-q8_0
+
+SYSTEM_PROMPT="You are a precise technical assistant. Cite sources using [filename]. Be concise."
+
+USER_PROMPT_TEMPLATE="Previous Conversation:
+{history}
+
+Context from Docs:
+{context}
+
+Current Question: {question}"
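
The USER_PROMPT_TEMPLATE value spans several lines, which recent versions of python-dotenv only keep intact when the value is quoted, as it is here. A minimal sanity check (a sketch, not part of the commit) to confirm the template and its placeholders survive loading:

# check_prompts.py -- hypothetical helper, assumes python-dotenv is installed
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

template = os.getenv("USER_PROMPT_TEMPLATE", "")
missing = [tag for tag in ("{history}", "{context}", "{question}") if tag not in template]
if missing:
    raise SystemExit(f"Template is missing placeholders: {missing}")
print(repr(template))  # the quoted value keeps its literal newlines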

main.py (21 changed lines)
@@ -36,6 +36,10 @@ load_dotenv()
 
 style = Style.from_dict({"prompt": "bold #6a0dad"})
 
+SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a precise technical assistant. Cite sources using [filename]. Be concise.")
+USER_PROMPT_TEMPLATE = os.getenv("USER_PROMPT_TEMPLATE",
+    "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+
 MD_DIRECTORY = os.getenv("MD_FOLDER")
 EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
 LLM_MODEL = os.getenv("LLM_MODEL")
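
The os.getenv() fallbacks mirror the values in .env.example, so behaviour is unchanged when the variables are unset. Note also that load_dotenv() does not override variables already present in the process environment, so a shell export wins over .env. A hedged sketch of the resulting precedence (the ordering comments are the point):

import os
from dotenv import load_dotenv

# Precedence, highest first (assuming python-dotenv's default override=False):
# 1. a variable already exported in the shell,
# 2. the value parsed from .env,
# 3. the hard-coded fallback passed to os.getenv().
load_dotenv()
SYSTEM_PROMPT = os.getenv(
    "SYSTEM_PROMPT",
    "You are a precise technical assistant. Cite sources using [filename]. Be concise.",
)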
@@ -58,6 +62,18 @@ MAX_PARALLEL_FILES = 3
 def setup_gpu():
     if torch.cuda.is_available():
         torch.cuda.set_per_process_memory_fraction(0.95)
+
+        device_id = torch.cuda.current_device()
+        device_name = torch.cuda.get_device_name(device_id)
+
+        # VRAM info (in GB)
+        total_vram = torch.cuda.get_device_properties(device_id).total_memory / (1024**3)
+        allocated = torch.cuda.memory_allocated(device_id) / (1024**3)
+        reserved = torch.cuda.memory_reserved(device_id) / (1024**3)
+        free = total_vram - reserved
+
+        console.print(f"[green]✓ GPU: {device_name}[/green]")
+        console.print(f"[blue] VRAM: {total_vram:.1f}GB total | {free:.1f}GB free | {allocated:.1f}GB allocated[/blue]")
     else:
         console.print("[yellow]⚠ CPU mode[/yellow]")
 
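
The added lines only report VRAM usage; they don't change how memory is allocated. A small companion sketch (hypothetical, not in the commit) for picking the device the rest of the pipeline should use:

import torch

def pick_device() -> torch.device:
    # Hypothetical helper: same availability check as setup_gpu(), but it
    # returns a device object so models can be placed explicitly.
    if torch.cuda.is_available():
        return torch.device("cuda", torch.cuda.current_device())
    return torch.device("cpu")

device = pick_device()
# e.g. embedding_model.to(device)  -- assumes a torch-backed model is in use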
@@ -260,10 +276,9 @@ class ConversationMemory:
 def get_rag_components(retriever):
     llm = ChatOllama(model=LLM_MODEL, temperature=0.1)
 
-    # FIX 1: Added {history} to the prompt
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a precise technical assistant. Cite sources using [filename]. Be concise."),
-        ("human", "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+        ("system", SYSTEM_PROMPT),
+        ("human", USER_PROMPT_TEMPLATE)
     ])
 
     return prompt | llm | StrOutputParser()
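
With the literal strings replaced by SYSTEM_PROMPT and USER_PROMPT_TEMPLATE, the chain returned by get_rag_components() is invoked exactly as before; the input keys just have to match the {history}/{context}/{question} placeholders. A hedged usage sketch (the values are made up):

# Caller side, assuming `retriever` is already built elsewhere in main.py.
chain = get_rag_components(retriever)
answer = chain.invoke({
    "history": "User: hello\nAssistant: hi, ask me about the docs.",
    "context": "[.env.example] SYSTEM_PROMPT and USER_PROMPT_TEMPLATE are set here.",
    "question": "Where is the system prompt configured now?",
})
print(answer)  # StrOutputParser() makes this a plain string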