feat: system/user prompts in .env
 .env.example | 10 ++++++++++
 main.py      | 21 ++++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)
.env.example
@@ -1,3 +1,13 @@
 MD_FOLDER=my_docs
 EMBEDDING_MODEL=mxbai-embed-large:latest
 LLM_MODEL=qwen2.5:7b-instruct-q8_0
+
+SYSTEM_PROMPT="You are a precise technical assistant. Cite sources using [filename]. Be concise."
+
+USER_PROMPT_TEMPLATE="Previous Conversation:
+{history}
+
+Context from Docs:
+{context}
+
+Current Question: {question}"
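The two new keys are ordinary python-dotenv values; the double-quoted USER_PROMPT_TEMPLATE spans several lines and is read back as a single string with embedded newlines, with the {history}/{context}/{question} placeholders left literal until the prompt is formatted. A minimal sketch of that round trip (the sample values passed to .format() are illustrative and not part of the commit; main.py itself lets ChatPromptTemplate do the substitution):

import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file from the working directory

template = os.getenv("USER_PROMPT_TEMPLATE")
print(os.getenv("SYSTEM_PROMPT"))

# The placeholders survive as literal braces until something fills them in.
print(template.format(
    history="(no prior turns)",
    context="[setup.md] Install the dependencies with pip.",
    question="How do I install the project?",
))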
main.py
@@ -36,6 +36,10 @@ load_dotenv()
 
 style = Style.from_dict({"prompt": "bold #6a0dad"})
 
+SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a precise technical assistant. Cite sources using [filename]. Be concise.")
+USER_PROMPT_TEMPLATE = os.getenv("USER_PROMPT_TEMPLATE",
+    "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+
 MD_DIRECTORY = os.getenv("MD_FOLDER")
 EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
 LLM_MODEL = os.getenv("LLM_MODEL")
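Both calls pass a fallback as the second argument to os.getenv(), so an existing .env file that lacks the two new keys keeps working with the previous hard-coded prompts. A quick illustration of that fallback behavior (the values below are illustrative):

import os

os.environ.pop("SYSTEM_PROMPT", None)                      # key absent -> the default is returned
print(os.getenv("SYSTEM_PROMPT", "You are a precise technical assistant."))

os.environ["SYSTEM_PROMPT"] = "You are a terse reviewer."  # key set (e.g. via load_dotenv) -> the configured value wins
print(os.getenv("SYSTEM_PROMPT", "You are a precise technical assistant."))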
@@ -58,6 +62,18 @@ MAX_PARALLEL_FILES = 3
 def setup_gpu():
     if torch.cuda.is_available():
         torch.cuda.set_per_process_memory_fraction(0.95)
+
+        device_id = torch.cuda.current_device()
+        device_name = torch.cuda.get_device_name(device_id)
+
+        # VRAM info (in GB)
+        total_vram = torch.cuda.get_device_properties(device_id).total_memory / (1024**3)
+        allocated = torch.cuda.memory_allocated(device_id) / (1024**3)
+        reserved = torch.cuda.memory_reserved(device_id) / (1024**3)
+        free = total_vram - reserved
+
+        console.print(f"[green]✓ GPU: {device_name}[/green]")
+        console.print(f"[blue] VRAM: {total_vram:.1f}GB total | {free:.1f}GB free | {allocated:.1f}GB allocated[/blue]")
     else:
         console.print("[yellow]⚠ CPU mode[/yellow]")
 
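For context on the arithmetic above: torch.cuda.memory_reserved() reports what PyTorch's caching allocator has claimed from the driver, while memory_allocated() is what live tensors actually occupy, so total_vram - reserved approximates the VRAM this process has not yet touched. A standalone sketch of the same report (the function name vram_report and the bare print() are illustrative; main.py uses rich's console.print markup instead):

import torch

def vram_report(device_id: int = 0) -> None:
    # Same numbers setup_gpu() prints, expressed in GB
    total = torch.cuda.get_device_properties(device_id).total_memory / (1024 ** 3)
    allocated = torch.cuda.memory_allocated(device_id) / (1024 ** 3)
    reserved = torch.cuda.memory_reserved(device_id) / (1024 ** 3)
    free = total - reserved
    name = torch.cuda.get_device_name(device_id)
    print(f"{name}: {total:.1f}GB total | {free:.1f}GB free | {allocated:.1f}GB allocated")

if torch.cuda.is_available():
    vram_report(torch.cuda.current_device())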
@@ -260,10 +276,9 @@ class ConversationMemory:
 def get_rag_components(retriever):
     llm = ChatOllama(model=LLM_MODEL, temperature=0.1)
 
-    # FIX 1: Added {history} to the prompt
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a precise technical assistant. Cite sources using [filename]. Be concise."),
-        ("human", "Previous Conversation:\n{history}\n\nContext from Docs:\n{context}\n\nCurrent Question: {question}")
+        ("system", SYSTEM_PROMPT),
+        ("human", USER_PROMPT_TEMPLATE)
     ])
 
     return prompt | llm | StrOutputParser()
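Because the prompt placeholders are {history}, {context} and {question}, the chain returned by get_rag_components() is driven with a dict carrying exactly those keys. A minimal usage sketch (the sample strings are illustrative, and retriever stands for whatever main.py builds elsewhere):

rag_chain = get_rag_components(retriever)
answer = rag_chain.invoke({
    "history": "User: What does MD_FOLDER point at?\nAssistant: The folder of markdown docs to index.",
    "context": "[setup.md] MD_FOLDER selects the directory of markdown files to embed.",
    "question": "Can I point it at a different folder?",
})
print(answer)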