feat(rag): skip document chunks marked for exclusion

This commit is contained in:
2025-12-30 07:48:35 +03:00
parent 115cd0fb72
commit 718be230c1

16
main.py
View File

@@ -341,10 +341,24 @@ async def main():
console.print("No documents found to analyze!", style="red") console.print("No documents found to analyze!", style="red")
continue continue
# Exclude chunks whose metadata sets exclude: true; chunks with empty or missing metadata are dropped as well
filtered_pairs = [
(text, meta) for text, meta in zip(all_texts, all_metas)
if meta and not meta.get('exclude', False)
]
excluded_count = len(all_texts) - len(filtered_pairs)
if excluded_count > 0:
console.print(f" Excluded {excluded_count} chunks marked 'exclude: true'", style="dim")
if not filtered_pairs:
console.print("All documents are marked for exclusion. Nothing to analyze.", style="yellow")
continue
full_context = "" full_context = ""
char_count = 0 char_count = 0
paired = sorted(zip(all_texts, all_metas), key=lambda x: x[1]['source']) paired = sorted(filtered_pairs, key=lambda x: x[1]['source'])
for text, meta in paired: for text, meta in paired:
entry = f"\n---\nSource: {Path(meta['source']).name}\n{text}\n" entry = f"\n---\nSource: {Path(meta['source']).name}\n{text}\n"