feat(rag): skip document chunks marked for exclusion
This commit is contained in:
16
main.py
16
main.py
@@ -341,10 +341,24 @@ async def main():
|
||||
console.print("No documents found to analyze!", style="red")
|
||||
continue
|
||||
|
||||
# Exclude chunks whose metadata is missing/empty or has a truthy 'exclude' flag
|
||||
filtered_pairs = [
|
||||
(text, meta) for text, meta in zip(all_texts, all_metas)
|
||||
if meta and not meta.get('exclude', False)
|
||||
]
|
||||
|
||||
excluded_count = len(all_texts) - len(filtered_pairs)
|
||||
if excluded_count > 0:
|
||||
console.print(f"ℹ Excluded {excluded_count} chunks marked 'exclude: true'", style="dim")
|
||||
|
||||
if not filtered_pairs:
|
||||
console.print("All documents are marked for exclusion. Nothing to analyze.", style="yellow")
|
||||
continue
|
||||
|
||||
full_context = ""
|
||||
char_count = 0
|
||||
|
||||
paired = sorted(zip(all_texts, all_metas), key=lambda x: x[1]['source'])
|
||||
paired = sorted(filtered_pairs, key=lambda x: x[1]['source'])
|
||||
|
||||
for text, meta in paired:
|
||||
entry = f"\n---\nSource: {Path(meta['source']).name}\n{text}\n"
|
||||
|
||||
Reference in New Issue
Block a user