feat(rag): skip document chunks marked for exclusion
This commit is contained in:
16
main.py
16
main.py
@@ -341,10 +341,24 @@ async def main():
|
|||||||
console.print("No documents found to analyze!", style="red")
|
console.print("No documents found to analyze!", style="red")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Exclude chunks whose metadata is missing/empty or has a truthy 'exclude' flag
|
||||||
|
filtered_pairs = [
|
||||||
|
(text, meta) for text, meta in zip(all_texts, all_metas)
|
||||||
|
if meta and not meta.get('exclude', False)
|
||||||
|
]
|
||||||
|
|
||||||
|
excluded_count = len(all_texts) - len(filtered_pairs)
|
||||||
|
if excluded_count > 0:
|
||||||
|
console.print(f"ℹ Excluded {excluded_count} chunks marked 'exclude: true'", style="dim")
|
||||||
|
|
||||||
|
if not filtered_pairs:
|
||||||
|
console.print("All documents are marked for exclusion. Nothing to analyze.", style="yellow")
|
||||||
|
continue
|
||||||
|
|
||||||
full_context = ""
|
full_context = ""
|
||||||
char_count = 0
|
char_count = 0
|
||||||
|
|
||||||
paired = sorted(zip(all_texts, all_metas), key=lambda x: x[1]['source'])
|
paired = sorted(filtered_pairs, key=lambda x: x[1]['source'])
|
||||||
|
|
||||||
for text, meta in paired:
|
for text, meta in paired:
|
||||||
entry = f"\n---\nSource: {Path(meta['source']).name}\n{text}\n"
|
entry = f"\n---\nSource: {Path(meta['source']).name}\n{text}\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user