From 718be230c1a9ea97b087750e378fc36147e19af3 Mon Sep 17 00:00:00 2001 From: y9938 Date: Tue, 30 Dec 2025 07:48:35 +0300 Subject: [PATCH] feat(rag): skip document chunks marked for exclusion --- main.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 7d38456..7daf7ca 100644 --- a/main.py +++ b/main.py @@ -341,10 +341,24 @@ async def main(): console.print("No documents found to analyze!", style="red") continue + # Exclude chunks where metadata has exclude: true + filtered_pairs = [ + (text, meta) for text, meta in zip(all_texts, all_metas) + if meta and not meta.get('exclude', False) + ] + + excluded_count = len(all_texts) - len(filtered_pairs) + if excluded_count > 0: + console.print(f"ℹ Excluded {excluded_count} chunks marked 'exclude: true'", style="dim") + + if not filtered_pairs: + console.print("All documents are marked for exclusion. Nothing to analyze.", style="yellow") + continue + full_context = "" char_count = 0 - paired = sorted(zip(all_texts, all_metas), key=lambda x: x[1]['source']) + paired = sorted(filtered_pairs, key=lambda x: x[1]['source']) for text, meta in paired: entry = f"\n---\nSource: {Path(meta['source']).name}\n{text}\n"