# -*- coding: utf-8 -*-
"""
Refactored Code on Wed Mar 25 15:30:10 2026
Bible RAG - This program uses semantic search over the Bible and personal notes to answer queries about the Bible
@author: David DiPaola
"""
import os
import json
import sqlite3
from datetime import datetime
from typing import List, Dict, Any, Optional
import numpy as np
import torch
import faiss
from sentence_transformers import SentenceTransformer, CrossEncoder
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
class BibleRAG:
"""
RAG system bridging the canonical Bible text and personal SQLite notes.
"""
def __init__(
self,
llm_model_path: str = "E:/Python/AI/gemma-2-9b-it-Q5_K_L.gguf",
#llm_model_path: str = "E:/Python/AI/Meta-Llama-3.1-8B-Instruct-Q4_K_L.gguf", # If you would like to use the Llama 3.1 8B model
embed_model_name: str = "all-mpnet-base-v2",
bible_path: str = 'E:/Python/bible/bible_web_embedding.json',
bible_embeddings_path: str = "E:/Python/bible/bible_embeddings.npy",
cross_encoder_name: str = "BAAI/bge-reranker-v2-m3",
device: Optional[str] = None,
        max_context_tokens: int = 4096,  # Limited to fit GPU memory and keep generation fast
        batch_size: int = 8,  # Kept small: VRAM fragmentation means llama-cpp needs contiguous blocks of GPU memory for matrix multiplication
):
self.llm_model_path = llm_model_path
self.embed_model_name = embed_model_name
self.bible_path = bible_path
self.bible_embeddings_path = bible_embeddings_path
self.cross_encoder_name = cross_encoder_name
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
self.max_context_tokens = max_context_tokens
self.batch_size = batch_size
self.llm = None
self.notes_index = None
self.notes_content = []
self._set_model_type()
self._load_bible_data()
self._load_embedding_models()
self._build_bible_faiss_index()
def _set_model_type(self):
path_lower = self.llm_model_path.lower()
if "Meta-Llama-3.1" in path_lower or "llama-3.2" in path_lower:
self.model_type = "llama"
elif "gemma-2" in path_lower:
self.model_type = "gemma"
else:
raise ValueError(f"Unknown or unsupported model type for path: {self.llm_model_path}")
def _load_bible_data(self):
with open(self.bible_path, 'r', encoding='utf-8') as bible:
self.bible_data = json.load(bible)
    # Load the embedding model and cross-encoder on CPU to leave GPU memory for the LLM.
    # This trades a few seconds of speed for roughly 35% VRAM savings.
    def _load_embedding_models(self):
        print("Loading SentenceTransformer on CPU...")
        self.embed_model = SentenceTransformer(self.embed_model_name, device="cpu", trust_remote_code=True)
        print(f"Loading CrossEncoder on CPU: {self.cross_encoder_name}")
        self.cross_encoder = CrossEncoder(self.cross_encoder_name, device="cpu")
def _build_bible_faiss_index(self):
# Extract only the book and verse wording for inference
bible_text = [f"{item['metadata']['book']} {item['metadata']['text_only']}" for item in self.bible_data]
if os.path.exists(self.bible_embeddings_path):
print("Loading cached Bible embeddings...")
embeddings = np.load(self.bible_embeddings_path)
else:
print("No cache found. Embedding Bible now...")
embeddings = self.embed_model.encode(
bible_text,
batch_size=self.batch_size,
show_progress_bar=True,
convert_to_numpy=True,
normalize_embeddings=True
)
np.save(self.bible_embeddings_path, embeddings)
dimension = embeddings.shape[1]
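        # Metric note (given normalize_embeddings=True above): for unit vectors,
        # squared L2 distance d and cosine similarity c satisfy d = 2 * (1 - c),
        # so nearest-by-L2 ranks identically to cosine similarity; faiss.IndexFlatIP
        # on normalized vectors would be an equivalent choice.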
self.index = faiss.IndexFlatL2(dimension)
self.index.add(embeddings.astype('float32'))
print(f"FAISS index built with {self.index.ntotal} vectors.")
def _embed_query(self, query: str) -> np.ndarray:
"""Shared utility to embed user queries."""
return self.embed_model.encode(
[query],
batch_size=1,
show_progress_bar=False,
convert_to_numpy=True,
normalize_embeddings=True
).astype("float32")
def retrieve_bible(self, query: str, k: int = 10, rerank: bool = True, bi_k: int = 40) -> List[Dict[str, Any]]:
"""
Two-stage retrieval pipeline (as defined in the MD specification):
1. Bi-Encoder (FAISS + all-mpnet-base-v2) → fast candidate retrieval
2. Cross-Encoder reranking → precision boost
Args:
query (str): User question / prompt
k (int): Final number of verses to return (spec recommends 5–8)
rerank (bool): Whether to apply cross-encoder reranking
bi_k (int): How many candidates to fetch from FAISS before reranking
(higher = better reranking quality, still very fast)
Returns:
list[dict]: Top-k results with content, scores, and full metadata
Example item:
{
"book": "John",
"chapter": 3,
"verse": 16,
"content": "For God so loved the world...",
"bi_score": 0.892,
"cross_score": 0.945,
"index": 1234
}
"""
query_emb = self._embed_query(query)
search_k = bi_k if rerank else k
distances, indices = self.index.search(query_emb, search_k)
candidates = []
for idx, dist in zip(indices[0], distances[0]):
doc = self.bible_data[idx].copy()
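            # bi_score is heuristic: IndexFlatL2 returns *squared* L2 distance, so for
            # normalized vectors 1.0 - dist equals 2*cos - 1. It is monotone in cosine
            # similarity, which is all the ranking below requires.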
doc.update({
"bi_score": float(1.0 - dist),
"faiss_distance": float(dist),
"index": int(idx)
})
candidates.append(doc)
if not rerank or len(candidates) <= k:
candidates.sort(key=lambda x: x["bi_score"], reverse=True)
return candidates[:k]
cross_inputs = [(query, doc["content"]) for doc in candidates]
cross_scores = self.cross_encoder.predict(cross_inputs)
for i, score in enumerate(cross_scores):
candidates[i]["cross_score"] = float(score)
candidates.sort(key=lambda x: x["cross_score"], reverse=True)
return candidates[:k]
def index_notes(self, notes_manager: 'NotesManager'):
"""
Fetches notes from DB, embeds them, and builds a secondary FAISS index.
"""
raw_notes = notes_manager.get_all_notes()
if not raw_notes:
print("No notes found to index.")
return
self.notes_content = [f"Date: {n[1]} | Content: {n[0]} | Verses: {n[3]}" for n in raw_notes]
note_embeddings = self.embed_model.encode(
self.notes_content,
convert_to_numpy=True,
normalize_embeddings=True
).astype('float32')
self.notes_index = faiss.IndexFlatL2(note_embeddings.shape[1])
self.notes_index.add(note_embeddings)
print(f"Notes Index built with {len(raw_notes)} personal reflections.")
def retrieve_notes(self, query: str, k_notes: int = 2, rerank: bool = True, k_notes_total: int = 10, threshold: float = 0.01) -> List[Dict[str, Any]]:
if not self.notes_index:
return []
query_emb = self._embed_query(query)
note_distances, note_indices = self.notes_index.search(query_emb, k_notes_total)
personal_notes = []
for i, idx in enumerate(note_indices[0]):
if idx != -1:
personal_notes.append({
"notes_content": self.notes_content[idx],
"note_score": float(1.0 - note_distances[0][i])
})
if not rerank or not personal_notes:
personal_notes.sort(key=lambda x: x["note_score"], reverse=True)
return personal_notes[:k_notes]
notes_cross_inputs = [(query, note["notes_content"]) for note in personal_notes]
notes_cross_scores = self.cross_encoder.predict(notes_cross_inputs)
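        # Note: CrossEncoder.predict applies a sigmoid by default when the model has a
        # single output logit, so these scores typically land in (0, 1); the small
        # threshold below drops only clearly irrelevant notes.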
final_notes = []
for i, score in enumerate(notes_cross_scores):
if score > threshold:
personal_notes[i]["note_cross_score"] = float(score)
final_notes.append(personal_notes[i])
final_notes.sort(key=lambda x: x["note_cross_score"], reverse=True)
return final_notes[:k_notes]
def initialize_llm(self):
print(f"Loading LLM from {self.llm_model_path}...")
self.llm = LlamaCpp(
model_path=self.llm_model_path,
n_ctx=self.max_context_tokens,
max_tokens=768,
n_batch=self.batch_size,
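            # n_gpu_layers=-1 offloads every model layer to the GPU (llama-cpp convention)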
n_gpu_layers=-1,
temperature=0.3,
repeat_penalty=1.1,
verbose=False,
            f16_kv=True,  # 16-bit KV cache halves cache memory vs the 32-bit fallback (f16_kv=False); an 8-bit cache would need llama.cpp's quantized type_k/type_v options
            use_mlock=True,  # Lock model weights in RAM so the OS cannot swap them to disk; with 64 GB of RAM (~24 GB used with everything loaded) this is no real constraint
top_p=0.95,
top_k=30,
model_kwargs={
"offload_kqv": False, # Moves the Key-Value cache to GPU true - left false to maximize GPU space (took speed hit)
"flash_attn": False, # Reduces VRAM usage (if your GPU supports it / not supported on my GPU) - left false
"context_shift": False # Experimental feature for long-context handling, may cause instability and not needed for my context window - left false
}
)
def _create_prompt_template(self) -> ChatPromptTemplate:
        # A dictionary of templates allows each model's prompt to be tuned independently.
        # Repeating the shared rules across templates is verbose, but keeps each one cleaner and easier to modify in isolation.
templates = {
"gemma": (
"<start_of_turn>user\n"
"You are a compassionate friend sharing wisdom from ONLY THE CONTEXT and my PERSONAL NOTES.\n\n"
"Rules:\n"
"1. Explain themes and connect passages.\n"
"2. Synthesize BIBLE CONTEXT with USER'S PERSONAL NOTES.\n"
"3. Cite references like (John 14:27).\n"
"4. Long-form, reflective essay style.\n"
"5. Conversational, heart-to-heart, respectful tone.\n"
"6. No lists. No 'therefore'. No 'Amen'. No repetition.\n"
"7. Stay within 768 tokens.\n\n"
"CONTEXT:\n{context}\n\n"
"PERSONAL NOTES:\n{personal_notes}\n\n"
"QUESTION:\n{question}<end_of_turn>\n"
"<start_of_turn>model "
),
"llama": (
"<|start_header_id|>system<|end_header_id|>\n"
"You are a compassionate friend sharing wisdom from ONLY THE CONTEXT and my PERSONAL NOTES.\n\n"
"Rules:\n"
"1. Explain themes and connect passages.\n"
"2. Synthesize BIBLE CONTEXT with USER'S PERSONAL NOTES.\n"
"3. Cite references like (John 14:27).\n"
"4. Long-form, reflective essay style.\n"
"5. Conversational, heart-to-heart, respectful tone.\n"
"6. No lists. No 'therefore'. No 'Amen'. No repetition.\n"
"7. Stay within 768 tokens.\n\n"
"<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
"CONTEXT:\n{context}\n\n"
"QUESTION:\n{question}\n\n"
"<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)
}
print(f"Using prompt template for: {self.model_type}")
return ChatPromptTemplate.from_template(templates.get(self.model_type))
def _expand_and_merge_context(self, docs: List[Dict[str, Any]], window: int = 3) -> str:
        # Expand each main verse to the surrounding +/- `window` verses for contextual understanding of the theme
if not docs:
return "No relevant verses found."
# 1. Expand range of main verse to surrounding verses
ranges = []
for doc in docs:
idx = doc["index"]
start = max(0, idx - window)
end = min(len(self.bible_data) - 1, idx + window)
ranges.append([start, end, doc])
# 2. Merge overlapping intervals
ranges.sort(key=lambda x: x[0])
merged = []
for current in ranges:
if not merged or merged[-1][1] < current[0] - 1:
merged.append(current)
else:
merged[-1][1] = max(merged[-1][1], current[1])
curr_score = current[2].get("cross_score", current[2].get("bi_score", 0))
prev_score = merged[-1][2].get("cross_score", merged[-1][2].get("bi_score", 0))
if curr_score > prev_score:
merged[-1][2] = current[2]
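        # Worked example (hypothetical indices): hits at 500 and 503 with window=3 give
        # ranges [497, 503] and [500, 506]; these overlap, so they merge into [497, 506],
        # keeping whichever hit scored higher as the "main verse" anchor.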
        # 3. Format the merged ranges for the LLM and for display to the user (hybrid approach)
context_parts = []
for start, end, orig_doc in merged:
passage = []
meta_orig = orig_doc.get("metadata", {})
# Anchor the LLM with the "Main Verse"
ref = f"{meta_orig.get('book')} {meta_orig.get('chapter')}:{meta_orig.get('verse')}"
passage.append(f"** Main verse: {ref} **")
current_book, current_chapter = None, None
for i in range(start, end + 1):
v_meta = self.bible_data[i].get("metadata", {})
book, chap, vnum = v_meta.get("book", "Unknown"), v_meta.get("chapter", "?"), v_meta.get("verse", "?")
text = v_meta.get("text_only", "").strip()
if book != current_book or chap != current_chapter:
passage.append(f"\n[{book} {chap}]")
current_book, current_chapter = book, chap
passage.append(f"{vnum}: {text}")
context_parts.append("\n".join(passage))
return "\n\n----- \n\n".join(context_parts)
    @staticmethod
    def format_notes_for_llm(notes_list: List[Dict[str, Any]]) -> str:
        formatted_notes = []
        for note in notes_list:
            # Parse the "Date: ... | Content: ... | Verses: ..." string built in index_notes.
            # Splitting on the labeled delimiters keeps note content containing " | " intact.
            body = note['notes_content']
            date_part, _, rest = body.partition(" | Content: ")
            content, _, verses = rest.rpartition(" | Verses: ")
            date = date_part.replace("Date: ", "").strip()
            formatted_notes.append(f"On {date}, I reflected: '{content.strip()}' (Related to: {verses.strip()})")
        return "\n\n".join(formatted_notes)
def query(self, question: str, b_k: int = 7, n_k: int = 3) -> str:
if not self.llm:
self.initialize_llm()
bible_docs = self.retrieve_bible(query=question, k=b_k)
notes_docs = self.retrieve_notes(query=question, k_notes=n_k)
formatted_bible = self._expand_and_merge_context(bible_docs)
formatted_notes = self.format_notes_for_llm(notes_docs)
        # Print the verses and notes retrieved by the bi-/cross-encoder stages; interesting for personal study
print(f"Here is the Bible context:\n{formatted_bible}\n")
print(f"Here are the Personal Notes:\n{formatted_notes}\n")
print(f"Bible context extracted.\nPersonal notes extracted.\nReasoning for: '{question}'...\n")
prompt = self._create_prompt_template()
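        # LCEL pipeline: fill the prompt, pass the rendered string to the LLM, parse the completion to str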
chain = prompt | self.llm | StrOutputParser()
return chain.invoke({
"context": formatted_bible,
"personal_notes": formatted_notes,
"question": question
})
class NotesManager:
def __init__(self, db_path: str = "E:/Python/bible/sovereign_notes.db"):
self.db_path = db_path
self._init_db()
def _init_db(self):
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS personal_notes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
date TEXT,
content TEXT,
tags TEXT,
associated_verses TEXT
)
''')
conn.commit()
def add_note(self, content: str, tags: str = "", verses: str = ""):
'''
Saves a new personal note.
        Example:
            # Add a sample note about peace to see if the RAG can find it later
            # (the date is added automatically)
            notes_db.add_note(
                content=("I realized today that peace isn't the absence of conflict, "
                         "but the presence of God in it. Like Jesus sleeping in the boat during the storm."),
                tags="peace, trials, personal-reflection",
                verses="Mark 4:38"
            )
'''
date_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"INSERT INTO personal_notes (date, content, tags, associated_verses) VALUES (?, ?, ?, ?)",
(date_str, content, tags, verses)
)
conn.commit()
print(f"Note saved successfully at {date_str}")
def get_all_notes(self) -> List[Any]:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("SELECT content, date, tags, associated_verses FROM personal_notes")
return cursor.fetchall()
if __name__ == "__main__":
# Example usage
rag_system = BibleRAG()
notes_db = NotesManager()
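    # First run only: the notes table starts empty, so index_notes will report
    # "No notes found to index." Optionally seed a sample reflection first
    # (illustrative content; see NotesManager.add_note's docstring):
    # notes_db.add_note(
    #     content="Peace isn't the absence of conflict, but the presence of God in it.",
    #     tags="peace, trials, personal-reflection",
    #     verses="Mark 4:38",
    # )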
rag_system.index_notes(notes_db)
query_str = "What does the Bible and my personal notes say about finding peace in trials?"
response_text = rag_system.query(query_str)
print("\n=== Response ===\n")
print(response_text)