# Untitled2
# In [ ]:
# -*- coding: utf-8 -*-
"""
Created on Sat Feb  7 20:29:23 2026

Transform Bible JSON from:

    {
      "Genesis": {
        "1": {
          "1": "In the beginning, God created the heavens and the earth.",
          "2": "The earth was formless and empty. Darkness was on the surface of the deep and God’s Spirit was hovering over the surface of the waters.",
          "3": "God said, “Let there be light,” and there was light.",
          ...
        }
      }
    }
    
To restructured Bible JSON for improved Retrieval Augmented Generation
(one object like the following per verse):

    {
      "id": "GEN-1-1",
      "content": "Genesis 1:1 - In the beginning, God created the heavens and the earth.",
      "metadata": {
        "book": "Genesis",
        "chapter": 1,
        "verse": 1,
        "text_only": "In the beginning, God created the heavens and the earth."
      }
    }

@author: David DiPaola

"""

import json

# 1. Input and output locations
# Replace both paths with the actual locations of your files.
input_filename = 'C:/Your/Path/bible_WEB.json'
output_filename = 'C:/Your/Path/bible_web_embedding.json'


def build_book_codes(book_names, min_length=3):
    """Map each book name to a short, unique, upper-case ID prefix.

    A plain ``name[:3].upper()`` prefix is not unique: "Judges" and "Jude"
    both give "JUD", and "1 Chronicles" and "1 Corinthians" both give "1 C".
    Here each name is first reduced to its alphanumeric characters, and the
    prefix is then lengthened only as far as needed to tell the book apart
    from every other book in ``book_names``.

    Args:
        book_names: Iterable of book names (for example, a dict keyed by
            book name).
        min_length: Shortest prefix length to try.

    Returns:
        A dict mapping each book name to its code, e.g.
        ``{"Genesis": "GEN", "Judges": "JUDG", "Jude": "JUDE"}``.
    """
    names = list(book_names)

    # Drop spaces and punctuation so codes never contain the "-" separator
    # used in IDs (or whitespace); fall back to the raw name if nothing is left.
    normalized = {}
    for name in names:
        key = "".join(ch for ch in name if ch.isalnum()).upper()
        normalized[name] = key or name.upper()

    codes = {}
    used = set()
    for name in names:
        key = normalized[name]
        others = [normalized[other] for other in names if other != name]

        # Grow the prefix until no other book starts with the same characters
        # (or until the whole name has been used).
        length = min(min_length, len(key))
        while length < len(key) and any(o[:length] == key[:length] for o in others):
            length += 1
        code = key[:length]

        # Last resort: two names that normalize to the same string get a
        # numeric suffix so the codes stay distinct.
        candidate = code
        suffix = 2
        while candidate in used:
            candidate = f"{code}{suffix}"
            suffix += 1

        used.add(candidate)
        codes[name] = candidate
    return codes


def flatten_bible(bible_data):
    """Flatten nested ``{book: {chapter: {verse: text}}}`` data into a list.

    Args:
        bible_data: Dict of book name -> dict of chapter number (str) ->
            dict of verse number (str) -> verse text.

    Returns:
        A list with one dict per verse, in input order. Each dict has an
        ``id`` ("<BOOKCODE>-<chapter>-<verse>"), a ``content`` string
        ("<book> <chapter>:<verse> - <text>") and a ``metadata`` dict with
        ``book``, ``chapter`` (int), ``verse`` (int) and ``text_only``.

    Raises:
        ValueError: If a chapter or verse key cannot be converted to ``int``.
    """
    book_codes = build_book_codes(bible_data)
    flat = []

    for book_name, chapters in bible_data.items():
        book_code = book_codes[book_name]
        for chapter_num, verses in chapters.items():
            for verse_num, verse_text in verses.items():
                # Create a searchable reference string
                reference = f"{book_name} {chapter_num}:{verse_num}"

                flat.append({
                    "id": f"{book_code}-{chapter_num}-{verse_num}",
                    # Reference + text together: this is what is embedded
                    "content": f"{reference} - {verse_text}",
                    "metadata": {
                        "book": book_name,
                        "chapter": int(chapter_num),
                        "verse": int(verse_num),
                        "text_only": verse_text,
                    },
                })
    return flat


if __name__ == "__main__":
    # 2. Load the downloaded nested JSON file
    with open(input_filename, 'r', encoding='utf-8') as f:
        bible_data = json.load(f)

    # 3. Flatten the nested structure into one entry per verse
    flat_bible = flatten_bible(bible_data)

    # 4. Save the new flat JSON file. ensure_ascii=False keeps curly quotes
    # and other non-ASCII characters readable instead of \uXXXX escapes; the
    # file is opened as UTF-8, so they are written correctly.
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(flat_bible, f, indent=2, ensure_ascii=False)

    print(f"Success! Processed {len(flat_bible)} verses into {output_filename}")