# In [ ]:
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 7 20:29:23 2026
Transform Bible Json From:
{
"Genesis": {
"1": {
"1": "In the beginning, God created the heavens and the earth.",
"2": "The earth was formless and empty. Darkness was on the surface of the deep and God’s Spirit was hovering over the surface of the waters.",
"3": "God said, “Let there be light,” and there was light.", ...
}
}
}
To restructured Bible JSON for improved Retrieval-Augmented Generation
(a flat list containing one object per verse, each shaped like this):
{
"id": "GEN-1-1",
"content": "Genesis 1:1 - In the beginning, God created the heavens and the earth.",
"metadata": {
"book": "Genesis",
"chapter": 1,
"verse": 1,
"text_only": "In the beginning, God created the heavens and the earth."
}
}
@author: David DiPaola
"""
import json
# 1. Configure the input and output locations.
# Replace these placeholder paths with the real locations on your machine.
input_filename = 'C:/Your/Path/bible_WEB.json'
output_filename = 'C:/Your/Path/bible_web_embedding.json'


def _book_code(book_name, taken):
    """Return a short upper-case ID prefix for *book_name*, unique in *taken*.

    The code is the first three characters of the book name with all
    whitespace removed and upper-cased ("Genesis" -> "GEN",
    "1 Samuel" -> "1SA").  If that prefix was already handed out to another
    book, the prefix is lengthened one character at a time until it is
    unique; a numeric suffix is the last resort when the whole name is
    exhausted.  The chosen code is added to *taken* before returning.

    Args:
        book_name: Book title exactly as it appears as a key in the input.
        taken: Set of codes already assigned; mutated in place.

    Returns:
        A code that no previously processed book has received.
    """
    compact = "".join(book_name.split()).upper()
    length = 3
    code = compact[:length]
    # A plain 3-character prefix is ambiguous for conventional English book
    # names (e.g. "Judges"/"Jude", "Philippians"/"Philemon"), so extend it.
    while code in taken and length < len(compact):
        length += 1
        code = compact[:length]
    # Whole name used up and still colliding: fall back to a counter.
    base = code
    counter = 2
    while code in taken:
        code = f"{base}{counter}"
        counter += 1
    taken.add(code)
    return code


def flatten_bible(bible_data):
    """Flatten nested Bible JSON into a list of one entry per verse.

    Args:
        bible_data: Mapping of ``{book: {chapter: {verse: text}}}`` where the
            chapter and verse keys are strings that parse as integers.

    Returns:
        A list of dicts with the keys ``id`` (``"<CODE>-<chapter>-<verse>"``),
        ``content`` (``"<book> <chapter>:<verse> - <text>"``) and
        ``metadata`` (book, integer chapter, integer verse, raw text).
        Entries follow the iteration order of *bible_data*.

    Raises:
        ValueError: If a chapter or verse key cannot be converted by ``int``.
    """
    entries = []
    assigned_codes = set()
    for book_name, chapters in bible_data.items():
        # One code per book, computed once, so every verse of the book shares
        # it and no two books can produce the same id.
        code = _book_code(book_name, assigned_codes)
        for chapter_num, verses in chapters.items():
            for verse_num, verse_text in verses.items():
                # Searchable reference string, prepended to the verse so the
                # embedded text carries its own citation.
                reference = f"{book_name} {chapter_num}:{verse_num}"
                entries.append({
                    "id": f"{code}-{chapter_num}-{verse_num}",
                    "content": f"{reference} - {verse_text}",  # This is what is embedded
                    "metadata": {
                        "book": book_name,
                        "chapter": int(chapter_num),
                        "verse": int(verse_num),
                        "text_only": verse_text,
                    },
                })
    return entries


# The guard is true both when run as a script and inside a notebook cell, so
# bible_data / flat_bible remain available as globals there; it only prevents
# file I/O when this module is imported.
if __name__ == "__main__":
    # 2. Load the downloaded nested JSON file.
    with open(input_filename, 'r', encoding='utf-8') as f:
        bible_data = json.load(f)

    # 3. Flatten the nested structure into one object per verse.
    flat_bible = flatten_bible(bible_data)

    # 4. Save the new flat JSON file.  ensure_ascii=False keeps typographic
    # quotes in the verse text readable instead of \uXXXX escapes; the file
    # is written as UTF-8 so this is safe.
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(flat_bible, f, indent=2, ensure_ascii=False)

    print(f"Success! Processed {len(flat_bible)} verses into {output_filename}")