forked from kcarnold/pysword
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpysword.py
111 lines (91 loc) · 4.26 KB
/
pysword.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# A native Python implementation of the SWORD Project Bible Reader
# Currently only ztext Bible modules are implemented.
# * ztext format documentation
# Record layouts below are written as Python struct-module format strings.
# See https://docs.python.org/3/library/struct.html
# Take the Old Testament (OT) for example. Three files:
#
# - ot.bzv: Maps verses to character ranges in compressed buffers.
# 10 bytes ('<IIH') for each verse in the Bible:
# - buffer_num (I): which compressed buffer the verse is located in
# - verse_start (I): the location in the uncompressed buffer where the verse begins
# - verse_len (H): length of the verse, in bytes of uncompressed data
# These 10-byte records are densely packed, indexed by VerseKey 'Indices' (docs later).
# So the record for the verse with index x starts at byte 10*x.
#
# - ot.bzs: Tells where the compressed buffers start and end.
# 12 bytes ('<III') for each compressed buffer:
# - offset (I): where the compressed buffer starts in the file
# - size (I): the length of the compressed data, in bytes
# - uc_size (I): the length of the uncompressed data, in bytes (unused)
# These 12-byte records are densely packed, indexed by buffer_num (see previous).
# So the record for compressed buffer buffer_num starts at byte 12*buffer_num.
#
# - ot.bzz: Contains the compressed text. Read 'size' bytes starting at 'offset'.
#
# NT is analogous.
#
# Example usage:
# python pysword.py esv 1pet 2 9
import os
# Directory that holds the installed ztext modules, one sub-directory per
# module.  expanduser("~") is used rather than os.environ["HOME"] so that
# importing this file does not raise KeyError when HOME is unset; on POSIX
# with HOME set the resulting path is the same as before.
modules_path = os.path.join(os.path.expanduser("~"),
                            ".sword", "modules", "texts", "ztext")
from books import ref_to_index, testaments, find_book, Book
import struct, zlib
from os.path import join as path_join
class ZModule(object):
    '''Reader for one ztext-format SWORD Bible module.

    Opens the three data files of each testament ('ot' and 'nt') found under
    modules_path/<module> and keeps them open for the life of the object.
    Call close() when finished, or use the instance as a context manager:

        with ZModule('esv') as module:
            text = module.text_for_ref('1pet', 2, 9)

    Attributes:
        module: the module name passed to the constructor.
        files: dict mapping 'ot' / 'nt' to the list
            [verse_to_buf, buf_to_loc, text] of open binary file objects.
    '''

    # Most recently decompressed buffer, stored as
    # ((testament, buf_num), data), or None when nothing is cached.  The
    # class-level default guarantees the attribute exists on every instance.
    _last_buffer = None

    def __init__(self, module):
        '''Open the data files of the module named `module`.

        Raises whatever open() raises if a data file cannot be opened; any
        files that were already opened are closed first.
        '''
        self.module = module
        self.files = {}
        try:
            for testament in ('ot', 'nt'):
                self.files[testament] = self.get_files(testament)
        except Exception:
            # Do not leak the handles of the testament that did open.
            self.close()
            raise

    def __enter__(self):
        '''Return self so the module can be used in a `with` statement.'''
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        '''Close the data files; exceptions are never suppressed.'''
        self.close()

    def close(self):
        '''Close every open data file and drop the cached buffer.

        Safe to call more than once.
        '''
        for handles in self.files.values():
            for handle in handles:
                handle.close()
        self._last_buffer = None

    def get_files(self, testament):
        '''Given a testament ('ot' or 'nt'), returns a list of open files
        [verse_to_buf, buf_to_loc, text]
        (the <testament>.bzv, .bzs and .bzz files, opened in binary mode).

        If one of the files cannot be opened, the ones opened so far are
        closed and the error is re-raised.
        '''
        base = path_join(modules_path, self.module)
        handles = []
        try:
            for code in ('v', 's', 'z'):
                name = path_join(base, '%s.bz%s' % (testament, code))
                handles.append(open(name, 'rb'))
        except Exception:
            for handle in handles:
                handle.close()
            raise
        return handles

    def text_for_index(self, testament, index):
        '''Get the text for a given index.

        Reads the 10-byte '<IIH' record number `index` from the testament's
        verse file and returns the matching slice of the decompressed buffer
        it points at.  The result is raw, undecoded data.
        '''
        verse_to_buf = self.files[testament][0]
        # Read the verse record.
        verse_to_buf.seek(10 * index)
        buf_num, verse_start, verse_len = struct.unpack(
            '<IIH', verse_to_buf.read(10))
        buffer_text = self.uncompressed_text(testament, buf_num)
        return buffer_text[verse_start:verse_start + verse_len]

    def uncompressed_text(self, testament, buf_num):
        '''Return the decompressed contents of buffer `buf_num`.

        The last buffer decompressed is remembered, so consecutive requests
        for the same (testament, buf_num) -- the pattern produced by the
        all_verses_* generators -- do not read and inflate it again.
        '''
        key = (testament, buf_num)
        cached = self._last_buffer
        if cached is not None and cached[0] == key:
            return cached[1]

        buf_to_loc, text = self.files[testament][1:3]
        # Determine where the compressed data starts and ends.
        buf_to_loc.seek(buf_num * 12)
        # The third field (uncompressed size) is not needed here.
        offset, size, _uc_size = struct.unpack('<III', buf_to_loc.read(12))
        # Get the compressed data.
        text.seek(offset)
        data = zlib.decompress(text.read(size))
        self._last_buffer = (key, data)
        return data

    def text_for_ref(self, book, chapter, verse):
        '''Get the text for a given reference.

        `chapter` and `verse` are passed through int(), so numeric strings
        are accepted.  `book` is handed to ref_to_index unchanged.
        '''
        chapter, verse = int(chapter), int(verse)
        testament, idx = ref_to_index(book, chapter, verse)
        return self.text_for_index(testament, idx)

    def all_verses_in_testament(self, testament):
        '''Yield (book, chapter, verse, text) for every verse of every book
        listed in testaments[testament], in order.
        '''
        for book in testaments[testament]:
            for verse in self.all_verses_in_book(book):
                yield verse

    def all_verses_in_book(self, book):
        '''Yield (book, chapter, verse, text) for every verse in `book`.

        `book` may be a Book instance or anything find_book can resolve to
        one.  Chapter and verse numbers are 1-based; each entry of
        book.chapter_lengths is used as that chapter's verse count.
        '''
        if not isinstance(book, Book):
            book = find_book(book)
        for chapter, verse_count in enumerate(book.chapter_lengths, 1):
            for verse in range(1, verse_count + 1):
                index = book.get_index_for_ref(chapter, verse)
                yield (book, chapter, verse,
                       self.text_for_index(book.testament, index))
def main(argv=None):
    '''Command-line entry point: print the text of one verse.

    `argv` is the full argument vector, program name first (defaults to
    sys.argv); the expected arguments are MODULE BOOK CHAPTER VERSE.
    Returns 0 on success, or 2 after writing a usage message to stderr when
    the argument count is wrong.
    '''
    import sys
    if argv is None:
        argv = sys.argv
    args = argv[1:]
    if len(args) != 4:
        sys.stderr.write('usage: pysword.py MODULE BOOK CHAPTER VERSE\n')
        return 2
    mod_name, book, chapter, verse = args
    module = ZModule(mod_name)
    text = module.text_for_ref(book, chapter, verse)
    if not isinstance(text, str):
        # On Python 3 the decompressed data is bytes; decode it so the verse
        # is not printed as a b'...' literal.
        # NOTE(review): assumes the module text is UTF-8 -- confirm against
        # the module's configuration.
        text = text.decode('utf-8', 'replace')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(text)
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv))