Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add encrypted documents support #118

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/html/opendocument_8py_source.html
Original file line number Diff line number Diff line change
Expand Up @@ -1093,7 +1093,7 @@
<div class="line"><a name="l01000"></a><span class="lineno"> 1000</span>&#160;<span class="comment"># an open readable stream</span></div>
<div class="line"><a name="l01001"></a><span class="lineno"> 1001</span>&#160;<span class="comment"># @return a reference to the structure (an OpenDocument instance)</span></div>
<div class="line"><a name="l01002"></a><span class="lineno"> 1002</span>&#160;<span class="comment"># </span></div>
<div class="line"><a name="l01003"></a><span class="lineno"><a class="line" href="namespaceodf_1_1opendocument.html#a5f91a599e953a7b3d3cd07ad3a696467"> 1003</a></span>&#160;<span class="keyword">def </span><a class="code" href="namespaceodf_1_1opendocument.html#a5f91a599e953a7b3d3cd07ad3a696467">load</a>(odffile):</div>
<div class="line"><a name="l01003"></a><span class="lineno"><a class="line" href="namespaceodf_1_1opendocument.html#a5f91a599e953a7b3d3cd07ad3a696467"> 1003</a></span>&#160;<span class="keyword">def </span><a class="code" href="namespaceodf_1_1opendocument.html#a5f91a599e953a7b3d3cd07ad3a696467">load</a>(odffile, password=None):</div>
<div class="line"><a name="l01004"></a><span class="lineno"> 1004</span>&#160; assert(type(odffile)==type(<span class="stringliteral">u&quot;&quot;</span>) <span class="keywordflow">or</span> <span class="stringliteral">&#39;rb&#39;</span> <span class="keywordflow">in</span> repr(odffile) \</div>
<div class="line"><a name="l01005"></a><span class="lineno"> 1005</span>&#160; <span class="keywordflow">or</span> <span class="stringliteral">&#39;BufferedReader&#39;</span> <span class="keywordflow">in</span> repr(odffile) <span class="keywordflow">or</span> <span class="stringliteral">&#39;BytesIO&#39;</span> <span class="keywordflow">in</span> repr(odffile))</div>
<div class="line"><a name="l01006"></a><span class="lineno"> 1006</span>&#160;</div>
Expand All @@ -1104,7 +1104,7 @@
<div class="line"><a name="l01011"></a><span class="lineno"> 1011</span>&#160; <span class="comment"># Look in the manifest file to see if which of the four files there are</span></div>
<div class="line"><a name="l01012"></a><span class="lineno"> 1012</span>&#160; manifestpart = z.read(<span class="stringliteral">&#39;META-INF/manifest.xml&#39;</span>)</div>
<div class="line"><a name="l01013"></a><span class="lineno"> 1013</span>&#160; manifest = <a class="code" href="namespaceodf_1_1odfmanifest.html#aa30e4aac456f93d3a2ec1d6eafd77004">manifestlist</a>(manifestpart)</div>
<div class="line"><a name="l01014"></a><span class="lineno"> 1014</span>&#160; __loadxmlparts(z, manifest, doc, <span class="stringliteral">u&#39;&#39;</span>)</div>
<div class="line"><a name="l01014"></a><span class="lineno"> 1014</span>&#160; __loadxmlparts(z, manifest, doc, <span class="stringliteral">u&#39;&#39;</span>, password)</div>
<div class="line"><a name="l01015"></a><span class="lineno"> 1015</span>&#160; <span class="keywordflow">for</span> mentry,mvalue <span class="keywordflow">in</span> manifest.items():</div>
<div class="line"><a name="l01016"></a><span class="lineno"> 1016</span>&#160; <span class="keywordflow">if</span> mentry[:9] == <span class="stringliteral">u&quot;Pictures/&quot;</span> <span class="keywordflow">and</span> len(mentry) &gt; 9:</div>
<div class="line"><a name="l01017"></a><span class="lineno"> 1017</span>&#160; doc.addPicture(mvalue[<span class="stringliteral">&#39;full-path&#39;</span>], mvalue[<span class="stringliteral">&#39;media-type&#39;</span>], z.read(mentry))</div>
Expand Down
50 changes: 46 additions & 4 deletions odf/odfmanifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,16 @@ def __init__(self):
self.manifest = {}

# Tags
# FIXME: Also handle encryption data
self.elements = {
(MANIFESTNS, 'file-entry'): (self.s_file_entry, self.donothing),
(MANIFESTNS, 'file-entry'): (self.s_file_entry, self.donothing),
(MANIFESTNS, 'encryption-data'): (self.e_file_entry, self.e_file_entry_close),
(MANIFESTNS, 'algorithm'): (self.e_alg_file_entry, self.donothing),
(MANIFESTNS, 'key-derivation'): (self.e_key_der_file_entry, self.donothing),
(MANIFESTNS, 'start-key-generation'): (self.e_key_gen_file_entry, self.donothing)
}

self._encr_el_key = None

def handle_starttag(self, tag, method, attrs):
method(tag,attrs)

Expand Down Expand Up @@ -81,9 +86,46 @@ def donothing(self, tag, attrs=None):
pass

def s_file_entry(self, tag, attrs):
    """
    Handle a manifest:file-entry start tag.
    Registers the entry in self.manifest under its full-path with the
    keys 'media-type' (empty string when the attribute is absent) and
    'full-path'. When the entry carries a non-empty manifest:size
    attribute, the size is stored too, an empty 'encrypted-data' dict is
    created and the entry is remembered in self._encr_el_key so that the
    encryption handlers know which entry they belong to.
    @param tag the (namespace, name) pair of the element
    @param attrs attributes of the element, keyed by (namespace, name)
    """
    path = attrs.get((MANIFESTNS, 'full-path'))
    media = attrs.get((MANIFESTNS, 'media-type'), "")
    entry = {'media-type': media, 'full-path': path}
    self.manifest[path] = entry

    size = attrs.get((MANIFESTNS, 'size'), None)
    if not size:
        return

    # The 'size' attribute is taken as the marker of an encrypted entry:
    # the code assumes the next element will be encryption-data.
    entry['size'] = size
    entry['encrypted-data'] = {}
    self._encr_el_key = path

def e_file_entry(self, tag, attrs):
    """
    Handle a manifest:encryption-data start tag.
    Stores the checksum type (default "SHA1/1K") and the checksum
    (default empty string) in the 'encrypted-data' dict of the entry
    currently referenced by self._encr_el_key.
    @param tag the (namespace, name) pair of the element
    @param attrs attributes of the element, keyed by (namespace, name)
    """
    encrypted = self.manifest[self._encr_el_key]['encrypted-data']
    encrypted['checksum-type'] = attrs.get((MANIFESTNS, 'checksum-type'), "SHA1/1K")
    encrypted['checksum'] = attrs.get((MANIFESTNS, 'checksum'), "")

def e_file_entry_close(self, tag):
    """
    Handle the end of a manifest:encryption-data element.
    Forgets the entry that was being filled with encryption data.
    @param tag the (namespace, name) pair of the element (unused)
    """
    # No entry is "current" any more once its encryption-data is closed.
    self._encr_el_key = None

def e_alg_file_entry(self, tag, attrs):
    """
    Handle a manifest:algorithm start tag.
    Stores the algorithm name (default "Blowfish CFB") and the
    initialisation vector (default empty string) under the 'algorithm'
    key of the current entry's 'encrypted-data' dict.
    @param tag the (namespace, name) pair of the element
    @param attrs attributes of the element, keyed by (namespace, name)
    """
    name = attrs.get((MANIFESTNS, 'algorithm-name'), "Blowfish CFB")
    vector = attrs.get((MANIFESTNS, 'initialisation-vector'), "")
    encrypted = self.manifest[self._encr_el_key]['encrypted-data']
    encrypted['algorithm'] = {
        'algorithm-name': name,
        'initialisation-vector': vector,
    }

def e_key_der_file_entry(self, tag, attrs):
    """
    Handle a manifest:key-derivation start tag.
    Stores the key-derivation parameters under the 'key-derivation' key
    of the current entry's 'encrypted-data' dict. Missing attributes
    fall back to "PBKDF2", key size "16", iteration count "1024" and an
    empty salt; all values are kept as strings.
    @param tag the (namespace, name) pair of the element
    @param attrs attributes of the element, keyed by (namespace, name)
    """
    derivation = {}
    derivation['key-derivation-name'] = attrs.get((MANIFESTNS, 'key-derivation-name'), "PBKDF2")
    derivation['key-size'] = attrs.get((MANIFESTNS, 'key-size'), "16")
    derivation['iteration-count'] = attrs.get((MANIFESTNS, 'iteration-count'), "1024")
    derivation['salt'] = attrs.get((MANIFESTNS, 'salt'), "")
    self.manifest[self._encr_el_key]['encrypted-data']['key-derivation'] = derivation

def e_key_gen_file_entry(self, tag, attrs):
    """
    Handle a manifest:start-key-generation start tag.
    Stores the start-key generation algorithm (default "SHA1") and its
    key size (default "20") under the 'start-key-generation' key of the
    current entry's 'encrypted-data' dict.
    @param tag the (namespace, name) pair of the element
    @param attrs attributes of the element, keyed by (namespace, name)
    """
    generation = {}
    generation['start-key-generation-name'] = attrs.get((MANIFESTNS, 'start-key-generation-name'), "SHA1")
    generation['key-size'] = attrs.get((MANIFESTNS, 'key-size'), "20")
    self.manifest[self._encr_el_key]['encrypted-data']['start-key-generation'] = generation


#-----------------------------------------------------------------------------
Expand Down
183 changes: 177 additions & 6 deletions odf/opendocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,15 @@

__doc__="""Use OpenDocument to generate your documents."""

import base64
import hashlib
import zipfile, time, uuid, sys, mimetypes, copy, os.path

# to allow Python3 to access modules in the same path
import zlib

from Crypto.Cipher import Blowfish, AES, DES3

sys.path.append(os.path.dirname(__file__))

# using BytesIO provides a cleaner interface than StringIO
Expand Down Expand Up @@ -798,6 +804,15 @@ def getElementsByType(self, elt):

return result


class OpenDocumentException(Exception):
    """Base class for the OpenDocument-specific exceptions of this module."""


class OpenDocumentEncryptionException(OpenDocumentException):
    """Raised when an encrypted document part cannot be decrypted."""


# Convenience functions
def OpenDocumentChart():
"""
Expand Down Expand Up @@ -869,7 +884,8 @@ def OpenDocumentTextMaster():
doc.body.addElement(doc.text)
return doc

def __loadxmlparts(z, manifest, doc, objectpath):

def __loadxmlparts(z, manifest, doc, objectpath, password=None):
"""
Parses a document from its zipfile
@param z an instance of zipfile.ZipFile
Expand All @@ -895,7 +911,16 @@ def __loadxmlparts(z, manifest, doc, objectpath):
from xml.sax._exceptions import SAXParseException
##########################################################
try:
xmlpart = z.read(xmlfile).decode("utf-8")
xmlpart = z.read(xmlfile)
if 'encrypted-data' in manifest[xmlfile].keys():
if not password:
raise OpenDocumentEncryptionException('Document is encrypted and password is not provided')
try:
xmlpart = __decrypt(xmlpart, manifest[xmlfile]['encrypted-data'], password, verify_checksum=False)
except OpenDocumentEncryptionException as err:
raise OpenDocumentEncryptionException('{}: {}'.format(xmlfile, err.message))
xmlpart = xmlpart.decode("utf-8")

doc._parsing = xmlfile

parser = make_parser()
Expand Down Expand Up @@ -972,7 +997,7 @@ def __detectmimetype(zipfd, odffile):
# Fall-through to last mechanism
return u'application/vnd.oasis.opendocument.text'

def load(odffile):
def load(odffile, password=None):
"""
Load an ODF file into memory
@param odffile unicode string: name of a file, or as an alternative,
Expand All @@ -986,10 +1011,18 @@ def load(odffile):
# Look in the manifest file to see which of the four files are present
manifestpart = z.read('META-INF/manifest.xml')
manifest = manifestlist(manifestpart)
__loadxmlparts(z, manifest, doc, u'')
for mentry,mvalue in manifest.items():
__loadxmlparts(z, manifest, doc, u'', password)
for mentry, mvalue in manifest.items():
if mentry[:9] == u"Pictures/" and len(mentry) > 9:
doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
raw_pic = z.read(mentry)
if 'encrypted-data' in mvalue.keys():
if not password:
raise OpenDocumentEncryptionException('Document is encrypted and password is not provided')
try:
raw_pic = __decrypt(raw_pic, mvalue['encrypted-data'], password, verify_checksum=True)
except OpenDocumentEncryptionException as err:
raise OpenDocumentEncryptionException('{}: {}'.format("filename", err.message))
doc.addPicture(mvalue['full-path'], mvalue['media-type'], raw_pic)
elif mentry == u"Thumbnails/thumbnail.png":
doc.addThumbnail(z.read(mentry))
elif mentry in (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml'):
Expand Down Expand Up @@ -1027,4 +1060,142 @@ def load(odffile):

return doc


# --------------------------------------
# Encryption functions
# --------------------------------------

def __normalize_name(algorithm_name):
    """ According to the OpenDocument docs (https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part3.html),
    the algorithms presented in the "encrypted-data" section can be specified in different ways:

    - As plain algorithm name: SHA1, SHA1/1K, Blowfish CFB
    - As IRI: http://www.w3.org/2000/09/xmldsig#sha256
    - With MANIFEST prefix: urn:oasis:names:tc:opendocument:xmlns:manifest:1.0#sha256-1k

    This function normalizes the name: for an IRI or a MANIFEST-prefixed
    name only the part after the first '#' is kept, and the result is
    lower-cased. An IRI without a '#' is lower-cased as a whole.

    :param algorithm_name: unicode string with the name found in the manifest.
    :return: the normalized, lower-case algorithm name.
    :raises TypeError: if algorithm_name is not a unicode string.
    """
    # Explicit check instead of an assert, which is stripped under -O.
    if not isinstance(algorithm_name, type(u'')):
        raise TypeError('algorithm name must be a unicode string, got {!r}'.format(type(algorithm_name)))

    if algorithm_name.startswith(('http', MANIFESTNS)):
        parts = algorithm_name.split('#')
        # An IRI without a fragment used to raise IndexError here;
        # keep the name unchanged in that case.
        if len(parts) > 1:
            algorithm_name = parts[1]
    return algorithm_name.lower()


def __inflate(data):
    """ Decompress a raw DEFLATE stream (no zlib header or trailer).
    :param data: the compressed bytes.
    :return: the decompressed bytes.
    """
    # Negative wbits selects the raw deflate format.
    inflater = zlib.decompressobj(-zlib.MAX_WBITS)
    return inflater.decompress(data) + inflater.flush()


def __deflate(data, compress_level=9):
    """ Compress data into a raw DEFLATE stream (no zlib header or trailer).
    :param data: the bytes to compress.
    :param compress_level: zlib compression level, 9 by default.
    :return: the compressed bytes.
    """
    # Negative wbits selects the raw deflate format.
    deflater = zlib.compressobj(compress_level, zlib.DEFLATED, -zlib.MAX_WBITS,
                                zlib.DEF_MEM_LEVEL, zlib.Z_DEFAULT_STRATEGY)
    return deflater.compress(data) + deflater.flush()


def __make_key(password, algorithm, salt, deriv_iter_count, deriv_key_size):
    """ Makes encryption key from password with addition derivation.

    The password is hashed with the start-key algorithm and the digest is
    then fed to PBKDF2 (HMAC-SHA1) together with the decoded salt.

    :param password: document's password. Text is encoded as UTF-8; bytes are used as they are.
    :param algorithm: manifest:start-key-generation-name, the algorithm used to generate a start key from the
        user password, already normalized. Can be 'sha1' or 'sha256'.
    :param salt: manifest:salt, base64-encoded salt
    :param deriv_iter_count: manifest:iteration-count, the number of iterations used by the key derivation algorithm
        to derive a key (int or numeric string).
    :param deriv_key_size: manifest:key-size, the length in octets of a key delivered by a key-developing algorithm
        (int or numeric string).
    :return: the derived key as bytes.
    :raises ValueError: if algorithm is neither 'sha1' nor 'sha256'.
    """
    # Explicit check instead of an assert, which is stripped under -O.
    if algorithm not in ('sha1', 'sha256'):
        raise ValueError('Only sha256 and sha1 are allowed, got: {}'.format(algorithm))

    # Encode explicitly: the implicit default codec is ASCII on Python 2,
    # which fails for non-ASCII passwords.
    if isinstance(password, bytes):
        password_bytes = password
    else:
        password_bytes = password.encode('utf-8')

    start_key = hashlib.new(algorithm, password_bytes).digest()
    return hashlib.pbkdf2_hmac('sha1', start_key, base64.b64decode(salt),
                               int(deriv_iter_count), int(deriv_key_size))


def __decrypt_data(algorithm, iv, derived_key, encrypted_data):
    """ Decrypt data with the cipher named in the manifest.
    :param algorithm: manifest:algorithm-name, the algorithm and mode used to encrypt a file entry. Can be:
        1. An IRI listed in §5.2 of xmlenc-core (Block Encryption Algorithms): tripledes-cbc, aes128-cbc, aes192-cbc,
           aes256-cbc
        2. Blowfish CFB: The Blowfish algorithm in CFB mode.
        3. An IRI listed in §5.1 of xmlenc-core (Algorithm Identifiers and Implementation Requirements): NOT IMPLEMENTED
    :param iv: manifest:initialisation-vector, base64-encoded initialization vector used by the encryption algorithm.
    :param derived_key: the key derived from a password.
    :param encrypted_data: the encrypted data
    :return: the decrypted bytes, exactly as the cipher produced them.
    """
    cipher_name = __normalize_name(algorithm)
    supported = ('blowfish cfb', 'blowfish', 'aes128-cbc', 'aes192-cbc', 'aes256-cbc', 'tripledes-cbc')
    assert cipher_name in supported, \
        'Unknown algorithm: {}'.format(cipher_name)

    raw_iv = base64.b64decode(iv)

    if 'blowfish' in cipher_name:
        # NOTE(review): the original docstring described Blowfish CFB as
        # 8-bit CFB, but a 64-bit segment size is passed - confirm.
        cipher = Blowfish.new(key=derived_key, mode=Blowfish.MODE_CFB, IV=raw_iv, segment_size=64)
    elif 'aes' in cipher_name:
        cipher = AES.new(key=derived_key, mode=AES.MODE_CBC, IV=raw_iv)
    elif 'tripledes' in cipher_name:
        cipher = DES3.new(key=derived_key, mode=DES3.MODE_CBC, IV=raw_iv)
    else:
        # Only reachable when the assertion above is disabled.
        return None
    return cipher.decrypt(encrypted_data)


def __decrypt(raw_data, manifest_data, password, verify_checksum=True):
    """ Decrypt and inflate one encrypted entry of the package.

    :param raw_data: the encrypted bytes as read from the zip archive.
    :param manifest_data: the 'encrypted-data' dict of the manifest entry, with the keys 'checksum',
        'checksum-type', 'algorithm', 'key-derivation' and 'start-key-generation'.
    :param password: document's password.
    :param verify_checksum: when true, compare the decrypted data with the manifest checksum before inflating.
    :return: the decrypted and decompressed bytes.
    :raises OpenDocumentEncryptionException: if the checksum does not match or the decrypted data cannot be
        inflated (wrong password or corrupted document).
    """
    key_derivation = manifest_data['key-derivation']
    cipher_info = manifest_data['algorithm']

    # Get the encryption key from the password.
    start_key_generation_alg = __normalize_name(manifest_data['start-key-generation']['start-key-generation-name'])
    derived_key = __make_key(password,
                             algorithm=start_key_generation_alg,
                             salt=key_derivation['salt'],
                             deriv_iter_count=key_derivation['iteration-count'],
                             deriv_key_size=key_derivation['key-size'])

    # Add padding if needed, remembering how long the real data is.
    original_length = len(raw_data)
    padded_data = __append_padding(raw_data)

    # Decrypt data.
    decrypted_data = __decrypt_data(algorithm=cipher_info['algorithm-name'],
                                    iv=cipher_info['initialisation-vector'],
                                    derived_key=derived_key,
                                    encrypted_data=padded_data)

    # Drop the bytes that only exist because of the padding. Otherwise they
    # end up in the checksum of entries shorter than 1024 bytes and make
    # verification fail even with the right password.
    decrypted_data = decrypted_data[:original_length]

    # Verify the result with checksum
    if verify_checksum and not verify(manifest_data['checksum'], manifest_data['checksum-type'], decrypted_data):
        raise OpenDocumentEncryptionException("Checksum verification failed. Wrong password or corrupted document")

    # Inflate decrypted data.
    try:
        return __inflate(decrypted_data)
    except zlib.error:
        raise OpenDocumentEncryptionException("Wrong password or corrupted document")


def __append_padding(ciphertext, segment_size=64., block_size=8.):
    """ Pad ciphertext with zero bytes.
    Zero bytes are appended until the length modulo segment_size is a
    whole multiple of block_size; with the defaults this means until the
    length is a multiple of 8.
    :param ciphertext: the encrypted bytes.
    :param segment_size: float, modulus applied to the length.
    :param block_size: float, the remainder must be a whole multiple of it.
    :return: the ciphertext, extended with b'\\x00' bytes when needed.
    """
    assert isinstance(segment_size, float)
    assert isinstance(block_size, float)
    length = len(ciphertext)
    missing = 0
    # Count the missing bytes first, then append them in one go.
    while not ((length + missing) % segment_size / block_size).is_integer():
        missing += 1
    if missing:
        ciphertext += b'\x00' * missing
    return ciphertext


def verify(checksum, checksum_type, decrypted_data):
    """
    Verify password by comparing a digest of the decrypted data with the manifest checksum.

    :param checksum: base64 encoded checksum from manifest
    :param checksum_type: name of a digest algorithm that can be used to check password correctness. SHA1 or SHA256.
        Can be:
        1. SHA1/1K: SHA1 algorithm applied to first 1024 bytes of the compressed unencrypted file.
        2. urn:oasis:names:tc:opendocument:xmlns:manifest:1.0#sha1-1k: The same as SHA1/1K.
        3. SHA1: The same as http://www.w3.org/2000/09/xmldsig#sha1.
        4. urn:oasis:names:tc:opendocument:xmlns:manifest:1.0#sha256-1k: SHA256 algorithm applied to first 1024 bytes
           of the compressed unencrypted file.
    :param decrypted_data: the decrypted, still compressed bytes; only the first 1024 bytes are hashed,
        whichever checksum type is given.
    :return: True when the digest equals the decoded checksum, False otherwise.
    """
    digest_kind = __normalize_name(checksum_type)
    accepted = ('sha1/1k', 'sha1-1k', 'sha1', 'sha256-1k')
    assert digest_kind in accepted, \
        'Wrong checksum algorithm: {}'.format(digest_kind)

    expected = base64.b64decode(checksum)

    if 'sha256' in digest_kind:
        hasher = hashlib.sha256
    elif 'sha1' in digest_kind:
        hasher = hashlib.sha1
    else:
        # Only reachable when the assertion above is disabled.
        return None

    head = decrypted_data[:1024]
    return expected == hasher(head).digest()

# vim: set expandtab sw=4 :
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,5 +115,5 @@
'odfuserfield/odfuserfield',
'xml2odf/xml2odf'],
data_files=datafiles,
install_requires=['defusedxml', ]
install_requires=['defusedxml', 'pycrypto']
)
Binary file added tests/examples/aes_sample.odt
Binary file not shown.
Binary file added tests/examples/blowfish_sample.odt
Binary file not shown.
Loading