Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add custom properties support + unit-tests, feature-tests #1273

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions features/doc-customprops.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
Feature: Read and write custom document properties
In order to find documents and make them manageable by digital means
As a developer using python-docx
I need to access and modify the custom properties of a document


Scenario: read the custom properties of a document
Given a document having known custom properties
Then I can access the custom properties object
And the expected custom properties are visible
And the custom property values match the known values


Scenario: change the custom properties of a document
Given a document having known custom properties
When I assign new values to the custom properties
Then the custom property values match the new values


Scenario: a default custom properties part is added if doc doesn't have one
Given a document having no custom properties part
When I access the custom properties object
Then a custom properties part with no values is added


Scenario: set custom properties on a document that doesn't have one
Given a document having no custom properties part
When I assign new values to the custom properties
Then the custom property values match the new values


Scenario: iterate the custom properties of a document
Given a document having known custom properties
Then I can iterate the custom properties object


Scenario: delete an existing custom property
Given a document having known custom properties
When I delete an existing custom property
Then the custom property is missing in the remaining list of custom properties
125 changes: 125 additions & 0 deletions features/steps/customprops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# encoding: utf-8

"""
Gherkin step implementations for custom properties-related features.
"""

from __future__ import (
absolute_import, division, print_function, unicode_literals
)

from datetime import datetime, timedelta

from behave import given, then, when

from docx import Document
from docx.opc.customprops import CustomProperties

from helpers import test_docx


# given ===================================================

@given('a document having known custom properties')
def given_a_document_having_known_custom_properties(context):
    """Load the fixture document and record the property names it should expose."""
    docx_path = test_docx('doc-customprops')
    context.document = Document(docx_path)
    # Names are listed in the order the iteration step compares against.
    expected_names = ['AppVersion', 'CustomPropBool', 'CustomPropInt']
    expected_names += ['CustomPropString', 'DocSecurity', 'HyperlinksChanged']
    expected_names += ['LinksUpToDate', 'ScaleCrop', 'ShareDoc']
    context.exp_prop_names = expected_names


@given('a document having no custom properties part')
def given_a_document_having_no_custom_properties_part(context):
    """Load the fixture document without custom properties; expect no names."""
    docx_path = test_docx('doc-no-customprops')
    context.document = Document(docx_path)
    context.exp_prop_names = list()


# when ====================================================

@when('I access the custom properties object')
def when_I_access_the_custom_properties_object(context):
    """Read the `custom_properties` attribute of the document under test."""
    # The value is discarded on purpose; this step only performs the access.
    _ = context.document.custom_properties


@when("I assign new values to the custom properties")
def when_I_assign_new_values_to_the_custom_properties(context):
    """Write a bool, an int and a string property, remembering them on the context."""
    new_values = (
        ('CustomPropBool', False),
        ('CustomPropInt', 1),
        ('CustomPropString', 'Lorem ipsum'),
    )
    # Stored before writing so a later step can compare against the same pairs.
    context.propvals = new_values
    props = context.document.custom_properties
    for prop_name, prop_value in new_values:
        props[prop_name] = prop_value


@when("I delete an existing custom property")
def when_I_delete_an_existing_custom_property(context):
    """Delete the "CustomPropInt" property and record its name on the context."""
    props = context.document.custom_properties
    del props["CustomPropInt"]
    # Recorded only after the deletion has gone through.
    context.prop_name = "CustomPropInt"


# then ====================================================

@then('a custom properties part with no values is added')
def then_a_custom_properties_part_with_no_values_is_added(context):
    """Check that the document's custom properties collection is empty."""
    prop_count = len(context.document.custom_properties)
    assert prop_count == 0


@then('I can access the custom properties object')
def then_I_can_access_the_custom_properties_object(context):
    """Check that `custom_properties` is a |CustomProperties| instance."""
    props = context.document.custom_properties
    is_expected_type = isinstance(props, CustomProperties)
    assert is_expected_type


@then('the expected custom properties are visible')
def then_the_expected_custom_properties_are_visible(context):
    """Check that every expected property name can be looked up."""
    props = context.document.custom_properties
    for expected_name in context.exp_prop_names:
        found = props.lookup(expected_name)
        assert found is not None


@then('the custom property values match the known values')
def then_the_custom_property_values_match_the_known_values(context):
    """Compare three properties against the values hard-coded in this step."""
    props = context.document.custom_properties
    expectations = (
        ('CustomPropBool', True),
        ('CustomPropInt', 13),
        ('CustomPropString', 'Test String'),
    )
    for prop_name, wanted in expectations:
        actual = props[prop_name]
        assert actual == wanted, (
            "got '%s' for custom property '%s'" % (actual, prop_name)
        )


@then('the custom property values match the new values')
def then_the_custom_property_values_match_the_new_values(context):
    """Compare the properties against the pairs stored in `context.propvals`."""
    props = context.document.custom_properties
    for prop_name, wanted in context.propvals:
        actual = props[prop_name]
        assert actual == wanted, (
            "got '%s' for custom property '%s'" % (actual, prop_name)
        )


@then('I can iterate the custom properties object')
def then_I_can_iterate_the_custom_properties_object(context):
    """Check that iteration yields exactly the expected names, in order."""
    iterated_names = list(context.document.custom_properties)
    assert iterated_names == context.exp_prop_names


@then('the custom property is missing in the remaining list of custom properties')
def then_the_custom_property_is_missing_in_the_remaining_list_of_custom_properties(context):
    """Check the deleted property can no longer be looked up or iterated."""
    deleted_name = context.prop_name
    props = context.document.custom_properties
    assert deleted_name is not None
    assert props.lookup(deleted_name) is None
    remaining_names = list(props)
    assert deleted_name not in remaining_names
Binary file added features/steps/test_files/doc-customprops.docx
Binary file not shown.
Binary file added features/steps/test_files/doc-no-customprops.docx
Binary file not shown.
2 changes: 2 additions & 0 deletions src/docx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.opc.part import PartFactory
from docx.opc.parts.coreprops import CorePropertiesPart
from docx.opc.parts.customprops import CustomPropertiesPart
from docx.parts.document import DocumentPart
from docx.parts.hdrftr import FooterPart, HeaderPart
from docx.parts.image import ImagePart
Expand All @@ -41,6 +42,7 @@ def part_class_selector(content_type: str, reltype: str) -> Type[Part] | None:

PartFactory.part_class_selector = part_class_selector
PartFactory.part_type_for[CT.OPC_CORE_PROPERTIES] = CorePropertiesPart
PartFactory.part_type_for[CT.OPC_CUSTOM_PROPERTIES] = CustomPropertiesPart
PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart
PartFactory.part_type_for[CT.WML_FOOTER] = FooterPart
PartFactory.part_type_for[CT.WML_HEADER] = HeaderPart
Expand Down
8 changes: 8 additions & 0 deletions src/docx/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ def core_properties(self):
"""A |CoreProperties| object providing Dublin Core properties of document."""
return self._part.core_properties

@property
def custom_properties(self):
    """The custom properties object of this document.

    Delegates to the `custom_properties` attribute of the underlying part.
    """
    part = self._part
    return part.custom_properties

@property
def inline_shapes(self):
"""The |InlineShapes| collectoin for this document.
Expand Down
1 change: 1 addition & 0 deletions src/docx/opc/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class CONTENT_TYPE:
)
OFC_VML_DRAWING = "application/vnd.openxmlformats-officedocument.vmlDrawing"
OPC_CORE_PROPERTIES = "application/vnd.openxmlformats-package.core-properties+xml"
OPC_CUSTOM_PROPERTIES = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
OPC_DIGITAL_SIGNATURE_CERTIFICATE = (
"application/vnd.openxmlformats-package.digital-signature-certificate"
)
Expand Down
81 changes: 81 additions & 0 deletions src/docx/opc/customprops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# encoding: utf-8

"""
Support reading and writing custom properties to and from a .docx file.
"""

from __future__ import (
absolute_import, division, print_function, unicode_literals
)

import numbers
from lxml import etree

NS_VT = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"

# Fully-qualified (Clark notation) tags of the typed value elements handled here.
_VT_I4 = f"{{{NS_VT}}}i4"
_VT_BOOL = f"{{{NS_VT}}}bool"


class CustomProperties(object):
    """
    Corresponds to part named ``/docProps/custom.xml``, containing the custom
    document properties for this document package.

    Provides dict-like access by property name. Each child of the wrapped
    element is treated as one property whose first child holds the typed
    value (``vt:i4``, ``vt:bool`` or anything else, which is read as text).
    """

    def __init__(self, element):
        # `element` is the XML element whose children are the property elements.
        self._element = element

    def __getitem__(self, item):
        """Return the value of the property named `item`.

        Returns |None| when no such property exists. A ``vt:i4`` value is
        returned as `int` (or as the raw text when it is not a valid integer),
        a ``vt:bool`` value as `bool`, and any other type as its text.
        """
        prop = self.lookup(item)
        if prop is None:
            return None
        elm = prop[0]
        if elm.tag == _VT_I4:
            try:
                return int(elm.text)
            except (TypeError, ValueError):
                # Empty or malformed integer: hand back whatever text is there.
                return elm.text
        if elm.tag == _VT_BOOL:
            # xsd:boolean allows both "1"/"0" and "true"/"false".
            return (elm.text or "").strip().lower() in ("1", "true")
        return elm.text

    def __setitem__(self, key, value):
        """Set the property named `key` to `value`, creating it if needed.

        A new property gets its type from `value`: `bool` -> ``vt:bool``, any
        other number -> ``vt:i4`` (truncated with `int()`), anything else ->
        ``vt:lpwstr``. An existing property keeps its element type and the
        value is converted to it; `int()` may raise for an unsuitable value.
        """
        prop = self.lookup(key)
        if prop is not None:
            elm = prop[0]
            elm.text = self._to_text(elm.tag, value)
            return

        if isinstance(value, bool):  # must precede Number: bool is an int subclass
            elm_type = 'bool'
        elif isinstance(value, numbers.Number):
            elm_type = 'i4'
        else:
            elm_type = 'lpwstr'

        # Determine the pid before appending, from the pids actually in use, so
        # a deletion followed by an insertion cannot produce a duplicate pid.
        pid = self._next_pid()
        prop = etree.SubElement(self._element, "property")
        elm = etree.SubElement(prop, f"{{{NS_VT}}}{elm_type}", nsmap={'vt': NS_VT})
        elm.text = self._to_text(elm.tag, value)
        prop.set("name", key)
        # magic number "FMTID_UserDefinedProperties"
        # MS doc ref: https://learn.microsoft.com/de-de/windows/win32/stg/predefined-property-set-format-identifiers
        prop.set("fmtid", "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
        prop.set("pid", str(pid))

    def __delitem__(self, key):
        """Remove the property named `key`; do nothing when it does not exist."""
        prop = self.lookup(key)
        if prop is not None:
            self._element.remove(prop)

    def __len__(self):
        """Return the number of child elements of the wrapped element."""
        return len(self._element)

    def __iter__(self):
        """Generate the ``name`` attribute of each child, in document order."""
        for child in self._element:
            yield child.get("name")

    def lookup(self, item):
        """Return the first child whose ``name`` attribute equals `item`, else |None|."""
        for child in self._element:
            if child.get("name") == item:
                return child
        return None

    def _next_pid(self):
        """Return one more than the highest pid in use, and never less than 2."""
        highest = 1
        for child in self._element:
            try:
                highest = max(highest, int(child.get("pid")))
            except (TypeError, ValueError):
                # Missing or non-numeric pid: ignore it for numbering purposes.
                continue
        return highest + 1

    @staticmethod
    def _to_text(tag, value):
        """Return `value` serialized for a value element having `tag`."""
        if tag == _VT_I4:
            return str(int(value))
        if tag == _VT_BOOL:
            return str(1 if value else 0)
        return str(value)
22 changes: 22 additions & 0 deletions src/docx/opc/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from docx.opc.packuri import PACKAGE_URI, PackURI
from docx.opc.part import PartFactory
from docx.opc.parts.coreprops import CorePropertiesPart
from docx.opc.parts.customprops import CustomPropertiesPart
from docx.opc.pkgreader import PackageReader
from docx.opc.pkgwriter import PackageWriter
from docx.opc.rel import Relationships
Expand Down Expand Up @@ -35,6 +36,14 @@ def core_properties(self):
properties for this document."""
return self._core_properties_part.core_properties

@property
def custom_properties(self):
    """The custom properties object for this package.

    Delegates to the `custom_properties` attribute of the package's custom
    properties part.
    """
    custom_props_part = self._custom_properties_part
    return custom_props_part.custom_properties

def iter_rels(self):
"""Generate exactly one reference to each relationship in the package by
performing a depth-first traversal of the rels graph."""
Expand Down Expand Up @@ -163,6 +172,19 @@ def _core_properties_part(self):
self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
return core_properties_part

@property
def _custom_properties_part(self):
    """The custom properties part related to this package.

    When no part is related by `RT.CUSTOM_PROPERTIES`, a default part is
    created with `CustomPropertiesPart.default()`, related to this package,
    and returned.
    """
    try:
        part = self.part_related_by(RT.CUSTOM_PROPERTIES)
    except KeyError:
        # No such relationship yet: create the part and wire it up.
        part = CustomPropertiesPart.default(self)
        self.relate_to(part, RT.CUSTOM_PROPERTIES)
    return part


class Unmarshaller:
"""Hosts static methods for unmarshalling a package from a |PackageReader|."""
Expand Down
Loading