Skip to content

Commit

Permalink
Merge pull request #848 from ZeitOnline/WCM-349_search_multiple
Browse files Browse the repository at this point in the history
WCM-22: Implement automatic content query with raw SQL query
  • Loading branch information
louika authored Sep 6, 2024
2 parents b2554da + f52d10c commit 760ccf8
Show file tree
Hide file tree
Showing 21 changed files with 691 additions and 370 deletions.
1 change: 1 addition & 0 deletions core/docs/changelog/WCM-349.changelog
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
WCM-349: Search sql connector by sql expression
28 changes: 26 additions & 2 deletions core/src/zeit/connector/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,39 @@ def locked(id):
returns None, None, None if the resource is not locked or is non-existent
"""

def search(attributes, search_expression):
"""Search for `search_expression`
def search(attributes, query):
"""Search for `query`
returns an iterator of tuples containing the unique id and the values
of the requested `attributes`:
(unique_id, attributes[0], attributes[1], ...)
"""

def search_sql(query):
    """Search for resources matching `query`.

    query:
        SQL Select object obtained from IConnector.query(); callers may
        refine it (e.g. with filter criteria) before passing it in.

    returns a list of IResource objects
    """

def search_sql_count(query):
    """Count the search results for `query`.

    query:
        SQL Select object obtained from IConnector.query(); callers may
        refine it (e.g. with filter criteria) before passing it in.

    returns integer
    """

def query():
    """Create the base query object to pass to search_sql[_count].

    Not the most desirable API design, but functional for now.

    returns query object for properties table for use with
    search_sql[_count]
    """


class ICachingConnector(IConnector):
"""A connector that caches."""
Expand Down
19 changes: 19 additions & 0 deletions core/src/zeit/connector/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import urllib.parse
import uuid

from sqlalchemy.dialects import postgresql
import pytz
import sqlalchemy
import zope.event

from zeit.cms.content.sources import FEATURE_TOGGLES
Expand Down Expand Up @@ -46,6 +48,7 @@ class Connector(zeit.connector.filesystem.Connector):
_ignore_uuid_checks = False
_set_lastmodified_property = True
resource_class = zeit.connector.resource.WriteableCachedResource
Content = Content # only for search_sql, only id column is required

property_cache = zeit.connector.cache.AlwaysEmptyDict()
body_cache = zeit.connector.cache.AlwaysEmptyDict()
Expand Down Expand Up @@ -77,6 +80,7 @@ def _reset(self):
self._deleted = set()
self._properties = {}
self.search_result = self.search_result_default[:]
self.search_args = []

def listCollection(self, id):
"""List the filenames of a collection identified by path."""
Expand Down Expand Up @@ -315,6 +319,21 @@ def search(self, attributes, expression):

return ((unique_id,) + metadata for unique_id in unique_ids)

def _compile_sql(self, stmt):
    """Render `stmt` as a PostgreSQL SQL string with parameters inlined."""
    compiled = stmt.compile(
        dialect=postgresql.dialect(),
        compile_kwargs={'literal_binds': True},
    )
    return str(compiled)

def search_sql(self, expression):
    """Record the compiled query and return the canned search results.

    The SQL text of `expression` is appended to `self.search_args`; the
    returned resources are looked up from the unique ids in
    `self.search_result`, independent of the query.
    """
    self.search_args.append(self._compile_sql(expression))
    resources = []
    for uniqueid in self.search_result:
        resources.append(self[uniqueid])
    return resources

def query(self):
    """Return a select statement over `self.Content` for use with search_sql."""
    statement = sqlalchemy.select(self.Content)
    return statement

def search_sql_count(self, query):
    """Return the number of canned search results; `query` is not evaluated."""
    canned = self.search_result
    return len(canned)

# internal helpers

def _get_cannonical_id(self, id):
Expand Down
37 changes: 33 additions & 4 deletions core/src/zeit/connector/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,7 @@ def _reconnect(self, connection):
else:
break

def __getitem__(self, uniqueid):
uniqueid = self._normalize(uniqueid)
properties = self._get_properties(uniqueid) # may raise KeyError
def resource(self, uniqueid, properties):
return self.resource_class(
uniqueid,
uniqueid.split('/')[-1],
Expand All @@ -143,6 +141,11 @@ def __getitem__(self, uniqueid):
is_collection=properties[('is_collection', INTERNAL_PROPERTY)],
)

def __getitem__(self, uniqueid):
    """Return the resource stored under `uniqueid`.

    The id is normalized first; the property lookup may raise KeyError.
    """
    normalized = self._normalize(uniqueid)
    props = self._get_properties(normalized)  # may raise KeyError
    return self.resource(normalized, props)

property_cache = TransactionBoundCache('_v_property_cache', zeit.connector.cache.PropertyCache)

def _get_properties(self, uniqueid):
Expand Down Expand Up @@ -571,7 +574,7 @@ def _get_cached_lock(self, uniqueid):
until=properties.get(('lock_until', INTERNAL_PROPERTY)),
)

def search(self, attrlist, expr):
def _search_dav(self, attrlist, expr):
if (
len(attrlist) == 1
and attrlist[0].name == 'uuid'
Expand All @@ -593,6 +596,32 @@ def search(self, attrlist, expr):
data.extend([properties[(a.name, a.namespace)] for a in attrlist])
yield tuple(data)

# Keep the public name `search` available as an alias of `_search_dav`.
search = _search_dav  # BBB

def search_sql(self, query):
    """Execute `query` and return the matching rows as resources.

    Every row's properties and body are also written to the property and
    body caches. Collections and rows without a body are cached with an
    empty body.
    """
    resources = []
    rows = self.session.execute(query).scalars()
    for row in rows:
        key = row.uniqueid
        props = row.to_webdav()
        resources.append(self.resource(key, props))
        self.property_cache[key] = props

        if not row.is_collection and row.body:
            payload = row.body.encode('utf-8')
        else:
            payload = b''
        self.body_cache.update(key, BytesIO(payload))

    return resources

def search_sql_count(self, query):
    """Return the number of rows that `query` matches.

    query:
        SQL Select object obtained from `query()`, optionally refined
        with further criteria by the caller.

    returns integer
    """
    count_query = query.with_only_columns(
        sqlalchemy.func.count(),
        # Replacing the column list also drops the FROM clause it implied.
        # Keep it, so queries whose criteria do not reference the table
        # themselves (textual WHERE clauses, or no criteria at all) still
        # select from it.
        maintain_column_froms=True,
    )
    # Ordering cannot change a count, and PostgreSQL rejects ORDER BY on a
    # non-aggregated column next to a bare aggregate.
    count_query = count_query.order_by(None)
    return self.session.execute(count_query).scalar()

def query(self):
    """Return a select statement over `self.Content` for use with search_sql[_count]."""
    statement = select(self.Content)
    return statement

def _build_filter(self, expr):
op = expr.operator
if op == 'and':
Expand Down
4 changes: 2 additions & 2 deletions core/src/zeit/connector/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,13 @@ class TestCase(zeit.cms.testing.FunctionalTestCase):
def connector(self):
return zope.component.getUtility(zeit.connector.interfaces.IConnector)

def get_resource(self, name, body=b'', properties=None, is_collection=False):
def get_resource(self, name, body=b'', properties=None, is_collection=False, type='testing'):
if not isinstance(body, bytes):
body = body.encode('utf-8')
return zeit.connector.resource.Resource(
f'{ROOT}/{name}',
name,
'testing',
type,
BytesIO(body),
properties=properties,
is_collection=is_collection,
Expand Down
8 changes: 4 additions & 4 deletions core/src/zeit/connector/tests/test_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,10 +848,10 @@ def has_body_cache(self, uniqueid):

@contextmanager
def disable_storage(self):
original = self.connector.session
self.connector.session = mock.Mock(side_effect=RuntimeError('disabled'))
yield
self.connector.session = original
with mock.patch.object(
self.connector.session, 'execute', side_effect=RuntimeError('disabled')
):
yield


class ContractSQL(
Expand Down
27 changes: 27 additions & 0 deletions core/src/zeit/connector/tests/test_postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,33 @@ def test_search_for_uuid_uses_indexed_column(self):
self.assertEqual(res.id, unique_id)
self.assertEqual('{urn:uuid:%s}' % props.id, uuid)

def test_search_by_sql_applies_query(self):
    """search_sql returns only the resources matching the query criteria."""
    article = self.add_resource('one', type='article')
    self.add_resource('two', type='centerpage')
    query = self.connector.query().filter_by(type='article')
    found = self.connector.search_sql(query)
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0].id, article.id)

def test_search_by_sql_uses_cache(self):
    """Bodies of search_sql results can be read without executing SQL again."""
    self.add_resource('one', body=b'mybody', type='article')
    query = self.connector.query().filter_by(type='article')
    found = self.connector.search_sql(query)
    # Any further statement execution raises, so reading the body must
    # not go through session.execute.
    storage_disabled = mock.patch.object(
        self.connector.session, 'execute', side_effect=RuntimeError('disabled')
    )
    with storage_disabled:
        self.assertEqual(found[0].data.read(), b'mybody')

def test_search_sql_count_returns_result_count(self):
    """search_sql_count counts only the resources matching the query."""
    fixtures = (
        ('one', 'article'),
        ('two', 'centerpage'),
        ('three', 'article'),
    )
    for name, content_type in fixtures:
        self.add_resource(name, type=content_type)
    query = self.connector.query().filter_by(type='article')
    self.assertEqual(self.connector.search_sql_count(query), 2)

def test_search_returns_uuid(self):
res = self.get_resource(
'foo',
Expand Down
6 changes: 6 additions & 0 deletions core/src/zeit/content/cp/browser/area.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ class EditAutomatic(zeit.content.cp.browser.view.GroupedSubpageForm):
'reach_section',
'reach_access',
'reach_age',
'sql_query',
'sql_order',
)

field_groups = (
Expand All @@ -220,6 +222,10 @@ class EditAutomatic(zeit.content.cp.browser.view.GroupedSubpageForm):
_('automatic-area-type-reach'),
('reach_service', 'reach_section', 'reach_access', 'reach_age'),
),
gocept.form.grouped.Fields(
_('automatic-area-type-sql-query'),
('sql_query', 'sql_order'),
),
)

def setUpWidgets(self, *args, **kw):
Expand Down
3 changes: 2 additions & 1 deletion core/src/zeit/content/cp/browser/resources/area.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ var FIELDS = {
'reach': 'reach_service',
'related-topics': 'related_topicpage',
'topicpagelist': 'topicpagelist_order',
'rss-feed': 'rss_feed'
'rss-feed': 'rss_feed',
'sql-query': 'sql_query'
};


Expand Down
17 changes: 17 additions & 0 deletions core/src/zeit/content/cp/browser/tests/test_automatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,23 @@ def test_stores_elasticsearch_query_properties_in_xml(self):
zeit.cms.testing.xmltotext(cp.body['lead'].xml),
)

def test_stores_sql_query_properties_in_xml(self):
    """Applying an SQL query in the area form stores it in the area XML."""
    browser = self.browser
    self.create_automatic_cp(browser)
    browser.getControl('Automatic type', index=0).displayValue = ['sql-query']
    browser.getControl('SQL query').value = "type='article'"
    browser.getControl('Apply').click()
    self.assertEllipsis('...Updated on...', browser.contents)

    workingcopy = zeit.cms.checkout.interfaces.IWorkingcopy(None)
    cp = list(workingcopy.values())[0]
    expected = """\
<region...count="3" automatic="True" automatic_type="sql-query"...>...
<sql_query>type='article'</sql_query>..."""
    self.assertEllipsis(expected, zeit.cms.testing.xmltotext(cp.body['lead'].xml))

def test_stores_centerpage_properties_in_xml(self):
# Create centerpage to reference later on
self.repository['cp'] = zeit.content.cp.centerpage.CenterPage()
Expand Down
4 changes: 4 additions & 0 deletions core/src/zeit/content/cp/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ class AutomaticTypeSource(zeit.cms.content.sources.SimpleDictSource):
('reach', _('automatic-area-type-reach')),
('topicpagelist', _('automatic-area-type-topicpagelist')),
('rss-feed', _('automatic-area-type-rss-feed')),
('sql-query', _('automatic-area-type-sql-query')),
]
)

Expand Down Expand Up @@ -256,6 +257,9 @@ def automatic_area_can_read_teasers_automatically(data):
if data.automatic_type == 'topicpagelist':
return True

if data.automatic_type == 'sql-query' and data.sql_query:
return True

return False


Expand Down
2 changes: 2 additions & 0 deletions core/src/zeit/content/cp/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
cp-automatic-feed-source file://{fixtures}/feeds.xml
area-color-themes-source file://{fixtures}/area-color-themes.xml
reach-service-source file://{fixtures}/reach-services.xml
sql-query-add-clauses unsorted @@ '$$.workflow.published == "yes"' AND \
unsorted @@ '$$."zeit.content.gallery".type != "inline"'
</product-config>
""".format(fixtures='%s/tests/fixtures' % importlib.resources.files(__package__))

Expand Down
Loading

0 comments on commit 760ccf8

Please sign in to comment.