Skip to content

Commit

Permalink
Merge pull request #941 from marwoodandrew/SDAAP-122
Browse files: browse the repository at this point in the history
SDAAP-122 Handle fancy quotes in ANPA
  • Loading branch information
marwoodandrew authored Oct 27, 2024
2 parents 6a268f9 + 90f907a commit 97d4e77
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 8 deletions.
14 changes: 8 additions & 6 deletions server/aap/publish/formatters/anpa_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .field_mappers.locator_mapper import LocatorMapper
from .field_mappers.slugline_mapper import SluglineMapper
from eve.utils import config
from .unicodetoascii import to_ascii
from .unicodetoascii import to_ascii, clean_string
from .category_list_map import get_aap_category_list
import re
from superdesk.etree import parse_html, to_string, etree
Expand Down Expand Up @@ -104,7 +104,7 @@ def format(self, article, subscriber, codes=None):
anpa.append(b'\x0D\x0A')

if formatted_article.get('ednote', '') != '':
ednote = '{}\r\n'.format(to_ascii(formatted_article.get('ednote')))
ednote = '{}\r\n'.format(to_ascii(clean_string(formatted_article.get('ednote'))))
anpa.append(ednote.encode('ascii', 'replace'))

if formatted_article.get(BYLINE):
Expand All @@ -115,17 +115,18 @@ def format(self, article, subscriber, codes=None):
anpa.append(get_text(self.append_body_footer(formatted_article),
content='html').encode('ascii', 'replace'))
else:
body = to_ascii(formatted_article.get('body_html', ''))
body = to_ascii(clean_string(formatted_article.get('body_html', '')))
# we need to inject the dateline
if formatted_article.get('dateline', {}).get('text') and not article.get('auto_publish', False):
body_html_elem = parse_html(formatted_article.get('body_html'))
ptag = body_html_elem.find('.//p')
if ptag is not None:
ptag.text = formatted_article['dateline']['text'] + ' ' + (ptag.text or '')
body = to_string(body_html_elem)
body = to_ascii(clean_string(to_string(body_html_elem)))
anpa.append(self.get_text_content(body))
if formatted_article.get('body_footer'):
anpa.append(self.get_text_content(to_ascii(formatted_article.get('body_footer', ''))))
anpa.append(
self.get_text_content(to_ascii(clean_string(formatted_article.get('body_footer', '')))))

anpa.append(b'\x0D\x0A')
anpa.append(mapped_source.encode('ascii'))
Expand Down Expand Up @@ -171,7 +172,8 @@ def get_text_content(self, content):
def _process_headline(self, anpa, article, category):
# prepend the locator to the headline if required
article['headline'] = get_text(article.get('headline', ''))
headline = to_ascii(LocatorMapper().get_formatted_headline(article, category.decode('UTF-8').upper()))
headline = to_ascii(
clean_string(LocatorMapper().get_formatted_headline(article, category.decode('UTF-8').upper())))

# Set the maximum size to 64 including the sequence number if any
if len(headline) > 64:
Expand Down
23 changes: 21 additions & 2 deletions server/aap/publish/formatters/anpa_formatter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,27 @@ def test_preformated(self):
out = resp['encoded_item']

lines = io.StringIO(out.decode())
self.assertTrue(lines.getvalue().split('\r')[3].lstrip(), 'Test line 1')
self.assertTrue(lines.getvalue().split('\r')[4], 'Test line 2')
self.assertEqual(lines.getvalue().split('\r')[3].lstrip(), 'Test line 1')
self.assertEqual(lines.getvalue().split('\r')[4], 'Test line 2')

def test_fancy_quotes(self):
    """Check that typographic quotes in the body come out as plain ASCII quotes.

    Formats an HTML article whose paragraphs mix straight quotes with curly
    double and single quotes, then inspects individual rows of the encoded
    output (split on carriage returns).
    """
    # Fixture paragraphs; only rows 3, 4 and 6 of the output are asserted on,
    # the row in between is left unchecked.
    body_html = (
        "<p>\"quoted”</p>"
        "<p>“In“ IBAC’s</p>"
        "<p>Short hyphen­not handled fix one day!</p>"
        "<p>“Then ‘You can’t have it’,\" </p>"
    )

    formatter = AAPAnpaFormatter()
    subscriber = self.app.data.find('subscribers', None, None)[0][0]
    item = self.article.copy()
    item.update({
        'body_html': body_html,
        'format': 'html',
        'dateline': {'text': 'SYDNEY, June 27 AAP -'},
    })

    encoded = formatter.format(item, subscriber)[0]['encoded_item']

    # Decode and split once instead of re-splitting for every assertion.
    rows = encoded.decode().split('\r')

    # Row 3 also carries the injected dateline, hence a containment check.
    self.assertIn('"quoted"', rows[3].lstrip())
    self.assertEqual(rows[4].lstrip(), '"In" IBAC\'s')
    self.assertEqual(rows[6].lstrip(), '"Then \'You can\'t have it\'," ')

def test_embed_in_body_body(self):
f = AAPAnpaFormatter()
Expand Down

0 comments on commit 97d4e77

Please sign in to comment.