diff --git a/.github/workflows/tests-py38.yml b/.github/workflows/tests-py38.yml new file mode 100644 index 000000000..afa638535 --- /dev/null +++ b/.github/workflows/tests-py38.yml @@ -0,0 +1,75 @@ +name: "CI" + +on: + [push, pull_request] + +jobs: + + server-py-38: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: docker-compose + run: docker-compose -f .actions-docker-compose.yml up -d + + - name: cache pip + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('dev-requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip + ${{ runner.os }}- + + - name: pip install + run: | + python -m pip install --upgrade pip wheel setuptools + pip install -r dev-requirements.txt + + - name: flake8 + run: flake8 + + - name: pytest + run: pytest --ignore=tests/aap/ --disable-pytest-warnings --cov=newsroom + + - name: behave + run: behave --format progress2 --logging-level=ERROR + + client-node-14: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: setup node + uses: actions/setup-node@v1 + with: + node-version: 14.x + + - name: cache npm + uses: actions/cache@v2 + with: + path: ~/.npm + key: ${{ runner.os }}-npm-${{ hashFiles('package-lock.json') }} + restore-keys: | + ${{ runner.os }}-npm + ${{ runner.os }}- + + - name: npm ci + run: npm ci + + - name: lint + run: npm run lint + + - name: test + run: npm run test + + - name: build + run: npm run build diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3f580afe4..2427ff764 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -5,8 +5,8 @@ on: jobs: - server: - runs-on: ubuntu-latest + server-py-36: + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 @@ -19,6 +19,10 @@ jobs: - name: docker-compose run: docker-compose -f .actions-docker-compose.yml up -d + - run: | + sudo apt-get update + 
sudo apt-get install pkg-config libxml2-dev libxmlsec1-dev libxmlsec1-openssl + - name: cache pip uses: actions/cache@v2 with: @@ -42,7 +46,7 @@ jobs: - name: behave run: behave --format progress2 --logging-level=ERROR - client: + client-node-12: runs-on: ubuntu-latest steps: diff --git a/assets/companies/actions.js b/assets/companies/actions.js index a568e31c1..157e3a9c0 100644 --- a/assets/companies/actions.js +++ b/assets/companies/actions.js @@ -116,7 +116,17 @@ export function postCompany() { }; } - +export function savePermissions(company, permissions) { + return function (dispatch) { + return server.postWithCsrfToken(`/companies/${company._id}/permissions`, permissions) + .then(() => { + notify.success(gettext('Company updated successfully')); + dispatch(fetchProducts()); + dispatch(fetchCompanies()); + }) + .catch((error) => errorHandler(error, dispatch, setError)); + }; +} /** * Fetches products @@ -137,19 +147,6 @@ export function fetchProducts() { * Save permissions for a company * */ -export function savePermissions(company, permissions) { - return function (dispatch) { - return server.post(`/companies/${company._id}/permissions`, permissions) - .then(() => { - notify.success(gettext('Company updated successfully')); - dispatch(fetchProducts()); - dispatch(fetchCompanies()); - }) - .catch((error) => errorHandler(error, dispatch, setError)); - }; -} - - /** * Deletes a company * diff --git a/assets/companies/components/CompanyPermissions.jsx b/assets/companies/components/CompanyPermissions.jsx index 0b6b16622..7c21aba71 100644 --- a/assets/companies/components/CompanyPermissions.jsx +++ b/assets/companies/components/CompanyPermissions.jsx @@ -1,41 +1,50 @@ -import React from 'react'; +import React, {Component} from 'react'; import PropTypes from 'prop-types'; -import { connect } from 'react-redux'; -import { gettext } from 'utils'; -import { get } from 'lodash'; +import {connect} from 'react-redux'; +import {gettext} from 'utils'; +import {get} from 
'lodash'; import CheckboxInput from 'components/CheckboxInput'; import {savePermissions} from '../actions'; -class CompanyPermissions extends React.Component { - +class CompanyPermissions extends Component { constructor(props) { super(props); this.state = this.setup(); + this.handleSubmit = this.handleSubmit.bind(this); + this.handleChange = this.handleChange.bind(this); + this.togglePermission = this.togglePermission.bind(this); } setup() { - const products = {}; - - this.props.products.forEach((product) => { - products[product._id] = get(product, 'companies', []).includes(this.props.company._id); - }); - - const sections = {}; - - if (this.props.company.sections) { - Object.assign(sections, this.props.company.sections); - } else { - this.props.sections.forEach((section) => { - sections[section._id] = true; - }); - } + const {company, sections, products} = this.props; - const archive_access = !!this.props.company.archive_access; - const events_only = !!this.props.company.events_only; + const permissions = { + sections: company.sections || sections.reduce((acc, section) => ({...acc, [section._id]: true}), {}), + products: products.reduce((acc, product) => ({ + ...acc, + [product._id]: get(product, 'companies', []).includes(company._id) + }), {}), + archive_access: company.archive_access || false, + events_only: company.events_only || false, + embedded: { + social_media_display: get(company, 'embedded.social_media_display', false), + video_display: get(company, 'embedded.video_display', false), + audio_display: get(company, 'embedded.audio_display', false), + images_display: get(company, 'embedded.images_display', false), + all_display: get(company, 'embedded.all_display', false), + social_media_download: get(company, 'embedded.social_media_download', false), + video_download: get(company, 'embedded.video_download', false), + audio_download: get(company, 'embedded.audio_download', false), + images_download: get(company, 'embedded.images_download', false), + 
all_download: get(company, 'embedded.all_download', false), + sdpermit_display: get(company, 'embedded.sdpermit_display', false), + sdpermit_download: get(company, 'embedded.sdpermit_download', false), + }, + }; - return {sections, products, archive_access, events_only}; + return permissions; } componentDidUpdate(prevProps) { @@ -44,20 +53,57 @@ class CompanyPermissions extends React.Component { } } - toggle(key, _id) { - const field = this.state[key]; - field[_id] = !field[_id]; - this.setState({[key]: field}); + handleSubmit(event) { + event.preventDefault(); + this.props.savePermissions(this.props.company, this.state); + } + + handleChange(key, value) { + this.setState((prevState) => { + if (key.startsWith('embedded.')) { + const [, embeddedKey] = key.split('.'); + return { + ...prevState, + embedded: { + ...prevState.embedded, + [embeddedKey]: value, + }, + }; + } else { + return { + ...prevState, + [key]: value, + }; + } + }); + } + + togglePermission(key, _id, value) { + this.setState((prevState) => ({ + ...prevState, + [key]: { + ...prevState[key], + [_id]: value, + }, + })); } render() { + const {sections, products} = this.props; + const { + archive_access, + events_only, + embedded = {}, + } = this.state; + + const optionLabels = { + Display: 'Allow Visualization', + Download: 'Allow Download' + }; return ( -
-
{ - event.preventDefault(); - this.props.savePermissions(this.props.company, this.state); - }}> -
+
+ +
    @@ -65,59 +111,105 @@ class CompanyPermissions extends React.Component { this.setState({archive_access: !this.state.archive_access})} + value={archive_access} + onChange={() => this.handleChange('archive_access', !archive_access)} />
  • this.setState({events_only: !this.state.events_only})} + value={events_only} + onChange={() => this.handleChange('events_only', !events_only)} />
-
+
+ +

+ + Default: All Content Types if none selected. Also SDpermit Media option can start to use it if required. +

+
+ {['Display', 'Download'].map((option) => ( +
+
+ +
    + {[ + {label: 'Images', key: 'images'}, + {label: 'Audios', key: 'audio'}, + {label: 'Videos', key: 'video'}, + {label: 'Social Media', key: 'social_media'}, + {label: 'SDpermit Media', key: 'sdpermit'}, + {label: 'All Above', key: 'all'}, + ].map(({label, key}) => ( +
  • + + this.handleChange( + `embedded.${key}_${option.toLowerCase()}`, + !embedded[`${key}_${option.toLowerCase()}`] + ) + } + /> +
  • + ))} +
+
+
+ ))} +
+
+ +
    - {this.props['sections'].map((item) => ( -
  • + {sections.map((section) => ( +
  • this.toggle('sections', item._id)} /> + name={section._id} + label={section.name} + value={this.state.sections[section._id] || false} + onChange={(value) => this.togglePermission('sections', section._id, value)} + />
  • ))}
-
- {this.props['sections'].map((section) => ( - [, -
    - {this.props['products'].filter((p) => (p.product_type || 'wire').toLowerCase() === section._id.toLowerCase()) +
    + {sections.map((section) => ( + + +
      + {products + .filter((p) => (p.product_type || 'wire').toLowerCase() === section._id.toLowerCase()) .map((product) => (
    • this.toggle('products', product._id)} /> + value={this.state.products[product._id] || false} + onChange={(value) => this.togglePermission('products', product._id, value)} + />
    • ))} -
    ] +
+ ))}
-
-
+ +
@@ -133,14 +225,26 @@ CompanyPermissions.propTypes = { sections: PropTypes.object, archive_access: PropTypes.bool, events_only: PropTypes.bool, + embedded: PropTypes.shape({ + social_media_display: PropTypes.bool, + video_display: PropTypes.bool, + audio_display: PropTypes.bool, + images_display: PropTypes.bool, + all_display: PropTypes.bool, + sdpermit_display: PropTypes.bool, + social_media_download: PropTypes.bool, + video_download: PropTypes.bool, + audio_download: PropTypes.bool, + images_download: PropTypes.bool, + sdpermit_download: PropTypes.bool, + all_download: PropTypes.bool, + }), }).isRequired, - sections: PropTypes.arrayOf(PropTypes.shape({ _id: PropTypes.string.isRequired, name: PropTypes.string.isRequired, })), products: PropTypes.arrayOf(PropTypes.object).isRequired, - savePermissions: PropTypes.func.isRequired, }; diff --git a/assets/company-reports/components/SubscriberActivity.jsx b/assets/company-reports/components/SubscriberActivity.jsx index 10a62f18a..932623519 100644 --- a/assets/company-reports/components/SubscriberActivity.jsx +++ b/assets/company-reports/components/SubscriberActivity.jsx @@ -61,6 +61,18 @@ class SubscriberActivity extends React.Component { itemsArray = [{ name: 'download' }, + { + name: 'download audio', + value: 'audio' + }, + { + name: 'download video', + value: 'video' + }, + { + name: 'download picture', + value: 'picture' + }, { name: 'copy' }, @@ -150,6 +162,11 @@ class SubscriberActivity extends React.Component { {get(item, 'item.item_href', null) && {get(item, 'item.item_text')}} {!get(item, 'item.item_href') && {get(item, 'item.item_text')}} + {get(item, 'association.href', null) && + ' / ' + } + {get(item, 'association.href', null) && + {get(item, 'association.text')}} {item.action} {item.user} diff --git a/assets/components/cards/render/CardFooter.jsx b/assets/components/cards/render/CardFooter.jsx index 3edea7bba..dc6d4437b 100644 --- a/assets/components/cards/render/CardFooter.jsx +++ 
b/assets/components/cards/render/CardFooter.jsx @@ -2,14 +2,15 @@ import React from 'react'; import PropTypes from 'prop-types'; import CardMeta from './CardMeta'; -function CardFooter({wordCount, pictureAvailable, source, versioncreated}) { +function CardFooter({wordCount, pictureAvailable, source, versioncreated, audioAvailable, videoAvailable}) { return (
); } @@ -19,6 +20,12 @@ CardFooter.propTypes = { pictureAvailable: PropTypes.bool, source: PropTypes.string, versioncreated: PropTypes.string, + audioAvailable: PropTypes.array, + videoAvailable: PropTypes.array, +}; +CardFooter.defaultProps = { + audioAvailable: [], + videoAvailable: [], }; -export default CardFooter; \ No newline at end of file +export default CardFooter; diff --git a/assets/components/cards/render/CardMeta.jsx b/assets/components/cards/render/CardMeta.jsx index fbbdda4fe..77f674a49 100644 --- a/assets/components/cards/render/CardMeta.jsx +++ b/assets/components/cards/render/CardMeta.jsx @@ -1,16 +1,28 @@ import React from 'react'; import PropTypes from 'prop-types'; +import {isEmpty} from 'lodash'; import { gettext, shortDate, fullDate } from 'utils'; -function CardMeta({wordCount, pictureAvailable, source, versioncreated, displayDivider, slugline}) { +function CardMeta({wordCount, pictureAvailable, source, versioncreated, displayDivider, slugline ,audio, video}) { return (
{pictureAvailable && - + + } + {!isEmpty(audio) && + + + + } + {!isEmpty(video) && + + + + } {displayDivider && }
@@ -34,10 +46,14 @@ CardMeta.propTypes = { versioncreated: PropTypes.string, displayDivider: PropTypes.bool, slugline: PropTypes.string, + audio: PropTypes.array, + video: PropTypes.array }; CardMeta.defaultProps = { displayDivider: false, + audio: [], + video: [], }; -export default CardMeta; \ No newline at end of file +export default CardMeta; diff --git a/assets/components/cards/render/LargePictureTextCard.jsx b/assets/components/cards/render/LargePictureTextCard.jsx index f64ff54c9..61eb3cd4a 100644 --- a/assets/components/cards/render/LargePictureTextCard.jsx +++ b/assets/components/cards/render/LargePictureTextCard.jsx @@ -1,7 +1,7 @@ import React from 'react'; import PropTypes from 'prop-types'; import {wordCount} from 'utils'; -import {getCaption, getPicture, getThumbnailRendition} from 'wire/utils'; +import {getCaption, getPicture, getThumbnailRendition ,getVideos ,getAudio} from 'wire/utils'; import CardFooter from './CardFooter'; import CardBody from './CardBody'; import CardRow from './CardRow'; @@ -10,12 +10,16 @@ const getPictureTextPanel = (item, picture, openItem, cardId) => { const rendition = getThumbnailRendition(picture); const imageUrl = rendition && rendition.href; const caption = rendition && getCaption(picture); + const audio = item && getAudio(item); + const video = item && getVideos(item); return (
openItem(item, cardId)}> {caption} { const rendition = withPictures && getThumbnailRendition(picture); const imageUrl = rendition && rendition.href; const caption = rendition && getCaption(picture); + const audio = item && getAudio(item); + const video = item && getVideos(item); return (
openItem(item, cardId)}> @@ -18,6 +20,9 @@ const getPictureTextPanel = (item, picture, openItem, withPictures, cardId) => { } { - + const rendition = getThumbnailRendition(picture, true); const imageUrl = rendition && rendition.href; const caption = rendition && getCaption(picture); + const audio = item && getAudio(item); + const video = item && getVideos(item); return (
openItem(item, cardId)}> @@ -17,6 +19,8 @@ const getTopNewsPanel = (item, picture, openItem, cardId) => {

{item.headline}

{ const rendition = getThumbnailRendition(picture, true); const imageUrl = rendition && rendition.href; const caption = rendition && getCaption(picture); + const audio = item && getAudio(item); + const video = item && getVideos(item); return (
openItem(item, cardId)}> @@ -21,6 +23,8 @@ const getTopNewsLeftPanel = (item, picture, openItem, cardId) => {

{item.headline}

{ const rendition = getThumbnailRendition(picture); const imageUrl = rendition && rendition.href; const caption = rendition && getCaption(picture); + const audio = item && getAudio(item); + const video = item && getVideos(item); return (
openItem(item, cardId)}> {caption} this.props.actions.filter(a => a.id === 'open')[0].action(null)} + detailsConfig={this.props.previewConfig} />, modal] : this.renderContent() ) diff --git a/assets/home/reducers.js b/assets/home/reducers.js index 18dcf670d..55e0abd0e 100644 --- a/assets/home/reducers.js +++ b/assets/home/reducers.js @@ -5,6 +5,7 @@ import { SET_ACTIVE, SET_CARD_ITEMS, } from './actions'; +import {unescape, get} from 'lodash'; import {BOOKMARK_ITEMS, REMOVE_BOOKMARK} from '../wire/actions'; import {CLOSE_MODAL, MODAL_FORM_VALID, RENDER_MODAL} from '../actions'; import {modalReducer} from '../reducers'; @@ -30,13 +31,18 @@ export default function homeReducer(state = initialState, action) { company: action.data.company, formats: action.data.formats || [], userSections: action.data.userSections, - context: 'wire' + context: 'wire', + uiConfig: action.data.ui_config || {} }; case OPEN_ITEM:{ + var itemToOpen = action.item; + if (itemToOpen) { + itemToOpen.body_html = unescape(get(action, 'item.body_html')); + } return { ...state, - itemToOpen: action.item || null, + itemToOpen: itemToOpen || null, }; } diff --git a/assets/server.js b/assets/server.js index 68b6dab53..6a4cbc2f2 100644 --- a/assets/server.js +++ b/assets/server.js @@ -29,6 +29,13 @@ class Server { * @param {String} url * @return {Promise} */ + + async getCsrfToken() { + const response = await this.get('/companies/get-csrf-token'); + return response.csrf_token; + + } + get(url) { return fetch(url, options({})) .then(checkStatus); @@ -63,6 +70,19 @@ class Server { })).then(checkStatus); } + postWithCsrfToken(url, data) { + return this.getCsrfToken().then(csrfToken => { + return fetch(url, options({ + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-CSRF-Token': csrfToken + }, + body: JSON.stringify(data), + })).then(checkStatus); + }); +} + /** * Make POST request to url in keeps the format of the input * diff --git a/assets/styles/icon-font.scss 
b/assets/styles/icon-font.scss index 3c84c75dc..d319dc06c 100644 --- a/assets/styles/icon-font.scss +++ b/assets/styles/icon-font.scss @@ -210,7 +210,8 @@ .icon--coverage-live-video:before { content: "\e629"; } -.icon--coverage-audio:before { +.icon--coverage-audio:before, +.icon--audio:before { content: "\e62a"; } .icon--coverage-live-blog:before { diff --git a/assets/styles/index.scss b/assets/styles/index.scss index d713c3369..3b7b3d0b6 100644 --- a/assets/styles/index.scss +++ b/assets/styles/index.scss @@ -17,6 +17,12 @@ h3.home-section-heading { margin-bottom: 1rem; } +figcaption { + font-size: 0.8125rem; + color: #7c7c7c; + margin: 10px 20px; +} + // variables $nav-sidebar-bg: #3e4557; $nav-sidebar-border: #323847; @@ -1819,7 +1825,10 @@ article.list { .wire-articles__item__icons { display: contents; - flex-wrap: wrap; + // The flex-wrap property is commented out because flex: 1 is already set on the flex items. + // With flex: 1, the flex items will grow and shrink as needed to fit within the flex container on a single line. + // Uncommenting flex-wrap: wrap; would allow the flex items icons to wrap onto multiple lines, which is not desired in the article popup. 
+ // flex-wrap: wrap; margin-bottom: 10px; @include md { diff --git a/assets/styles/wire.scss b/assets/styles/wire.scss index 79707cb7e..9adb0e66b 100644 --- a/assets/styles/wire.scss +++ b/assets/styles/wire.scss @@ -38,3 +38,24 @@ font-size: 24px; } } + +$disabled-dimension: 0; +$text-color: #333333; +$font-size: 14px; + +@mixin hidden { + display: none; + visibility: hidden; +} + +.disabled-embed { + @include hidden; + height: $disabled-dimension; + width: $disabled-dimension; + overflow: hidden; +} + +.default-setting-text { + color: $text-color; + font-size: $font-size; +} diff --git a/assets/ui/components/ArticleBodyHtml.jsx b/assets/ui/components/ArticleBodyHtml.jsx index f1d625f82..19fba4425 100644 --- a/assets/ui/components/ArticleBodyHtml.jsx +++ b/assets/ui/components/ArticleBodyHtml.jsx @@ -1,9 +1,10 @@ import React from 'react'; import PropTypes from 'prop-types'; -import { get } from 'lodash'; -import { formatHTML } from 'utils'; +import {get, memoize} from 'lodash'; +import {formatHTML} from 'utils'; import {connect} from 'react-redux'; -import { selectCopy } from '../../wire/actions'; +import {selectCopy} from '../../wire/actions'; +import DOMPurify from 'dompurify'; /** * using component to fix iframely loading @@ -12,29 +13,39 @@ import { selectCopy } from '../../wire/actions'; class ArticleBodyHtml extends React.PureComponent { constructor(props) { super(props); + this.state = { + sanitizedHtml: '' + }; this.copyClicked = this.copyClicked.bind(this); this.clickClicked = this.clickClicked.bind(this); + this.preventContextMenu = this.preventContextMenu.bind(this); + + // use memoize so this function is only called when `body_html` changes + this.getBodyHTML = memoize(this._getBodyHTML.bind(this)); + this.bodyRef = React.createRef(); } componentDidMount() { + this.updateSanitizedHtml(); this.loadIframely(); + this.executeScripts(); document.addEventListener('copy', this.copyClicked); document.addEventListener('click', this.clickClicked); + 
this.addContextMenuEventListeners(); } clickClicked(event) { if (event != null) { const target = event.target; - if (target && target.tagName === 'A' && this.isLinkExternal(target.href)) { event.preventDefault(); event.stopPropagation(); - // security https://mathiasbynens.github.io/rel-noopener/ - var nextWindow = window.open(); + const nextWindow = window.open(target.href, '_blank', 'noopener'); - nextWindow.opener = null; - nextWindow.location.href = target.href; + if (nextWindow) { + nextWindow.opener = null; + } } } } @@ -51,8 +62,36 @@ class ArticleBodyHtml extends React.PureComponent { } } - componentDidUpdate() { + componentDidUpdate(prevProps) { + if (prevProps.item !== this.props.item) { + this.updateSanitizedHtml(); + } this.loadIframely(); + this.executeScripts(); + this.addContextMenuEventListeners(); + } + + updateSanitizedHtml() { + const item = this.props.item; + const html = this.getBodyHTML( + get(item, 'es_highlight.body_html.length', 0) > 0 ? + item.es_highlight.body_html[0] : + item.body_html + ); + this.sanitizeHtml(html); + } + + sanitizeHtml(html) { + if (!html) { + this.setState({ sanitizedHtml: '' }); + return; + } + const sanitizedHtml = DOMPurify.sanitize(html, { + ADD_TAGS: ['iframe'], + ADD_ATTR: ['allow', 'allowfullscreen', 'frameborder', 'scrolling', 'src', 'width', 'height'], + ALLOW_DATA_ATTR: true + }); + this.setState({ sanitizedHtml }); } loadIframely() { @@ -63,6 +102,63 @@ class ArticleBodyHtml extends React.PureComponent { } } + executeScripts() { + const tree = this.bodyRef.current; + const loaded = []; + + if (tree == null) { + return; + } + + tree.querySelectorAll('script').forEach((s) => { + if (s.hasAttribute('src') && !loaded.includes(s.getAttribute('src'))) { + let url = s.getAttribute('src'); + + // Check if the URL starts with 'https://' or 'http://' + if (url.startsWith('https://') || url.startsWith('http://')) { + loaded.push(url); + + // Check for specific platform URLs and corresponding global objects + if 
(url.includes('twitter.com/') && window.twttr != null) { + window.twttr.widgets.load(); + return; + } + + if (url.includes('instagram.com/') && window.instgrm != null) { + window.instgrm.Embeds.process(); + return; + } + + // Force Flourish to always load + if (url.includes('flourish.studio/')) { + delete window.FlourishLoaded; + } + + if (url.startsWith('http')) { + // Change https?:// to // so it uses the schema of the client + url = url.substring(url.indexOf(':') + 1); + } + + const script = document.createElement('script'); + script.src = url; + script.async = true; + + script.onload = () => { + document.body.removeChild(script); + }; + + script.onerror = (error) => { + throw new URIError('The script ' + error.target.src + ' didn\'t load.'); + }; + + document.body.appendChild(script); + } else { + console.warn('stop loading insecure script:', url); + } + } + }); + } + copyClicked() { this.props.reportCopy(this.props.item); } @@ -70,27 +166,119 @@ class ArticleBodyHtml extends React.PureComponent { componentWillUnmount() { document.removeEventListener('copy', this.copyClicked); document.removeEventListener('click', this.clickClicked); + this.removeContextMenuEventListeners(); } - render() { + addContextMenuEventListeners() { + const tree = this.bodyRef.current; + if (tree) { + tree.querySelectorAll('[data-disable-download="true"]').forEach((element) => { + element.addEventListener('contextmenu', this.preventContextMenu); + }); + } + } + + removeContextMenuEventListeners() { + const tree = this.bodyRef.current; + if (tree) { + tree.querySelectorAll('[data-disable-download="true"]').forEach((element) => { + element.removeEventListener('contextmenu', this.preventContextMenu); + }); + } + } + + preventContextMenu(event) { + event.preventDefault(); + } + + _getBodyHTML(bodyHtml) { + return !bodyHtml ? 
+ null : + this._updateImageEmbedSources(formatHTML(bodyHtml)); + } + + /** + * Update Image Embeds to use the Web APIs Assets endpoint + * + * @param html - The `body_html` value (could also be the ES Highlight version) + * @returns {string} + * @private + */ + _updateImageEmbedSources(html) { const item = this.props.item; - if (!item.body_html) { - return null; + // Get the list of Original Rendition IDs for all Image Associations + const imageEmbedOriginalIds = Object + .keys(item.associations || {}) + .filter((key) => key.startsWith('editor_')) + .map((key) => get(item.associations[key], 'renditions.original.media')) + .filter((value) => value); + + if (!imageEmbedOriginalIds.length) { + // This item has no Image Embeds + // return the supplied html as-is + return html; } - const esHighlightedItem = get(item, 'es_highlight.body_html.length', 0) > 0 ? - { - ...item, - body_html: item.es_highlight.body_html[0] - } : item; - const html = formatHTML(esHighlightedItem.body_html); + // Create a DOM node tree from the supplied html + // We can then efficiently find and update the image sources + const container = document.createElement('div'); + let imageSourcesUpdated = false; + + container.innerHTML = html; + container + .querySelectorAll('img,video,audio') + .forEach((imageTag) => { + // Using the tag's `src` attribute, find the Original Rendition's ID + const originalMediaId = imageEmbedOriginalIds.find((mediaId) => ( + !imageTag.src.startsWith('/assets/') && + imageTag.src.includes(mediaId)) + ); + + if (originalMediaId) { + // We now have the Original Rendition's ID + // Use that to update the `src` attribute to use Newshub's Web API + imageSourcesUpdated = true; + imageTag.src = `/assets/${originalMediaId}`; + } + }); + + // Find all Audio and Video tags and mark them up for the player + container.querySelectorAll('video, audio') + .forEach((vTag) => { + vTag.classList.add('js-player'); + if (vTag.getAttribute('data-disable-download')) { + 
vTag.setAttribute('data-plyr-config', '{"controls": ["play-large", "play",' + + '"progress", "volume", "mute", "rewind", "fast-forward", "current-time",' + + '"captions", "restart", "duration"]}'); + + } else { + vTag.setAttribute('data-plyr-config', '{"controls": ["play-large", "play",' + + '"progress", "volume", "mute", "rewind", "fast-forward", "current-time",' + + '"captions", "restart", "duration", "download"], "urls": {"download": ' + + '"' + vTag.getAttribute('src') + '?item_id=' + item._id + '"' + + '}}'); + } + + imageSourcesUpdated = true; + }); + + return imageSourcesUpdated ? + container.innerHTML : + html; + } + + render() { + if (!this.state.sanitizedHtml) { + return null; + } return (
); } @@ -99,7 +287,11 @@ class ArticleBodyHtml extends React.PureComponent { ArticleBodyHtml.propTypes = { item: PropTypes.shape({ body_html: PropTypes.string, - es_highlight: PropTypes.Object + _id: PropTypes.string, + es_highlight: PropTypes.shape({ + body_html: PropTypes.arrayOf(PropTypes.string), + }), + associations: PropTypes.object, }).isRequired, reportCopy: PropTypes.func, }; diff --git a/assets/wire/components/ItemDetails.jsx b/assets/wire/components/ItemDetails.jsx index 42d5ba1d2..f7744515b 100644 --- a/assets/wire/components/ItemDetails.jsx +++ b/assets/wire/components/ItemDetails.jsx @@ -67,7 +67,7 @@ function ItemDetails({item, user, actions, onClose, detailsConfig, downloadVideo {isDisplayed('abstract', detailsConfig) && } {isDisplayed('body_html', detailsConfig) && } - {!isEmpty(videos) && videos.map((video) => { const previousVersions = document.getElementById(inputRef); previousVersions && previousVersions.scrollIntoView(); @@ -27,6 +27,16 @@ function PreviewMeta({item, isItemDetail, inputRef, displayConfig}) { )} + {!isEmpty(video) && + + + + } + {!isEmpty(audio) && + + + + }
{isDisplayed('urgency', displayConfig) && diff --git a/assets/wire/components/WireListItem.jsx b/assets/wire/components/WireListItem.jsx index e8fedaac8..2d477d90c 100644 --- a/assets/wire/components/WireListItem.jsx +++ b/assets/wire/components/WireListItem.jsx @@ -4,7 +4,7 @@ import classNames from 'classnames'; import {get} from 'lodash'; import {gettext, fullDate, wordCount, LIST_ANIMATIONS, getSlugline} from 'utils'; -import {getPicture, getThumbnailRendition, showItemVersions, shortText, isKilled, getVideos} from 'wire/utils'; +import {getPicture, getThumbnailRendition, showItemVersions, shortText, isKilled, getVideos, getAudio} from 'wire/utils'; import ActionButton from 'components/ActionButton'; @@ -77,6 +77,7 @@ class WireListItem extends React.Component { }); const picture = getPicture(item); const videos = getVideos(item); + const audio = getAudio(item); const isMarketPlace = this.props.context === 'aapX'; return ( @@ -98,14 +99,14 @@ class WireListItem extends React.Component {
{!isExtended && ( - + )} {item.headline} {isExtended && !isMarketPlace && (
- +
{getSlugline(item, true)} {item.source} @@ -122,7 +123,7 @@ class WireListItem extends React.Component {
,
- +
{this.wordCount} {gettext('words')}
diff --git a/assets/wire/components/WireListItemIcons.jsx b/assets/wire/components/WireListItemIcons.jsx index ec53e78fc..fa8f12e3b 100644 --- a/assets/wire/components/WireListItemIcons.jsx +++ b/assets/wire/components/WireListItemIcons.jsx @@ -2,7 +2,7 @@ import React from 'react'; import PropTypes from 'prop-types'; import {isEmpty} from 'lodash'; -function WireListItemIcons({item, picture, videos, divider}) { +function WireListItemIcons({item, picture, videos, audio, divider}) { return (
{item.type === 'text' && @@ -20,6 +20,11 @@ function WireListItemIcons({item, picture, videos, divider}) { } + {!isEmpty(audio) && + + + + } {divider && } @@ -31,6 +36,7 @@ WireListItemIcons.propTypes = { item: PropTypes.object, picture: PropTypes.object, videos: PropTypes.array, + audio: PropTypes.array, divider: PropTypes.bool, }; diff --git a/assets/wire/components/WirePreview.jsx b/assets/wire/components/WirePreview.jsx index 8edccc2f3..e00470187 100644 --- a/assets/wire/components/WirePreview.jsx +++ b/assets/wire/components/WirePreview.jsx @@ -85,7 +85,7 @@ class WirePreview extends React.PureComponent { } {isDisplayed('body_html', previewConfig) && } - {!isEmpty(videos) && videos.map((video) => get(assoc, 'type') === 'video'); } +/** + * Get audio for an item + * + * if item is audio return it, otherwise look for audio association + * + * @param {Object} item + * @return {Array} + */ +export function getAudio(item) { + return item.type === 'audio' ? [item] : Object.values(get(item, 'associations', {}) || {}).filter((assoc) => get(assoc, 'type') === 'audio'); +} /** * Get picture for an item diff --git a/features/environment.py b/features/environment.py index 987200eb2..a0e840e2e 100644 --- a/features/environment.py +++ b/features/environment.py @@ -31,7 +31,8 @@ def before_scenario(context, scenario): 'NEWS_API_ENABLED': True, 'NEWS_API_IMAGE_PERMISSIONS_ENABLED': True, 'NEWS_API_TIME_LIMIT_DAYS': 100, - 'NEWS_API_ALLOWED_RENDITIONS': 'original,16-9' + 'NEWS_API_ALLOWED_RENDITIONS': 'original,16-9', + 'EMBED_PRODUCT_FILTERING': True, } if 'rate_limit' in scenario.tags: diff --git a/features/news_api_atom.feature b/features/news_api_atom.feature index f96d2ea68..d76ad6794 100644 --- a/features/news_api_atom.feature +++ b/features/news_api_atom.feature @@ -70,6 +70,26 @@ Feature: News API News Search Then we "get" "Mick Tsikas/AAP PHOTOS" in atom xml response Scenario: Simple atom request with embedded image + Given "products" + """ + [{"name": "A fishy 
Product", + "description": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "fish", + "product_type": "news_api" + }, + {"name": "A pic product", + "description": "pic product", + "companies" : [ + "#companies._id#" + ], + "query": "", + "sd_product_id": "1", + "product_type": "news_api" + }] + """ Given "items" """ [{"body_html": "

Once upon a time there was a fish who could swim

\"altSome caption
", @@ -83,6 +103,7 @@ Feature: News API News Search "version" : "1", "byline" : "Mick Tsikas/AAP PHOTOS", "body_text" : "QUESTION TIME ALT", + "products": [{"code": "1"}], "renditions" : { "16-9" : { "href" : "/assets/5fc5dce16369ab07be3325fa", diff --git a/features/news_api_item.feature b/features/news_api_item.feature index 893de640d..32b0adbb1 100644 --- a/features/news_api_item.feature +++ b/features/news_api_item.feature @@ -204,4 +204,72 @@ Feature: News API Item "headline": "headline 1", "associations": {"featuremedia": {"renditions": {"original": {}} }} } + """ + + Scenario: Item request response strips embeds + Given "items" + """ + [{"_id": "111", "body_html": "

Once upon a time there was

a fish

who could swim

\"altSome caption

", + "headline": "headline 1", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#", + "associations": {"editor_19": {"products": [{"code": "1234"}], "renditions": {"original": {}} }}}] + """ + Given "products" + """ + [{"name": "A fishy Product", + "decsription": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "Once upon a time", + "product_type": "news_api" + }, + {"name": "A fishy superdesk product", + "description": "a superdesk product restricting images in the atom feed", + "companies" : [ + "#companies._id#" + ], + "sd_product_id": "1234", + "product_type": "news_api" + } + ] + """ + When we get "/news/item/111?format=NINJSFormatter&no_embeds=true&no_media=1" + Then we get existing resource + """ + { + "guid": "111", + "headline": "headline 1", + "body_html": "

Once upon a time there was

who could swim

" + } + """ + When we get "/news/item/111?format=NINJSFormatter2&no_embeds=true" + Then we get existing resource + """ + { + "guid": "111", + "headline": "headline 1", + "body_html": "

Once upon a time there was

who could swim

\"altSome caption

", + "associations": {"editor_19": {"renditions": {"original": {}}}} + } + """ + When we get "/news/item/111?format=NINJSFormatter2&no_media=true" + Then we get existing resource + """ + { + "guid": "111", + "headline": "headline 1", + "body_html": "

Once upon a time there was

a fish

who could swim

", + "associations": {} + } + """ + When we get "/news/item/111?format=NINJSFormatter3" + Then we get existing resource + """ + { + "guid": "111", + "headline": "headline 1", + "body_html": "

Once upon a time there was

who could swim

", + "associations": {} + } """ \ No newline at end of file diff --git a/features/news_api_search.feature b/features/news_api_search.feature index 21a8a38ee..01c375ccb 100644 --- a/features/news_api_search.feature +++ b/features/news_api_search.feature @@ -211,7 +211,7 @@ Feature: News API News Search "product_type": "news_api" }] """ - When we get "news/search?start_date=now-10d&include_fields=headline" + When we get "news/search?start_date=now-10d&include_fields=slugline" Then we get response code 400 Scenario: exclude fields @@ -599,6 +599,55 @@ Feature: News API News Search ]} """ + Scenario: Search request response restricted by embedded image product + Given "items" + """ + [{"_id": "111", "body_html": "

Once upon a time there

\"altSome caption

was a fish who could swim

", + "headline": "headline 1", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#", + "associations": {"editor_19": {"products": [{"code": "1234"}], "renditions": {"16-9": {"media" : "something"}} }}}, + {"_id": "222", "body_html": "

Once upon a time there

\"altSome caption

was a aardvark who could swim

", + "headline": "headline 2", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#", + "associations": {"editor_19": {"products": [{"code": "4321"}], "renditions": {"16-9": {"media" : "something"}} }}}] + """ + Given "products" + """ + [{"name": "A fishy Product", + "decsription": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "Once upon a time", + "product_type": "news_api" + }, + {"name": "A fishy superdesk product", + "description": "a superdesk product restricting images in the atom feed", + "companies" : [ + "#companies._id#" + ], + "sd_product_id": "1234", + "product_type": "news_api" + } + ] + """ + When we get "news/search?q=fish&include_fields=associations,body_html" + Then we get list with 1 items + """ + {"_items": [ + {"_id": "111", + "associations": {"editor_19": {"renditions": {"16-9": {"media" : "something"}} }}} + ]} + """ + Then we get "" in syndicate xml response + Then we "don't get" "<![CDATA[headline 2]]>" in syndicate xml response + + Scenario: test syndicate atom request search q + Given "items" + """ + [{"body_html": "

Once upon a time there was a monkey who could swim

", "headline": "headline 1", + "byline": "S Smith", "pubstatus": "usable", "service" : [{"name" : "Australian General News", "code" : "a"}], + "description_text": "summary", + "associations" : { + "featuremedia" : { + "mimetype" : "image/jpeg", + "description_text" : "Deputy Prime Minister Michael McCormack during Question Time", + "version" : "1", + "byline" : "Mick Tsikas/AAP PHOTOS", + "body_text" : "QUESTION TIME ALT", + "renditions" : { + "16-9" : { + "href" : "/assets/5fc5dce16369ab07be3325fa", + "height" : 720, + "width" : 1280, + "media" : "5fc5dce16369ab07be3325fa", + "poi" : { + "x" : 453, + "y" : 335 + }, + "mimetype" : "image/jpeg" + } + } + }}, + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}] + """ + When we get "syndicate?formatter=atom&q=monkey" + Then we get OK response + Then we "get" "<![CDATA[headline 1]]>" in syndicate xml response + Then we "get" "Mick Tsikas/AAP PHOTOS" in syndicate xml response + + Scenario: test atom request with embedded image + Given "products" + """ + [{"name": "A fishy Product", + "decsription": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "fish", + "product_type": "news_api" + }, + {"name": "A pic product", + "decsription": "pic product", + "companies" : [ + "#companies._id#" + ], + "query": "", + "sd_product_id": "1", + "product_type": "news_api" + }] + """ + Given "items" + """ + [{"body_html": "

Once upon a time there was a fish who could swim

\"altSome caption
", + "headline": "headline 1", + "byline": "S Smith", "pubstatus": "usable", "service" : [{"name" : "Australian General News", "code" : "a"}], + "description_text": "summary", + "associations" : { + "editor_19" : { + "mimetype" : "image/jpeg", + "description_text" : "Deputy Prime Minister Michael McCormack during Question Time", + "version" : "1", + "byline" : "Mick Tsikas/AAP PHOTOS", + "body_text" : "QUESTION TIME ALT", + "products": [{"code": "1"}], + "renditions" : { + "16-9" : { + "href" : "/assets/5fc5dce16369ab07be3325fa", + "height" : 720, + "width" : 1280, + "media" : "5fc5dce16369ab07be3325fa", + "poi" : { + "x" : 453, + "y" : 335 + }, + "mimetype" : "image/jpeg" + } + } + }}, + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}] + """ + When we get "syndicate?formatter=atom" + Then we get OK response + Then we "get" "<![CDATA[headline 1]]>" in syndicate xml response + Then we "get" "5fc5dce16369ab07be3325fa" in atom xml response + Then we "get" "src="http://" in atom xml response + + Scenario: Atom request response restricted by featured image product + Given "items" + """ + [{"body_html": "Once upon a time there was a fish who could swim", "headline": "headline 1", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#", + "associations": {"featuremedia": {"products": [{"code": "1234"}], "renditions": {"original": {}} }}}, + {"body_html": "Once upon a time there was a aardvark that could not swim", "headline": "headline 2", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#", + "associations": {"featuremedia": {"products": [{"code": "4321"}], "renditions": {"original": {}} }}}] + """ + Given "products" + """ + [{"name": "A fishy Product", + "decsription": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "Once upon a time", + "product_type": "news_api" + }, + {"name": "A fishy superdesk product", + "description": "a superdesk product restricting images in the atom feed", + "companies" : [ + 
"#companies._id#" + ], + "sd_product_id": "1234", + "product_type": "news_api" + } + ] + """ + When we get "syndicate?formatter=atom" + Then we get OK response + Then we "get" "<![CDATA[headline 1]]>" in syndicate xml response + Then we "don't get" "<![CDATA[headline 2]]>" in syndicate xml response diff --git a/features/steps/steps.py b/features/steps/steps.py index acca89f68..60342b2a8 100644 --- a/features/steps/steps.py +++ b/features/steps/steps.py @@ -17,6 +17,7 @@ from wooper.general import ( get_body ) +import logging @when('we save API token') @@ -54,8 +55,8 @@ def step_store_next_page_from_response(context): @then('we get "{text}" in text response') def we_get_text_in_response(context, text): with context.app.test_request_context(context.app.config['URL_PREFIX']): - assert(isinstance(get_body(context.response), str)) - assert(text in get_body(context.response)) + assert (isinstance(get_body(context.response), str)) + assert (text in get_body(context.response)) @when('we set api time limit to {value}') @@ -67,10 +68,28 @@ def we_set_api_time_limit(context, value): @then('we "{get}" "{text}" in atom xml response') def we_get_text_in_atom_xml_response(context, get, text): with context.app.test_request_context(context.app.config['URL_PREFIX']): - assert(isinstance(get_body(context.response), str)) + assert (isinstance(get_body(context.response), str)) tree = lxml.etree.fromstring(get_body(context.response).encode('utf-8')) assert '{http://www.w3.org/2005/Atom}feed' == tree.tag if get == 'get': - assert(text in get_body(context.response)) + assert (text in get_body(context.response)) else: assert (text not in get_body(context.response)) + + +@then('we "{get}" "{text}" in syndicate xml response') +def we_get_text_in_syndicate_xml_response(context, get, text): + with context.app.test_request_context(context.app.config['URL_PREFIX']): + response_body = get_body(context.response) + logging.info("Response body: %s", response_body) + assert 
(isinstance(get_body(context.response), str)) + try: + tree = lxml.etree.fromstring(response_body.encode('utf-8')) + assert '{http://www.w3.org/2005/Atom}feed' == tree.tag + if get == 'get': + assert (text in response_body) + else: + assert (text not in response_body) + except lxml.etree.XMLSyntaxError as e: + logging.error("XML parsing error: %s", e) + raise AssertionError("Response is not valid XML") diff --git a/manage.py b/manage.py index b86b2051f..54f76a733 100644 --- a/manage.py +++ b/manage.py @@ -58,15 +58,20 @@ def elastic_init(): @manager.option('-c', '--collection', dest='collection', default=None) @manager.option('-t', '--timestamp', dest='timestamp', default=None) @manager.option('-d', '--direction', dest='direction', choices=['older', 'newer'], default='older') -def index_from_mongo(hours, collection, timestamp, direction): - print('Checking if elastic index exists, a new one will be created if not') - app.data.init_elastic(app) - print('Elastic index check has been completed') +@manager.option('-s', '--start_id', dest='start_id', default=None) +@manager.option('-i', '--skip_init', dest='skip_init', default=False) +def index_from_mongo(hours, collection, timestamp, direction, start_id, skip_init): + if not skip_init: + print('Checking if elastic index exists, a new one will be created if not') + app.data.init_elastic(app) + print('Elastic index check has been completed') + else: + print('Skipping index initialisation') if timestamp: index_elastic_from_mongo_from_timestamp(collection, timestamp, direction) else: - index_elastic_from_mongo(hours=hours, collection=collection) + index_elastic_from_mongo(hours=hours, collection=collection, start_id=start_id) @manager.command diff --git a/newsroom/__init__.py b/newsroom/__init__.py index 89d9b2b6c..3a8f96751 100644 --- a/newsroom/__init__.py +++ b/newsroom/__init__.py @@ -7,6 +7,7 @@ import superdesk from superdesk import register_resource # noqa +from newsroom.user_roles import UserRole # reuse content 
api dbs MONGO_PREFIX = 'CONTENTAPI_MONGO' @@ -17,6 +18,20 @@ class Resource(superdesk.Resource): mongo_prefix = MONGO_PREFIX elastic_prefix = ELASTIC_PREFIX + # by default make resources available to internal users/administrators + allowed_roles = [UserRole.ADMINISTRATOR, UserRole.INTERNAL, UserRole.ACCOUNT_MANAGEMENT] + allowed_item_roles = [UserRole.ADMINISTRATOR, UserRole.INTERNAL, UserRole.ACCOUNT_MANAGEMENT] + + def __init__(self, endpoint_name, app, service, endpoint_schema=None): + super().__init__(endpoint_name, app, service, endpoint_schema) + config = app.config["DOMAIN"][endpoint_name] + config.update( + { + "allowed_roles": [role.value for role in self.allowed_roles], + "allowed_item_roles": [role.value for role in self.allowed_item_roles], + } + ) + class Service(superdesk.Service): pass diff --git a/newsroom/agenda/views.py b/newsroom/agenda/views.py index 7a97ce479..8a9d021bf 100644 --- a/newsroom/agenda/views.py +++ b/newsroom/agenda/views.py @@ -229,13 +229,10 @@ def related_wire_items(wire_id): wire_ids.append(cov['delivery_id']) wire_items = get_entities_elastic_or_mongo_or_404(wire_ids, 'items') - aggregations = {"uid": {"terms": {"field": "_uid"}}} - permissioned_result = get_resource_service('wire_search').get_items(wire_ids, size=0, aggregations=aggregations, + # Find those items that the user is permitted to view + permissioned_result = get_resource_service('wire_search').get_items(wire_ids, size=len(wire_ids), apply_permissions=True) - buckets = permissioned_result.hits['aggregations']['uid']['buckets'] - permissioned_ids = [] - for b in buckets: - permissioned_ids.append(b['key'].replace('items#', '')) + permissioned_ids = [item.get("_id") for item in permissioned_result] if permissioned_result else [] for wire_item in wire_items: set_item_permission(wire_item, wire_item.get('_id') in permissioned_ids) diff --git a/newsroom/auth/__init__.py b/newsroom/auth/__init__.py index d3f85bac5..d0884c650 100644 --- a/newsroom/auth/__init__.py +++ 
b/newsroom/auth/__init__.py @@ -1,4 +1,5 @@ import re +import flask import superdesk from bson import ObjectId @@ -10,7 +11,12 @@ class SessionAuth(BasicAuth): def authorized(self, allowed_roles, resource, method): - return get_user_id() + if not get_user_id(): + return False + if not resource: + return True # list of apis is open + user_role = flask.session.get("user_type") if flask.request else None + return user_role in allowed_roles def get_user(required=False): diff --git a/newsroom/companies/companies.py b/newsroom/companies/companies.py index 270744a79..d70689378 100644 --- a/newsroom/companies/companies.py +++ b/newsroom/companies/companies.py @@ -51,9 +51,62 @@ class CompaniesResource(newsroom.Resource): 'type': 'boolean', 'default': False, }, - 'company_type': { - 'type': 'string', - 'nullable': True, + 'embedded_video': { + 'type': 'boolean', + 'default': False, + }, + 'embedded': { + 'type': 'dict', + 'schema': { + 'video_display': { + 'type': 'boolean', + 'default': False, + }, + 'audio_display': { + 'type': 'boolean', + 'default': False, + }, + 'social_media_display': { + 'type': 'boolean', + 'default': False, + }, + 'images_display': { + 'type': 'boolean', + 'default': False, + }, + 'sdpermit_display': { + 'type': 'boolean', + 'default': False, + }, + 'all_display': { + 'type': 'boolean', + 'default': False, + }, + 'social_media_download': { + 'type': 'boolean', + 'default': False, + }, + 'video_download': { + 'type': 'boolean', + 'default': False, + }, + 'audio_download': { + 'type': 'boolean', + 'default': False, + }, + 'images_download': { + 'type': 'boolean', + 'default': False, + }, + 'sdpermit_download': { + 'type': 'boolean', + 'default': False, + }, + 'all_download': { + 'type': 'boolean', + 'default': False, + } + } }, 'account_manager': { 'type': 'string' diff --git a/newsroom/companies/views.py b/newsroom/companies/views.py index b80a78e4a..1419afa27 100644 --- a/newsroom/companies/views.py +++ b/newsroom/companies/views.py @@ -3,7 +3,7 @@ 
import flask from bson import ObjectId -from flask import jsonify, current_app as app +from flask import current_app as app from flask_babel import gettext from superdesk import get_resource_service from werkzeug.exceptions import NotFound @@ -13,6 +13,8 @@ from newsroom.utils import query_resource, find_one, get_entity_or_404, get_json_or_400, set_original_creator, \ set_version_creator import ipaddress +from flask import request, jsonify, current_app, session +import secrets def get_company_types_options(company_types): @@ -154,14 +156,45 @@ def update_products(updates, company_id): def update_company(data, _id): updates = {k: v for k, v in data.items() if k in ('sections', 'archive_access', 'events_only')} + + embedded_fields = [ + 'video_display', 'audio_display', 'social_media_display', 'images_display', 'sdpermit_display', 'all_display', + 'social_media_download', 'video_download', 'audio_download', 'images_download', 'sdpermit_download', + 'all_download' + ] + + if 'embedded' in data: + embedded_updates = {k: v for k, v in data['embedded'].items() if k in embedded_fields} + if embedded_updates: + updates['embedded'] = embedded_updates + get_resource_service('companies').patch(_id, updates=updates) @blueprint.route('/companies/<_id>/permissions', methods=['POST']) @account_manager_only def save_company_permissions(_id): + csrf_token = request.headers.get('X-CSRF-Token') + expected_csrf_token = session.get('csrf_token') orig = get_entity_or_404(_id, 'companies') data = get_json_or_400() - update_products(data['products'], _id) - update_company(data, orig['_id']) - return jsonify(), 200 + if not csrf_token or csrf_token != expected_csrf_token: + current_app.logger.error("Permission CSRF validation failed") + return jsonify({"error": "Permission CSRF token validation failed"}), 403 + + try: + update_products(data['products'], _id) + update_company(data, orig['_id']) + except Exception as e: + current_app.logger.error(f"Error updating company 
permissions: {str(e)}") + return jsonify({"error": "An error occurred while updating permissions"}), 500 + + return jsonify({"message": "Permissions updated successfully"}), 200 + + +@blueprint.route('/companies/get-csrf-token', methods=['GET']) +@account_manager_only +def get_csrf_token(): + csrf_token = secrets.token_hex(32) + session['csrf_token'] = csrf_token + return jsonify({'csrf_token': csrf_token}) diff --git a/newsroom/data_updates.py b/newsroom/data_updates.py index 483902f37..457550646 100644 --- a/newsroom/data_updates.py +++ b/newsroom/data_updates.py @@ -278,7 +278,7 @@ def run(self, resource_name, global_update=False): class DataUpdate: def apply(self, direction): - assert(direction in ['forwards', 'backwards']) + assert (direction in ['forwards', 'backwards']) collection = current_app.data.get_mongo_collection(self.resource) db = current_app.data.driver.db getattr(self, direction)(collection, db) diff --git a/newsroom/default_settings.py b/newsroom/default_settings.py index a51f03ac2..7368bf7ce 100644 --- a/newsroom/default_settings.py +++ b/newsroom/default_settings.py @@ -324,3 +324,10 @@ # Enables the application of product filtering to image references in the API and ATOM responses NEWS_API_IMAGE_PERMISSIONS_ENABLED = strtobool(env('NEWS_API_IMAGE_PERMISSIONS_ENABLED', 'false')) + + +# Enable the Plyr media player in the body_html +PLYR = strtobool(env('PLYR', 'false')) + +# If enabled Superdesk product filtering will be applied to the media items embedded in the stories +EMBED_PRODUCT_FILTERING = strtobool(env('EMBED_PRODUCT_FILTERING', 'false')) diff --git a/newsroom/history.py b/newsroom/history.py index 1e5cf65ac..6731fce71 100644 --- a/newsroom/history.py +++ b/newsroom/history.py @@ -2,15 +2,17 @@ import newsroom import pymongo.errors import werkzeug.exceptions +from bson import ObjectId from superdesk import get_resource_service from superdesk.resource import not_analyzed, not_enabled from superdesk.utc import utcnow -from flask import 
json, abort, Blueprint, jsonify +from flask import json, abort, Blueprint, jsonify, g, current_app as app from flask_babel import gettext from eve.utils import ParsedRequest from newsroom.utils import get_json_or_400 from newsroom.auth import get_user +from newsroom.products.products import get_products_by_company blueprint = Blueprint('history', __name__) @@ -82,6 +84,110 @@ def transform(item): def create_history_record(self, items, action, user, section, monitoring=None): self.create(items, action, user, section, monitoring) + def create_media_history_record(self, item, association_name, action, user, section): + """ + Log the download of an association belonging to an item + :param item: + :param association_name: + :param action: + :param user: + :param section: + :return: + """ + now = utcnow() + if action is None: + action = "media" + entry = { + 'action': action, + 'versioncreated': now, + 'user': user.get('_id', None), + 'company': user.get('company', None), + 'item': item.get('_id'), + 'version': item.get('version') if item.get('version') else item.get('_current_version', ''), + 'section': section, + 'extra_data': association_name + } + try: + super().create([entry]) + except (werkzeug.exceptions.Conflict, pymongo.errors.BulkWriteError): + pass + + def _find_association(self, item, media_id): + """ + Find the matching media association in the item + :param item: item object + :param media_id: ID of the media + :return: tuple (name, association) or a 404 + """ + for name, association in (item.get('associations') or {}).items(): + for rendition in association.get("renditions", []): + if association.get('renditions').get(rendition).get('media') == media_id: + return name, association + # not found + abort(404) + + def _get_permitted_products(self, company, section): + """ + Get the list of permitted Superdesk products for the user's company + :param company: company + :param section: section name + :return: list of permitted products + """ + return 
[p.get('sd_product_id') for p in get_products_by_company(company, None, section) if + p.get('sd_product_id')] + + def _check_permissions(self, item, company, name, section): + """ + Check the passed item rendition is allowed for the given company if required + :param item: + :param company: + :param name: + :param section: + :return: + """ + if app.config.get("EMBED_PRODUCT_FILTERING"): + permitted_products = self._get_permitted_products(company, section) + embed_products = [p.get('code') for p in + ((item.get('associations') or {}).get(name) or {}).get('products', [])] + + if not len(set(embed_products) & set(permitted_products)): + abort(403) + + def log_media_download(self, item_id, media_id, section='wire'): + """ + Given an item, media reference and a user record the download + :param item: + :param media: + :return: + """ + user = get_user(required=True) + item = get_resource_service('items').find_one(req=None, _id=item_id) + if not item: + abort(404) + + name, association = self._find_association(item, media_id) + self._check_permissions(item, user.get('company'), name, section) + + action = 'download ' + association.get('type') + self.create_media_history_record(item, name, action, user, section) + + def log_api_media_download(self, item_id, media_id): + """ + Given an item, media reference and a user record the download + :param item: + :param media: + :return: + """ + item = get_resource_service('items').find_one(req=None, _id=item_id) + if not item: + abort(404) + + name, association = self._find_association(item, media_id) + self._check_permissions(item, g.user, name, 'news_api') + + action = 'download ' + association.get('type') + self.create_media_history_record(item, name, action, {'_id': None, 'company': ObjectId(g.user)}, 'news_api') + def query_items(self, query): if query['from'] >= 1000: # https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html#pagination diff --git a/newsroom/mongo_utils.py b/newsroom/mongo_utils.py index 
892587d96..b06a0f82e 100644 --- a/newsroom/mongo_utils.py +++ b/newsroom/mongo_utils.py @@ -2,6 +2,7 @@ import time import pymongo import superdesk +from bson import ObjectId from datetime import timedelta, datetime from flask import current_app as app @@ -13,7 +14,7 @@ default_page_size = 500 -def index_elastic_from_mongo(hours=None, collection=None): +def index_elastic_from_mongo(hours=None, collection=None, start_id=None): print('Starting indexing from mongodb for "{}" collection hours={}'.format(collection, hours)) resources = app.data.get_elastic_resources() @@ -25,7 +26,7 @@ def index_elastic_from_mongo(hours=None, collection=None): for resource in resources: print('Starting indexing collection {}'.format(resource)) - for items in _get_mongo_items(resource, hours): + for items in _get_mongo_items(resource, hours, start_id): print('{} Inserting {} items'.format(time.strftime('%X %x %Z'), len(items))) s = time.time() @@ -94,15 +95,16 @@ def index_elastic_from_mongo_from_timestamp(collection, timestamp_str, direction print('Finished indexing collection {}'.format(collection)) -def _get_mongo_items(mongo_collection_name, hours=None): +def _get_mongo_items(mongo_collection_name, hours=None, start_id=None): """Generate list of items from given mongo collection per default page size. 
:param mongo_collection_name: Name of the collection to get the items :return: list of items """ - print('Indexing data from mongo/{} to elastic/{} for hours={}'.format(mongo_collection_name, - mongo_collection_name, - hours)) + print('Indexing data from mongo/{} to elastic/{} for hours={} from id>{}'.format(mongo_collection_name, + mongo_collection_name, + hours, + start_id)) db = app.data.get_mongo_collection(mongo_collection_name) args = {'limit': default_page_size, 'sort': [(config.ID_FIELD, pymongo.ASCENDING)]} @@ -113,16 +115,24 @@ def _get_mongo_items(mongo_collection_name, hours=None): now = utcnow() args['filter'] = {} - last_id = None + if start_id: + last_id = ObjectId(start_id) + else: + last_id = None while True: if last_id: args['filter'].update({config.ID_FIELD: {'$gt': last_id}}) cursor = db.find(**args) - if not cursor.count(): - break items = list(cursor) + if not len(items): + break + last_id = items[-1][config.ID_FIELD] - yield items + + if start_id: + yield [item for item in items if isinstance(item.get('_id'), ObjectId)] + else: + yield items def _get_mongo_items_from_timestamp(collection, timestamp, direction): diff --git a/newsroom/monitoring/email_alerts.py b/newsroom/monitoring/email_alerts.py index 123cec8d4..b2d7c5499 100644 --- a/newsroom/monitoring/email_alerts.py +++ b/newsroom/monitoring/email_alerts.py @@ -24,7 +24,6 @@ from .utils import get_monitoring_file, truncate_article_body, get_date_items_dict import base64 import os -import re try: from urllib.parse import urlparse @@ -255,9 +254,10 @@ def filter_users(self, m, company): u['is_enabled'] and u['company'] == company['_id']]] # append any addresses from the profile if m.get('email'): - for address in re.split(r'[, ]*', m.get('email')): - if address not in email_addresses: - email_addresses.append(address) + address_list = m.get('email').split(',') + for address in address_list: + if address.strip() not in email_addresses: + email_addresses.append(address.strip()) return 
email_addresses def send_alerts(self, monitoring_list, created_from, created_from_time, now): diff --git a/newsroom/monitoring/forms.py b/newsroom/monitoring/forms.py index d1c57cf2e..616c1f874 100644 --- a/newsroom/monitoring/forms.py +++ b/newsroom/monitoring/forms.py @@ -4,7 +4,6 @@ from wtforms import SelectField from wtforms.validators import DataRequired, Email, Optional from copy import deepcopy -import re alert_types = [('full_text', gettext('Full text')), ('linked_text', gettext('Linked extract(s)'))] format_types = [('monitoring_pdf', gettext('PDF')), ('monitoring_rtf', gettext('RTF')), @@ -32,10 +31,10 @@ class Meta: query = TextAreaField(gettext('Query')) def validate_email(form, field): - address_list = re.split(r'[, ]*', field.data) + address_list = field.data.split(',') input_data = deepcopy(field.data) for address in address_list: - v = Email(message=field.gettext('Invalid email address: ') + address) - field.data = address + v = Email(message=field.gettext('Invalid email address: ') + address.strip()) + field.data = address.strip() v(form, field) field.data = input_data diff --git a/newsroom/monitoring/utils.py b/newsroom/monitoring/utils.py index 55f2594e6..b5f5c6115 100644 --- a/newsroom/monitoring/utils.py +++ b/newsroom/monitoring/utils.py @@ -1,7 +1,7 @@ from flask import current_app as app import collections from superdesk.text_utils import get_text -from newsroom.utils import get_items_by_id +from newsroom.utils import get_items_by_id, remove_all_embeds from superdesk import etree as sd_etree @@ -45,6 +45,7 @@ def truncate_article_body(items, monitoring_profile, full_text=False): # To make sure PDF creator and RTF creator does truncate for linked_text settings # Manually truncate it for i in items: + remove_all_embeds(i) i['body_str'] = get_text(i.get('body_html', ''), content='html', lf_on_block=True) if monitoring_profile['alert_type'] == 'linked_text': if not full_text and len(i['body_str']) > 160: diff --git 
a/newsroom/news_api/formatters/service.py b/newsroom/news_api/formatters/service.py index 73e1d4025..e19d37083 100644 --- a/newsroom/news_api/formatters/service.py +++ b/newsroom/news_api/formatters/service.py @@ -2,12 +2,14 @@ from superdesk.utils import ListCursor from flask import abort from superdesk import get_resource_service -from flask import current_app as app +from flask import current_app as app, g from eve.versioning import versioned_id_field from datetime import timedelta from superdesk.utc import utcnow from newsroom.settings import get_setting from newsroom import Service +from newsroom.wire.formatters.utils import remove_unpermissioned_embeds +from ..utils import update_embed_urls, set_association_links class APIFormattersService(Service): @@ -42,5 +44,8 @@ def get_version(self, id, version, formatter_name): if utcnow() - timedelta(days=int(get_setting('news_api_time_limit_days'))) > item.get('versioncreated', utcnow()): abort(404) + remove_unpermissioned_embeds(item, g.user, 'news_api') + update_embed_urls(item, None) + set_association_links(item) ret = formatter.format_item(item) return {'formatted_item': ret, 'mimetype': formatter.MIMETYPE, 'version': item.get('version')} diff --git a/newsroom/news_api/news/assets/assets.py b/newsroom/news_api/news/assets/assets.py index b9174970f..4aaa3c892 100644 --- a/newsroom/news_api/news/assets/assets.py +++ b/newsroom/news_api/news/assets/assets.py @@ -1,6 +1,6 @@ import superdesk import flask -from flask import abort +from flask import abort, request from newsroom.upload import ASSETS_RESOURCE from flask_babel import gettext import bson.errors @@ -19,8 +19,19 @@ def init_app(app): def get_item(asset_id): auth = app.auth if not auth.authorized([], None, flask.request.method): - return abort(401, gettext('Invalid token')) + token = request.args.get('token') + if token: + if not auth.check_auth(token, allowed_roles=None, resource=None, method='GET'): + # a try for a client that is not encoding the token + 
token = token.replace(' ', '+') + if not auth.check_auth(token, allowed_roles=None, resource=None, method='GET'): + abort(401, gettext('Invalid token')) + else: + return abort(401, gettext('Invalid token')) + item_id = request.args.get('item_id') + if item_id: + superdesk.get_resource_service('history').log_api_media_download(item_id, asset_id) try: media_file = flask.current_app.media.get(asset_id, ASSETS_RESOURCE) except bson.errors.InvalidId: diff --git a/newsroom/news_api/news/atom/atom.py b/newsroom/news_api/news/atom/atom.py deleted file mode 100644 index 13d5e5ac6..000000000 --- a/newsroom/news_api/news/atom/atom.py +++ /dev/null @@ -1,150 +0,0 @@ -import superdesk -import flask -from eve.methods.get import get_internal -from lxml import etree, html as lxml_html -from lxml.etree import SubElement -from superdesk.utc import utcnow -from superdesk.etree import to_string -from flask import current_app as app -import datetime -import logging -import re -from newsroom.news_api.utils import check_association_permission - -blueprint = superdesk.Blueprint('atom', __name__) - - -logger = logging.getLogger(__name__) - - -def init_app(app): - superdesk.blueprint(blueprint, app) - - -@blueprint.route('/atom', methods=['GET']) -def get_atom(): - - def _format_date(date): - iso8601 = date.isoformat() - if date.tzinfo: - return iso8601 - return iso8601 + 'Z' - - def _format_update_date(date): - DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" - return date.strftime(DATETIME_FORMAT) + 'Z' - - auth = app.auth - if not auth.authorized([], None, flask.request.method): - return auth.authenticate() - - XML_ROOT = '' - - _message_nsmap = {None: 'http://www.w3.org/2005/Atom', 'dcterms': 'http://purl.org/dc/terms/', - 'media': 'http://search.yahoo.com/mrss/', - 'mi': 'http://schemas.ingestion.microsoft.com/common/'} - -# feed = etree.Element('feed', attrib={'lang': 'en-us'}, nsmap=_message_nsmap) - feed = etree.Element('feed', nsmap=_message_nsmap) - SubElement(feed, 'title').text = 
etree.CDATA('{} Atom Feed'.format(app.config['SITE_NAME'])) - SubElement(feed, 'updated').text = _format_update_date(utcnow()) - SubElement(SubElement(feed, 'author'), 'name').text = app.config['SITE_NAME'] - SubElement(feed, 'id').text = flask.url_for('atom.get_atom', _external=True) - SubElement(feed, 'link', attrib={'href': flask.url_for('atom.get_atom', _external=True), 'rel': 'self'}) - - response = get_internal('news/search') -# req = ParsedRequest() -# req.args = {'include_fields': 'abstract'} -# response = superdesk.get_resource_service('news/search').get(req=req, lookup=None) - - for item in response[0].get('_items'): - try: - complete_item = superdesk.get_resource_service('items').find_one(req=None, _id=item.get('_id')) - - # If featuremedia is not allowed for the company don't add the item - if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): - if not check_association_permission(complete_item): - continue - - entry = SubElement(feed, 'entry') - - # If the item has any parents we use the id of the first, this should be constant throught the update - # history - if complete_item.get('ancestors') and len(complete_item.get('ancestors')): - SubElement(entry, 'id').text = complete_item.get('ancestors')[0] - else: - SubElement(entry, 'id').text = complete_item.get('_id') - - SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline')) - SubElement(entry, 'published').text = _format_date(complete_item.get('firstpublished')) - SubElement(entry, 'updated').text = _format_update_date(complete_item.get('versioncreated')) - SubElement(entry, 'link', attrib={'rel': 'self', 'href': flask.url_for('news/item.get_item', - item_id=item.get('_id'), - format='TextFormatter', - _external=True)}) - if complete_item.get('byline'): - SubElement(SubElement(entry, 'author'), 'name').text = complete_item.get('byline') - - if complete_item.get('pubstatus') == 'usable': - SubElement(entry, 
etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ - 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()), - _format_date(utcnow() + datetime.timedelta(days=30))) - else: - # in effect a kill set the end date into the past - SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ - 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()), - _format_date(utcnow() - datetime.timedelta(days=30))) - - categories = [{'name': s.get('name')} for s in complete_item.get('service', [])] - for category in categories: - SubElement(entry, 'category', attrib={'term': category.get('name')}) - - SubElement(entry, 'summary').text = etree.CDATA(complete_item.get('description_text', '')) - - # If there are any image embeds then reset the source to a Newshub asset - html_updated = False - regex = r' EMBED START Image {id: \"editor_([0-9]+)' - root_elem = lxml_html.fromstring(complete_item.get('body_html', '')) - comments = root_elem.xpath('//comment()') - for comment in comments: - if 'EMBED START Image' in comment.text: - m = re.search(regex, comment.text) - # Assumes the sibling of the Embed Image comment is the figure tag containing the image - figure_elem = comment.getnext() - if figure_elem is not None and figure_elem.tag == "figure": - imgElem = figure_elem.find("./img") - if imgElem is not None and m and m.group(1): - embed_id = "editor_" + m.group(1) - src = complete_item.get("associations").get(embed_id).get("renditions").get("16-9") - if src: - imgElem.attrib["src"] = flask.url_for('assets.get_item', asset_id=src.get('media'), - _external=True) - html_updated = True - if html_updated: - complete_item["body_html"] = to_string(root_elem, method="html") - - SubElement(entry, 'content', attrib={'type': 'html'}).text = etree.CDATA(complete_item.get('body_html', '')) - - if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): - image = ((complete_item.get('associations') or 
{}).get('featuremedia') or {}).get('renditions').get( - "16-9") - metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {}) - - url = flask.url_for('assets.get_item', _external=True, asset_id=image.get('media')) - media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'), - attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'}) - - SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get('byline') - SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get( - 'description_text') - SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get('body_text') - focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str(image.get('poi').get('x')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str(image.get('poi').get('x')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str(image.get('poi').get('y')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str(image.get('poi').get('y')) - except Exception as ex: - logger.exception('processing {} - {}'.format(item.get('_id'), ex)) - - return flask.Response(XML_ROOT + etree.tostring(feed, pretty_print=True).decode('utf-8'), - mimetype='application/atom+xml') diff --git a/newsroom/news_api/news/item/item.py b/newsroom/news_api/news/item/item.py index aa62c51c5..2e9f84b48 100644 --- a/newsroom/news_api/news/item/item.py +++ b/newsroom/news_api/news/item/item.py @@ -17,7 +17,12 @@ def init_app(app): def get_item(item_id): auth = app.auth if not auth.authorized([], None, flask.request.method): - return abort(401, gettext('Invalid token')) + token = flask.request.args.get('token') + if token: + if not auth.check_auth(token, allowed_roles=None, resource=None, method='GET'): + abort(401, gettext('Invalid token')) + else: + return 
abort(401, gettext('Invalid token')) _format = flask.request.args.get('format', 'NINJSFormatter') _version = flask.request.args.get('version') diff --git a/newsroom/news_api/news/search_service.py b/newsroom/news_api/news/search_service.py index 743d059d9..2c175f5db 100644 --- a/newsroom/news_api/news/search_service.py +++ b/newsroom/news_api/news/search_service.py @@ -7,6 +7,8 @@ import pytz from bson import ObjectId from flask import current_app as app, g +from werkzeug.datastructures import ImmutableMultiDict +from eve.utils import ParsedRequest from superdesk import get_resource_service from superdesk.utc import utcnow, local_to_utc @@ -16,9 +18,11 @@ from content_api.errors import BadParameterValueError, UnexpectedParameterError from newsroom.news_api.settings import ELASTIC_DATETIME_FORMAT -from newsroom.news_api.utils import post_api_audit, remove_internal_renditions, check_association_permission +from newsroom.news_api.utils import post_api_audit, check_featuremedia_association_permission, \ + update_embed_urls, set_association_links from newsroom.search import BaseSearchService, query_string from newsroom.products.products import get_products_by_company +from newsroom.wire.formatters.utils import remove_unpermissioned_embeds, remove_internal_renditions class NewsAPINewsService(BaseSearchService): @@ -40,7 +44,7 @@ class NewsAPINewsService(BaseSearchService): # set of fields that can be specified in the include_fields parameter allowed_include_fields = {'type', 'urgency', 'priority', 'language', 'description_html', 'located', 'keywords', 'source', 'subject', 'place', 'wordcount', 'charcount', 'body_html', 'readtime', - 'profile', 'service', 'genre', 'associations'} + 'profile', 'service', 'genre', 'associations', 'headline', 'extra'} default_fields = { '_id', 'uri', 'embargoed', 'pubstatus', 'ednote', 'signal', 'copyrightnotice', 'copyrightholder', @@ -55,10 +59,20 @@ class NewsAPINewsService(BaseSearchService): limit_days_setting = 
'news_api_time_limit_days' def get(self, req, lookup): - resp = super().get(req, lookup) - orig_request_params = getattr(req, 'args', MultiDict()) + # The associations are needed to reset the embeded urls in the body_html + if app.config.get("EMBED_PRODUCT_FILTERING") and \ + 'body_html' in orig_request_params.get('include_fields', '') and \ + 'associations' not in orig_request_params.get('include_fields', ''): + args = orig_request_params.to_dict() + args['include_fields'] = args.get('include_fields') + ',associations' + areq = ParsedRequest() + areq.args = ImmutableMultiDict(args) + resp = super().get(areq, lookup) + else: + resp = super().get(req, lookup) + # Can't get the exclude projection to work do pop the exclude fields here exclude_fields = self.mandatory_exclude_fields.union( set(orig_request_params.get('exclude_fields').split(','))) if orig_request_params.get( @@ -68,11 +82,21 @@ def get(self, req, lookup): for field in exclude_fields: doc.pop(field, None) + if app.config.get("EMBED_PRODUCT_FILTERING") and \ + 'body_html' in orig_request_params.get('include_fields', ''): + update_embed_urls(doc, None) + remove_unpermissioned_embeds(doc, g.user, 'news_api') + if 'associations' in orig_request_params.get('include_fields', ''): - if not check_association_permission(doc): - doc.pop('associations', None) - else: - remove_internal_renditions(doc) + set_association_links(doc) + if not check_featuremedia_association_permission(doc): + if doc.get('associations', {}).get('featuremedia'): + doc.get('associations').pop('featuremedia') + if not doc.get('associations'): + doc.pop('associations', None) + remove_internal_renditions(doc, remove_media=True) + else: + doc.pop('associations', None) return resp diff --git a/newsroom/news_api/news/syndicate/__init__.py b/newsroom/news_api/news/syndicate/__init__.py new file mode 100644 index 000000000..4cf192da9 --- /dev/null +++ b/newsroom/news_api/news/syndicate/__init__.py @@ -0,0 +1,44 @@ +import superdesk +import logging 
+from flask import request, make_response, jsonify +from eve.methods.get import get_internal +from .error_handlers import process_error_response +from .auth import authenticate +from .syndicate_handlers import FORMAT_HANDLERS, FEED_GENERATORS as FORMAT_HANDLERS_INIT +from .resource import NewsAPISyndicateResource +from .service import NewsAPISyndicateService +from werkzeug.routing import BaseConverter + +syndicate_blueprint = superdesk.Blueprint('syndicate', __name__) + +logger = logging.getLogger(__name__) + + +class RegExConverter(BaseConverter): + def __init__(self, map, regex='[^/]+'): + super().__init__(map) + self.regex = regex + + +@syndicate_blueprint.route('/', methods=['GET']) +@syndicate_blueprint.route('//', methods=['GET']) +@authenticate +def get_syndicate_feed(syndicate_type, token=None): + response = get_internal('news/syndicate') + format_param = request.args.get('formatter') + if format_param: + format_param = format_param.upper().strip() + try: + return FORMAT_HANDLERS[format_param]['handler'](response[0], format_param) + except ValueError as e: + error_message = f"An error occurred in converting response to {format_param}: {e}" + error_response = make_response(jsonify({'error': error_message}), 400) + return process_error_response(error_response) + + return FORMAT_HANDLERS_INIT[syndicate_type.lower()](response[0]) + + +def init_app(app): + superdesk.register_resource('news/syndicate', NewsAPISyndicateResource, NewsAPISyndicateService, _app=app) + app.url_map.converters['regex'] = RegExConverter + superdesk.blueprint(syndicate_blueprint, app) diff --git a/newsroom/news_api/news/syndicate/auth.py b/newsroom/news_api/news/syndicate/auth.py new file mode 100644 index 000000000..ed0d2074d --- /dev/null +++ b/newsroom/news_api/news/syndicate/auth.py @@ -0,0 +1,18 @@ +from functools import wraps +from flask import current_app as app +import flask + + +def authenticate(func): + @wraps(func) + def wrapper(*args, **kwargs): + auth = app.auth + token = 
kwargs.get('token') + if not auth.authorized([], None, flask.request.method): + if token: + if not auth.check_auth(token, allowed_roles=None, resource=None, method=flask.request.method): + return auth.authenticate() + else: + return auth.authenticate() + return func(*args, **kwargs) + return wrapper diff --git a/newsroom/news_api/news/syndicate/error_handlers.py b/newsroom/news_api/news/syndicate/error_handlers.py new file mode 100644 index 000000000..e53efe803 --- /dev/null +++ b/newsroom/news_api/news/syndicate/error_handlers.py @@ -0,0 +1,54 @@ +from typing import Union, Mapping, Dict +from flask import request, make_response, jsonify + + +def handle_unsupported_format(data, formatter=None): + error_message = f"Unsupported formatter: {formatter if formatter is not None else ''} " + error_response = make_response(jsonify({'error': error_message}), 400) + return process_error_response(error_response) + + +def process_error_response(response): + error_message: Union[bytes, str] = response.data.decode( + 'utf-8') if response.data else 'error message empty,contact admin for log information' + + def syndicate_examples() -> Mapping[str, str]: + examples = { + 'json': ( + f"{request.url_root}syndicate?format=json&q=trump&start_date=2020-04-01" + f"&timezone=Australia/Sydney" + ), + 'atom': ( + f"{request.url_root}syndicate?format=atom&start_date=now-30d&end_date=now" + f"&timezone=Australia/Sydney&include_fields=headline,byline,slugline,description_html," + f"located,keywords,source,subject,place,wordcount,charcount,body_html,readtime,profile," + f"service,genre,associations" + ), + 'rss': ( + f"{request.url_root}syndicate?format=rss&exclude_fields=version,versioncreated," + f"firstcreated" + ) + } + return examples + + def syndicate_parameters() -> Dict[str, str]: + parameters = { + 'format': "Specifies the desired format of the response. Accepts 'json', 'atom', or 'rss'.", + # ... (other parameters) ... 
+ } + return parameters + + error_payload: Dict[str, Dict[str, Union[int, str, Dict[str, str], Mapping[str, str]]]] = { + "error": { + "code": response.status_code, + "message": error_message, + }, + "usage": { + "endpoint": str(request.url), + "method": request.method, + "description": "This API endpoint allows formats (JSON, ATOM, RSS).", + "parameters": syndicate_parameters(), + "examples": syndicate_examples(), + }, + } + return jsonify(error_payload) diff --git a/newsroom/news_api/news/syndicate/resource.py b/newsroom/news_api/news/syndicate/resource.py new file mode 100644 index 000000000..b37e32d23 --- /dev/null +++ b/newsroom/news_api/news/syndicate/resource.py @@ -0,0 +1,12 @@ +from newsroom import Resource + + +class NewsAPISyndicateResource(Resource): + resource_title = 'News Syndicate' + datasource = { + 'search_backend': 'elastic', + 'source': 'items', + } + + item_methods = [] + resource_methods = [] diff --git a/newsroom/news_api/news/syndicate/service.py b/newsroom/news_api/news/syndicate/service.py new file mode 100644 index 000000000..1f4934ff2 --- /dev/null +++ b/newsroom/news_api/news/syndicate/service.py @@ -0,0 +1,335 @@ +from content_api.errors import BadParameterValueError +from newsroom.news_api.news.search_service import NewsAPINewsService +from superdesk import get_resource_service +from lxml import etree +from lxml.etree import SubElement +from superdesk.utc import utcnow +from flask import current_app as app, g, Response, url_for +import logging +from newsroom.news_api.utils import check_featuremedia_association_permission, update_embed_urls +from newsroom.wire.formatters.utils import remove_unpermissioned_embeds +from datetime import timedelta +from email import utils + + +class NewsAPISyndicateService(NewsAPINewsService): + allowed_params = { + 'start_date', 'end_date', + 'include_fields', 'exclude_fields', + 'max_results', 'page_size', 'page', 'timezone', + 'version', 'where', 'item_source', + 'q', 'default_operator', 'filter', + 
'service', 'subject', 'genre', 'urgency', + 'priority', 'type', 'item_source', 'timezone', 'products', + 'exclude_ids', 'formatter', 'sort' + } + default_sort = [{'versioncreated': 'asc'}] + + allowed_exclude_fields = {'version', 'firstcreated', 'headline', 'byline', 'slugline'} + + def on_fetched(self, doc): + self._enhance_hateoas(doc) + super().on_fetched(doc) + + def _enhance_hateoas(self, doc): + doc.setdefault('_links', {}) + doc['_links']['parent'] = { + 'title': 'Home', + 'href': '/' + }, + self._hateoas_set_item_links(doc) + + def _hateoas_set_item_links(self, doc): + for item in doc.get('_items') or []: + doc_id = str(item['_id']) + item.setdefault('_links', {}) + item['_links']['self'] = { + 'href': 'news/item/{}'.format(doc_id), + 'title': 'News Item' + } + item.pop('_updated', None) + item.pop('_created', None) + item.pop('_etag', None) + + def prefill_search_query(self, search, req=None, lookup=None): + super().prefill_search_query(search, req, lookup) + + if search.args.get('exclude_ids'): + search.args['exclude_ids'] = search.args['exclude_ids'].split(',') + + try: + search.args['max_results'] = int(search.args.get('max_results') or 200) + except ValueError: + raise BadParameterValueError('Max Results must be a number') + + search.args['size'] = search.args['max_results'] + + @staticmethod + def _format_date(date): + iso8601 = date.isoformat() + if date.tzinfo: + return iso8601 + return iso8601 + 'Z' + + @staticmethod + def _format_update_date(date): + DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" + return date.strftime(DATETIME_FORMAT) + 'Z' + + @staticmethod + def _format_date_publish(date): + return utils.format_datetime(date) + + @staticmethod + def generate_atom_feed(response, token=None): + XML_ROOT = '' + _message_nsmap = {None: 'http://www.w3.org/2005/Atom', 'dcterms': 'http://purl.org/dc/terms/', + 'media': 'http://search.yahoo.com/mrss/', + 'mi': 'http://schemas.ingestion.microsoft.com/common/'} + + feed = etree.Element('feed', 
nsmap=_message_nsmap) + SubElement(feed, 'title').text = etree.CDATA('{} Atom Feed'.format(app.config['SITE_NAME'])) + SubElement(feed, 'updated').text = __class__._format_update_date(utcnow()) + SubElement(SubElement(feed, 'author'), 'name').text = app.config['SITE_NAME'] + feed_url = url_for('syndicate.get_syndicate_feed', + syndicate_type='syndicate', + _external=True, + formatter='atom') + + SubElement(feed, 'id').text = feed_url + SubElement(feed, 'link', + attrib={'href': feed_url, 'rel': 'self'}) + item_resource = get_resource_service('items') + image = None + for item in response['_items']: + try: + complete_item = item_resource.find_one(req=None, _id=item.get('_id')) + # If featuremedia is not allowed for the company don't add the item + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + if not check_featuremedia_association_permission(complete_item): + continue + remove_unpermissioned_embeds(complete_item, g.user, 'news_api') + entry = SubElement(feed, 'entry') + # If the item has any parents we use the id of the first, this should be constant throughout the update + # history + if complete_item.get('ancestors') and len(complete_item.get('ancestors')): + SubElement(entry, 'id').text = complete_item.get('ancestors')[0] + else: + SubElement(entry, 'id').text = complete_item.get('_id') + + SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline')) + SubElement(entry, 'published').text = __class__._format_date(complete_item.get('firstpublished')) + SubElement(entry, 'updated').text = __class__._format_update_date(complete_item.get('versioncreated')) + if token: + SubElement(entry, 'link', attrib={'rel': 'self', 'href': url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + token=token, + _external=True)}) + else: + SubElement(entry, 'link', attrib={'rel': 'self', 'href': url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + _external=True)}) 
+ + if complete_item.get('byline'): + name = complete_item.get('byline') + if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == complete_item.get( + 'source', '').lower(): + name = name + " - " + complete_item.get('source') + SubElement(SubElement(entry, 'author'), 'name').text = name + else: + SubElement(SubElement(entry, 'author'), 'name').text = complete_item.get( + 'source') if complete_item.get( + 'source') else app.config['COPYRIGHT_HOLDER'] + + SubElement(entry, 'rights').text = complete_item.get('source', '') + + if complete_item.get('pubstatus') == 'usable': + SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date(utcnow()), + __class__._format_date(utcnow() + timedelta(days=30))) + else: + SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date(utcnow()), + __class__._format_date(utcnow() - timedelta(days=30))) + + categories = [{'name': s.get('name')} for s in complete_item.get('service', [])] + for category in categories: + SubElement(entry, 'category', attrib={'term': category.get('name')}) + + SubElement(entry, 'summary').text = etree.CDATA(complete_item.get('description_text', '')) + update_embed_urls(complete_item, token) + SubElement(entry, 'content', attrib={'type': 'html'}).text = etree.CDATA( + complete_item.get('body_html', '')) + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions').get( + "16-9") + if image: + metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {}) + + url = url_for('assets.get_item', _external=True, asset_id=image.get('media'), + token=token) if token else url_for( + 'assets.get_item', _external=True, asset_id=image.get('media')) + + media = SubElement(entry, 
etree.QName(_message_nsmap.get('media'), 'content'), + attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'}) + + SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get( + 'byline') + SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get( + 'description_text') + SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get( + 'body_text') + if image.get('poi'): + focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str( + image.get('poi').get('y')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str( + image.get('poi').get('y')) + except Exception as ex: + __class__.handle_exception(item, ex) + continue + return Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'), + mimetype='application/atom+xml') + + @staticmethod + def generate_rss_feed(response, token=None): + XML_ROOT = '' + + _message_nsmap = {'dcterms': 'http://purl.org/dc/terms/', 'media': 'http://search.yahoo.com/mrss/', + 'dc': 'http://purl.org/dc/elements/1.1/', + 'mi': 'http://schemas.ingestion.microsoft.com/common/', + 'content': 'http://purl.org/rss/1.0/modules/content/'} + + feed = etree.Element('rss', attrib={'version': '2.0'}, nsmap=_message_nsmap) + channel = SubElement(feed, 'channel') + SubElement(channel, 'title').text = '{} RSS Feed'.format(app.config['SITE_NAME']) + SubElement(channel, 'description').text = '{} RSS Feed'.format(app.config['SITE_NAME']) + feed_url = url_for('syndicate.get_syndicate_feed', + syndicate_type='syndicate', + _external=True, + formatter='rss') + SubElement(channel, 'link').text = feed_url + 
item_resource = get_resource_service('items') + image = None + for item in response['_items']: + try: + complete_item = item_resource.find_one(req=None, _id=item.get('_id')) + + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + if not check_featuremedia_association_permission(complete_item): + continue + remove_unpermissioned_embeds(complete_item, g.user, 'news_api') + + entry = SubElement(channel, 'item') + if complete_item.get('ancestors') and len(complete_item.get('ancestors')): + SubElement(entry, 'guid').text = complete_item.get('ancestors')[0] + else: + SubElement(entry, 'guid').text = complete_item.get('_id') + + SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline')) + SubElement(entry, 'pubDate').text = __class__._format_date_publish(complete_item.get('firstpublished')) + SubElement(entry, + etree.QName(_message_nsmap.get('dcterms'), 'modified')).text = __class__._format_update_date( + complete_item.get('versioncreated')) + if token: + SubElement(entry, 'link').text = url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + token=token, + _external=True) + else: + SubElement(entry, 'link').text = url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + _external=True) + + if complete_item.get('byline'): + name = complete_item.get('byline') + if complete_item.get('source') and not app.config[ + 'COPYRIGHT_HOLDER'].lower() == complete_item.get( + 'source', '').lower(): + name = name + " - " + complete_item.get('source') + SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = name + else: + SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = \ + complete_item.get('source') if complete_item.get('source') else app.config[ + 'COPYRIGHT_HOLDER'] + + SubElement(entry, 'source', + attrib={'url': feed_url}).text = \ + complete_item.get('source', '') + + if complete_item.get('pubstatus') == 'usable': + 
SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date_publish( + complete_item.get('firstpublished')), + __class__._format_date( + utcnow() + timedelta(days=30))) + else: + # in effect a kill set the end date into the past + SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date(utcnow()), + __class__._format_date( + utcnow() - timedelta(days=30))) + + categories = [{'name': s.get('name')} for s in complete_item.get('service', [])] \ + + [{'name': s.get('name')} for s in complete_item.get('subject', [])] \ + + [{'name': s.get('name')} for s in complete_item.get('place', [])] \ + + [{'name': k} for k in complete_item.get('keywords', [])] + for category in categories: + SubElement(entry, 'category').text = category.get('name') + + SubElement(entry, 'description').text = etree.CDATA(complete_item.get('description_text', '')) + + update_embed_urls(complete_item, token) + + SubElement(entry, etree.QName(_message_nsmap.get('content'), 'encoded')).text = etree.CDATA( + complete_item.get('body_html', '')) + + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get( + 'renditions').get( + "16-9") + if image: + metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {}) + + url = url_for('assets.get_item', _external=True, asset_id=image.get('media'), + token=token) if token else url_for( + 'assets.get_item', _external=True, asset_id=image.get('media')) + + media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'), + attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'}) + + SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get( + 'byline') + SubElement(media, 
etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get( + 'description_text') + SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get( + 'body_text') + if image.get('poi'): + focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str( + image.get('poi').get('y')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str( + image.get('poi').get('y')) + except Exception as ex: + __class__.handle_exception(item, ex) + continue + return Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'), + mimetype='application/rss+xml') + + @staticmethod + def handle_exception(item, ex): + item_id = item.get('_id') + log_message = f"Processing {item_id} - {str(ex)}" + logging.exception(log_message) diff --git a/newsroom/news_api/news/syndicate/syndicate_handlers.py b/newsroom/news_api/news/syndicate/syndicate_handlers.py new file mode 100644 index 000000000..934449abe --- /dev/null +++ b/newsroom/news_api/news/syndicate/syndicate_handlers.py @@ -0,0 +1,41 @@ +from collections import defaultdict +from .service import NewsAPISyndicateService +from flask import make_response, jsonify +from .error_handlers import process_error_response + + +def convert_to_syndicate(data, formatter): + # remove token from requirements + if formatter and formatter == 'ATOM': + return NewsAPISyndicateService.generate_atom_feed(data) + elif formatter and formatter == 'RSS': + return NewsAPISyndicateService.generate_rss_feed(data) + elif formatter and formatter == 'JSON': + return jsonify(data) + else: + raise ValueError("Invalid formatter specified") + + +FORMAT_HANDLERS = defaultdict( + lambda: {'handler': 
handle_unsupported_format, 'content_type': 'application/json'}, + { + 'ATOM': {'handler': convert_to_syndicate, 'content_type': 'application/xml'}, + 'RSS': {'handler': convert_to_syndicate, 'content_type': 'application/xml'}, + 'JSON': {'handler': convert_to_syndicate, 'content_type': 'application/json'}, + } +) +FEED_GENERATORS = defaultdict( + lambda: handle_unsupported_format, + { + 'atom': NewsAPISyndicateService.generate_atom_feed, + 'rss': NewsAPISyndicateService.generate_rss_feed, + } +) + + +def handle_unsupported_format(data, formatter=None): + if formatter and formatter != 'JSON': + error_message = f"Unsupported formatter: {formatter if formatter is not None else 'empty value'} " + error_response = make_response(jsonify({'error': error_message}), 400) + return process_error_response(error_response) + return jsonify(data) diff --git a/newsroom/news_api/settings.py b/newsroom/news_api/settings.py index 04b57c5e3..e327c0e9b 100644 --- a/newsroom/news_api/settings.py +++ b/newsroom/news_api/settings.py @@ -12,6 +12,7 @@ 'content_api.items_versions', 'newsroom.news_api.section_filters', 'newsroom.news_api.products', + 'newsroom.news_api.news.syndicate', 'newsroom.news_api.formatters', 'newsroom.news_api.news', 'newsroom.news_api.news.item.item', @@ -21,7 +22,6 @@ 'newsroom.news_api.api_audit', 'newsroom.news_api.news.assets.assets', 'newsroom.upload', - 'newsroom.news_api.news.atom.atom', 'newsroom.history' ] diff --git a/newsroom/news_api/utils.py b/newsroom/news_api/utils.py index 38afbd7e9..a5ae8c57d 100644 --- a/newsroom/news_api/utils.py +++ b/newsroom/news_api/utils.py @@ -1,8 +1,8 @@ from superdesk import get_resource_service from superdesk.utc import utcnow -from flask import request, g, current_app as app +from flask import request, g, current_app as app, url_for from newsroom.products.products import get_products_by_company -from newsroom.settings import get_setting +from newsroom.utils import update_embeds_in_body def post_api_audit(doc): @@ -37,27 
+37,7 @@ def format_report_results(search_result, unique_endpoints, companies): return results -def remove_internal_renditions(item): - clean_renditions = dict() - - # associations featuremedia will contain the internal newsroom renditions, we need to remove these. - if ((item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): - for key, rendition in\ - item['associations']['featuremedia']['renditions'].items(): - if key in get_setting('news_api_allowed_renditions').split(','): - rendition.pop('media', None) - clean_renditions[key] = rendition - - item['associations']['featuremedia']['renditions'] = clean_renditions - for key, meta in item.get('associations', {}).items(): - if isinstance(meta, dict): - meta.pop('products', None) - meta.pop('subscribers', None) - - return item - - -def check_association_permission(item): +def check_featuremedia_association_permission(item): """ Check if any of the products that the passed image item matches are permissioned superdesk products for the company @@ -80,3 +60,48 @@ def check_association_permission(item): return True if len(set(im_products) & set(sd_products)) else False else: return True + + +def set_association_links(item): + """ + Updates the links in the associations to the endpoint that logs the download + :param item: + :return: + """ + if not app.config.get("EMBED_PRODUCT_FILTERING"): + return + + for key, ass in item.get("associations", {}).items(): + if isinstance(ass, dict) and not key == "featuremedia": + for rendition in ass.get("renditions"): + if ass.get('renditions', {}).get(rendition, {}).get("href"): + ass.get('renditions', {}).get(rendition, {})["href"] = ass.get('renditions', {}).get(rendition, + {}).get( + "href") + '?item_id=' + item.get("_id") + + +def update_embed_urls(item, token): + """ + Update the urls in the embeds to the endpoint that allows logging + :param item: + :param token: + :return: + """ + def update_embed(item, elem, group): + embed_id = "editor_" + group + if 
elem.tag in ["audio", "video"]: + rendition = "original" + elif elem.tag == "img": + rendition = "16-9" + src = item.get("associations", {}).get(embed_id, {}).get("renditions", {}).get( + rendition) + if src is not None and elem is not None: + params = {"item_id": item.get("_id")} + if token: + params["token"] = token + elem.attrib["src"] = url_for('assets.get_item', asset_id=src.get('media'), + _external=True, **params) + return True + + if app.config.get("EMBED_PRODUCT_FILTERING"): + update_embeds_in_body(item, update_embed, update_embed, update_embed) diff --git a/newsroom/notifications/notifications.py b/newsroom/notifications/notifications.py index 143289319..f2917096e 100644 --- a/newsroom/notifications/notifications.py +++ b/newsroom/notifications/notifications.py @@ -4,6 +4,7 @@ import superdesk import pymongo.errors import werkzeug.exceptions +from html import escape from bson import ObjectId from superdesk.utc import utcnow @@ -71,6 +72,8 @@ def get_initial_notifications(): items = [] try: items.extend(superdesk.get_resource_service('wire_search').get_items(item_ids)) + for item in items: + item["body_html"] = escape(item["body_html"]) except KeyError: # wire disabled pass try: diff --git a/newsroom/reports/reports.py b/newsroom/reports/reports.py index 84667a487..ea7da742b 100644 --- a/newsroom/reports/reports.py +++ b/newsroom/reports/reports.py @@ -221,6 +221,17 @@ def get_section_name(s): 'item_href': '/{}?item={}'.format(doc['section'] if doc['section'] != 'news_api' else 'wire', doc['item']) } + try: + if 'download' in doc.get('action') and not doc.get('extra_data') is None: + ass = wire_items[doc['item']['_id']].get('associations', {}).get(doc.get('extra_data'), {}) + doc['association'] = { + 'text': ass.get('headline', 'N/A'), + 'href': '/assets/{}'.format(ass.get('renditions', {}).get('original', {}).get('media')), + 'type': ass.get('type') + } + except Exception: + pass + elif doc.get('item') in agenda_items: doc['item'] = { 'item_text': 
(agenda_items[doc['item']].get('name') or agenda_items[doc['item']].get('slugline')), diff --git a/newsroom/static/logo/360info.png b/newsroom/static/logo/360info.png new file mode 100644 index 000000000..59439861f Binary files /dev/null and b/newsroom/static/logo/360info.png differ diff --git a/newsroom/templates/base_layout.html b/newsroom/templates/base_layout.html index 999dadbd1..f559b8fce 100644 --- a/newsroom/templates/base_layout.html +++ b/newsroom/templates/base_layout.html @@ -123,17 +123,26 @@ function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); - gtag('config', '{{ get_setting('google_analytics') | safe }}', { - custom_map: { - dimension1: 'company', - dimension2: 'user', - } + gtag('set', 'user_properties', { + company: (window.profileData || {}).companyName || 'none', + user: ((window.profileData || {}).user || {}).first_name || 'unknown', }); {% if session.get('user') %} gtag('set', {'user_id': '{{ hash(session['user']) | safe }}' }); {% endif %} + if ('{{ get_setting('google_analytics') }}'.startsWith('UA')) { + gtag('config', '{{ get_setting('google_analytics') | safe }}', { + custom_map: { + dimension1: 'company', + dimension2: 'user', + } + }); + } else { + gtag('config', '{{ get_setting('google_analytics') | safe }}'); + } + {% with messages = get_flashed_messages(category_filter=['analytics']) %} {% if messages %} analytics.sendEvents({{ messages | tojson | safe }}); @@ -158,6 +167,11 @@ {% endif %} +{% if config.PLYR %} + + +{% endif %} + diff --git a/newsroom/templates/download_embed.html b/newsroom/templates/download_embed.html new file mode 100644 index 000000000..e872156ad --- /dev/null +++ b/newsroom/templates/download_embed.html @@ -0,0 +1,52 @@ + + + + + + + {% block title %}{% endblock %} + + + + + {{ javascript_tag('common') | safe }} + {{ javascript_tag('newsroom_css') | safe }} + + + + +
+ +
+ +
+

+ All contents © Copyright {{ get_date().year }} {{ config.COPYRIGHT_HOLDER }}. All rights reserved. + +

+
+ + + \ No newline at end of file diff --git a/newsroom/templates/download_item.html b/newsroom/templates/download_item.html new file mode 100644 index 000000000..c4f79d812 --- /dev/null +++ b/newsroom/templates/download_item.html @@ -0,0 +1,17 @@ + + + + + {% block title %}{{ item.headline }}{% endblock %} + + +{% block content %} +

{{ item.slugline }}

+ +

{{ item.headline }}

+

{{ _('By:') }} {{ item.byline }} {{ _('On:') }} {{ item.versioncreated|datetime_long }}

+ + {{ item.description_html | safe }} + {{ item.body_html | safe }} +{% endblock %} + \ No newline at end of file diff --git a/newsroom/topics/topics.py b/newsroom/topics/topics.py index 635a1d368..b57ed3c7e 100644 --- a/newsroom/topics/topics.py +++ b/newsroom/topics/topics.py @@ -1,6 +1,7 @@ import newsroom import superdesk +from newsroom.user_roles import UserRole class TopicsResource(newsroom.Resource): @@ -22,6 +23,8 @@ class TopicsResource(newsroom.Resource): 'schema': {'type': 'string'}, } } + allowed_roles = [role for role in UserRole] + allowed_item_roles = allowed_roles class TopicsService(newsroom.Service): diff --git a/newsroom/upload.py b/newsroom/upload.py index 2fb25f41a..3d179b6b7 100644 --- a/newsroom/upload.py +++ b/newsroom/upload.py @@ -7,6 +7,7 @@ from werkzeug.utils import secure_filename from flask import request, url_for, current_app as newsroom_app from superdesk.upload import upload_url as _upload_url +from superdesk import get_resource_service from newsroom.decorator import login_required @@ -52,6 +53,10 @@ def get_upload(media_id): else: response.headers['Content-Disposition'] = 'inline' + item_id = request.args.get('item_id') + if item_id: + get_resource_service('history').log_media_download(item_id, media_id) + return response diff --git a/newsroom/user_roles.py b/newsroom/user_roles.py new file mode 100644 index 000000000..ffc1f8dc3 --- /dev/null +++ b/newsroom/user_roles.py @@ -0,0 +1,9 @@ +import enum + + +class UserRole(enum.Enum): + ADMINISTRATOR = "administrator" + INTERNAL = "internal" + PUBLIC = "public" + COMPANY_ADMIN = "company_admin" + ACCOUNT_MANAGEMENT = "account_management" diff --git a/newsroom/users/__init__.py b/newsroom/users/__init__.py index 12be114d4..3f39a7fd7 100644 --- a/newsroom/users/__init__.py +++ b/newsroom/users/__init__.py @@ -1,7 +1,7 @@ from flask import Blueprint from flask_babel import lazy_gettext import superdesk -from .users import UsersResource, UsersService +from .users import AuthUserResource, 
UsersResource, UsersService, AuthUserService blueprint = Blueprint('users', __name__) @@ -10,6 +10,7 @@ def init_app(app): superdesk.register_resource('users', UsersResource, UsersService, _app=app) + superdesk.register_resource("auth_user", AuthUserResource, AuthUserService, _app=app) app.add_template_global(views.get_view_data, 'get_user_profile_data') app.settings_app('users', lazy_gettext('User Management'), weight=200, data=views.get_settings_data, allow_account_mgr=True) diff --git a/newsroom/users/users.py b/newsroom/users/users.py index 301427d13..581124f27 100644 --- a/newsroom/users/users.py +++ b/newsroom/users/users.py @@ -2,10 +2,42 @@ from flask import current_app as app, session import newsroom +import superdesk +from flask import request from content_api import MONGO_PREFIX from superdesk.utils import is_hashed, get_hash -from newsroom.auth import get_user_id +from newsroom.auth import get_user, get_user_id, SessionAuth from newsroom.utils import set_original_creator, set_version_creator +from newsroom.user_roles import UserRole + + +class UserAuthentication(SessionAuth): + def authorized(self, allowed_roles, resource, method): + if super().authorized(allowed_roles, resource, method): + return True + + if not get_user_id(): + return False + + if not request.view_args or not request.view_args.get("_id"): + # not a request for a specific user, stop + return False + + if request.view_args["_id"] == str(get_user_id()): + # current user editing current user + return True + + current_user = get_user() + if not current_user.get("company") or current_user.get("user_type") != UserRole.COMPANY_ADMIN.value: + # current user not a company admin + return False + + request_user = superdesk.get_resource_service("users").find_one(req=None, _id=request.view_args["_id"]) + if request_user.get("company") and request_user["company"] == current_user["company"]: + # if current user is a company admin for request user + return True + + return False class 
UsersResource(newsroom.Resource): @@ -13,6 +45,8 @@ class UsersResource(newsroom.Resource): Users schema """ + authentication = UserAuthentication() + schema = { 'password': { 'type': 'string', @@ -96,7 +130,7 @@ class UsersResource(newsroom.Resource): mongo_prefix = MONGO_PREFIX datasource = { 'source': 'users', - 'projection': {'password': 0}, + 'projection': {'password': 0, 'token': 0}, 'default_sort': [('last_name', 1)] } mongo_indexes = { @@ -144,3 +178,22 @@ def password_match(self, password, hashed_password): def on_deleted(self, doc): app.cache.delete(str(doc.get('_id'))) + + +class AuthUserResource(newsroom.Resource): + internal_resource = True + + schema = { + "email": UsersResource.schema["email"], + "password": UsersResource.schema["password"], + "token": UsersResource.schema["token"], + "token_expiry_date": UsersResource.schema["token_expiry_date"], + } + + datasource = { + "source": "users", + } + + +class AuthUserService(newsroom.Service): + pass diff --git a/newsroom/utils.py b/newsroom/utils.py index c2b1027e4..a8e430a93 100644 --- a/newsroom/utils.py +++ b/newsroom/utils.py @@ -3,7 +3,11 @@ from dateutil.relativedelta import relativedelta from uuid import uuid4 import pytz +import re +from lxml import html as lxml_html +from lxml.html import clean +from superdesk.etree import to_string from superdesk.utc import utcnow from superdesk.json_utils import try_cast from bson import ObjectId @@ -422,3 +426,74 @@ def get_end_date(date_range, start_date): if date_range == 'now/M': return start_date + relativedelta(months=+1) - timedelta(days=1) return start_date + + +def update_embeds_in_body(item, update_image=None, update_audio=None, update_video=None): + """ + Scans the story body for editor3 embeds and calls the appropriate passed function for each embed type. 
+ The functions should expect the item, element and the number associated with the association + :param item: + :param update_image: + :param update_audio: + :param update_video: + :return: + """ + regex = r" EMBED START (?:Image|Video|Audio) {id: \"editor_([0-9]+)" + body_updated = False + root_elem = lxml_html.fromstring(item.get('body_html', '

') or '

') + comments = root_elem.xpath('//comment()') + for comment in comments: + m = re.search(regex, comment.text) + if m and m.group(1): + # Assumes the sibling of the Embed Image comment is the figure tag containing the image + figure_elem = comment.getnext() + if figure_elem is not None and figure_elem.tag == "figure": + elem = figure_elem.find("./img") + if elem is not None and update_image: + body_updated = update_image(item, elem, m.group(1)) or body_updated + continue + elem = figure_elem.find("./audio") + if elem is not None and update_audio: + body_updated = update_audio(item, elem, m.group(1)) or body_updated + continue + elem = figure_elem.find("./video") + if elem is not None and update_video: + body_updated = update_video(item, elem, m.group(1)) or body_updated + if body_updated: + item['body_html'] = to_string(root_elem, method="html") + + +def remove_all_embeds(item, remove_by_class=True, remove_media_embeds=True): + """ + Remove the all embeds from the body of the article, including any divs with the embed_block attribute + :param item: + :param remove_by_class: If true removes any divs that have the embed-block class, should remove such things as + embedded tweets + :param remove_media_embeds: Remove any figure tags if the passed value is true + :return: + """ + if not item.get("body_html", ""): + return + + root_elem = lxml_html.fromstring(item.get("body_html", "")) + + if remove_by_class: + # all embedded tweets etc should be in a div with the class embeded-block, these are removed + embeds = root_elem.xpath('//div[@class=\'embed-block\']') + for embed in embeds: + embed.getparent().remove(embed) + + if not remove_media_embeds: + item["body_html"] = to_string(root_elem, encoding="unicode", method='html') + return + + # clean all the embedded figures from the html, it will remove the comments as well + cleaner = clean.Cleaner(add_nofollow=False, kill_tags=["figure"]) + cleaned_xhtml = cleaner.clean_html(root_elem) + + # remove the associations 
relating to the embeds + kill_keys = [key for key in item.get("associations", {}) if key.startswith("editor_")] + for key in kill_keys: + item.get("associations", {}).pop(key, None) + + item["body_html"] = to_string(cleaned_xhtml, encoding="unicode", method='html') diff --git a/newsroom/wire/__init__.py b/newsroom/wire/__init__.py index f35a9a6da..13c418473 100644 --- a/newsroom/wire/__init__.py +++ b/newsroom/wire/__init__.py @@ -61,13 +61,18 @@ def init_app(app): app.sidenav(lazy_gettext('Saved/Watched Items'), 'wire.bookmarks', 'bookmark', group=1, blueprint='wire', badge='saved-items-count') - from .formatters import TextFormatter, NITFFormatter, NewsMLG2Formatter, JsonFormatter, PictureFormatter + from .formatters import TextFormatter, NITFFormatter, NewsMLG2Formatter, JsonFormatter, PictureFormatter, \ + NINJSDownloadFormatter, HTMLFormatter, HTMLMediaFormatter, HTMLPackageFormatter app.download_formatter('text', TextFormatter(), lazy_gettext('Plain Text'), ['wire', 'agenda'], ['text']) app.download_formatter('nitf', NITFFormatter(), 'NITF', ['wire'], ['text']) app.download_formatter('newsmlg2', NewsMLG2Formatter(), 'NewsMLG2', ['wire'], ['text']) app.download_formatter('json', JsonFormatter(), 'Json', ['agenda'], ['text']) if app.config.get('ALLOW_PICTURE_DOWNLOAD', True): app.download_formatter('picture', PictureFormatter(), lazy_gettext('Story Image'), ['wire'], ['picture']) + app.download_formatter('downloadninjs', NINJSDownloadFormatter(), 'Ninjs', ['wire']) + app.download_formatter('html', HTMLFormatter(), 'Plain HTML', ['wire']) + app.download_formatter('htmlmedia', HTMLMediaFormatter(), 'HTML with embedded media', ['wire']) + app.download_formatter('htmlpackage', HTMLPackageFormatter(), 'HTML package', ['wire']) app.add_template_global(utils.get_picture, 'get_picture') app.add_template_global(utils.get_caption, 'get_caption') diff --git a/newsroom/wire/block_media/company_factory.py b/newsroom/wire/block_media/company_factory.py new file mode 100644 
index 000000000..cc8d522f1 --- /dev/null +++ b/newsroom/wire/block_media/company_factory.py @@ -0,0 +1,84 @@ +import time +from flask import session, g +from superdesk import get_resource_service + + +class CompanyFactory: + _company_cache = {} + _cache_expiration_time = 30 + + @staticmethod + def get_user_company(user): + current_time = time.time() + if not user.get('company'): + return [] + if user and user.get('company') in CompanyFactory._company_cache: + cached_data = CompanyFactory._company_cache[user['company']] + if current_time - cached_data['timestamp'] < CompanyFactory._cache_expiration_time: + return cached_data['company'] + + company = get_resource_service('companies').find_one(req=None, _id=user['company']) + if company: + CompanyFactory._company_cache[user['company']] = { + 'company': company, + 'timestamp': current_time + } + CompanyFactory._update_embedded_data_in_session(user, company) + return company + + company = get_resource_service('companies').find_one(req=None, _id=g.user) if hasattr(g, 'user') else None + if company: + CompanyFactory._company_cache[g.user] = { + 'company': company, + 'timestamp': current_time + } + CompanyFactory._update_embedded_data_in_session(g.user, company) + return company + + @staticmethod + def get_embedded_data(user): + company = CompanyFactory.get_user_company(user) + if not company: + return { + "embedded": { + "social_media_display": False, + "video_display": False, + "audio_display": False, + "images_display": False, + "all_display": True, + "social_media_download": False, + "video_download": False, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": False, + "sdpermit_download": False + } + } + + embedded = session.get(f"embedded_data_{user['company']}", {}) + + if embedded != company.get("embedded", {}): + CompanyFactory._update_embedded_data_in_session(user, company) + embedded = company.get("embedded", {}) + + return embedded + + @staticmethod + def 
_update_embedded_data_in_session(user, company): + session[f"embedded_data_{user['company']}"] = company.get("embedded", { + "social_media_display": False, + "video_display": False, + "audio_display": False, + "images_display": False, + "all_display": True, + "social_media_download": False, + "video_download": False, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": False, + "sdpermit_download": False + }) + session.permanent = False + session.modified = True diff --git a/newsroom/wire/block_media/download_items.py b/newsroom/wire/block_media/download_items.py new file mode 100644 index 000000000..c360205a8 --- /dev/null +++ b/newsroom/wire/block_media/download_items.py @@ -0,0 +1,95 @@ +from newsroom.auth import get_user +from newsroom.wire.block_media.company_factory import CompanyFactory +from newsroom.wire.block_media.filter_media import get_allowed_tags +from newsroom.wire.block_media.permission_media import PermissionMedia + +from lxml import html as lxml_html +import re +import logging +logger = logging.getLogger(__name__) + + +def filter_items_download(func): + def wrapper(_ids, item_type, filter_func=None): + items = func(_ids, item_type) + if filter_func and items: + items = filter_func(items) + return items + return wrapper + + +def block_items_by_embedded_data(items): + def remove_editors_media(item, allowed_tags): + associations = item.get("associations") + if associations: + editors_to_remove = [] + allowed_tags = ['picture' if tag == 'img' else tag for tag in allowed_tags] + for key, value in associations.items(): + if key.startswith("editor_") and ((value and value.get("type") not in allowed_tags)): + editors_to_remove.append(key) + + for editor in editors_to_remove: + associations.pop(editor, None) + + disable_downloads = PermissionMedia.permission_editor_in_item(item) + + if disable_downloads: + for disable_download in disable_downloads: + if disable_download in associations: + 
associations.pop(disable_download) + item["associations"] = associations + return item + + download_social_tag = False + user = get_user(required=True) + embedded_data = CompanyFactory.get_embedded_data(user) + embedded_tags = get_allowed_tags(embedded_data) + allowed_tags = embedded_tags['download_tags'] + if 'all' in allowed_tags or (not any(allowed_tags)): + allowed_tags = ['video', 'audio', 'img', 'social_media'] + download_social_tag = True + if 'social_media' in allowed_tags: + download_social_tag = True + filtered_items = [] + for item in items: + html_updated = False + root_elem = lxml_html.fromstring(item.get('body_html', '')) + + if allowed_tags: + tag_map = {'video': 'Video', 'audio': 'Audio', 'img': 'Image', 'social_media': 'social_media'} + excluded_tags = set(tag_map.keys()) - set(allowed_tags) + regex_parts = [tag_map[tag] for tag in excluded_tags] + regex = rf" EMBED START (?:{'|'.join(regex_parts)}) {{id: \"editor_([0-9]+)" + comments = root_elem.xpath('//comment()') + for comment in comments: + m = re.search(regex, comment.text) + if m and m.group(1): + figure = comment.getnext() + for elem in figure.iterchildren(): + if elem.tag in excluded_tags: + if 'data-disable-download' not in elem.attrib or elem.attrib['data-disable-download'] != 'true': + elem.attrib['data-disable-download'] = 'true' + html_updated = True + break + + if not download_social_tag: + social_media_embeds = root_elem.xpath('//div[@class="embed-block"]') + for social_media_embed in social_media_embeds: + if 'disabled-embed' not in social_media_embed.attrib.get('class', ''): + social_media_embed.attrib['class'] = social_media_embed.attrib.get('class', '') + ' disabled-embed' + blockquote_elements = social_media_embed.xpath('.//blockquote') + for blockquote in blockquote_elements: + if 'data-disable-download' not in blockquote.attrib: + blockquote.attrib['data-disable-download'] = 'true' + html_updated = True + break + + if html_updated: + for elem in 
root_elem.xpath('//*[@data-disable-download="true"]'): + elem.getparent().remove(elem) + item["body_html"] = lxml_html.tostring(root_elem, encoding='unicode', method="html") + + item_remove = remove_editors_media(item, allowed_tags) + filtered_items.append(item_remove) + + return filtered_items diff --git a/newsroom/wire/block_media/filter_htmlpackage.py b/newsroom/wire/block_media/filter_htmlpackage.py new file mode 100644 index 000000000..6fe5761c7 --- /dev/null +++ b/newsroom/wire/block_media/filter_htmlpackage.py @@ -0,0 +1,14 @@ +from functools import wraps +import flask +from newsroom.auth import get_user +from newsroom.wire.block_media.company_factory import CompanyFactory + + +def filter_embedded_data(func): + @wraps(func) + def wrapper(self, item, item_type='items'): + embedded_data = CompanyFactory.get_embedded_data(get_user(required=True)) + if any(embedded_data): + return str.encode(flask.render_template('download_embed.html', item=item), 'utf-8') + return func(self, item, item_type) + return wrapper diff --git a/newsroom/wire/block_media/filter_media.py b/newsroom/wire/block_media/filter_media.py new file mode 100644 index 000000000..d7c9313c0 --- /dev/null +++ b/newsroom/wire/block_media/filter_media.py @@ -0,0 +1,168 @@ +from functools import wraps +from flask import current_app as app +from newsroom.auth import get_user +from newsroom.wire.block_media.company_factory import CompanyFactory +from lxml import html as lxml_html +import re +import logging +from superdesk.etree import to_string +logger = logging.getLogger(__name__) + + +def filter_media(func): + @wraps(func) + def wrapper(*args, **kwargs): + if not app.config.get("EMBED_PRODUCT_FILTERING"): + return func(*args, **kwargs) + + item_arg = get_item_argument(args, kwargs) + if item_arg is None: + return func(*args, **kwargs) + + embedded_data = get_embedded_data() + if not any(embedded_data.values()): + return func(*args, **kwargs) + + item_arg = process_item_embeds(item_arg, embedded_data) + 
+ return func(*args, **kwargs) + + return wrapper + + +def get_item_argument(args, kwargs): + if len(args) > 1 and isinstance(args[1], dict) and 'body_html' in args[1]: + return args[1] + + for arg in args: + if isinstance(arg, dict) and 'body_html' in arg: + return arg + + return kwargs.get('item') + + +def get_embedded_data(): + try: + user = get_user(required=True) + return CompanyFactory.get_embedded_data(user) + except Exception as e: + logger.error(f"Error in from embedded data: {str(e)}") + return {} + + +def process_item_embeds(item_arg, embedded_data): + html_updated = False + html_string = item_arg.get('body_html', '') + root_elem = lxml_html.fromstring(html_string) + + allowed_tags = get_allowed_tags(embedded_data) + + if allowed_tags: + html_updated = process_allowed_tags(root_elem, allowed_tags) + + if html_updated: + item_arg["body_html"] = to_string(root_elem, method="html") + + es_highlight = item_arg.get('es_highlight', {}) + es_highlight_body_html = es_highlight.get('body_html', []) + + if len(es_highlight_body_html) > 0: + es_highlight_html_string = es_highlight_body_html[0] + es_highlight_root_elem = lxml_html.fromstring(es_highlight_html_string) + + es_highlight_allowed_tags = allowed_tags + + if es_highlight_allowed_tags: + es_highlight_html_updated = process_allowed_tags(es_highlight_root_elem, es_highlight_allowed_tags) + + if es_highlight_html_updated: + item_arg['es_highlight']['body_html'][0] = to_string(es_highlight_root_elem, method="html") + + return item_arg + + +def get_allowed_tags(embedded_data): + tag_mapping = { + 'video': ('video_display', 'video_download'), + 'audio': ('audio_display', 'audio_download'), + 'img': ('images_display', 'images_download'), + 'all': ('all_display', 'all_download'), + 'social_media': ('social_media_display', 'social_media_download'), + 'sd': ('sdpermit_display', 'sdpermit_download'), + } + + allowed_tags = { + 'display_tags': [tag for tag, (display_key, _) in tag_mapping.items() if 
embedded_data.get(display_key, False)], + 'download_tags': [tag for tag, (_, download_key) in tag_mapping.items() if embedded_data.get(download_key, False)], + } + + return allowed_tags + + +def process_allowed_tags(root_elem, allowed_tags): + html_updated = False + + display_social_tag = False + download_social_tag = False + + display_tags = allowed_tags['display_tags'] + + if 'all' in display_tags or (not any(display_tags)): + display_tags = ['video', 'audio', 'img', 'social_media'] + display_social_tag = True + if 'social_media' in display_tags: + display_social_tag = True + + download_tags = allowed_tags['download_tags'] + if 'all' in download_tags or (not any(download_tags)): + download_tags = ['video', 'audio', 'img', 'social_media'] + download_social_tag = True + if 'social_media' in download_tags: + download_social_tag = True + + tag_map = {'video': 'Video', 'audio': 'Audio', 'img': 'Image'} + display_regex_parts = ['|'.join(tag_map[tag] for tag in tag_map if tag not in display_tags)] + + display_regex = rf" EMBED START (?:{'|'.join(display_regex_parts)}) {{id: \"editor_([0-9]+)" + download_regex_parts = ['|'.join(tag_map[tag] for tag in tag_map if tag not in download_tags)] + download_regex = rf" EMBED START (?:{'|'.join(download_regex_parts)}) {{id: \"editor_([0-9]+)" + + comments = root_elem.xpath('//comment()') + for comment in comments: + display_match = re.search(display_regex, comment.text) + download_match = re.search(download_regex, comment.text) + + if display_match and display_match.group(1): + figure = comment.getnext() + for elem in figure.iterchildren(): + if elem.tag not in display_tags: + figure.attrib['class'] = 'disabled-embed' + html_updated = True + break + + figure = comment.getnext() + if figure is None: + continue + if download_match and download_match.group(1): + for elem in figure.iterchildren(): + if elem.tag not in download_tags: + elem.attrib['data-disable-download'] = 'true' + html_updated = True + break + + if not 
display_social_tag: + social_media_embeds = root_elem.xpath('//div[@class="embed-block"]') + for social_media_embed in social_media_embeds: + social_media_embed.attrib['class'] = 'embed-block disabled-embed' + html_updated = True + + if not download_social_tag: + social_media_embeds = root_elem.xpath('//div[@class="embed-block"]') + for social_media_embed in social_media_embeds: + blockquote_elements = social_media_embed.xpath('.//blockquote') + for blockquote in blockquote_elements: + blockquote.attrib['data-disable-download'] = 'true' + html_updated = True + break + + return html_updated diff --git a/newsroom/wire/block_media/permission_media.py b/newsroom/wire/block_media/permission_media.py new file mode 100644 index 000000000..996fe3fc3 --- /dev/null +++ b/newsroom/wire/block_media/permission_media.py @@ -0,0 +1,29 @@ +from newsroom.auth import get_user +from newsroom.companies import get_user_company +from newsroom.products.products import get_products_by_company +from flask import request + + +class PermissionMedia: + @staticmethod + def permission_editor_in_item(item): + user = get_user(required=True) + company = get_user_company(user) + + if company is None: + return [] + + permitted_products = [p.get('sd_product_id') for p in + get_products_by_company(company.get('_id'), None, request.args.get('type', 'wire')) + if p.get('sd_product_id')] + + disable_download = [] + for key, embed_item in item.get("associations", {}).items(): + if key.startswith("editor_") and embed_item and embed_item.get('type') in ['audio', 'video', 'picture']: + embed_products = [p.get('code') for p in + ((item.get('associations') or {}).get(key) or {}).get('products', [])] + + if not set(embed_products) & set(permitted_products): + disable_download.append(key) + + return disable_download diff --git a/newsroom/wire/formatters/__init__.py b/newsroom/wire/formatters/__init__.py index 35fdc261a..a663c885b 100644 --- a/newsroom/wire/formatters/__init__.py +++ 
b/newsroom/wire/formatters/__init__.py @@ -24,3 +24,7 @@ def __init__(cls, name, bases, attrs): from .ninjs import NINJSFormatter # noqa from .picture import PictureFormatter # noqa from .ninjs2 import NINJSFormatter2 # noqa +from .downloadninjs import NINJSDownloadFormatter # noqa +from .html import HTMLFormatter # noqa +from .htmlwithmedia import HTMLMediaFormatter # noqa +from .htmlpackage import HTMLPackageFormatter # noqa \ No newline at end of file diff --git a/newsroom/wire/formatters/downloadninjs.py b/newsroom/wire/formatters/downloadninjs.py new file mode 100644 index 000000000..6d3b54fd7 --- /dev/null +++ b/newsroom/wire/formatters/downloadninjs.py @@ -0,0 +1,81 @@ +from superdesk.logging import logger +from .ninjs import NINJSFormatter +from .utils import remove_internal_renditions, rewire_featuremedia, log_media_downloads, remove_unpermissioned_embeds +from newsroom.utils import update_embeds_in_body + + +class NINJSDownloadFormatter(NINJSFormatter): + """ + Overload the NINJSFormatter and add the associations as a field to copy + """ + + def __init__(self): + self.direct_copy_properties += ('associations',) + + def rewire_embeded_images(self, item): + + def _get_source_ref(marker, item): + widest = -1 + src_rendition = "" + for rendition in item.get("associations").get(marker).get("renditions"): + width = item.get("associations").get(marker).get("renditions").get(rendition).get("width") + if width > widest: + widest = width + src_rendition = rendition + + if widest > 0: + return item.get("associations").get(marker).get("renditions").get(src_rendition).get("href").lstrip('/') + + logger.warning( + "href not found for the original in NINJSDownload formatter") + return None + + def _get_source_set_refs(marker, item): + """ + For the given marker (association) return the set of available hrefs and the widths + :param marker: + :param item: + :return: + """ + srcset = [] + for rendition in item.get("associations").get(marker).get("renditions"): + 
srcset.append( + item.get("associations").get(marker).get("renditions").get(rendition).get("href").lstrip('/') + + " " + + str(item.get("associations").get(marker).get("renditions").get(rendition).get("width")) + + "w" + ) + return ",".join(srcset) + + def update_image(item, elem, group): + embed_id = "editor_" + group + elem.attrib["id"] = embed_id + src = _get_source_ref(embed_id, item) + if src: + elem.attrib["src"] = src + srcset = _get_source_set_refs(embed_id, item) + if srcset: + elem.attrib["srcset"] = srcset + elem.attrib["sizes"] = "80vw" + return True + + def update_video_or_audio(item, elem, group): + embed_id = "editor_" + group + elem.attrib["id"] = embed_id + # cleanup the element to ensure the html will validate + elem.attrib.pop("alt", None) + elem.attrib.pop("width", None) + elem.attrib.pop("height", None) + return True + + update_embeds_in_body(item, update_image, update_video_or_audio, update_video_or_audio) + + def _transform_to_ninjs(self, item): + remove_unpermissioned_embeds(item) + # Remove the renditions we should not be showing the world + remove_internal_renditions(item, remove_media=False) + # set the references embedded in the html body of the story + self.rewire_embeded_images(item) + rewire_featuremedia(item) + log_media_downloads(item) + return super()._transform_to_ninjs(item) diff --git a/newsroom/wire/formatters/html.py b/newsroom/wire/formatters/html.py new file mode 100644 index 000000000..07e0c9304 --- /dev/null +++ b/newsroom/wire/formatters/html.py @@ -0,0 +1,20 @@ +import flask +from .base import BaseFormatter +from newsroom.utils import remove_all_embeds + + +class HTMLFormatter(BaseFormatter): + """ + Formatter that allows the download of "plain" html with any embeds in the html body stripped + """ + + FILE_EXTENSION = 'html' + MIMETYPE = 'text/html' + + def format_item(self, item, item_type='items'): + remove_all_embeds(item) + + if item_type == 'items': + return str.encode(flask.render_template('download_item.html', 
item=item), 'utf-8') + else: + return str.encode(flask.render_template('download_agenda.txt', item=item), 'utf-8') diff --git a/newsroom/wire/formatters/htmlpackage.py b/newsroom/wire/formatters/htmlpackage.py new file mode 100644 index 000000000..5c264daa9 --- /dev/null +++ b/newsroom/wire/formatters/htmlpackage.py @@ -0,0 +1,77 @@ +import flask +from .base import BaseFormatter +from .utils import remove_internal_renditions, rewire_featuremedia, log_media_downloads, remove_unpermissioned_embeds +from newsroom.utils import update_embeds_in_body +from superdesk.logging import logger +from newsroom.wire.block_media.filter_htmlpackage import filter_embedded_data + + +class HTMLPackageFormatter(BaseFormatter): + + FILE_EXTENSION = 'html' + MIMETYPE = 'text/html' + + def rewire_embeded_images(self, item): + + def _get_source_ref(marker, item): + widest = -1 + src_rendition = "" + for rendition in item.get("associations").get(marker).get("renditions"): + width = item.get("associations").get(marker).get("renditions").get(rendition).get("width") + if width > widest: + widest = width + src_rendition = rendition + + if widest > 0: + return item.get("associations").get(marker).get("renditions").get(src_rendition).get("href").lstrip('/') + + logger.warning( + "href not found for the original in HTMLPackage formatter") + return None + + def _get_source_set_refs(marker, item): + """ + For the given marker (association) return the set of available hrefs and the widths + :param marker: + :param item: + :return: + """ + srcset = [] + for rendition in item.get("associations").get(marker).get("renditions"): + ref = item.get("associations").get(marker).get("renditions").get(rendition).get("href").lstrip('/') + srcset.append(ref + " " + str( + item.get("associations").get(marker).get("renditions").get(rendition).get("width")) + "w") + return ",".join(srcset) + + def update_image(item, elem, group): + embed_id = "editor_" + group + elem.attrib["id"] = embed_id + src = 
_get_source_ref(embed_id, item) + if src: + elem.attrib["src"] = src + srcset = _get_source_set_refs(embed_id, item) + if srcset: + elem.attrib["srcset"] = srcset + elem.attrib["sizes"] = "80vw" + return True + + def update_video_or_audio(item, elem, group): + embed_id = "editor_" + group + elem.attrib["id"] = embed_id + elem.attrib["src"] = item.get("associations").get(embed_id).get("renditions").get( + "original").get("href").lstrip('/') + elem.attrib.pop("alt", None) + elem.attrib.pop("width", None) + elem.attrib.pop("height", None) + return True + + update_embeds_in_body(item, update_image, update_video_or_audio, update_video_or_audio) + + @filter_embedded_data + def format_item(self, item, item_type='items'): + remove_unpermissioned_embeds(item) + remove_internal_renditions(item, remove_media=False) + self.rewire_embeded_images(item) + rewire_featuremedia(item) + log_media_downloads(item) + return str.encode(flask.render_template('download_embed.html', item=item), 'utf-8') diff --git a/newsroom/wire/formatters/htmlwithmedia.py b/newsroom/wire/formatters/htmlwithmedia.py new file mode 100644 index 000000000..2720a4a13 --- /dev/null +++ b/newsroom/wire/formatters/htmlwithmedia.py @@ -0,0 +1,83 @@ +import flask +from .base import BaseFormatter +from .utils import remove_internal_renditions, log_media_downloads, remove_unpermissioned_embeds +from newsroom.utils import update_embeds_in_body +from ...upload import ASSETS_RESOURCE +from newsroom.wire.block_media.filter_htmlpackage import filter_embedded_data + +import base64 + + +class HTMLMediaFormatter(BaseFormatter): + + FILE_EXTENSION = 'html' + MIMETYPE = 'text/html' + + def get_base64image(self, marker, item): + widest = -1 + src_rendition = "" + for rendition in item.get("associations").get(marker).get("renditions"): + width = item.get("associations").get(marker).get("renditions").get(rendition).get("width") + if width > widest: + widest = width + src_rendition = rendition + + src = 
item.get("associations").get(marker).get("renditions").get(src_rendition).get("media") + mimetype = item.get("associations").get(marker).get("renditions").get(src_rendition).get("mimetype") + file = flask.current_app.media.get(src, ASSETS_RESOURCE) + b64 = "data:{};base64,".format(mimetype) + base64.b64encode(file.read()).decode() + + return b64 + + def get_base64href(self, marker, item): + src = item.get("associations").get(marker).get("renditions").get("original").get("media") + mimetype = item.get("associations").get(marker).get("renditions").get("original").get("mimetype") + file = flask.current_app.media.get(src, ASSETS_RESOURCE) + b64 = "data:{};base64,".format(mimetype) + base64.b64encode(file.read()).decode() + return b64 + + def rewire_embedded_images(self, item): + + def update_image(item, elem, group): + embed_id = "editor_" + group + elem.attrib["id"] = embed_id + src = self.get_base64image(embed_id, item) + if src: + elem.attrib["src"] = src + return True + + def update_video_or_audio(item, elem, group): + embed_id = "editor_" + group + elem.attrib["id"] = embed_id + src = self.get_base64href(embed_id, item) + if src: + elem.attrib["src"] = src + elem.attrib.pop("alt", None) + elem.attrib.pop("width", None) + elem.attrib.pop("height", None) + return True + + update_embeds_in_body(item, update_image, update_video_or_audio, update_video_or_audio) + + def rewire_featuremedia(self, item): + """ + Set the references in the feature media to base64 encoded versions + :param item: + :return: + """ + renditions = item.get('associations', {}).get('featuremedia', {}).get('renditions', []) + for rendition in renditions: + src = item.get("associations").get('featuremedia').get("renditions").get(rendition).get("media") + mimetype = item.get("associations").get('featuremedia').get("renditions").get(rendition).get("mimetype") + file = flask.current_app.media.get(src, ASSETS_RESOURCE) + item['associations']['featuremedia']['renditions'][rendition]['href'] = 
"data:{};base64,".format( + mimetype) + base64.b64encode(file.read()).decode() + + @filter_embedded_data + def format_item(self, item, item_type='items'): + remove_unpermissioned_embeds(item) + remove_internal_renditions(item) + self.rewire_embedded_images(item) + self.rewire_featuremedia(item) + log_media_downloads(item) + return str.encode(flask.render_template('download_embed.html', item=item), 'utf-8') diff --git a/newsroom/wire/formatters/newsmlg2.py b/newsroom/wire/formatters/newsmlg2.py index 5a54102a8..80159b6ab 100644 --- a/newsroom/wire/formatters/newsmlg2.py +++ b/newsroom/wire/formatters/newsmlg2.py @@ -4,6 +4,7 @@ from superdesk.publish.formatters.nitf_formatter import NITFFormatter from superdesk.publish.formatters.newsml_g2_formatter import NewsMLG2Formatter as SuperdeskFormatter +from newsroom.utils import remove_all_embeds from .base import BaseFormatter @@ -34,6 +35,7 @@ class NewsMLG2Formatter(BaseFormatter): nitf_formatter = NITFFormatter() def format_item(self, item, item_type='items'): + remove_all_embeds(item) item = item.copy() item.setdefault('guid', item['_id']) item.setdefault('_current_version', item['version']) diff --git a/newsroom/wire/formatters/ninjs.py b/newsroom/wire/formatters/ninjs.py index 24514e33e..6d723e42b 100644 --- a/newsroom/wire/formatters/ninjs.py +++ b/newsroom/wire/formatters/ninjs.py @@ -1,6 +1,8 @@ +import flask import json from .base import BaseFormatter from superdesk.utils import json_serialize_datetime_objectId +from newsroom.utils import remove_all_embeds class NINJSFormatter(BaseFormatter): @@ -20,7 +22,21 @@ def format_item(self, item, item_type='items'): return json.dumps(ninjs, default=json_serialize_datetime_objectId) + @staticmethod + def test_for_true(value): + """ + Test if the value indicates false + :param value: + :return: + """ + return value.lower() == 'true' or value == '1' + def _transform_to_ninjs(self, item): + no_embeds = flask.request.args.get('no_embeds', default=False, 
type=self.test_for_true) + no_media = flask.request.args.get('no_media', default=False, type=self.test_for_true) + if no_media or no_embeds: + remove_all_embeds(item, remove_media_embeds=no_media, remove_by_class=no_embeds) + ninjs = { 'guid': item.get('_id'), 'version': str(item.get('version', 1)), diff --git a/newsroom/wire/formatters/ninjs2.py b/newsroom/wire/formatters/ninjs2.py index 9713c18b7..cdb1b0f31 100644 --- a/newsroom/wire/formatters/ninjs2.py +++ b/newsroom/wire/formatters/ninjs2.py @@ -1,5 +1,7 @@ from .ninjs import NINJSFormatter -from newsroom.news_api.utils import remove_internal_renditions, check_association_permission +from newsroom.news_api.utils import check_featuremedia_association_permission +from newsroom.wire.formatters.utils import remove_internal_renditions +from newsroom.utils import remove_all_embeds class NINJSFormatter2(NINJSFormatter): @@ -11,6 +13,20 @@ def __init__(self): self.direct_copy_properties += ('associations',) def _transform_to_ninjs(self, item): - if not check_association_permission(item): - item.pop('associations', None) - return remove_internal_renditions(super()._transform_to_ninjs(item)) + if not check_featuremedia_association_permission(item): + if item.get('associations', {}).get('featuremedia'): + item.get('associations').pop('featuremedia') + if not item.get('associations'): + item.pop('associations', None) + return remove_internal_renditions(super()._transform_to_ninjs(item), remove_media=True) + + +class NINJSFormatter3(NINJSFormatter2): + """ + Format with no Embeds + """ + + def _transform_to_ninjs(self, item): + remove_all_embeds(item) + ninjs = super()._transform_to_ninjs(item) + return ninjs diff --git a/newsroom/wire/formatters/nitf.py b/newsroom/wire/formatters/nitf.py index af44987ed..7dd87761a 100644 --- a/newsroom/wire/formatters/nitf.py +++ b/newsroom/wire/formatters/nitf.py @@ -1,6 +1,7 @@ from lxml import etree from superdesk.publish.formatters.nitf_formatter import NITFFormatter as 
SuperdeskNITFFormatter +from newsroom.utils import remove_all_embeds from .base import BaseFormatter @@ -14,6 +15,7 @@ class NITFFormatter(BaseFormatter): formatter = SuperdeskNITFFormatter() def format_item(self, item, item_type='items'): + remove_all_embeds(item) dest = {} nitf = self.formatter.get_nitf(item, dest, '') return etree.tostring(nitf, xml_declaration=True, pretty_print=True, encoding=self.encoding) diff --git a/newsroom/wire/formatters/text.py b/newsroom/wire/formatters/text.py index eea015991..d7a36d8f2 100644 --- a/newsroom/wire/formatters/text.py +++ b/newsroom/wire/formatters/text.py @@ -1,6 +1,7 @@ import flask from .base import BaseFormatter +from newsroom.utils import remove_all_embeds class TextFormatter(BaseFormatter): @@ -9,6 +10,7 @@ class TextFormatter(BaseFormatter): MIMETYPE = 'text/plain' def format_item(self, item, item_type='items'): + remove_all_embeds(item) if item_type == 'items': return str.encode(flask.render_template('download_item.txt', item=item), 'utf-8') else: diff --git a/newsroom/wire/formatters/utils.py b/newsroom/wire/formatters/utils.py new file mode 100644 index 000000000..596e60df6 --- /dev/null +++ b/newsroom/wire/formatters/utils.py @@ -0,0 +1,183 @@ +import flask +from flask import current_app as app +from lxml import html as lxml_html +import re +from ...upload import ASSETS_RESOURCE +from newsroom.settings import get_setting +from superdesk import get_resource_service +from superdesk.etree import to_string +from newsroom.products.products import get_products_by_company +from newsroom.auth import get_user + + +def remove_internal_renditions(item, remove_media=False): + """ + Remove the internal and original image renditions from the feature media and embedded media. The media can + optionally be removed as we do not serve this on the api. 
+ :param item: + :param remove_media: + :return: + """ + allowed_pic_renditions = get_setting('news_api_allowed_renditions').split(',') + for association_key, association_item in item.get('associations', {}).items(): + clean_renditions = dict() + for key, rendition in association_item.get('renditions', {}).items(): + if association_item.get('type') == 'picture': + if key in allowed_pic_renditions: + if remove_media: + rendition.pop('media', None) + clean_renditions[key] = rendition + else: + clean_renditions[key] = rendition + + item['associations'][association_key]['renditions'] = clean_renditions + + if isinstance(association_item, dict): + association_item.pop('products', None) + association_item.pop('subscribers', None) + + return item + + +# def add_media(zf, item): +# """ +# Add the media files associated with the item +# :param zf: Zipfile +# :param item: +# :return: +# """ +# added_files = [] +# for _key, associated_item in item.get('associations', {}).items(): +# if associated_item is None: +# continue +# renditions = associated_item.get('renditions') +# if renditions and isinstance(renditions, dict): +# for rendition in associated_item.get('renditions'): +# name = associated_item.get('renditions').get(rendition).get('href').lstrip('/') +# if name in added_files: +# continue +# file = flask.current_app.media.get(associated_item.get('renditions').get(rendition).get('media'), +# ASSETS_RESOURCE) +# zf.writestr(name, file.read()) +# added_files.append(name) + +def add_media(zf, item): + added_files = [] + associations = item.get('associations', {}) + for associated_item in associations.values(): + if not associated_item: + continue + + renditions = associated_item.get('renditions') + if not renditions or not isinstance(renditions, dict): + continue + + for rendition_data in renditions.values(): + if not rendition_data: + continue + + name = rendition_data.get('href', '').lstrip('/') + if name in added_files: + continue + + media_id = 
rendition_data.get('media') + if not media_id: + flask.current_app.logger.warning(f"Media ID not found for rendition: {name}") + continue + + file = flask.current_app.media.get(media_id, ASSETS_RESOURCE) + if not file: + flask.current_app.logger.warning(f"File not found: {name}") + continue + + try: + zf.writestr(name, file.read()) + added_files.append(name) + except Exception as e: + flask.current_app.logger.error(f"Error adding file to zip: {name}. Error: {str(e)}") + + +def rewire_featuremedia(item): + """ + Set the references in the feature media strip the leading / to make it a legitimate relative path + :param item: + :return: + """ + renditions = item.get('associations', {}).get('featuremedia', {}).get('renditions', []) + for rendition in renditions: + item['associations']['featuremedia']['renditions'][rendition]['href'] = \ + item['associations']['featuremedia']['renditions'][rendition]['href'].lstrip('/') + + +def log_media_downloads(item): + """ + Given an item create a download entry for all the associations + :param item: + :return: + """ + for _key, associated_item in item.get('associations', {}).items(): + action = 'download ' + associated_item.get('type') + get_resource_service('history').create_media_history_record(item, _key, action, get_user(required=True), + flask.request.args.get('type', 'wire')) + + +def remove_unpermissioned_embeds(item, company_id=None, section='wire'): + """ + :param item: + :param company_id: + :param section + :return: The item with the embeds that the user is not allowed to download removed + """ + + if not app.config.get("EMBED_PRODUCT_FILTERING"): + return + + kill_keys = [] + + if company_id is None: + user = get_user(required=False) + if user: + company_id = user.get('company') + else: + company_id = flask.g.user + + # get the list of superdesk products that the company is permissioned for + permitted_products = [p.get('sd_product_id') for p in + get_products_by_company(company_id, None, section) if 
p.get('sd_product_id')] + + for key, embed_item in item.get("associations", {}).items(): + if key.startswith("editor_"): + # get the list of products that the embedded item matched in superdesk + embed_products = [p.get('code') for p in + ((item.get('associations') or {}).get(key) or {}).get('products', [])] + + if not len(set(embed_products) & set(permitted_products)): + kill_keys.append(key) + + # Nothing to do + if len(kill_keys) == 0: + return + + root_elem = lxml_html.fromstring(item.get('body_html', '')) + regex = r" EMBED START (?:Image|Video|Audio) {id: \"editor_([0-9]+)" + html_updated = False + comments = root_elem.xpath('//comment()') + for comment in comments: + m = re.search(regex, comment.text) + # if we've found an Embed Start comment + if m and m.group(1): + if "editor_" + m.group(1) in kill_keys: + parent = comment.getparent() + for elem in comment.itersiblings(): + parent.remove(elem) + if elem.text and ' EMBED END ' in elem.text: + break + parent.remove(comment) + html_updated = True + if html_updated: + item["body_html"] = to_string(root_elem, method="html") + + for key in kill_keys: + item.get("associations", {}).pop(key, None) + if "refs" in item: + item["refs"] = [r for r in item.get("refs", []) if r["key"] != key] diff --git a/newsroom/wire/search.py b/newsroom/wire/search.py index e9b545047..a3905a151 100644 --- a/newsroom/wire/search.py +++ b/newsroom/wire/search.py @@ -1,11 +1,12 @@ -import logging from datetime import datetime, timedelta from copy import deepcopy - -from eve.utils import ParsedRequest -from flask import current_app as app, json +from eve.utils import ParsedRequest, config +from flask import current_app as app, json, request from superdesk import get_resource_service +from superdesk.etree import to_string from werkzeug.exceptions import Forbidden +from lxml import html as lxml_html +import re import newsroom from newsroom.products.products import get_products_by_navigation @@ -13,8 +14,13 @@ from newsroom.template_filters 
import is_admin from newsroom.utils import get_local_date, get_end_date from newsroom.search import BaseSearchService, SearchQuery, query_string +from newsroom.auth import get_user +from newsroom.companies import get_user_company +from newsroom.products.products import get_products_by_company +from newsroom.wire.block_media.filter_media import filter_media +from superdesk.logging import logger -logger = logging.getLogger(__name__) +# logger = logging.getLogger(__name__) def get_bookmarks_count(user_id, product_type): @@ -177,8 +183,12 @@ def get_product_items(self, product_id, size): self.gen_source_from_search(search) search.source['post_filter'] = {'bool': {'must': []}} internal_req = self.get_internal_request(search) + docs = list(self.internal_get(internal_req, None)) + if app.config.get("EMBED_PRODUCT_FILTERING"): + for item in docs: + self.permission_embeds_in_item(item, self.get_permitted_products()) - return list(self.internal_get(internal_req, None)) + return docs def get_navigation_story_count(self, navigations, section, company, user): """Get story count by navigation""" @@ -518,3 +528,57 @@ def get_matching_bookmarks(self, item_ids, active_users, active_companies): bookmark_users.append(bookmark) return bookmark_users + + def get_permitted_products(self): + current_user = get_user(required=True) + company = get_user_company(current_user) + if company is None: + return [] + # get a list of products that match Superdesk products for this user/company + return [p.get('sd_product_id') for p in + get_products_by_company(company.get('_id'), None, request.args.get('type', 'wire')) + if p.get('sd_product_id')] + + @filter_media + def permission_embeds_in_item(self, item, permitted_products): + disable_download = [] + for key, embed_item in item.get("associations", {}).items(): + if key.startswith("editor_") and embed_item and (embed_item.get('type', '')) in ['audio', 'video', 'picture']: + # get the list of products that the embedded item matched in Superdesk + 
embed_products = [p.get('code') for p in + ((item.get('associations') or {}).get(key) or {}).get('products', [])] + + if not len(set(embed_products) & set(permitted_products)): + disable_download.append(key) + if len(disable_download) == 0: + logger.info("No embedded items require download disabling.") + return + # logger.info( + # f"Disable download for the following embedded items:- {disable_download}- {item['body_html']}-{item['associations']}") + + root_elem = lxml_html.fromstring(item.get('body_html', '')) + regex = r" EMBED START (?:Video|Audio|Image) {id: \"editor_([0-9]+)" + html_updated = False + comments = root_elem.xpath('//comment()') + for comment in comments: + m = re.search(regex, comment.text) + if m and m.group(1): + figure = comment.getnext() + for elem in figure.iterchildren(): + if elem.tag in ['video', 'audio', 'img']: + if "editor_" + m.group(1) in disable_download: + # elem.attrib['data-disable-download'] = 'true' + if 'data-disable-download' not in elem.attrib or elem.attrib['data-disable-download'] != 'true': + elem.attrib['data-disable-download'] = 'true' + html_updated = True + if elem.text and ' EMBED END ' in elem.text: + break + html_updated = True + + if html_updated: + item["body_html"] = to_string(root_elem, method="html") + + def on_fetched(self, doc): + if app.config.get("EMBED_PRODUCT_FILTERING"): + for item in doc[config.ITEMS]: + self.permission_embeds_in_item(item, self.get_permitted_products()) diff --git a/newsroom/wire/views.py b/newsroom/wire/views.py index 6a674e168..d434980ec 100644 --- a/newsroom/wire/views.py +++ b/newsroom/wire/views.py @@ -2,7 +2,8 @@ import flask import zipfile import superdesk - +import json +from html import escape from bson import ObjectId from operator import itemgetter from flask import current_app as app, request, jsonify, url_for @@ -11,9 +12,9 @@ from werkzeug.utils import secure_filename from flask_babel import gettext from superdesk.utc import utcnow +from .formatters.utils import 
add_media from superdesk import get_resource_service - from newsroom.navigations.navigations import get_navigations_by_company from newsroom.products.products import get_products_by_company from newsroom.wire import blueprint @@ -24,13 +25,13 @@ from newsroom.email import send_email from newsroom.companies import get_user_company from newsroom.utils import get_entity_or_404, get_json_or_400, parse_dates, get_type, is_json_request, query_resource, \ - get_agenda_dates, get_location_string, get_public_contacts, get_links, get_items_for_user_action + get_agenda_dates, get_location_string, get_public_contacts, get_links, get_items_for_user_action, get_entities_elastic_or_mongo_or_404 from newsroom.notifications import push_user_notification, push_notification from newsroom.companies import section from newsroom.template_filters import is_admin_or_internal - from .search import get_bookmarks_count from ..upload import ASSETS_RESOURCE +from newsroom.wire.block_media.download_items import filter_items_download, block_items_by_embedded_data HOME_ITEMS_CACHE_KEY = 'home_items' HOME_EXTERNAL_ITEMS_CACHE_KEY = 'home_external_items' @@ -93,14 +94,18 @@ def get_items_by_card(cards): items_by_card = {} for card in cards: if card['config'].get('product'): - items_by_card[card['label']] = superdesk.get_resource_service('wire_search').\ + items = superdesk.get_resource_service('wire_search'). 
\ get_product_items(ObjectId(card['config']['product']), card['config']['size']) + if items: + for item in items: + item["body_html"] = escape(item["body_html"]) + items_by_card[card['label']] = items elif card['type'] == '4-photo-gallery': # Omit external media, let the client manually request these # using '/media_card_external' endpoint items_by_card[card['label']] = None - app.cache.set(HOME_ITEMS_CACHE_KEY, items_by_card, timeout=300) + app.cache.set(HOME_ITEMS_CACHE_KEY, items_by_card, timeout=1) return items_by_card @@ -120,6 +125,7 @@ def get_home_data(): 'formats': [{'format': f['format'], 'name': f['name'], 'types': f['types'], 'assets': f['assets']} for f in app.download_formatters.values()], 'context': 'wire', + 'ui_config': get_resource_service('ui_config').getSectionConfig('wire') } @@ -134,6 +140,22 @@ def get_previous_versions(item): return [] +@filter_items_download +def get_items_for_user_action_block(_ids, item_type): + # Getting entities from elastic first so that we get all fields + # even those which are not a part of ItemsResource(content_api) schema. 
+ items = get_entities_elastic_or_mongo_or_404(_ids, item_type) + + if not items or items[0].get('type') != 'text': + return items + + for item in items: + if item.get('slugline') and item.get('anpa_take_key'): + item['slugline'] = '{0} | {1}'.format(item['slugline'], item['anpa_take_key']) + + return items + + @blueprint.route('/') @login_required def index(): @@ -182,8 +204,7 @@ def download(_ids): user = get_user(required=True) _format = flask.request.args.get('format', 'text') item_type = get_type() - items = get_items_for_user_action(_ids.split(','), item_type) - + items = get_items_for_user_action_block(_ids.split(','), item_type , filter_func=block_items_by_embedded_data) _file = io.BytesIO() formatter = app.download_formatters[_format]['formatter'] mimetype = None @@ -211,6 +232,21 @@ def download(_ids): except ValueError: pass _file.seek(0) + elif _format == 'downloadninjs': + with zipfile.ZipFile(_file, mode='w') as zf: + for item in items: + formated_item = json.loads(formatter.format_item(item, item_type=item_type)) + add_media(zf, item) + zf.writestr(secure_filename(formatter.format_filename(item)), + json.dumps(formated_item).encode('utf-8')) + _file.seek(0) + elif _format == 'htmlpackage': + with zipfile.ZipFile(_file, mode='w') as zf: + for item in items: + formated_item = formatter.format_item(item, item_type=item_type) + add_media(zf, item) + zf.writestr(secure_filename(formatter.format_filename(item)), formated_item) + _file.seek(0) elif len(items) == 1 or _format == 'monitoring': item = items[0] args_item = item if _format != 'monitoring' else items @@ -231,7 +267,8 @@ def download(_ids): update_action_list(_ids.split(','), 'downloads', force_insert=True) get_resource_service('history').create_history_record(items, 'download', user, request.args.get('type', 'wire')) - return flask.send_file(_file, mimetype=mimetype, attachment_filename=attachment_filename, as_attachment=True) + return flask.send_file(_file, mimetype=mimetype, 
attachment_filename=attachment_filename, as_attachment=True, + cache_timeout=0) @blueprint.route('/wire_share', methods=['POST']) diff --git a/package.json b/package.json index 258a7d15e..73bd7c2da 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "bootstrap": "4.1.3", "classnames": "^2.2.5", "css-loader": "^0.28.5", + "dompurify": "^3.1.6", "enzyme-adapter-react-16": "1.7.1", "extract-text-webpack-plugin": "3.0.2", "fetch-mock": "^5.12.2", diff --git a/requirements.txt b/requirements.txt index 0887b37cc..dd7decff2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,11 +3,12 @@ PyRTF3>=0.47.5 # Fix an issue between xhtml2pdf v0.2.4 and reportlab v3.6.7 # https://github.com/xhtml2pdf/xhtml2pdf/issues/589 reportlab==3.6.6 -xhtml2pdf>=0.2.4 +xhtml2pdf==0.2.5 werkzeug>=0.9.4,<=0.11.15 urllib3<1.26 + -e . -git+https://github.com/superdesk/superdesk-planning.git@support/1.33#egg=superdesk-planning -git+https://github.com/superdesk/superdesk-core.git@support/1.33#egg=superdesk-core +git+https://github.com/superdesk/superdesk-planning.git@v1.33.3#egg=superdesk-planning +git+https://github.com/superdesk/superdesk-core.git@hotfix/1.33.17#egg=Superdesk-Core diff --git a/tests/news_api/test_api_audit.py b/tests/news_api/test_api_audit.py index e03832e99..22b16044f 100644 --- a/tests/news_api/test_api_audit.py +++ b/tests/news_api/test_api_audit.py @@ -38,7 +38,8 @@ def test_get_item_audit_creation(client, app): app.data.insert('items', [{ "_id": "111", "pubstatus": "usable", - "headline": "Headline of the story" + "headline": "Headline of the story", + "body_html": "

" }]) app.data.insert('news_api_tokens', [{"company": ObjectId(company_id), "enabled": True}]) token = app.data.find_one('news_api_tokens', req=None, company=ObjectId(company_id)) diff --git a/tests/search/test_search_filters.py b/tests/search/test_search_filters.py index 5bf0a76ed..43940c1c3 100644 --- a/tests/search/test_search_filters.py +++ b/tests/search/test_search_filters.py @@ -9,7 +9,7 @@ from newsroom.utils import get_local_date from .fixtures import PUBLIC_USER_ID, ADMIN_USER_ID, TEST_USER_ID, USERS, \ - COMPANIES,\ + COMPANIES, \ NAV_1, NAV_3, NAV_5, NAVIGATIONS, \ PRODUCTS, \ SECTION_FILTERS diff --git a/tests/search/test_search_params.py b/tests/search/test_search_params.py index f5dd85500..9e699d428 100644 --- a/tests/search/test_search_params.py +++ b/tests/search/test_search_params.py @@ -9,7 +9,7 @@ from newsroom.search import SearchQuery, BaseSearchService from .fixtures import PUBLIC_USER_ID, ADMIN_USER_ID, TEST_USER_ID, USERS, \ - COMPANY_1, COMPANY_2, COMPANY_3, COMPANIES,\ + COMPANY_1, COMPANY_2, COMPANY_3, COMPANIES, \ NAV_1, NAV_2, NAV_3, NAV_4, NAV_5, NAV_6, NAVIGATIONS, \ PROD_1, PROD_2, PROD_3, PRODUCTS diff --git a/tests/test_agenda.py b/tests/test_agenda.py index 5c274b4a5..08e66d989 100644 --- a/tests/test_agenda.py +++ b/tests/test_agenda.py @@ -1,5 +1,5 @@ import pytz -from flask import json +from flask import json, session from datetime import datetime from urllib import parse from unittest import mock @@ -7,6 +7,7 @@ import newsroom.auth # noqa - Fix cyclic import when running single test file from newsroom.utils import get_location_string, get_agenda_dates, get_public_contacts, get_entity_or_404, \ get_local_date, get_end_date +from newsroom.agenda.views import related_wire_items from .fixtures import items, init_items, agenda_items, init_agenda_items, init_auth, init_company, PUBLIC_USER_ID # noqa from .utils import post_json, delete_json, get_json, get_admin_user_id, mock_send_email from copy import deepcopy @@ -569,3 +570,49 
@@ def test_filter_events_only(client): assert 'urn:conference' == data['_items'][0]['_id'] assert 'planning_items' not in data['_items'][0] assert 'coverages' not in data['_items'][0] + + +def test_related_wire_items(client, app): + test_planning_with_coveragre = deepcopy(test_planning) + test_planning_with_coveragre["coverages"] = [ + { + "coverage_id": "d01ce39aed17", + "delivery_id": "812f8bb5a5d7", + "coverage_type": "text" + }, + { + "coverage_id": "250363d911b0", + "delivery_id": "05e339456ea0", + "coverage_type": "text" + }, + { + "coverage_id": "7a53221bca0a", + "planning_id": "a1f6f076f7b4", + "coverage_type": "picture" + }, + { + "coverage_id": "954757c9881c", + "delivery_id": "97d3b5cd0861", + "coverage_type": "text" + } + ] + + coverage_items = [{"_id": "812f8bb5a5d7", 'service': [{'code': 'a', 'name': 'Service A'}]}, + {"_id": "05e339456ea0", 'service': [{'code': 'b', 'name': 'Service B'}]}, + {"_id": "97d3b5cd0861", 'service': [{'code': 'a', 'name': 'Service A'}]}] + app.data.insert('agenda', [test_planning_with_coveragre]) + app.data.insert('items', coverage_items) + app.data.insert('products', [{ + '_id': ObjectId('5e65964bf5db68883df561d0'), 'name': 'Sport1', 'description': 'sport product 1', + 'is_enabled': True, 'product_type': 'wire', + 'navigations': [], + 'companies': ['1'], + 'query': 'service.code:a' + }]) + with app.test_request_context(): + session['user'] = PUBLIC_USER_ID + session['name'] = 'Test' + resp = related_wire_items("05e339456ea0") + data = json.loads(resp[0].get_data()) + assert data.get('agenda_item').get('_id') == 'foo' + assert (next(item for item in data.get('wire_items') if item["_id"] == "05e339456ea0").get("_access")) is False diff --git a/tests/test_api_auth.py b/tests/test_api_auth.py new file mode 100644 index 000000000..048fcf4ed --- /dev/null +++ b/tests/test_api_auth.py @@ -0,0 +1,38 @@ +from flask import url_for +from bson import ObjectId + + +def test_public_user_api(app, client): + company_ids = 
app.data.insert('companies', [{ + 'phone': '2132132134', + 'sd_subscriber_id': '12345', + 'name': 'Press Co.', + 'is_enabled': True, + 'contact_name': 'Tom' + }]) + user = { + '_id': ObjectId("5c5914275f627d5885fee6a8"), + 'first_name': 'Normal', + 'last_name': 'User', + 'email': 'normal@sourcefabric.org', + 'password': '$2b$12$HGyWCf9VNfnVAwc2wQxQW.Op3Ejk7KIGE6urUXugpI0KQuuK6RWIG', + 'user_type': 'public', + 'is_validated': True, + 'is_enabled': True, + 'is_approved': True, + 'receive_email': True, + 'phone': '2132132134', + 'expiry_alert': True, + 'company': company_ids[0]} + app.data.insert('users', [user]) + client.post( + url_for('auth.login'), + data={'email': 'normal@sourcefabric.org', 'password': 'admin'}, + follow_redirects=True + ) + + resp = client.get("/api") + assert 200 == resp.status_code + + resp = client.get("/api/users") + assert resp.status_code == 401 diff --git a/tests/test_download.py b/tests/test_download.py index 6014cff50..01f4f3547 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -42,6 +42,16 @@ def nitf_content_test(content): assert items[0]['headline'] == head.find('title').text +def ninjs_content_test(content): + data = json.loads(content.decode('utf-8')) + assert data.get('associations').get('editor_1') + assert not data.get('associations').get('editor_0') + assert not data.get('associations').get('editor_2') + assert data.get('headline') == 'Amazon Is Opening More Bookstores' + assert 'editor_1' in data.get('body_html') + assert 'editor_0' not in data.get('body_html') + + def newsmlg2_content_test(content): tree = lxml.etree.parse(io.BytesIO(content)) root = tree.getroot() @@ -124,10 +134,12 @@ def setup_image(client, app): associations = { 'featuremedia': { 'mimetype': 'image/jpeg', + 'type': 'picture', 'renditions': { 'baseImage': { 'mimetype': 'image/jpeg', 'media': media_id, + 'href': 'http://a.b.c/xxx.jpg', }, } } @@ -135,6 +147,113 @@ def setup_image(client, app): app.data.update('items', item['_id'], 
{'associations': associations}, item) +def setup_embeds(client, app): + media_id = str(bson.ObjectId()) + upload_binary('picture.jpg', client, media_id=media_id) + associations = { + 'featuremedia': { + 'mimetype': 'image/jpeg', + 'type': 'picture', + 'renditions': { + '16-9': { + 'mimetype': 'image/jpeg', + 'href': 'http://a.b.c/xxx.jpg', + 'media': media_id, + 'width': 1280, + 'height': 720, + }, + '4-3': { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + 'media': media_id, + "mimetype": "image/jpeg", + } + } + }, + "editor_1": { + "type": "video", + "renditions": { + "original": { + "mimetype": "video/mp4", + "href": "/assets/640ff0bdfb5122dcf06a6fc3", + 'media': media_id, + } + }, + "mimetype": "video/mp4", + "products": [{"code": "123", "name": "Product A"}, {"code": "321", "name": "Product B"}] + }, + "editor_0": { + "type": "audio", + "renditions": { + "original": { + "mimetype": "audio/mp3", + "href": "/assets/640feb9bfb5122dcf06a6f7c", + "media": "640feb9bfb5122dcf06a6f7c" + } + }, + "mimetype": "audio/mp3", + "products": [ + { + "code": "999", + "name": "NSW News" + } + ] + }, + "editor_2": { + "type": "picture", + "renditions": { + "4-3": { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + "mimetype": "image/jpeg", + "media": "633d11b9fb5122dcf06a6f02", + }, + "16-9": { + "href": "/assets/633d0f59fb5122dcf06a6ee8", + "width": 1280, + "height": 720, + "mimetype": "image/jpeg", + "media": "633d0f59fb5122dcf06a6ee8", + "poi": { + } + } + }, + "products": [{"code": "888"}] + }, + "editor_3": None + } + app.data.update('items', item['_id'], {'associations': associations, 'body_html': + '

Par 1

' + '' + '
minns
' + '
' + '' + '


' + '

Par 2

' + '' + '
' + '' + '
Scomo whinging
' + '
' + '' + '


Par 3

' + '' + '
' + '' + '
Prime Minister Scott Morrison and Liberal member for ' + 'Higgins Katie Allen
' + '
' + '' + '

Par 4

'}, item) + + def test_download_single(client, app): setup_image(client, app) for _format in wire_formats: @@ -165,6 +284,45 @@ def test_wire_download(client, app): assert history[0].get('section') == 'wire' +def test_ninjs_download(client, app): + setup_embeds(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + app.data.insert('companies', [{ + '_id': '1', + 'name': 'Press co.', + 'is_enabled': True, + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '1'}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['1'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + + _file = download_zip_file(client, 'downloadninjs', 'wire') + with zipfile.ZipFile(_file) as zf: + assert filename('amazon-bookstore-opening.json', item) in zf.namelist() + content = zf.open(filename('amazon-bookstore-opening.json', item)).read() + ninjs_content_test(content) + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == '1' + assert history[0].get('section') == 'wire' + + def test_agenda_download(client, app): setup_image(client, app) for _format in agenda_formats: diff --git a/tests/test_monitoring.py b/tests/test_monitoring.py index 7bcc14d41..9c5637ecb 100644 --- a/tests/test_monitoring.py +++ b/tests/test_monitoring.py @@ -835,7 +835,14 @@ def test_send_immediate_email_alerts(client, app): 'products': [{'code': '12345'}], "versioncreated": utcnow(), 'byline': 'Testy McTestface', - 'body_html': '

line 1 of the article text\nline 2 of the story\nand a bit more.

', + 'body_html': '

line 1 of the article text\nline 2 of the story\nand a bit more.

' + '' + '
' + ' ' + '
Assistant Treasurer
' + '
' + '' + '

Something after the embed', 'source': 'AAAA' }]) w = app.data.find_one('monitoring', None, _id='5db11ec55f627d8aa0b545fb') @@ -849,6 +856,8 @@ def test_send_immediate_email_alerts(client, app): assert outbox[0].recipients == ['foo_user@bar.com', 'foo_user2@bar.com'] assert outbox[0].sender == 'newsroom@localhost' assert outbox[0].subject == 'Monitoring Subject' + assert 'Something after the embed' in outbox[0].body + assert 'Assistant Treasurer' not in outbox[0].body assert 'Newsroom Monitoring: W1' in outbox[0].body @@ -976,3 +985,19 @@ def test_send_profile_email(client, app): assert len(outbox[0].recipients) == 4 assert 'atest@a.com' in outbox[0].recipients assert 'btest@b.com' in outbox[0].recipients + + +def test_save_monitoring_email(client, app): + test_login_succeeds_for_admin(client) + m = app.data.find_one('monitoring', None, _id="5db11ec55f627d8aa0b545fb") + m['email'] = 'axb.com, a@b.com' + response = client.post('/monitoring/5db11ec55f627d8aa0b545fb', data=json.dumps(m), content_type='application/json') + data = json.loads(response.get_data()) + assert data['email'][0] == 'Invalid email address: axb.com' + m['email'] = 'a@b.com , d@e.com' + response = client.post('/monitoring/5db11ec55f627d8aa0b545fb', data=json.dumps(m), content_type='application/json') + data = json.loads(response.get_data()) + assert data['success'] is True + response = client.get('/monitoring/5db11ec55f627d8aa0b545fb') + data = json.loads(response.get_data()) + assert data['email'] == 'a@b.com,d@e.com' diff --git a/tests/test_push.py b/tests/test_push.py index 18a80dbb2..9b9a3db63 100644 --- a/tests/test_push.py +++ b/tests/test_push.py @@ -625,7 +625,7 @@ def test_matching_topics_for_user_with_inactive_company(client, app): def test_push_parsed_item(client, app): client.post('/push', data=json.dumps(item), content_type='application/json') parsed = get_entity_or_404(item['guid'], 'wire_search') - assert type(parsed['firstcreated']) == datetime + assert 
isinstance(parsed['firstcreated'], datetime) assert 2 == parsed['wordcount'] assert 7 == parsed['charcount'] @@ -635,9 +635,9 @@ def test_push_parsed_dates(client, app): payload['embargoed'] = '2019-01-31T00:01:00+00:00' client.post('/push', data=json.dumps(payload), content_type='application/json') parsed = get_entity_or_404(item['guid'], 'items') - assert type(parsed['firstcreated']) == datetime - assert type(parsed['versioncreated']) == datetime - assert type(parsed['embargoed']) == datetime + assert isinstance(parsed['firstcreated'], datetime) + assert isinstance(parsed['versioncreated'], datetime) + assert isinstance(parsed['embargoed'], datetime) def test_push_event_coverage_info(client, app): diff --git a/tests/test_push_events.py b/tests/test_push_events.py index 246ee5ad0..57b60b3ff 100644 --- a/tests/test_push_events.py +++ b/tests/test_push_events.py @@ -194,7 +194,7 @@ def test_push_parsed_event(client, app): event = deepcopy(test_event) client.post('/push', data=json.dumps(event), content_type='application/json') parsed = get_entity_or_404(event['guid'], 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) assert parsed['dates']['tz'] == 'Australia/Sydney' assert parsed['dates']['end'] == datetime.\ strptime('2018-05-28T05:00:00+0000', '%Y-%m-%dT%H:%M:%S+0000').replace(tzinfo=pytz.UTC) @@ -225,7 +225,7 @@ def test_push_cancelled_event(client, app): resp = client.post('/push', data=json.dumps(event), content_type='application/json') assert resp.status_code == 200 parsed = get_entity_or_404(event['guid'], 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) assert 1 == len(parsed['event']['event_contact_info']) assert 1 == len(parsed['location']) assert parsed['event']['pubstatus'] == 'cancelled' @@ -246,7 +246,7 @@ def test_push_updated_event(client, app): } client.post('/push', data=json.dumps(event), 
content_type='application/json') parsed = get_entity_or_404(event['guid'], 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) assert 1 == len(parsed['event']['event_contact_info']) assert 1 == len(parsed['location']) assert parsed['dates']['end'].day == 30 @@ -257,7 +257,7 @@ def test_push_parsed_planning_for_an_existing_event(client, app): event['guid'] = 'foo4' client.post('/push', data=json.dumps(event), content_type='application/json') parsed = get_entity_or_404(event['guid'], 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) assert 1 == len(parsed['event']['event_contact_info']) assert 1 == len(parsed['location']) @@ -299,7 +299,7 @@ def test_push_coverages_with_different_dates_for_an_existing_event(client, app): event['guid'] = 'foo4' client.post('/push', data=json.dumps(event), content_type='application/json') parsed = get_entity_or_404(event['guid'], 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) assert 1 == len(parsed['event']['event_contact_info']) assert 1 == len(parsed['location']) @@ -334,7 +334,7 @@ def test_push_planning_with_different_dates_for_an_existing_event(client, app): event['guid'] = 'foo4' client.post('/push', data=json.dumps(event), content_type='application/json') parsed = get_entity_or_404(event['guid'], 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) assert 1 == len(parsed['event']['event_contact_info']) assert 1 == len(parsed['location']) @@ -365,7 +365,7 @@ def test_push_cancelled_planning_for_an_existing_event(client, app): event['guid'] = 'foo5' client.post('/push', data=json.dumps(event), content_type='application/json') parsed = get_entity_or_404(event['guid'], 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) 
assert 1 == len(parsed['event']['event_contact_info']) assert 1 == len(parsed['location']) @@ -402,7 +402,7 @@ def test_push_parsed_adhoc_planning_for_an_non_existing_event(client, app): client.post('/push', data=json.dumps(planning), content_type='application/json') parsed = get_entity_or_404('bar3', 'agenda') - assert type(parsed['firstcreated']) == datetime + assert isinstance(parsed['firstcreated'], datetime) assert 2 == len(parsed['coverages']) assert 1 == len(parsed['planning_items']) assert parsed['headline'] == 'Planning headline' diff --git a/tests/test_topics.py b/tests/test_topics.py index bc04cae54..5c05467b8 100644 --- a/tests/test_topics.py +++ b/tests/test_topics.py @@ -1,8 +1,9 @@ from flask import json -from newsroom.topics.views import get_topic_url from .fixtures import init_company, PUBLIC_USER_ID, TEST_USER_ID # noqa from unittest import mock from .utils import mock_send_email +from newsroom.topics.views import get_topic_url + topic = { 'label': 'Foo', @@ -36,6 +37,7 @@ def test_post_topic_user(client): with client as app: with client.session_transaction() as session: session['user'] = user_id + session['user_type'] = 'administrator' resp = app.post( topics_url, data=json.dumps(topic), @@ -52,6 +54,7 @@ def test_update_topic_fails_for_different_user(client): with client as app: with client.session_transaction() as session: session['user'] = user_id + session['user_type'] = 'administrator' resp = app.post( topics_url, data=json.dumps(topic), @@ -74,6 +77,7 @@ def test_update_topic(client): with client as app: with client.session_transaction() as session: session['user'] = user_id + session['user_type'] = 'administrator' resp = app.post( topics_url, data=json.dumps(topic), @@ -99,6 +103,7 @@ def test_delete_topic(client): with client as app: with client.session_transaction() as session: session['user'] = user_id + session['user_type'] = 'administrator' resp = app.post( topics_url, data=json.dumps(topic), diff --git a/tests/test_users.py 
b/tests/test_users.py index e9b07432c..ea8d675a8 100644 --- a/tests/test_users.py +++ b/tests/test_users.py @@ -5,7 +5,7 @@ from datetime import datetime, timedelta from superdesk import get_resource_service -from newsroom.auth import get_user_by_email +from newsroom.auth import get_auth_user_by_email from newsroom.utils import get_user_dict, get_company_dict, is_valid_login from .utils import mock_send_email from unittest import mock @@ -79,7 +79,7 @@ def test_reset_password_token_sent_for_user_succeeds(app, client): response = client.post('/users/59b4c5c61d41c8d736852fbf/reset_password') assert response.status_code == 200 assert '"success": true' in response.get_data(as_text=True) - user = get_resource_service('users').find_one(req=None, email='test@sourcefabric.org') + user = get_resource_service('auth_user').find_one(req=None, email='test@sourcefabric.org') assert user.get('token') is not None @@ -185,7 +185,7 @@ def test_create_new_user_succeeds(app, client): assert 'account created' in outbox[0].subject # get reset password token - user = get_user_by_email('new.user@abc.org') + user = get_auth_user_by_email('new.user@abc.org') client.get(url_for('auth.reset_password', token=user['token'])) # change the password diff --git a/tests/test_wire.py b/tests/test_wire.py index 0b85e31df..697776646 100644 --- a/tests/test_wire.py +++ b/tests/test_wire.py @@ -9,6 +9,7 @@ from .utils import get_json, get_admin_user_id, mock_send_email from unittest import mock from tests.test_users import ADMIN_USER_ID +from tests.test_download import setup_embeds from superdesk import get_resource_service @@ -647,3 +648,22 @@ def test_highlighting(client, app): data = json.loads(resp.get_data()) assert data['_items'][0]['es_highlight']['body_html'][0] == 'Story that involves ' \ 'cheese and onions' + + +def test_embed_mark_disable_download(client, app): + app.config['EMBED_PRODUCT_FILTERING'] = True + user = app.data.find_one('users', req=None, _id=ADMIN_USER_ID) + 
app.data.update('users', user['_id'], {'company': 1}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['1'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + setup_embeds(client, app) + resp = client.get('/wire/search?type=wire') + data = json.loads(resp.get_data()) + assert "data-disable-download" in data['_items'][0]['body_html'] + assert data['_items'][0]['body_html'].count("data-disable-download") == 1