From dc8157b843d4fcb07399a02a33ba8b2b391f94a1 Mon Sep 17 00:00:00 2001 From: Ivanov Dmitry Date: Thu, 18 May 2017 16:37:58 +0300 Subject: [PATCH] Have been added: 1) Code strip tags. 2) Settings for necessary tags which will be no cutting off. --- src/ContentParser.php | 12 ++++++++++++ src/ContentParserMercury.php | 2 +- src/ParsingResult.php | 9 +++++++++ src/config/mercury.php | 2 +- src/config/parser.php | 31 ++++++++++++++++++++++++++++++- 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/ContentParser.php b/src/ContentParser.php index 529a255..bb41811 100644 --- a/src/ContentParser.php +++ b/src/ContentParser.php @@ -53,6 +53,9 @@ public function __construct($url, Array $params = []) { $this->url = $url; $this->urlParams = parse_url($this->getURL()); $this->parsingResult = $this->parse(); + if ($this->needsCodeStrip()) { + $this->parsingResult->stripContent(); + } if ($this->needsCodeClean()) { $this->parsingResult->cleanContent(); } @@ -96,6 +99,15 @@ final protected function needsCodeClean() { return ((bool)config('deepslam.parser.clean_code')); } + /** + * Checks whether have to strip tags or not. + * + * @return bool True - need to strip tags, false - needn't to strip tags + */ + final protected function needsCodeStrip() { + return ((bool)config('deepslam.parser.strip_tags')); + } + /*** STATIC LAYER ***/ /** diff --git a/src/ContentParserMercury.php b/src/ContentParserMercury.php index 467dfcc..7ac4a6e 100644 --- a/src/ContentParserMercury.php +++ b/src/ContentParserMercury.php @@ -9,7 +9,7 @@ final protected function parse():ParsingResult { $result = new ParsingResult(); $response = Curl::to('https://mercury.postlight.com/parser?url='.urlencode($this->getURL())) - ->withHeaders(array('x-api-key: '.config('deepslam.mercury.api-key'))) + ->withHeaders(array('x-api-key: '.config('deepslam.mercury.api_key'))) ->asJson() ->get(); if (!is_null($response)) { diff --git a/src/ParsingResult.php b/src/ParsingResult.php index 680acdd..16102fc 100644 --- a/src/ParsingResult.php +++ b/src/ParsingResult.php @@ -73,5 +73,14 @@ public function isEmpty() { public function cleanContent() { $this->params["content"] = preg_replace( '/\s?(style|class|id)=[\'"]{1}.*[\'"]{1}/sUi', '', $this->params["content"], -1 ); } + + /** + * Let's strip tags from unwilling tags + * + *@return String Stripped content + */ + public function stripContent() { + $this->params["content"] = strip_tags($this->params["content"], implode('',config('deepslam.parser.allowed_tags'))); + } } ?> \ No newline at end of file diff --git a/src/config/mercury.php b/src/config/mercury.php index 9459153..0261542 100644 --- a/src/config/mercury.php +++ b/src/config/mercury.php @@ -1,5 +1,5 @@ 'YVMpLAlWqWk9DSJHcWMCdsFhqMSrIhoK0YaQwfX6', + 'api_key' => 'YVMpLAlWqWk9DSJHcWMCdsFhqMSrIhoK0YaQwfX6', ]; ?> \ No newline at end of file diff --git a/src/config/parser.php b/src/config/parser.php index 647164a..1c2496a 100644 --- a/src/config/parser.php +++ b/src/config/parser.php @@ -1,6 +1,35 @@ true, + "clean_code" => true,//Clean code from different styles, classes etc. + "strip_tags" => true,//Strip tags? + "allowed_tags" => [//List of the allowed tags + '
', + '

', + '', + '