diff --git a/PACKAGIST.md b/PACKAGIST.md deleted file mode 100644 index 53a2671..0000000 --- a/PACKAGIST.md +++ /dev/null @@ -1,80 +0,0 @@ - -# Packagist Version -## Installation -```bash -composer require mediashare/spider -``` - -## Usage -### Create index.php file and init the config. -```php -setWebspider(true); // Crawl all website -$config->setReportsDir(__DIR__.'/reports/'); // Default reports path -$config->setModulesDir(__DIR__.'/modules/'); // Default modules path -// Prompt Console / Dump -$config->setVerbose(true); // Prompt verbose output -$config->setJson(false); // Prompt json output -// Modules Activation -$config->enableAllModule(true); // Enable all modules -// Modules Activation -$config->enableAllModule(true); // Enable all modules -// $config->addModules(['Links', 'Search']);// Select one or more modules to use with class name - -// Url -$url = new \Mediashare\Entity\Url('http://marquand.pro'); -// Spider -$spider = new \Mediashare\Spider($url, $config); -$result = $spider->run(); -// dump($result); -``` - -### Create own module to execute actions when the crawler scraps a webpage. -```php -// ./modules/Links.php -webpage->getUrl(); - $links = []; - foreach($this->dom->filter('a') as $link) { - if (!empty($link)) { - $href = rtrim(ltrim($link->getAttribute('href'))); - if ($href) { - if (isset($links[$href])) { - $links[$href]++; - } else { - $links[$href] = 1; - } - } - } - } - return $links; - } -} -``` -### Execute the code from the console. -```bash -php index.php -``` -#### Output -```sh --*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--* -* Output file result: /home/slote/Bureau/Spider/var/reports/marquand.pro/5dfaf1c0147c6.json --*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--* -``` \ No newline at end of file diff --git a/README.md b/README.md index 86f1e5d..2a75eb0 100644 --- a/README.md +++ b/README.md @@ -30,23 +30,6 @@ Use Spider library in your project & create your own modules. ```bash composer require mediashare/spider ``` -#### Github -```bash -git clone https://github.com/Mediashare/Spider -cd Spider -composer install -``` -#### Docker -```bash -docker pull slote/spider -docker run slote/spider php exemple.com -``` - -# Packagist Version -## Installation -```bash -composer require mediashare/spider -``` ## Usage ### Create index.php file and init the config. @@ -65,8 +48,6 @@ $config->setVerbose(true); // Prompt verbose output $config->setJson(false); // Prompt json output // Modules Activation $config->enableAllModule(true); // Enable all modules -// Modules Activation -$config->enableAllModule(true); // Enable all modules // $config->addModules(['Links', 'Search']);// Select one or more modules to use with class name // Url diff --git a/exemple.php b/exemple.php deleted file mode 100644 index af59597..0000000 --- a/exemple.php +++ /dev/null @@ -1,28 +0,0 @@ -setId("Audit"); // Id|Name report (uniqid() by default) -$config->setWebspider(true); // Crawl all website -$config->setRequires([]); // Path requires -$config->setExceptions([]); // Path exceptions -// Directories -// $config->setReportsDir(__DIR__.'/reports/'); // Default reports path -// $config->setModulesDir(__DIR__.'/modules/'); // Default modules path -// Prompt Console / Dump -$config->setVerbose(false); // Prompt verbose output -$config->setJson(true); // Prompt json output -// Modules Activation -$config->enableAllModule(true); // Enable all modules -$config->addModules(['Links']);// Select one or more modules to use with class name -// $config->addVariables(['Search' => ['Thibault Marquand']]); // Inject this variables in modules - -// Url -$url = new \Mediashare\Entity\Url('http://marquand.pro'); - -// Spider -$spider = new \Mediashare\Spider($url, $config); -$result = $spider->run(); -// var_dump($result); \ No newline at end of file diff --git a/exemples/console.php b/exemples/console.php new file mode 100644 index 0000000..1e15d91 --- /dev/null +++ b/exemples/console.php @@ -0,0 +1,13 @@ +setVerbose(true); // Prompt verbose output +// Url +$url = new \Mediashare\Entity\Url('http://marquand.pro'); + +// Spider +$spider = new \Mediashare\Spider($url, $config); +$result = $spider->run(); \ No newline at end of file diff --git a/exemples/json-http-response.php b/exemples/json-http-response.php new file mode 100644 index 0000000..f957746 --- /dev/null +++ b/exemples/json-http-response.php @@ -0,0 +1,13 @@ +setJson(true); // Prompt json response +// Url +$url = new \Mediashare\Entity\Url('http://marquand.pro'); +// Spider +$spider = new \Mediashare\Spider($url, $config); +$result = $spider->run(); \ No newline at end of file diff --git a/exemples/module-selection.php b/exemples/module-selection.php new file mode 100644 index 0000000..0040304 --- /dev/null +++ b/exemples/module-selection.php @@ -0,0 +1,15 @@ +addModules(['Links', 'Metadata']); // Select one or more modules to use with class name + +// Url +$url = new \Mediashare\Entity\Url('http://marquand.pro'); + +// Spider +$spider = new \Mediashare\Spider($url, $config); +$result = $spider->run(); +// var_dump($result); \ No newline at end of file diff --git a/exemples/require.php b/exemples/require.php new file mode 100644 index 0000000..4b657e7 --- /dev/null +++ b/exemples/require.php @@ -0,0 +1,15 @@ +setRequires(['/images/', '/tags/']); // Path requires + +// Url +$url = new \Mediashare\Entity\Url('http://marquand.pro'); + +// Spider +$spider = new \Mediashare\Spider($url, $config); +$result = $spider->run(); +// var_dump($result); \ No newline at end of file diff --git a/index.php b/index.php new file mode 100644 index 0000000..09b81c8 --- /dev/null +++ b/index.php @@ -0,0 +1,28 @@ +setId("Audit_marquand.pro"); // Id|Name report (uniqid() by default) +$config->setWebspider(true); // Crawl all website +$config->setRequires(['/projet']); // Path requires +// $config->setExceptions(['/contact']); // Path exceptions +// Directories +$config->setReportsDir(__DIR__.'/reports/'); // Default reports path +$config->setModulesDir(__DIR__.'/modules/'); // Default modules path +// Prompt Console / Dump +$config->setVerbose(true); // Prompt verbose output +$config->setJson(false); // Prompt json output +// Modules Activation +$config->enableAllModule(false); // Enable all modules +$config->addModules(['Links', 'Search', 'Metadata']);// Select one or more modules to use with class name +$config->addVariables(['Search' => ['ces deux exchanges']]); // Inject this variables in modules + +// Url +$url = new \Mediashare\Entity\Url('http://marquand.pro'); + +// Spider +$spider = new \Mediashare\Spider($url, $config); +$result = $spider->run(); +// var_dump($result); \ No newline at end of file diff --git a/src/Service/Output.php b/src/Service/Output.php index 7b54f53..ad8ef51 100644 --- a/src/Service/Output.php +++ b/src/Service/Output.php @@ -67,7 +67,7 @@ public function progressBar(int $counter, int $max_counter, ?string $message) { public function fileDirection(string $file_direction) { if ($this->config->getVerbose()) { $climate = new \League\CLImate\CLImate; - $climate->clear(); + // $climate->clear(); $climate->border('-*-', 50)->animation('right'); echo $this->echoColor("* Output file result: ",'white').$this->echoColor($file_direction."\xA",'green'); $climate->border('-*-', 50); diff --git a/src/Spider.php b/src/Spider.php index 7b9a04b..450a09f 100644 --- a/src/Spider.php +++ b/src/Spider.php @@ -4,7 +4,6 @@ use Mediashare\Entity\Url; use Mediashare\Entity\Config; use Mediashare\Controller\Webspider; -session_start(); class Spider {