diff --git a/.gitignore b/.gitignore index fdb5924..ecdf2d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,2 @@ vendor .idea -composer.lock \ No newline at end of file diff --git a/README.md b/README.md index dbe94bb..919c6aa 100644 --- a/README.md +++ b/README.md @@ -74,9 +74,9 @@ Visit the RDF storage at: http://localhost:8890 Update information about vocabularies: ``` -$ docker exec triple-store-dev ./vendor/bin/robo update_version +$ docker-compose exec web vendor/bin/robo update_version ``` -This command can be executed only after executed within this code base `docker-compose up` and `docker-compose exec web composer install` +This command can be executed only after running `docker-compose up -d` and `docker-compose exec web composer update --dev` within this code base. Fetch remote data: @@ -120,6 +120,26 @@ DBA_PASSWORD Default values set via environment variables will override values set in `robo.yml`. +## Update vocabularies version in source code + +Currently, information about the latest versions of the vocabularies can be found on the `https://op.europa.eu/en/home` site. +To update the source code automatically, follow these steps: + +1. Download and start the supplied Docker images: +``` +$ docker-compose up -d +``` +2. Install the Composer dependencies: +``` +$ docker-compose exec web composer update --dev +``` +3. Run the automatic update of vocabulary information using the crawler: +``` +$ docker-compose exec web vendor/bin/robo update_version +``` +4. Test the built image in your application. +5. Commit the changed files, except for the `composer.lock` file. + ## Working with Docker Compose In Docker Compose declare service as follow: diff --git a/RoboFile.php b/RoboFile.php index f2ca59f..8c8fe16 100644 --- a/RoboFile.php +++ b/RoboFile.php @@ -88,6 +88,8 @@ public function purge() { /** * Update information about OP vocabularies with automatic updating URLs. * + * @TODO Refactor after accepting POC. 
+ * * @command update_version */ public function updateVersions() { @@ -99,17 +101,19 @@ public function updateVersions() { ] ); $current_voc_titles = []; + $data_values = []; foreach ($this->config->get('data') as $datum) { $current_voc_titles[] = $datum['title']; + $data_values[$datum['title']] = $datum; } - try{ + try { $parsedown = new Parsedown(); $parsed_readme = $parsedown->parse(file_get_contents('README.md')); $raw_readme = file_get_contents('README.md'); $updated_readme = FALSE; $crawler = new \Symfony\Component\DomCrawler\Crawler($parsed_readme); - $links_to_op_vocs = $crawler->filter('li>a'); + $links_to_op_vocs = $crawler->filter('li > a'); /** @var \Facebook\WebDriver\WebDriverBy $webdriver_by */ $webdriver_by = \Facebook\WebDriver\WebDriverBy::class; foreach ($links_to_op_vocs as $link) { @@ -117,6 +121,7 @@ public function updateVersions() { if (!in_array($link->textContent, $current_voc_titles)) { continue; } + $data = $data_values[$link->textContent]; $web_driver->get($link->getAttribute('href')); sleep(10); @@ -133,19 +138,19 @@ public function updateVersions() { continue; } - // Find last version. + // Find latest version. $latests_link = $web_driver->findElement($webdriver_by::cssSelector('div.tab-content .eu-vocabularies-latest-version'))->findElement($webdriver_by::xpath('../span/a')); $latests_link->click(); sleep(10); $title = str_replace(' ', '[[:space:]]', $link->textContent); - $regexp = '/^([[:space:]]\-[[:space:]]\[' . $title . '\])(\(.*\))$/m'; + $regexp = '/^(\-[[:space:]]\[' . $title . '\])(\(.*\))$/m'; $raw_readme = preg_replace($regexp, '$1' . '(' . $web_driver->getCurrentURL() . ')', $raw_readme); $updated_readme = TRUE; // Visit page with links to rdf files. 
$web_driver->findElement($webdriver_by::linkText('Downloads'))->click(); sleep(10); - $rdf_link_url = $web_driver->findElement($webdriver_by::partialLinkText('-skos-ap-act.rdf'))->getAttribute('href'); + $rdf_link_url = $web_driver->findElement($webdriver_by::partialLinkText($data['partial_link_text']))->getAttribute('href'); parse_str(parse_url(urldecode($rdf_link_url))['query'], $query); $rdf_urls_for_update[$link->textContent] = $query['cellarURI']; } diff --git a/composer.json b/composer.json index 08b3e94..6147703 100644 --- a/composer.json +++ b/composer.json @@ -11,13 +11,13 @@ "require-dev": { "erusev/parsedown": "^1.7", "php-webdriver/webdriver": "^1.12", - "symfony/css-selector": "^3.3", - "symfony/dom-crawler": "^3.3" + "symfony/css-selector": "^4.4", + "symfony/dom-crawler": "^4.4" }, "config": { "sort-packages": true, "platform": { - "php": "7.0" + "php": "8.0" } }, "scripts": { diff --git a/docker-compose.yml b/docker-compose.yml index f205a62..55999dd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: '2' services: web: - image: fpfis/httpd-php-dev:7.4 + image: fpfis/httpd-php-dev:8.0 working_dir: /var/www/html ports: - 8080:8080 @@ -15,7 +15,7 @@ services: XDEBUG_MODE: "develop, debug" PHP_IDE_CONFIG: "serverName=Docker" # Enable step debugging for all PHP request. See ./README.md#step-debugging for more information. 
- XDEBUG_SESSION: 1 + # XDEBUG_SESSION: 1 # If you would like to see what is going on you can run the following on your host: # docker run --rm -p 4444:4444 -p 5900:5900 --network="host" selenium/standalone-chrome-debug:latest diff --git a/robo.yml b/robo.yml index 227fa2c..b13133af 100644 --- a/robo.yml +++ b/robo.yml @@ -10,24 +10,28 @@ data: title: 'Corporate body classification' graph: 'http://publications.europa.eu/resource/authority/corporate-body-classification' url: 'http://publications.europa.eu/resource/cellar/9619bd59-f70c-11ec-b94a-01aa75ed71a1.0001.01/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: corporate-body title: 'Corporate body' graph: 'http://publications.europa.eu/resource/authority/corporate-body' url: 'http://publications.europa.eu/resource/cellar/a4b6959a-f70c-11ec-b94a-01aa75ed71a1.0001.03/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: country title: Country graph: 'http://publications.europa.eu/resource/authority/country' url: 'http://publications.europa.eu/resource/cellar/6451e772-f70c-11ec-b94a-01aa75ed71a1.0001.04/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: digital-europa-thesaurus title: 'Digital Europa Thesaurus' graph: 'http://data.europa.eu/uxp' url: 'http://publications.europa.eu/resource/cellar/8335da20-ed4a-11ec-a534-01aa75ed71a1.0001.03/DOC_1' + partial_link_text: DET-skos-core.zip file: DET-skos-core.rdf format: zip - @@ -35,6 +39,7 @@ data: title: 'EuroVoc Thesaurus' graph: 'http://publications.europa.eu/resource/dataset/eurovoc' url: 'http://publications.europa.eu/resource/cellar/65e724e6-fe92-11ec-b94a-01aa75ed71a1.0001.04/DOC_1' + partial_link_text: skos_core_concepts.zip file: eurovoc_in_skos_core_concepts.rdf format: zip - @@ -42,76 +47,89 @@ data: title: 'EU Programme' graph: 'http://publications.europa.eu/resource/authority/eu-programme' url: 'http://publications.europa.eu/resource/cellar/c8c0ceb1-f70c-11ec-b94a-01aa75ed71a1.0001.02/DOC_1' + 
partial_link_text: '-skos-ap-act.rdf' format: rdf - name: language title: Language graph: 'http://publications.europa.eu/resource/authority/language' url: 'http://publications.europa.eu/resource/cellar/d4ba7363-f70c-11ec-b94a-01aa75ed71a1.0001.04/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: organization-type title: 'Organization type' graph: 'http://publications.europa.eu/resource/authority/organization-type' url: 'http://publications.europa.eu/resource/cellar/d0a4a644-2070-11ec-bd8e-01aa75ed71a1.0001.04/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: place title: Place graph: 'http://publications.europa.eu/resource/dataset/place' url: 'http://publications.europa.eu/resource/cellar/8d0a1938-f70c-11ec-b94a-01aa75ed71a1.0001.03/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: public-event-type title: 'Public event type' graph: 'http://publications.europa.eu/resource/dataset/public-event-type' url: 'http://publications.europa.eu/resource/cellar/d707bec4-f70c-11ec-b94a-01aa75ed71a1.0001.01/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: resource-type title: 'Resource type' graph: 'http://publications.europa.eu/resource/authority/resource-type' url: 'http://publications.europa.eu/resource/cellar/b258ba2c-f70c-11ec-b94a-01aa75ed71a1.0001.03/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: target-audience title: 'Target audience' graph: 'http://publications.europa.eu/resource/authority/target-audience' url: 'http://publications.europa.eu/resource/cellar/efe194a2-20f4-11ec-bd8e-01aa75ed71a1.0001.02/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: SDG title: 'Sustainable Development Goals' graph: 'http://publications.europa.eu/resource/dataset/sdg' url: 'http://publications.europa.eu/resource/cellar/5ea37239-0495-11eb-a511-01aa75ed71a1.0001.01/DOC_1' + partial_link_text: sdg-skos-ap-eu.rdf format: rdf - name: human-sex title: 'Human sex' graph: 
'http://publications.europa.eu/resource/dataset/human-sex' url: 'http://publications.europa.eu/resource/cellar/ea0ea4ef-20f4-11ec-bd8e-01aa75ed71a1.0001.01/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: role title: Role graph: 'http://publications.europa.eu/resource/dataset/role' url: 'http://publications.europa.eu/resource/cellar/99179de0-f787-11ec-b94a-01aa75ed71a1.0001.03/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: role-qualifier title: 'Role qualifier' graph: 'http://publications.europa.eu/resource/dataset/role-qualifier' url: 'http://publications.europa.eu/resource/cellar/ec32e7b9-f933-11ec-b94a-01aa75ed71a1.0001.03/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: strategic-priority title: 'Strategic priority' graph: 'http://publications.europa.eu/resource/authority/strategic-priority' url: 'http://publications.europa.eu/resource/cellar/807aeb47-f70c-11ec-b94a-01aa75ed71a1.0001.01/DOC_1' + partial_link_text: '-skos-ap-act.rdf' format: rdf - name: com-web-presence title: 'European Commission web presence classes' graph: 'http://publications.europa.eu/resource/dataset/com-web-presence' url: 'http://publications.europa.eu/resource/cellar/82ebffdf-ed4a-11ec-a534-01aa75ed71a1.0001.01/DOC_1' + partial_link_text: comm-web-presence-skos-ap-eu.rdf format: rdf