Skip to content

Commit

Permalink
EWPP-2479: Update README.md file and adjust selectors for extracting …
Browse files Browse the repository at this point in the history
…rdf files.
  • Loading branch information
sergepavle committed Sep 26, 2022
1 parent 2b6c106 commit 92d5de2
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 13 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
vendor
.idea
composer.lock
24 changes: 22 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ Visit the RDF storage at: http://localhost:8890
Update information about vocabularies:

```
$ docker exec triple-store-dev ./vendor/bin/robo update_version
$ docker-compose exec web vendor/bin/robo update_version
```
This command can be executed only after executed within this code base `docker-compose up` and `docker-compose exec web composer install`
This command can only be run after `docker-compose up -d` and `docker-compose exec web composer update --dev` have been executed within this code base.

Fetch remote data:

Expand Down Expand Up @@ -120,6 +120,26 @@ DBA_PASSWORD

Default values set via environment variables will override values set in `robo.yml`.

## Update vocabularies version in source code

Currently, information regarding the latest version of the vocabularies can be found on the `https://op.europa.eu/en/home` site.
To update the source code automatically, follow these steps:

1. Download and start the supplied Docker images:
```
$ docker-compose up -d
```
2. Install the Composer dependencies:
```
$ docker-compose exec web composer update --dev
```
3. Update the information about vocabularies automatically using the crawler:
```
$ docker-compose exec web vendor/bin/robo update_version
```
4. Test the built image in your application.
5. Commit the changed files, except for the `composer.lock` file.

## Working with Docker Compose

In Docker Compose, declare the service as follows:
Expand Down
15 changes: 10 additions & 5 deletions RoboFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ public function purge() {
/**
* Update information about OP vocabularies with automatic updating URLs.
*
* @TODO Refactor after accepting POC.
*
* @command update_version
*/
public function updateVersions() {
Expand All @@ -99,24 +101,27 @@ public function updateVersions() {
]
);
$current_voc_titles = [];
$data_values = [];
foreach ($this->config->get('data') as $datum) {
$current_voc_titles[] = $datum['title'];
$data_values[$datum['title']] = $datum;
}

try{
try {
$parsedown = new Parsedown();
$parsed_readme = $parsedown->parse(file_get_contents('README.md'));
$raw_readme = file_get_contents('README.md');
$updated_readme = FALSE;
$crawler = new \Symfony\Component\DomCrawler\Crawler($parsed_readme);
$links_to_op_vocs = $crawler->filter('li>a');
$links_to_op_vocs = $crawler->filter('li > a');
/** @var \Facebook\WebDriver\WebDriverBy $webdriver_by */
$webdriver_by = \Facebook\WebDriver\WebDriverBy::class;
foreach ($links_to_op_vocs as $link) {
// Use only links to OP.
if (!in_array($link->textContent, $current_voc_titles)) {
continue;
}
$data = $data_values[$link->textContent];
$web_driver->get($link->getAttribute('href'));
sleep(10);

Expand All @@ -133,19 +138,19 @@ public function updateVersions() {
continue;
}

// Find last version.
// Find latest version.
$latests_link = $web_driver->findElement($webdriver_by::cssSelector('div.tab-content .eu-vocabularies-latest-version'))->findElement($webdriver_by::xpath('../span/a'));
$latests_link->click();
sleep(10);
$title = str_replace(' ', '[[:space:]]', $link->textContent);
$regexp = '/^([[:space:]]\-[[:space:]]\[' . $title . '\])(\(.*\))$/m';
$regexp = '/^(\-[[:space:]]\[' . $title . '\])(\(.*\))$/m';
$raw_readme = preg_replace($regexp, '$1' . '(' . $web_driver->getCurrentURL() . ')', $raw_readme);
$updated_readme = TRUE;

// Visit page with links to rdf files.
$web_driver->findElement($webdriver_by::linkText('Downloads'))->click();
sleep(10);
$rdf_link_url = $web_driver->findElement($webdriver_by::partialLinkText('-skos-ap-act.rdf'))->getAttribute('href');
$rdf_link_url = $web_driver->findElement($webdriver_by::partialLinkText($data['partial_link_text']))->getAttribute('href');
parse_str(parse_url(urldecode($rdf_link_url))['query'], $query);
$rdf_urls_for_update[$link->textContent] = $query['cellarURI'];
}
Expand Down
6 changes: 3 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
"require-dev": {
"erusev/parsedown": "^1.7",
"php-webdriver/webdriver": "^1.12",
"symfony/css-selector": "^3.3",
"symfony/dom-crawler": "^3.3"
"symfony/css-selector": "^4.4",
"symfony/dom-crawler": "^4.4"
},
"config": {
"sort-packages": true,
"platform": {
"php": "7.0"
"php": "8.0"
}
},
"scripts": {
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: '2'
services:
web:
image: fpfis/httpd-php-dev:7.4
image: fpfis/httpd-php-dev:8.0
working_dir: /var/www/html
ports:
- 8080:8080
Expand All @@ -15,7 +15,7 @@ services:
XDEBUG_MODE: "develop, debug"
PHP_IDE_CONFIG: "serverName=Docker"
# Enable step debugging for all PHP requests. See ./README.md#step-debugging for more information.
XDEBUG_SESSION: 1
# XDEBUG_SESSION: 1

# If you would like to see what is going on you can run the following on your host:
# docker run --rm -p 4444:4444 -p 5900:5900 --network="host" selenium/standalone-chrome-debug:latest
Expand Down
18 changes: 18 additions & 0 deletions robo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,108 +10,126 @@ data:
title: 'Corporate body classification'
graph: 'http://publications.europa.eu/resource/authority/corporate-body-classification'
url: 'http://publications.europa.eu/resource/cellar/9619bd59-f70c-11ec-b94a-01aa75ed71a1.0001.01/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: corporate-body
title: 'Corporate body'
graph: 'http://publications.europa.eu/resource/authority/corporate-body'
url: 'http://publications.europa.eu/resource/cellar/a4b6959a-f70c-11ec-b94a-01aa75ed71a1.0001.03/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: country
title: Country
graph: 'http://publications.europa.eu/resource/authority/country'
url: 'http://publications.europa.eu/resource/cellar/6451e772-f70c-11ec-b94a-01aa75ed71a1.0001.04/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: digital-europa-thesaurus
title: 'Digital Europa Thesaurus'
graph: 'http://data.europa.eu/uxp'
url: 'http://publications.europa.eu/resource/cellar/8335da20-ed4a-11ec-a534-01aa75ed71a1.0001.03/DOC_1'
partial_link_text: DET-skos-core.zip
file: DET-skos-core.rdf
format: zip
-
name: eurovoc-thesaurus
title: 'EuroVoc Thesaurus'
graph: 'http://publications.europa.eu/resource/dataset/eurovoc'
url: 'http://publications.europa.eu/resource/cellar/65e724e6-fe92-11ec-b94a-01aa75ed71a1.0001.04/DOC_1'
partial_link_text: skos_core_concepts.zip
file: eurovoc_in_skos_core_concepts.rdf
format: zip
-
name: eu-programme
title: 'EU Programme'
graph: 'http://publications.europa.eu/resource/authority/eu-programme'
url: 'http://publications.europa.eu/resource/cellar/c8c0ceb1-f70c-11ec-b94a-01aa75ed71a1.0001.02/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: language
title: Language
graph: 'http://publications.europa.eu/resource/authority/language'
url: 'http://publications.europa.eu/resource/cellar/d4ba7363-f70c-11ec-b94a-01aa75ed71a1.0001.04/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: organization-type
title: 'Organization type'
graph: 'http://publications.europa.eu/resource/authority/organization-type'
url: 'http://publications.europa.eu/resource/cellar/d0a4a644-2070-11ec-bd8e-01aa75ed71a1.0001.04/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: place
title: Place
graph: 'http://publications.europa.eu/resource/dataset/place'
url: 'http://publications.europa.eu/resource/cellar/8d0a1938-f70c-11ec-b94a-01aa75ed71a1.0001.03/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: public-event-type
title: 'Public event type'
graph: 'http://publications.europa.eu/resource/dataset/public-event-type'
url: 'http://publications.europa.eu/resource/cellar/d707bec4-f70c-11ec-b94a-01aa75ed71a1.0001.01/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: resource-type
title: 'Resource type'
graph: 'http://publications.europa.eu/resource/authority/resource-type'
url: 'http://publications.europa.eu/resource/cellar/b258ba2c-f70c-11ec-b94a-01aa75ed71a1.0001.03/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: target-audience
title: 'Target audience'
graph: 'http://publications.europa.eu/resource/authority/target-audience'
url: 'http://publications.europa.eu/resource/cellar/efe194a2-20f4-11ec-bd8e-01aa75ed71a1.0001.02/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: SDG
title: 'Sustainable Development Goals'
graph: 'http://publications.europa.eu/resource/dataset/sdg'
url: 'http://publications.europa.eu/resource/cellar/5ea37239-0495-11eb-a511-01aa75ed71a1.0001.01/DOC_1'
partial_link_text: sdg-skos-ap-eu.rdf
format: rdf
-
name: human-sex
title: 'Human sex'
graph: 'http://publications.europa.eu/resource/dataset/human-sex'
url: 'http://publications.europa.eu/resource/cellar/ea0ea4ef-20f4-11ec-bd8e-01aa75ed71a1.0001.01/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: role
title: Role
graph: 'http://publications.europa.eu/resource/dataset/role'
url: 'http://publications.europa.eu/resource/cellar/99179de0-f787-11ec-b94a-01aa75ed71a1.0001.03/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: role-qualifier
title: 'Role qualifier'
graph: 'http://publications.europa.eu/resource/dataset/role-qualifier'
url: 'http://publications.europa.eu/resource/cellar/ec32e7b9-f933-11ec-b94a-01aa75ed71a1.0001.03/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: strategic-priority
title: 'Strategic priority'
graph: 'http://publications.europa.eu/resource/authority/strategic-priority'
url: 'http://publications.europa.eu/resource/cellar/807aeb47-f70c-11ec-b94a-01aa75ed71a1.0001.01/DOC_1'
partial_link_text: '-skos-ap-act.rdf'
format: rdf
-
name: com-web-presence
title: 'European Commission web presence classes'
graph: 'http://publications.europa.eu/resource/dataset/com-web-presence'
url: 'http://publications.europa.eu/resource/cellar/82ebffdf-ed4a-11ec-a534-01aa75ed71a1.0001.01/DOC_1'
partial_link_text: comm-web-presence-skos-ap-eu.rdf
format: rdf

0 comments on commit 92d5de2

Please sign in to comment.