-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.php
81 lines (70 loc) · 2.53 KB
/
scraper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
<?php
require_once 'vendor/autoload.php';
use Goutte\Client;
/**
 * Scrapes player profile pages of the Japan Shogi Association site
 * (www.shogi.or.jp) and returns the extracted fields as an array or JSON.
 */
class Scraper
{
    /** Site origin used for profile requests and for site-relative image paths. */
    private static $baseUrl = 'https://www.shogi.or.jp';

    /**
     * Maps the header label of a profile-table row (as printed on the page)
     * to the key used in the returned data.
     */
    private static $profileFields = array(
        '棋士番号' => 'no',
        '生年月日' => 'birthday',
        '出身地' => 'birthplace',
        '師匠' => 'mentor',
        '竜王戦' => 'ryuou',
        '順位戦' => 'junni',
    );

    /** HTTP client; must expose request($method, $url) returning a crawler. */
    protected $client;

    public function __construct()
    {
        $this->client = new Client();
    }

    /**
     * Fetches the profile of a women's professional ("lady") player.
     *
     * @param int|string $no     Player number used in the page URL.
     * @param string     $format 'json' for a JSON string, anything else for an array.
     *
     * @return array|string|false See getKishi().
     */
    public function getKishiLady($no, $format = 'array')
    {
        return $this->getKishi($no, $format, 2);
    }

    /**
     * Fetches and parses one player profile page.
     *
     * @param int|string $no     Player number used in the page URL.
     * @param string     $format 'json' for a JSON string, anything else for an array.
     * @param int        $sex    1 requests the professional page; any other value
     *                           requests the women's professional page.
     *
     * @return array|string|false Profile data ('name', optional 'image', the
     *                            table fields that were present, 'no', 'sex'),
     *                            JSON-encoded when $format is 'json'; false when
     *                            no player name could be found on the page.
     */
    public function getKishi($no, $format = 'array', $sex = 1)
    {
        // Loose comparison kept on purpose so callers passing '1' keep working.
        $isPro = ($sex == 1);

        // Encode the number so it cannot alter the request path.
        $url = self::$baseUrl . '/player/' . ($isPro ? 'pro' : 'lady') . '/'
            . rawurlencode((string) $no) . '.html';
        $crawler = $this->client->request('GET', $url);

        $data = array();

        // Player name: first <span> inside the name area. The count() guard
        // avoids the exception DomCrawler raises for text() on an empty list.
        $crawler->filter('div.nameArea')->each(function ($node) use (&$data) {
            $spans = $node->filter('span');
            if ($spans->count() > 0) {
                $data['name'] = $spans->eq(0)->text();
            }
        });
        if (!isset($data['name'])) {
            // The association site has no 404 page, so the presence of a
            // name is what tells a real profile from a missing one.
            return false;
        }

        // Portrait image URL (the last non-empty src wins, as before).
        $src = null;
        $crawler->filter('div.imgArea img')->each(function ($node) use (&$src) {
            $candidate = trim((string) $node->attr('src'));
            if ($candidate !== '') {
                $src = $candidate;
            }
        });
        if ($src !== null) {
            $data['image'] = $this->toAbsoluteUrl($src);
        }

        // Basic information table: one header/value pair per row.
        $fields = self::$profileFields;
        $crawler->filter('div.uniqueLayoutElements03 table.tableElements02 tr')->each(
            function ($row) use (&$data, $fields) {
                $header = $row->filter('th');
                $value = $row->filter('td');
                if ($header->count() === 0 || $value->count() === 0) {
                    // Layout-only row; skipping it keeps the scrape alive.
                    return;
                }
                $label = trim($header->text());
                if (isset($fields[$label])) {
                    $data[$fields[$label]] = trim($value->text());
                }
            }
        );

        // Fall back to the requested number when the page does not list one.
        if (empty($data['no'])) {
            $data['no'] = $no;
        }

        // TODO: sex is derived from which page type was requested; this will
        // be wrong once a woman becomes a regular professional player.
        $data['sex'] = $isPro ? 'man' : 'woman';

        if ($format === 'json') {
            return json_encode($data);
        }

        return $data;
    }

    /**
     * Turns an image src into an absolute URL: absolute and protocol-relative
     * values are kept, everything else is resolved against the site origin.
     */
    private function toAbsoluteUrl($src)
    {
        if (preg_match('#^https?://#i', $src)) {
            return $src;
        }
        if (strpos($src, '//') === 0) {
            return 'https:' . $src;
        }

        return self::$baseUrl . '/' . ltrim($src, '/');
    }
}