From 4ba3dc98dd3998ceb21ccb1190f698a455c3b21e Mon Sep 17 00:00:00 2001 From: jc Date: Sun, 28 Aug 2016 21:04:05 +0200 Subject: [PATCH] mdb: update ini files --- config/mdb/allocine.fr.bing.ini | 61 +++++++++ config/mdb/allocine.fr.google.ini | 64 +++++++++ config/mdb/allocine.fr.ini | 61 +++++++++ config/mdb/imdb.com.ask.ini | 105 +++++++++----- config/mdb/imdb.com.bing.ini | 96 ++++++++----- config/mdb/imdb.com.bing_series.ini | 110 +++++++++++++++ config/mdb/imdb.com.imdb.ini | 111 ++++++++++----- config/mdb/imdb.com.imdb_series.ini | 118 ++++++++++++++++ config/mdb/imdb.com.imdb_series.onscreen.ini | 95 +++++++++++++ config/mdb/tvdb.com.bing.ini | 136 +++++++++++++++++++ config/mdb/tvdb.com.bing_change_language.txt | 45 ++++++ 11 files changed, 904 insertions(+), 98 deletions(-) create mode 100644 config/mdb/allocine.fr.bing.ini create mode 100644 config/mdb/allocine.fr.google.ini create mode 100644 config/mdb/allocine.fr.ini create mode 100644 config/mdb/imdb.com.bing_series.ini create mode 100644 config/mdb/imdb.com.imdb_series.ini create mode 100644 config/mdb/imdb.com.imdb_series.onscreen.ini create mode 100644 config/mdb/tvdb.com.bing.ini create mode 100644 config/mdb/tvdb.com.bing_change_language.txt diff --git a/config/mdb/allocine.fr.bing.ini b/config/mdb/allocine.fr.bing.ini new file mode 100644 index 0000000..648673a --- /dev/null +++ b/config/mdb/allocine.fr.bing.ini @@ -0,0 +1,61 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion : +* @Site: allocine.fr +* @Revision 0 - [04/06/2013] Francis De Paemeleere +* - creation (adjust from allocine.fr.google.ini) +* @Remarks: primary search with bing.com +* @header_end +**------------------------------------------------------------------------------------------------ +* +site {url=allocine.fr|cultureinfo=fr-FR|charset=UTF-8|matchfactor=60|searchsite=bing} +* 
primary search: +url_primarysearch {url|http://www.bing.com/search?q=|site%3Aallocine.fr+|'title'|+|'productiondate'|+&scope=web&qs=ns&form=QBRE&qb=2} +show_id.scrub {multi()|primary|allocine.fr/film/fichefilm_gen_cfilm=||.html|.html} +* +* imdb url's: +url_mdb_p1 {url|primary|http://allocine.fr/film/fichefilm_gen_cfilm=|show_id|.html} +url_mdb_p2 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/casting} +url_mdb_p3 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/presse/} +url_mdb_p4 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/spectateurs/membres-critiques/} +* +* imdb elements +mdb_title.scrub {single()|p1|
Titre original
|||} * original title when redirected +mdb_title.scrub {single()|p1||" />} * normal title +mdb_title.scrub {multi()|p1|Ce film dans d'autres pays|||} * aka's +mdb_title.modify {cleanup()} + +mdb_productiondate.scrub {single()|p1|
Année de production
|||} * original title when redirected + +mdb_actor.scrub {multi()|p2|itemprop="actors"|||} + +mdb_director.scrub {multi()|p2|itemprop="director"|||} + +mdb_starrating.scrub {single()|p1||\|} +mdb_starrating.modify {substring(type=element)|-1 1} * get the last part (should be the rating) + +mdb_starratingvotes.scrub {single()|p1||

|

} +mdb_description.modify {cleanup(tags="<"">")} + +mdb_commentsummary.scrub {multi(max=5)|p4|

|

|

} * comments spectateur +mdb_commentsummary.modify {cleanup(tags="<"">")} + +mdb_review.scrub {multi()|p3|
|

|

|

} * comments presse +mdb_review.modify {cleanup(tags="<"">")} + +* remove emty comments = containing "Voir la critique sur" +******mdb_temp_6.modify {addstart()|'mdb_review'} +******mdb_temp_1.modify {calculate(debug type=element format=F0)|'mdb_review' #} * number of comments +*******mdb_review.modify {clear} +*******loop {('mdb_temp_1' > "0" max=50)|end} +******mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index +******mdb_temp_2.modify {substring(debug type=element)|'mdb_temp_6' 'mdb_temp_1' 1} +******mdb_temp_3.modify {addstart(debug 'mdb_temp_2' not~ "Voir la critique sur")|####'mdb_temp_2'} * only add non-empty comments +*******end_loop +****** +******mdb_review.modify {addstart(debug)|'mdb_temp_3'} * make multi +******mdb_review.modify {replace(debug)|####|\|} * make multi diff --git a/config/mdb/allocine.fr.google.ini b/config/mdb/allocine.fr.google.ini new file mode 100644 index 0000000..a562b43 --- /dev/null +++ b/config/mdb/allocine.fr.google.ini @@ -0,0 +1,64 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion : +* @Site: allocine.fr +* @Revision 1 - [04/06/2013] Francis De Paemeleere +* - add correct title scrub +* - max=5 for mdb_commentsummary +* @Revision 0 - [03/06/2013] Francis De Paemeleere +* - creation +* @Remarks: primary search with google.com +* @header_end +**------------------------------------------------------------------------------------------------ +* +site {url=allocine.fr|cultureinfo=fr-FR|charset=UTF-8|matchfactor=60|searchsite=google} +* primary search: +url_primarysearch {url(urlencode=1,2,3,4)|http://www.google.com/search?num=10&q=|site:allocine.fr "|'title'|" "|'productiondate'|"} +show_id.scrub {multi()|primary|allocine.fr/film/fichefilm_gen_cfilm=||.html|.html} +* +* imdb url's: +url_mdb_p1 {url|primary|http://allocine.fr/film/fichefilm_gen_cfilm=|show_id|.html} 
+url_mdb_p2 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/casting} +url_mdb_p3 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/presse/} +url_mdb_p4 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/spectateurs/membres-critiques/} +* +* imdb elements +mdb_title.scrub {single()|p1|
Titre original
|||} * original title when redirected +mdb_title.scrub {single()|p1||" />} * normal title +mdb_title.scrub {multi()|p1|Ce film dans d'autres pays|||} * aka's +mdb_title.modify {cleanup()} + +mdb_productiondate.scrub {single()|p1|
Année de production
|||} * original title when redirected + +mdb_actor.scrub {multi()|p2|itemprop="actors"|||} + +mdb_director.scrub {multi()|p2|itemprop="director"|||} + +mdb_starrating.scrub {single()|p1||\|} +mdb_starrating.modify {substring(type=element)|-1 1} * get the last part (should be the rating) + +mdb_starratingvotes.scrub {single()|p1||

|

} +mdb_description.modify {cleanup(tags="<"">")} + +mdb_commentsummary.scrub {multi(max=5)|p4|

|

|

} * comments spectateur +mdb_commentsummary.modify {cleanup(tags="<"">")} + +mdb_review.scrub {multi()|p3|
|

|

|

} * comments presse +mdb_review.modify {cleanup(tags="<"">")} + +* remove emty comments = containing "Voir la critique sur" +******mdb_temp_6.modify {addstart()|'mdb_review'} +******mdb_temp_1.modify {calculate(debug type=element format=F0)|'mdb_review' #} * number of comments +*******mdb_review.modify {clear} +*******loop {('mdb_temp_1' > "0" max=50)|end} +******mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index +******mdb_temp_2.modify {substring(debug type=element)|'mdb_temp_6' 'mdb_temp_1' 1} +******mdb_temp_3.modify {addstart(debug 'mdb_temp_2' not~ "Voir la critique sur")|####'mdb_temp_2'} * only add non-empty comments +*******end_loop +****** +******mdb_review.modify {addstart(debug)|'mdb_temp_3'} * make multi +******mdb_review.modify {replace(debug)|####|\|} * make multi diff --git a/config/mdb/allocine.fr.ini b/config/mdb/allocine.fr.ini new file mode 100644 index 0000000..93da7f0 --- /dev/null +++ b/config/mdb/allocine.fr.ini @@ -0,0 +1,61 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion : +* @Site: allocine.fr +* @Revision 0 - [04/06/2013] Francis De Paemeleere +* - creation (adjust from allocine.fr.google.ini) +* @Remarks: primary search with allocine.fr self +* @header_end +**------------------------------------------------------------------------------------------------ +* +site {url=allocine.fr|cultureinfo=fr-FR|charset=UTF-8|matchfactor=60|searchsite=allocine} +* primary search: +url_primarysearch {url(urlencode=1,2,3,4)|http://www.allocine.fr/recherche/?q=|'title'|} +show_id.scrub {multi()|primary|/film/fichefilm_gen_cfilm=||.html|.html} +* +* imdb url's: +url_mdb_p1 {url|primary|http://allocine.fr/film/fichefilm_gen_cfilm=|show_id|.html} +url_mdb_p2 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/casting} +url_mdb_p3 
{url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/presse/} +url_mdb_p4 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/spectateurs/membres-critiques/} +* +* imdb elements +mdb_title.scrub {single()|p1|
Titre original
|||} * original title when redirected +mdb_title.scrub {single()|p1||" />} * normal title +mdb_title.scrub {multi()|p1|Ce film dans d'autres pays|||} * aka's +mdb_title.modify {cleanup()} + +mdb_productiondate.scrub {single()|p1|
Année de production
|||} * original title when redirected + +mdb_actor.scrub {multi()|p2|itemprop="actors"|||} + +mdb_director.scrub {multi()|p2|itemprop="director"|||} + +mdb_starrating.scrub {single()|p1||\|} +mdb_starrating.modify {substring(type=element)|-1 1} * get the last part (should be the rating) + +mdb_starratingvotes.scrub {single()|p1||

|

} +mdb_description.modify {cleanup(tags="<"">")} + +mdb_commentsummary.scrub {multi(max=5)|p4|

|

|

} * comments spectateur +mdb_commentsummary.modify {cleanup(tags="<"">")} + +mdb_review.scrub {multi()|p3|
|

|

|

} * comments presse +mdb_review.modify {cleanup(tags="<"">")} + +* remove emty comments = containing "Voir la critique sur" +******mdb_temp_6.modify {addstart()|'mdb_review'} +******mdb_temp_1.modify {calculate(debug type=element format=F0)|'mdb_review' #} * number of comments +*******mdb_review.modify {clear} +*******loop {('mdb_temp_1' > "0" max=50)|end} +******mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index +******mdb_temp_2.modify {substring(debug type=element)|'mdb_temp_6' 'mdb_temp_1' 1} +******mdb_temp_3.modify {addstart(debug 'mdb_temp_2' not~ "Voir la critique sur")|####'mdb_temp_2'} * only add non-empty comments +*******end_loop +****** +******mdb_review.modify {addstart(debug)|'mdb_temp_3'} * make multi +******mdb_review.modify {replace(debug)|####|\|} * make multi diff --git a/config/mdb/imdb.com.ask.ini b/config/mdb/imdb.com.ask.ini index 0b89c3b..a92da40 100644 --- a/config/mdb/imdb.com.ask.ini +++ b/config/mdb/imdb.com.ask.ini @@ -1,61 +1,98 @@ -* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites -* Site : imdb.com, primary search with ask.com -* revision : 1 correction in production date -* Jan van Straaten, 14/04/2012 +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion: V1.1.1/56.25 +* @Site: imdb.com, primary search with ask.com +* @Revision 11 - [22/05/2016] Jan van Straaten +* - added mdbinitype +* - fixed star-rating and original title +* @Revision 10 - [07/12/2015] Jan van Straaten +* - change element names, mdb_show_id and mdb_episode_id +* @Revision 9 - [25/09/2015] Jan van Straaten +* - added mdb_category +* @Revision 8 - [10/10/2014] Jan van Straaten +* - improved showid scub, also numbers upto 5000000 (was 2500000) +* @Revision 7 - [09/06/2014] Jan van Straaten +* - added url header +* @Revision 6 - [07/06/2014] Jan van Straaten/Jagad +* - added mdb_showicon +* 
@Revision 5 - [20/12/2013] Jan van Straaten +* - changes in (aka)titles +* @Revision 4 - [23/11/2013] Jan van Straaten +* - changes in actor and director due to site changes +* @Revision 3 - [11/08/2013] Jan van Straaten +* - small changes in commentsummary due to imdb.com changes +* @Revision 2 - [16/02/2013] Jan van Straaten +* - small changes in actor due to imdb.com changes +* @Revision 1 - [14/04/2012] Jan van Straaten +* - correction in production date +* @Remarks: none +* @header_end +**------------------------------------------------------------------------------------------------ +* +* +site {url=imdb.com|mdbinitype=movie|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=ask} * -site {url=imdb.com|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=ask} * primary search: http://www.ask.com/web?&q=imdb%2bDer+grosse+Edison%2b%2bClarence+Brown&/NCR *url_primarysearch {url(urlencode=1,2,3,4,5,6)|http://www.ask.com/web?&q=|imdb+|'title'|+|'productiondate'|+|'credit'|&/NCR} -url_primarysearch {url(debug urlencode=1,2,3,4)|http://www.ask.com/web?&q=|imdb+|'title'|+|'credit'|&/NCR} -show_id.scrub {multi|primary|imdb|/tt|/|onmousedown} +url_primarysearch {url(urlencode=1,2,3,4)|http://www.ask.com/web?&q=|imdb+|'title'|+|'credit'|&/NCR} +mdb_show_id.scrub {multi|primary|imdb|/tt|/|onmousedown} * -show_id.modify {remove| } * remove spaces +mdb_show_id.modify {remove| } * remove spaces * filter showid (7 char long): -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|4} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index -mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect -mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this show_id? 
-show_id.modify {remove('mdb_temp_3' not "7" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not 7 chars +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this mdb_show_id? +mdb_show_id.modify {remove('mdb_temp_3' not "7" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not 7 chars * end loop -* filter showid (only numbers and < 2500000): -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +* filter showid (only numbers and < 5000000): +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|5} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index -mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect mdb_temp_3.modify {calculate(format=F0)|'mdb_temp_2'} * convert to number -show_id.modify {remove('mdb_temp_3' "0" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not only numbers -show_id.modify {remove('mdb_temp_3' > "2500000" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if > 2500000 +mdb_show_id.modify {remove('mdb_temp_3' "0" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not only numbers +mdb_show_id.modify {remove('mdb_temp_3' > "5000000" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if > 5000000 * end loop * * imdb url's: -url_mdb_p1 {url|primary|http://www.imdb.com/title/tt|show_id|/} -*url_mdb_p1 {url|primary|http://www.imdb.com/find?q=tt|show_id|&s=all} +url_mdb_p1 {url()|primary|http://www.imdb.com/title/tt|mdb_show_id|/} +*url_mdb_p1 {url|primary|http://www.imdb.com/find?q=tt|mdb_show_id|&s=all} 
*http://www.imdb.com/find?q=tt2200000&s=all *url_mdb_p2.modify {addstart|'url_mdb_p1'plotsummary} *url_mdb_p3.modify {addstart|'url_mdb_p1'releaseinfo#akas} *url_mdb_p4.modify {addstart|'url_mdb_p1'reviews} *url_mdb_p5.modify {addstart|'url_mdb_p1'fullcredits#cast} * -url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|show_id|/plotsummary} -url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|show_id|/releaseinfo#akas} -url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|show_id|/reviews} -url_mdb_p5 {url|primary|http://www.imdb.com/title/tt|show_id|/fullcredits#cast} +url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/plotsummary} +url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/releaseinfo#akas} +url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/reviews} +url_mdb_p5 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/fullcredits#cast} +* +url_mdb.headers {customheader=Accept-Encoding=gzip,deflate} * * imdb elements -mdb_title.scrub {single|p1||||} * original title when redirected +mdb_title.scrub {single(separator=":")|p1|
|}* original title when redirected +mdb_title.modify {cleanup(tags="/=\"")} * removes starting " mdb_title.scrub {single(separator=" - " exclude="IMDb" include=first)|p1|||(|} -mdb_title.scrub {multi(separator=" - ")|p3|
Also Known As (AKA)
|\n||} *aka's -*mdb_productiondate.scrub {single|p1||(|)|} +mdb_title.scrub {multi(exclude="(""title")|p3|\n |
|
} *aka's +* mdb_productiondate.scrub {single|p1||||} -mdb_actor.scrub {multi|p1|itemprop="actors"|>||
} -mdb_actor.scrub {multi(exclude="||} * full list -mdb_director.scrub {multi|p1|itemprop="director"|>||
} -mdb_director.scrub {multi|p5|Directed by|/">||} * fulllist -mdb_starrating.scrub {single|p1|Ratings:|itemprop="ratingValue">|
|from} -mdb_starratingvotes.scrub {single|p1|Ratings:|itemprop="ratingCount">|
|users} -mdb_commentsummary.scrub {multi(max=5 exclude="This review may contain spoilers")|p4||||Add another review} +** new: +mdb_category.scrub {regex()|p1||(.+?)||} +** +mdb_actor.scrub {multi(exclude="onclick=")|p1|itemprop="actors"|||
} +mdb_actor.scrub {multi(exclude="|
|} * full list +mdb_director.scrub {multi|p1|itemprop="director"|||
} +mdb_director.scrub {multi|p5|Directed by|" > ||} * fulllist +mdb_starrating.scrub {single()|p1|
|itemprop="ratingValue">||
} +mdb_starratingvotes.scrub {single|p1|
|based on|user ratings|
} +mdb_commentsummary.scrub {multi(max=5 exclude="This review may contain spoilers")|p4||

|

|Add another review} mdb_review.scrub {multi(exclude="SPOILERS ARE INCLUDED" include=first)|p4|
|

\n

|

\n\n|
|||} -mdb_description.scrub {single|p1|||} +* diff --git a/config/mdb/imdb.com.bing.ini b/config/mdb/imdb.com.bing.ini index d04b8f5..af16b93 100644 --- a/config/mdb/imdb.com.bing.ini +++ b/config/mdb/imdb.com.bing.ini @@ -1,57 +1,91 @@ -* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites -* Site : imdb.com, primary search with bing.com -* revision : 1 correction in productiondate -* Jan van Straaten, 14/04/2012 +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion: V1.1.1/56.25 +* @Site: imdb.com, primary search with bing.com +* @Revision 10 - [22/05/2016] Jan van Straaten +* - added mdbinitype +* - fixed star-rating and original title +* @Revision 9 - [07/12/2015] Jan van Straaten +* - change element names, mdb_show_id and mdb_episode_id +* @Revision 8 - [25/09/2015] Jan van Straaten +* - added mdb_category +* @Revision 7 - [10/10/2014] Jan van Straaten +* - improved showid scub, also numbers upto 5000000 (was 2500000) +* @Revision 6 - [09/06/2014] Jan van Straaten +* - added url header +* @Revision 5 - [07/06/2014] Jan van Straaten/Jagad +* - added mdb_showicon +* @Revision 4 - [20/12/2013] Jan van Straaten +* - changes in (aka)titles +* @Revision 3 - [23/11/2013] Jan van Straaten +* - changes in actor and director due to site changes +* @Revision 2 - [11/08/2013] Jan van Straaten +* - small changes in title, actor and commentsummary due to imdb.com changes +* @Revision 1 - [14/04/2012] Jan van Straaten +* - correction in production date +* @Remarks: none +* @header_end +**------------------------------------------------------------------------------------------------ * -site {url=imdb.com|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=bing} +* +site {url=imdb.com|mdbinitype=movie|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=bing} * primary search: url_primarysearch 
{url|http://www.bing.com/search?q=|imdb+title/tt+|'title'|+|'productiondate'|+|'credit'|&scope=web&setmkt=en-US&qs=ns&form=QBRE&qb=2} *scope=web&setmkt=es-ES&setlang=match -show_id.scrub {multi(exclude="AND")|primary|imdb|/tt|/|onmousedown} +mdb_show_id.scrub {regex()|primary||title/tt(\d{7})/||} * * filter showid (7 char long): -show_id.modify {remove| } * remove spaces -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +mdb_show_id.modify {remove| } * remove spaces +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|4} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index -mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect -mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this show_id? -show_id.modify {remove('mdb_temp_3' not "7" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not 7 chars +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this mdb_show_id? 
+mdb_show_id.modify {remove('mdb_temp_3' not "7" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not 7 chars * end loop -* filter showid (only numbers and < 2500000): -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +* filter showid (only numbers and < 5000000): +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|5} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index -mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect mdb_temp_3.modify {calculate(format=F0)|'mdb_temp_2'} * convert to number -show_id.modify {remove('mdb_temp_3' "0" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not only numbers -show_id.modify {remove('mdb_temp_3' > "2500000" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if > 2500000 +mdb_show_id.modify {remove('mdb_temp_3' "0" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not only numbers +mdb_show_id.modify {remove('mdb_temp_3' > "5000000" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if > 5000000 * end loop * * imdb url's: -url_mdb_p1 {url|primary|http://www.imdb.com/title/tt|show_id|/} +url_mdb_p1 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/} *url_mdb_p2.modify {addstart|'url_mdb_p1'plotsummary} *url_mdb_p3.modify {addstart|'url_mdb_p1'releaseinfo#akas} *url_mdb_p4.modify {addstart|'url_mdb_p1'reviews} *url_mdb_p5.modify {addstart|'url_mdb_p1'fullcredits#cast} * -url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|show_id|/plotsummary} -url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|show_id|/releaseinfo#akas} -url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|show_id|/reviews} -url_mdb_p5 
{url|primary|http://www.imdb.com/title/tt|show_id|/fullcredits#cast} +url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/plotsummary} +url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/releaseinfo#akas} +url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/reviews} +url_mdb_p5 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/fullcredits#cast} +* +url_mdb.headers {customheader=Accept-Encoding=gzip,deflate} * * imdb elements -mdb_title.scrub {single|p1||||} * original title when redirected +mdb_title.scrub {single(separator=":")|p1|
|}* original title when redirected +mdb_title.modify {cleanup(tags="/=\"")} * removes starting " mdb_title.scrub {single(separator=" - " exclude="IMDb" include=first)|p1|||(|} -mdb_title.scrub {multi(separator=" - ")|p3|
Also Known As (AKA)
|\n||} *aka's +mdb_title.scrub {multi(exclude="(""title")|p3|\n |
|
} *aka's +* mdb_productiondate.scrub {single|p1||||} -mdb_actor.scrub {multi|p1|itemprop="actors"|>||
} -mdb_actor.scrub {multi(exclude="||} * full list -mdb_director.scrub {multi|p1|itemprop="director"|>||
} -mdb_director.scrub {multi|p5|Directed by|/">||} * fulllist -mdb_starrating.scrub {single|p1|Ratings:|itemprop="ratingValue">|
|from} -mdb_starratingvotes.scrub {single|p1|Ratings:|itemprop="ratingCount">|
|users} -mdb_commentsummary.scrub {multi(max=5 exclude="This review may contain spoilers")|p4||||Add another review} +** new: +mdb_category.scrub {regex()|p1||(.+?)||} +** +mdb_actor.scrub {multi(exclude="onclick=")|p1|itemprop="actors"|||} +mdb_actor.scrub {multi(exclude="|
|} * full list +mdb_director.scrub {multi|p1|itemprop="director"|||} +mdb_director.scrub {multi|p5|Directed by|" > ||} * fulllist +mdb_starrating.scrub {single()|p1|
|itemprop="ratingValue">||
} +mdb_starratingvotes.scrub {single|p1|
|based on|user ratings|
} +mdb_commentsummary.scrub {multi(max=5 exclude="This review may contain spoilers")|p4||

|

|Add another review} mdb_review.scrub {multi(exclude="SPOILERS ARE INCLUDED" include=first)|p4|
|

\n

|

\n\n|
|||} -mdb_description.scrub {single|p1|||} \ No newline at end of file diff --git a/config/mdb/imdb.com.bing_series.ini b/config/mdb/imdb.com.bing_series.ini new file mode 100644 index 0000000..5763de0 --- /dev/null +++ b/config/mdb/imdb.com.bing_series.ini @@ -0,0 +1,110 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion: V1.1.1/56.25 +* - (postprocess V2.0) +* @Site: imdb.com +* @Revision 1 - [15/06/2016] Jan van Straaten +* - support for match on episode-num or sub-title, use of new scopes +* - added mdbinitype +* @Revision 0 - [13/02/2016] Jan van Straaten +* - Bing as primary search, based on imdb.com.imdb.ini rev 6 +* @Remarks: Series data extraction. English version. +* Bing improves matching especially for foreign language titles. +* @header_end +**------------------------------------------------------------------------------------------------ +* +site {url=imdb.com|mdbinitype=serie|cultureinfo=en-GB|charset=UTF-8|matchfactor=70|searchsite=imdb|episodesystem=onscreen}} +scope.range {(primarysearch)|end} +* primary search (using bing): +url_primarysearch {url()|https://www.bing.com/search?q=IMDb+|'title'|} +https://www.bing.com/search?q=IMDb+Unge+kommissarie+Morse + +url_primarysearch.modify {replace| |+} +*http://www.imdb.com/search/title?title=Touched%20by%20an%20Angel&title_type=tv_series +url_primarysearch.headers {customheader=Accept-Encoding=gzip,deflate} + +mdb_show_id.scrub {regex|primary||||} +* episode-num +mdb_episodenumlist.scrub {regex(pattern="'S1'.'E1'")|p1||(.+?)||} +* +mdb_title.scrub {single(separator="(" include=first exclude="")|p1||||} * the original title +mdb_title.scrub {single(separator="(" include=first)|p1||||} +mdb_title.modify {cleanup(tags="/=\"")} * removes starting " +mdb_title.modify {cleanup(tags="\"=/")} +* +** aka's not yet implemented if at all possible +*mdb_title.scrub 
{multi(separator=" - ")|p3|
Also Known As (AKA)
|\n||} *aka's +*mdb_title.scrub {multi|p3|
Also Known As (AKA)
|\n||} *aka's +* +mdb_temp_6.scrub {regex()|p1||\s+?(||} * all the episodes * +end_scope +* +scope.range {(getelements)|end} +* in case of matched subtitle +mdb_temp_1.modify {calculate('mdb_episodetitlelist' not "" type=element format=F0)|'mdb_episodetitlelist' 'mdb_subtitle' @} * index of the episode +* in case of matched episodenum +mdb_temp_1.modify {calculate('mdb_episodenumlist' not "" type=element format=F0)|'mdb_episodenumlist' 'mdb_episode' @} * index of the episode +mdb_temp_1.modify {substring(type=element)|'mdb_temp_6' 'mdb_temp_1' 1} * the episode in xml format +* +* elements from mdb_temp_1 (the episode) +** get the mdb_episode_id +mdb_episode_id.modify {substring(type=regex)|'mdb_temp_1' "(\d\{7\})/\">"} * get the tt nbr for the episode +* +* the following elements are taken from the episode detail page mdb-p2 +* there is a story line, director and actors , starrating, episodenum +* also a 'full synopsys' on a separate page mdb-p3 ?? +* +** full productiondate +mdb_productiondate.scrub {single()|p2|} +mdb_productiondate.modify {calculate(format=productiondate)} * only year allowed! +mdb_category.scrub {regex()|p2||(.+?)||} +mdb_actor.scrub {multi()|p4|?ref_=ttfc_fc_cl_t|itemprop="name">||} +mdb_director.scrub {multi|p4|?ref_=ttfc_fc_dr|" >||} +mdb_starrating.scrub {single()|p2|
|itemprop="ratingValue">||
} +mdb_starratingvotes.scrub {single|p2|
|based on|user ratings|
} +mdb_showicon.scrub {single|p2|Poster"|src="|"|"image" />} +mdb_commentsummary.scrub {multi(exclude="SPOILERS ARE INCLUDED""This review may contain spoilers""Add another review" include=first)|p6||

|

|Add another review} +mdb_review.scrub {multi(exclude="SPOILERS ARE INCLUDED""This review may contain spoilers""Add another review" include=first)|p6|
|

|

\n\n|Add another review} +mdb_plot.scrub {single(separator="Storyline|

|

|
} +mdb_description.scrub {regex|p2||||} +* +* subtitle when not already done with episodetitlelist +mdb_subtitle.modify {substring("" type=regex)|'mdb_temp_1' "(.+?)"} +* episode must be last because it is used to get mdb_temp_1 (the actual episode data from mdb_temp_6) +mdb_episode.modify {clear} +loop {('mdb_episode' "" max=1)|end} +mdb_episode.modify {substring(type=regex)|'mdb_temp_1' "(.+?)"} +mdb_temp_3.modify {substring(type=regex)|'mdb_episode' "\.(\d*)"} * episode part +mdb_episode.modify {substring(type=regex)|'mdb_episode' "(\d*)\."} * the season part +* onsceen format +mdb_episode.modify {addstart(not "")|S} +mdb_episode.modify {addend('mdb_temp_3' not "")|E'mdb_temp_3'} +* convert to xmltv_ns +*mdb_temp_3.modify {calculate(not "" format=F0)|1 -} +*mdb_episode.modify {substring(type=regex)|'mdb_episode' "(\d*)\."} * the season part +*mdb_episode.modify {calculate(not "" format=F0)|1 -} +*mdb_episode.modify {addend()|.'mdb_temp_3'.} +end_loop +end_scope \ No newline at end of file diff --git a/config/mdb/imdb.com.imdb.ini b/config/mdb/imdb.com.imdb.ini index 7b6279a..35e099c 100644 --- a/config/mdb/imdb.com.imdb.ini +++ b/config/mdb/imdb.com.imdb.ini @@ -1,58 +1,103 @@ -* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites -* Site : imdb.com, primary search with imdb -* revision : 0 -* Jan van Straaten, 14/05/2012 +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion: V1.1.1/56.25 +* @Site: imdb.com, primary search with imdb.com +* @Revision 10 - [22/05/2016] Jan van Straaten +* - added mdbinitype +* - fixed star-rating and original title +* @Revision 9 - [07/12/2015] Jan van Straaten +* - change element names, mdb_show_id and mdb_episode_id +* @Revision 8 - [25/09/2015] Jan van Straaten +* - added mdb_category +* @Revision 7 - [10/10/2014] Jan van Straaten +* - improved showid scub, also numbers upto 
5000000 (was 2500000) +* @Revision 6 - [09/06/2014] Jan van Straaten +* - added url header +* @Revision 5 - [07/06/2014] Jan van Straaten/Jagad +* - added mdb_showicon +* @Revision 4 - [20/12/2013] Jan van Straaten +* - changes in (aka)titles +* @Revision 3 - [23/11/2013] Jan van Straaten +* - changes in actor and director due to site changes +* @Revision 2 - [11/08/2013] Jan van Straaten +* - small changes in title and commentsummary due to site changes +* @Revision 1 - [16/02/2013] Jan van Straaten +* - url primary search connects to advanced search, +* this is less effective than the normal search from rev 0 +* but that became useless because imdb.com changed the result +* of that (too many hits) +* - small changes in actor due to imdb.com changes +* @Revision 0 - [14/05/2012] Jan van Straaten +* - creation +* @Remarks: It is advised to use this ini as the 'second chance' ini, +* see mdb.config +* @header_end +**------------------------------------------------------------------------------------------------ * -site {url=imdb.com|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=imdb} +* +site {url=imdb.com|mdbinitype=movie|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=imdb} * primary search: -url_primarysearch {url(urlencode=1,2)|http://www.imdb.com/find?q=|'title'| + |%28|'productiondate'|%29+&s=tt} +*url_primarysearch {url(urlencode=1,2)|http://www.imdb.com/find?q=|'title'| + |%28|'productiondate'|%29+&s=tt} *http://www.imdb.com/find?q=You+Only+Live+Twice+%2B+%281967%29&s=tt -show_id.scrub {multi|primary|||} * * filter showid (7 char long): -show_id.modify {remove| } * remove spaces -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +mdb_show_id.modify {remove| } * remove spaces +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|4} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index 
-mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect -mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this show_id? -show_id.modify {remove('mdb_temp_3' not "7" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not 7 chars +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this mdb_show_id? +mdb_show_id.modify {remove('mdb_temp_3' not "7" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not 7 chars * end loop -* filter showid (only numbers and < 2500000): -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +* filter showid (only numbers and < 5000000): +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|5} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index -mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect mdb_temp_3.modify {calculate(format=F0)|'mdb_temp_2'} * convert to number -show_id.modify {remove('mdb_temp_3' "0" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not only numbers -show_id.modify {remove('mdb_temp_3' > "2500000" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if > 2500000 +mdb_show_id.modify {remove('mdb_temp_3' "0" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not only numbers +mdb_show_id.modify {remove('mdb_temp_3' > "5000000" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if > 5000000 * end loop * * imdb url's: -url_mdb_p1 {url|primary|http://www.imdb.com/title/tt|show_id|/} +url_mdb_p1 
{url|primary|http://www.imdb.com/title/tt|mdb_show_id|/} *url_mdb_p2.modify {addstart|'url_mdb_p1'plotsummary} *url_mdb_p3.modify {addstart|'url_mdb_p1'releaseinfo#akas} *url_mdb_p4.modify {addstart|'url_mdb_p1'reviews} *url_mdb_p5.modify {addstart|'url_mdb_p1'fullcredits#cast} * -url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|show_id|/plotsummary} -url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|show_id|/releaseinfo#akas} -url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|show_id|/reviews} -url_mdb_p5 {url|primary|http://www.imdb.com/title/tt|show_id|/fullcredits#cast} +url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/plotsummary} +url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/releaseinfo#akas} +url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/reviews} +url_mdb_p5 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/fullcredits#cast} +* +url_mdb.headers {customheader=Accept-Encoding=gzip,deflate} * * imdb elements -mdb_title.scrub {single|p1||||} * original title when redirected +mdb_title.scrub {single(separator=":")|p1|
|}* original title when redirected +mdb_title.modify {cleanup(tags="/=\""} * removes starting " mdb_title.scrub {single(separator=" - " exclude="IMDb" include=first)|p1|||(|} -*mdb_title.scrub {multi(separator=" - ")|p3|
Also Known As (AKA)
|\n||} *aka's -mdb_title.scrub {multi|p3|
Also Known As (AKA)
|\n||} *aka's +mdb_title.scrub {multi(exclude="(""title")|p3|\n |
|
} *aka's +* mdb_productiondate.scrub {single|p1||||} -mdb_actor.scrub {multi|p1|itemprop="actors"|>||
} -mdb_actor.scrub {multi(exclude="||} * full list -mdb_director.scrub {multi|p1|itemprop="director"|>||} -mdb_director.scrub {multi|p5|Directed by|/">||} * fulllist -mdb_starrating.scrub {single|p1|Ratings:|itemprop="ratingValue">||from} -mdb_starratingvotes.scrub {single|p1|Ratings:|itemprop="ratingCount">||users} -mdb_commentsummary.scrub {multi(max=5 exclude="This review may contain spoilers")|p4||||Add another review} +** new: +mdb_category.scrub {regex()|p1||(.+?)||} +** +mdb_actor.scrub {multi(exclude="onclick=")|p1|itemprop="actors"|||} +mdb_actor.scrub {multi(exclude="||} * full list +mdb_director.scrub {multi|p1|itemprop="director"|||} +mdb_director.scrub {multi|p5|Directed by|" > ||} * fulllist +mdb_starrating.scrub {single()|p1|
|itemprop="ratingValue">||
} +mdb_starratingvotes.scrub {single|p1|
|based on|user ratings|
} +mdb_commentsummary.scrub {multi(max=5 exclude="This review may contain spoilers")|p4||

|

|Add another review} mdb_review.scrub {multi(exclude="SPOILERS ARE INCLUDED" include=first)|p4|
|

\n

|

\n\n|
|||} -mdb_description.scrub {single|p1|||} \ No newline at end of file diff --git a/config/mdb/imdb.com.imdb_series.ini b/config/mdb/imdb.com.imdb_series.ini new file mode 100644 index 0000000..3c98d72 --- /dev/null +++ b/config/mdb/imdb.com.imdb_series.ini @@ -0,0 +1,118 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion: V1.1.1/56.25 +* - (postprocess V2.0) +* @Site: imdb.com +* @Revision 7 - [25/05/2016] Jan van Straaten +* - support for match on episode-num or sub-title, use of new scopes +* - added mdbinitype +* @Revision 6 - [12/02/2016] Jan van Straaten +* - fix of the episode and productiondate +* @Revision 5 - [07/12/2015] Jan van Straaten +* - change element names, mdb_show_id and mdb_episode_id +* @Revision 4 - [30/09/2015] Jan van Straaten +* - added mdb-category +* @Revision 3 - [11/08/2014] Jan van Straaten +* - improved mdb_episode_id selection +* @Revision 2 - [09/06/2014] Jan van Straaten +* - added url header +* @Revision 1 - [24/11/2013] Jan van Straaten +* - version check enabled +* @Revision 0 - [09/11/2013] Jan van Straaten +* - creation +* @Remarks: Series data extraction. 
English version +* @header_end +**------------------------------------------------------------------------------------------------ +* +* +site {url=imdb.com|mdbinitype=serie|cultureinfo=en-GB|charset=UTF-8|matchfactor=70|searchsite=imdb|episodesystem=xmltv_ns} +scope.range {(primarysearch)|end} +* primary search (using imdb's advanced search): +url_primarysearch {url()|http://www.imdb.com/search/title?&title=|'title'|&title_type=tv_series} +url_primarysearch.modify {replace| |%20} +*http://www.imdb.com/search/title?title=Touched%20by%20an%20Angel&title_type=tv_series +url_primarysearch.headers {customheader=Accept-Encoding=gzip,deflate} +mdb_show_id.scrub {multi|primary||} +* +* imdb url's: +url_mdb_p1.modify {addstart()|http://www.imdb.com/title/tt'mdb_show_id'/epdate} * all the episodes date sorted with episode title and mdb_episode_id +url_mdb_p2.modify {addstart()|http://www.imdb.com/title/tt'mdb_episode_id'} * the episode detail page +* or http://www.imdb.com/title/tt0553267/?ref_=tt_ep_pr = same +url_mdb_p3.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/synopsis?ref_=tt_stry_pl} * the full synopsis +* is same as * http://www.imdb.com/title/tt2288518/synopsis full synopsis +url_mdb_p4.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/fullcredits?ref_=tt_ql_1} *full cast and crew (director, writer, actor) +* http://www.imdb.com/title/tt2288518/fullcredits?ref_=tt_ql_1 +url_mdb_p5.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/plotsummary?ref_=tt_ql_5} *plot summary (not used) +url_mdb_p6.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/reviews?ref_=tt_ql_7} *user reviews +* +url_mdb.headers {customheader=Accept-Encoding=gzip,deflate} +end_scope +* +scope.range {(match)|end} +* imdb elements +* possible mustmatch elements +* episodetitle (sub-title) +mdb_episodetitlelist.scrub {multi()|p1|||} +* episode-num +mdb_episodenumlist.scrub {regex(pattern="'S1'.'E1'")|p1||(.+?)||} +* +mdb_title.scrub 
{single(separator="(" include=first exclude="")|p1||||} * the original title +mdb_title.scrub {single(separator="(" include=first)|p1||||} +mdb_title.modify {cleanup(tags="/=\"")} * removes starting " +mdb_title.modify {cleanup(tags="\"=/")} +* +** aka's not yet implemented if at all possible +*mdb_title.scrub {multi(separator=" - ")|p3|
Also Known As (AKA)
|\n||} *aka's +*mdb_title.scrub {multi|p3|
Also Known As (AKA)
|\n||} *aka's +* +mdb_temp_6.scrub {regex()|p1||\s+?(||} * all the episodes * +end_scope +* +scope.range {(getelements)|end} +* in case of matched subtitle +mdb_temp_1.modify {calculate('mdb_episodetitlelist' not "" type=element format=F0)|'mdb_episodetitlelist' 'mdb_subtitle' @} * index of the episode +* in case of matched episodenum +mdb_temp_1.modify {calculate('mdb_episodenumlist' not "" type=element format=F0)|'mdb_episodenumlist' 'mdb_episode' @} * index of the episode +mdb_temp_1.modify {substring(type=element)|'mdb_temp_6' 'mdb_temp_1' 1} * the episode in xml format +* +* elements from mdb_temp_1 (the episode) +** get the mdb_episode_id +mdb_episode_id.modify {substring(type=regex)|'mdb_temp_1' "(\d\{7\})/\">"} * get the tt nbr for the episode +* +* the following elements are taken from the episode detail page mdb-p2 +* there is a story line, director and actors , starrating, episodenum +* also a 'full synopsys' on a separate page mdb-p3 ?? +* +** full productiondate +mdb_productiondate.scrub {single()|p2|} +mdb_productiondate.modify {calculate(format=productiondate)} * only year allowed! +mdb_category.scrub {regex()|p2||(.+?)||} +mdb_actor.scrub {multi()|p4|?ref_=ttfc_fc_cl_t|itemprop="name">||} +mdb_director.scrub {multi|p4|?ref_=ttfc_fc_dr|" >||} +mdb_starrating.scrub {single()|p2|
|itemprop="ratingValue">||
} +mdb_starratingvotes.scrub {single|p2|
|based on|user ratings|
} +mdb_showicon.scrub {single|p2|Poster"|src="|"|"image" />} +mdb_commentsummary.scrub {multi(exclude="SPOILERS ARE INCLUDED""This review may contain spoilers""Add another review" include=first)|p6||

|

|Add another review} +mdb_review.scrub {multi(exclude="SPOILERS ARE INCLUDED""This review may contain spoilers""Add another review" include=first)|p6|
|

|

\n\n|Add another review} +mdb_plot.scrub {single(separator="Storyline|

|

|
} +mdb_description.scrub {regex|p2||||} +* +* subtitle when not already done with episodetitlelist +mdb_subtitle.modify {substring("" type=regex)|'mdb_temp_1' "(.+?)"} +* episode must be last because it is used to get mdb_temp_1 (the actual episode data from mdb_temp_6) +mdb_episode.modify {clear} +loop {('mdb_episode' "" max=1)|end} +mdb_episode.modify {substring(type=regex)|'mdb_temp_1' "(.+?)"} +mdb_temp_3.modify {substring(type=regex)|'mdb_episode' "\.(\d*)"} * episode part +mdb_episode.modify {substring(type=regex)|'mdb_episode' "(\d*)\."} * the season part +* onsceen format +mdb_episode.modify {addstart(not "")|S} +mdb_episode.modify {addend('mdb_temp_3' not "")|E'mdb_temp_3'} +* convert to xmltv_ns +*mdb_temp_3.modify {calculate(not "" format=F0)|1 -} +*mdb_episode.modify {substring(type=regex)|'mdb_episode' "(\d*)\."} * the season part +*mdb_episode.modify {calculate(not "" format=F0)|1 -} +*mdb_episode.modify {addend()|.'mdb_temp_3'.} +end_loop +end_scope \ No newline at end of file diff --git a/config/mdb/imdb.com.imdb_series.onscreen.ini b/config/mdb/imdb.com.imdb_series.onscreen.ini new file mode 100644 index 0000000..2b94646 --- /dev/null +++ b/config/mdb/imdb.com.imdb_series.onscreen.ini @@ -0,0 +1,95 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion: V1.1.1/56.12 +* - (postprocess V1.11) +* @Site: imdb.com +* @Revision 4 - [07/12/2015] Jan van Straaten +* - change element names, mdb_show_id and mdb_episode_id +* @Revision 3 - [30/09/2015] Jan van Straaten +* - added mdb-category +* @Revision 2 - [11/08/2014] Jan van Straaten +* - improved mdb_episode_id selection +* @Revision 1 - [09/06/2014] Jan van Straaten +* - added url header +* @Revision 0 - [07/04/2014] Jan van Straaten +* - creation +* @Remarks: Series data extraction. 
English version +* - variant of imdb.com.imdb_series.ini rev 1 , episode in onscreen s2e4 format +* @header_end +**------------------------------------------------------------------------------------------------ +* +* +site {url=imdb.com|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=imdb|episodesystem=onscreen} +* primary search (using imdb's advanced search): +url_primarysearch {url()|http://www.imdb.com/search/title?&title=|'title'|&title_type=tv_series} +url_primarysearch.modify {replace| |%20} +*http://www.imdb.com/search/title?title=Touched%20by%20an%20Angel&title_type=tv_series +url_primarysearch.headers {customheader=Accept-Encoding=gzip,deflate} +mdb_show_id.scrub {multi|primary||} +* +* imdb url's: +url_mdb_p1.modify {addstart|http://www.imdb.com/title/tt'mdb_show_id'/epdate} * all the episodes date sorted with episode title and mdb_episode_id +url_mdb_p2.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'} * the episode detail page +* or http://www.imdb.com/title/tt0553267/?ref_=tt_ep_pr = same +url_mdb_p3.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/synopsis?ref_=tt_stry_pl} * the full synopsis +* is same as * http://www.imdb.com/title/tt2288518/synopsis full synopsis +url_mdb_p4.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/fullcredits?ref_=tt_ql_1} *full cast and crew (director, writer, actor) +* http://www.imdb.com/title/tt2288518/fullcredits?ref_=tt_ql_1 +url_mdb_p5.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/plotsummary?ref_=tt_ql_5} *plot summary (not used) +url_mdb_p6.modify {addstart|http://www.imdb.com/title/tt'mdb_episode_id'/reviews?ref_=tt_ql_7} *user reviews +* +url_mdb.headers {customheader=Accept-Encoding=gzip,deflate} +* +* imdb elements +mdb_subtitle.scrub {multi|p1|||} +mdb_title.scrub {single(separator="(" include=first exclude="")|p1||||} * the original title +mdb_title.scrub {single(separator="(" include=first)|p1||||} +mdb_title.modify {cleanup(tags="/=\"")} * 
removes starting " +mdb_title.modify {cleanup(tags="\"=/")} +* +** aka's not yet implemented if at all possible +*mdb_title.scrub {multi(separator=" - ")|p3|
Also Known As (AKA)
|\n||} *aka's +*mdb_title.scrub {multi|p3|
Also Known As (AKA)
|\n||} *aka's +* +** get the mdb_episode_id +** this is the procedure to follow: from an index page with all episodes and episode titles on it, split it in individual episodes +mdb_temp_1.scrub {multi|p1|

Episodes Rated by Date

|} * all the episodes +mdb_temp_1.modify {select|">'mdb_subtitle'<" ~} * select the one and only with the episode title +mdb_episode_id.modify {substring(type=regex)|'mdb_temp_1' "(\d\{7\})/\">"} * get the tt nbr for the episode +* +* the following elements are taken from the episode detail page mdb-p2 +* there is a story line, director and actors , starrating, episodenum +* also a 'full synopsys' on a separate page mdb-p3 +** productiodate as year +*mdb_productiondate.scrub {single|p2||||} +*mdb_productiondate.modify {calculate(format=productiondate)} +** full productiondate +mdb_productiondate.scrub {single|p2|

|(|)|

} +mdb_productiondate.modify {calculate(format=productiondate)} * only year allowed! +** new: +mdb_category.scrub {regex()|p2||(.+?)
||} +** +mdb_temp_2.scrub {single(include="Season""Episode")|p2|

|||

} * episode +mdb_actor.scrub {multi|p4|?ref_=ttfc_fc_cl_t|itemprop="name">||} +mdb_director.scrub {multi|p4|?ref_=ttfc_fc_dr|" >||} +mdb_starrating.scrub {single|p2|Ratings:|itemprop="ratingValue">||} +mdb_starratingvotes.scrub {single|p2|Ratings:|itemprop="ratingCount">||users} +mdb_showicon.scrub {single|p2|Poster"|src="|"|"image" />} +mdb_commentsummary.scrub {multi(exclude="SPOILERS ARE INCLUDED""This review may contain spoilers""Add another review" include=first)|p6||

|

|Add another review} +mdb_review.scrub {multi(exclude="SPOILERS ARE INCLUDED""This review may contain spoilers""Add another review" include=first)|p6|
|

|

\n\n|Add another review} +mdb_plot.scrub {single(separator="Storyline|

|

|} +mdb_description.scrub {single|p3|
||
|} +mdb_description.modify {replace|

| } +* +* standard 'onscreen' episode +mdb_episode.modify {addstart('mdb_temp_2' not "")|'mdb_temp_2'} +mdb_episode.modify {replace|Season |s} +mdb_episode.modify {replace|Episode |e} +mdb_episode.modify {remove|, } +* convert episode to xmltv_ns: +*mdb_episode.modify {substring(type=regex)|'mdb_temp_2' "Season.(\d+)"} +*mdb_episode.modify {calculate(> "0" format=F0)|1 -} +*mdb_temp_2.modify {substring(type=regex)|"Episode.(\d+)"} +*mdb_temp_2.modify {calculate(> "0" format=F0)|1 -} +*mdb_episode.modify {addend|.'mdb_temp_2'.} \ No newline at end of file diff --git a/config/mdb/tvdb.com.bing.ini b/config/mdb/tvdb.com.bing.ini new file mode 100644 index 0000000..235806c --- /dev/null +++ b/config/mdb/tvdb.com.bing.ini @@ -0,0 +1,136 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing MDB data from TvGuide websites +* @MinSWversion : V1.1.1/56.25 +* - (postprocess V2.0) +* @Site: thetvdb.com +* @Revision 2 - [19/06/2016] Jan van Straaten +* - improved mdb_show_id +* @Revision 1 - [22/05/2016] Jan van Straaten +* - support for match on episode-num or sub-title, use of new scopes +* - added mdbiniype +* @Revision 0 - [02/12/2015] Jan van Straaten +* - creation +* @Remarks: - series database , primarysearch with bing +* @header_end +**------------------------------------------------------------------------------------------------ +* +* +site {url=thetvdb.com|mdbinitype=serie|cultureinfo=en-US|charset=utf-8,iso-8859-2|matchfactor=70|searchsite=bing} +*site {episodesystem=xmltv_ns} +* primary search: +url_primarysearch {url()|http://www.bing.com/search?q=thetvdb+|'title'|} +url_primarysearch.headers {customheader=Accept-Encoding=gzip,deflate} +*http://www.bing.com/search?q=thetvdb+Malcolm+in+the+Middle +scope.range {(primarysearch)|end} +url_primarysearch.modify {replace()| |+} * if title has spaces +*
+* followed by all the known episodes in a element +* following the mdbconfig mustmatch="title,subtitle" the program looks for a match of the title (in the element) +* and a match of the subtitle in one of the elements +* +scope.range {(match)|end} +* the subtitle is the episode title. The next scrub first results in all the subtitles for this series +* but the matching routine will automatically replace it by the one that matches (highest matchfactor ) +*mdb_subtitle.scrub {multi|p1||||} * old style: use episodetitlelist instead: +mdb_episodetitlelist.scrub {multi|p1||||} +***************************** +* the episodenum can be used as alternative for episode matching if subtitle is not available +* it will be matched following the same procedure as for subtitle matching it with the xmltv episode-num +mdb_episodenumlist.scrub {regex(pattern="'E1''S1'")|p1||(.+?)||} +mdb_episodenumlist.modify {remove(type=regex)|'mdb_episodenumlist' "(.+?)"} +mdb_episodenumlist.modify {replace|| } +************************ +* get the matching element (with most of the episode data) +* and select the matching one +* mdb_temp_6 contains all the episodes +mdb_temp_6.scrub {multi|p1||||} * all episodes +*mdb_temp_6.modify {replace()|\||\n\n\|} * testing only +end_scope +scope.range {(getelements)|end} +*************** + +* in case of matched subtitle +mdb_temp_1.modify {calculate('mdb_episodetitlelist' not "" type=element format=F0)|'mdb_episodetitlelist' 'mdb_subtitle' @} * index of the episode +* in case of matched episodenum +mdb_temp_1.modify {calculate('mdb_episodenumlist' not "" type=element format=F0)|'mdb_episodenumlist' 'mdb_episode' @} * index of the episode +mdb_temp_1.modify {substring(type=element)|'mdb_temp_6' 'mdb_temp_1' 1} * the episode in xml format +* from here mdb_temp_1 holds the with all the data +* episode_id +mdb_episode_id.modify {substring('mdb_temp_1' not "" type=regex)|'mdb_temp_1' "(\d+?)"} +***************** +* mdb elements: +* in the top element (from p1): 
+mdb_title.scrub {single()|p1||||} +mdb_actor.scrub {regex()|p1||(.*?)||} +mdb_category.scrub {single|p1||||} +mdb_category.modify {replace|!?!?!|\|} +* the rest of the elements (from temp_1) + + +* +* subtitle, if not already there +mdb_subtitle.modify {substring("" type=regex)|'mdb_temp_1' "(.*?)"} +* description +mdb_description.modify {substring(type=regex)|'mdb_temp_1' "(.*?)"} +* starrating +mdb_starrating.modify {substring(type=regex)|'mdb_temp_1' "(.*?)"} +* director +mdb_director.modify {substring(type=regex)|'mdb_temp_1' "(.*?)"} +mdb_director.modify {replace|!?!?!|\|} +* actor, if not already from the top elements +mdb_actor.modify {substring("" type=regex)|'mdb_temp_1' "(.*?)"} +mdb_actor.modify {replace|!?!?!|\|} +* writer +*mdb_writer.modify {substring(type=regex)|'mdb_temp_1' "(.*?)"} +mdb_showicon.modify {substring(type=regex)|'mdb_temp_1' "(.*?)"} +mdb_showicon.modify {addstart(not "")|http://www.thetvdb.com/banners/} +* productiondate +mdb_productiondate.modify {substring(type=regex)|'mdb_temp_1' "(\d{4}.*?)"} +* +* episodenum if not already done in the episodenumlist +* this part as the last one because episode is used to get mdb_temp_1 and may not be changed +* during the part above +* at this spot it can be changed from the internal 'onscreen' Sn En format to xmltv_ns or whatever else. +7 + 3 +mdb_episode.modify {clear} +loop {('mdb_episode' "" max=1)|end} +mdb_episode.modify {substring("" type=regex)|'mdb_temp_1' "(\d*?)"} +mdb_temp_2.modify {substring(type=regex)|'mdb_temp_1' "(\d*?)(?:\.\d\|)"} +* onscreen +mdb_episode.modify {addstart(not "")|S} +mdb_episode.modify {addend('mdb_temp_2' not "")| E'mdb_temp_2'} *format Sx Ey +* xmltv_ns conversie +*mdb_temp_2.modify {calculate(not "" format=F0)|1 -} * episode is 1 based in tvdb! 
+*mdb_episode.modify {calculate(not "" format=F0)|1 -} * season is 1 based +*mdb_episode.modify {addend()|.'mdb_temp_2'.} * in xmltv_ns +end_loop +end_scope diff --git a/config/mdb/tvdb.com.bing_change_language.txt b/config/mdb/tvdb.com.bing_change_language.txt new file mode 100644 index 0000000..e93abf2 --- /dev/null +++ b/config/mdb/tvdb.com.bing_change_language.txt @@ -0,0 +1,45 @@ + +Language +Language customization of tvdb.com.bing.ini + +This tvdb ini is a variant of tvdb.com.ini. It uses Bing as the primary search engine, which improves the matching result for series with very simple titles, like 'friends' or 'cops'. +It also lets you set a language preference for matching in thetvdb. It will then look for series data in that preferred language first. It will also automatically revert to English if nothing is available for that language. (Which happens a lot!!) +To set the preferred language, do the following: + +1. Open tvdb.com.bing.ini for editing. +2. Have a look at the table below; it gives the language codes of the available languages in thetvdb. +Remember the two-letter code for your language, e.g. sv for Swedish. +The language number is not relevant here. +3. Locate this line: +mdb_temp_4.modify {addend|'mdb_temp_1'/all/en####} * change this line to set your prefered language!! +And change en#### into sv#### for Swedish. +4. Save tvdb.com.bing.ini, optionally under another name, like tvdb.com.bing_sv.ini, to distinguish it from the original. +In that case also change the filename in mdb.config. + +That's all!! + +thetvdb available languages and their codes: + +English 7 en +Svenska 8 sv +Norsk 9 no +Dansk 10 da +Suomeksi 11 fi +Nederlands 13 nl +Deutsch 14 de +Italiano 15 it +Español 16 es +Français 17 fr +Polski 18 pl +Magyar 19 hu +Greek 20 el +Turkish 21 tr +Russian 22 ru +Hebrew 24 he +Japanese 25 ja +Portuguese 26 pt +Chinese 27 zh +Czech 28 cs +Slovenian 30 sl +Croatian 31 hr +Korean 32 ko