From 4ba3dc98dd3998ceb21ccb1190f698a455c3b21e Mon Sep 17 00:00:00 2001
From: jc | |} * original title when redirected
+mdb_title.scrub {single()|p1||" />} * normal title
+mdb_title.scrub {multi()|p1|Ce film dans d'autres pays|||} * aka's
+mdb_title.modify {cleanup()}
+
+mdb_productiondate.scrub {single()|p1|| |} * production date
+
+mdb_actor.scrub {multi()|p2|itemprop="actors"|||}
+
+mdb_director.scrub {multi()|p2|itemprop="director"|||}
+
+mdb_starrating.scrub {single()|p1||\|}
+mdb_starrating.modify {substring(type=element)|-1 1} * get the last part (should be the rating)
+
+mdb_starratingvotes.scrub {single()|p1||
|
|} * comments spectateur +mdb_commentsummary.modify {cleanup(tags="<"">")} + +mdb_review.scrub {multi()|p3||
|} * comments presse +mdb_review.modify {cleanup(tags="<"">")} + +* remove emty comments = containing "Voir la critique sur" +******mdb_temp_6.modify {addstart()|'mdb_review'} +******mdb_temp_1.modify {calculate(debug type=element format=F0)|'mdb_review' #} * number of comments +*******mdb_review.modify {clear} +*******loop {('mdb_temp_1' > "0" max=50)|end} +******mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index +******mdb_temp_2.modify {substring(debug type=element)|'mdb_temp_6' 'mdb_temp_1' 1} +******mdb_temp_3.modify {addstart(debug 'mdb_temp_2' not~ "Voir la critique sur")|####'mdb_temp_2'} * only add non-empty comments +*******end_loop +****** +******mdb_review.modify {addstart(debug)|'mdb_temp_3'} * make multi +******mdb_review.modify {replace(debug)|####|\|} * make multi diff --git a/config/mdb/allocine.fr.google.ini b/config/mdb/allocine.fr.google.ini new file mode 100644 index 0000000..a562b43 --- /dev/null +++ b/config/mdb/allocine.fr.google.ini @@ -0,0 +1,64 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion : +* @Site: allocine.fr +* @Revision 1 - [04/06/2013] Francis De Paemeleere +* - add correct title scrub +* - max=5 for mdb_commentsummary +* @Revision 0 - [03/06/2013] Francis De Paemeleere +* - creation +* @Remarks: primary search with google.com +* @header_end +**------------------------------------------------------------------------------------------------ +* +site {url=allocine.fr|cultureinfo=fr-FR|charset=UTF-8|matchfactor=60|searchsite=google} +* primary search: +url_primarysearch {url(urlencode=1,2,3,4)|http://www.google.com/search?num=10&q=|site:allocine.fr "|'title'|" "|'productiondate'|"} +show_id.scrub {multi()|primary|allocine.fr/film/fichefilm_gen_cfilm=||.html|.html} +* +* imdb url's: +url_mdb_p1 {url|primary|http://allocine.fr/film/fichefilm_gen_cfilm=|show_id|.html} 
+url_mdb_p2 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/casting} +url_mdb_p3 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/presse/} +url_mdb_p4 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/spectateurs/membres-critiques/} +* +* imdb elements +mdb_title.scrub {single()|p1||
|} * comments spectateur +mdb_commentsummary.modify {cleanup(tags="<"">")} + +mdb_review.scrub {multi()|p3||
|} * comments presse +mdb_review.modify {cleanup(tags="<"">")} + +* remove emty comments = containing "Voir la critique sur" +******mdb_temp_6.modify {addstart()|'mdb_review'} +******mdb_temp_1.modify {calculate(debug type=element format=F0)|'mdb_review' #} * number of comments +*******mdb_review.modify {clear} +*******loop {('mdb_temp_1' > "0" max=50)|end} +******mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index +******mdb_temp_2.modify {substring(debug type=element)|'mdb_temp_6' 'mdb_temp_1' 1} +******mdb_temp_3.modify {addstart(debug 'mdb_temp_2' not~ "Voir la critique sur")|####'mdb_temp_2'} * only add non-empty comments +*******end_loop +****** +******mdb_review.modify {addstart(debug)|'mdb_temp_3'} * make multi +******mdb_review.modify {replace(debug)|####|\|} * make multi diff --git a/config/mdb/allocine.fr.ini b/config/mdb/allocine.fr.ini new file mode 100644 index 0000000..93da7f0 --- /dev/null +++ b/config/mdb/allocine.fr.ini @@ -0,0 +1,61 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion : +* @Site: allocine.fr +* @Revision 0 - [04/06/2013] Francis De Paemeleere +* - creation (adjust from allocine.fr.google.ini) +* @Remarks: primary search with allocine.fr self +* @header_end +**------------------------------------------------------------------------------------------------ +* +site {url=allocine.fr|cultureinfo=fr-FR|charset=UTF-8|matchfactor=60|searchsite=allocine} +* primary search: +url_primarysearch {url(urlencode=1,2,3,4)|http://www.allocine.fr/recherche/?q=|'title'|} +show_id.scrub {multi()|primary|/film/fichefilm_gen_cfilm=||.html|.html} +* +* imdb url's: +url_mdb_p1 {url|primary|http://allocine.fr/film/fichefilm_gen_cfilm=|show_id|.html} +url_mdb_p2 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/casting} +url_mdb_p3 
{url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/presse/} +url_mdb_p4 {url|primary|http://www.allocine.fr/film/fichefilm-|show_id|/critiques/spectateurs/membres-critiques/} +* +* imdb elements +mdb_title.scrub {single()|p1||
|} * comments spectateur +mdb_commentsummary.modify {cleanup(tags="<"">")} + +mdb_review.scrub {multi()|p3||
|} * comments presse +mdb_review.modify {cleanup(tags="<"">")} + +* remove emty comments = containing "Voir la critique sur" +******mdb_temp_6.modify {addstart()|'mdb_review'} +******mdb_temp_1.modify {calculate(debug type=element format=F0)|'mdb_review' #} * number of comments +*******mdb_review.modify {clear} +*******loop {('mdb_temp_1' > "0" max=50)|end} +******mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index +******mdb_temp_2.modify {substring(debug type=element)|'mdb_temp_6' 'mdb_temp_1' 1} +******mdb_temp_3.modify {addstart(debug 'mdb_temp_2' not~ "Voir la critique sur")|####'mdb_temp_2'} * only add non-empty comments +*******end_loop +****** +******mdb_review.modify {addstart(debug)|'mdb_temp_3'} * make multi +******mdb_review.modify {replace(debug)|####|\|} * make multi diff --git a/config/mdb/imdb.com.ask.ini b/config/mdb/imdb.com.ask.ini index 0b89c3b..a92da40 100644 --- a/config/mdb/imdb.com.ask.ini +++ b/config/mdb/imdb.com.ask.ini @@ -1,61 +1,98 @@ -* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites -* Site : imdb.com, primary search with ask.com -* revision : 1 correction in production date -* Jan van Straaten, 14/04/2012 +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing IMDB data from TvGuide websites +* @MinSWversion: V1.1.1/56.25 +* @Site: imdb.com, primary search with ask.com +* @Revision 11 - [22/05/2016] Jan van Straaten +* - added mdbinitype +* - fixed star-rating and original title +* @Revision 10 - [07/12/2015] Jan van Straaten +* - change element names, mdb_show_id and mdb_episode_id +* @Revision 9 - [25/09/2015] Jan van Straaten +* - added mdb_category +* @Revision 8 - [10/10/2014] Jan van Straaten +* - improved showid scub, also numbers upto 5000000 (was 2500000) +* @Revision 7 - [09/06/2014] Jan van Straaten +* - added url header +* @Revision 6 - [07/06/2014] Jan van Straaten/Jagad +* - added mdb_showicon +* 
@Revision 5 - [20/12/2013] Jan van Straaten +* - changes in (aka)titles +* @Revision 4 - [23/11/2013] Jan van Straaten +* - changes in actor and director due to site changes +* @Revision 3 - [11/08/2013] Jan van Straaten +* - small changes in commentsummary due to imdb.com changes +* @Revision 2 - [16/02/2013] Jan van Straaten +* - small changes in actor due to imdb.com changes +* @Revision 1 - [14/04/2012] Jan van Straaten +* - correction in production date +* @Remarks: none +* @header_end +**------------------------------------------------------------------------------------------------ +* +* +site {url=imdb.com|mdbinitype=movie|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=ask} * -site {url=imdb.com|cultureinfo=en-GB|charset=UTF-8|matchfactor=60|searchsite=ask} * primary search: http://www.ask.com/web?&q=imdb%2bDer+grosse+Edison%2b%2bClarence+Brown&/NCR *url_primarysearch {url(urlencode=1,2,3,4,5,6)|http://www.ask.com/web?&q=|imdb+|'title'|+|'productiondate'|+|'credit'|&/NCR} -url_primarysearch {url(debug urlencode=1,2,3,4)|http://www.ask.com/web?&q=|imdb+|'title'|+|'credit'|&/NCR} -show_id.scrub {multi|primary|imdb|/tt|/|onmousedown} +url_primarysearch {url(urlencode=1,2,3,4)|http://www.ask.com/web?&q=|imdb+|'title'|+|'credit'|&/NCR} +mdb_show_id.scrub {multi|primary|imdb|/tt|/|onmousedown} * -show_id.modify {remove| } * remove spaces +mdb_show_id.modify {remove| } * remove spaces * filter showid (7 char long): -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|4} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index -mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect -mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this show_id? 
-show_id.modify {remove('mdb_temp_3' not "7" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not 7 chars +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_3.modify {calculate(type=char format=F0)|'mdb_temp_2' #} * how many chars in this mdb_show_id? +mdb_show_id.modify {remove('mdb_temp_3' not "7" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not 7 chars * end loop -* filter showid (only numbers and < 2500000): -mdb_temp_1.modify {calculate(type=element format=F0)|'show_id' #} * number of show_id's = loop index +* filter showid (only numbers and < 5000000): +mdb_temp_1.modify {calculate(type=element format=F0)|'mdb_show_id' #} * number of mdb_show_id's = loop index loop {('mdb_temp_1' > "0" max=50)|5} mdb_temp_1.modify {calculate(format=F0)|1 -} * decrease index -mdb_temp_2.modify {substring(type=element)|'show_id' 'mdb_temp_1' 1} * the showid to inspect +mdb_temp_2.modify {substring(type=element)|'mdb_show_id' 'mdb_temp_1' 1} * the showid to inspect mdb_temp_3.modify {calculate(format=F0)|'mdb_temp_2'} * convert to number -show_id.modify {remove('mdb_temp_3' "0" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if not only numbers -show_id.modify {remove('mdb_temp_3' > "2500000" type=element)|'show_id' 'mdb_temp_1' 1} * remove this show_id if > 2500000 +mdb_show_id.modify {remove('mdb_temp_3' "0" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if not only numbers +mdb_show_id.modify {remove('mdb_temp_3' > "5000000" type=element)|'mdb_show_id' 'mdb_temp_1' 1} * remove this mdb_show_id if > 5000000 * end loop * * imdb url's: -url_mdb_p1 {url|primary|http://www.imdb.com/title/tt|show_id|/} -*url_mdb_p1 {url|primary|http://www.imdb.com/find?q=tt|show_id|&s=all} +url_mdb_p1 {url()|primary|http://www.imdb.com/title/tt|mdb_show_id|/} +*url_mdb_p1 {url|primary|http://www.imdb.com/find?q=tt|mdb_show_id|&s=all} 
*http://www.imdb.com/find?q=tt2200000&s=all *url_mdb_p2.modify {addstart|'url_mdb_p1'plotsummary} *url_mdb_p3.modify {addstart|'url_mdb_p1'releaseinfo#akas} *url_mdb_p4.modify {addstart|'url_mdb_p1'reviews} *url_mdb_p5.modify {addstart|'url_mdb_p1'fullcredits#cast} * -url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|show_id|/plotsummary} -url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|show_id|/releaseinfo#akas} -url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|show_id|/reviews} -url_mdb_p5 {url|primary|http://www.imdb.com/title/tt|show_id|/fullcredits#cast} +url_mdb_p2 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/plotsummary} +url_mdb_p3 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/releaseinfo#akas} +url_mdb_p4 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/reviews} +url_mdb_p5 {url|primary|http://www.imdb.com/title/tt|mdb_show_id|/fullcredits#cast} +* +url_mdb.headers {customheader=Accept-Encoding=gzip,deflate} * * imdb elements -mdb_title.scrub {single|p1||||} * original title when redirected +mdb_title.scrub {single(separator=":")|p1|| | |
|
\n\n|| | |
|
\n\n||
\n\n|Add another review} +mdb_plot.scrub {single(separator="Storyline||
|} +mdb_description.scrub {regex|p2||||} +* +* subtitle when not already done with episodetitlelist +mdb_subtitle.modify {substring("" type=regex)|'mdb_temp_1' "| | |
|
\n\n||
\n\n|Add another review} +mdb_plot.scrub {single(separator="Storyline||
|} +mdb_description.scrub {regex|p2||||} +* +* subtitle when not already done with episodetitlelist +mdb_subtitle.modify {substring("" type=regex)|'mdb_temp_1' "|
\n\n|Add another review} +mdb_plot.scrub {single(separator="Storyline||
|} +mdb_description.scrub {single|p3|