diff --git a/members/wikipedia-standingdown.py b/members/wikipedia-standingdown.py index ae3f41f7..d0ff2f22 100755 --- a/members/wikipedia-standingdown.py +++ b/members/wikipedia-standingdown.py @@ -13,19 +13,18 @@ sys.path.append("../pyscraper") from resolvemembernames import memberList -today = '2010-04-12' +today = '2024-05-24' -page = open('../rawdata/MPs_standing_down_in_2010').read() +page = open('../rawdata/Members_of_the_2024_standing_down').read() +page = re.sub('(?s)^.*?Members of Parliament not standing for re-election', '', page) +page = re.sub('(?s).*', '', page) print(''' ''') -m = re.findall('
  • ]*>([^<]*)', page) +m = re.findall(r'\s*.*?]*>([^<]*)', page) for row in m: url, name = row - name = name.decode('utf-8') - if name in ('Iris Robinson', 'Ashok Kumar', 'David Taylor'): continue - id, canonname, canoncons = memberList.matchfullnamecons(name, None, today) - pid = memberList.membertoperson(id) - print((' ' % (pid, name)).encode('iso-8859-1')) + pid, canonname, canoncons = memberList.matchfullnamecons(name, None, today) + print((' ' % (pid, name))) print('') diff --git a/members/wikipedia-standingdown.xml b/members/wikipedia-standingdown.xml new file mode 100644 index 00000000..1d3f9ddc --- /dev/null +++ b/members/wikipedia-standingdown.xml @@ -0,0 +1,117 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/weeklyupdate b/scripts/weeklyupdate index 9d21c288..d776d100 100755 --- a/scripts/weeklyupdate +++ b/scripts/weeklyupdate @@ -36,12 +36,12 @@ curl -s "https://en.wikipedia.org/wiki/3rd_Scottish_Parliament" > Members_of_the curl -s "https://en.wikipedia.org/wiki/4th_Scottish_Parliament" > Members_of_the_4th_Scottish_Parliament curl -s "https://en.wikipedia.org/wiki/5th_Scottish_Parliament" > Members_of_the_5th_Scottish_Parliament curl -s "https://en.wikipedia.org/wiki/6th_Scottish_Parliament" > Members_of_the_6th_Scottish_Parliament -# curl -s "https://en.wikipedia.org/wiki/MPs_standing_down_in_the_United_Kingdom_general_election,_2010" > MPs_standing_down_in_2010 +curl -s "https://en.wikipedia.org/wiki/2024_United_Kingdom_general_election" > Members_of_the_2024_standing_down #svn -q commit -m "Weekly rawdata scrape commit" cd ~/parlparse/members ./wikipedia-lords.py > wikipedia-lords.xml ./wikipedia-commons.py > wikipedia-commons.xml -# ./wikipedia-standingdown.py > wikipedia-standingdown.xml +./wikipedia-standingdown.py > wikipedia-standingdown.xml # ./bbcconv.py > bbc-links.xml cd ~/parlparse/pyscraper ni/wikipedia-mla.py > ../members/wikipedia-mla.xml