From 7b287be3734b80da700dfaa8ca282c48fcc5cb3f Mon Sep 17 00:00:00 2001 From: Thinkwee Date: Fri, 18 Jun 2021 21:54:52 +0800 Subject: [PATCH] init --- .vscode/settings.json | 3 + README.md | 13 +- collect_result.ipynb | 570 ++++++++++++++++++++++++++++++++++++++++++ get_datasets.py | 155 ++++++++++++ infer.py | 106 ++++++++ model.py | 216 ++++++++++++++++ params.py | 11 + process_datasets.py | 225 +++++++++++++++++ train.py | 289 +++++++++++++++++++++ 9 files changed, 1587 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json create mode 100644 collect_result.ipynb create mode 100644 get_datasets.py create mode 100644 infer.py create mode 100644 model.py create mode 100644 params.py create mode 100644 process_datasets.py create mode 100644 train.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..6ba6019 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "C:\\Python38\\python.exe" +} \ No newline at end of file diff --git a/README.md b/README.md index 8040f9f..4a64619 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,13 @@ # SubjectiveBiasABS -code for the arxiv paper "Subjective Bias in Abstractive Summarization" +- code for the arxiv paper "Subjective Bias in Abstractive Summarization" + +# introduction +- params.py:hyperparameters +- get_datasets.py:get the topk oracle sentences in article then parse +- process_dataset:turn parsed file into the format of DGL graph triplet +- model.py:the self-supervised GCN model for extractive subjective style embedding +- train.py:train, concat small graphs into a batch +- infer.py: infer the whole training set to get subjective style embedding + +# detail +- negative samples of oracle sentences are uniform-sampled by the jaccard sim \ No newline at end of file diff --git a/collect_result.ipynb b/collect_result.ipynb new file mode 100644 index 0000000..5088804 --- /dev/null +++ b/collect_result.ipynb @@ -0,0 +1,570 @@ +{ + "cells": [ 
+ { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:39:25.699933Z", + "start_time": "2020-01-11T13:39:25.693706Z" + } + }, + "outputs": [], + "source": [ + "import re" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# collect results from different clusters" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:40:15.532525Z", + "start_time": "2020-01-11T13:40:15.093738Z" + } + }, + "outputs": [], + "source": [ + "for i in range(4):\n", + " src = [\"\" for _ in range(11490)]\n", + " ref = [\"\" for _ in range(11490)]\n", + " pred = [\"\" for _ in range(11490)]\n", + " \n", + " with open(\"./fairseq/cnndm_\" + str(i) + \".log\",\"r\") as f:\n", + " for line in f:\n", + " if line[0]=='S':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " article = line.split(\"\\t\")[1:][0]\n", + " src[num] = article\n", + " if line[0]=='T':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " gold = line.split(\"\\t\")[1:][0]\n", + " ref[num] = gold\n", + " if line[0]=='H':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " summary = line.split(\"\\t\")[2:][0]\n", + " pred[num] = summary\n", + " with open(\"./result/subjective_bias/pred_cnndm_\" + str(i) + \".txt\",\"w\") as f:\n", + " for s in pred:\n", + " f.write(s) " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:40:19.436523Z", + "start_time": "2020-01-11T13:40:19.222183Z" + } + }, + "outputs": [], + "source": [ + "for i in [\"random\",\"ratio\"]:\n", + " src = [\"\" for _ in range(11490)]\n", + " ref = [\"\" for _ in range(11490)]\n", + " pred = [\"\" for _ in range(11490)]\n", + " \n", + " with open(\"./fairseq/cnndm_\" + i + \".log\",\"r\") as f:\n", + " for line in f:\n", + " if line[0]=='S':\n", + " num = 
int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " article = line.split(\"\\t\")[1:][0]\n", + " src[num] = article\n", + " if line[0]=='T':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " gold = line.split(\"\\t\")[1:][0]\n", + " ref[num] = gold\n", + " if line[0]=='H':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " summary = line.split(\"\\t\")[2:][0]\n", + " pred[num] = summary\n", + " with open(\"./result/subjective_bias/pred_cnndm_\" + i + \".txt\",\"w\") as f:\n", + " for s in pred:\n", + " f.write(s) " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-04T10:30:13.773133Z", + "start_time": "2020-01-04T10:30:13.212237Z" + } + }, + "outputs": [], + "source": [ + "with open(\"./result/src_cnndm.txt\",\"w\") as f:\n", + " for s in src:\n", + " f.write(s) " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-04T10:30:14.731484Z", + "start_time": "2020-01-04T10:30:14.723939Z" + } + }, + "outputs": [], + "source": [ + "with open(\"./result/ref_cnndm.txt\",\"w\") as f:\n", + " for s in ref:\n", + " f.write(s) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# collect sentence-level results from different clusters" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:40:25.299898Z", + "start_time": "2020-01-11T13:40:25.294404Z" + } + }, + "outputs": [], + "source": [ + "import _pickle as pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:40:25.862488Z", + "start_time": "2020-01-11T13:40:25.838315Z" + } + }, + "outputs": [], + "source": [ + "sentence_count = pickle.load(open(\"./fairseq/raw_datasets/test_index.bin\",\"rb\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + 
"end_time": "2020-01-11T13:40:38.453501Z", + "start_time": "2020-01-11T13:40:37.807784Z" + } + }, + "outputs": [], + "source": [ + "for i in range(4):\n", + " src = [\"\" for _ in range(60000)]\n", + " ref = [\"\" for _ in range(60000)]\n", + " pred = [\"\" for _ in range(60000)]\n", + " \n", + " with open(\"./fairseq/cnndm_sen_\" + str(i) + \".log\",\"r\") as f:\n", + " for line in f:\n", + " if line[0]=='S':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " article = line.split(\"\\t\")[1:][0]\n", + " src[num] = article[:-1]\n", + " if line[0]=='T':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " gold = line.split(\"\\t\")[1:][0]\n", + " ref[num] = gold\n", + " if line[0]=='H':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " summary = line.split(\"\\t\")[2:][0]\n", + " pred[num] = summary[:-1] \n", + " with open(\"./result/subjective_bias/pred_cnndm_sen_\" + str(i) + \".txt\",\"w\") as f:\n", + " index = 0\n", + " for count in sentence_count:\n", + " s = \" . \".join(pred[index:index+count])\n", + " index += count\n", + " f.write(s + \"\\n\") " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"./result/subjective_bias/pred_oracle.txt\",\"w\") as f:\n", + " index = 0\n", + " for count in sentence_count:\n", + " s = \" . 
\".join(src[index:index+count])\n", + " index += count\n", + " f.write(s + \"\\n\") " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:40:41.241505Z", + "start_time": "2020-01-11T13:40:40.926538Z" + } + }, + "outputs": [], + "source": [ + "for i in [\"random\",\"ratio\"]:\n", + " src = [\"\" for _ in range(60000)]\n", + " ref = [\"\" for _ in range(60000)]\n", + " pred = [\"\" for _ in range(60000)]\n", + " \n", + " with open(\"./fairseq/cnndm_sen_\" + i + \".log\",\"r\") as f:\n", + " for line in f:\n", + " if line[0]=='S':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " article = line.split(\"\\t\")[1:][0]\n", + " src[num] = article\n", + " if line[0]=='T':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " gold = line.split(\"\\t\")[1:][0]\n", + " ref[num] = gold\n", + " if line[0]=='H':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " summary = line.split(\"\\t\")[2:][0]\n", + " pred[num] = summary[:-1]\n", + " with open(\"./result/subjective_bias/pred_cnndm_sen_\" + i + \".txt\",\"w\") as f:\n", + " index = 0\n", + " for count in sentence_count:\n", + " s = \" . 
\".join(pred[index:index+count])\n", + " index += count\n", + " f.write(s + \"\\n\") " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:40:45.968901Z", + "start_time": "2020-01-11T13:40:45.782812Z" + } + }, + "outputs": [], + "source": [ + "src = [\"\" for _ in range(60000)]\n", + "ref = [\"\" for _ in range(60000)]\n", + "pred = [\"\" for _ in range(60000)]\n", + "\n", + "with open(\"./fairseq/cnndm_sen.log\",\"r\") as f:\n", + " for line in f:\n", + " if line[0]=='S':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " article = line.split(\"\\t\")[1:][0]\n", + " src[num] = article\n", + " if line[0]=='T':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " gold = line.split(\"\\t\")[1:][0]\n", + " ref[num] = gold\n", + " if line[0]=='H':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " summary = line.split(\"\\t\")[2:][0]\n", + " pred[num] = summary[:-1]\n", + " \n", + "with open(\"./result/subjective_bias/pred_cnndm_sen.txt\",\"w\") as f:\n", + " index = 0\n", + " for count in sentence_count:\n", + " s = \" . 
\".join(pred[index:index+count])\n", + " index += count\n", + " f.write(s + \"\\n\") " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-01-11T13:40:46.665674Z", + "start_time": "2020-01-11T13:40:46.550609Z" + } + }, + "outputs": [], + "source": [ + "src = [\"\" for _ in range(11490)]\n", + "ref = [\"\" for _ in range(11490)]\n", + "pred = [\"\" for _ in range(11490)]\n", + "\n", + "with open(\"./fairseq/cnndm.log\",\"r\") as f:\n", + " for line in f:\n", + " if line[0]=='S':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " article = line.split(\"\\t\")[1:][0]\n", + " src[num] = article\n", + " if line[0]=='T':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " gold = line.split(\"\\t\")[1:][0]\n", + " ref[num] = gold\n", + " if line[0]=='H':\n", + " num = int(line.split(\"\\t\")[0].split(\"-\")[1])\n", + " summary = line.split(\"\\t\")[2:][0]\n", + " pred[num] = summary\n", + " \n", + "with open(\"./result/subjective_bias/pred_cnndm.txt\",\"w\") as f:\n", + " for s in pred:\n", + " f.write(s) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# collect result of abs-rl-rerank" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-23T11:39:08.522867Z", + "start_time": "2019-12-23T11:39:08.354599Z" + } + }, + "outputs": [], + "source": [ + "pred = []\n", + "for i in range(11490):\n", + " model_name = \"../../ROUGE/ROUGE-1.5.5/RELEASE-1.5.5/abs-rl-rerank/models/model.\" + str(i) + \".txt\"\n", + " pred.append(open(model_name).read())\n", + "with open(\"./result/pred_abs-rl-rerank.txt\",\"w\") as f:\n", + " for s in pred:\n", + " f.write(s)\n", + " if s[-1]!=\"\\n\":\n", + " f.write(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# collect result of pointer-generator" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + 
"ExecuteTime": { + "end_time": "2019-12-28T03:39:31.058732Z", + "start_time": "2019-12-28T03:39:31.053590Z" + } + }, + "outputs": [], + "source": [ + "pred = []\n", + "article = []\n", + "article_sorted = []" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-28T03:39:31.343756Z", + "start_time": "2019-12-28T03:39:31.341821Z" + } + }, + "outputs": [], + "source": [ + "pat = '[a-zA-Z]+'" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-28T03:39:33.223285Z", + "start_time": "2019-12-28T03:39:31.677491Z" + } + }, + "outputs": [], + "source": [ + "with open(\"/home/lyn/liuwei/Subjective_Bias/data/cnndm/test.src\",\"r\") as f:\n", + " for line in f:\n", + " line = re.sub(u\" -lrb- | -rrb- |-lsb- (.*?) -rsb-\", \" \", line)\n", + " article_sorted.append(\"\".join(re.findall(pat,line)))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-28T03:39:37.510704Z", + "start_time": "2019-12-28T03:39:35.677932Z" + } + }, + "outputs": [], + "source": [ + "for i in range(11490):\n", + " index = i\n", + " pred_name = \"../../ROUGE/ROUGE-1.5.5/RELEASE-1.5.5/pointer_generator/models/model.\" + str(index) + \".txt\"\n", + " article_name = \"../../ROUGE/ROUGE-1.5.5/RELEASE-1.5.5/pointer_generator/articles/\" + str(index).zfill(6) + \"_article.txt\"\n", + " pred.append(open(pred_name).read())\n", + " article_sample = re.sub(u\"\\\\(.*?\\\\)|\\\\{.*?}|\\\\[.*?]\", \"\", open(article_name).read())\n", + " article.append(\"\".join(re.findall(pat,article_sample))) " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-28T03:46:40.635697Z", + "start_time": "2019-12-28T03:46:40.620960Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "990 
tornadoesfiercewindsandseverethunderstormswithlargehailarepredictedforthemidwestandfortheplainsfromtheozarkseastwardtothelowerohiovalleyonthursdayandfridaythenationalweatherservicesaidsevereweatherisperilousanytimeofcoursebutcnnmeteorologistchadmyerssaysthattornadoconditionsaremoredangerousduringthenighttornadoescanbemoredeadlywhenpeoplearesleepingandnotpayingattentiontothewarningshesaidscatteredstormswillsoakillinoisandmissouriandwindandhailwillcontinuetobemoderateinthosestatesthenationalweatherservicesaidbythursdayafternoonstormswillhitpartsofindianaandkentuckyearlierthisweeksevereweatherstruckthesouthalabamaarkansasoklahomaandgeorgiareportedlargehailresidentsinshawneeoklahomawereawakenedearlywednesdaymorningbyaseverestormproducinggolfballsizedhailthehailcameoutofnowheresoitwaskindofshockingsherrimcdonaldsaidtocnninanireportthehaildingedhercar 989\n", + "1363 afootballfanisfacingathreeyearbanafterrunningtoplessontoapitchandperformingaseriesofsomersaultsduringhisfirstevergameatthestadiumcharliesumnerstagedaonemanpitchinvasionatreadingsmadejskistadiumduringtheteamsfacupquarterfinalreplaywithbradfordcitylastmonththereadingsupporterwhoseteamwereupatthetimedidfourfrontflipsonthepitchlandingonhisbackeachtimebeforebeingtackledbystewardscharliesumnerstagedaonemanpitchinvasionatreadingsmadejskistadiumduringtheirquarterfinalreplaywithbradfordcitylastmonththereadingsupporterwhoseteamwereupatthetimedidfourfrontflipsonthepitchlandingonhisbackeachtimesumnerwhosaidgoingtohisfirstgamewasabrilliantexperienceisnowfacingapotentialthreeyearbanforcarryingoutthestunttheorderduetobedecidedonaprilwouldstophimgoingtoallreadingsfuturehomeandawaymatchessumnerfromwokinghamberkshiresaidhehasnoregretsabouttheincidentsayingthathehaddoneitforlaughandthathisfamilyhadseenthefunnysidespeakingoutsidecourthesaidafterdoingitandseeinggoodcommentsandmessagesevenfromtheawayfansidontlookbackandregretitobviouslyiwouldntinformotherpeopletodoititwasjustspurofthemomentitsgoodtoseegoodfeedbackandnotnegativefee
dbackidhadafewdrinksiwashappyandreadingwerewinningandijustwentforitandhadalaughitwasbrilliantaskedwhetherhegoestomanyreadingmatchesheaddedthatwasmyfirstgameactuallyitwasagoodexperienceobviouslybeingthereonthefirstgamenotafterwhativedonebutstandinginthestadiumwatchingthegamefromthestadiumitsabrilliantexperienceobviouslywiththebaniwouldntbeabletoexperiencethatexperienceagaincharliesumnerpicturedoutsidereadingmagistratescourtheisfacingapotentialbanningorderafterrunningontothepitchsumnerwastackledbystewardsthefootageofsumnersanticswentviraltheincidenttookplaceonmarchwhenthematchwasbeingbroadcastlivetomillionsofviewersonbbconeafterwardsfootageofsumnersanticsalsowentviralontheinternetsumnerwhowenttothematchwithagroupofeightfriendssaidhetalkedwithhisfriendsinataxionthewaytothematchaboutpotentiallyrunningontothepitchididsaynoatfirstthenreadingwerewinningandigotabitexcitedhesaidiwenttoraveonfridayandtheresanothervideoonfacebookandimdoingflipsthereivehadafewselfiesafewdrinksboughtformesumnersaidhespoketofriendsonthewaytothestadiumaboutcarryingoutthestunthesaidhehadnoregretsandthathisfamilyhadseenthefunnysidebutsumnerwhoworksasadrylinerfixingceilingssaidhethoughtapotentialfootballbanningorderseemedabitmuchidontwantathreeyearbanbecauseobviouslyitsquiteabithesaidimnotencouragingpeopledoitandifigotoanothergameimnotgoingtodoitagainattodayshearingsumnerssolicitorenteredaguiltypleaonhisclientsbehalftoonecountofgoingontoaplayingareaatafootballmatchunderthefootballoffencesactsumnerwhowasatcourtbutmissedhishearingafterleavingthebuildingbrieflywasreleasedonbailwithconditionsthathedoesnotattendreadingshomeorawaymatcheshewillappearatreadingmagistratescourtonaprilwhenadecisionwillbemadeonwhetherhereceivesafootballbanningorder 1109\n", + "2090 
robertlewandowskireturnedtohaunthisformerclubasbayernmunichearnedanarrowvictoryagainstborussiadortmundatthesignalidunaparkonsaturdaypolandinternationallewandowskiscoredthewinneronhisfirstvisitbacktotheclubheleftonafreetransferlastsummeraspepguardiolassiderestoredtheirpointleadatthetopofthebundesligalewandowskispentfouryearsatdortmundleadingthemtobacktobackleaguetitlesduringthatperiodandofferedonlyamutedcelebrationwhenhepouncedtogivethevisitorstheleadminutesbeforehalftimerobertlewandowskibeatsmarcelschmelzertotheballtoheadbayernmunichinfrontagainstdortmundthebayernplayerscelebrateinfrontofthetravellingsupportersafterclaiminganarrowvictoryromanweidenfellersavedthomasmullersoriginaleffortbutheballfellkindlytothepolandstrikerweidenfellercomplainstohisdefenceaslewandowskimakeshiswaytohisfeetafterscoringdortmundweidenfellersokratissubotichummelsschmelzerbendergundoganblaszczykowskireuskamplaubameyangsubsnotusedkehllangerakginterdudziakbookedaubameyangschmelzerbayernmunichneuerbenatiaboatengdanterafinhalahmalonsoschweinsteigerbernatmullerlewandowskisubsnotusedpizarrogaudinoreinaweiserbookedschweinsteigeralonsorodescorerlewandowskibayernsufferedashockhomedefeatbyborussiamonchengladbachbeforetheinternationalbreakbutrespondedtogettheirtitlechargebackontrackagainsttheirformerchampionshiprivalsdortmundwereunbeatenintheirlasteightdomesticmatchessittingcomfortablyinmidtableafterrescuingadesperatelookingsituationearlierthisyearthatsawjurgenkloppssidebottomofthetablebuttheywerebehindafterminutesagainstbayernthomasmullerwasreleaseddowntheleftchannelbuthislowdriveafterburstingtowardstheboxwasparriedbyromanweidenfellerunfortunatelyforthedortmundgoalkeepertheballreboundedinvitinglyforlewandowskitobeatmarcelschmelzertotheballandheadhomefromyardsdortmundofferedaspiritedfightbackasthehostsappliedmuchofthesecondhalfpressurebutthebayerndefenceheldfirmalthoughtheyhadmanuelneuertothankaftersavingbrilliantlyfromamarcoreusfreekickintheclosingstagesashisteammatesroaredwildlyaroundhimlewandowskid
idnotcelebrateagainsthisformerclubthegoaldidhoweversendthebayernbenchintorapturesalthoughpepguardiolaremainedcalmgoalscorerlewandowskiholdsoffthechallengeofdortmundskevinkampltheteamslineuptoobserveaminutesilenceforthepassengersofthegermanwingsflightthatcrashed 1704\n", + "3157 bryonyhibbertthegirlfriendofthefootballercaughthavingdrunkensexinthedugouthasslammedthoseresponsiblefortheclipthisisthefirstpictureofthegirlfriendofthesemiprofessionalfootballercaughthavingdrunkensexwithafemalesupporterinthedugoutafterhisteamsdefeatstrikerjayhartcouldbeheardlaughingonthemobilephonefootageashewascaughtrompingwiththemysteryblondeinhisclubtshirtwithhistracksuitbottomsaroundhislegshewasdismissedafterthesexclipofhistrystatmossleyafcintamesidewassharedonsocialmediaandhasnowbeggedforhisyearoldgirlfriendbryonyhibbertsforgivenesstherdminutesubstitutefornonleagueclitheroefcinlancashiretodayadmittedhehadnoideawhothewomanwasandreachedouttowaynerooneyontwitterforadvicespeakingforthefirsttimesincehewascaughtintheacthesaidalliwantsayisthatiregretdoingwhatididiwasdrunkidhadmorethanafewdrinksandiwanttosayimsorrytomyfamilyandeveryoneinvolvedatclitheroeiregretthatididitnotthatigotcaughtivespokenandtriedtosortthingsoutwithbryonyeveryoneintheworldknowsaboutitijustwanttogetmyapologyoutthereihaventseenthegirlsinceidonthaveacluewhosheisdonthaveacluehisteamhadjustsufferedadefeatatmossleysseelparkstadiuminthefinalgameoftheseasonwhichwasbilledasaladiesdaytoattractmorefemalesupportersthefaceoftheblondefancannotbeseeninthecloseupfootagehartwassackedbytheclubwhichplayinevostikdivisionnorthwithinhoursofthefootagebeingsharedtofanshisgirlfriendbryonyhibbertfromoswaldtwistlelancashirewhohastwoyoungchildrenslammedthoseresponsibleforfilmingandsharingtheclipwritingontheclitheroefootballclubfacebookpageshesaidhaveabitofdecencyforthepeopleitsaffectedthankgodmykidsaretooyoungtoreaditsdisgustingibettheirfamiliesarefarfromperfectstrikerjayhartcouldbeheardlaughingonthemobilephonefootageashewascaughtrompingwiththemyste
ryblondestillinhisclubtshirtwithhistracksuitbottomsaroundhislegsonepersonwhosawthefilmwhichwassharedontwitterandinstagramsaidmrhartwasunawarehewasbeingfilmedatfirsthesaidthecameracameroundthedugoutandtherewasablondefemalewithherheaddownthemancouldntseethecamerabecausehiseyeswereclosedafterafewsecondsheopenedhiseyesandlaughedandthecamerapannedawayagainivebeentomossleyafcafewtimesanditsclearlytheirgroundthewholeclipisonlytosecondsmrhartsaidhethinksofmshibbertschildrenashisownandhasntseenthemsincetheincidenthismotherdawnhartalsorevealedjaysyoungersistershavebeentargetedbytrollswithsomeonesendingthevideototheyoungestagednineiknowhesdonewrongbutattheendofthedayhesmysonandtoseewhatpeoplearesayingabouthimitsnotniceshesaidmrhartcouldbeheardlaughingonthemobilephonefootageashewascaughtrompingwiththemysteryblondepeoplehavedonealotworsesomeonesentthevideotomygirlsaswelltheyknowsomethingsgoneonbuttheydontknowwhatexactlyandtohavetoexplainittothemwhensomeonesendsthemsomethinglikethatishorribleitshadaknockoneffectonhisgirlsaswellclitheroechairwomanannebarkersaidtheplayerhadbroughttheclubintodisreputeshesaidicanthavesomebodywearingtheclitheroefctracksuitbringingusdownithasbroughttheclubintodisreputeanditsnotproperforhimtoscupperourreputationiexpectourplayerstoactprofessionallyitwasbroughttomyattentiononsaturdaynightanddealtwiththroughthemanagementididoffertospeaktohimmyselfbutsimonsaidhewouldtakecareofitmrhartpreviouslyplayedforcolnefcandpadihamfcbeforejoiningclitheroewhocurrentlysitthintheevostikleaguefirstdivisionnorthfirstteammanagersimongarnerwhosignedmrhartsaidatthetimeofthisunfortunateoccurrenceonsaturdayiwasntthereihadalreadyleftthegroundifoundoutlateronwhenisawhowitplayedoutintwitterimdisappointedbutwecantcontrolwhattheplayersdooutsidethetimewearewiththemtheychoosewhattheywanttodounfortunatelyjasonwaswearingtheclubtracksuitandheknowshehaslettheclubdownsotheclubhasdonewhatithasdonehesdisappointedithasgonethisfaritsgoingtohaveconsequencesforhimbeyondleavingthisfootballclubwith
hisfamilyandpossiblybeyondthatfootballisonlyasmallpartofitifhehadthoughtaboutwhatwouldhavehappenedwhenhedidithewouldnthavedoneithewillpayforitwhenispoketohimhesoundedveryapologeticaboutitimdisappointedforhimhewillregretwhathedidanditwastotallywrongnoonefrommossleyafcwasavailableforcommentyesterdayifyouknowtheidentityoftheblondeemailjennyawforddailymailcoukorcallhewasdismissedafterthesexclipofhistrystatmossleyafcintamesidewassharedonsocialmediaandhasnowbeggedforhisyearoldgirlfriendbryonyhibbertsforgivenesshisteamhadjustsufferedadefeatatmossleysseelparkstadiuminthefinalgameoftheseasonwhichwasbilledasaladiesdayclitheroechairwomanannebarkersaidtheplayerhadbroughttheclubintodisreputepicturedistheclubsofficialstatement 2954\n", + "3652 floydmayweathervmannypacquiaowillbethebiggestfightofalltimefinanciallyandthemostsignificantthiscenturyevaluatingthegreatesteverfightsisacomplexvariableofseveralfactorsincludingnotonlythequalityandexcitementoftheactionbutthepoliticalsocialandculturalundertonesthescaleofexpectationandoutsideinfluenceswheremoneymanvpacmancomestorankamongthemostimportantfightsinringhistorywilldependuponwhathappensthatcomingnightinthemgmgrandgardenarenainthisthefifthinaseriesoffightsthatshapedboxinghistoryilookbackononeofthegreatestsportingeventsofalltimetheneardeathfightmuhammadalivjoefrazierthethrillainmanilaquezoncityphilippinesoctoberjustastheyaresayingaboutmayweathervpacquiaotheperceivedwisdomwasthatalifrazierwascomingfiveyearstoolatefortwogreatboxersintheirsletushopetheyaresospectacularlywrongagainthatstarspanglednightinnewyorkwhensmokinjoeknockeddownanddefeatedthegreatesthadbeenfollowedinbyatamerscrappierpointsvictoryforaliaposterformuhammadalivjoefrazieralsoknownasthethrillainmanilainquezoncityphilippinesaliswingsforfrazierwhoeventuallylostinthethroundtoatechnicalknockoutandsotothetrilogyayearlaterbothmenhadcomethroughbattleswithgeorgeforemanfrazierlosingandalifamouslywinningtherumbleinthejungleinzairegeorgeforemanvmuhammadalioctoberkinshasazairejoefrazi
ervmuhammadalimarchmadisonsquaregardennewyorkjackjohnsonvjamesjeffriesjulyrenonevadajoelouisvmaxschmelingjuneyankeestadiumnewyorkthistimedonkinghadmanilainmindasthenextheavyweightstopforhisglobetrottingcircusphilippinespresidentferdinandmarcoslikezairespresidentmobutobeforehimsawsponsorshipofthefightasadistractionfromthepovertyandsocialunrestinhiscountryitwasaliwhobecamemostdistractedduringthebuildupwheninvitedtoaphotoopportunitywithmarcosandhisinfamouslybigspendingwifeimeldahetookhismistressveronicaporschealongandintroducedherashiswifemrsbelindaaliwatchedthathappenonustelevisionandimmediatelyflewtomanilatoconfrontherhusbandloudlyinhishotelsuitealireturnedtohiscustomarypsychologicalbaitingofhisopponentshehadinsultedfrazierbeforetheirfirstfightanddidsoagainchantingitwillbekillaandathrillaandachillawhenigetthegorillainmanilathusthetitleforthefightthethrillainmanilawascoinedalipicturedalongsidehisinfamouslybigspendingwifemrsbelindaaliinseptemberfrazierthequietmanwasplottingastrategyofbreakingdownaliwithshotstothestomachribskidneysandliverquotingtheoldringmaximifyoukillthebodytheheadwilldiealiwouldtargetthefrazierheadandwontheearlyroundswhilehislegswereattheirfreshestbutitwasimpossibleforhimtokeepdancinginthepressurecookerheatwiththeringtemperaturereachingdegreesfahrenheitasmuhammadstartedslowingjoebegansmokintheywouldengageinoneofthemostbrutalspectaclesintheannalsofthehardestgamealiwasforcedtolowerhisguardastheylandedmorethanblowstotheheadbetweenthemeachofsuchforcethatanyoneofthemwouldhavestoppedlesserfightersalisupplementedthosepowershotswithhisquickjabsandcombinationsandtherebyinflictedmoreofthefacialswellingboxinggreatsfrazierandalipicturedintheringonoctoberforthethrillainmanilaaliandfrazierengagedinoneofthemostbrutalspectaclesintheannalsofthehardestgamefrazierseyesbeganclosingintheninthbythetimehecameoutforthethhecouldbarelyseeandalisenthisgumshieldflyingwiththesecondoftwomassiverighthooksexhaustedthoughhewasaliwentforthekillainthethalthoughthemonstrousbarragewhich
sentfrazierreelingandstaggeringcouldnotputhimdownitwasenoughtobringasensationalendtoanepicbattlealiorderedhiscornermanherbertmuhammadtocutoffhisglovessayinghecouldnotgoonbutbeforehecoulddosofrazierstrainereddiefutchcalledahaltarelievedalifamouslysaidthisistheclosesttodeathihaveeverbeenfrazierbeggedtobeallowedtoseeoutthethandfinalroundbutfutchtoldhimitsovernoonewilleverforgetwhatyoudidheretodayfrazierneverforgavehimnotfullynoraliforthetrilogyofinsultsalidefiedtherigoursofparkinsonstoattendfraziersfuneralaliliftshisarmsintheairafterdefeatingfrazierinthethroundbutlaterattendedhisgreatopponentsfuneralwhenhediedattheageof 3611\n", + "3653 floydmayweathervmannypacquiaowillbethebiggestfightofalltimefinanciallyandthemostsignificantthiscenturyevaluatingthegreatesteverfightsisacomplexvariableofseveralfactorsincludingnotonlythequalityandexcitementoftheactionbutthepoliticalsocialandculturalundertonesthescaleofexpectationandoutsideinfluenceswheremoneymanvpacmancomestorankamongthemostimportantfightsinringhistorywilldependuponwhathappensthatcomingnightinthemgmgrandgardenarenainthisthefourthinaseriesoffightsthatshapedboxinghistoryilookbackononeofthegreatestsportingeventsofalltimerumbleinthejunglegeorgeforemanvmuhammadalikinshasazaireoctoberthefightvotedthegreatestsportingeventofthethcenturythefightwhichcrossedoceansandcontinentsthefightwhichdroveboxingthroughsocialbarriersthefightwhichputashadowycentralafricanrepublicontheworldmapthefightwhichestablishedmuhammadaliasthemostrecognisablehumanbeingonearththefightwhichmadegeorgeforemanfamousindefeatthefightaboutwhichmillionsofwordshavebeenwrittenandmoviesmadeaphotographtakenonoctobershowsmuhammadaliandgeorgeforemanusasforemanapproacheshiscompatriotaliduringtheiconicrumbleinthejungleintherumbleinthejunglewasthebrainchildofpromotionalgeniusdonkingbackedbythemultimilliondollarinvestmentthroughwhichzairepresidentmobutosesesekohopedtoexcitehispeopleintokeepinghiminpowerkinghadpersuadedaliandforemantosignmillioncontractswhichhecouldnotafford
butwouldtalkpresidentmobutointobankrollingjoefraziervmuhammadalimarchmadisonsquaregardennewyorkjackjohnsonvjamesjeffriesjulyrenonevadajoelouisvmaxschmelingjuneyankeestadiumnewyorkastheyearoldworldheavyweightchampionwithenormouspunchingpowerforemanwasoddsonfavouritetocrushthebrillianceofthemantheycalledthegreatestbutwhoatanddiminishedbyhisthreeandahalfyearbanishmentfromtheringforrefusingtogotowarinvietnamwasperceivedasafadingforcealiwenttoworkonforemanspsychefromthemomentthecircusarrivedinkinshasaturningupathistrainingsessiontomockbiggeorgeforhissizeandmakefunofhisintellecttheirritatingeffectonforemanwasprolongedafterhesufferedaneyecutinsparringthedateofthefightwasputbackmorethanamonthandkingfearfulofcancellationinsistedtheystayedinzaireinsteadoftakingabreakbackinamericaalialsocharmedthelocalpopulationwhohappilytooktothestreetsthroughoutthoseweekschantingalibomayealikillhimthefightdevelopedintoatacticaltriumphforalialbeitaselfpunishingonewhichmayhavecontributedtothesubsequentonsetofparkinsonswhichafflictshimnowalioftenfoundhimselfagainsttheropesinastrategywhichevensurprisedhistrainerangelodundeealispentmuchofhistimeagainsttheropesinvitingforemantopunchhimselfoutbeforelandinghisblowinastrategywhichhelaterentitledropeadopeandwhichcameasasurprisetoeveryoneincludinghisfabledtrainerangelodundeealispentmuchofhistimelollingagainsttheropesinvitingforemantopunchhimselfoutbiggeorgedulyobligedbythrowinghundredsofblowsinthefirstsevenroundsalievadedmanyofthosepunchesandblockednumerouswithhisarmsbuthehadtowithstandthebruteforceofthosewhichgotthroughhisdefencestheverbaltauntingwentontheytoldmeyoucouldpunchgeorgehesaidasforemanbegantoflagisthatallyougotgeorgeheraspedasforemanconnectedtothealichinwithvirtuallythelastofthepuncheswhichhadknockedoutmanyaformerfoeithadnotbeenallnegativethroughoutalihadpepperedtheswellingforemanfacewithbreakoutwithlightningcombinationsandthesteamyamheathadcontributedtotheweakeningprocessforemantheworldheavyweightchampionisdownoncounttwoafterbeingstruckbya
lisrighthanderalilooksonafterheknockedforemantothecanvaswiththeholymotherofallrighthandersduringtheboutcometheeighthroundcomethecoupdegraceasuccessionofrighthooksatwofistedburstandthenalefthookwhichstraightenedupbiggeorgetoreceivetheholymotherofallrighthandersforemanstaggeredtohisfeetatthecountofninebutrefereezackclaytonknewhewasfinishedsensationbedlaminthejungleastonishmentaroundthesatelliteworldforemanwouldeventuallyemergefromdenialtobecomealisgreatfriendaninspiringchristianpreacherandthesellerofcountlesstopoftherangebarbecuesaliwasstillthegreatestalisstaffandtherefereeputtheirarmsaroundaliafterhesettledtherumbleinthejunglein 3652\n", + "3655 floydmayweathervsmannypacquiaowillbethebiggestfightofalltimefinanciallyandthemostsignificantthiscenturyevaluatingthegreatesteverfightsisacomplexvariableofseveralfactorsincludingnotonlythequalityandexcitementoftheactionbutthepoliticalsocialandculturalundertonesthescaleofexpectationandoutsideinfluenceswheremoneymanvspacmancomestorankamongthemostimportantfightsinringhistorywilldependuponwhathappensthatcomingnightinthemgmgrandgardenarenahereisthesecondofmymostsignificantfightsinboxingshistorythefightagainstthenazisyankeestadiumnewyorkjunejoelouisvsmaxschmelingjoelouisknocksoutmaxschmelinginthefirstroundtowintheheavyweighttitleonjunealthoughherrhitlerwastrumpetingmaxschmelingasaheroofthethirdreichfollowinghisknockoutvictoryoverjoelouistwoyearsearlierthegermanwasantinaziandremainedsoduringworldwariiwhichwasimminentatthetimeoftherematchlouishadcomebackfromthatshocktowintheworldheavyweighttitlebydefeatingjamesjbraddockbutannouncedhewouldnotconsiderhimselfthetruechampionuntilheavengedthelosstoschmelingthefightwasbilledasapoliticalwarbetweenanamericastrugglingtoemergefromthegreatdepressionandagermanyprovokingarmedconflictineuropebyannexingaustriayetsuchwasschmelingsdistasteforbeingexploitedbyadolfhitlerspropagandamachineandhisloyalrefusaltosackhisjewishmanagerthathiswifeandmotherwererefusedpermissiontotraveltonewyorktosupporthimincaseh
edefectedtotheusthesocialpicturewascloudedfurtherbyblackamericasbeliefinlouisasahighprofilecampaignerforcivilrightslouisiscarriedbyhissupportersaheadofhisrematchwithschmelingaposterfortherematchwhilelouisandschmelingshakehandsattheweighinaheadoftheirfightjackjohnsonvsjamesjeffrieslouiswhohadcomplacentlytrainedonthelinksratherinthegymforthefirstfightgaveupgolfandwomeninseriouspreparationforthereturnboutpresidentfranklindrooseveltinvitedlouistothewhitehouseandtoldhimjoeweneedmuscleslikeyourstobeatthegermanslouiswouldsaythewholedamnedcountrywasdependingonmeclarkgablegarycooperandgregorypeckwereamonghollywoodcelebritiesinthecrowdalongwithfbibossjedgarhooverlouisispicturedjumpingasideasschmelingfallsaftertheknockoutlouishadbeentacticallyoutthoughtintheirfirstfightsodecidedtogoforbrokefromthefirstbellabarrageoflefthooksfollowedbyarighttothebodyhadschmelingdoubledupinpainandrefereearthurdonovangavehimastandingeightcountlouisswitchedhisattacktotheheaddroppingschmelingtwiceanddonovancalledahaltonlytwominutesandsecondsintothefightlouisthebrownbomberhadfracturedschmelingsspinewithhisbodyattacksasthemanfrombrandenburgwastakentohospitaltheambulancepassedthroughharlemandherecalledlatericouldthenoiseofcrowdsdancingbandshadleftthebarsandwereplayingonthesidewalkseverywherewasfilledwithcelebrationsandsaxophoneslouisgoesontheattackagainstschmelingashegainsrevengeafterdefeattwoyearsearlierschmelingwouldbeforciblyconscriptedasagermanparatrooperwarbutservedonlybrieflyandhisresistancetohitlermadehimaheroinhiscountryforasecondtimeafterthewarheinvestedinsuccessfulbusinessesandliveduntilshortlybeforehisthbirthdayinlouislosthismoneyandbecameagreeteratcaesarspalaceinlasvegashisoldfriendschmelingvisitedhimthereonceeveryyearuntilhisdeathinwhenheactedasapallbeareratlouisfuneral 3654\n", + "3656 
floydmayweathervsmannypacquiaowillbethebiggestfightofalltimefinanciallyandthemostsignificantthiscenturyevaluatingthegreatesteverfightsisacomplexvariableofseveralfactorsincludingnotonlythequalityandexcitementoftheactionbutthepoliticalsocialandculturalundertonesthescaleofexpectationandoutsideinfluenceswheremoneymanvspacmancomestorankamongthemostimportantfightsinringhistorywilldependuponwhathappensthatcomingnightinthemgmgrandgardenarenaherearemymostsignificantfightsinboxingshistorythefightforracejackjohnsonvjamesjjeffriesjulyinrenonevadajimjeffriescameoutofasixyearretirementtotakeonjackjohnsonthefirstblackheavyweightchampionjackjohnsonasthefirstblackworldheavyweightchampionwasthebaneofthewhitesupremacistsinamericaintheearlystheyhateditwhenthegalvestongiantbeattommyburnsinsydneyaustraliaintowinthetitletheyhadbrandishedasthesymboloftheirracialsuperioritythusbeganthequestforthegreatwhitehopewhenjohnsonkodtheirbesthopeofallstanleyketchelthecrusadetobringtheiroldidoljeffriesoutofsixyearretirementreachedafrenzyjeffriesfinallyagreedtorespondtothatportionofthewhiteracethathasbeenlookingformetodefenditsathleticsuperiorityinreturnforapurseofsoracistwasthepresscoverageofthebuilduptothefightthateventhenewyorktimeswroteiftheblackmanwinsthousandsandthousandsofhisignorantbrotherswillmisinterprethisvictoryasjustifyingclaimstomuchmorethanmerephysicalequalitywiththeirwhiteneighboursastheracialtensionsgrewthesheriffofrenotookthethenunusualstepofbanninggunsandalcoholinthecapacityarenaerecteddowntownjeffriesbythenacontentedfarmerhadshedmorethanlbstogetclosetohisoldfightingweightwhiteamericahateditwhengalvestongiantjohnsonbeattommyburnsinsydneyaustraliaintowinthetitleandafterafrenziedcampaignjeffriesagreedtotakethefightforwhatwouldequalmtheboutwasscheduledforroundsbutjohnsonsaidheknewitwasasgoodasoverinthefourthwhenhesawthealarmedreactiontohistrademarkuppercutandtheoldshipsinkingjeffrieshithertoundefeatedhadneverbeenoffhisfeetbutaftertwoknockdownshiscornerthrewinthetowelattheendofthethhedis
mayedwhiteamericabysayingicouldneverhavewhippedjohnsonatmybestnotinathousandyearssymbolicallythefourthofjulyshookthefoundationsofamericasoldwhiteorderriotsbrokeoutthatnightincitiesacrosstheunitedstatesandcitiesincludingnewyorkandwashingtonaspolicejoinedenragedwhitecitizensinsubduingblackamericancelebrationspeoplewerekilledandhundredsinjuredjohnsoncontinuedtoangertheracistsonseveralcountsasheavyweightchampionhereignedanotherfiveyearsbeforesurrenderingthecrowntowhitechallengerjesswillardafterwardsclaiminghehadthrownthefightafterbeatingjeffriesconvincinglyjohnsonwouldbeworldchampionforanotherfiveyearsjohnsonthensurrenderedthecrowntowhitegiantjesswillardandlaterclaimedtohavethrownthefightoutsidetheringhelivedthehighlifeofacelebrityandangeredtheoldorderagainbycontinuingtomarrywhitewivesthreeoftheminalltheamericanestablishmentventeditsgrudgebyaccusinghimoftakingwomenacrossstatelinesforimmoralpurposesandpursuinghimintoexileineuropeandcubahefinallygavehimselfupatthemexicanborderandservedanominaloneyearprisonsentencebeforehisdeathinattheageofhecollaboratedonmoviesabouthislifeandabroadwayplayentitledthegreatwhitehopejackjohnsonisstillregardedbymanyboxinghistoriansasthegreatestheavyweightofalltime 3655\n", + "4008 
propertyportfoliosolcampbellandhiswifefionathepollsmighthavethepartiesneckandneckbutitlookslikesolcampbellistakingnochancesforthesecondtimethisyeartheformerenglandfootballerissellingamultimillionpoundpropertyleadingtospeculationhewantstododgelaboursmansiontaxifthepartywinstheelectionanoutspokencriticofthepolicycampbellhaseventhreatenedtoleavethecountryifthetaxonpropertiesovermillioncomesintoforceearlierthisyearitwasrevealedhehadtakentheapparentlybizarredecisiontosellamillionlondontownhousetoacompanyownedbyhimselflandregistrydocumentssuggestthesalewasregisteredthedayafterhigherstampdutyleviescameintoeffectbutitispossiblethesaletookplaceshortlybeforemeaningtheincreasedchargeswouldhavebeenavoidednowcampbellhaslistedasecondpropertyforsaleinthesameexclusiveneighbourhoodofchelseaaformerhomeofjamesbondauthorianflemingtheluxurypenthousebythethamesisonthemarketformillioncampbellboughtitinformillionafterhereportedlystagedasitinatanestateagentsinthehopeofsecuringitforhisfamilyhiswifefionabarrattcampbellandtheirchildrenisabellaandethanithassincebeenextensivelyrenovatedbyhiswifeaninteriordesignerandgranddaughterofbarratthomesfoundersirlawriebarrattanadvertforthepropertystatestherearefewpropertieswithinchelseathatcanofferthecombinationofviewsspaciousaccommodationandexquisitefinishyetcampbellseemstobekeentoridhimselfoftheflatbeforetheelectioneyefordetailthelivingroominsidecampbellsmillionchelseaflatboastslavishfurnishingsnicetouchtheformerarsenalandtottenhamhotspurdefenderskitchenisfilledwithmarblesumptuousgildedmirrorslinethehallincampbellsluxuriousflatwhichisinanexclusiveneighbourhoodlastyeartheformerarsenalandtottenhamdefenderblastedtheproposedmansiontaxdescribingitasacheapandeasywaytoextractmoneyfromindividualswhohavedonewellhetoldthetimeswithamansiontaxyouretaxingmeforbeingprudentiflabourgetsinidleaveidhavetosaysorryiwanttoshipoutcampbellhasalsosaidimnotearningwhatiwasearningfiveyearsagoexpertshavesaidthatpropertiesworthbetweenmillionandmillioncouldincuranannualchargeofuptoun
deralabourgovernmenthenrypryorabuyingagentandpropertycommentatorsaidsellerslikesolcampbellfaceafinancialdilemmatotakeacertainhitnoworawaitthepotentialtaxbillthatalabourorlibdemgovernmentmaygeneratesincecampbellhasalsoownedgradeiilistedhallingtonhallanthcenturymansioninnorthumberlandfortunecampbellaccumulatedanumberofpropertiesbeforeandafterhisfootballcareerbuthassincesaididontearnwhatiusedtocampbellrecentlyfeaturedinahardhittingcampaigntoencourageethnicminoritybritonstovote 3149\n", + "6367 annfldefensivebackwithahistoryoflegalproblemsisbeinginvestigatedafterreportsofanearlymorningincidentinvolvingaphysicalfightandatleastoneshotbeingfiredatatexasclubdenverbroncoscornerbackaqibtalibandhisbrotherarebeinginvestigatedforaggravatedassaultafteranaltercationatadallasareanightclubonwednesdaymorningaccordingtoreportsduringaincidentinvolvingtwogunswhenheplayingforthetampabaybuccaneerstalibandhismotherwereaccusedofshootingagunathissistersliveinboyfriendingarlandtexasdenverbroncoscornerbackaqibtalibandhisbrotheryaqubarebeinginvestigatedforaggravatedassaulttalibwasintroublebeforeandwaswantedonchargesofaggravatedassaultwithadeadlyweaponinduringthelatestincidentashotwasfiredoutsideofclubluxxindallastexasafteraphysicalfightinsideduringtheincidentonwednesdaytalibandhisbrotheryaqubhadaphysicalaltercationwithsomeoneinsideclubluxxandthenashotwasfiredoutsidearoundamwfaareportedwhendallasofficersrespondedtotheactiveshootercallthemanwhocalledpolicesaidthreeblackmenhadbeeninvolvedinadisturbanceandoneofthemhadtriedtopunchhimthemanalsosaidoneofthemenpulledoutagunandaimeditathimbeforefiringaroundoffintotheskyoutsidethebarandcausinganimmediatepanicthetalibslefttheareainarangeroverandajaguarandwenttothextccabaretpolicefoundthebrothersatthesecondclubandimpoundedthevehiclestolookforevidencethebrotherswerebothquestionedandaninvestigationintotheincidentisongoingthegeneralmanagerofclubluxxconfirmedthetalibswereattheclubbutdeniedthatanythingwithaguntranspiredthedallasmorningnewsreportedhanneykamalsai
donehundredpercentthatdidnothappeniwastheresomebodymighthavecalledthepolicebecausetherewasayellingargumenttalibsignedasixyearmillioncontractlastyearitsathletessomeoneseesthemyellinganddialseveryonewasaskedtoleavetheclubwhichtheydidoftheirownfreewillandthatwasitbroncosvicepresidentofpublicrelationspatricksmythsaidwehavebeenintouchwithaqibregardingthesereportsandwillcontinuetomonitorthissituationdallaspolicefalselyreportedtalibhadbeenarrestedforthrowingbottleslastjuneduringannflcampafterpartythedenverpostreportedthepoliceapologizedafteritendedupbeingyaqubwhowasarrestedduringtheincidentingarlandfelonywarrantswereissuedfortalibandhismotherokolotalibonchargesofaggravatedassaultwithadeadlyweaponpolicesuspectedthembothoffiringshotsfromarugerhandgunatshannonbillingstheboyfriendoftalibssisterandalsobelievedthenflbackattemptedtopistolwhipbillingswithaspringfieldmmsemiautomatichandgunduringtheincidentthebostonglobereportedtheyhadapparentlylearnedbillingswasbeingphysicallyabusivetowardstheirfamilymembertalibwhoownedbothweaponsreportedlyyelledduringthepistolwhippingyouregoingtomakemethrowawaymycareerthechargesweredroppedbecauseprosecutorsbelievedbillingsaregisteredsexoffenderwiththestateoftexaswouldnotmakeacrediblewitnessgarlandpolicespokesmanjoeharnsaiditwasveryfortunatenoonewaskilledandnoonewashurtseriouslytherewasntagreatdealofdistancefromonepersontotheotherthatwasshootingaftertheincidentafelonywarrantwasissuedforokolotalibforanassaultwithadeadlyweaponchargeinadditiontoplayingforthebroncostalibhasalsoplayedforthenewenglandpatriotsandtampabaybucsinadditiontothegunincidentstalibwasinvestigatedafteranallegedassaultonafloridacabdriverhethenresistedarrestonchargesofsimplebatterytheshutdowncorneralsofoughtwithteammatesandwassuspendedforviolatingthenflspolicyonperformanceenhancingsubstancestalibsignedasixyearmillioncontractwithdenverinmarchhealsohasplayedforthenewenglandpatriotsduringhissevenyearcareerduringhistimewiththereigningsuperbowlchampstalibsaidimacompetitoronthefieldmanthat
skindofwhatweretalkingaboutrightnowjustfootballifchargesarebroughtwithregardtotheincidentindallastalibcouldfacedisciplineunderthenflspersonalconductpolicyandasuspensionseemspossiblegivenhishistorytalibwasinvestigatedafteranallegedassaultonafloridacabdriverandresistedarrestonabatterycharge 3798\n", + "6752 richiebenaudspentalifetimeinthesunandinhisfinalyearsheruefullyadmittedthathewaspayingthepriceforitwithskincancersonhisforeheadandthetopofhisheadtheyearoldwhodiedonfridaytalkedabouthiscancerandthetreatmenthewasundergoingwithtypicalcandourwheniwasakidwenevereverworeacapbenaudtoldtheninenetworkaboutgrowingupinaustraliainthesandsiwishihadyouliveandlearnasyougoalongthecricketcommentaryboxlegendtalkedabouttheradiationtreatmenthewasreceivingforhisskincancerbenaudwhohadntreturnedtohiscommentaryjobfortheninenetworkfollowingacaraccidentinoctoberinwhichhesufferedseriousinjurieshadlookedincreasinglyfrailandhadlostweightbuthehadbeenoutwalkingwithhisbelovedwifedaphneforminuteseverymorninginthehopethathecouldreturntoworkscrolldownforvideoskincancerrichiebenaudpicturedatthescginjanuaryrevealedlatelastyearthathehadskincancerandwasreceivingradiationtreatmentadmittingthatasakidwenevereverworeacapbutusingtheopportunitytoadviseallaustralianstocovertheirheadsmarvellousrichiebenaudpicturedininbritainwhenhewascaptainingthetestseriesadmittedlastyearthathehadneverwornheadgearasayoungplayerbecausehehadbeeninfluencebylegendaryallrounderkeithmillersayingwheniwasakidyounevereverworeacapthatsbecausekeithmillerneverworeacapmasterofthemicrophonecricketfanspicturedapplaudingrichiebenaudashecoveredhislastmatchonenglishsoilattheovalinlondontheashestestbetweenenglandandaustraliainsupereffortavictoriousaustraliancaptainrichiebenaudleadshisteamfromthefieldatoldtraffordinafterwinningtheashesinthefourthtestagainstenglandinwhichhesensationallytookfivewicketsforrunsinballsinstigatingenglandscollapsemorningeverybodyrichiebenaudcommentatedcricketformorethanfourdecadesfollowingastellarcareerasatestcricketerdur
ingwhichhebattedbowledandcaptainedaustraliafromthelatestohisretirementinbenaudhadbeendrivinghisbelovedcarasunbeamalpinebackfromhittingpracticegolfballsattheaustraliangolfclubinsydneywhenhelostcontrolandhitaparkedcarandthenasmallbrickfencehewashospitalisedwithshoulderspinalandchestinjuriesandabrokensternumhewassubsequentlydiagnosedwithskincancerandbegantreatmentlastyearrevealingthatdoctorswerepleasedwithhisprogresshetoldreporterslastnovemberhewascopingwithitverywellthecricketlegendsaidhebelievedthecancerswerecausedbyplayingcricketinthesunwithoutahatorsunscreenlonghistorybehindthemicrophonerichiebenaudlooksonduringthefifthtestmatchbetweenenglandandaustraliainaugustwearingoneofhistrademarklightcolouredjacketswhichwerehissignaturealongwithhisuniquecommentaryfanfavouriterichiebenaudwassuchalegendasacricetcommentatorinhistrademarkcreamjacketthatheinspiredalegionoffansdressedupinaustraliancricketfansdressedupandwearingsilverwigsandandcarryingfoammicrophonesattheashestestatthescginjanuarythevoiceofaustraliancricketrichiebenaudpicturedattheovalduringtheseptemberashestestwasoneofaustraliansportsgreatpersonalitiesandthebossofthecommentaryboxattestmatchesinaustraliaandbritainrichiebenaudpicturedwithworcestorcaptaindonkenyonbeforeanaustraliavworcestermatchinenglandcaptainedaustraliaandplayedtestmatchesinthelatesandtheearlyshelpingtorestorethecountrysreputationasacricketingnationwhatacatchrichiebenaudpicturedwithhisbelovedwifedaphneatrandwickracecourseinsaidthatfollowingaseriouscarcrashandthentreatmentforskincancerheanddaphnehadbeenwalkingforminutesadaytoeasehimbacktogoodhealthinthehopehecouldreturntothecommentaryboxtheformeraustraliantestcricketerseizedontheopportunitytoencourageaustralianstoprotectthemselvesfromthesunirecommendtoeveryonetheywearprotectionontheirheadshesaideightyfouryearoldsdontseemtomendaswellastheyusedtohesaidhewasstillrecoveringfromthecaraccidentinjurieswhichincludedtwofracturedvertebraehoweverheexpressedthebeliefhewouldmakeareturntocoverthefourthtestagainsti
ndiaatthescginsydneyandhadbeenoutwalkingwithhisbelovedwifedaphneeverymorninginanefforttoregainhishealthheroworshipfansdressedasrichiebenaudinsilverwigsandcreamorbeigesportsjacketsholdaloftimitationninenetworkmicrophonesatthesydneytestmatchinjanuarylegendamonglegendsbenaudatthescgwithfellowcricketersturnedcommentatorsianhealeymichaelslaterbrettleeianchappellandbilllawrydressedinpinkformcgrathfoundationdayduringthesydneytestbetweenaustraliaandsrilankainjanuaryprogressisslowhesaidivebeendoingalotofwalkingweareouteverymorningdoingminutewalkseverymorninganditsshowingbeneficialeffectswithouttheinformationonskincancerdangernowavailableduringhisplayingdaysbenaudrevealedhewasinfluencedbytheexampleofgreatallrounderkeithmillerwheniwasakidyounevereverworeacapnotaflashonebenaudsaidthatsbecausekeithmillerneverworeacaparthurmorrisdidwhenhewentouttobatwefollowvariouspeopleandnuggetmillerneverworeanythingonhisheadsoididntiwishihadvalerichiebenaudbenaudpicturedinmorethanyearsafterhisretirementfromastellarcricketingcareerwhichhefollowedwithdecadesasoneofaustraliasmostlovedandrespectedcommentators 1873\n", + "9209 
defiantnigelfaragetodayinsistedhedidnotlosemyragafterroundingontheaudienceofalivetvdebateforbeingtooleftwingtheukipleaderwasbooedbyvotersatwestminstersmethodistcentralhallashefacedclaimsheblamedallofbritainsproblemsonmigrantspollstericmwhichwashiredbythebbctoselecttheaudiencememberstodaydefendeditsprocessusingrandomlocationselectiontechniquesnigelfaragetodayinsistedhedidnotlosemyragafterroundingontheaudienceofalivetvdebateforbeingtooleftwingmrfaragepicturedtodayriskedalienatingthosewatchingatwestminsterscentralhallinlondonasheprotestedtheywerearemarkableaudienceevenbytheleftwingstandardsofthebbcthebbcstressedthattheaudiencewascarefullyselectedbyanprocessbutmrfaragesaidsomethinghadgonewrongwiththeprocessaskedwhetherhehadlosthistempermrfaragetoldthebbcnewschanneliwasverycalmaboutitididntlosemyraghesaidhisprotesthadbeensparkedbythehostileaudiencereactiontohisargumentthathighlevelsofimmigrationwereincreasingpressureonthehousingmarketsomethinghesaidwouldbeacceptedbymostrationalpeopleandheaddedthisisnotthefirsttimeiveseenthisthenightofthebyelectionthatdouglascarswellwoninclactonwithalandslidetherewasaukiprepresentativepatrickoflynnonquestiontimeinclactonandtheaudienceweredeeplyhostiletohimaskedifheblamedthebbcmrfaragesaidsometimesthesethingsgowrongsometimesyougetgroupswhoapplytobeonprogrammeswhoperhapsarentastruthfulontheirapplicationsastheycouldbeinthiscasethebbcgavethejobtoapollingcompanycalledicmwhoarefamousforgettingeverythingaboutukipwrongandthatithinkwasthemistakeamigoingtomakeacomplaintivegotanelectiontofightwhatmattersisntthepeopleintheroomwhatreallymattersarethemillionsofpeoplewatchingontelevisionmrfaragesoutburstcamelessthanhalfanhourintotheeventaftersomeofhiscommentsaboutpressureonhousingduetoimmigrationweregreetedwithmuttersfromthosewatchingindependentpollstersicmwerehiredbythebbctoselecttheaudienceofaroundpeopleafterfacingstiffcriticismfromukipthefirmtodayreleasedastatementonhowitchosetheaudiencememberstheywereselectedfromamileradiusofthevenueinwestminsterthe
regionwasbrokenupintosmallareasandatleastpeoplewerechosenfromeachareaeachpersonwasselectedtoreflectthecompositionoftheukpopulationbygenderageethnicityandsocialgradeandpoliticalprotocolsthatreflectedthebalanceasagreedbetweenthebroadcastersandthepoliticalpartiesicmsaidafifthofthepeopledeclaredthemselvestobeundecidedvotersasmallnumberofsnpandplaidcymrusupporterswerebroughtinfromscotlandandwalesicmaddedtherecruitmentapproachreplicatedthoseusedbyicmatalltheleaderdebatesboththecleggvsfaragedebateslastyearandtherecentitvdebatemrfaragewaswidelyconsideredtohavewonthedebatesagainstnickclegglastyearafterwhichtheukipleadermadenocomplaintabouttheaudiencecompositionlastnightsawfiveoppositionpartyleadersgoheadtoheadinaminutelivetvdebatemrfarageappearedalongsidelaboursedmilibandsnpleadernicolasturgeongreennataliebennettandplaidcymrusleannewoodduringtheearlierexchangestheleadersonmrfarageforhisantiimmigrationpoliciesattractingapplausefromtheaudiencemrfaragewastakenabackandattackedatotallackofcomprehensionfromthosewatchingandprotestedtheywerearemarkableaudienceevenbytheleftwingstandardsofthebbchisoutburstcamelessthanhalfanhourintotheeventaftersomeofhiscommentsaboutpressureonhousingduetoimmigrationweregreetedwithmuttersfromthosewatchingmrfaragewarnedthatforeignersarrivingintheukhasdirectlycontributedtothehousingcrisisbuthecameunderfireforlinkingallofthecountriesproblemstoimmigrationgreenpartyleadernataliebennetttookaswipeatmrfaragethereissomeonehereonthisplatformwhowantstototallydemonisemigrantsandyouknowwhoimeanshehitbackatclaimsmigrantsarecausingacrisisinthehealthserviceoneinfourdoctorsisforeignbornpercentofnhsstaffareforeignbornmrmilibandsaidtoldmrfarageyouabuseimmigrantsandthosewithhivandthenyoucomplainthatukipisbeingabusedmisssturgeonwonloudapplauseassheurgedmrfaragetoputthebogeymantoonesideshetoldhimwehaveahousingcrisisacrossthiscountryyouknowwhatnigelfarageitisntcausedbyimmigrantsinyourworldeveryproblemiscausedbyimmigrantsbutmrfarageclaimedtheaudiencewasbiasedagainsthimtherejus
tseemstobeatotallackofcomprehensiononthispanelandindeedamongstthisaudiencewhichisaremarkableaudienceevenbytheleftwingstandardsofthebbcmrfaragewasheckledbymembersoftheaudiencebeforedaviddimblebywhowaspresentingthebroadcastonbbcintervenedtosaynigelletmejustsayonethingthisisanaudiencethathasbeencarefullychosennotbythebbcbutbyanindependentpollingorganisationtorepresentthebalancebetweenallpartieslabourleaderedmilibandgavetheukipleadertheadviceitsneveragreatideatoattacktheaudiencenigelinmyopinionbutmrfarageretortedtherealaudiencearesittingathomeexplanationmrfaragewasheckledbeforedaviddimblebywhowaspresentingthebroadcastonbbcintervenedtosayhowtheaudiencehadbeenchosenthebbcinsiststheaudiencewascarefullyselectedbyanindependentpollsterbutmrfaragesaidsomethinghadgonewrongwiththeprocessunimpressedukipvotermikecrippsaukipvoterhasinsistedheishavingsecondthoughtsaboutbackingthepartyafternigelfarageattackedthestudioaudiencesecurityworkermikecrippsofbromleysoutheastlondonbelievedtheukipleaderhaddoneacoupleofstupidthingsthefirstwashiscommentontheaudienceandthesecondwasraisinghisviewsonpeoplewithhivvisitingtheukfornhstreatmentmrcrippssaidaskedifmrfaragesremarkshadputhimoffsupportingukipmrcrippssaiditdidabityeahidontthinkheputhimselfacrossrightifwerejusttalkingaboutthedebatetonightitswhatweshouldbetalkingaboutnotwhathesdonesixyearsagoortwoyearsagooryesterdayformenicolasturgeonwasthebestshewasslickshewaswellpreparedshehadbeenwellbriefedthatsmypersonalopinionedmilibanditwaslikehewasreadingoffascriptthebbcinsistedtheaudiencehadbeenchosenbyanindependentpollingorganisationtoreflectthesupportersofallpartiesnotjustthoserepresentedbytheirleaderslastnightaspokesmanforthecorporationtoldmailonlineyesterdayicmrecruitedanaudiencetogiveacrosssectionofpoliticalopinionicmapublicopinionresearchcompanyfoundedinwasunavailableforcommentlastnightukipmeanwhilewassaidtobepressingforaninquiryintohowtheaudiencewaschosenaskedafterthedebateaboutmrfaragescriticismoftheaudienceukipdeputyleaderpaulnuttallsaidiamnot
surewhetherwewillcomplainalliwillsayisididntthinktheaudiencewasrepresentativeoftheviewsofbritishpeopleithinknigelfaragewouldhavedoneverywellwithpeoplesittingathomemrnuttallsaidhehadspokenaboutthingswhichhadhappenedtohimonquestiontimetellingreporterswhoknowsmaybesomepeoplearebeingdisingenuousontheirapplicationformsaskedifadoptinganattackontheaudiencewasaneasygetoutclausefordismissingoppositiontoukipsviewsheaddedohnopeopleathomeagreewithusandthatswhatreallymattersthemillionsofpeopleathomewatchingthisweknowforexampleonimmigrationontheeuropeanuniontheyagreewithwhereukipstandswhattheaudienceinherereallythinkisisupposeirrelevanttweetingduringthedebatelordashcroftsaidaminorityviewonceagainnigelfaragehavingagoatthebbcaudiencemaywellappealtotheviewerswhosevoteheistargetingandskynewspresenterkayburleytweetedfaragehavingapopatbalanceofaudiencedodgygroundtherebroadcastersjumpthroughhoopstomakesurestudioisfairastheendcreditsrolledfollowingthebbcdebatelastnightfourofthefivepartyleadersshookhandswitheachotherbutmrfaragejustdrankfromaglassofwaterfrombehindhisstandasmissbennettandmisswoodwalkedawaymisssturgeonthenwenttoshakemrfarageshandandmrmilibandofferedtheukipleaderapatonthebackthenalloftheleaderswenttotheaudiencetoshaketheirhandswiththeexceptionagainofmrfarageheshookhandswithmrdimblebybeforeleavingwithoutgoinguptotheaudienceleftaloneastheendcreditsrolledfollowingthebbcdebatelastnightfourofthefivepartyleadersshookhandswitheachotherbutmrfaragejustdrankfromaglassofwaterfrombehindhisstandalloftheleaderswenttotheaudiencetoshaketheirhandswiththeexceptionagainofmrfarageheshookhandswithmrdimblebybeforeleavingwithoutgoinguptotheaudience 7700\n", + "9974 
thepodiumgirlisthecentreofasexismstormafterlewishamiltonsprayedchampagneinherfacesaidshewasntoffendedbyhisstuntandwasjustdoingherjobthebritishformularacerdrewcriticismfromaroundtheworldwhenheaimedthefizzdirectlyasthefaceofyearoldgridgirlliusiyingwhowaspicturedlookinglessthanimpressedbuttheshanghaiinstituteofvisualartgraduatewhowasapodiumgirlatthechinesegrandprixsaidshedidntthinkmuchabouttheincidentandwasntannoyedscrolldownforvideoliusiyingwaspicturedgrimacingaslewishamiltonsprayedchampagneatherfaceafterwinningtheracesexismcampaignerscalledhamiltonsbehaviourselfishbutsiyingsaidshedidnotthinktoomuchaboutitthehostesswhohasadegreesaidiwastoldbymyemployertostandonthepodiumandthatswhatididitlastedforonlyoneortwosecondsandididnotthinktoomuchaboutitatallmisssiyingtoldtheshanghaidailyithinksomeforeignmediaaremoresensitiveaboutthetopiccomparedtolocalmediaiwasjusttoldbymyemployertostandonthepodiumandthatswhatididmisssiyinghadaskedtoworkasapodiummodelonthestageofthechinesegrandprixinshanghaibecauseshewasafanoffinnishdriverkimiraikkonenaftertheincidentonsundayobjectacampaigngroupagainstsexismcondemnedthebehaviourofthedriverasselfishandinconsiderateandcalledonhamiltontoapologisetothegraduateothershavecalledthedrivingaceanembarrassmenttotheukwhileanothersaidhisactionsshowedthathewasanignorantclownscrolldownforvideoitsnotthefirsttimeracingacelewishamiltonhassprayedagridgirlwithbubblyhesalsodoneitafterwinningthespanishgrandprixandevenattheaustriangrandprixwherehelostoutonthetopspotafterseeingthepicturesthechiefexecutiveofcampaigngroupobjectaskedforhamiltontoapologisespeakingonmondayrozhardiechiefexecutiveofobjectsaidmisssiyingwouldhavehadittlechoicebuttostandthereandtakeitshesaidthephotographsappeartoshowthatthewomanisnotjustbeingsplashedbutthatthechampagneisbeingveryspecificallydirectedintoherfacewhichdoesnotlooklikeavoluntarypieceofhorseplayonherpartifthisifthecasewethinklewishamiltonshouldapologiseforhisactionsandthinkcarefullyabouthowhebehavesinthefutureformostpeopleitwouldbeapp
arentthatsheisnotenjoyingititissurelyaverydifficultpositiontobeagridgirlandshewouldhavehadlittleoptionbuttostandthereandtakeitthatissomethingofwhichheshouldbeawarebutinsteadheappearstohaveabusedherpositionitsunfortunatethatagreatvictoryhasbeenmarredbywhatappearstobeselfishandinconsideratebehaviourmshardieaddedthattheepisodehighlightedthemoregeneralissueofwomenbeingviewedassexualobjectsinthemotoringworldshesaidmotorracingappearstounnecessarilyportraywomenassexualisedobjectsandthatprobablymakesitevenharderforthewomentostandupforthemselveswewouldhopepeopleintheindustrywouldberespectfultothesewomenontwitterhamiltonwasdescribedasabullyanddisgustinganotherusersimplysaidmanpleasestopsprayingchampagneonthepodiumladiestheydontlikeitwhileoneoutragedusersaidhewasascumbaganembarrassmenttoukanotherwrotehaveabitmorerespectfortheladiespleasesprayingchampagneinherfacewasoutoforderchildidiotandonetweetedshameonyoufandlewishamiltonfordisgracefultreatmentofwomenduringchampagnecelebrationsexismitisnotthefirsttimetheracingiconhaswhippedouthisvictorymovehecelebratedinasimilarfashionafterwinningspanishgrandprixlastyearwhichwashisfourthconsecutivewininarowandtheonethatfinallyputhimontopoftheworldchampionshiptableandalthoughhelosttheaustriangrandprixtoteammatenicorosberghedidntshyawayfrompoppingthecorkdirectlyatahostesstheyearoldwasevenaskedtodemonstratehischampagneskillsongrahamnortonstalkshowinhamiltonalsosprayedahostessatmontmeloafterwinningthespanishgrandprixatcircuitdecatalunyabutnoteveryonewasoffendedbyhisbehaviourmanyfoundhisactionsamusingwhileanothersaidicanthinkofworsethingstohappenatworkthangettingsprayedinthefacewithchampagnebylewishamiltonlightenuppeoplehamiltoncontrolledthecontestinshanghaitosealhissecondvictoryinthreeracesandextendhisleadatthesummitoftheformulaoneworldchampionshipyesterdaybutitwasntallsmilesatyesterdaysraceafteraseriousfeudwassparkedbetweenhamiltonandhisteammatenicorosbergfollowingapressconferencehismercedespartneraccusedhimofseverelycompromisinghisstrategyand
leavinghimunderunnecessarypressurefromferrarissebastianvettelwiththetopthreedriverscoveredbyonlysecondsaheadofthefinalroundofpitstopsrosbergreportedontheradiothatlewisisdrivingveryslowlytellhimtospeeduphamiltonseeminglyprotectinghistyreswasgiventhehurryupbymercedesandbothheandrosbergeventuallyemergedfromtheirfinalpitstopfirstandsecondwithvettelthirdtheworldchampionsittingbesidehisteammatetoldthepressconferenceyesterdayiwasntcontrollinghisraceiwascontrollingmyownracemygoalwastolookaftermycarihadnorealthreatfromnicothroughoutthewholeraceitwasntallsmilesafteraseriousfeudwassparkedbetweenhamiltonandhisteammatenicorosbergthemercedesdriverposeswithhistrophyandchampagnebottleonthepodiumafterwinningonsundayavisiblyangryrosbergreplieditsjustnowinterestingtohearfromyoulewisthatyouwerejustthinkingaboutyourselfwiththepaceinfrontwhenthatwascompromisingmyracedrivingslowerthanwasmaybenecessaryinthebeginningofthestintmeantsebastianwasclosetomeanditopeneduptheopportunityforsebastiantotryanearlypitstoptotryandjumpmeandthenihadtocoverhimitwasunnecessarilyclosewithsebastianasaresultandalsoitcostmealotofracetimebecauseihadtocoverhimthenmytyresdiedattheendoftheracebecausemystintwasjustsomuchlongersoimunhappyaboutthatthespathasreopenedthewoundsfromlastyearwhenbothmentriedtooutmanoeuvreandattimesoutpyschoneanotherastheirbattleforthetitleunfoldeditculminatedinthepaircollidingwithoneanotherinthebelgiangrandprixresultinginrosbergbeingreprimandedandsanctionedbytheteamthisprovedtobeaturningpointinthechampionshipashamiltonwentontodominatetheclosingracesenroutetotakinghissecondcrownnicorosbergaccusedhamiltonofseverelycompromisinghisstrategyandleavinghimunderpressure 2849\n" + ] + } + ], + "source": [ + "index_of_article = dict()\n", + "for index,s in enumerate(article_sorted):\n", + " if s[:400] in index_of_article:\n", + " print(index,s,index_of_article[s[:400]])\n", + " index_of_article[s[:400]]=index" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": 
{ + "end_time": "2019-12-28T03:25:19.661040Z", + "start_time": "2019-12-28T03:25:19.655753Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "11048" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(index_of_article)" + ] + }, + { + "cell_type": "code", + "execution_count": 375, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-24T09:20:35.311810Z", + "start_time": "2019-12-24T09:20:35.292230Z" + } + }, + "outputs": [], + "source": [ + "s_list = [\"\" for _ in range(11490)]\n", + "for i in range(11490):\n", + " index = index_of_article[article[i][:100]]\n", + " while s_list[index] != \"\":\n", + " index -= 1\n", + " if index == 11490:\n", + " index = 11489\n", + " break\n", + " s_list[index] = pred[i]" + ] + }, + { + "cell_type": "code", + "execution_count": 385, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-24T09:23:19.553322Z", + "start_time": "2019-12-24T09:23:19.542864Z" + } + }, + "outputs": [], + "source": [ + "with open(\"./result/cnndm/pred_PointerGenerator.txt\",\"w\") as f:\n", + " for s in s_list:\n", + " f.write(s)\n", + " if s[-1]!=\"\\n\":\n", + " f.write(\"\\n\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + } + }, + 
class SummaryDataProcessor():
    """Select candidate sentences for a summary sentence and parse them.

    For each (article, summary) pair the processor keeps the first summary
    sentence, the article sentence most similar to it (the positive sample)
    and ``topk - 1`` article sentences sampled at a uniform stride over the
    similarity ranking (the negative samples).  ``build_graph`` then runs
    spaCy over every kept sentence and records one dependency edge per token.
    """

    def __init__(self, topk):
        """Load the spaCy pipeline and reset all per-pair and vocabulary state.

        Args:
            topk: number of article sentences kept per summary sentence
                (one positive plus ``topk - 1`` negatives).
        """
        self.nlp = spacy.load("en_core_web_lg")
        self.doc = ""
        self.summary = ""
        self.topk = topk
        self.doc_sentences = []
        self.summary_sentences = []
        self.candidate = []
        # Vocabularies are filled incrementally by build_graph().
        self.pos2id = dict()
        self.dependency2id = dict()
        self.word2id = dict()
        self.word_count = 0
        self.dependency_count = 0
        self.pos_count = 0

    def update(self, doc, summary):
        """Store a new (article, summary) pair and split both into sentences.

        Only the text before the first newline is used.  Sentences are
        separated by ``" . "``; the piece after the last separator is dropped.
        A summary without any separator is kept as a single sentence.
        """
        self.doc = doc.split("\n", 1)[0]
        self.summary = summary.split("\n", 1)[0]

        self.doc_sentences = self.doc.split(" . ")[:-1]

        if " . " in self.summary:
            self.summary_sentences = self.summary.split(" . ")[:-1]
        else:
            self.summary_sentences = [self.summary]

        self.candidate = []

    def jacsim(self, str1, str2):
        """Return the Jaccard similarity of two space-tokenised strings.

        The value is rounded to three decimals.
        """
        tokens1 = set(str1.split(" "))
        tokens2 = set(str2.split(" "))
        unionlength = float(len(tokens1 | tokens2))
        interlength = float(len(tokens1 & tokens2))
        return float('%.3f' % (interlength / unionlength))

    def make_k_candidate(self):
        """Append ``[summary, positive, negative...]`` for the first summary sentence.

        Only the first summary sentence is processed, as before.  Nothing is
        appended when the article has fewer than ``topk`` sentences (or none
        at all); previously that case went through a caught ``ValueError``
        with debug prints, or raised ``IndexError`` for an empty article.
        ``topk == 1`` now yields a positive sample with no negatives instead
        of raising ``ZeroDivisionError``.
        """
        if not self.summary_sentences:
            return
        summary_sentence = self.summary_sentences[0]

        n_negative = max(self.topk - 1, 0)
        if len(self.doc_sentences) < max(self.topk, 1):
            # Not enough sentences to draw a positive plus topk-1 negatives.
            return

        # Rank article sentences by similarity; ties fall back to the
        # sentence text, exactly like sorting the (score, sentence) pairs.
        ranked = [
            sentence for _, sentence in sorted(
                ((self.jacsim(sentence, summary_sentence), sentence)
                 for sentence in self.doc_sentences),
                reverse=True)
        ]
        pos_sample = ranked[0]

        negatives = []
        if n_negative > 0:
            # len(ranked) >= topk guarantees step >= 1 here.
            step = (len(ranked) - 1) // n_negative
            negatives = ranked[1::step][:n_negative]

        # Layout expected downstream: summary first, positive second.
        self.candidate.append([summary_sentence, pos_sample] + negatives)

    def build_graph(self, index):
        """Parse every candidate sentence and return one dict per sentence.

        Each dict has the keys ``'sentence'``, ``'index'`` (the value passed
        in) and ``'edges'``: a list of tuples ``(text, i, pos, dep,
        head_text, head_i, head_pos)``, one per token.  New words, POS tags
        and dependency labels are added to the vocabularies as a side effect.
        """
        sample = []
        for summary_wise in self.candidate:
            for sentence in summary_wise:
                edges = []
                for token in self.nlp(sentence):
                    if token.text not in self.word2id:
                        self.word2id[token.text] = self.word_count
                        self.word_count += 1
                    if token.pos_ not in self.pos2id:
                        self.pos2id[token.pos_] = self.pos_count
                        self.pos_count += 1
                    if token.dep_ not in self.dependency2id:
                        self.dependency2id[token.dep_] = self.dependency_count
                        self.dependency_count += 1
                    edges.append(
                        (token.text, token.i, token.pos_, token.dep_,
                         token.head.text, token.head.i, token.head.pos_))
                sample.append({
                    'sentence': sentence,
                    'index': index,
                    'edges': edges,
                })
        return sample
def collate(samples):
    """Merge a list of (summary, positive, negative) samples into one batch.

    Each sample is a 6-tuple ``(graph_summary, onehot_summary, graph_pos,
    onehot_pos, graph_neg, onehot_neg)``.  The three graph columns are
    batched with ``dgl.batch``; the three id-list columns are flattened in
    sample order and converted to tensors.

    Returns:
        ``(batched_graph_summary, ids_summary, batched_graph_pos, ids_pos,
        batched_graph_neg, ids_neg)``.
    """
    from itertools import chain

    graph_summary, onehot_summary, graph_pos, onehot_pos, graph_neg, onehot_neg = map(
        list, zip(*samples))

    batched_graph_summary = dgl.batch(graph_summary)
    batched_graph_pos = dgl.batch(graph_pos)
    batched_graph_neg = dgl.batch(graph_neg)

    # chain.from_iterable flattens in linear time; sum(lists, []) re-copies
    # the accumulated list for every sample.
    onehot_summary = list(chain.from_iterable(onehot_summary))
    onehot_pos = list(chain.from_iterable(onehot_pos))
    onehot_neg = list(chain.from_iterable(onehot_neg))

    return batched_graph_summary, torch.tensor(
        onehot_summary), batched_graph_pos, torch.tensor(
            onehot_pos), batched_graph_neg, torch.tensor(onehot_neg)
class GCNLayer(nn.Module):
    """One graph-convolution layer operating on the node feature ``'h'``.

    ``forward`` applies a linear map, sums neighbour messages weighted by the
    edge feature ``'sym_norm'``, then optionally adds a bias, applies batch
    normalisation and an activation.
    """

    def __init__(self,
                 in_feats,
                 out_feats,
                 activation,
                 bias=True,
                 batch_normalization=False):
        """Create the layer.

        Args:
            in_feats: width of the incoming node feature.
            out_feats: width of the produced node feature.
            activation: ``"ReLU"`` for ReLU, ``None`` for no activation,
                any other value for LeakyReLU.
            bias: add a learned bias after aggregation.
            batch_normalization: apply BatchNorm1d after the bias.
        """
        super(GCNLayer, self).__init__()
        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_feats))
        else:
            self.bias = None

        # None used to fall through to LeakyReLU, which contradicted both the
        # "no activation" output layer in GCN and the None check in forward().
        if activation is None:
            self.activation = None
        elif activation == "ReLU":
            self.activation = nn.ReLU()
        else:
            self.activation = nn.LeakyReLU()

        self.reset_parameters()

        if batch_normalization:
            # Normalisation runs on the layer output, so it must be sized
            # with out_feats (it was in_feats).
            self.bn = nn.BatchNorm1d(num_features=out_feats, affine=True)
        else:
            self.bn = None

    def reset_parameters(self):
        """Initialise weight and bias uniformly in ``[-1/sqrt(out), 1/sqrt(out)]``."""
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, g):
        """Update ``g.ndata['h']`` in place and return ``g``."""
        # linear
        g.ndata['h'] = torch.mm(g.ndata['h'], self.weight)

        # message passing: source feature times edge 'sym_norm', summed per node
        g.update_all(fn.u_mul_e('h', 'sym_norm', 'm'), fn.sum('m', 'h'))

        # bias
        if self.bias is not None:
            g.ndata['h'] = g.ndata['h'] + self.bias

        # batch normalization
        if self.bn is not None:
            g.ndata['h'] = self.bn(g.ndata['h'])

        # activation
        if self.activation is not None:
            g.ndata['h'] = self.activation(g.ndata['h'])

        return g


class GCN(nn.Module):
    """Stack of GCN layers: input layer, hidden layers, linear output layer."""

    def __init__(self, in_feats, n_hidden, n_hidden_layers, activation):
        super(GCN, self).__init__()

        self.layers = nn.ModuleList()

        # input layer
        self.layers.append(GCNLayer(in_feats, n_hidden, activation))

        # hidden layers
        for _ in range(n_hidden_layers):
            self.layers.append(GCNLayer(n_hidden, n_hidden, activation))

        # output layer, no activation
        self.layers.append(GCNLayer(n_hidden, n_hidden, None))

    def forward(self, g):
        """Run every layer over ``g`` in order and return the graph."""
        for layer in self.layers:
            g = layer(g)
        return g


class _SyntacticGraphBase(nn.Module):
    """Helpers shared by the embedding and the scoring network.

    Subclasses are expected to define ``self.embedding``, ``self.gcn`` and to
    set ``graph_embedding_summary`` / ``graph_embedding_pos`` in ``forward``.
    """

    def Linear(self, in_features, out_features, dropout=0):
        """Return a weight-normalised linear layer with scaled normal init."""
        m = nn.Linear(in_features, out_features)
        nn.init.normal_(m.weight,
                        mean=0,
                        std=math.sqrt((1 - dropout) / in_features))
        nn.init.constant_(m.bias, 0)
        return nn.utils.weight_norm(m)

    def get_graph_embedding(self):
        """Return the summary and positive embeddings from the last forward."""
        return self.graph_embedding_summary, self.graph_embedding_pos

    def l2_norm(self, input, axis=1):
        """Scale ``input`` to unit L2 norm along ``axis``.

        The norm is clamped away from zero so an all-zero row yields zeros
        instead of NaN.
        """
        norm = torch.norm(input, dim=axis, keepdim=True).clamp_min(1e-12)
        output = torch.div(input, norm)
        return output

    def _pool(self, g, onehot):
        """Embed node ids, run the GCN and mean-pool node features per graph."""
        g.ndata['h'] = self.embedding(onehot)
        g = self.gcn(g)
        return dgl.mean_nodes(g, 'h')


class SyntacticGraphNet(_SyntacticGraphBase):
    """Encode three batched graphs into L2-normalised graph embeddings."""

    def __init__(self,
                 in_feats,
                 n_hidden,
                 n_hidden_layers,
                 vocab_size,
                 activation="ReLU"):
        super(SyntacticGraphNet, self).__init__()
        self.gcn = GCN(in_feats, n_hidden, n_hidden_layers, activation)
        # The projection consumes the GCN output, whose width is n_hidden
        # (identical to the old in_feats sizing when the two are equal).
        self.linear = self.Linear(n_hidden, in_feats)
        self.embedding = torch.nn.Embedding(vocab_size, in_feats)

    def forward(self, g_summary, onehot_summary, g_pos, onehot_pos, g_neg,
                onehot_neg):
        """Return normalised embeddings of the summary, positive and negative graphs."""
        self.graph_embedding_summary = self.l2_norm(
            self.linear(self._pool(g_summary, onehot_summary)))
        self.graph_embedding_pos = self.l2_norm(
            self.linear(self._pool(g_pos, onehot_pos)))
        self.graph_embedding_neg = self.l2_norm(
            self.linear(self._pool(g_neg, onehot_neg)))

        return self.graph_embedding_summary, self.graph_embedding_pos, self.graph_embedding_neg


class SyntacticGraphScoreNet(_SyntacticGraphBase):
    """Score (summary, positive) and (summary, negative) graph pairs."""

    def __init__(self,
                 in_feats,
                 n_hidden,
                 n_hidden_layers,
                 vocab_size,
                 activation="ReLU"):
        super(SyntacticGraphScoreNet, self).__init__()
        self.gcn = GCN(in_feats, n_hidden, n_hidden_layers, activation)
        # Scores are computed from two concatenated GCN outputs of width
        # n_hidden each.
        self.linear = self.Linear(2 * n_hidden, 1)
        self.embedding = torch.nn.Embedding(vocab_size, in_feats)

    def forward(self, g_summary, onehot_summary, g_pos, onehot_pos, g_neg,
                onehot_neg):
        """Return the batch-mean sigmoid score of the positive and negative pairs."""
        self.graph_embedding_summary = self.l2_norm(
            self._pool(g_summary, onehot_summary))
        self.graph_embedding_pos = self.l2_norm(self._pool(g_pos, onehot_pos))
        self.graph_embedding_neg = self.l2_norm(self._pool(g_neg, onehot_neg))

        # score each pair with the linear layer on the concatenated embeddings
        score_sum_pos = torch.sigmoid(
            self.linear(
                torch.cat(
                    (self.graph_embedding_summary, self.graph_embedding_pos),
                    dim=1)))
        score_sum_neg = torch.sigmoid(
            self.linear(
                torch.cat(
                    (self.graph_embedding_summary, self.graph_embedding_neg),
                    dim=1)))
        score_sum_pos = torch.mean(score_sum_pos)
        score_sum_neg = torch.mean(score_sum_neg)

        return score_sum_pos, score_sum_neg
def show_statistic(g):
    """Print the node count, edge count and the ``'norm'`` node feature of ``g``."""
    print("nodes: %d" % (g.number_of_nodes()))
    print("edges: %d" % (g.number_of_edges()))
    print(g.ndata['norm'])
    return


def create_type2id(dependency2id, pos2id):
    '''Combine dependency2id and pos2id into one type2id vocabulary.

    Dependency labels come first, POS tags second, each in the iteration
    order of its dict.  Ids are dense (``0 .. len(type2id) - 1``): a label
    present in both inputs keeps its first id, so no id can exceed the size
    of an embedding table built with ``len(type2id)`` rows.
    '''
    type2id = dict()
    for vocab in (dependency2id, pos2id):
        for label in vocab:
            if label not in type2id:
                type2id[label] = len(type2id)
    return type2id


def extract_bin(atom,
                sth2id,
                graph,
                name,
                heterogeneous=False,
                visualize=False,
                save_gml=False):
    '''Build a DGL graph and a node-id list from one parsed sentence.

    Each sample contains topk + 1 atoms: [summary, pos, neg_1, ..., neg_topk-1].
    Each atom is a dictionary whose 'edges' value is a list of tuples
    (src_word, src_id, src_pos, dep, tgt_word, tgt_id, tgt_pos).

    Every token becomes a node labelled with its POS tag; every distinct
    dependency label in the sentence becomes one extra node.  For each token
    the edges token -> dep node and dep node -> head token are added.

    Args:
        atom: parsed sentence dictionary ('sentence', 'index', 'edges').
        sth2id: vocabulary mapping POS tags and dependency labels to ids.
        graph: "undirected" adds the reverse of every edge as well.
        name: tag used in the GML file name.
        heterogeneous: not implemented; returns None when true.
        visualize: draw the graph with networkx / matplotlib.
        save_gml: print the atom and, for article index 9, write a GML file.

    Returns:
        (g, onehot): the DGL graph and the per-node vocabulary ids.

    Raises:
        ValueError: if the atom has no edges.
    '''
    if heterogeneous:
        # NOTE(review): callers unpack two values, so this path is unusable as is.
        return None
    g = dgl.DGLGraph()

    # a dependency label is a node; all relations of the same label in this
    # sentence share one node
    word_count = len(atom['edges'])
    # sorted() makes the node numbering of label nodes reproducible across runs
    dep_labels = sorted({x[3] for x in atom['edges']})
    dep_count = len(dep_labels)
    g.add_nodes(word_count + dep_count)
    onehot = [-1 for _ in range(word_count + dep_count)]

    # label nodes are numbered after the token nodes
    dep2node = dict()
    for offset, dep in enumerate(dep_labels):
        dep2node[dep] = word_count + offset
        onehot[word_count + offset] = sth2id[dep]

    # add edges and token-node ids
    edge_list = []
    for idx, edge in enumerate(atom['edges']):
        # src -> dep
        edge_list.append((edge[1], dep2node[edge[3]]))
        # dep -> tgt
        edge_list.append((dep2node[edge[3]], edge[5]))
        # node attribute: POS tag of the token
        onehot[idx] = sth2id[edge[2]]

    # save gml graph for better visualization
    if save_gml:
        print(atom['index'])
        print(atom['sentence'])
        if atom['index'] == 9:
            G_gml = nx.Graph()
            for idx, edge in enumerate(atom['edges']):
                # src -> dep
                G_gml.add_edge(edge[2] + '_' + str(edge[1]), edge[3])
                # dep -> tgt
                G_gml.add_edge(edge[3], edge[6] + '_' + str(edge[5]))
            nx.write_gml(G_gml,
                         './G_' + name + "_" + str(atom['index']) + ".gml")

    if not edge_list:
        raise ValueError("atom has no edges: %r" % (atom['sentence'], ))

    # add edges into DGL graph
    src, dst = tuple(zip(*edge_list))
    g.add_edges(src, dst)
    if graph == "undirected":
        g.add_edges(dst, src)

    # per-node norm: sqrt(in-degree + out-degree)
    unnormed = g.in_degrees(g.nodes()) + g.out_degrees(g.nodes())
    g.ndata['norm'] = torch.sqrt(unnormed.float())

    # symmetric normalisation value on every edge
    for i in range(g.number_of_edges()):
        src, tgt = g.find_edges(i)
        g.edges[i].data['sym_norm'] = 1.0 / \
            (g.nodes[src].data['norm'] * g.nodes[tgt].data['norm'])

    if visualize:
        id2sth = {v: k for k, v in sth2id.items()}
        labels = dict()
        for idx, i in enumerate(onehot):
            labels[idx] = id2sth[i]

        nx_G = g.to_networkx()
        pos = nx.nx_agraph.graphviz_layout(nx_G, prog='dot')
        nx.draw(nx_G,
                pos,
                with_labels=True,
                labels=labels,
                node_size=800,
                node_color=[[.7, .7, .7]],
                arrowsize=5)
        plt.show()

    return g, onehot


def _dump(obj, path):
    """Pickle ``obj`` to ``path`` and close the file."""
    with open(path, "wb") as f:
        pickle.dump(obj, f)


def build_homogeneous(train_bin, graph, save_gml=True):
    '''Convert parsed samples into (summary, pos, neg) graph triplets on disk.

    Only POS tags and dependency labels are used as node features; all nodes
    share one type.  The combined vocabulary is written to ./data/type2id.
    Every ``params.bin_size`` samples three files are written: the triplets,
    the (summary, positive) sentence pairs and the article indices.

    NOTE(review): samples left over after the last full bin are not written,
    as in the original.

    Args:
        train_bin: list of samples, each a list of topk + 1 atoms.
        graph: "directed" or "undirected".
        save_gml: forwarded to extract_bin (default keeps the old behaviour).

    Raises:
        ValueError: for an unknown ``graph`` value.
        IndexError: re-raised after printing the offending sample.
    '''
    if graph == "directed":
        prefix = "./data/large_directed"
    elif graph == "undirected":
        prefix = "./data/large_undirected"
    else:
        raise ValueError("graph must be 'directed' or 'undirected', got %r" %
                         (graph, ))

    # get id
    with open("./data/dependency2id", "rb") as f:
        dependency2id = pickle.load(f)
    with open("./data/pos2id", "rb") as f:
        pos2id = pickle.load(f)
    type2id = create_type2id(dependency2id, pos2id)
    _dump(type2id, "./data/type2id")

    # prepare processed sample lists
    result_list = []
    sentence_pair_list = []
    id_list = []

    count = 0
    bin_num = 0

    for sample in tqdm(train_bin):
        try:
            summary_graph, summary_onehot = extract_bin(sample[0],
                                                        type2id,
                                                        graph,
                                                        "gold",
                                                        save_gml=save_gml)
        except IndexError:
            # The bare name `exit` did nothing here; stop with the real error.
            print(sample)
            raise

        pos_graph, pos_onehot = extract_bin(sample[1],
                                            type2id,
                                            graph,
                                            "pos",
                                            save_gml=save_gml)

        # pick one of the negatives, stored at positions 2 .. topk
        rand_choose = random.randint(2, params.topk)
        neg_graph, neg_onehot = extract_bin(sample[rand_choose],
                                            type2id,
                                            graph,
                                            "neg",
                                            save_gml=save_gml)

        result_list.append((summary_graph, summary_onehot, pos_graph,
                            pos_onehot, neg_graph, neg_onehot))
        sentence_pair_list.append(
            (sample[0]['sentence'], sample[1]['sentence']))
        id_list.append(sample[0]['index'])

        count += 1

        if count % params.bin_size == 0:
            _dump(result_list, prefix + ".bin" + str(bin_num))
            _dump(sentence_pair_list,
                  prefix + "sentence_pair" + ".bin" + str(bin_num))
            _dump(id_list, prefix + "id_list" + ".bin" + str(bin_num))
            result_list = []
            sentence_pair_list = []
            id_list = []
            bin_num += 1


if __name__ == "__main__":
    # parse argument
    parser = ArgumentParser()
    parser.add_argument("-g",
                        "--graph",
                        help="graph type, undirected|directed",
                        default="undirected")

    args = parser.parse_args()
    graph = args.graph

    with open("./data/train.bin", "rb") as f:
        train_bin = pickle.load(f)
    print("train_bin loaded")
    build_homogeneous(train_bin, graph)
def collate(samples):
    """Merge a list of (summary, positive, negative) samples into one batch.

    Each sample is a 6-tuple ``(graph_summary, onehot_summary, graph_pos,
    onehot_pos, graph_neg, onehot_neg)``.  Graph columns are batched with
    ``dgl.batch``; id-list columns are flattened in sample order and turned
    into tensors.
    """
    from itertools import chain

    graph_summary, onehot_summary, graph_pos, onehot_pos, graph_neg, onehot_neg = map(
        list, zip(*samples))

    batched_graph_summary = dgl.batch(graph_summary)
    batched_graph_pos = dgl.batch(graph_pos)
    batched_graph_neg = dgl.batch(graph_neg)

    # linear-time flatten; sum(lists, []) is quadratic in the batch size
    onehot_summary = list(chain.from_iterable(onehot_summary))
    onehot_pos = list(chain.from_iterable(onehot_pos))
    onehot_neg = list(chain.from_iterable(onehot_neg))

    return batched_graph_summary, torch.tensor(
        onehot_summary), batched_graph_pos, torch.tensor(
            onehot_pos), batched_graph_neg, torch.tensor(onehot_neg)


def model_summary(model):
    """Print the model and its number of trainable parameters."""
    print(model)
    print(sum(p.numel() for p in model.parameters() if p.requires_grad))


def margin_triplet_score_loss(score_pos, score_neg, margin):
    """Return the hinge loss ``max(0, score_neg - score_pos + margin)``.

    For tensor inputs the result is always a tensor attached to the autograd
    graph.  The builtin ``max`` used before returned the int ``0`` whenever
    the hinge was inactive, so ``loss.backward()`` raised AttributeError.
    Plain numbers are still accepted and return a plain number.
    """
    hinge = score_neg - score_pos + margin
    if torch.is_tensor(hinge):
        return torch.clamp(hinge, min=0)
    return max(0, hinge)


def load_data(dataset_path, bin_num):
    """Load one pickled data bin and wrap it in a shuffling DataLoader.

    Reports progress to the module-level Visdom client ``viz``.

    Returns:
        ``(train_bin, data_loader)``.
    """
    bin_path = dataset_path + str(bin_num)
    with open(bin_path, "rb") as f:
        train_bin = pickle.load(f)
    viz.text(bin_path + " loaded", win='log', append=True)
    data_loader = DataLoader(train_bin,
                             batch_size=params.batch_size,
                             shuffle=True,
                             collate_fn=collate)
    return train_bin, data_loader


if __name__ == "__main__":
    # parse argument
    parser = ArgumentParser()
    parser.add_argument(
        "-d",
        "--data",
        help="dataset name, small|middle|large_undirected|large_directed",
        default="middle")
    parser.add_argument("-m",
                        "--model",
                        help="model name, embedding|score",
                        choices=["embedding", "score"],
                        default="score")

    args = parser.parse_args()
    dataset_path = "./data/" + args.data + ".bin"
    model_name = args.model
    save_name = 'model_' + model_name + "_" + args.data
    is_embedding = model_name == "embedding"

    # visualize loss
    viz = Visdom(env=save_name)
    opts_loss = {
        'title': save_name,
        'xlabel': 'every batch',
        'ylabel': 'Loss',
        'showlegend': 'true'
    }
    opts_dis_sim = {
        'title': 'Similarity Distance',
        'xlabel': 'every batch',
        'ylabel': 'Distance',
        'showlegend': 'true'
    }
    opts_dis_score = {
        'title': 'Pos/Neg Diff Score',
        'xlabel': 'every batch',
        'ylabel': 'Score Diff',
        'showlegend': 'true'
    }
    opts_dis_embed = {
        'title': 'Embedding Distance',
        'xlabel': 'every batch',
        'ylabel': 'Distance',
        'showlegend': 'true'
    }
    # the second plot differs per model: distance gap vs. score gap
    if is_embedding:
        metric_win, metric_opts = 'dis_sim', opts_dis_sim
    else:
        metric_win, metric_opts = 'dis_score', opts_dis_score

    with open("./data/type2id", "rb") as f:
        type2id = pickle.load(f)
    viz.text("vocab loaded", win='log', append=False)

    loss_func = torch.nn.TripletMarginLoss(margin=params.loss_margin)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create model
    model_cls = SyntacticGraphNet if is_embedding else SyntacticGraphScoreNet
    model = model_cls(in_feats=params.hidden_size,
                      n_hidden=params.hidden_size,
                      n_hidden_layers=1,
                      vocab_size=len(type2id)).to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.train()
    step = 0
    loss_save = []
    dis_sim_save = []
    dis_embed_save = []

    reload_every = 20  # epochs spent on a bin before switching to the next
    for epoch in range(params.epoches):
        if epoch % reload_every == 0:
            # Rotate through the bins.  The previous `epoch % bin_total` was
            # only evaluated at multiples of 20 and therefore always gave 0.
            bin_num = (epoch // reload_every) % params.bin_total
            train_bin, data_loader = load_data(dataset_path, bin_num)

        for gs, ids_s, gp, ids_p, gn, ids_n in tqdm(data_loader):
            update = 'append' if step > 1 else None

            # NOTE(review): the graph .to() return value is unused, as in the
            # original; newer DGL releases return a new graph -- confirm
            # against the installed version.
            gs.to(device)
            ids_s = ids_s.to(device)
            gp.to(device)
            ids_p = ids_p.to(device)
            gn.to(device)
            ids_n = ids_n.to(device)

            if is_embedding:
                emb_summary, emb_pos, emb_neg = model(gs, ids_s, gp, ids_p,
                                                      gn, ids_n)
                loss = loss_func(emb_summary, emb_pos, emb_neg)
            else:
                score_sum_pos, score_sum_neg = model(gs, ids_s, gp, ids_p, gn,
                                                     ids_n)
                loss = margin_triplet_score_loss(score_sum_pos, score_sum_neg,
                                                 params.loss_margin_score)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            step += 1

            loss_value = loss.detach().item()

            if is_embedding:
                # mean gap between negative and positive distances
                sim_sum_p = torch.norm(emb_summary - emb_pos, dim=1)
                sim_sum_n = torch.norm(emb_summary - emb_neg, dim=1)
                metric = torch.mean(sim_sum_n - sim_sum_p,
                                    dim=0).detach().item()
            else:
                metric = score_sum_pos.detach().item(
                ) - score_sum_neg.detach().item()

            raw_embedding_summary, raw_embedding_pos = model.get_graph_embedding(
            )
            dis_embedding = torch.norm(raw_embedding_summary -
                                       raw_embedding_pos,
                                       dim=1)
            dis_embedding_mean = torch.mean(dis_embedding,
                                            dim=0).detach().item()

            loss_save.append(loss_value)
            dis_sim_save.append(metric)
            dis_embed_save.append(dis_embedding_mean)

            if step % params.print_every == 0:
                viz.text('step {}, loss {:.4f}'.format(step, loss_value),
                         win='log',
                         append=True)
                viz.line(X=torch.FloatTensor([step]),
                         Y=torch.FloatTensor([loss_value]),
                         win='loss',
                         update=update,
                         opts=opts_loss,
                         name='train')

                viz.line(X=torch.FloatTensor([step]),
                         Y=torch.FloatTensor([metric]),
                         win=metric_win,
                         update=update,
                         opts=metric_opts,
                         name='train')

                viz.line(X=torch.FloatTensor([step]),
                         Y=torch.FloatTensor([dis_embedding_mean]),
                         win='dis_embedding',
                         update=update,
                         opts=opts_dis_embed,
                         name='train')

    torch.save(model.state_dict(), './save_model/' + save_name + '.pkl')
    with open("./record/loss_" + save_name + '.pkl', "wb") as f:
        pickle.dump(loss_save, f)
    with open("./record/dis_sim_" + save_name + '.pkl', "wb") as f:
        pickle.dump(dis_sim_save, f)
    with open("./record/dis_embed_" + save_name + '.pkl', "wb") as f:
        pickle.dump(dis_embed_save, f)