Skip to content


Restructured DPhate as a class, added usage in readme, added some pro…
Browse files Browse the repository at this point in the history
  • Loading branch information
DrejcPesjak committed Apr 7, 2022
1 parent e1ddffc commit 894a280
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 166 deletions.
284 changes: 167 additions & 117 deletions
Original file line number Diff line number Diff line change
@@ -1,124 +1,174 @@

''' *************************DPhate algorithm**************************** '''

import warnings
import numpy as np
import json
import torch
import re
def decontracted(phrase):
    """Expand common English contractions in ``phrase`` and return the result.

    The rules are applied one after another with ``re.sub``. Note that
    ``'s`` is always rewritten to `` is``, so possessives are expanded too.
    """
    # Order matters: the irregular forms ("won't", "can't") have to be
    # rewritten before the generic "n't" rule gets a chance to match them.
    expansions = (
        (r"won\'t", "will not"),
        (r"can\'t", "can not"),
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
    )
    expanded = phrase
    for pattern, replacement in expansions:
        expanded = re.sub(pattern, replacement, expanded)
    return expanded

from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from collections import deque
from detoxify import Detoxify
from nltk.tokenize import RegexpTokenizer
tokenizer = RegexpTokenizer(r'\w+')
from nltk import pos_tag
from better_profanity import profanity

def paraphrase_toxic(phrase,x,y,z,t):
    """Paraphrase ``phrase`` and score every candidate for toxicity.

    ``x``, ``y`` and ``z`` are cast to ``int`` and forwarded to
    ``get_response`` as ``num_return_sequences``, ``num_beams`` and
    ``groups``; ``t`` is forwarded unchanged as ``diversityP``.

    Returns a tuple ``(candidates, toxicity, non_toxic)``: the generated
    paraphrases, their ``'toxicity'`` scores as a NumPy array, and the
    indices of the candidates whose score is below 0.5.
    """
    candidates = get_response(
        phrase,
        num_return_sequences=int(x),
        num_beams=int(y),
        groups=int(z),
        diversityP=t,
    )
    toxicity = np.array(modelD.predict(candidates)['toxicity'])
    non_toxic = np.flatnonzero(toxicity < 0.5)
    return candidates, toxicity, non_toxic

def similar(pra,ix, phraseO,simStep):
    """Keep the non-toxic paraphrases that stay close in meaning to ``phraseO``.

    ``ix`` selects the non-toxic entries of ``pra``. Each of them is scored
    against ``phraseO`` with ``similarity`` and compared with a cut-off of
    ``0.57 + 0.1*(3-simStep)``; inputs of four words or fewer use a fixed
    cut-off of 0.9 instead.

    Returns ``(non_toxic, keep)`` where ``keep`` holds the indices into
    ``non_toxic`` whose similarity exceeds the cut-off.
    """
    non_toxic = list(np.array(pra)[ix])
    scores = similarity(phraseO, non_toxic)[0]
    is_short = len(phraseO.split()) <= 4
    cutoff = 0.9 if is_short else 0.57 + 0.1*(3-simStep)
    print(cutoff, scores)
    keep = np.flatnonzero(scores > cutoff)
    return non_toxic, keep

def post_processing(plist):
    """Drop paraphrases that look like generation artefacts.

    A sentence is discarded when it contains ``"NationMaster"``, ``"888-"``
    or ``"800-"``, or when it is written entirely in upper case. The
    surviving sentences are returned as a list in their original order.
    """
    banned_fragments = ("NationMaster", "888-", "800-")
    kept = []
    for sentence in plist:
        if any(fragment in sentence for fragment in banned_fragments):
            continue
        if sentence.isupper():
            continue
        kept.append(sentence)
    return kept

def delete_vulgar_adj(h):
    """Remove profane adjectives and adverbs from the sentence ``h``.

    The sentence is normalised (typographic apostrophes replaced,
    contractions expanded), split into ``\\w+`` tokens and POS-tagged. A token
    counts as profane when it no longer appears among the tokens of the
    ``profanity.censor`` output. Profane tokens tagged ``JJ*`` or ``RB*`` are
    dropped; every other token is kept exactly once.

    Returns the kept tokens joined by single spaces, with a trailing space
    (the format the original string concatenation produced).
    """
    # Use plain apostrophes and close up "' " so the contraction patterns in
    # ``decontracted`` can match.
    h = h.replace("’","'").replace("' ","'")
    dph = decontracted(h)

    words = tokenizer.tokenize(dph)
    tagged = pos_tag(words)

    # Tokens that vanish once the text is censored are the profane ones.
    censored_tokens = set(tokenizer.tokenize(profanity.censor(dph)))
    vulgar = set(words) - censored_tokens

    # Previously every adjective/adverb was appended a second time and the
    # vulgar ones were never removed; drop only the vulgar JJ*/RB* tokens.
    kept = [
        word
        for word, tag in tagged
        if not (word in vulgar and tag.startswith(('JJ', 'RB')))  # or tag == 'FW'
    ]
    return "".join(word + " " for word in kept)

hate = np.loadtxt('hate.txt', dtype='str' , delimiter="\n")
div = np.loadtxt('div.txt', dtype='str' , delimiter="\n")
div = div.astype(int)
#for i in range(len(hate)):
for i in range(len(div)):
print(100*'*', i)
x,y,z,t = values[div[i]]
phraseO = hate[i]
phrase = delete_vulgar_adj(phraseO)
pra,tox,ix = paraphrase_toxic(phrase,x,y,z,t)
if len(ix) > 0:
print("first: ix > 0")
praGood,ixSim = similar(pra,ix,phraseO, div[i])
if len(ixSim) > 0:
print("first: ixSim > 0")
plist = np.array(praGood)[ixSim]
post = post_processing(plist)
if len(post)>0:
if i%20==0:
fname = "dataset3/data" + str(i) + ".json"
with open(fname,'w') as fp:

class DPhate:
def __init__(self):
    """Load the paraphrasing, sentence-embedding and toxicity models."""
    self.torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Pegasus tokenizer/model pair used to generate paraphrases.
    checkpoint = 'tuner007/pegasus_paraphrase'
    self.tokenizerP = PegasusTokenizer.from_pretrained(checkpoint)
    paraphraser = PegasusForConditionalGeneration.from_pretrained(checkpoint)
    self.modelP = paraphraser.to(self.torch_device)

    # Sentence encoder used by ``similarity``.
    self.model = SentenceTransformer('bert-base-nli-mean-tokens')

    # Toxicity classifier used by ``paraphrase_toxic``.
    self.modelD = Detoxify('original', device=self.torch_device)

    # One row per toxicity category. ``predict`` unpacks a row as
    # (num_return_sequences, num_beams, groups, diversityP).
    self.values = [
        [20., 100., 25., 1.],
        [30., 100., 50., 3.],
        [40., 100., 50., 2.],
        [40., 300., 150., 2.],
    ]

    # Word tokenizer used by ``delete_vulgar_adj``.
    self.tokenizer = RegexpTokenizer(r'\w+')

def get_response(self,input_text,num_return_sequences=20,num_beams=100, groups=25, diversityP=1.0):
batch = self.tokenizerP([input_text],truncation=True,padding='longest', return_tensors="pt").to(self.torch_device)
translated = self.modelP.generate(**batch,
tgt_text = self.tokenizerP.batch_decode(translated, skip_special_tokens=True)
return tgt_text

def similarity(self,base, phrases):
    """Return the cosine similarity between ``base`` and each of ``phrases``.

    ``base`` and all ``phrases`` are encoded together with ``self.model``;
    the embedding of ``base`` is then compared with every phrase embedding.

    Returns a 2-D array with a single row and one column per phrase, so
    callers read the scores with ``[0]`` and can use the positions as
    indices into ``phrases``. An empty ``phrases`` yields an empty
    ``(1, 0)`` array.
    """
    if len(phrases) == 0:
        return np.empty((1, 0))

    # ``base`` must be the first encoded sentence. Previously it was never
    # added, so the first phrase was compared with the remaining ones and the
    # scores were shifted by one relative to ``phrases``.
    sentences = [base, *phrases]
    sentence_embeddings = self.model.encode(sentences)
    return cosine_similarity([sentence_embeddings[0]], sentence_embeddings[1:])

def decontracted(self,phrase):
    """Expand common English contractions in ``phrase`` and return the result.

    The rules are applied one after another with ``re.sub``. Note that
    ``'s`` is always rewritten to `` is``, so possessives are expanded too.
    """
    # Order matters: the irregular forms ("won't", "can't") have to be
    # rewritten before the generic "n't" rule gets a chance to match them.
    expansions = (
        (r"won\'t", "will not"),
        (r"can\'t", "can not"),
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
    )
    expanded = phrase
    for pattern, replacement in expansions:
        expanded = re.sub(pattern, replacement, expanded)
    return expanded

def paraphrase_toxic(self,phrase,x,y,z,t):
pra = self.get_response(phrase, num_return_sequences=int(x), num_beams=int(y), groups=int(z), diversityP=t)
results = self.modelD.predict(pra)
tox = np.array(results['toxicity'])
ix = np.where(tox < 0.5 )[0]
return pra,tox,ix

def similar(self,pra,ix, phraseO,simStep):
praGood = list(np.array(pra)[ix])
sim = self.similarity(phraseO, praGood)[0]
threshold = 0.57 + 0.1*(3-simStep)
if len(phraseO.split()) <= 4:
threshold = 0.9
print(threshold, sim)
ixSim = np.where(sim>threshold)[0]
return praGood,ixSim

def post_processing(self,plist):
    """Drop paraphrases that look like generation artefacts.

    A sentence is discarded when it contains ``"NationMaster"``, ``"888-"``
    or ``"800-"``, or when it is written entirely in upper case. The
    surviving sentences are returned as a list in their original order.
    """
    banned_fragments = ("NationMaster", "888-", "800-")
    kept = []
    for sentence in plist:
        if any(fragment in sentence for fragment in banned_fragments):
            continue
        if sentence.isupper():
            continue
        kept.append(sentence)
    return kept

def delete_vulgar_adj(self,h):
h = h.replace("’","'").replace("' ","'")
dph = self.decontracted(h)
## pos tagging:
splt = self.tokenizer.tokenize(dph)
tagged = pos_tag(splt)
## censored:
p2 = profanity.censor(dph)
p2token = self.tokenizer.tokenize(p2)
## get censored words:
intersect = list(set(splt)-set(p2token))
new_sent = ""
for e in tagged:
tag = e[1]
word = e[0]
if word not in intersect:
new_sent+=(word+" ")
print("going second paraphrase")
#phraseO = phrase
sim = similarity(phraseO, pra)[0]
#cond = np.where(tox>0.5) and np.where(sim>0.57)
cond = list(set(np.where(tox>0.5)[0]).intersection(set(np.where(sim>0.57)[0])))
if len(cond)==0:
phrase = pra[np.argmin(tox[cond])]
#phrase = pra[np.argmin(tox[np.where(tox>0.5)])]
pra,tox,ix = paraphrase_toxic(phrase,x,y,z,t)
if len(ix) > 0:
print("second: ix > 0")
praGood,ixSim = similar(pra,ix,phraseO,div[i])
if len(ixSim) > 0:
print("second: ixSim > 0")
plist = np.array(praGood)[ixSim]
post = post_processing(plist)
if len(post)>0:
if i%20==0:
fname = "dataset3/data" + str(i) + ".json"
with open(fname,'w') as fp:
if tag.startswith('JJ') or tag.startswith('RB'): #or tag == 'FW'
new_sent+=(word+" ")
return new_sent

def predict(self, text, toxCategory):

x,y,z,t = self.values[toxCategory]

newText = self.delete_vulgar_adj(text)
paraList,toxList,ix = self.paraphrase_toxic(newText,x,y,z,t)
if len(ix) > 0:
print("first: ix > 0")
simList,ixSim = self.similar(paraList,ix,text,toxCategory)
if len(ixSim) > 0:
print("first: ixSim > 0")
simNonToxList = np.array(simList)[ixSim]
post = self.post_processing(simNonToxList)
if len(post)>0:
return post

print("going second paraphrase")
simList = self.similarity(text, paraList)[0]
cond = list(set(np.where(toxList>0.5)[0]).intersection(set(np.where(simList>0.57)[0])))
if len(cond)==0:
return [];
minTox = paraList[np.argmin(toxList[cond])]

paraList,toxList,ix = self.paraphrase_toxic(minTox,x,y,z,t)
if len(ix) > 0:
print("second: ix > 0")
simList,ixSim = self.similar(paraList,ix,text,toxCategory)
if len(ixSim) > 0:
print("second: ixSim > 0")
simNonToxList = np.array(simList)[ixSim]
post = self.post_processing(simNonToxList)
if len(post)>0:
return post

return [];

if __name__ == "__main__":
hate = np.loadtxt('data-generated/hate.txt', dtype='str' , delimiter="\n")
div = np.loadtxt('data-generated/div.txt', dtype='str' , delimiter="\n")
div = div.astype(int)
dphate = DPhate()
for i in range(len(div)):
data[hate[i]] = dphate.predict(hate[i],div[i])
if i%10==0:
fname = "dataset3/data" + str(i) + ".json"
with open(fname,'w') as fp:
21 changes: 19 additions & 2 deletions
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
# DPhate-double-paraphrasing-hate-speech
Bachelor's thesis on removing hate from online comments using paraphrasing: algorithm DPhate
Bachelor's thesis on removing hate from online comments using paraphrasing: algorithm DPhate.

Fine-tuned T5 models are too big for GitHub and can be downloaded [here]( It is a 2.3GB zip file, which contains 3 different T5 models.

## Usage
To recreate the data generated in the research paper (also available [here](data-generated/data3570.json)), where the inputs are hateful sentences from the HateXplain dataset, use:

To test the algorithm on your own examples, use the following Python code:
from DPhate import DPhate
dphate = DPhate()
phrase = "I fucking love your mother."
toxicity = dphate.modelD.predict(phrase)['toxicity']
toxCategory = int((toxicity-0.5)//0.125)

Fine-tuned T5 models are too big for GitHub and can be downloaded [here]( It is a 2.3GB zip file, which contains 3 different T5 models.
51 changes: 4 additions & 47 deletions
Original file line number Diff line number Diff line change
@@ -1,23 +1,8 @@
import warnings

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
model_name = 'tuner007/pegasus_paraphrase'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizerP = PegasusTokenizer.from_pretrained(model_name)
modelP = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
from detoxify import Detoxify
modelD = Detoxify('original')

def get_response(input_text,num_return_sequences=20,num_beams=100, groups=25, diversityP=1.0):
batch = tokenizerP([input_text],truncation=True,padding='longest', return_tensors="pt").to(torch_device)
translated = modelP.generate(**batch,
tgt_text = tokenizerP.batch_decode(translated, skip_special_tokens=True)
return tgt_text
from datasets import load_dataset
dataset = load_dataset("hatexplain")

def print_list(str_list):
if len(str_list)==0:
Expand All @@ -26,34 +11,6 @@ def print_list(str_list):
print('> ', end='')

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
model = SentenceTransformer('bert-base-nli-mean-tokens')
from collections import deque

from detoxify import Detoxify
modelD = Detoxify('original')
#modelD = Detoxify('original', device='cuda')

import numpy as np
import json

values= [[ 20., 100., 25., 1.],
[ 30., 100., 50., 3.],
[ 40., 100., 50., 2.],
[ 40., 300., 150., 2.]]

def similarity(base, phrases):
    """Return the cosine similarity between ``base`` and each of ``phrases``.

    ``base`` and all ``phrases`` are encoded together with ``model``; the
    embedding of ``base`` is then compared with every phrase embedding.

    Returns a 2-D array with a single row and one column per phrase. An
    empty ``phrases`` yields an empty ``(1, 0)`` array.
    """
    if len(phrases) == 0:
        return np.empty((1, 0))

    # ``base`` must be the first encoded sentence. Previously it was never
    # added, so the first phrase was compared with the remaining ones.
    sentences = [base, *phrases]
    sentence_embeddings = model.encode(sentences)
    return cosine_similarity([sentence_embeddings[0]], sentence_embeddings[1:])

from datasets import load_dataset
dataset = load_dataset("hatexplain")

def majority(lst):
Expand Down
1 change: 1 addition & 0 deletions thoughts_and_ideas.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ questionable practices:
. why did you use specifically these algorithms - actually test them, objectively - not just because i feel like they are ok
. why splitting on 3 when analysing microworkers data
. toxCategory - do it better than just a linear uniform split - logical conclusion: more hateful comment must be more changed
. in preprocessing add a random seed - for repeatable results

Expand Down

0 comments on commit 894a280

Please sign in to comment.