Skip to content

Commit

Permalink
Updated
Browse files Browse the repository at this point in the history
  • Loading branch information
rks-ds authored Feb 20, 2020
1 parent 84b141c commit 74ec4a7
Show file tree
Hide file tree
Showing 3 changed files with 483 additions and 82 deletions.
86 changes: 4 additions & 82 deletions RealorFake/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,88 +11,10 @@
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
stop=set(stopwords.words('english'))
from package.Clean_data import PreProcessTweets

app = Flask(__name__)

#Text Cleaning
class PreProcessTweets:
    """Text normalisation helpers for tweets and their keyword field.

    Each ``_remove*`` helper takes a string and returns a new string with one
    category of noise stripped out.  ``clean_tweet`` and ``clean_keywords``
    chain those helpers into the two public cleaning pipelines.
    """

    # Patterns are compiled once at class-definition time instead of on every
    # call; the pattern strings themselves are unchanged.
    _URL_RE = re.compile(r'http\S+')
    _HTML_RE = re.compile(r'<.*?>')
    # NOTE(review): the last range (U+24C2..U+1F251) is much wider than the
    # emoji blocks and also covers e.g. CJK ideographs.  Kept as-is so the
    # cleaned text stays the same for existing callers -- confirm whether
    # that is intended.
    _EMOJI_RE = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    _NUMBER_RE = re.compile(r'\b[0-9]+\b')
    _SHORT_WORD_RE = re.compile(r'\W*\b\w{1,2}\b')
    _WHITESPACE_RE = re.compile(r'\s+')

    def __init__(self):
        # English stop-word set used by _removestopwords.
        self.stop = set(stopwords.words('english'))
        # Characters deleted by _removepunct.
        self.special = string.punctuation

    def _removestopwords(self, text):
        """Drop tokens found in ``self.stop``; return ``None`` for ``None``."""
        if text is None:
            return None
        tokens = [x for x in word_tokenize(text) if x not in self.stop]
        return " ".join(tokens)

    def _removeURL(self, text):
        """Delete every run of non-space characters that starts with 'http'."""
        return self._URL_RE.sub('', text)

    def _removehtml(self, text):
        """Delete anything that looks like an HTML tag (``<...>``)."""
        return self._HTML_RE.sub(r'', text)

    def _removeemoji(self, text):
        """Delete characters matched by the emoji pattern."""
        return self._EMOJI_RE.sub(r'', text)

    def _removepunct(self, text):
        """Delete every character listed in ``self.special``."""
        # Built per call so that a caller who reassigns self.special is honoured.
        table = str.maketrans('', '', self.special)
        return text.translate(table)

    def _removenum(self, text):
        """Delete stand-alone runs of digits (digits inside words are kept)."""
        return self._NUMBER_RE.sub('', text)

    def _removesmallwords(self, text):
        """Delete words of one or two characters plus the non-word run before them."""
        return self._SHORT_WORD_RE.sub('', text)

    def clean_tweet(self, tweet):
        """Return ``tweet`` lower-cased and stripped of noise.

        Removes, in order: URLs, HTML tags, emoji, punctuation, stop words,
        stand-alone numbers and 1-2 character words, then collapses runs of
        whitespace into a single space.
        """
        tweet = tweet.lower()
        tweet = self._removeURL(tweet)
        tweet = self._removehtml(tweet)
        tweet = self._removeemoji(tweet)
        tweet = self._removepunct(tweet)
        tweet = self._removestopwords(tweet)
        tweet = self._removenum(tweet)
        tweet = self._removesmallwords(tweet)
        # str.replace('\s+', ' ') only matched the literal text "\s+", so the
        # whitespace was never collapsed; a regex substitution is required.
        tweet = self._WHITESPACE_RE.sub(' ', tweet)
        return tweet

    def clean_keywords(self, keyword):
        """Return ``keyword`` lower-cased, emoji-free and with '%20' decoded.

        Runs of whitespace (including those produced by consecutive '%20')
        are collapsed into a single space.
        """
        keyword = keyword.lower()
        keyword = self._removeemoji(keyword)
        keyword = keyword.replace('%20', ' ')
        # Regex substitution: str.replace would treat '\s+' as literal text.
        keyword = self._WHITESPACE_RE.sub(' ', keyword)
        return keyword

#feature creation
class CreateFeature:
    """Derive numeric features from a tweet and its keyword."""

    def __init__(self):
        # Cleaner used to normalise text before measuring it.
        self.Preprocessor = PreProcessTweets()
        # Stemmer used to normalise keywords before the weight lookup.
        self.ps = PorterStemmer()

    def avg_word_len(self, tweet):
        """Return the mean length of the words left after cleaning ``tweet``.

        Returns ``0.0`` when cleaning leaves no words at all; averaging an
        empty list would otherwise yield NaN (with a RuntimeWarning).
        """
        words = self.Preprocessor.clean_tweet(tweet).split()
        if not words:
            return 0.0
        return np.average([len(word) for word in words])

    def num_of_links(self, tweet):
        """Return how many times the substring 'http' occurs in ``tweet``."""
        return len(re.findall(r"http", tweet))

    def kw_weight(self, keyword):
        """Return the stored weight for ``keyword``, or ``0`` if it is unknown.

        The keyword is cleaned, stripped and stemmed before being looked up in
        the mapping unpickled from ``keyword_dict.pkl`` (resolved relative to
        the current working directory).
        """
        keyword = self.Preprocessor.clean_keywords(keyword)
        # NOTE(review): pickle executes arbitrary code on load -- this file
        # must only ever come from a trusted source.
        # The context manager closes the handle, which the bare open() leaked.
        with open("keyword_dict.pkl", 'rb') as handle:
            key_dict = pickle.load(handle)
        stem_key = self.ps.stem(keyword.strip())
        # Direct lookup instead of scanning a list copy of the keys.
        return key_dict.get(stem_key, 0)
app = Flask(__name__)

@app.route('/')
def home():
Expand All @@ -101,8 +23,8 @@ def home():
@app.route('/predict',methods=['POST'])
def predict():

loaded_model = pickle.load(open("base_model3.pkl", 'rb'))
cf=CreateFeature()
loaded_model = pickle.load(open("base_model5.pkl", 'rb'))
cf=PreProcessTweets()

if request.method == 'POST':
message = request.form['message']
Expand Down
Binary file added RealorFake/base_model5.pkl
Binary file not shown.
Loading

0 comments on commit 74ec4a7

Please sign in to comment.