-
Notifications
You must be signed in to change notification settings - Fork 490
/
Copy pathcreate_word_cloud.py
100 lines (95 loc) · 2.59 KB
/
create_word_cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import argparse
import numpy as np
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
def _parse_bool(value):
    """Convert a command-line string into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is
    ``True`` because every non-empty string is truthy. This converter
    accepts the usual spellings case-insensitively instead.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised boolean
            spelling; argparse turns this into a normal usage error.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got {!r}".format(value)
    )


# Command-line interface: only --text is mandatory. Every other option
# defaults to None when it is not given on the command line.
my_parser = argparse.ArgumentParser()
my_parser.add_argument(
    "--text",
    action="store",
    type=str,
    required=True,
    help="Enter the path to the any file",
)
my_parser.add_argument(
    "--background",
    action="store",
    type=str,
    required=False,
    help="Enter a background color of your choice",
)
my_parser.add_argument(
    "--mask",
    action="store",
    type=str,
    required=False,
    help="Enter the path to a mask of your choice",
)
my_parser.add_argument(
    "--contour_width",
    action="store",
    type=int,
    required=False,
    help="Enter the width of the contour you prefer",
)
my_parser.add_argument(
    "--contour_color",
    action="store",
    type=str,
    required=False,
    help="Enter the color of the contour you prefer",
)
my_parser.add_argument(
    "--color_func",
    action="store",
    # Explicit converter so that "--color_func False" really yields False.
    type=_parse_bool,
    required=False,
    help="Do you want the color of the mask intact?",
)
def createWordCloud(
    text, background_color, mask, contour_width, contour_color, color_func
):
    """Build a word cloud from a file, display it and save it as a PNG.

    Args:
        text: Path to the input file. ``txt``, ``doc`` and ``pdf`` files are
            opened and read as plain text; ``csv`` files are loaded with
            pandas (latin-1) and the words of their ``CONTENT`` column are
            lower-cased and concatenated.
        background_color: Background colour passed to ``WordCloud``, or
            ``None`` to keep the library default.
        mask: Path to an image used as the cloud mask, or ``None`` for no
            mask.
        contour_width: Contour width passed to ``WordCloud``, or ``None`` to
            keep the library default.
        contour_color: Contour colour passed to ``WordCloud``, or ``None`` to
            keep the library default.
        color_func: Truthy to colour the words with an
            ``ImageColorGenerator`` built from the mask image. Ignored when
            no mask is given, since there is no image to take colours from.

    Raises:
        ValueError: If the file extension is not one of txt, doc, pdf, csv.

    The image is written next to the input file, with the extension replaced
    by ``.png``.
    """
    # Function-scope import: the file's import block does not provide ``os``.
    import os

    print(text, background_color, mask, contour_color, color_func)

    # splitext copes with dots in directory names, multiple dots in the file
    # name and a missing extension, unlike text.split(".").
    name, ext = os.path.splitext(text)
    ext = ext.lstrip(".").lower()

    if ext in ("txt", "doc", "pdf"):
        # Context manager so the file handle is always closed.
        with open(text) as source:
            text = source.read()
    elif ext == "csv":
        df = pd.read_csv(text, encoding="latin-1")
        # Accumulate every row; each cell is normalised to lower-case words
        # separated by single spaces.
        rows = [" ".join(str(val).lower().split()) for val in df.CONTENT]
        text = " ".join(rows)
    else:
        # Without this guard the cloud would be built from the path string.
        raise ValueError(
            "Unsupported file extension {!r}; expected txt, doc, pdf or csv".format(
                ext
            )
        )

    # The mask is optional on the command line, so only load it when given.
    mask_array = None if mask is None else np.array(Image.open(mask))

    mask_colors = None
    if color_func and mask_array is not None:
        mask_colors = ImageColorGenerator(mask_array)

    # Forward only the options that were actually supplied so WordCloud's own
    # defaults apply to the rest (a None contour_width breaks its contour
    # drawing when a mask is present).
    supplied = {
        "background_color": background_color,
        "contour_width": contour_width,
        "contour_color": contour_color,
    }
    options = {key: value for key, value in supplied.items() if value is not None}

    wordcloud = WordCloud(
        stopwords=STOPWORDS,
        mask=mask_array,
        max_font_size=50,
        max_words=1000,
        color_func=mask_colors,
        **options
    ).generate(text)

    plt.figure()
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
    wordcloud.to_file(name + ".png")
# Run the command-line workflow only when this file is executed as a script,
# so importing the module does not parse the importer's arguments or start
# rendering a word cloud.
if __name__ == "__main__":
    args = my_parser.parse_args()
    createWordCloud(
        text=args.text,
        background_color=args.background,
        mask=args.mask,
        contour_width=args.contour_width,
        contour_color=args.contour_color,
        color_func=args.color_func,
    )