-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtextos.py
94 lines (81 loc) · 2.5 KB
/
textos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# -*- coding: utf-8 -*-
import feedparser
import time
from subprocess import check_output
import sys
# Label printed in the header of every block of new posts.
feed_name = "Data Science"

# Google News RSS search for "data science" (hl=pt-BR edition).
urlrss = 'https://news.google.com/news/rss/search/section/q/data%20science/data%20science?hl=pt-BR&ned=pt-BR_br'

# Flat text file used as the database: one "title|timestamp" line per post.
db = "/tmp/arquivo.txt"

# Presumably a 12-hour window expressed in milliseconds; nothing in the
# visible code reads it yet -- TODO confirm intended use.
limit = 12 * 3600 * 1000


def current_time_millis():
    """Return the current Unix time rounded to whole milliseconds."""
    return int(round(time.time() * 1000))


# Snapshot taken once, when the module is first loaded.
current_timestamp = current_time_millis()
def post_is_in_db(title, db_path=None):
    """Return True if *title* occurs in any line of the database file.

    The match is a plain substring test against each line, so a title that
    is contained in a longer, already-recorded title also counts as present.

    NOTE: the age check against ``limit`` ("timestamp > limit") is not
    implemented; a matching line counts regardless of its timestamp.

    Args:
        title: Post title to look for.
        db_path: Path of the database file. Defaults to the module-level
            ``db`` when omitted.

    Returns:
        True when some line contains *title*; False otherwise, including
        when the database file does not exist yet.
    """
    path = db if db_path is None else db_path
    try:
        with open(path, 'r') as database:
            return any(title in line for line in database)
    except FileNotFoundError:
        # First run: nothing has been recorded yet, so no post is known.
        return False
def get():
    """Fetch the feed, record unseen post titles and print them.

    Parses ``urlrss`` with feedparser, keeps the titles for which
    ``post_is_in_db`` is false, appends each of them once to the ``db``
    file as ``title|timestamp`` and prints them in groups of five, every
    group preceded by a dated header carrying ``feed_name``.

    Returns:
        None. Results are written to ``db`` and to standard output.
    """
    current_timestamp = current_time_millis()

    feed = feedparser.parse(urlrss)

    # Keep only the posts that are not in the database yet.
    # TODO: also take the stored timestamp into account.
    posts_to_print = [
        post.title for post in feed.entries if not post_is_in_db(post.title)
    ]

    # Record the new titles. Duplicates inside this batch are tracked in
    # memory: re-reading the file here would miss lines that are still
    # sitting in the write buffer.
    written = set()
    with open(db, 'a') as f:
        for title in posts_to_print:
            if title in written:
                continue
            written.add(title)
            f.write(title + "|" + str(current_timestamp) + "\n")

    # Output all of the new posts, with a numbered header every five titles.
    for index, title in enumerate(posts_to_print):
        if index % 5 == 0:
            print("\n" + time.strftime("%a, %b %d %I:%M %p") + ' ((( ' + feed_name + ' - ' + str(index // 5 + 1) + ' )))')
            print("-----------------------------------------\n")
        print(title + "\n")
def getNoticia():
    """Return the first feed entry whose title is not in the database yet.

    Parses ``urlrss`` with feedparser and walks the entries in feed order.
    The first entry for which ``post_is_in_db`` is false is appended to the
    ``db`` file as ``title|timestamp`` and returned.

    Returns:
        A ``(title, href)`` tuple, where ``href`` comes from the entry's
        first link, or None when no entry is new (or the feed is empty).
    """
    feed = feedparser.parse(urlrss)
    for post in feed.entries:
        title = post.title
        if post_is_in_db(title):
            continue
        # Resolve the link before touching the database, so an entry
        # without links is not marked as seen without being returned.
        noticia = (title, post.links[0].href)
        with open(db, 'a') as f:
            # Stamp with the time of this call; the module-level
            # current_timestamp is frozen at import time.
            f.write(title + "|" + str(current_time_millis()) + "\n")
        return noticia
    return None
# Script entry point: fetch one not-yet-recorded news item and print it.
if __name__ == "__main__":
    noticia = getNoticia()
    print(noticia)