Skip to content

Commit

Permalink
Merge pull request #695 from ibra-kdbra/ibra-kdbra
Browse files Browse the repository at this point in the history
Big Comeback with load of script.
  • Loading branch information
Mrinank-Bhowmick authored Feb 11, 2024
2 parents 650f1aa + f71f185 commit 25b323d
Show file tree
Hide file tree
Showing 34 changed files with 808 additions and 0 deletions.
123 changes: 123 additions & 0 deletions projects/Comics_Scraper/comicXCD_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#! python3
import requests, os, bs4, re

# Base URL of the xkcd site; getSpecificComic() appends '/<number>/' to it.
url = 'https://xkcd.com'
# Create the download directory when the module loads; exist_ok=True makes
# this a no-op when the 'xkcd' folder is already there.
os.makedirs('xkcd', exist_ok=True)


def imgdownloader(url):
    """Download the comic image shown on one xkcd page into the ``xkcd`` folder.

    Args:
        url (str): address of the comic page to scrape. A URL ending in
            ``'#'`` is ignored and nothing is requested.

    Returns:
        None

    Raises:
        requests.exceptions.HTTPError: propagated from ``raise_for_status()``
            when the page or the image request comes back with an error status.
    """
    # The original wrapped everything in ``while not url.endswith('#')`` but
    # every branch ended in ``break``, so the body ran at most once. A guard
    # clause expresses the same behaviour directly.
    if url.endswith('#'):
        return

    res = requests.get(url)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, "lxml")

    # <img> tags nested inside the element with id="comic".
    comic = soup.select('#comic img')
    if not comic:
        # The page did not contain a comic.
        print("No comic was found..")
        return

    try:
        # Presumably the src attribute is protocol-relative ("//host/path"),
        # hence the 'http:' prefix -- verify against the live markup.
        comicimg = 'http:' + comic[0].get('src')

        # Interactive pages do not point at a file under /comics/.
        if "/comics/" not in comicimg:
            print("No comic was found..")
            return

        print('Download image %s' % comicimg)
        res = requests.get(comicimg)
        res.raise_for_status()

        # ``with`` closes the file even when writing a chunk fails; the
        # original open()/close() pair leaked the handle in that case.
        target = os.path.join('xkcd', os.path.basename(comicimg))
        with open(target, 'wb') as image:
            for chunk in res.iter_content(10000):
                image.write(chunk)
        print('Finished')
    except requests.exceptions.MissingSchema:
        # A malformed image URL is reported instead of propagated.
        print("Error in downloading img!!")


def getLatestComicNumber(url):
    """Return the number of the comic shown on the page at ``url``.

    The number is derived from the page's "prev" link: the first run of
    digits in that link's href is taken as the previous comic's number and
    one is added to it.

    Args:
        url (str): page to inspect (``main`` passes the site's base URL).

    Returns:
        int: number of the previous comic plus one.

    Raises:
        requests.exceptions.HTTPError: propagated from ``raise_for_status()``.
        IndexError: if the page has no ``rel="prev"`` link or its href
            contains no digits.
    """
    res = requests.get(url)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, "lxml")

    prevLink = soup.select('a[rel="prev"]')[0]
    prev_url = 'https://xkcd.com' + prevLink.get('href')

    # Raw string: '\d' inside a plain literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    numbers = re.findall(r'\d+', prev_url)
    return int(numbers[0]) + 1


def getNextComic(soup):
    """Return the URL behind the "prev" button of an already-parsed comic page.

    Despite the name, the crawl runs backwards: it starts at the most recent
    comic and follows each page's ``rel="prev"`` link towards the first one.

    Args:
        soup: parsed page exposing ``select()``; the first ``a[rel="prev"]``
            match supplies the relative href (e.g. ``/<comic-num>/``).

    Returns:
        str: ``'https://xkcd.com'`` followed by that href.
    """
    prev_anchors = soup.select('a[rel="prev"]')
    relative_href = prev_anchors[0].get('href')
    return 'https://xkcd.com' + relative_href

def getSpecificComic(comic_number):
    """Download the single comic identified by ``comic_number``.

    Args:
        comic_number (str | int): number of the comic; it is appended to the
            module-level ``url`` as ``/<number>/``.

    Returns:
        None. Any error raised while downloading is printed, not propagated.
    """
    # str() lets callers pass an int (the original '+' concatenation raised
    # TypeError for it, outside the try block); strip() drops stray
    # whitespace from typed input.
    page_url = '%s/%s/' % (url, str(comic_number).strip())
    try:
        imgdownloader(page_url)
    except Exception as e:
        # Deliberate best-effort: report the problem and return to the caller.
        print(str(e))


def batchDownloader():
    """Download every comic by following "prev" links from the front page.

    Starting at ``https://xkcd.com``, each page's comic image is saved into
    the ``xkcd`` folder and the crawl then moves to the page returned by
    ``getNextComic``. The loop stops once the next URL ends with ``'#'``.

    Returns:
        None

    Raises:
        requests.exceptions.HTTPError: propagated from ``raise_for_status()``;
            a failing page or image request aborts the whole batch.
    """
    url = 'https://xkcd.com'
    # A URL ending in '#' means there is no earlier page to visit.
    while not url.endswith('#'):
        print('Current page: %s' % url)
        res = requests.get(url)
        res.raise_for_status()
        soup = bs4.BeautifulSoup(res.text, "lxml")

        # <img> tags nested inside the element with id="comic".
        comic = soup.select('#comic img')
        if not comic:
            # The page did not contain a comic; just move on.
            print("No comic was found..")
        else:
            try:
                # Presumably src is protocol-relative, hence the 'http:'
                # prefix -- verify against the live markup.
                comicimg = 'http:' + comic[0].get('src')
                # Interactive pages do not point at a file under /comics/.
                if "/comics/" in comicimg:
                    print('Download image %s' % comicimg)
                    res = requests.get(comicimg)
                    res.raise_for_status()
                    # ``with`` guarantees the file is closed even if a write
                    # fails; the original open()/close() pair did not.
                    target = os.path.join('xkcd', os.path.basename(comicimg))
                    with open(target, 'wb') as image:
                        for chunk in res.iter_content(10000):
                            image.write(chunk)
                else:
                    print("No comic was found..")
            except requests.exceptions.MissingSchema:
                # Malformed image URL: skip this comic. The original advanced
                # and ``continue``d here; falling through to the shared
                # advance below is equivalent.
                print('Skipping %s: malformed image URL' % url)

        # Step to the previous comic.
        url = getNextComic(soup)

    # All comics have been visited.
    print('Finished')

def main():
    """Ask the user for a mode and run the batch or single-comic download.

    Option 1 downloads every comic; option 2 asks for a comic number (the
    prompt shows the latest number) and downloads just that one. Anything
    else, including non-numeric input, prints a hint and returns.
    """
    try:
        x = int(input("Choose your option: \n1.Download all images\t2.Download Specific image\n"))
    except ValueError:
        # The original crashed with a traceback on non-numeric input.
        print("Please enter 1 or 2.")
        return

    if x == 1:
        batchDownloader()
    elif x == 2:
        y = str(input("Enter any comic number between 1-" + str(getLatestComicNumber(url))))
        try:
            getSpecificComic(y)
        except Exception as e:
            # Top-level boundary: report the failure instead of a traceback.
            print(str(e))
    else:
        # Previously an unknown option was silently ignored.
        print("Please enter 1 or 2.")


if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions projects/Comics_Scraper/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bs4
requests
lxml
1 change: 1 addition & 0 deletions projects/Text_to_SpreadSheet/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
openpyxl
5 changes: 5 additions & 0 deletions projects/Text_to_SpreadSheet/text1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
text1
tex1, 1
tex1, 2
text1, 3
text1, 4
5 changes: 5 additions & 0 deletions projects/Text_to_SpreadSheet/text2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
text2
text2, 1
text2, 2
text2, 3
text2, 4
5 changes: 5 additions & 0 deletions projects/Text_to_SpreadSheet/text3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
text3
text3, 1
text3, 2
text3, 3
text3, 4
32 changes: 32 additions & 0 deletions projects/Text_to_SpreadSheet/textToSheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import openpyxl


def textToSheet(directory, filename):
    """Write each ``.txt`` file in a folder as one column of an Excel sheet.

    Every line of a text file, including its trailing newline, becomes one
    cell; the first file fills column 1, the second column 2, and so on.

    Args:
        directory (str): folder containing the text files
        filename (str): name of the excel file to save
    Returns:
        None
    """
    wb = openpyxl.Workbook()
    wb.create_sheet(index=0, title='result')
    # NOTE(review): presumably wb.active is the 'result' sheet just inserted
    # at index 0 -- confirm against openpyxl.
    sheet = wb.active

    # The original called os.listdir() and open(file) without the directory,
    # so ``directory`` was silently ignored and only the current working
    # directory was ever read. sorted() makes the column order deterministic,
    # since os.listdir() returns names in arbitrary order.
    text_files = sorted(
        name for name in os.listdir(directory) if name.endswith('.txt')
    )

    for colIndex, name in enumerate(text_files, start=1):
        with open(os.path.join(directory, name)) as f:
            for rowIndex, line in enumerate(f, start=1):
                sheet.cell(row=rowIndex, column=colIndex).value = line

    wb.save(filename)


if __name__ == "__main__":
    textToSheet('.', 'text-to-cols.xlsx')
1 change: 1 addition & 0 deletions projects/Worksheet_to_text/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
openpyxl
26 changes: 26 additions & 0 deletions projects/Worksheet_to_text/sheetToTextFile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
import openpyxl


def toTextFiles(filename):
    """Write each column of the active worksheet into its own text file.

    Column 1 goes to ``text-1.txt``, column 2 to ``text-2.txt`` and so on, in
    the current working directory. Cell values are written back to back with
    no separator added.

    Args:
        filename (str): name of the workbook to read from
    Returns:
        None
    """
    wb = openpyxl.load_workbook(filename)
    sheet = wb.active

    for count, colObj in enumerate(sheet.columns, start=1):
        with open('text-' + str(count) + '.txt', 'w') as file:
            for cellObj in colObj:
                value = cellObj.value
                # Empty cells (e.g. in columns shorter than the longest one)
                # carry None, and file.write() only accepts str -- the
                # original raised TypeError on them and on numeric cells.
                if value is None:
                    continue
                file.write(str(value))


if __name__ == "__main__":
    toTextFiles('worksheet.xlsx')
Binary file added projects/Worksheet_to_text/worksheet.xlsx
Binary file not shown.
61 changes: 61 additions & 0 deletions projects/chore-assignment-emailer/chore-emailer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import random
import smtplib

def _assign_chores(chores, emails):
    """Deal the chores out to the emails round-robin, in random order, without mutating either list."""
    shuffled = list(chores)
    random.shuffle(shuffled)

    assignments = {}
    for position, chore in enumerate(shuffled):
        # Use the emails list circularly.
        recipient = emails[position % len(emails)]
        assignments.setdefault(recipient, []).append(chore)
    return assignments


def emailer(chores, emails):
    """Randomly assign chores to emails and mail each person their list.

    The sender's address and password are requested interactively, and the
    messages are sent through ``smtp.gmail.com`` on port 587 using STARTTLS.

    Args:
        chores: list of chores (left unmodified)
        emails: list of emails to send chores
    Returns:
        None
    """
    # Local import keeps this block self-contained; getpass reads the
    # password without echoing it, unlike the input() used originally.
    import getpass

    if not emails:
        print('emails list should not be empty')
        return

    if not chores:
        print('chores list should not be empty')
        return

    # The original removed items from ``chores`` while assigning them, which
    # emptied the caller's list as a side effect.
    chores_dict = _assign_chores(chores, emails)

    # The context manager closes the connection even when login or sending
    # raises; the original only reached quit() on the success path.
    with smtplib.SMTP('smtp.gmail.com', 587) as smtpObj:
        smtpObj.ehlo()

        email = input('Enter your email: ')
        password = getpass.getpass('Enter your email password: ')

        smtpObj.starttls()
        smtpObj.login(email, password)
        # See https://support.google.com/accounts/answer/6010255 if (Bad Credentials Error)

        for k, v in chores_dict.items():
            c = ', '.join(v)
            print('Sending email to %s...' % k)
            sendmailStatus = smtpObj.sendmail(
                email, k,
                'Subject: Your Chores.\nHi There!, {} are your chores'.format(c))
            if sendmailStatus != {}:
                # Report the recipient that failed; the original printed the
                # sender's address here.
                print('There was a problem sending email to %s: %s' % (k,
                                                                       sendmailStatus))


if __name__ == "__main__":
    # NOTE(review): the recipients list holds ONE string containing two
    # comma-separated (and obfuscated) addresses; presumably two separate
    # list items were intended -- confirm and replace with real addresses.
    emailer(['dishes', 'bathroom', 'vacuum', 'walk dog'], ['[email protected], [email protected]'])
21 changes: 21 additions & 0 deletions projects/comma-code/comma-code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
def comma_code(items):
    """Combine a list into a string of the form "item1, item2, and item3".

    Args:
        items (list): List of strings
    Returns:
        string: ``''`` for an empty list, the item itself for one item,
        "a and b" for two items, and a comma-separated list with ", and "
        before the last item for three or more.
    """
    if not items:
        return ''
    if len(items) == 1:
        return items[0]
    if len(items) == 2:
        # Two items take no comma; the original produced "a, and b".
        return items[0] + ' and ' + items[1]

    return ', '.join(items[:-1]) + ', and ' + items[-1]


if __name__ == "__main__":
    spam = ['apples', 'bananas', 'tofu', 'cats']
    print(comma_code(spam))
64 changes: 64 additions & 0 deletions projects/custom-invitations/customInvitations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os


import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt


def _add_centered_run(doc, text, size, bold=False, italic=False):
    """Append a centered paragraph holding a single run of ``text`` with the given font settings."""
    paragraph = doc.add_paragraph()
    paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = paragraph.add_run(text)
    # Only touch the flags that are requested, exactly as the original did;
    # the others are left unset.
    if bold:
        run.font.bold = True
    if italic:
        run.font.italic = True
    run.font.size = Pt(size)


def createInvitations(txtFile, docName):
    """Create one invitation page per guest named in a text file.

    Each non-blank line of ``txtFile`` is one guest. Every invitation is five
    centered paragraphs (intro, name, address, date, time) followed by a page
    break.

    Args:
        txtFile (str): text file to read from, one guest per line
        docName (str): doc file to save invitations in
    """
    doc = docx.Document()

    intro = 'It would be a pleasure to have the company of'
    address = 'at 11101 Memory lane on the evening of'
    date = 'April 31st'
    time = "at 24 O'Clock"

    with open(txtFile) as guestList:
        for guest in guestList:
            # rstrip('\n') instead of guest[:-1]: the slice chopped the last
            # letter of the final name when the file had no trailing newline.
            name = guest.rstrip('\n')
            if not name.strip():
                # Blank lines would otherwise yield an invitation with no name.
                continue

            _add_centered_run(doc, intro, 13, bold=True, italic=True)
            _add_centered_run(doc, name, 15, bold=True)
            _add_centered_run(doc, address, 12, bold=True, italic=True)
            _add_centered_run(doc, date, 12)
            _add_centered_run(doc, time, 12, bold=True, italic=True)

            doc.add_page_break()

    doc.save(docName)


if __name__ == "__main__":
    createInvitations('guests.txt', 'invitations.docx')
5 changes: 5 additions & 0 deletions projects/custom-invitations/guests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Prof. Plum
Miss Scarlet
Col. Mustard
Al Sweigart
Robocop
1 change: 1 addition & 0 deletions projects/custom-invitations/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-docx
Binary file added projects/custom-seating-cards/Pacifico.ttf
Binary file not shown.
Loading

0 comments on commit 25b323d

Please sign in to comment.