Merge pull request #695 from ibra-kdbra/ibra-kdbra

Big Comeback with load of script.
Mrinank-Bhowmick · Feb 11, 2024 · 25b323d · 25b323d
2 parents 650f1aa + f71f185
commit 25b323d
Show file tree

Hide file tree

Showing 34 changed files with 808 additions and 0 deletions.
diff --git a/projects/Comics_Scraper/comicXCD_scraper.py b/projects/Comics_Scraper/comicXCD_scraper.py
@@ -0,0 +1,123 @@
+#! python3
+import requests, os, bs4, re
+
+url = 'https://xkcd.com'  # base address of the site; read as a module-level global by getSpecificComic() and main()
+# Create the output directory for downloaded comics; exist_ok=True avoids an error when it already exists.
+os.makedirs('xkcd', exist_ok=True)
+
+
+def imgdownloader(url):
+    """Download the comic image shown on one xkcd page into the 'xkcd' folder.
+
+    Args:
+        url (str): address of the comic page. A URL ending in '#' is treated
+            as "no page" and nothing is fetched.
+
+    Returns:
+        None
+
+    Raises:
+        requests.exceptions.HTTPError: if the page or the image request
+            answers with an error status (raised by raise_for_status()).
+    """
+    from urllib.parse import urljoin  # local import: only needed to resolve the image URL
+
+    if url.endswith('#'):
+        return
+
+    res = requests.get(url)
+    res.raise_for_status()
+    soup = bs4.BeautifulSoup(res.text, "lxml")
+
+    # the comic is the <img> nested inside the element with id="comic"
+    comic = soup.select('#comic img')
+    src = comic[0].get('src') if comic else None
+    if not src:
+        # no <img>, or an <img> without a src attribute
+        print("No comic was found..")
+        return
+
+    # urljoin resolves protocol-relative ('//host/path'), relative and absolute
+    # src values against the page URL, and keeps the page's scheme
+    comicimg = urljoin(url, src)
+
+    # anything outside /comics/ is taken to be an interactive page, not an image
+    if "/comics/" not in comicimg:
+        print("No comic was found..")
+        return
+
+    try:
+        print('Download image %s' % comicimg)
+        res = requests.get(comicimg)
+        res.raise_for_status()
+    except requests.exceptions.MissingSchema:
+        print("Error in downloading img!!")
+        return
+
+    # 'with' closes the file even if writing a chunk fails
+    with open(os.path.join('xkcd', os.path.basename(comicimg)), 'wb') as image:
+        for chunk in res.iter_content(10000):
+            image.write(chunk)
+    print('Finished')
+
+
+def getLatestComicNumber(url):
+    """Return the number of the newest comic, derived from the page's "prev" link.
+
+    The page at `url` is fetched, the first rel="prev" link is read, and the
+    first number in its href plus one is returned.
+
+    Args:
+        url (str): address of the page to inspect (main() passes the site root).
+
+    Returns:
+        int: number found in the "prev" link, plus one.
+
+    Raises:
+        requests.exceptions.HTTPError: if the page answers with an error status.
+        IndexError: if the page has no "prev" link or the link holds no number.
+    """
+    res = requests.get(url)
+    res.raise_for_status()
+    soup = bs4.BeautifulSoup(res.text, "lxml")
+
+    prevLinks = soup.select('a[rel="prev"]')
+    if not prevLinks:
+        raise IndexError('no rel="prev" link found on %s' % url)
+
+    href = prevLinks[0].get('href') or ''
+    # raw string: '\d' in a normal string literal is an invalid escape sequence
+    match = re.search(r'\d+', href)
+    if match is None:
+        raise IndexError('no comic number in "prev" link %r' % href)
+    return int(match.group()) + 1
+
+
+# Despite its name, this helper follows the page's "prev" link, so a crawl that
+# starts at the most recent comic moves backwards, one page per call.
+def getNextComic(soup):
+    """Return the absolute URL behind the first rel="prev" link found in `soup`."""
+    anchors = soup.select('a[rel="prev"]')
+    target = anchors[0].get('href')  # site-relative path taken from the link's href
+    return 'https://xkcd.com' + target
+
+def getSpecificComic(comic_number):
+    """Download the single comic with the given number.
+
+    Args:
+        comic_number (str | int): number of the comic; surrounding whitespace
+            is ignored.
+
+    Returns:
+        None
+    """
+    # str() lets callers pass an int; the page address is <site>/<number>/
+    page = url + '/' + str(comic_number).strip() + '/'
+    try:
+        imgdownloader(page)
+    except Exception as e:
+        # best effort: report the failure instead of crashing the script
+        print(str(e))
+
+def batchDownloader():
+    """Download every comic, following "prev" links from the front page backwards.
+
+    Images are written into the 'xkcd' folder. The crawl ends when the next
+    URL ends in '#'.
+
+    Returns:
+        None
+
+    Raises:
+        requests.exceptions.HTTPError: if a page (other than a numbered page
+            answering 404) or an image request returns an error status.
+    """
+    from urllib.parse import urljoin  # local import: only needed to resolve image URLs
+
+    url = 'https://xkcd.com'
+    # a URL ending in '#' marks the end of the crawl
+    while not url.endswith('#'):
+        print('Current page: %s' % url)
+        res = requests.get(url)
+        try:
+            res.raise_for_status()
+        except requests.exceptions.HTTPError:
+            # A numbered page that answers 404 would otherwise abort the whole
+            # run (NOTE(review): xkcd.com/404/ is believed to do this on
+            # purpose - confirm). Skip it by stepping to the previous number.
+            numbered = re.search(r'/(\d+)/?$', url)
+            if res.status_code != 404 or numbered is None or int(numbered.group(1)) <= 1:
+                raise
+            print("No comic was found..")
+            url = 'https://xkcd.com/%d/' % (int(numbered.group(1)) - 1)
+            continue
+
+        soup = bs4.BeautifulSoup(res.text, "lxml")
+        # the comic is the <img> nested inside the element with id="comic"
+        comic = soup.select('#comic img')
+        src = comic[0].get('src') if comic else None
+        # urljoin resolves protocol-relative, relative and absolute src values
+        comicimg = urljoin(url, src) if src else ''
+
+        # anything outside /comics/ is taken to be an interactive page, not an image
+        if "/comics/" in comicimg:
+            print('Download image %s' % comicimg)
+            try:
+                img = requests.get(comicimg)
+                img.raise_for_status()
+            except requests.exceptions.MissingSchema:
+                # malformed image address: skip this comic, keep crawling
+                print("Error in downloading img!!")
+            else:
+                # 'with' closes the file even if writing a chunk fails
+                with open(os.path.join('xkcd', os.path.basename(comicimg)), 'wb') as image:
+                    for chunk in img.iter_content(10000):
+                        image.write(chunk)
+        else:
+            print("No comic was found..")
+
+        url = getNextComic(soup)  # move one comic back
+    # all comics have downloaded
+    print('Finished')
+
+def main():
+    """Ask which mode to run (1 = all comics, 2 = one comic) and run it.
+
+    Invalid menu choices and out-of-range comic numbers are reported with a
+    message instead of an uncaught exception.
+    """
+    try:
+        x = int(input("Choose your option: \n1.Download all images\t2.Download Specific image\n"))
+    except ValueError:
+        print("Please enter 1 or 2")
+        return
+
+    if x == 1:
+        batchDownloader()
+    elif x == 2:
+        latest = getLatestComicNumber(url)
+        y = input("Enter any comic number between 1-" + str(latest))
+        try:
+            number = int(y)
+        except ValueError:
+            number = 0  # falls outside the valid range below
+        if not 1 <= number <= latest:
+            print("Comic number must be between 1 and %d" % latest)
+            return
+        # getSpecificComic reports download errors itself
+        getSpecificComic(str(number))
+    else:
+        print("Please enter 1 or 2")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/projects/Comics_Scraper/requirements.txt b/projects/Comics_Scraper/requirements.txt
@@ -0,0 +1 @@
+bs4
diff --git a/projects/Text_to_SpreadSheet/requirements.txt b/projects/Text_to_SpreadSheet/requirements.txt
@@ -0,0 +1 @@
+openpyxl
diff --git a/projects/Text_to_SpreadSheet/text1.txt b/projects/Text_to_SpreadSheet/text1.txt
@@ -0,0 +1,5 @@
+text1
+tex1, 1
+tex1, 2
+text1, 3
+text1, 4
diff --git a/projects/Text_to_SpreadSheet/text2.txt b/projects/Text_to_SpreadSheet/text2.txt
@@ -0,0 +1,5 @@
+text2
+text2, 1
+text2, 2
+text2, 3
+text2, 4
diff --git a/projects/Text_to_SpreadSheet/text3.txt b/projects/Text_to_SpreadSheet/text3.txt
@@ -0,0 +1,5 @@
+text3
+text3, 1
+text3, 2
+text3, 3
+text3, 4
diff --git a/projects/Text_to_SpreadSheet/textToSheet.py b/projects/Text_to_SpreadSheet/textToSheet.py
@@ -0,0 +1,32 @@
+import os
+import openpyxl
+
+
+def textToSheet(directory, filename):
+    """Write every .txt file found in `directory` into its own worksheet column.
+
+    Each line of a file becomes one cell, top to bottom, and keeps its
+    trailing newline. Files are processed in sorted name order, so the first
+    file fills column 1, the second column 2, and so on.
+
+    Args:
+        directory (str): folder containing text files
+        filename (str): name of excel file to save
+    Returns:
+        None
+    """
+    wb = openpyxl.Workbook()
+    wb.create_sheet(index=0, title='result')
+    sheet = wb.active
+
+    # Honour `directory` (os.listdir() with no argument lists the current
+    # directory) and sort, because os.listdir returns names in arbitrary order.
+    textFiles = sorted(name for name in os.listdir(directory)
+                       if name.endswith('.txt'))
+
+    # write text files as columns in worksheet
+    for colIndex, name in enumerate(textFiles, start=1):
+        with open(os.path.join(directory, name)) as f:
+            for rowIndex, line in enumerate(f, start=1):
+                sheet.cell(row=rowIndex, column=colIndex).value = line
+
+    wb.save(filename)
+
+if __name__ == "__main__":
+    textToSheet('.', 'text-to-cols.xlsx')
diff --git a/projects/Worksheet_to_text/requirements.txt b/projects/Worksheet_to_text/requirements.txt
@@ -0,0 +1 @@
+openpyxl
diff --git a/projects/Worksheet_to_text/sheetToTextFile.py b/projects/Worksheet_to_text/sheetToTextFile.py
@@ -0,0 +1,26 @@
+import os
+import openpyxl
+
+
+def toTextFiles(filename):
+    """Write each column of the active worksheet into its own text file.
+
+    Column n goes to 'text-n.txt' in the current directory, one cell per
+    line. Empty cells are skipped and non-string values are converted with
+    str().
+
+    Args:
+        filename (str): name of worksheet to read from
+    Returns:
+        None
+    """
+    wb = openpyxl.load_workbook(filename)
+    sheet = wb.active
+
+    for count, colObj in enumerate(sheet.columns, start=1):
+        with open('text-' + str(count) + '.txt', 'w') as file:
+            for cellObj in colObj:
+                value = cellObj.value
+                if value is None:
+                    # empty cell, e.g. a column shorter than the longest one
+                    continue
+                text = str(value)  # numbers and dates cannot be written as-is
+                # keep an existing line ending, otherwise add one so that
+                # consecutive cells do not run together
+                file.write(text if text.endswith('\n') else text + '\n')
+
+
+if __name__ == "__main__":
+    toTextFiles('worksheet.xlsx')
diff --git a/projects/Worksheet_to_text/worksheet.xlsx b/projects/Worksheet_to_text/worksheet.xlsx
diff --git a/projects/chore-assignment-emailer/chore-emailer.py b/projects/chore-assignment-emailer/chore-emailer.py
@@ -0,0 +1,61 @@
+import random
+import smtplib
+
+def emailer(chores, emails):
+    """Randomly split chores between the given addresses and email each share.
+
+    The chores are shuffled and dealt out round-robin, so every address gets
+    either floor or ceil of len(chores) / len(emails) chores. The sender's
+    address and password are prompted for on the console. The caller's
+    `chores` list is not modified.
+
+    Args:
+        chores: list of chores
+        emails: list of emails to send chores
+    Returns:
+        None
+    """
+    import getpass  # local import: read the password without echoing it
+
+    if not emails:
+        print('emails list should not be empty')
+        return
+
+    if not chores:
+        print('chores list should not be empty')
+        return
+
+    # shuffle a copy so the caller's list stays intact
+    shuffled = list(chores)
+    random.shuffle(shuffled)
+
+    chores_dict = {}
+    for position, chore in enumerate(shuffled):
+        # use the emails list circularly
+        chores_dict.setdefault(emails[position % len(emails)], []).append(chore)
+
+    # ask for credentials before connecting so the connection is not left
+    # idle while the user types
+    email = input('Enter your email: ')
+    password = getpass.getpass('Enter your email password: ')
+
+    # the context manager sends QUIT and closes the socket even on errors
+    with smtplib.SMTP('smtp.gmail.com', 587) as smtpObj:
+        smtpObj.ehlo()
+        smtpObj.starttls()
+        smtpObj.login(email, password)
+        # See https://support.google.com/accounts/answer/6010255 if (Bad Credentials Error)
+
+        for k, v in chores_dict.items():
+            c = ', '.join(v)
+            print('Sending email to %s...' % k)
+            # a blank line must separate the header section from the body
+            message = 'Subject: Your Chores.\n\nHi There!, {} are your chores'.format(c)
+            sendmailStatus = smtpObj.sendmail(email, k, message)
+            if sendmailStatus != {}:
+                # report the recipient that failed, not the sender
+                print('There was a problem sending email to %s: %s' % (k,
+                      sendmailStatus))
+
+
+if __name__ == "__main__":
+    # placeholder recipients: one list item per address
+    emailer(['dishes', 'bathroom', 'vacuum', 'walk dog'],
+            ['first@example.com', 'second@example.com'])
diff --git a/projects/comma-code/comma-code.py b/projects/comma-code/comma-code.py
@@ -0,0 +1,21 @@
+def comma_code(items):
+    """Join a list of strings as 'item1, item2, and item3'.
+
+    Args:
+        items (list): List of strings
+
+    Returns:
+        string: '' for an empty list, the lone item for a single-element
+        list, otherwise every item separated by ', ' with 'and' placed
+        before the last one.
+    """
+    if len(items) < 2:
+        # zero or one element: nothing to join
+        return items[0] if len(items) == 1 else ''
+
+    leading = ', '.join(items[:-1])
+    return leading + ', and ' + items[-1]
+
+
+if __name__ == "__main__":
+    spam = ['apples', 'bananas', 'tofu', 'cats']
+    print(comma_code(spam))
diff --git a/projects/custom-invitations/customInvitations.py b/projects/custom-invitations/customInvitations.py
@@ -0,0 +1,64 @@
+import os
+
+
+import docx
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from docx.shared import Pt
+
+
+def _addCenteredLine(doc, text, size, bold=None, italic=None):
+    """Append a centered paragraph to doc holding text as one run of the given point size."""
+    paragraph = doc.add_paragraph()
+    paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    run = paragraph.add_run(text)
+    # only touch bold/italic when asked, leaving the document default otherwise
+    if bold is not None:
+        run.font.bold = bold
+    if italic is not None:
+        run.font.italic = italic
+    run.font.size = Pt(size)
+
+
+def createInvitations(txtFile, docName):
+    """Creates invitations based on names in txt file
+
+    One invitation is written per non-blank line, with a page break between
+    consecutive invitations.
+
+    Args:
+        txtFile (str): text file to read from, one guest name per line
+        docName (str): doc file to save invitations in
+    Returns:
+        None
+    """
+    doc = docx.Document()
+
+    intro = 'It would be a pleasure to have the company of'
+    address = 'at 11101 Memory lane on the evening of'
+    date = 'April 31st'
+    time = "at 24 O'Clock"
+
+    with open(txtFile) as guestList:
+        # strip() instead of guest[:-1]: a last line without a trailing
+        # newline would otherwise lose its final character; blank lines are
+        # dropped so they do not produce nameless invitations
+        names = [line.strip() for line in guestList if line.strip()]
+
+    for index, name in enumerate(names):
+        if index:
+            # break between invitations only, so no empty page trails the last one
+            doc.add_page_break()
+        _addCenteredLine(doc, intro, 13, bold=True, italic=True)
+        _addCenteredLine(doc, name, 15, bold=True)
+        _addCenteredLine(doc, address, 12, bold=True, italic=True)
+        _addCenteredLine(doc, date, 12)
+        _addCenteredLine(doc, time, 12, bold=True, italic=True)
+
+    doc.save(docName)
+
+
+
+if __name__ == "__main__":
+    createInvitations('guests.txt', 'invitations.docx')
diff --git a/projects/custom-invitations/guests.txt b/projects/custom-invitations/guests.txt
@@ -0,0 +1,5 @@
+Prof. Plum
+Miss Scarlet
+Col. Mustard
+Al Sweigart
+Robocop
diff --git a/projects/custom-invitations/requirements.txt b/projects/custom-invitations/requirements.txt
@@ -0,0 +1 @@
+docx
diff --git a/projects/custom-seating-cards/Pacifico.ttf b/projects/custom-seating-cards/Pacifico.ttf