vncgrvs · vg-leanix · Dec 14, 2020 · Dec 14, 2020 · Dec 14, 2020 · Dec 19, 2020
diff --git a/.gitignore b/.gitignore
@@ -10,4 +10,6 @@ data.json
 *.pptx
 /backend/output
 *.pyc
-*.log
+*.log
+mongodb/
+db-backup/
diff --git a/README.md b/README.md
@@ -1,5 +1,10 @@
+
 # knowlix
 
+
 Microservice to automatically create onboarding slides
 
 ![LIX Builder](https://github.com/vg-leanix/pptx-tool/blob/main/Thumbnail.png)
+
+## Architecture 
+![Architecture](https://github.com/vg-leanix/knowlix/blob/v1.1_mongodb/knowlix%20architecture.png)
diff --git a/backend/.gitignore b/backend/.gitignore
@@ -0,0 +1 @@
+env/
diff --git a/backend/Dockerfile b/backend/Dockerfile
@@ -1,11 +1,12 @@
-FROM python:3.8.1
+FROM python:3.9.1-slim-buster
 
 WORKDIR /usr/app
 
+
 COPY req.txt ./
 COPY api.py core.py main.py master.pptx req.txt server.py ./
 
-# RUN mkdir output 
+RUN mkdir output  
 
 RUN pip install --upgrade pip
 RUN pip install -r req.txt --no-cache-dir

diff --git a/backend/api.py b/backend/api.py
@@ -1,39 +1,46 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
+import pymongo
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
 from typing import List
 from main import create_pptx, get_sections
 import os
+from datetime import datetime
 from pptx import Presentation
 from server import celery
 import json
+import uuid
+from datetime import datetime
+from pymongo import MongoClient
 
-
+## CONFIG ##
 file_path = "master.pptx"
-pres= Presentation(file_path)
+pres = Presentation(file_path)
+MONGODB = os.getenv("MONGODB")
+client = MongoClient(MONGODB)
+db = client["taskdb"]["ta"]
 
-tags_metadata= [
+tags_metadata = [
     {
-    "name": "powerpoint",
-    "description": "handling powerpoint"
+        "name": "powerpoint",
+        "description": "handling powerpoint"
     },
     {
-    "name": "job management",
-    "description": "managing celery tasks"
+        "name": "job management",
+        "description": "managing celery tasks"
     },
-    
+
 ]
 
 app = FastAPI(
-    title= "SurfBoard",
-    description= "API Hub for the LeanIX Onboarding Deck",
-    version= "1.0.0",
+    title="Knowlix",
+    description="API Hub for the LeanIX Onboarding Deck",
+    version="1.0.0",
     openapi_tags=tags_metadata)
 
 
-
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -43,42 +50,133 @@
     expose_headers=[]
 )
 
+
 class PPTX(BaseModel):
     sections: List[str]
-
-
-@app.get("/v1/sections", tags = ["powerpoint"])
+
+
+class Download(BaseModel):  # request body identifying a single task
+    taskID: str  # looked up as "kwargs.customID" in the task collection
+
+
+## API ENDPOINTS ##
+@app.get("/v1/sections", tags=["powerpoint"])
 async def provide_sections():
 
     sections = get_sections(pres)
 
-    if (not sections) or (len(sections)==0):
-        raise HTTPException(status_code=404, detail="No Sections in Master pptx")
+    if (not sections) or (len(sections) == 0):
+        raise HTTPException(
+            status_code=404, detail="No Sections in Master pptx")
 
-    return JSONResponse(sections,status_code=200)
+    return JSONResponse(sections, status_code=200)
 
 
-@app.post("/v1/pptxjob", tags = ["job management"])
-async def deliver_pptx(pptx: PPTX):
+@app.post("/v1/pptxjob", tags=["job management"])
+async def trigger_pptx_task(pptx: PPTX):
     task_name = "pptx"
     sections = pptx.sections
-    kwargs ={
-        'sections':sections,
-        'downloadStatus': 'ready'
-        }
-
+    no_sections = len(sections)
+    sections_available = True
+    exists_already = False
+    status = None
+    custom_id = str(uuid.uuid4().hex)
+    timestamp = datetime.now().isoformat()
+
+    kwargs = {
+        'sections': sections,
+        'customID': custom_id,
+        'downloaded': False,
+        'date_started': timestamp
 
+    }
+
+    if no_sections != 0:
+        exists_already = check_existence(sections, db)
+    else:
+        sections_available = False
+
+    if not exists_already and sections_available:
+        task = celery.send_task(task_name, kwargs=kwargs, serializer='json')
+
+    if sections_available and not exists_already:
+        status = "success"
+
+    elif not sections_available:
+        status = "no_sections"
 
-    task = celery.send_task(task_name, kwargs = kwargs, serializer='json')
+    elif exists_already:
+        status = "pptx_exists"
 
     package = {
-        'taskID': task.id,
-        'sections': sections
+        'taskID': custom_id,
+        'sections': sections,
+        'status': status
     }
 
-
     return JSONResponse(package)
-
-
-
-
+
+
+@app.post("/v1/download", tags=["powerpoint"])
+async def download_pptx(download: Download):
+    """Send back the pptx file produced by the task with the given taskID."""
+    # find_one returns None when no stored task result matches the ID.
+    result = db.find_one({"kwargs.customID": download.taskID},
+                         {'result': 1, '_id': 0})
+    if result is None or not result.get("result"):
+        raise HTTPException(status_code=404, detail="No result for this taskID")
+
+    # The stored result is a JSON string carrying the generated file's path.
+    file_path = json.loads(result["result"])["filePath"]
+    return FileResponse(file_path)
+
+
+@app.post("/v1/registerDownload", tags=["powerpoint"], status_code=201)
+async def register_download(task_id: Download):
+    """Flag the task named in the request body as downloaded.
+
+    Responds with the number of documents the update modified.
+    """
+    selector = {"kwargs.customID": task_id.taskID}
+    change = {"$set": {"kwargs.downloaded": True}}
+    outcome = db.update_one(selector, change)
+
+    return {'changedDocuments': outcome.modified_count}
+
+
+@app.get("/v1/getDownloads", tags=["powerpoint"])
+async def getDownloads():
+    """List up to ten tasks, ordered by start timestamp, newest first.
+
+    Each entry carries the task's custom ID, start timestamp, status and
+    the sections that were requested for it.
+    """
+    newest_first = [("kwargs.date_started", pymongo.DESCENDING)]
+    cursor = db.find({}).sort(newest_first).limit(10)
+
+    results = [
+        {
+            'taskID': doc["kwargs"]["customID"],
+            'date_started': doc["kwargs"]["date_started"],
+            'status': doc["status"],
+            'sections': doc["kwargs"]["sections"],
+        }
+        for doc in cursor
+    ]
+
+    return JSONResponse(results, status_code=200)
+
+
+### UTILS ###
+
+def check_existence(sections, db):
+    """Tell whether a stored task already covers the given sections.
+
+    Counts documents in ``db`` whose ``kwargs.sections`` array has the same
+    length as ``sections`` and contains every one of its entries; returns
+    True when at least one such document exists, False otherwise.
+    """
+    wanted = {"$size": len(sections), "$all": sections}
+    matches = db.count_documents({"kwargs.sections": wanted})
+
+    return matches > 0
diff --git a/backend/clean_output.py b/backend/clean_output.py
diff --git a/backend/core.py b/backend/core.py
@@ -2,87 +2,88 @@
 import uuid
 import lxml.etree as etree
 
+
 def extract_slide_mapping(slidelist):
     """this method will get the mapping between a slide_id and rID"""
 
-    slide_mapping=dict()
-    
+    slide_mapping = dict()
+
     for slide in slidelist:
-        rid=slide.attrib['{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id']
-        slide_id=slide.attrib['id']
-        slide_mapping[slide_id]=rid
-        
+        rid = slide.attrib['{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id']
+        slide_id = slide.attrib['id']
+        slide_mapping[slide_id] = rid
+
     return slide_mapping
 
-def prepare_sections(keys, presentation, mapping,all_sections=False):
+
+def prepare_sections(keys, presentation, mapping, all_sections=False):
     """this method will prepare a xml tree based on the passed section names the user wants to have in 
     the pptx"""
 
-    nmap=presentation.slides._sldIdLst.nsmap
-
-    all_sections=compile_sections(presentation,mapping)
-    root=etree.Element('{http://schemas.openxmlformats.org/presentationml/2006/main}sldIdLst', nsmap=nmap)
-
-    #TODO: create toggle for 
-    if (all_sections) and (len(keys)!=0):
+    nmap = presentation.slides._sldIdLst.nsmap
+
+    all_sections = compile_sections(presentation, mapping)
+    root = etree.Element(
+        '{http://schemas.openxmlformats.org/presentationml/2006/main}sldIdLst', nsmap=nmap)
+
+    # TODO: create toggle for
+    if (all_sections) and (len(keys) != 0):
         for key in keys:
-            section=all_sections[key]
+            section = all_sections[key]
 
             for slide in section:
-                etree.SubElement(root, '{http://schemas.openxmlformats.org/presentationml/2006/main}sldId',attrib=slide,nsmap=nmap)
-
-            
+                etree.SubElement(
+                    root, '{http://schemas.openxmlformats.org/presentationml/2006/main}sldId', attrib=slide, nsmap=nmap)
+
     return root
 
+
 def compile_sections(presentation, mapping):
     """this method will get all the sections that are in the pptx"""
-    
-    ns='{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id'
-    xml=etree.fromstring(presentation.part.blob)
-    nsmap = {'p14':'http://schemas.microsoft.com/office/powerpoint/2010/main'}
+
+    ns = '{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id'
+    xml = etree.fromstring(presentation.part.blob)
+    nsmap = {'p14': 'http://schemas.microsoft.com/office/powerpoint/2010/main'}
     sections = xml.xpath('.//p14:sectionLst', namespaces=nsmap)[0]
-
-    collector=dict()
-    pairs_col=list()
 
-
+    collector = dict()
+    pairs_col = list()
+
     for section in sections:
-        key=section.attrib['name']
+        key = section.attrib['name']
 
         for slidelist in section:
             for slide in slidelist:
-                pairs=dict()
-                slide_id=slide.attrib['id']
+                pairs = dict()
+                slide_id = slide.attrib['id']
 
+                # lookup in slide mapping to get rID
+                rID = mapping[slide_id]
 
-                #lookup in slide mapping to get rID
-                rID=mapping[slide_id]
-
-                pairs['id']=slide_id
-                pairs[ns]=rID
+                pairs['id'] = slide_id
+                pairs[ns] = rID
 
                 pairs_col.append(pairs)
 
+        collector[key] = pairs_col
+        pairs_col = list()
 
-        collector[key]=pairs_col
-        pairs_col=list()
-
     return collector
-
-def replace_slides(new_xml,presentation,folder, save=False):
+
+
+def replace_slides(new_xml, presentation, folder, save=False):
     """This method will take a xml tree and create the final pptx out of it"""
-    uid=str(uuid.uuid4().hex)[:10]
-    file_path= f"{folder}/{uid}.pptx"
-    slidelist=presentation.slides._sldIdLst
+    uid = str(uuid.uuid4().hex)[:10]
+    file_path = f"{folder}/{uid}.pptx"
+    slidelist = presentation.slides._sldIdLst
+
+    slidelist.getparent().replace(slidelist, new_xml)
 
-
-    slidelist.getparent().replace(slidelist,new_xml)
-
     if save:
         presentation.save(file_path)
-    
+
     return file_path
 
 
 def print_xml(xml):
-    print(etree.tostring(xml, pretty_print=True, encoding="unicode"))
+    print(etree.tostring(xml, pretty_print=True, encoding="unicode"))