Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented a method to download a block onto your local device. #128

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 90 additions & 1 deletion notion/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
import json
import re
import uuid
import time
import os

from requests import Session, HTTPError
from requests import Session, HTTPError, get
from requests.cookies import cookiejar_from_dict
from urllib.parse import urljoin
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from zipfile import ZipFile

from .block import Block, BLOCK_TYPES
from .collection import (
Expand Down Expand Up @@ -168,6 +171,92 @@ def refresh_collection_rows(self, collection_id):
row_ids = [row.id for row in self.get_collection(collection_id).get_rows()]
self._store.set_collection_rows(collection_id, row_ids)

def _get_task_id(self, response):
    """
    Pull the task identifier out of an 'enqueueTask' response.

    Exporting a file makes notion enqueue a task that builds the file; the id of that task
    is what gets looked up afterwards to find the finished file. This helper extracts it.

    response - object whose .json() returns a dict containing a 'taskId' key.
    """
    payload = response.json()
    return payload['taskId']

# Source from https://requests.readthedocs.io/en/master/user/quickstart/#raw-response-content
def _download_url(self, url, save_path, chunk_size=128):
    """
    Stream the content found at `url` into the file `save_path`.

    url - string of the url from which to download.
    save_path - string of the file name to write the downloaded bytes into.
    chunk_size - number of bytes requested from the response per iteration. This is adjustable.

    Raises requests.HTTPError when the server answers with a 4xx/5xx status, so an error
    page is never written to disk as if it were the requested file.
    """
    # Using the response as a context manager releases the streamed connection
    # even if writing the file fails part-way through.
    with get(url, stream=True) as r:
        # Check the status before opening the output file so a failed request
        # does not leave an empty or bogus file behind.
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

def _unzip_file(self, file, delete=True):
    """
    Extract every member of a zip archive into the current working directory.

    file - string of the zip file name
    delete - when true, the zip file is removed once extraction has finished.
    """
    archive = ZipFile(file)
    with archive:
        archive.extractall()

    if not delete:
        return
    os.remove(file)

def download_block(
    self,
    block_id,
    export_type,
    event_name="exportBlock",
    recursive=False,
    time_zone="America/Chicago",
    locale="en",
    poll_interval=0.1,
    timeout=None,
):
    """
    Export a block through notion's task queue and unpack the result locally.

    The export is enqueued with the 'enqueueTask' endpoint, 'getTasks' is polled until the
    task reports an 'exportURL', and the zip behind that url is then downloaded to 'tmp.zip'
    and unzipped via self._download_url / self._unzip_file.

    block_id - id of the block. Should be a string.
    export_type - Type of the output file. The options are 'markdown', 'pdf', 'html'
    event_name - notion object you're exporting. Only "exportBlock" has been observed so far.
    recursive - include sub pages or not.
    time_zone - time zone string sent in exportOptions (e.g. "America/Chicago").
                NOTE(review): the accepted values are not known from this code - confirm.
    locale - locale string sent in exportOptions.
    poll_interval - seconds to sleep between two 'getTasks' requests.
    timeout - maximum number of seconds to wait for the export url, or None to wait
              indefinitely (the default).

    Raises TimeoutError if `timeout` is given and the export url is not available in time.

    NOTE(review): a task that never exposes 'exportURL' (e.g. presumably one that fails on
    the server) keeps this method polling forever unless `timeout` is set.

    TODO: If export_type are 'pdf' or 'html', there is another field in exportOptions called 'pdfFormat'. It should be set to "Letter".
          This needs to be implemented.
    TODO: Add support for downloading a list of blocks
    """
    tmp_zip = 'tmp.zip'
    data = {
        "task": {
            "eventName": event_name,
            "request": {
                "blockId": block_id,
                "recursive": recursive,
                "exportOptions": {
                    "exportType": export_type,
                    "timeZone": time_zone,
                    "locale": locale,
                },
            },
        },
    }

    task_id = self.post("enqueueTask", data).json()['taskId']
    deadline = None if timeout is None else time.monotonic() + timeout

    # Poll until the task carries a status that contains the export url. A single loop
    # covers both "no status yet" and "status without url yet".
    while True:
        task = self.post("getTasks", {"taskIds": [task_id]}).json()
        status = task['results'][0].get('status')
        if status and 'exportURL' in status:
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                "Export task {} did not provide an exportURL within {} seconds".format(task_id, timeout)
            )
        time.sleep(poll_interval)

    self._download_url(status['exportURL'], tmp_zip)
    self._unzip_file(tmp_zip)



def post(self, endpoint, data):
"""
All API requests on Notion.so are done as POSTs (except the websocket communications).
Expand Down