Skip to content

Commit

Permalink
apply black
Browse files Browse the repository at this point in the history
  • Loading branch information
steven-mi committed Aug 14, 2023
1 parent 77afd46 commit 48b0252
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 43 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ export DATABRICKS_TOKEN="mydatabrickstoken"
If your project is not already a pip package, you have to turn it into one. You can use dbrocket to do that.

```sh
dbrocket setup
rocket setup
```

This will create a setup.py for you.

## Using db-rocket

```sh
dbrocket launch
rocket launch
```

The command returns the exact command you have to run in your notebook next.
Expand Down
1 change: 0 additions & 1 deletion rocket/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@


6 changes: 4 additions & 2 deletions rocket/file_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, watcher_instance):
def on_modified(self, event):
if event.is_directory:
return
if os.path.splitext(event.src_path)[1] == '.py':
if os.path.splitext(event.src_path)[1] == ".py":
self.watcher_instance.modified_files.append(event.src_path)

def __init__(self, path_to_watch, callback, recursive=True):
Expand All @@ -25,7 +25,9 @@ def __init__(self, path_to_watch, callback, recursive=True):
self.handler = self._Handler(self)

def start(self):
self.observer.schedule(self.handler, self.path_to_watch, recursive=self.recursive)
self.observer.schedule(
self.handler, self.path_to_watch, recursive=self.recursive
)
self.observer.start()
try:
while True:
Expand Down
102 changes: 68 additions & 34 deletions rocket/rocket.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@

from rocket.file_watcher import FileWatcher
from rocket.logger import logger
from rocket.utils import execute_shell_command, extract_python_package_dirs, extract_python_files_from_folder, \
execute_for_each_multithreaded
from rocket.utils import (
execute_shell_command,
extract_python_package_dirs,
extract_python_files_from_folder,
execute_for_each_multithreaded,
)


class Rocket:
Expand Down Expand Up @@ -45,10 +49,10 @@ def setup(self):
logger.info("Setup.py file created, feel free to modify it with your needs.")

def launch(
self,
project_location: str = ".",
dbfs_path: Optional[str] = None,
watch=True,
self,
project_location: str = ".",
dbfs_path: Optional[str] = None,
watch=True,
):
"""
Entrypoint of the application, triggers a build and deploy
Expand All @@ -64,9 +68,7 @@ def launch(
raise Exception("`dbfs_path` must start with dbfs:/")

try:
execute_shell_command(
f"databricks fs ls dbfs:/"
)
execute_shell_command(f"databricks fs ls dbfs:/")
except Exception as e:
raise Exception(
f"Error accessing DBFS via databricks-cli. Please check if your databricks token is set and valid? Try to generate a new token and update existing one with `databricks configure --token`. Error details: {e}"
Expand All @@ -79,16 +81,27 @@ def launch(

self._build_and_deploy(watch, project_location, dbfs_path)
if watch:
watcher = FileWatcher(project_location,
lambda x: self._build_and_deploy(watch=watch, modified_files=watcher.modified_files,
dbfs_path=dbfs_path,
project_location=project_location))
watcher = FileWatcher(
project_location,
lambda x: self._build_and_deploy(
watch=watch,
modified_files=watcher.modified_files,
dbfs_path=dbfs_path,
project_location=project_location,
),
)
watcher.start()

def _build_and_deploy(self, watch, project_location, dbfs_path, modified_files=None):
def _build_and_deploy(
self, watch, project_location, dbfs_path, modified_files=None
):
if modified_files:
logger.info(f"Found changes in {modified_files}. Overwriting them.")
self._deploy(file_paths=modified_files, dbfs_path=dbfs_path, project_location=project_location)
self._deploy(
file_paths=modified_files,
dbfs_path=dbfs_path,
project_location=project_location,
)
return

if watch:
Expand All @@ -104,7 +117,9 @@ def _build_and_deploy(self, watch, project_location, dbfs_path, modified_files=N
files.append(f"{project_location}/{project_file}")

if os.path.exists(f"{project_location}/pyproject.toml"):
execute_shell_command("poetry export -f requirements.txt --with-credentials --without-hashes --output requirements.txt")
execute_shell_command(
"poetry export -f requirements.txt --with-credentials --without-hashes --output requirements.txt"
)

dependency_file_exist = False
dependency_files = ["requirements.in", "requirements.txt"]
Expand All @@ -117,22 +132,28 @@ def _build_and_deploy(self, watch, project_location, dbfs_path, modified_files=N
uploaded_dependency_file = dependency_file
dependency_file_exist = True
with open(dependency_file_path) as f:
index_urls = [line.strip() for line in f.readlines() if "index-url" in line]
self._deploy(file_paths=files, dbfs_path=dbfs_path, project_location=project_location)
index_urls = [
line.strip()
for line in f.readlines()
if "index-url" in line
]
self._deploy(
file_paths=files, dbfs_path=dbfs_path, project_location=project_location
)

install_path = f'{dbfs_path.replace("dbfs:/", "/dbfs/")}'
index_urls_options = " ".join(index_urls)

if dependency_file_exist:
logger.info(
f"""Watch activated. Uploaded your project to databricks. Install your project in your databricks notebook by running:
%pip install --upgrade pip
%pip install {index_urls_options} -r {install_path}/{uploaded_dependency_file}
%pip install --no-deps -e {install_path}
%pip install --upgrade pip
%pip install {index_urls_options} -r {install_path}/{uploaded_dependency_file}
%pip install --no-deps -e {install_path}
and following in a new Python cell:
%load_ext autoreload
%autoreload 2""")
and following in a new Python cell:
%load_ext autoreload
%autoreload 2""")
else:
logger.info(
f"""Watch activated. Uploaded your project to databricks. Install your project in your databricks notebook by running:
Expand All @@ -141,11 +162,18 @@ def _build_and_deploy(self, watch, project_location, dbfs_path, modified_files=N
and following in a new Python cell:
%load_ext autoreload
%autoreload 2""")
%autoreload 2"""
)
else:
logger.info("Watch is disabled. Building creating a python wheel from your project")
logger.info(
"Watch is disabled. Building creating a python wheel from your project"
)
wheel_path, wheel_file = self.create_python_project_wheel(project_location)
self._deploy(file_paths=[wheel_path], dbfs_path=dbfs_path, project_location=project_location)
self._deploy(
file_paths=[wheel_path],
dbfs_path=dbfs_path,
project_location=project_location,
)
install_path = f'{dbfs_path.replace("dbfs:/", "/dbfs/")}/{wheel_file}'

dependency_files = ["requirements.in", "requirements.txt"]
Expand All @@ -154,19 +182,23 @@ def _build_and_deploy(self, watch, project_location, dbfs_path, modified_files=N
dependency_file_path = f"{project_location}/{dependency_file}"
if os.path.exists(dependency_file_path):
with open(dependency_file_path) as f:
index_urls = [line.strip() for line in f.readlines() if "index-url" in line]
index_urls = [
line.strip()
for line in f.readlines()
if "index-url" in line
]
index_urls_options = " ".join(index_urls)

logger.info(f"""Uploaded wheel to databricks. Install your library in your databricks notebook by running:
logger.info(
f"""Uploaded wheel to databricks. Install your library in your databricks notebook by running:
%pip install --upgrade pip
%pip install {index_urls_options} {install_path} --force-reinstall""")
%pip install {index_urls_options} {install_path} --force-reinstall"""
)

def _deploy(self, file_paths, dbfs_path, project_location):
def helper(file):
target_path = f"{dbfs_path}/{os.path.relpath(file, project_location)}"
execute_shell_command(
f"databricks fs cp --overwrite {file} {target_path}"
)
execute_shell_command(f"databricks fs cp --overwrite {file} {target_path}")
logger.info(f"Uploaded {file} to {target_path}")

execute_for_each_multithreaded(file_paths, lambda x: helper(x))
Expand All @@ -182,7 +214,9 @@ def create_python_project_wheel(self, project_location):
)
elif os.path.exists(f"{project_location}/pyproject.toml"):
logger.info("Found pyproject.toml. Building python library with poetry")
execute_shell_command(f"cd {project_location} ; poetry build --format wheel")
execute_shell_command(
f"cd {project_location} ; poetry build --format wheel"
)
else:
raise Exception(
"To be turned into a library your project has to contain a setup.py or pyproject.toml file"
Expand Down
6 changes: 3 additions & 3 deletions rocket/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def execute_for_each_multithreaded(lst, func, max_threads=None):

def extract_package_name_from_wheel(wheel_filename):
# Split the filename on '-' and take the first part
return wheel_filename.split('-')[0]
return wheel_filename.split("-")[0]


def extract_project_name_from_wheel(wheel_filename):
Expand All @@ -34,7 +34,7 @@ def extract_python_package_dirs(root_dir):
packages = []
for item in os.listdir(root_dir):
item_path = os.path.join(root_dir, item)
if os.path.isdir(item_path) and '__init__.py' in os.listdir(item_path):
if os.path.isdir(item_path) and "__init__.py" in os.listdir(item_path):
packages.append(item_path)
return packages

Expand All @@ -49,7 +49,7 @@ def extract_python_files_from_folder(path):

for root, dirs, files in os.walk(path):
for file in files:
if file.endswith('.py'):
if file.endswith(".py"):
py_files.append(os.path.join(root, file))

return py_files
Expand Down
1 change: 0 additions & 1 deletion rocket_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@
from rocket.rocket import main

if __name__ == "__main__":

main()

0 comments on commit 48b0252

Please sign in to comment.