Skip to content

Faster slimmer harder #90

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
9e5ca08
now with newer versions
daanelson Jun 26, 2023
d6b9ba9
use python:3
technillogue Jun 29, 2023
fd3503e
try a dockerfile
technillogue Jun 29, 2023
2b6ddf8
fix
technillogue Jun 30, 2023
f94cbb1
use poetry to lock dependencies and get the dependency graph to separ…
technillogue Jun 30, 2023
d7f19af
also include cog's dependencies, but not cog
technillogue Jun 30, 2023
c977125
install cog
technillogue Jun 30, 2023
29b09c8
try installing safetensors at model download time
technillogue Jun 30, 2023
d4f74cc
dtype
technillogue Jun 30, 2023
7977547
diffusers
technillogue Jun 30, 2023
f705566
give up on resolving diffusers in advance
technillogue Jun 30, 2023
dc2257e
apparently feature extractor is required now
technillogue Jun 30, 2023
f364909
include safetensors conversion
technillogue Jun 30, 2023
8394ed2
get fp16 safetensors
Jul 1, 2023
4319285
add env vars
technillogue Jul 3, 2023
8f8e4e0
add a ton of time logging
technillogue Jul 3, 2023
f13dff0
fix logging
Jul 4, 2023
2d01f26
more logging
technillogue Jul 4, 2023
69cdadd
change dependency_splitter
technillogue Jul 5, 2023
3a64dbe
trim reqs
technillogue Jul 5, 2023
35526f4
pget
technillogue Jul 6, 2023
b1c9421
idk, try uploading
technillogue Jul 6, 2023
2cca7c4
try to fix
Jul 7, 2023
08b7ec3
fmt
technillogue Jul 7, 2023
dfc86ae
don't load predictor
technillogue Jul 7, 2023
6f585ad
fixes
Jul 7, 2023
08e2528
fake out Input
technillogue Jul 7, 2023
815b0b5
use pickle + profiler
technillogue Jul 10, 2023
4401022
fix
Jul 11, 2023
ab9f5a1
try setting mtime...?
technillogue Jul 11, 2023
b31e957
try using timestop for setting timestamps to epoch
technillogue Jul 12, 2023
ae32cb7
draft version with separate cuda install
Jul 18, 2023
a376197
keep at it
technillogue Jul 18, 2023
b42dcd5
more messing around
Jul 18, 2023
1731d57
patchelf
Jul 19, 2023
6871cdd
revert cufft efforts for the moment
Jul 20, 2023
da82e09
overwrite much more cog to defer imports
Jul 20, 2023
462d5d1
no libnvrtc
Jul 25, 2023
c116e63
clean up dockerfile
technillogue Jul 26, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Multi-stage build: the final image is a slim python:3.11 base that receives
# only the CUDA libraries, a custom-built torch wheel, and pre-resolved Python
# dependencies copied in from earlier stages.
FROM nvidia/cuda:11.8.0-devel-ubuntu18.04 as cuda

# separate tini stage to not need curl in final stage
FROM appropriate/curl as tini
ARG SOURCE_DATE_EPOCH=0
RUN set -eux; \
    TINI_VERSION=v0.19.0; \
    TINI_ARCH="amd64"; \
    curl -sSL -o /sbin/tini "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${TINI_ARCH}"; \
    chmod +x /sbin/tini; \
    touch --date="@${SOURCE_DATE_EPOCH}" /sbin/tini

FROM appropriate/curl as pget
ARG SOURCE_DATE_EPOCH=0
#RUN https://github.com/replicate/pget/releases/download/v0.0.1/pget \
# explicit https:// scheme -- without it curl defaults to plain http
RUN curl -sSL -o /pget https://r2-public-worker.drysys.workers.dev/pget \
    && chmod +x /pget \
    && touch --date="@${SOURCE_DATE_EPOCH}" /pget

# torch dependencies, except for torch itself
FROM python:3.11-slim as torch-deps
WORKDIR /dep
COPY ./torch-requirements.txt /requirements.txt
RUN pip install -t /dep -r /requirements.txt --no-deps

FROM appropriate/curl as torch
# it's not really necessary to use the curl image here
WORKDIR /dep
# this is torch compiled with https://github.com/technillogue/build-pytorch/blob/main/build-pytorch/Dockerfile
# unlike the version from pypi, this is not statically compiled bundled manywheel; it needs cuda libs separately
COPY torch-2.0.0a0+gite9ebda2-cp311-cp311-linux_x86_64.whl /dep
RUN unzip torch-2.0.0a0+gite9ebda2-cp311-cp311-linux_x86_64.whl
# drop the libcurand dependency from the torch shared objects so we don't have
# to ship libcurand.so.10 in the final image
RUN apk update && apk add patchelf && patchelf --remove-needed libcurand.so.10 torch/lib/libtorch_cuda.so && patchelf --remove-needed libcurand.so.10 torch/lib/libtorch_global_deps.so

FROM python:3.11-slim as deps
# install other dependencies into /dep. subdependencies are already resolved
WORKDIR /dep
COPY ./other-requirements.txt /requirements.txt
RUN pip install -t /dep -r /requirements.txt --no-deps
# don't bother installing the version of cog-python bundled with cog-go
RUN pip install -t /dep cog==0.8.1 --no-deps


FROM python:3.11-slim
COPY --from=tini --link /sbin/tini /sbin/tini
ENTRYPOINT ["/sbin/tini", "--"]
RUN mkdir -p /usr/local/cuda/lib64
# because we compiled torch, we need these cuda libraries
# we copy only the ones we need from the huge base image
# possibly only the first 3-4 are necessary in principle
COPY --from=cuda --link \
    /usr/local/cuda/lib64/libcublas.so.11 \
    /usr/local/cuda/lib64/libcublasLt.so.11 \
    /usr/local/cuda/lib64/libcudart.so.11.0 \
    /usr/local/cuda/lib64/libnvToolsExt.so.1 \
    /usr/local/cuda/lib64/libcufft.so.10 \
    /usr/local/cuda/lib64/libcusparse.so.11 \
    /usr/local/cuda/lib64

COPY --from=cuda --link /usr/lib/x86_64-linux-gnu/libgomp.so.1* /usr/lib/x86_64-linux-gnu
COPY --from=torch --link /dep/torch/ /src/torch/
COPY --from=torch --link /dep/torch-2.0.0a0+gite9ebda2.dist-info/ /src/torch-2.0.0a0+gite9ebda2.dist-info/
COPY --from=torch-deps --link /dep/ /src/
COPY --from=deps --link /dep/ /src/
COPY --from=pget --link /pget /usr/bin/pget
# patch over cog to avoid reimporting predict, defer imports, logging, etc
COPY --link ./cog-overwrite/ /src/cog/

# NOTE(review): if LD_LIBRARY_PATH is unset in the base image this produces a
# leading ":" -- harmless to the dynamic linker, but worth knowing
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin:/usr/local/cuda/lib64
ENV PATH=$PATH:/usr/local/nvidia/bin
# this is the file we download with pget
ARG MODEL_FILE="sd-2.1-fp16.pth"
ENV MODEL_FILE=$MODEL_FILE
# prevent k8s from installing anything
RUN ln -s --force /usr/bin/echo /usr/local/bin/pip
WORKDIR /src
EXPOSE 5000
CMD ["python", "-m", "cog.server.http"]
# kept last so source edits don't invalidate the cached layers above;
# multi-source COPY requires the destination to end with a slash
COPY ./*sh ./*py ./cog.yaml /src/
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Still a somewhat manual process: install Python 3.11 on a machine, run `poetry lock`, transfer `requirements.txt` and `poetry.lock` back (e.g. via wormhole), then run `dependency_splitter.py`.

Use the `syl/custom-dockerfile` branch.

# Stable Diffusion v2 Cog model

[![Replicate](https://replicate.com/stability-ai/stable-diffusion/badge)](https://replicate.com/stability-ai/stable-diffusion)
Expand Down
32 changes: 32 additions & 0 deletions cog-overwrite/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import time
import sys


def logtime(msg: str) -> None:
    """Print a timestamped ``===TIME ...===`` marker to stderr for startup profiling."""
    line = "===TIME {:.4f} {}===".format(time.time(), msg)
    print(line, file=sys.stderr)


# Record when this (patched) cog package starts importing.
logtime("cog/__init__.py")
# never mind all this, predict.py will just have to from cog.predictor import BasePredictor

# from pydantic import BaseModel

# from .predictor import BasePredictor
# from .types import ConcatenateIterator, File, Input, Path


# Fall back to a placeholder version when package metadata wasn't generated.
try:
    from ._version import __version__
except ImportError:
    __version__ = "0.0.0+unknown"


# Only the version is re-exported; the usual cog names stay deferred so that
# importing `cog` itself pulls in no heavy dependencies.
__all__ = [
    "__version__",
    # "BaseModel",
    # "BasePredictor",
    # "ConcatenateIterator",
    # "File",
    # "Input",
    # "Path",
]
74 changes: 74 additions & 0 deletions cog-overwrite/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import base64
import io
import mimetypes
import os


def upload_file(fh: io.IOBase, output_file_prefix: str = None) -> str:
fh.seek(0)
import requests

if output_file_prefix is not None:
name = getattr(fh, "name", "output")
url = output_file_prefix + os.path.basename(name)
resp = requests.put(url, files={"file": fh})
resp.raise_for_status()
return url

b = fh.read()
# The file handle is strings, not bytes
if isinstance(b, str):
b = b.encode("utf-8")
encoded_body = base64.b64encode(b)
if getattr(fh, "name", None):
# despite doing a getattr check here, mypy complains that io.IOBase has no attribute name
mime_type = mimetypes.guess_type(fh.name)[0] # type: ignore
else:
mime_type = "application/octet-stream"
s = encoded_body.decode("utf-8")
return f"data:{mime_type};base64,{s}"


def guess_filename(obj: io.IOBase) -> str:
    """Best-effort filename for *obj*: the basename of its ``name`` attribute, else ``"file"``."""
    return os.path.basename(getattr(obj, "name", "file"))


def put_file_to_signed_endpoint(
    fh: io.IOBase, endpoint: str, client: "requests.Session"
) -> str:
    """PUT *fh* to ``endpoint`` + guessed filename via *client*; return the final URL.

    The response URL has its query string (signing parameters) stripped.
    Raises ``requests.HTTPError`` on a non-2xx response.

    Note: the ``"requests.Session"`` annotation is a string literal and is
    never evaluated, so requests itself does not need to be imported here;
    the previous unconditional ``import requests`` was dead code.
    """
    from urllib.parse import urlparse

    fh.seek(0)

    filename = guess_filename(fh)
    content_type, _ = mimetypes.guess_type(filename)

    # set connect timeout to slightly more than a multiple of 3 to avoid
    # aligning perfectly with TCP retransmission timer
    connect_timeout = 10
    read_timeout = 15

    resp = client.put(
        ensure_trailing_slash(endpoint) + filename,
        fh,  # type: ignore
        headers={"Content-type": content_type},
        timeout=(connect_timeout, read_timeout),
    )
    resp.raise_for_status()

    # strip any signing gubbins from the URL
    final_url = urlparse(resp.url)._replace(query="").geturl()

    return final_url


def ensure_trailing_slash(url: str) -> str:
    """Return *url* with a trailing slash appended if it doesn't already have one."""
    return url if url.endswith("/") else url + "/"
10 changes: 10 additions & 0 deletions cog-overwrite/lazy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class LazyModule:
    """Proxy object that defers importing a module until first attribute access."""

    def __init__(self, module_name):
        # Name of the target module; the real import happens in __getattr__.
        self.module_name = module_name
        self.module = None

    def __getattr__(self, attr):
        # Only reached for attributes missing on the instance, i.e. anything
        # other than module_name/module -- so this forwards to the real module.
        mod = self.module
        if mod is None:
            import importlib

            mod = importlib.import_module(self.module_name)
            self.module = mod
        return getattr(mod, attr)
Loading