Skip to content

Commit

Permalink
Save ludwig-config with model-weights in output directory (#3965)
Browse files: browse the repository at this point in the history
Co-authored-by: Sanjay <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Mar 17, 2024
1 parent 606c732 commit c09d5dc
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
42 changes: 34 additions & 8 deletions ludwig/utils/upload_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from abc import ABC, abstractmethod

from huggingface_hub import HfApi, login
from huggingface_hub.hf_api import CommitInfo

from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -193,17 +196,20 @@ def _validate_upload_parameters(
</Alex(12/10/2023): TODO>
"""
files = set(os.listdir(trained_model_artifacts_path))
acceptable_model_artifact_file_nanes: set[str] = {
acceptable_model_artifact_file_names: set[str] = {
"pytorch_model.bin",
"adapter_model.bin", # Delete per formal deprecation policy TBD (per above comment).
"adapter_model.safetensors", # New format as of PEFT version "0.7.0" (per above comment).
}
if not (files & acceptable_model_artifact_file_nanes):
if not (files & acceptable_model_artifact_file_names):
raise ValueError(
f"Can't find model weights at {trained_model_artifacts_path}. Trained model weights should "
"either be saved as `pytorch_model.bin` for regular model training, or have `adapter_model.bin`"
"or `adapter_model.safetensors` if using parameter efficient fine-tuning methods like LoRA."
)
model_hyperparameters_path: str = os.path.join(model_path, "model")
if MODEL_HYPERPARAMETERS_FILE_NAME not in os.listdir(model_hyperparameters_path):
raise ValueError(f"Can't find '{MODEL_HYPERPARAMETERS_FILE_NAME}' at {model_hyperparameters_path}.")

def upload(
self,
Expand Down Expand Up @@ -256,17 +262,37 @@ def upload(
)

# Upload all artifacts in model weights folder
upload_path = self.api.upload_folder(
commit_message_weights: str | None = f"{commit_message} (weights)" if commit_message else commit_message
commit_description_weights: str | None = (
f"{commit_description} (weights)" if commit_description else commit_description
)
upload_path_weights: CommitInfo = self.api.upload_folder(
folder_path=os.path.join(model_path, "model", "model_weights"),
repo_id=repo_id,
repo_type=repo_type,
commit_message=commit_message,
commit_description=commit_description,
commit_message=commit_message_weights,
commit_description=commit_description_weights,
)

if upload_path:
logger.info(f"Model uploaded to `{upload_path}` with repository name `{repo_id}`")
return True
if upload_path_weights:
logger.info(f"Model weights uploaded to `{upload_path_weights}` with repository name `{repo_id}`")
# Upload the ludwig configuration file
commit_message_config: str | None = f"{commit_message} (config)" if commit_message else commit_message
commit_description_config: str | None = (
f"{commit_description} (config)" if commit_description else commit_description
)
upload_path_config: CommitInfo = self.api.upload_file(
path_or_fileobj=os.path.join(model_path, "model", MODEL_HYPERPARAMETERS_FILE_NAME),
path_in_repo="ludwig_config.json",
repo_id=repo_id,
repo_type=repo_type,
commit_message=commit_message_config,
commit_description=commit_description_config,
)

if upload_path_config:
logger.info(f"Model config uploaded to `{upload_path_config}` with repository name `{repo_id}`")
return True

return False

Expand Down
6 changes: 4 additions & 2 deletions tests/ludwig/utils/test_upload_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import pytest

from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.utils.upload_utils import HuggingFaceHub

logger = logging.getLogger(__name__)
Expand All @@ -26,14 +27,15 @@ def _build_fake_model_repo(
names must be leaf file names, not paths).
"""
# Create a temporary folder designating training output directory.
model_directory: str = pathlib.Path(destination_directory) / experiment_name / model_directory_name
model_weights_directory: str = model_directory / model_weights_directory_name
model_directory: pathlib.Path = pathlib.Path(destination_directory) / experiment_name / model_directory_name
model_weights_directory: pathlib.Path = model_directory / model_weights_directory_name
model_weights_directory.mkdir(parents=True, exist_ok=True)

# Create files within the "model_weights" subdirectory.
file_name: str
for file_name in file_names:
pathlib.Path(model_weights_directory / file_name).touch()
pathlib.Path(model_directory / MODEL_HYPERPARAMETERS_FILE_NAME).touch()


@pytest.fixture
Expand Down

0 comments on commit c09d5dc

Please sign in to comment.