This repository has been archived by the owner on Jan 29, 2024. It is now read-only.

Commit

refactor variable names
ixaxaar committed Oct 27, 2023
1 parent c510066 commit 8510dc9
Showing 9 changed files with 59 additions and 486 deletions.
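The rename this commit applies is mechanical: every `model_class_name` keyword becomes `model_class`, and every `tokenizer_class_name` becomes `tokenizer_class`. A minimal, hypothetical migration sketch for downstream callers (this helper is not part of the repository):

```python
# Hypothetical helper for code that still passes the pre-commit keyword names.
RENAMES = {
    "model_class_name": "model_class",
    "tokenizer_class_name": "tokenizer_class",
}

def migrate_kwargs(kwargs: dict) -> dict:
    """Rewrite pre-commit keyword arguments to the post-commit names."""
    return {RENAMES.get(key, key): value for key, value in kwargs.items()}

# Old-style kwargs map onto the new signature with values unchanged.
assert migrate_kwargs({"model_class_name": "AutoModelForCausalLM"}) == {
    "model_class": "AutoModelForCausalLM"
}
```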
1 change: 0 additions & 1 deletion geniusrise_huggingface/__init__.py
@@ -20,6 +20,5 @@
from geniusrise_huggingface.language_model import HuggingFaceLanguageModelingFineTuner
from geniusrise_huggingface.ner import HuggingFaceNamedEntityRecognitionFineTuner
from geniusrise_huggingface.question_answering import HuggingFaceQuestionAnsweringFineTuner
-from geniusrise_huggingface.sentiment_analysis import HuggingFaceSentimentAnalysisFineTuner
from geniusrise_huggingface.summarization import HuggingFaceSummarizationFineTuner
from geniusrise_huggingface.translation import HuggingFaceTranslationFineTuner
22 changes: 11 additions & 11 deletions geniusrise_huggingface/base/api.py
@@ -32,8 +32,8 @@ class HuggingFaceAPI(HuggingFaceBulk):
model_revision (Optional[str]): The revision of the pre-trained language model.
tokenizer_name (str): The name of the tokenizer used to preprocess input text.
tokenizer_revision (Optional[str]): The revision of the tokenizer used to preprocess input text.
-model_class_name (str): The name of the class of the pre-trained language model.
-tokenizer_class_name (str): The name of the class of the tokenizer used to preprocess input text.
+model_class (str): The name of the class of the pre-trained language model.
+tokenizer_class (str): The name of the class of the tokenizer used to preprocess input text.
use_cuda (bool): Whether to use a GPU for inference.
quantization (int): The level of quantization to use for the pre-trained language model.
precision (str): The precision to use for the pre-trained language model.
@@ -46,7 +46,7 @@ class HuggingFaceAPI(HuggingFaceBulk):
text(**kwargs: Any) -> Dict[str, Any]:
Generates text based on the given prompt and decoding strategy.
-listen(model_name: str, model_class_name: str = "AutoModelForCausalLM", tokenizer_class_name: str = "AutoTokenizer", use_cuda: bool = False, precision: str = "float16", quantization: int = 0, device_map: str | Dict | None = "auto", max_memory={0: "24GB"}, torchscript: bool = True, endpoint: str = "*", port: int = 3000, cors_domain: str = "http://localhost:3000", username: Optional[str] = None, password: Optional[str] = None, **model_args: Any) -> None:
+listen(model_name: str, model_class: str = "AutoModelForCausalLM", tokenizer_class: str = "AutoTokenizer", use_cuda: bool = False, precision: str = "float16", quantization: int = 0, device_map: str | Dict | None = "auto", max_memory={0: "24GB"}, torchscript: bool = True, endpoint: str = "*", port: int = 3000, cors_domain: str = "http://localhost:3000", username: Optional[str] = None, password: Optional[str] = None, **model_args: Any) -> None:
Starts a CherryPy server to listen for requests to generate text.
"""

@@ -118,8 +118,8 @@ def text(self, **kwargs: Any) -> Dict[str, Any]:
def listen(
self,
model_name: str,
-model_class_name: str = "AutoModelForCausalLM",
-tokenizer_class_name: str = "AutoTokenizer",
+model_class: str = "AutoModelForCausalLM",
+tokenizer_class: str = "AutoTokenizer",
use_cuda: bool = False,
precision: str = "float16",
quantization: int = 0,
@@ -138,8 +138,8 @@ def listen(
Args:
model_name (str): The name of the pre-trained language model.
-model_class_name (str, optional): The name of the class of the pre-trained language model. Defaults to "AutoModelForCausalLM".
-tokenizer_class_name (str, optional): The name of the class of the tokenizer used to preprocess input text. Defaults to "AutoTokenizer".
+model_class (str, optional): The name of the class of the pre-trained language model. Defaults to "AutoModelForCausalLM".
+tokenizer_class (str, optional): The name of the class of the tokenizer used to preprocess input text. Defaults to "AutoTokenizer".
use_cuda (bool, optional): Whether to use a GPU for inference. Defaults to False.
precision (str, optional): The precision to use for the pre-trained language model. Defaults to "float16".
quantization (int, optional): The level of quantization to use for the pre-trained language model. Defaults to 0.
@@ -154,8 +154,8 @@ def listen(
**model_args (Any): Additional arguments to pass to the pre-trained language model.
"""
self.model_name = model_name
-self.model_class_name = model_class_name
-self.tokenizer_class_name = tokenizer_class_name
+self.model_class = model_class
+self.tokenizer_class = tokenizer_class
self.use_cuda = use_cuda
self.quantization = quantization
self.precision = precision
@@ -182,8 +182,8 @@ def listen(
tokenizer_name=self.tokenizer_name,
model_revision=self.model_revision,
tokenizer_revision=self.tokenizer_revision,
-model_class_name=self.model_class_name,
-tokenizer_class_name=self.tokenizer_class_name,
+model_class=self.model_class,
+tokenizer_class=self.tokenizer_class,
use_cuda=self.use_cuda,
precision=self.precision,
quantization=self.quantization,
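`listen` wires these arguments into a CherryPy server (endpoint, port, CORS domain, optional basic auth). A stripped-down illustration of that serving pattern, assuming only the `cherrypy` package; this is not the project's actual server code, and the handler is invented for the example:

```python
import cherrypy

class TextEndpoint:
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def text(self):
        # A real handler would run the loaded model's generation here.
        return {"status": "ok"}

cherrypy.config.update({
    "server.socket_host": "0.0.0.0",  # analogous to the "*" endpoint default
    "server.socket_port": 3000,       # matches the port default above
})
cherrypy.quickstart(TextEndpoint())
```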
12 changes: 6 additions & 6 deletions geniusrise_huggingface/base/bulk.py
@@ -205,8 +205,8 @@ def load_models(
tokenizer_name: str,
model_revision: Optional[str] = None,
tokenizer_revision: Optional[str] = None,
-model_class_name: str = "AutoModelForCausalLM",
-tokenizer_class_name: str = "AutoTokenizer",
+model_class: str = "AutoModelForCausalLM",
+tokenizer_class: str = "AutoTokenizer",
use_cuda: bool = False,
precision: str = "float16",
quantization: int = 0,
@@ -220,8 +220,8 @@
Parameters:
- model_name (str): The name of the model to load.
-- model_class_name (str): The class name of the model to load. Default is "AutoModelForCausalLM".
-- tokenizer_class_name (str): The class name of the tokenizer to load. Default is "AutoTokenizer".
+- model_class (str): The class name of the model to load. Default is "AutoModelForCausalLM".
+- tokenizer_class (str): The class name of the tokenizer to load. Default is "AutoTokenizer".
- use_cuda (bool): Whether to use CUDA for GPU acceleration. Default is False.
- precision (str): The bit precision for model and tokenizer. Options are 'float32', 'float16', 'bfloat16'. Default is 'float16'.
- device_map (Union[str, Dict]): Device map for model placement. Default is "auto".
@@ -251,8 +251,8 @@ def load_models(
if use_cuda and not device_map:
device_map = "auto"

-ModelClass = getattr(transformers, model_class_name)
-TokenizerClass = getattr(transformers, tokenizer_class_name)
+ModelClass = getattr(transformers, model_class)
+TokenizerClass = getattr(transformers, tokenizer_class)

# Load the model and tokenizer
tokenizer = TokenizerClass.from_pretrained(tokenizer_name, revision=tokenizer_revision, torch_dtype=torch_dtype)
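The hunk above is the heart of the rename: `load_models` resolves the model and tokenizer classes by name on the `transformers` namespace, so the parameters are class names, not class objects. A self-contained sketch of that lookup pattern (the checkpoint name is illustrative):

```python
import transformers

def resolve_classes(model_class: str = "AutoModelForCausalLM",
                    tokenizer_class: str = "AutoTokenizer"):
    # getattr raises AttributeError if the name is not a transformers export,
    # which is the failure mode of passing a mistyped class name.
    ModelClass = getattr(transformers, model_class)
    TokenizerClass = getattr(transformers, tokenizer_class)
    return ModelClass, TokenizerClass

ModelClass, TokenizerClass = resolve_classes()
tokenizer = TokenizerClass.from_pretrained("gpt2")  # illustrative checkpoint
model = ModelClass.from_pretrained("gpt2")
```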
18 changes: 9 additions & 9 deletions geniusrise_huggingface/base/tests/test_api.py
@@ -25,7 +25,7 @@

@pytest.fixture(
params=[
-# model_name, model_class_name, tokenizer_class_name, use_cuda, precision, quantization, device_map, max_memory, torchscript
+# model_name, model_class, tokenizer_class, use_cuda, precision, quantization, device_map, max_memory, torchscript
# fmt: off
("gpt2", "AutoModelForCausalLM", "AutoTokenizer", True, "float16", 0, None, None, False),
("gpt2", "AutoModelForCausalLM", "AutoTokenizer", False, "float32", 0, None, None, False),
@@ -72,8 +72,8 @@ def hfa():
def test_load_models(hfa, model_config):
(
model_name,
-model_class_name,
-tokenizer_class_name,
+model_class,
+tokenizer_class,
use_cuda,
precision,
quantization,
@@ -97,8 +97,8 @@ def test_load_models(hfa, model_config):
model_revision=model_revision,
tokenizer_name=model_name,
tokenizer_revision=tokenizer_revision,
-model_class_name=model_class_name,
-tokenizer_class_name=tokenizer_class_name,
+model_class=model_class,
+tokenizer_class=tokenizer_class,
use_cuda=use_cuda,
precision=precision,
quantization=quantization,
@@ -149,8 +149,8 @@ def test_load_models(hfa, model_config):
def test_generate_strategies(hfa, model_config, strategy):
(
model_name,
-model_class_name,
-tokenizer_class_name,
+model_class,
+tokenizer_class,
use_cuda,
precision,
quantization,
@@ -174,8 +174,8 @@ def test_generate_strategies(hfa, model_config, strategy):
model_revision=model_revision,
tokenizer_name=model_name,
tokenizer_revision=tokenizer_revision,
-model_class_name=model_class_name,
-tokenizer_class_name=tokenizer_class_name,
+model_class=model_class,
+tokenizer_class=tokenizer_class,
use_cuda=use_cuda,
precision=precision,
quantization=quantization,
18 changes: 9 additions & 9 deletions geniusrise_huggingface/base/tests/test_bulk.py
@@ -25,7 +25,7 @@

@pytest.fixture(
params=[
-# model_name, model_class_name, tokenizer_class_name, use_cuda, precision, quantization, device_map, max_memory, torchscript
+# model_name, model_class, tokenizer_class, use_cuda, precision, quantization, device_map, max_memory, torchscript
# fmt: off
("gpt2", "AutoModelForCausalLM", "AutoTokenizer", True, "float16", 0, None, None, False),
("gpt2", "AutoModelForCausalLM", "AutoTokenizer", False, "float32", 0, None, None, False),
@@ -72,8 +72,8 @@ def hfa():
def test_load_models(hfa, model_config):
(
model_name,
-model_class_name,
-tokenizer_class_name,
+model_class,
+tokenizer_class,
use_cuda,
precision,
quantization,
@@ -97,8 +97,8 @@ def test_load_models(hfa, model_config):
model_revision=model_revision,
tokenizer_name=model_name,
tokenizer_revision=tokenizer_revision,
-model_class_name=model_class_name,
-tokenizer_class_name=tokenizer_class_name,
+model_class=model_class,
+tokenizer_class=tokenizer_class,
use_cuda=use_cuda,
precision=precision,
quantization=quantization,
@@ -149,8 +149,8 @@ def test_load_models(hfa, model_config):
def test_generate_strategies(hfa, model_config, strategy):
(
model_name,
-model_class_name,
-tokenizer_class_name,
+model_class,
+tokenizer_class,
use_cuda,
precision,
quantization,
@@ -174,8 +174,8 @@ def test_generate_strategies(hfa, model_config, strategy):
model_revision=model_revision,
tokenizer_name=model_name,
tokenizer_revision=tokenizer_revision,
-model_class_name=model_class_name,
-tokenizer_class_name=tokenizer_class_name,
+model_class=model_class,
+tokenizer_class=tokenizer_class,
use_cuda=use_cuda,
precision=precision,
quantization=quantization,
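Both test modules drive `load_models` through a parametrized fixture whose tuples now carry `model_class` and `tokenizer_class` in positions two and three. A stripped-down sketch of that fixture pattern (fixture and test names are illustrative, not the repository's):

```python
import pytest

@pytest.fixture(params=[
    # model_name, model_class, tokenizer_class
    ("gpt2", "AutoModelForCausalLM", "AutoTokenizer"),
])
def model_config(request):
    return request.param

def test_unpacks_renamed_fields(model_config):
    model_name, model_class, tokenizer_class = model_config
    assert tokenizer_class == "AutoTokenizer"
```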
24 changes: 12 additions & 12 deletions geniusrise_huggingface/embeddings/api.py
@@ -45,8 +45,8 @@ class EmbeddingsAPI(HuggingFaceAPI):
genius EmbeddingsAPI rise \
listen \
--model_name=bert-base-uncased \
---model_class_name=AutoModelForCausalLM \
---tokenizer_class_name=AutoTokenizer \
+--model_class=AutoModelForCausalLM \
+--tokenizer_class=AutoTokenizer \
--sentence_transformer_model=paraphrase-MiniLM-L6-v2 \
--use_cuda=True \
--precision=float16 \
@@ -68,8 +68,8 @@ class EmbeddingsAPI(HuggingFaceAPI):
method: "listen"
args:
model_name: "bert-base-uncased"
model_class_name: "AutoModelForCausalLM"
tokenizer_class_name: "AutoTokenizer"
model_class: "AutoModelForCausalLM"
tokenizer_class: "AutoTokenizer"
sentence_transformer_model: "paraphrase-MiniLM-L6-v2"
use_cuda: True
precision: "float16"
@@ -234,8 +234,8 @@ def sentence_permutations(self, **kwargs: Any) -> Dict[str, Any]:
def listen( # type: ignore
self,
model_name: str,
-model_class_name: str = "AutoModelForCausalLM",
-tokenizer_class_name: str = "AutoTokenizer",
+model_class: str = "AutoModelForCausalLM",
+tokenizer_class: str = "AutoTokenizer",
use_cuda: bool = False,
precision: str = "float16",
quantization: int = 0,
@@ -254,8 +254,8 @@ def listen( # type: ignore
Parameters:
- model_name (str): The name of the Hugging Face model to use.
-- model_class_name (str, optional): The class name of the model. Defaults to "AutoModelForCausalLM".
-- tokenizer_class_name (str, optional): The class name of the tokenizer. Defaults to "AutoTokenizer".
+- model_class (str, optional): The class name of the model. Defaults to "AutoModelForCausalLM".
+- tokenizer_class (str, optional): The class name of the tokenizer. Defaults to "AutoTokenizer".
- sentence_transformer_model (str, optional): The name of the Sentence Transformer model to use. Defaults to "paraphrase-MiniLM-L6-v2".
- use_cuda (bool, optional): Whether to use CUDA for computation. Defaults to False.
- precision (str, optional): The precision to use for computations. Defaults to "float16".
@@ -273,8 +273,8 @@ def listen( # type: ignore
None
"""
self.model_name = model_name
-self.model_class_name = model_class_name
-self.tokenizer_class_name = tokenizer_class_name
+self.model_class = model_class
+self.tokenizer_class = tokenizer_class
self.use_cuda = use_cuda
self.quantization = quantization
self.precision = precision
@@ -302,8 +302,8 @@ def listen( # type: ignore
tokenizer_name=self.tokenizer_name,
model_revision=self.model_revision,
tokenizer_revision=self.tokenizer_revision,
-model_class_name=self.model_class_name,
-tokenizer_class_name=self.tokenizer_class_name,
+model_class=self.model_class,
+tokenizer_class=self.tokenizer_class,
use_cuda=self.use_cuda,
precision=self.precision,
quantization=self.quantization,
24 changes: 12 additions & 12 deletions geniusrise_huggingface/embeddings/bulk.py
@@ -102,8 +102,8 @@ def load_models(
tokenizer_name: str,
model_revision: Optional[str] = None,
tokenizer_revision: Optional[str] = None,
-model_class_name: str = "AutoModelForCausalLM",
-tokenizer_class_name: str = "AutoTokenizer",
+model_class: str = "AutoModelForCausalLM",
+tokenizer_class: str = "AutoTokenizer",
use_cuda: bool = False,
precision: str = "float16",
quantization: int = 0,
@@ -117,8 +117,8 @@
Parameters:
- model_name (str): The name of the model to load.
-- model_class_name (str): The class name of the model to load. Default is "AutoModelForCausalLM".
-- tokenizer_class_name (str): The class name of the tokenizer to load. Default is "AutoTokenizer".
+- model_class (str): The class name of the model to load. Default is "AutoModelForCausalLM".
+- tokenizer_class (str): The class name of the tokenizer to load. Default is "AutoTokenizer".
- use_cuda (bool): Whether to use CUDA for GPU acceleration. Default is False.
- precision (str): The bit precision for model and tokenizer. Options are 'float32', 'float16', 'bfloat16'. Default is 'float16'.
- device_map (Union[str, Dict]): Device map for model placement. Default is "auto".
@@ -148,8 +148,8 @@ def load_models(
if use_cuda and not device_map:
device_map = "auto"

-ModelClass = getattr(transformers, model_class_name)
-TokenizerClass = getattr(transformers, tokenizer_class_name)
+ModelClass = getattr(transformers, model_class)
+TokenizerClass = getattr(transformers, tokenizer_class)

# Load the model and tokenizer
tokenizer = TokenizerClass.from_pretrained(tokenizer_name, revision=tokenizer_revision, torch_dtype=torch_dtype)
@@ -199,8 +199,8 @@ def generate(
self,
kind: str,
model_name: str,
-model_class_name: str = "AutoModelForCausalLM",
-tokenizer_class_name: str = "AutoTokenizer",
+model_class: str = "AutoModelForCausalLM",
+tokenizer_class: str = "AutoTokenizer",
sentence_transformer_model: str = "paraphrase-MiniLM-L6-v2",
use_cuda: bool = False,
precision: str = "float16",
@@ -220,8 +220,8 @@
This method reads text data from the specified input path, generates embeddings, and saves them to the specified output path.
"""
self.model_name = model_name
-self.model_class_name = model_class_name
-self.tokenizer_class_name = tokenizer_class_name
+self.model_class = model_class
+self.tokenizer_class = tokenizer_class
self.use_cuda = use_cuda
self.quantization = quantization
self.precision = precision
@@ -255,8 +255,8 @@
tokenizer_name=self.tokenizer_name,
model_revision=self.model_revision,
tokenizer_revision=self.tokenizer_revision,
-model_class_name=self.model_class_name,
-tokenizer_class_name=self.tokenizer_class_name,
+model_class=self.model_class,
+tokenizer_class=self.tokenizer_class,
use_cuda=self.use_cuda,
precision=self.precision,
quantization=self.quantization,
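The embeddings path also takes a separate `sentence_transformer_model`, defaulting to `paraphrase-MiniLM-L6-v2`. A minimal sketch of loading that checkpoint directly, assuming the `sentence-transformers` package (which this diff does not show):

```python
from sentence_transformers import SentenceTransformer

st_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
vector = st_model.encode("An example sentence to embed.")
print(vector.shape)  # this checkpoint produces 384-dimensional embeddings
```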
(diff for the remaining 2 changed files not shown)
