Commit

apply style
Signed-off-by: Kyle Sayers <[email protected]>
kylesayrs committed Feb 6, 2025
1 parent ba77bbd commit 8e24f52
Showing 1 changed file with 7 additions and 7 deletions.
src/llmcompressor/transformers/finetune/data/peoples_speech.py (7 additions, 7 deletions)
@@ -1,8 +1,8 @@
 from copy import deepcopy
-from typing import TYPE_CHECKING, Dict, Any
+from typing import TYPE_CHECKING, Any, Dict
 
-from loguru import logger
 from datasets.formatting.formatting import LazyRow
+from loguru import logger
 
 from llmcompressor.transformers.finetune.data import TextGenerationDataset
 from llmcompressor.transformers.finetune.data.base import get_columns
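The import edits above match what an import sorter produces: names inside a from-import reordered (constants such as TYPE_CHECKING first, then classes alphabetically) and third-party modules ordered alphabetically (datasets before loguru). A minimal sketch using isort's Python API; isort is an assumption here, since the commit does not say which tool produced the change:

import isort

# The pre-commit import block, as a string (taken from the hunk above)
messy = (
    "from copy import deepcopy\n"
    "from typing import TYPE_CHECKING, Dict, Any\n"
    "\n"
    "from loguru import logger\n"
    "from datasets.formatting.formatting import LazyRow\n"
)

# isort.code() returns the source with imports sorted: names within a
# from-import are grouped by type and alphabetized, and modules are
# ordered alphabetically within each section
print(isort.code(messy))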
@@ -47,11 +47,11 @@ def dataset_template(self, example):
         if self.processor_type == "Qwen2AudioProcessor":
             messages = [
                 {"role": "user", "content": [{"audio": None}]},
-                {"role": "user", "content": [{"text": "What did the person say?"}]}
+                {"role": "user", "content": [{"text": "What did the person say?"}]},
             ]
             text = self.processor.apply_chat_template(messages)
             return {"audios": [audio], "sampling_rate": sampling_rate, "text": text}
 
         else:
             # chat template decoder ids are appended later by self.processor.__call__
             text = " " + example["text"].capitalize()
@@ -63,7 +63,7 @@ def filter_tokenizer_args(self, dataset: DatasetType) -> DatasetType:
             column_names = get_columns(dataset)
 
             return dataset.remove_columns(list(set(column_names) - set(tokenizer_args)))
 
         else:
             return super().filter_tokenizer_args(dataset)
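The unchanged return line in this hunk keeps only the columns the processor accepts, computed as a set difference. A minimal sketch of that idiom with the Hugging Face datasets library; the column names below are invented for illustration:

from datasets import Dataset

# Toy dataset with invented columns
dataset = Dataset.from_dict(
    {
        "audios": [[0.0, 0.1]],
        "sampling_rate": [16000],
        "speaker_id": ["spk0"],
    }
)

# Suppose these are the only keyword arguments the processor accepts
tokenizer_args = ["audios", "sampling_rate", "text"]
column_names = dataset.column_names

# Drop every column the processor cannot consume, as in
# filter_tokenizer_args above
dataset = dataset.remove_columns(list(set(column_names) - set(tokenizer_args)))
print(dataset.column_names)  # ['audios', 'sampling_rate']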

@@ -82,6 +82,6 @@ def tokenize(self, data: LazyRow) -> Dict[str, Any]:
             del text_inputs["input_ids"]
 
             return dict(**audio_inputs, **text_inputs)
 
         else:
-            return super().tokenize(data)
\ No newline at end of file
+            return super().tokenize(data)
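In the last hunk, tokenize merges the audio-processor and text-tokenizer outputs with dict(**audio_inputs, **text_inputs), which raises a TypeError if the two dicts share a key. The values below are invented stand-ins showing only the merge mechanics; why input_ids in particular is dropped is not explained in this hunk:

# Stand-ins for processor outputs; real values would be tensors
audio_inputs = {"input_features": [[0.0]], "feature_attention_mask": [[1]]}
text_inputs = {"input_ids": [[1, 2]], "attention_mask": [[1, 1]]}

# Mirrors the hunk above: remove the entry that should not reach the model
del text_inputs["input_ids"]

# dict(**a, **b) fails with "got multiple values for keyword argument"
# whenever a and b share a key; these toy dicts do not, so the merge succeeds
merged = dict(**audio_inputs, **text_inputs)
print(sorted(merged))  # ['attention_mask', 'feature_attention_mask', 'input_features']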
