diff --git a/src/petals/models/bloom/block.py b/src/petals/models/bloom/block.py
index 439b9ca1..01a74b21 100644
--- a/src/petals/models/bloom/block.py
+++ b/src/petals/models/bloom/block.py
@@ -7,7 +7,7 @@
 
 import torch
 from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
-from transformers.models.bloom.modeling_bloom import BloomBlock, BloomModel, build_alibi_tensor
+from transformers.models.bloom.modeling_bloom import BloomBlock, build_alibi_tensor
 
 from petals.utils.misc import is_dummy
 
diff --git a/src/petals/models/llama/block.py b/src/petals/models/llama/block.py
index 1c89ad3f..4ff9d3f6 100644
--- a/src/petals/models/llama/block.py
+++ b/src/petals/models/llama/block.py
@@ -15,7 +15,6 @@
     LlamaConfig,
     LlamaDecoderLayer,
     LlamaMLP,
-    LlamaModel,
     LlamaRMSNorm,
     repeat_kv,
     rotate_half,
diff --git a/src/petals/models/mixtral/block.py b/src/petals/models/mixtral/block.py
index 7a2bd9fe..58acd144 100644
--- a/src/petals/models/mixtral/block.py
+++ b/src/petals/models/mixtral/block.py
@@ -1,4 +1,3 @@
-import json
 from typing import Optional, Tuple
 
 import torch
@@ -8,7 +7,7 @@
     _prepare_4d_causal_attention_mask,
     _prepare_4d_causal_attention_mask_for_sdpa,
 )
-from transformers.models.mixtral.modeling_mixtral import MixtralDecoderLayer, MixtralModel
+from transformers.models.mixtral.modeling_mixtral import MixtralDecoderLayer
 
 
 class WrappedMixtralBlock(MixtralDecoderLayer):