Skip to content

Commit

Permalink
[CI/code quality] Add clear github runner caches job + mypy fixes (#1783)
Browse files Browse the repository at this point in the history
  • Loading branch information
felixdittrich92 authored Nov 19, 2024
1 parent 99842ba commit dee760f
Show file tree
Hide file tree
Showing 19 changed files with 54 additions and 39 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/clear_caches.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: Clear GitHub runner caches

on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * *' # Runs once a day

jobs:
  clear:
    name: Clear caches
    runs-on: ubuntu-latest
    steps:
      - uses: MyAlbum/purge-cache@v2
        with:
          max-age: 172800 # Caches older than 2 days are deleted
2 changes: 1 addition & 1 deletion .github/workflows/style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-style
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 2 additions & 2 deletions doctr/datasets/datasets/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
@staticmethod
def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]:
images, targets = zip(*samples)
images = torch.stack(images, dim=0) # type: ignore[assignment]
images = torch.stack(images, dim=0)

return images, list(targets) # type: ignore[return-value]
return images, list(targets)


class VisionDataset(AbstractDataset, _VisionDataset): # noqa: D101
Expand Down
2 changes: 1 addition & 1 deletion doctr/io/image/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,4 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -

def get_img_shape(img: torch.Tensor) -> Tuple[int, int]:
"""Get the shape of an image"""
return img.shape[-2:] # type: ignore[return-value]
return img.shape[-2:]
2 changes: 1 addition & 1 deletion doctr/models/classification/predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def forward(
predicted_batches = [out_batch.argmax(dim=1).cpu().detach().numpy() for out_batch in predicted_batches]

class_idxs = [int(pred) for batch in predicted_batches for pred in batch]
classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs] # type: ignore[union-attr]
classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs] # type: ignore
confs = [round(float(p), 2) for prob in probs for p in prob]

return [class_idxs, classes, confs]
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def compute_loss(
dice_map = torch.softmax(out_map, dim=1)
else:
# compute binary map instead
dice_map = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map)))
dice_map = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map))) # type: ignore[assignment]
# Class reduced
inter = (seg_mask * dice_map * seg_target).sum((0, 2, 3))
cardinality = (seg_mask * (dice_map + seg_target)).sum((0, 2, 3))
Expand Down
4 changes: 2 additions & 2 deletions doctr/models/detection/predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ def forward(
]
# Remove padding from loc predictions
preds = _remove_padding(
pages, # type: ignore[arg-type]
pages,
[pred for batch in predicted_batches for pred in batch["preds"]],
preserve_aspect_ratio=preserve_aspect_ratio,
symmetric_pad=symmetric_pad,
assume_straight_pages=assume_straight_pages,
assume_straight_pages=assume_straight_pages, # type: ignore[arg-type]
)

if return_maps:
Expand Down
8 changes: 4 additions & 4 deletions doctr/models/kie_predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def forward(
for out_map in out_maps
]
if self.detect_orientation:
general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps) # type: ignore[arg-type]
general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)
orientations = [
{"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations
]
Expand All @@ -97,7 +97,7 @@ def forward(
general_pages_orientations = None
origin_pages_orientations = None
if self.straighten_pages:
pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations) # type: ignore
pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
# update page shapes after straightening
origin_page_shapes = [page.shape[:2] for page in pages]

Expand All @@ -124,7 +124,7 @@ def forward(
crops = {}
for class_name in dict_loc_preds.keys():
crops[class_name], dict_loc_preds[class_name] = self._prepare_crops(
pages, # type: ignore[arg-type]
pages,
dict_loc_preds[class_name],
channels_last=channels_last,
assume_straight_pages=self.assume_straight_pages,
Expand Down Expand Up @@ -169,7 +169,7 @@ def forward(
languages_dict = None

out = self.doc_builder(
pages, # type: ignore[arg-type]
pages,
boxes_per_page,
objectness_scores_per_page,
text_preds_per_page,
Expand Down
2 changes: 1 addition & 1 deletion doctr/models/modules/layers/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _identity_to_conv(
id_tensor = torch.from_numpy(kernel_value).to(identity.weight.device)
self.id_tensor = self._pad_to_mxn_tensor(id_tensor)
kernel = self.id_tensor
std = (identity.running_var + identity.eps).sqrt()
std = (identity.running_var + identity.eps).sqrt() # type: ignore
t = (identity.weight / std).reshape(-1, 1, 1, 1)
return kernel * t, identity.bias - identity.running_mean * identity.weight / std

Expand Down
6 changes: 3 additions & 3 deletions doctr/models/modules/transformer/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
Returns:
positional embeddings (batch, max_len, d_model)
"""
x = x + self.pe[:, : x.size(1)]
x = x + self.pe[:, : x.size(1)] # type: ignore[index]
return self.dropout(x)


Expand All @@ -49,8 +49,8 @@ def scaled_dot_product_attention(
scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
if mask is not None:
# NOTE: to ensure the ONNX compatibility, masked_fill works only with int equal condition
scores = scores.masked_fill(mask == 0, float("-inf"))
p_attn = torch.softmax(scores, dim=-1)
scores = scores.masked_fill(mask == 0, float("-inf")) # type: ignore[attr-defined]
p_attn = torch.softmax(scores, dim=-1) # type: ignore[call-overload]
return torch.matmul(p_attn, value), p_attn


Expand Down
8 changes: 4 additions & 4 deletions doctr/models/predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def forward(
for out_map in out_maps
]
if self.detect_orientation:
general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps) # type: ignore[arg-type]
general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)
orientations = [
{"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations
]
Expand All @@ -95,7 +95,7 @@ def forward(
general_pages_orientations = None
origin_pages_orientations = None
if self.straighten_pages:
pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations) # type: ignore
pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
# update page shapes after straightening
origin_page_shapes = [page.shape[:2] for page in pages]

Expand All @@ -118,7 +118,7 @@ def forward(

# Crop images
crops, loc_preds = self._prepare_crops(
pages, # type: ignore[arg-type]
pages,
loc_preds,
channels_last=channels_last,
assume_straight_pages=self.assume_straight_pages,
Expand Down Expand Up @@ -146,7 +146,7 @@ def forward(
languages_dict = None

out = self.doc_builder(
pages, # type: ignore[arg-type]
pages,
boxes,
objectness_scores,
text_preds,
Expand Down
8 changes: 4 additions & 4 deletions doctr/models/preprocessor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
else:
x = x.to(dtype=torch.float32) # type: ignore[union-attr]

return x
return x # type: ignore[return-value]

def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, np.ndarray]]]) -> List[torch.Tensor]:
"""Prepare document data for model forwarding
Expand All @@ -99,7 +99,7 @@ def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, n
elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
raise TypeError("unsupported data type for torch.Tensor")
# Resizing
if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]: # type: ignore[union-attr]
x = F.resize(
x, self.resize.size, interpolation=self.resize.interpolation, antialias=self.resize.antialias
)
Expand All @@ -114,11 +114,11 @@ def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, n
# Sample transform (to tensor, resize)
samples = list(multithread_exec(self.sample_transforms, x))
# Batching
batches = self.batch_inputs(samples)
batches = self.batch_inputs(samples) # type: ignore[assignment]
else:
raise TypeError(f"invalid input type: {type(x)}")

# Batch transforms (normalize)
batches = list(multithread_exec(self.normalize, batches))

return batches
return batches # type: ignore[return-value]
4 changes: 2 additions & 2 deletions doctr/models/recognition/master/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def make_source_and_target_mask(
# NOTE: nn.TransformerDecoder takes the inverse from this implementation
# [True, True, True, ..., False, False, False] -> False is masked
# (N, 1, 1, max_length)
target_pad_mask = (target != self.vocab_size + 2).unsqueeze(1).unsqueeze(1)
target_pad_mask = (target != self.vocab_size + 2).unsqueeze(1).unsqueeze(1) # type: ignore[attr-defined]
target_length = target.size(1)
# sub mask filled diagonal with True = see and False = masked (max_length, max_length)
# NOTE: onnxruntime tril/triu works only with float currently (onnxruntime 1.11.1 - opset 14)
Expand Down Expand Up @@ -139,7 +139,7 @@ def compute_loss(
# Input length : number of timesteps
input_len = model_output.shape[1]
# Add one for additional <eos> token (sos disappear in shift!)
seq_len = seq_len + 1
seq_len = seq_len + 1 # type: ignore[assignment]
# Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]!
# The "masked" first gt char is <sos>. Delete last logit of the model output.
cce = F.cross_entropy(model_output[:, :-1, :].permute(0, 2, 1), gt[:, 1:], reduction="none")
Expand Down
6 changes: 3 additions & 3 deletions doctr/models/recognition/parseq/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def generate_permutations(self, seqlen: torch.Tensor) -> torch.Tensor:

sos_idx = torch.zeros(len(final_perms), 1, device=seqlen.device)
eos_idx = torch.full((len(final_perms), 1), max_num_chars + 1, device=seqlen.device)
combined = torch.cat([sos_idx, final_perms + 1, eos_idx], dim=1).int()
combined = torch.cat([sos_idx, final_perms + 1, eos_idx], dim=1).int() # type: ignore[list-item]
if len(combined) > 1:
combined[1, 1:] = max_num_chars + 1 - torch.arange(max_num_chars + 1, device=seqlen.device)
return combined
Expand Down Expand Up @@ -280,7 +280,7 @@ def decode_autoregressive(self, features: torch.Tensor, max_len: Optional[int] =

# Stop decoding if all sequences have reached the EOS token
# NOTE: `break` isn't correctly translated to Onnx so we don't break here if we want to export
if not self.exportable and max_len is None and (ys == self.vocab_size).any(dim=-1).all():
if not self.exportable and max_len is None and (ys == self.vocab_size).any(dim=-1).all(): # type: ignore[attr-defined]
break

logits = torch.cat(pos_logits, dim=1) # (N, max_length, vocab_size + 1)
Expand All @@ -295,7 +295,7 @@ def decode_autoregressive(self, features: torch.Tensor, max_len: Optional[int] =

# Create padding mask for the refined target input: everything after the EOS token is masked (False)
# (N, 1, 1, max_length)
target_pad_mask = ~((ys == self.vocab_size).int().cumsum(-1) > 0).unsqueeze(1).unsqueeze(1)
target_pad_mask = ~((ys == self.vocab_size).int().cumsum(-1) > 0).unsqueeze(1).unsqueeze(1) # type: ignore[attr-defined]
mask = (target_pad_mask.bool() & query_mask[:, : ys.shape[1]].bool()).int()
logits = self.head(self.decode(ys, features, mask, target_query=pos_queries))

Expand Down
2 changes: 1 addition & 1 deletion doctr/models/recognition/sar/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def compute_loss(
# Input length : number of timesteps
input_len = model_output.shape[1]
# Add one for additional <eos> token
seq_len = seq_len + 1
seq_len = seq_len + 1 # type: ignore[assignment]
# Compute loss
# (N, L, vocab_size + 1)
cce = F.cross_entropy(model_output.permute(0, 2, 1), gt, reduction="none")
Expand Down
2 changes: 1 addition & 1 deletion doctr/models/recognition/vitstr/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def compute_loss(
# Input length : number of steps
input_len = model_output.shape[1]
# Add one for additional <eos> token (sos disappear in shift!)
seq_len = seq_len + 1
seq_len = seq_len + 1 # type: ignore[assignment]
# Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]!
# The "masked" first gt char is <sos>.
cce = F.cross_entropy(model_output.permute(0, 2, 1), gt[:, 1:], reduction="none")
Expand Down
2 changes: 1 addition & 1 deletion doctr/models/utils/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def export_model_to_onnx(model: nn.Module, model_name: str, dummy_input: torch.T
"""
torch.onnx.export(
model,
dummy_input, # type: ignore[arg-type]
dummy_input,
f"{model_name}.onnx",
input_names=["input"],
output_names=["logits"],
Expand Down
8 changes: 4 additions & 4 deletions doctr/transforms/functional/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape)
# Inverse the color
if out.dtype == torch.uint8:
out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8) # type: ignore[attr-defined]
else:
out = out * rgb_shift.to(dtype=out.dtype)
out = out * rgb_shift.to(dtype=out.dtype) # type: ignore[attr-defined]
# Inverse the color
out = 255 - out if out.dtype == torch.uint8 else 1 - out
return out
Expand Down Expand Up @@ -77,7 +77,7 @@ def rotate_sample(
rotated_geoms: np.ndarray = rotate_abs_geoms(
_geoms,
angle,
img.shape[1:], # type: ignore[arg-type]
img.shape[1:],
expand,
).astype(np.float32)

Expand Down Expand Up @@ -124,7 +124,7 @@ def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwarg
Returns:
shaded image
"""
shadow_mask = create_shadow_mask(img.shape[1:], **kwargs) # type: ignore[arg-type]
shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)

opacity = np.random.uniform(*opacity_range)
shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...])
Expand Down
6 changes: 3 additions & 3 deletions doctr/transforms/modules/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
# Reshape the distribution
noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
if x.dtype == torch.uint8:
return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) # type: ignore[attr-defined]
else:
return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
return (x + noise.to(dtype=x.dtype)).clamp(0, 1) # type: ignore[attr-defined]

def extra_repr(self) -> str:
return f"mean={self.mean}, std={self.std}"
Expand Down Expand Up @@ -194,7 +194,7 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor:
try:
if x.dtype == torch.uint8:
return (
(
( # type: ignore[attr-defined]
255
* random_shadow(
x.to(dtype=torch.float32) / 255,
Expand Down

0 comments on commit dee760f

Please sign in to comment.