-
-
Notifications
You must be signed in to change notification settings - Fork 160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve draw performance #2522
Improve draw performance #2522
Conversation
@@ -108,6 +226,7 @@ aaline(PyObject *self, PyObject *arg, PyObject *kwargs) | |||
int drawn_area[4] = {INT_MAX, INT_MAX, INT_MIN, | |||
INT_MIN}; /* Used to store bounding box values */ | |||
Uint32 color; | |||
int (*set_func)(SDL_Surface*, int, int, Uint32); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like we need to set a default value for set_func
here to please gcc. I guess something like:
int (*set_func)(SDL_Surface*, int, int, Uint32); | |
int (*set_func)(SDL_Surface*, int, int, Uint32) = &set_at_32bit_clip; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One thing I'm slightly concerned about using this approach is that C function pointers and performance intensive tight loops are classically not always the best of friends, though this is somewhat compiler dependant.
As a test, I've tried swapping out the function pointer call in set_and_check_rect()
for a clone of the function it is calling:
static int
set_at_32bit_no_func_pointer(SDL_Surface *surf, int x, int y, Uint32 color)
{
if (x < surf->clip_rect.x || x >= surf->clip_rect.x + surf->clip_rect.w ||
y < surf->clip_rect.y || y >= surf->clip_rect.y + surf->clip_rect.h)
{
return 0;
}
*((Uint32 *)((Uint8 *)surf->pixels + y * surf->pitch) + x) = color;
return 1;
}
static void
set_and_check_rect(SDL_Surface *surf, int x, int y, Uint32 color,
int *drawn_area, int (*set_func)(SDL_Surface*, int, int, Uint32))
{
if (set_at_32bit_no_func_pointer(surf, x, y, color))
add_pixel_to_drawn_list(x, y, drawn_area);
}
And on my PC the version without the function pointer was running 15-30% faster in my simple line drawing performance test:
from sys import stdout
from pstats import Stats
from cProfile import Profile
import pygame
pygame.init()
def draw_line_short_inside():
lines = [
((256, 256), (0, 0)),
((128, 256), (160, 0)),
((50, 256), (128, 0)),
((0, 240), (12, 56)),
((50, 40), (60, 11)),
]
widths = [1]
surf = pygame.Surface((256, 256), pygame.SRCALPHA, 32)
surf.fill((255, 255, 255, 255))
for iterations in range(0, 10000):
for line in lines:
for width in widths:
pygame.draw.line(
surf, pygame.Color("red"), line[0], line[1], width=width
)
def draw_line_long_inside():
lines = [
((1256, 256), (0, 0)),
((1128, 256), (128, 0)),
((150, 1256), (128, 0)),
((0, 240), (1200, 56)),
((50, 40), (1060, 11)),
]
widths = [1]
surf = pygame.Surface((2048, 2048), pygame.SRCALPHA, 32)
surf.fill((255, 255, 255, 255))
for iterations in range(0, 10000):
for line in lines:
for width in widths:
pygame.draw.line(
surf, pygame.Color("red"), line[0], line[1], width=width
)
def draw_line_short_outside():
lines = [
((256, 256), (0, 0)),
((128, 256), (160, 0)),
((50, 256), (128, 0)),
((0, 240), (12, 56)),
((50, 40), (80, 11)),
]
widths = [1]
surf = pygame.Surface((64, 64), pygame.SRCALPHA, 32)
surf.fill((255, 255, 255, 255))
for iterations in range(0, 10000):
for line in lines:
for width in widths:
pygame.draw.line(
surf, pygame.Color("red"), line[0], line[1], width=width
)
def draw_line_long_outside():
lines = [
((2256, 256), (0, 0)),
((2128, 256), (128, 0)),
((150, 4256), (128, 0)),
((0, 240), (2200, 56)),
((50, 40), (4060, 11)),
]
widths = [1]
surf = pygame.Surface((1024, 1024), pygame.SRCALPHA, 32)
surf.fill((255, 255, 255, 255))
for iterations in range(0, 10000):
for line in lines:
for width in widths:
pygame.draw.line(
surf, pygame.Color("red"), line[0], line[1], width=width
)
if __name__ == "__main__":
print("Draw Line - short, inside surface")
profiler = Profile()
profiler.runcall(draw_line_short_inside)
stats = Stats(profiler, stream=stdout)
stats.strip_dirs()
stats.sort_stats("cumulative")
stats.print_stats()
print("\nDraw Line - long, inside surface")
profiler = Profile()
profiler.runcall(draw_line_long_inside)
stats = Stats(profiler, stream=stdout)
stats.strip_dirs()
stats.sort_stats("cumulative")
stats.print_stats()
print("\nDraw Line - short, edges outside surface")
profiler = Profile()
profiler.runcall(draw_line_short_outside)
stats = Stats(profiler, stream=stdout)
stats.strip_dirs()
stats.sort_stats("cumulative")
stats.print_stats()
print("\nDraw Line - long, edges a long way outside surface")
profiler = Profile()
profiler.runcall(draw_line_long_outside)
stats = Stats(profiler, stream=stdout)
stats.strip_dirs()
stats.sort_stats("cumulative")
stats.print_stats()
I'd be curious if you see similar results.
Closed in favor of #2551 |
Fix #2517
Still need to check what needs clip version, and what doesn't.