|
334 | 334 | r10= vec_xxnpmadd(U7, r5, r10); \
335 | 335 | r11= vec_xxnpmadd(U1, r5, r11);
336 | 336 |
| 337 | + |
| 338 | +// same as _vec_su3_multiply_double2, but loads a 32-bit (single-precision) gauge field |
| 339 | +#define _vec_su3_multiply_double2_32(u) \ |
| 340 | + U0 = vec_ld2(0, (float*) &(u)->c00); \ |
| 341 | + U3 = vec_ld2(0, (float*) &(u)->c01); \ |
| 342 | + U6 = vec_ld2(0, (float*) &(u)->c02); \ |
| 343 | + U1 = vec_ld2(0, (float*) &(u)->c10); \ |
| 344 | + U4 = vec_ld2(0, (float*) &(u)->c11); \ |
| 345 | + U7 = vec_ld2(0, (float*) &(u)->c12); \ |
| 346 | + U2 = vec_ld2(0, (float*) &(u)->c20); \ |
| 347 | + r6 = vec_xmul(r0, U0); \ |
| 348 | + r7 = vec_xmul(r0, U1); \ |
| 349 | + r8 = vec_xmul(r0, U2); \ |
| 350 | + r9 = vec_xmul(r3, U0); \ |
| 351 | + r10= vec_xmul(r3, U1); \ |
| 352 | + r11= vec_xmul(r3, U2); \ |
| 353 | + \ |
| 354 | + r6 = vec_xxnpmadd(U0, r0, r6); \ |
| 355 | + r7 = vec_xxnpmadd(U1, r0, r7); \ |
| 356 | + r8 = vec_xxnpmadd(U2, r0, r8); \ |
| 357 | + r9 = vec_xxnpmadd(U0, r3, r9); \ |
| 358 | + r10= vec_xxnpmadd(U1, r3, r10); \ |
| 359 | + r11= vec_xxnpmadd(U2, r3, r11); \ |
| 360 | + U0 = vec_ld2(0, (float*) &(u)->c21); \ |
| 361 | + \ |
| 362 | + r6 = vec_xmadd(r1, U3, r6); \ |
| 363 | + r7 = vec_xmadd(r1, U4, r7); \ |
| 364 | + r8 = vec_xmadd(r1, U0, r8); \ |
| 365 | + r9 = vec_xmadd(r4, U3, r9); \ |
| 366 | + r10= vec_xmadd(r4, U4, r10); \ |
| 367 | + r11= vec_xmadd(r4, U0, r11); \ |
| 368 | + \ |
| 369 | + r6 = vec_xxnpmadd(U3, r1, r6); \ |
| 370 | + r7 = vec_xxnpmadd(U4, r1, r7); \ |
| 371 | + r8 = vec_xxnpmadd(U0, r1, r8); \ |
| 372 | + r9 = vec_xxnpmadd(U3, r4, r9); \ |
| 373 | + r10= vec_xxnpmadd(U4, r4, r10); \ |
| 374 | + r11= vec_xxnpmadd(U0, r4, r11); \ |
| 375 | + U1 = vec_ld2(0, (float*) &(u)->c22); \ |
| 376 | + \ |
| 377 | + r6 = vec_xmadd(r2, U6, r6); \ |
| 378 | + r7 = vec_xmadd(r2, U7, r7); \ |
| 379 | + r8 = vec_xmadd(r2, U1, r8); \ |
| 380 | + r9 = vec_xmadd(r5, U6, r9); \ |
| 381 | + r10= vec_xmadd(r5, U7, r10); \ |
| 382 | + r11= vec_xmadd(r5, U1, r11); \ |
| 383 | + \ |
| 384 | + r6 = vec_xxnpmadd(U6, r2, r6); \ |
| 385 | + r7 = vec_xxnpmadd(U7, r2, r7); \ |
| 386 | + r8 = vec_xxnpmadd(U1, r2, r8); \ |
| 387 | + r9 = vec_xxnpmadd(U6, r5, r9); \ |
| 388 | + r10= vec_xxnpmadd(U7, r5, r10); \ |
| 389 | + r11= vec_xxnpmadd(U1, r5, r11); |
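
For reference, a plain-C sketch of what this macro appears to compute, assuming the vec_xmul / vec_xxnpmadd pairs implement a complex multiply-accumulate as in the double-precision _vec_su3_multiply_double2 above: two SU(3) matrix-vector products evaluated together, with r0-r2 and r3-r5 holding the two input colour vectors and r6-r8, r9-r11 receiving the results. The su3_ref struct and the function name below are illustrative only, not part of the code base.

    #include <complex.h>

    /* Illustrative 3x3 complex matrix with the field names the macros
     * dereference (c00 ... c22); the real struct may differ. */
    typedef struct {
      double complex c00, c01, c02;
      double complex c10, c11, c12;
      double complex c20, c21, c22;
    } su3_ref;

    /* Reference for _vec_su3_multiply_double2(_32): chi = U * psi and
     * tau = U * phi, computed in one pass so the gauge field is loaded
     * only once for two input vectors. */
    static void su3_multiply_double2_ref(double complex chi[3], double complex tau[3],
                                         const su3_ref *u,
                                         const double complex psi[3],
                                         const double complex phi[3])
    {
      chi[0] = u->c00*psi[0] + u->c01*psi[1] + u->c02*psi[2];
      chi[1] = u->c10*psi[0] + u->c11*psi[1] + u->c12*psi[2];
      chi[2] = u->c20*psi[0] + u->c21*psi[1] + u->c22*psi[2];
      tau[0] = u->c00*phi[0] + u->c01*phi[1] + u->c02*phi[2];
      tau[1] = u->c10*phi[0] + u->c11*phi[1] + u->c12*phi[2];
      tau[2] = u->c20*phi[0] + u->c21*phi[1] + u->c22*phi[2];
    }
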
| 390 | + |
| 391 | + |
| 392 | + |
| 393 | + |
| 394 | +#define _vec_su3_multiply(u) \ |
| 395 | + U0 = vec_ld2(0, (double*) &(u)->c00); \ |
| 396 | + U3 = vec_ld2(0, (double*) &(u)->c01); \ |
| 397 | + U6 = vec_ld2(0, (double*) &(u)->c02); \ |
| 398 | + U1 = vec_ld2(0, (double*) &(u)->c10); \ |
| 399 | + U4 = vec_ld2(0, (double*) &(u)->c11); \ |
| 400 | + U7 = vec_ld2(0, (double*) &(u)->c12); \ |
| 401 | + U2 = vec_ld2(0, (double*) &(u)->c20); \ |
| 402 | + r6 = vec_xmul(r0, U0); \ |
| 403 | + r7 = vec_xmul(r0, U1); \ |
| 404 | + r8 = vec_xmul(r0, U2); \ |
| 405 | + \ |
| 406 | + r6 = vec_xxnpmadd(U0, r0, r6); \ |
| 407 | + r7 = vec_xxnpmadd(U1, r0, r7); \ |
| 408 | + r8 = vec_xxnpmadd(U2, r0, r8); \ |
| 409 | + U0 = vec_ld2(0, (double*) &(u)->c21); \ |
| 410 | + \ |
| 411 | + r6 = vec_xmadd(r1, U3, r6); \ |
| 412 | + r7 = vec_xmadd(r1, U4, r7); \ |
| 413 | + r8 = vec_xmadd(r1, U0, r8); \ |
| 414 | + \ |
| 415 | + r6 = vec_xxnpmadd(U3, r1, r6); \ |
| 416 | + r7 = vec_xxnpmadd(U4, r1, r7); \ |
| 417 | + r8 = vec_xxnpmadd(U0, r1, r8); \ |
| 418 | + U1 = vec_ld2(0, (double*) &(u)->c22); \ |
| 419 | + \ |
| 420 | + r6 = vec_xmadd(r2, U6, r6); \ |
| 421 | + r7 = vec_xmadd(r2, U7, r7); \ |
| 422 | + r8 = vec_xmadd(r2, U1, r8); \ |
| 423 | + \ |
| 424 | + r6 = vec_xxnpmadd(U6, r2, r6); \ |
| 425 | + r7 = vec_xxnpmadd(U7, r2, r7); \ |
| 426 | + r8 = vec_xxnpmadd(U1, r2, r8); \ |
| 427 | + |
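
Note that these macros are not hygienic: they expect QPX register variables named U0-U7 and r0-r11 to already be declared in the enclosing scope, and they reuse some of them mid-macro (here U0 and U1 are overwritten with c21 and c22 after their first use). A hypothetical call site is sketched below; it assumes the IBM XL QPX intrinsics on BG/Q (vector4double, vec_ld2, vec_st2), and the names s, chi, u and the c0/c1/c2 field layout are placeholders, not the actual types in the code base.

    /* Hypothetical call site (compiles only with the BG/Q XL compiler).
     * The three colour components of the input go into r0, r1, r2;
     * the product U*psi is left in r6, r7, r8. */
    vector4double U0, U1, U2, U3, U4, U6, U7;
    vector4double r0, r1, r2, r6, r7, r8;

    r0 = vec_ld2(0, (double*) &s->c0);    /* assumed colour-vector layout */
    r1 = vec_ld2(0, (double*) &s->c1);
    r2 = vec_ld2(0, (double*) &s->c2);

    _vec_su3_multiply(u);                 /* u: pointer to one gauge link */

    vec_st2(r6, 0, (double*) &chi->c0);   /* store the result components  */
    vec_st2(r7, 0, (double*) &chi->c1);
    vec_st2(r8, 0, (double*) &chi->c2);
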
| 428 | + |
| 429 | + |
| 430 | +#define _vec_su3_inverse_multiply(u) \ |
| 431 | + U0 = vec_ld2(0, (double*) &(u)->c00); \ |
| 432 | + U1 = vec_ld2(0, (double*) &(u)->c01); \ |
| 433 | + U2 = vec_ld2(0, (double*) &(u)->c02); \ |
| 434 | + \ |
| 435 | + r6 = vec_xmul(U0, r0); \ |
| 436 | + r7 = vec_xmul(U1, r0); \ |
| 437 | + r8 = vec_xmul(U2, r0); \ |
| 438 | + \ |
| 439 | + r6 = vec_xxcpnmadd(r0, U0, r6); \ |
| 440 | + r7 = vec_xxcpnmadd(r0, U1, r7); \ |
| 441 | + r8 = vec_xxcpnmadd(r0, U2, r8); \ |
| 442 | + \ |
| 443 | + U3 = vec_ld2(0, (double*) &(u)->c10); \ |
| 444 | + U4 = vec_ld2(0, (double*) &(u)->c11); \ |
| 445 | + U6 = vec_ld2(0, (double*) &(u)->c12); \ |
| 446 | + \ |
| 447 | + r6 = vec_xmadd(U3, r1, r6); \ |
| 448 | + r7 = vec_xmadd(U4, r1, r7); \ |
| 449 | + r8 = vec_xmadd(U6, r1, r8); \ |
| 450 | + \ |
| 451 | + r6 = vec_xxcpnmadd(r1, U3, r6); \ |
| 452 | + r7 = vec_xxcpnmadd(r1, U4, r7); \ |
| 453 | + r8 = vec_xxcpnmadd(r1, U6, r8); \ |
| 454 | + \ |
| 455 | + U0 = vec_ld2(0, (double*) &(u)->c20); \ |
| 456 | + U1 = vec_ld2(0, (double*) &(u)->c21); \ |
| 457 | + U2 = vec_ld2(0, (double*) &(u)->c22); \ |
| 458 | + \ |
| 459 | + r6 = vec_xmadd(U0, r2, r6); \ |
| 460 | + r7 = vec_xmadd(U1, r2, r7); \ |
| 461 | + r8 = vec_xmadd(U2, r2, r8); \ |
| 462 | + \ |
| 463 | + r6 = vec_xxcpnmadd(r2, U0, r6); \ |
| 464 | + r7 = vec_xxcpnmadd(r2, U1, r7); \ |
| 465 | + r8 = vec_xxcpnmadd(r2, U2, r8); \ |
| 466 | + |
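
The "inverse" multiply relies on unitarity: for an SU(3) link U^-1 = U^dagger, so the macro walks the rows of u instead of its columns and uses the conjugating variant vec_xxcpnmadd in place of vec_xxnpmadd. A plain-C reference of what it appears to compute, reusing the illustrative su3_ref struct from the sketch above:

    /* Reference for _vec_su3_inverse_multiply:
     * chi_a = sum_b conj(u_{ba}) * psi_b, i.e. chi = U^dagger * psi. */
    static void su3_inverse_multiply_ref(double complex chi[3],
                                         const su3_ref *u,
                                         const double complex psi[3])
    {
      chi[0] = conj(u->c00)*psi[0] + conj(u->c10)*psi[1] + conj(u->c20)*psi[2];
      chi[1] = conj(u->c01)*psi[0] + conj(u->c11)*psi[1] + conj(u->c21)*psi[2];
      chi[2] = conj(u->c02)*psi[0] + conj(u->c12)*psi[1] + conj(u->c22)*psi[2];
    }
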
| 467 | + |
| 468 | + |
| 469 | + |
337 | 470 | // expects the spinor to act on in
338 | 471 | // r0, r1 -> s0
339 | 472 | // r2, r3 -> s1

376 | 509 | r5 = vec_xxnpmadd(U7, r7, r5); \
377 | 510 | r6 = vec_xxnpmadd(U1, r7, r6);
378 | 511 |
|
| 512 | + |
| 513 | + |
| 514 | +#define _vec_su3_multiply_double2c_32(u) \ |
| 515 | + r8 = vec_gpci(00145); \ |
| 516 | + r9 = vec_gpci(02367); \ |
| 517 | + U0 = vec_ld2(0, (float*) &(u)->c00); \ |
| 518 | + U3 = vec_ld2(0, (float*) &(u)->c01); \ |
| 519 | + U6 = vec_ld2(0, (float*) &(u)->c02); \ |
| 520 | + U1 = vec_ld2(0, (float*) &(u)->c10); \ |
| 521 | + r7 = vec_perm(r0, r2, r8); \ |
| 522 | + U4 = vec_ld2(0, (float*) &(u)->c11); \ |
| 523 | + U7 = vec_ld2(0, (float*) &(u)->c12); \ |
| 524 | + U2 = vec_ld2(0, (float*) &(u)->c20); \ |
| 525 | + r4 = vec_xmul(r7, U0); \ |
| 526 | + r5 = vec_xmul(r7, U1); \ |
| 527 | + r6 = vec_xmul(r7, U2); \ |
| 528 | + \ |
| 529 | + r4 = vec_xxnpmadd(U0, r7, r4); \ |
| 530 | + r5 = vec_xxnpmadd(U1, r7, r5); \ |
| 531 | + r6 = vec_xxnpmadd(U2, r7, r6); \ |
| 532 | + r7 = vec_perm(r0, r2, r9); \ |
| 533 | + U0 = vec_ld2(0, (float*) &(u)->c21); \ |
| 534 | + \ |
| 535 | + r4 = vec_xmadd(r7, U3, r4); \ |
| 536 | + r5 = vec_xmadd(r7, U4, r5); \ |
| 537 | + r6 = vec_xmadd(r7, U0, r6); \ |
| 538 | + \ |
| 539 | + r4 = vec_xxnpmadd(U3, r7, r4); \ |
| 540 | + r5 = vec_xxnpmadd(U4, r7, r5); \ |
| 541 | + r6 = vec_xxnpmadd(U0, r7, r6); \ |
| 542 | + r7 = vec_perm(r1, r3, r8); \ |
| 543 | + U1 = vec_ld2(0, (float*) &(u)->c22); \ |
| 544 | + \ |
| 545 | + r4 = vec_xmadd(r7, U6, r4); \ |
| 546 | + r5 = vec_xmadd(r7, U7, r5); \ |
| 547 | + r6 = vec_xmadd(r7, U1, r6); \ |
| 548 | + \ |
| 549 | + r4 = vec_xxnpmadd(U6, r7, r4); \ |
| 550 | + r5 = vec_xxnpmadd(U7, r7, r5); \ |
| 551 | + r6 = vec_xxnpmadd(U1, r7, r6); |
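
The _double2c variants first repack their input with vec_gpci / vec_perm. As far as I read the QPX documentation, vec_gpci turns an octal digit pattern into a permute control and vec_perm then picks elements from the concatenation of its two sources, so 00145 selects elements {0,1,4,5} (the first complex number of r0 next to the first complex number of r2) and 02367 selects {2,3,6,7} (the second complex of each). A scalar model of that permutation, purely for illustration:

    /* Scalar model of r7 = vec_perm(a, b, vec_gpci(pattern)): each octal
     * digit 0-7 selects one double from the 8-element concatenation {a,b}.
     * This mirrors my reading of the intrinsic; e.g. sel = {0,1,4,5}. */
    static void perm_model(double dst[4], const double a[4], const double b[4],
                           const int sel[4])
    {
      double cat[8];
      for (int i = 0; i < 4; i++) { cat[i] = a[i]; cat[i + 4] = b[i]; }
      for (int i = 0; i < 4; i++) dst[i] = cat[sel[i]];
    }
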
| 552 | + |
| 553 | + |
| 554 | + |
379 | 555 | #define _vec_su3_multiply_double2ct(u) \
380 | 556 | r8 = vec_gpci(00167); \
381 | 557 | U0 = vec_ld2(0, (double*) &(u)->c00); \

478 | 654 | r11= vec_xxcpnmadd(r5, U2, r11);
479 | 655 |
480 | 656 |
|
| 657 | +// same as _vec_su3_inverse_multiply_double2, but loads a 32-bit (single-precision) gauge field |
| 658 | +#define _vec_su3_inverse_multiply_double2_32(u) \ |
| 659 | + U0 = vec_ld2(0, (float*) &(u)->c00); \ |
| 660 | + U1 = vec_ld2(0, (float*) &(u)->c01); \ |
| 661 | + U2 = vec_ld2(0, (float*) &(u)->c02); \ |
| 662 | + \ |
| 663 | + r6 = vec_xmul(U0, r0); \ |
| 664 | + r7 = vec_xmul(U1, r0); \ |
| 665 | + r8 = vec_xmul(U2, r0); \ |
| 666 | + r9 = vec_xmul(U0, r3); \ |
| 667 | + r10= vec_xmul(U1, r3); \ |
| 668 | + r11= vec_xmul(U2, r3); \ |
| 669 | + \ |
| 670 | + r6 = vec_xxcpnmadd(r0, U0, r6); \ |
| 671 | + r7 = vec_xxcpnmadd(r0, U1, r7); \ |
| 672 | + r8 = vec_xxcpnmadd(r0, U2, r8); \ |
| 673 | + r9 = vec_xxcpnmadd(r3, U0, r9); \ |
| 674 | + r10= vec_xxcpnmadd(r3, U1, r10); \ |
| 675 | + r11= vec_xxcpnmadd(r3, U2, r11); \ |
| 676 | + \ |
| 677 | + U3 = vec_ld2(0, (float*) &(u)->c10); \ |
| 678 | + U4 = vec_ld2(0, (float*) &(u)->c11); \ |
| 679 | + U6 = vec_ld2(0, (float*) &(u)->c12); \ |
| 680 | + \ |
| 681 | + r6 = vec_xmadd(U3, r1, r6); \ |
| 682 | + r7 = vec_xmadd(U4, r1, r7); \ |
| 683 | + r8 = vec_xmadd(U6, r1, r8); \ |
| 684 | + r9 = vec_xmadd(U3, r4, r9); \ |
| 685 | + r10= vec_xmadd(U4, r4, r10); \ |
| 686 | + r11= vec_xmadd(U6, r4, r11); \ |
| 687 | + \ |
| 688 | + r6 = vec_xxcpnmadd(r1, U3, r6); \ |
| 689 | + r7 = vec_xxcpnmadd(r1, U4, r7); \ |
| 690 | + r8 = vec_xxcpnmadd(r1, U6, r8); \ |
| 691 | + r9 = vec_xxcpnmadd(r4, U3, r9); \ |
| 692 | + r10= vec_xxcpnmadd(r4, U4, r10); \ |
| 693 | + r11= vec_xxcpnmadd(r4, U6, r11); \ |
| 694 | + \ |
| 695 | + U0 = vec_ld2(0, (float*) &(u)->c20); \ |
| 696 | + U1 = vec_ld2(0, (float*) &(u)->c21); \ |
| 697 | + U2 = vec_ld2(0, (float*) &(u)->c22); \ |
| 698 | + \ |
| 699 | + r6 = vec_xmadd(U0, r2, r6); \ |
| 700 | + r7 = vec_xmadd(U1, r2, r7); \ |
| 701 | + r8 = vec_xmadd(U2, r2, r8); \ |
| 702 | + r9 = vec_xmadd(U0, r5, r9); \ |
| 703 | + r10= vec_xmadd(U1, r5, r10); \ |
| 704 | + r11= vec_xmadd(U2, r5, r11); \ |
| 705 | + \ |
| 706 | + r6 = vec_xxcpnmadd(r2, U0, r6); \ |
| 707 | + r7 = vec_xxcpnmadd(r2, U1, r7); \ |
| 708 | + r8 = vec_xxcpnmadd(r2, U2, r8); \ |
| 709 | + r9 = vec_xxcpnmadd(r5, U0, r9); \ |
| 710 | + r10= vec_xxcpnmadd(r5, U1, r10); \ |
| 711 | + r11= vec_xxcpnmadd(r5, U2, r11); |
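
The only difference to the double-precision variant above is the (float*) cast in the vec_ld2 loads: QPX registers always hold four doubles, so the intrinsic presumably loads two single-precision values and widens them on the fly. The gauge field can therefore be stored in 32-bit precision while the spinor arithmetic stays in double. A hypothetical single-precision gauge struct that would match the c00..c22 accesses used here (the actual type in the code base may differ):

    #include <complex.h>

    /* Hypothetical 32-bit gauge link; field names chosen to match the
     * &(u)->c00 ... &(u)->c22 accesses in the _32 macros above. */
    typedef struct {
      float complex c00, c01, c02;
      float complex c10, c11, c12;
      float complex c20, c21, c22;
    } su3_32_ref;
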
| 712 | + |
| 713 | + |
| 714 | + |
481 | 715 | #define _vec_su3_inverse_multiply_double2c(u) \
482 | 716 | U0 = vec_ld2(0, (double*) &(u)->c00); \
483 | 717 | r8 = vec_gpci(00145); \

520 | 754 | r5 = vec_xxcpnmadd(r7, U1, r5); \
521 | 755 | r6 = vec_xxcpnmadd(r7, U2, r6);
522 | 756 |
|
| 757 | + |
| 758 | +#define _vec_su3_inverse_multiply_double2c_32(u) \ |
| 759 | + U0 = vec_ld2(0, (float*) &(u)->c00); \ |
| 760 | + r8 = vec_gpci(00145); \ |
| 761 | + r9 = vec_gpci(02367); \ |
| 762 | + U1 = vec_ld2(0, (float*) &(u)->c01); \ |
| 763 | + r7 = vec_perm(r0, r2, r8); \ |
| 764 | + U2 = vec_ld2(0, (float*) &(u)->c02); \ |
| 765 | + \ |
| 766 | + r4 = vec_xmul(U0, r7); \ |
| 767 | + r5 = vec_xmul(U1, r7); \ |
| 768 | + r6 = vec_xmul(U2, r7); \ |
| 769 | + \ |
| 770 | + r4 = vec_xxcpnmadd(r7, U0, r4); \ |
| 771 | + r5 = vec_xxcpnmadd(r7, U1, r5); \ |
| 772 | + r6 = vec_xxcpnmadd(r7, U2, r6); \ |
| 773 | + \ |
| 774 | + r7 = vec_perm(r0, r2, r9); \ |
| 775 | + U3 = vec_ld2(0, (float*) &(u)->c10); \ |
| 776 | + U4 = vec_ld2(0, (float*) &(u)->c11); \ |
| 777 | + U6 = vec_ld2(0, (float*) &(u)->c12); \ |
| 778 | + \ |
| 779 | + r4 = vec_xmadd(U3, r7, r4); \ |
| 780 | + r5 = vec_xmadd(U4, r7, r5); \ |
| 781 | + r6 = vec_xmadd(U6, r7, r6); \ |
| 782 | + \ |
| 783 | + r4 = vec_xxcpnmadd(r7, U3, r4); \ |
| 784 | + r5 = vec_xxcpnmadd(r7, U4, r5); \ |
| 785 | + r6 = vec_xxcpnmadd(r7, U6, r6); \ |
| 786 | + \ |
| 787 | + r7 = vec_perm(r1, r3, r8); \ |
| 788 | + U0 = vec_ld2(0, (float*) &(u)->c20); \ |
| 789 | + U1 = vec_ld2(0, (float*) &(u)->c21); \ |
| 790 | + U2 = vec_ld2(0, (float*) &(u)->c22); \ |
| 791 | + \ |
| 792 | + r4 = vec_xmadd(U0, r7, r4); \ |
| 793 | + r5 = vec_xmadd(U1, r7, r5); \ |
| 794 | + r6 = vec_xmadd(U2, r7, r6); \ |
| 795 | + \ |
| 796 | + r4 = vec_xxcpnmadd(r7, U0, r4); \ |
| 797 | + r5 = vec_xxcpnmadd(r7, U1, r5); \ |
| 798 | + r6 = vec_xxcpnmadd(r7, U2, r6); |
| 799 | + |
| 800 | + |
| 801 | + |
| 802 | + |
523 | 803 | #define _vec_su3_inverse_multiply_double2ct(u) \
524 | 804 | U0 = vec_ld2(0, (double*) &(u)->c00); \
525 | 805 | r8 = vec_gpci(00167); \
|