Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

crypto/chacha20, crypto/poly1305: add MIPSLE assembly version #294

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions chacha20/chacha_mipsle.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego

package chacha20

// bufSize is set to exactly one block (blockSize) for this platform.
// NOTE(review): presumably the portable code uses bufSize as the unit in which
// it hands data to xorKeyStreamBlocks — confirm against the generic
// implementation, which is not part of this file.
const bufSize = blockSize

// xorKeyStream is implemented in assembly (chacha_mipsle.s). It XORs src with
// the ChaCha20 key stream derived from key, nonce and *counter and writes the
// result to dst, 64 bytes per iteration, incrementing the counter once per
// iteration and storing the final value back through counter.
//
// The assembly only reads len(src) and always processes one 64-byte chunk
// before testing the remaining length against zero, so len(src) must be a
// non-zero multiple of 64 and dst must be at least as long as src.
//
//go:noescape
func xorKeyStream(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)

// xorKeyStreamBlocks XORs src with the cipher's key stream into dst by
// delegating to the assembly routine xorKeyStream. The routine receives
// pointers to the cipher's key, nonce and block counter, so the counter held
// in s is advanced in place.
func (s *Cipher) xorKeyStreamBlocks(dst, src []byte) {
	key, nonce, counter := &s.key, &s.nonce, &s.counter
	xorKeyStream(dst, src, key, nonce, counter)
}
185 changes: 185 additions & 0 deletions chacha20/chacha_mipsle.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Ported from https://github.com/torvalds/linux/blob/1b294a1f35616977caddaddf3e9d28e576a1adbc/arch/mips/crypto/chacha-core.S
// which is licensed under:
// # ====================================================================
// # SPDX-License-Identifier: GPL-2.0 OR MIT
// #
// # Copyright (C) 2016-2018 René van Dorst <[email protected]>. All Rights Reserved.
// # Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
// # ====================================================================

//go:build gc && !purego

#include "textflag.h"

// X0..X15 hold the sixteen 32-bit words of the ChaCha20 working state,
// one word per general purpose register.
#define X0 R1
#define X1 R2
#define X2 R3
#define X3 R4
#define X4 R5
#define X5 R6
#define X6 R7
#define X7 R8
#define X8 R9
#define X9 R10
#define X10 R11
#define X11 R12
#define X12 R13
#define X13 R14
#define X14 R15
#define X15 R16

// Registers holding the arguments of xorKeyStream. CTR is first loaded with
// the counter pointer and then overwritten with the counter value itself.
#define DST R17
#define SRC R18
#define SRC_LEN R19
#define KEY R20
#define NONCE R21
#define CTR R22

// LOOP_I counts the remaining rounds; TMP is a scratch register that several
// of the macros in this file overwrite.
#define LOOP_I R24
#define TMP R25

// hasROTR is defined when either GOMIPS_r2 or GOMIPS_r5 is defined.
// NOTE(review): presumably these symbols identify ISA revisions that provide
// a rotate-right instruction — confirm against the toolchain documentation.
#ifdef GOMIPS_r2
#define hasROTR
#endif
#ifdef GOMIPS_r5
#define hasROTR
#endif

// ROTL(S, R) rotates register R left by S bits in place.
// With hasROTR it is a single rotate right by 32-S. Otherwise it is built from
// a left shift into TMP, a right shift of R and an OR, which clobbers TMP.
#ifdef hasROTR
#define ROTL(S, R) \
ROTR $(32-S), R
#else
#define ROTL(S, R) \
SLL $(S), R, TMP \
SRL $(32-S), R \
OR TMP, R
#endif

// AXR (add, xor, rotate) performs one step of four quarter rounds at once.
// For each of the four lanes it computes:
//   A += K;  V ^= A;  V = V <<< S
// and likewise for (B, L, W), (C, M, Y) and (D, N, Z). All four additions are
// issued first, then all four XORs, then all four rotates.
// Because it expands ROTL, it may clobber TMP.
#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
ADDU K, A \
ADDU L, B \
ADDU M, C \
ADDU N, D \
XOR A, V \
XOR B, W \
XOR C, Y \
XOR D, Z \
ROTL (S, V) \
ROTL (S, W) \
ROTL (S, Y) \
ROTL (S, Z)

// FOR_STATE walks the sixteen words of the initial ChaCha20 state in order and
// applies an operation pairing each initial word with its X register:
//   X0..X3   the four constant words (immediates), via OP
//   X4..X11  the eight key words read from memory at KEY, via OP_MEM
//   X12      the block counter held in the CTR register, via OP
//   X13..X15 the three nonce words read from memory at NONCE, via OP_MEM
// OP takes (immediate-or-register, register); OP_MEM takes (memory operand,
// register). CTR must already hold the counter value, not the pointer to it.
#define FOR_STATE(OP, OP_MEM) \
OP ( $0x61707865, X0 ) \ // expa
OP ( $0x3320646e, X1 ) \ // nd 3
OP ( $0x79622d32, X2 ) \ // 2-by
OP ( $0x6b206574, X3 ) \ // te k
OP_MEM ( 0(KEY), X4 ) \
OP_MEM ( 4(KEY), X5 ) \
OP_MEM ( 8(KEY), X6 ) \
OP_MEM ( 12(KEY), X7 ) \
OP_MEM ( 16(KEY), X8 ) \
OP_MEM ( 20(KEY), X9 ) \
OP_MEM ( 24(KEY), X10 ) \
OP_MEM ( 28(KEY), X11 ) \
OP ( CTR, X12 ) \
OP_MEM ( 0(NONCE), X13 ) \
OP_MEM ( 4(NONCE), X14 ) \
OP_MEM ( 8(NONCE), X15 )

// movw(x, y) copies x into y. It is passed to FOR_STATE as both OP and OP_MEM
// to load the initial state into the X registers.
#define movw(x, y) \
MOVW x, y

// ADD(V, REG) adds the immediate or register V to REG. It is passed to
// FOR_STATE as OP when adding the initial state back after the rounds.
#define ADD(V, REG) \
ADDU V, REG

// ADD_MEM(ADDR, REG) adds the word stored at ADDR to REG. The word is loaded
// into TMP first, so TMP is clobbered. It is passed to FOR_STATE as OP_MEM.
#define ADD_MEM(ADDR, REG) \
MOVW ADDR, TMP \
ADDU TMP, REG

// XOR_STREAM_WORD(OFF, REG) XORs the OFF-th 32-bit word of SRC with the key
// stream word in REG and stores the result as the OFF-th word of DST.
// The load and the store are each done with a MOVWL/MOVWR pair (at byte
// offsets 4*OFF+3 and 4*OFF) so that they work with unaligned memory. This is
// quite important since the streams might not be aligned; especially when used
// in TLS, the memory is often unaligned.
// REG is left unchanged; TMP is clobbered.
#define XOR_STREAM_WORD( OFF, REG) \
MOVWL (4*OFF + 3)(SRC), TMP \
MOVWR (4*OFF)(SRC), TMP \
XOR REG, TMP \
MOVWL TMP, (4*OFF + 3)(DST) \
MOVWR TMP, (4*OFF)(DST)

// func xorKeyStream(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
//
// xorKeyStream XORs src with the ChaCha20 key stream and writes the result to
// dst, one 64-byte block per iteration of the outer loop. The block counter is
// read from *counter on entry, incremented once per block, and written back
// to *counter before returning.
//
// NOTE(review): only src_len is read (dst's length is never loaded), the first
// block is processed before the length is tested, and the remaining length is
// only compared against zero. The caller must therefore pass a non-zero
// multiple of 64 bytes in src and a dst at least as long.
TEXT ·xorKeyStream(SB), NOSPLIT|NOFRAME, $0
// Load the arguments. Each slice header occupies 12 bytes of the argument
// frame, so src starts at offset 12 and key at offset 24.
MOVW dst+0(FP), DST
MOVW src+12(FP), SRC
MOVW src_len+16(FP), SRC_LEN
MOVW key+24(FP), KEY
MOVW nonce+28(FP), NONCE
MOVW counter+32(FP), CTR

// load counter: from here on CTR holds the counter value, not its address
MOVW (CTR), CTR

// Outer loop: one iteration per 64-byte block.
chacha:

// load initial State into X*
FOR_STATE ( movw, movw )

// set number of rounds; each pass through the inner loop performs two rounds
// (one column round and one diagonal round) and decrements LOOP_I by 2
MOVW $20, LOOP_I

loop:
// column round: quarter rounds on (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
AXR( X0,X1,X2,X3, X4,X5,X6,X7, X12,X13,X14,X15, 16)
AXR( X8,X9,X10,X11, X12,X13,X14,X15, X4,X5,X6,X7, 12)
AXR( X0,X1,X2,X3, X4,X5,X6,X7, X12,X13,X14,X15, 8)
AXR( X8,X9,X10,X11, X12,X13,X14,X15, X4,X5,X6,X7, 7)
// diagonal round: quarter rounds on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
AXR( X0,X1,X2,X3, X5,X6,X7,X4, X15,X12,X13,X14, 16)
AXR( X10,X11,X8,X9, X15,X12,X13,X14, X5,X6,X7,X4, 12)
AXR( X0,X1,X2,X3, X5,X6,X7,X4, X15,X12,X13,X14, 8)
AXR( X10,X11,X8,X9, X15,X12,X13,X14, X5,X6,X7,X4, 7)

// two rounds done; repeat until LOOP_I reaches zero
ADDU $-2, LOOP_I
BNE LOOP_I, loop

// add back the initial state to generate the key stream
FOR_STATE ( ADD, ADD_MEM )

// xor the key stream with the source and write out the result
XOR_STREAM_WORD (0, X0)
XOR_STREAM_WORD (1, X1)
XOR_STREAM_WORD (2, X2)
XOR_STREAM_WORD (3, X3)
XOR_STREAM_WORD (4, X4)
XOR_STREAM_WORD (5, X5)
XOR_STREAM_WORD (6, X6)
XOR_STREAM_WORD (7, X7)
XOR_STREAM_WORD (8, X8)
XOR_STREAM_WORD (9, X9)
XOR_STREAM_WORD (10, X10)
XOR_STREAM_WORD (11, X11)
XOR_STREAM_WORD (12, X12)
XOR_STREAM_WORD (13, X13)
XOR_STREAM_WORD (14, X14)
XOR_STREAM_WORD (15, X15)

// decrement length by the 64 bytes just processed
ADDU $-64, SRC_LEN, SRC_LEN

// increment pointers past the processed block
MOVW $64(DST), DST
MOVW $64(SRC), SRC

// increment counter for the next block
ADDU $1, CTR

// loop if there's still data (SRC_LEN != 0)
BNE SRC_LEN, chacha

// store Counter: reload the pointer argument, since CTR now holds the value
MOVW counter+32(FP), TMP
MOVW CTR, (TMP)

RET

2 changes: 1 addition & 1 deletion chacha20/chacha_noasm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (!arm64 && !s390x && !ppc64le) || !gc || purego
//go:build (!arm64 && !s390x && !ppc64le && !mipsle) || !gc || purego

package chacha20

Expand Down
2 changes: 1 addition & 1 deletion chacha20poly1305/chacha20poly1305_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func benchamarkChaCha20Poly1305Open(b *testing.B, buf []byte, nonceSize int) {
}

func BenchmarkChacha20Poly1305(b *testing.B) {
for _, length := range []int{64, 1350, 8 * 1024} {
for _, length := range []int{64, 1024, 1350, 2 * 1024, 4 * 1024, 8 * 1024, 16 * 1024} {
b.Run("Open-"+strconv.Itoa(length), func(b *testing.B) {
benchamarkChaCha20Poly1305Open(b, make([]byte, length), NonceSize)
})
Expand Down
2 changes: 1 addition & 1 deletion internal/poly1305/mac_noasm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (!amd64 && !ppc64le && !s390x) || !gc || purego
//go:build (!amd64 && !ppc64le && !s390x && !mipsle) || !gc || purego

package poly1305

Expand Down
53 changes: 53 additions & 0 deletions internal/poly1305/sum_mipsle.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego

package poly1305

// mac is a wrapper for macGeneric that redirects calls that would have gone to
// updateGeneric to update.
//
// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
// using function pointers would carry a major performance cost.
//
// The state, buffer and offset that the methods below operate on are the
// fields promoted from the embedded macGeneric.
type mac struct{ macGeneric }

// Write absorbs p into the MAC state. Input is processed in TagSize-byte
// blocks via update; any bytes that do not complete a block are kept in
// h.buffer (with h.offset recording how many) until a later Write or Sum.
// It always reports len(p) bytes written and a nil error.
func (h *mac) Write(p []byte) (int, error) {
	total := len(p)

	// Top up a partially filled buffer before touching whole blocks.
	if h.offset > 0 {
		copied := copy(h.buffer[h.offset:], p)
		filled := h.offset + copied
		if filled < TagSize {
			// Still short of a full block: remember progress and wait for more.
			h.offset = filled
			return total, nil
		}
		h.offset = 0
		p = p[copied:]
		update(&h.macState, h.buffer[:], 1)
	}

	// Feed every complete block directly from p, without copying.
	if whole := len(p) / TagSize * TagSize; whole > 0 {
		update(&h.macState, p[:whole], 1)
		p = p[whole:]
	}

	// Stash the leftover tail (shorter than one block) for later.
	if len(p) > 0 {
		h.offset += copy(h.buffer[h.offset:], p)
	}
	return total, nil
}

// Sum writes the tag for the data absorbed so far into out. It finalizes a
// copy of the MAC state, so h.macState and h.offset are left untouched; only
// the unused tail of h.buffer (from h.offset onward) is overwritten with
// padding.
func (h *mac) Sum(out *[16]byte) {
	state := h.macState
	if pending := h.offset; pending > 0 {
		// Terminate the partial block with a 1 byte, zero-fill the rest, and
		// process it as one full-width block with padbit 0.
		// NOTE(review): presumably padbit 0 is used because the terminator is
		// already written into the buffer — confirm against update's assembly.
		h.buffer[pending] = 1
		for i := pending + 1; i < TagSize; i++ {
			h.buffer[i] = 0
		}
		update(&state, h.buffer[:], 0)
	}
	finalize(out, &state.h, &state.s)
}

// update absorbs msg into state. It has no Go body and is implemented outside
// this file (presumably in the accompanying mipsle assembly — not visible
// here). Callers in this file always pass a msg whose length is a multiple of
// TagSize: padbit is 1 for ordinary full blocks (Write) and 0 for the final
// block that Sum has already padded by hand.
//
//go:noescape
func update(state *macState, msg []byte, padbit uint32)
Loading