Skip to content

Commit

Permalink
Merge pull request #3635 from Yawning/feature/aes
Browse files Browse the repository at this point in the history
core/crypto: Add AES
  • Loading branch information
Kelimion authored Jun 1, 2024
2 parents 72a5e74 + c751e4b commit c07a46a
Show file tree
Hide file tree
Showing 19 changed files with 2,198 additions and 0 deletions.
28 changes: 28 additions & 0 deletions core/crypto/_aes/aes.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package _aes

// Key sizes, in bytes, for the three AES variants.

// KEY_SIZE_128 is the AES-128 key size in bytes.
KEY_SIZE_128 :: 16
// KEY_SIZE_192 is the AES-192 key size in bytes.
KEY_SIZE_192 :: 24
// KEY_SIZE_256 is the AES-256 key size in bytes.
KEY_SIZE_256 :: 32

// BLOCK_SIZE is the AES block size in bytes.  It is the same for every
// key size.
BLOCK_SIZE :: 16

// Round counts.  Each value equals (key size in 32-bit words) + 6,
// i.e. 16/4 + 6, 24/4 + 6 and 32/4 + 6 respectively.

// ROUNDS_128 is the number of rounds for AES-128.
ROUNDS_128 :: 10
// ROUNDS_192 is the number of rounds for AES-192.
ROUNDS_192 :: 12
// ROUNDS_256 is the number of rounds for AES-256.
ROUNDS_256 :: 14

// GHASH parameters.  Key, block and tag are all 16 bytes.

// GHASH_KEY_SIZE is the GHASH key size in bytes.
GHASH_KEY_SIZE :: 16
// GHASH_BLOCK_SIZE is the GHASH block size in bytes.
GHASH_BLOCK_SIZE :: 16
// GHASH_TAG_SIZE is the GHASH tag size in bytes.
GHASH_TAG_SIZE :: 16

// RCON is the AES keyschedule round constants.
//
// Each entry is the previous one doubled in GF(2^8); 0x80 doubles to
// 0x1B once the overflow is reduced, and 0x1B doubles to 0x36.
//
// NOTE(review): this is declared with `:=`, so it is a mutable
// package-level variable rather than a compile-time constant.  Users
// are presumably expected to treat it as read-only -- confirm.
RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}
96 changes: 96 additions & 0 deletions core/crypto/_aes/ct64/api.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package aes_ct64

import "base:intrinsics"
import "core:mem"

// STRIDE is 4.
// NOTE(review): presumably the number of blocks handled by one bitsliced
// invocation (the `*_blocks` procedures batch their input in groups of
// 4) -- confirm against its users.
STRIDE :: 4

// Context is a keyed AES (ECB) instance.
//
// All fields are populated by `init`; the encrypt/decrypt procedures
// assert `_is_initialized` before touching the key schedule.
Context :: struct {
// Expanded key schedule, filled in by `skey_expand` during `init`.
// NOTE(review): 120 words looks like 15 round keys x 8 words (the
// largest schedule, for 14 rounds) -- confirm against `skey_expand`.
_sk_exp: [120]u64,
// Round count returned by `keysched` during `init`.
_num_rounds: int,
// Set to true at the end of `init`.
_is_initialized: bool,
}

// init initializes a context for AES with the provided key.
//
// The key is first run through `keysched` into a temporary compact
// schedule, which also yields the round count, and is then expanded
// into `ctx._sk_exp` by `skey_expand`.  The temporary schedule is key
// material, so it is wiped before returning instead of being left
// behind on the stack.
//
// Inputs:
// - ctx: The context to initialize; any previous state is overwritten.
// - key: The raw AES key.  Length validation is left to `keysched`.
init :: proc(ctx: ^Context, key: []byte) {
	skey: [30]u64 = ---
	// Sanitize the intermediate round keys on every exit path.
	defer mem.zero_explicit(&skey, size_of(skey))

	ctx._num_rounds = keysched(skey[:], key)
	skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds)
	ctx._is_initialized = true
}

// encrypt_block sets `dst` to `AES-ECB-Encrypt(src)`.
//
// Exactly one block is processed.  The context must have been set up
// with `init`, otherwise the assertion below fires.
encrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
	assert(ctx._is_initialized)

	// Zero-initialized bitsliced working state; `load_blockx1` fills it
	// from the single input block.
	state: [8]u64
	load_blockx1(&state, src)

	_encrypt(&state, ctx._sk_exp[:], ctx._num_rounds)

	store_blockx1(dst, &state)
}

// decrypt_block sets `dst` to `AES-ECB-Decrypt(src)`.
//
// Exactly one block is processed.  The context must have been set up
// with `init`, otherwise the assertion below fires.
decrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
	assert(ctx._is_initialized)

	// Zero-initialized bitsliced working state; `load_blockx1` fills it
	// from the single input block.
	q: [8]u64
	load_blockx1(&q, src)
	_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
	store_blockx1(dst, &q)
}

// encrypt_blocks sets `dst` to `AES-ECB-Encrypt(src[0], .. src[n])`.
//
// `src` and `dst` are parallel slices of blocks.  Input is consumed in
// batches of 4 while more than 4 blocks remain; whatever is left (1 to
// 4 blocks) is handled by one final invocation.
encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
	assert(ctx._is_initialized)

	state: [8]u64 = ---
	pending_in, pending_out := src, dst

	for len(pending_in) > 4 {
		load_blocks(&state, pending_in[:4])
		_encrypt(&state, ctx._sk_exp[:], ctx._num_rounds)
		store_blocks(pending_out[:4], &state)

		pending_in = pending_in[4:]
		pending_out = pending_out[4:]
	}

	// Tail: between 1 and 4 blocks, or nothing at all for empty input.
	if len(pending_in) > 0 {
		load_blocks(&state, pending_in)
		_encrypt(&state, ctx._sk_exp[:], ctx._num_rounds)
		store_blocks(pending_out, &state)
	}
}

// decrypt_blocks sets `dst` to `AES-ECB-Decrypt(src[0], .. src[n])`.
//
// `src` and `dst` are parallel slices of blocks.  Input is consumed in
// batches of 4 while more than 4 blocks remain; whatever is left (1 to
// 4 blocks) is handled by one final invocation.
decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
	assert(ctx._is_initialized)

	state: [8]u64 = ---
	pending_in, pending_out := src, dst

	for len(pending_in) > 4 {
		load_blocks(&state, pending_in[:4])
		_decrypt(&state, ctx._sk_exp[:], ctx._num_rounds)
		store_blocks(pending_out[:4], &state)

		pending_in = pending_in[4:]
		pending_out = pending_out[4:]
	}

	// Tail: between 1 and 4 blocks, or nothing at all for empty input.
	if len(pending_in) > 0 {
		load_blocks(&state, pending_in)
		_decrypt(&state, ctx._sk_exp[:], ctx._num_rounds)
		store_blocks(pending_out, &state)
	}
}

// reset sanitizes the Context.  The Context must be re-initialized to
// be used again.
//
// The entire structure is wiped: the expanded key schedule, the round
// count, and the `_is_initialized` flag, so any later encrypt/decrypt
// call trips its assertion until `init` is called again.
reset :: proc(ctx: ^Context) {
	// The size must be that of the pointed-to struct.  `size_of(ctx)`
	// is only the size of the pointer, which would leave nearly all of
	// the key schedule (and the initialized flag) intact.
	mem.zero_explicit(ctx, size_of(Context))
}
265 changes: 265 additions & 0 deletions core/crypto/_aes/ct64/ct64.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
// Copyright (c) 2016 Thomas Pornin <[email protected]>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package aes_ct64

import "base:intrinsics"

// Bitsliced AES for 64-bit general purpose (integer) registers. Each
// invocation will process up to 4 blocks at a time. This implementation
// is derived from the BearSSL ct64 code, and distributed under a 1-clause
// BSD license with permission from the original author.
//
// WARNING: "hic sunt dracones"
//
// This package also deliberately exposes enough internals to be able to
// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to
// allow the implementation of non-AES primitives that use the AES round
// function such as AEGIS and Deoxys-II. This should ONLY be done when
// implementing something other than AES itself.

// sub_bytes applies the AES S-box to the bitsliced state `q`, in place.
//
// The eight words of `q` are read as the eight input bit-planes and
// overwritten with the eight output bit-planes.  The body consists
// solely of word-wide XOR, AND and NOT operations: there are no
// branches and no table lookups.
sub_bytes :: proc "contextless" (q: ^[8]u64) {
// This S-box implementation is a straightforward translation of
// the circuit described by Boyar and Peralta in "A new
// combinational logic minimization technique with applications
// to cryptology" (https://eprint.iacr.org/2009/191.pdf).
//
// Note that variables x* (input) and s* (output) are numbered
// in "reverse" order (x0 is the high bit, x7 is the low bit).
//
// In Odin, binary `~` is XOR and unary `~` is bitwise NOT.

x0 := q[7]
x1 := q[6]
x2 := q[5]
x3 := q[4]
x4 := q[3]
x5 := q[2]
x6 := q[1]
x7 := q[0]

// Top linear transformation.
// XOR only: derives the y* signals from the inputs.
y14 := x3 ~ x5
y13 := x0 ~ x6
y9 := x0 ~ x3
y8 := x0 ~ x5
t0 := x1 ~ x2
y1 := t0 ~ x7
y4 := y1 ~ x3
y12 := y13 ~ y14
y2 := y1 ~ x0
y5 := y1 ~ x6
y3 := y5 ~ y8
t1 := x4 ~ y12
y15 := t1 ~ x5
y20 := t1 ~ x1
y6 := y15 ~ x7
y10 := y15 ~ t0
y11 := y20 ~ y9
y7 := x7 ~ y11
y17 := y10 ~ y11
y19 := y10 ~ y8
y16 := t0 ~ y11
y21 := y13 ~ y16
y18 := x0 ~ y16

// Non-linear section.
// This is the only part of the circuit that uses AND gates.
t2 := y12 & y15
t3 := y3 & y6
t4 := t3 ~ t2
t5 := y4 & x7
t6 := t5 ~ t2
t7 := y13 & y16
t8 := y5 & y1
t9 := t8 ~ t7
t10 := y2 & y7
t11 := t10 ~ t7
t12 := y9 & y11
t13 := y14 & y17
t14 := t13 ~ t12
t15 := y8 & y10
t16 := t15 ~ t12
t17 := t4 ~ t14
t18 := t6 ~ t16
t19 := t9 ~ t14
t20 := t11 ~ t16
t21 := t17 ~ y20
t22 := t18 ~ y19
t23 := t19 ~ y21
t24 := t20 ~ y18

t25 := t21 ~ t22
t26 := t21 & t23
t27 := t24 ~ t26
t28 := t25 & t27
t29 := t28 ~ t22
t30 := t23 ~ t24
t31 := t22 ~ t26
t32 := t31 & t30
t33 := t32 ~ t24
t34 := t23 ~ t33
t35 := t27 ~ t33
t36 := t24 & t35
t37 := t36 ~ t34
t38 := t27 ~ t36
t39 := t29 & t38
t40 := t25 ~ t39

t41 := t40 ~ t37
t42 := t29 ~ t33
t43 := t29 ~ t40
t44 := t33 ~ t37
t45 := t42 ~ t41
// Each z* is one AND of a t* signal with a y*/x* signal from above.
z0 := t44 & y15
z1 := t37 & y6
z2 := t33 & x7
z3 := t43 & y16
z4 := t40 & y1
z5 := t29 & y7
z6 := t42 & y11
z7 := t45 & y17
z8 := t41 & y10
z9 := t44 & y12
z10 := t37 & y3
z11 := t33 & y4
z12 := t43 & y13
z13 := t40 & y5
z14 := t29 & y2
z15 := t42 & y9
z16 := t45 & y14
z17 := t41 & y8

// Bottom linear transformation.
// XOR only, except that `a ~ ~b` (XOR with the complement, i.e. XNOR)
// is used for s1, s2, s6 and s7.
t46 := z15 ~ z16
t47 := z10 ~ z11
t48 := z5 ~ z13
t49 := z9 ~ z10
t50 := z2 ~ z12
t51 := z2 ~ z5
t52 := z7 ~ z8
t53 := z0 ~ z3
t54 := z6 ~ z7
t55 := z16 ~ z17
t56 := z12 ~ t48
t57 := t50 ~ t53
t58 := z4 ~ t46
t59 := z3 ~ t54
t60 := t46 ~ t57
t61 := z14 ~ t57
t62 := t52 ~ t58
t63 := t49 ~ t58
t64 := z4 ~ t59
t65 := t61 ~ t62
t66 := z1 ~ t63
s0 := t59 ~ t63
s6 := t56 ~ ~t62
s7 := t48 ~ ~t60
t67 := t64 ~ t65
s3 := t53 ~ t66
s4 := t51 ~ t66
s5 := t47 ~ t65
s1 := t64 ~ ~s3
s2 := t55 ~ ~t67

// Store the outputs with the same index reversal used when loading
// the inputs (s0 goes to q[7], s7 to q[0]).
q[7] = s0
q[6] = s1
q[5] = s2
q[4] = s3
q[3] = s4
q[2] = s5
q[1] = s6
q[0] = s7
}

// orthogonalize transposes bits between the eight words of `q`, in
// place, using three rounds of masked exchanges at distances 1, 2 and 4.
//
// Each round pairs word `i` with word `i + d` and swaps the "high"
// `d`-bit groups of the first word with the "low" `d`-bit groups of the
// second.  Every round undoes itself, and so does the whole procedure:
// calling it twice restores the original contents of `q`.
orthogonalize :: proc "contextless" (q: ^[8]u64) {
	// exchange returns the pair (a, b) after swapping the bits of `a`
	// selected by `hi` with the bits of `b` selected by `lo`, where
	// `hi == lo << shift`.
	exchange :: #force_inline proc "contextless" (a, b, lo, hi: u64, shift: uint) -> (u64, u64) {
		first := (a & lo) | ((b & lo) << shift)
		second := ((a & hi) >> shift) | (b & hi)
		return first, second
	}

	// Distance 1: alternating single bits, adjacent words.
	EVEN_BITS :: 0x5555555555555555
	ODD_BITS :: 0xAAAAAAAAAAAAAAAA
	q[0], q[1] = exchange(q[0], q[1], EVEN_BITS, ODD_BITS, 1)
	q[2], q[3] = exchange(q[2], q[3], EVEN_BITS, ODD_BITS, 1)
	q[4], q[5] = exchange(q[4], q[5], EVEN_BITS, ODD_BITS, 1)
	q[6], q[7] = exchange(q[6], q[7], EVEN_BITS, ODD_BITS, 1)

	// Distance 2: alternating bit pairs, words two apart.
	LOW_PAIRS :: 0x3333333333333333
	HIGH_PAIRS :: 0xCCCCCCCCCCCCCCCC
	q[0], q[2] = exchange(q[0], q[2], LOW_PAIRS, HIGH_PAIRS, 2)
	q[1], q[3] = exchange(q[1], q[3], LOW_PAIRS, HIGH_PAIRS, 2)
	q[4], q[6] = exchange(q[4], q[6], LOW_PAIRS, HIGH_PAIRS, 2)
	q[5], q[7] = exchange(q[5], q[7], LOW_PAIRS, HIGH_PAIRS, 2)

	// Distance 4: alternating nibbles, words four apart.
	LOW_NIBBLES :: 0x0F0F0F0F0F0F0F0F
	HIGH_NIBBLES :: 0xF0F0F0F0F0F0F0F0
	q[0], q[4] = exchange(q[0], q[4], LOW_NIBBLES, HIGH_NIBBLES, 4)
	q[1], q[5] = exchange(q[1], q[5], LOW_NIBBLES, HIGH_NIBBLES, 4)
	q[2], q[6] = exchange(q[2], q[6], LOW_NIBBLES, HIGH_NIBBLES, 4)
	q[3], q[7] = exchange(q[3], q[7], LOW_NIBBLES, HIGH_NIBBLES, 4)
}

// interleave_in packs the four 32-bit words `w[0..3]` into two 64-bit
// words.
//
// Each input word is spread out so that its four bytes occupy the low
// byte of each 16-bit lane.  `q0` then holds `w[0]` in the even byte
// positions and `w[2]` in the odd ones; `q1` does the same for `w[1]`
// and `w[3]`.
//
// Bounds checking is disabled for the body, so the length is verified
// by hand: fewer than 4 words triggers `intrinsics.trap()`.
@(require_results)
interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
	// spread moves byte k of `v` to byte 2k of the result.
	spread :: #force_inline proc "contextless" (v: u32) -> u64 {
		x := u64(v)
		x = (x | (x << 16)) & 0x0000FFFF0000FFFF
		x = (x | (x << 8)) & 0x00FF00FF00FF00FF
		return x
	}

	if len(w) < 4 {
		intrinsics.trap()
	}

	q0 = spread(w[0]) | (spread(w[2]) << 8)
	q1 = spread(w[1]) | (spread(w[3]) << 8)
	return
}

// interleave_out unpacks two 64-bit words into four 32-bit words.
//
// The even-positioned bytes of `q0` are gathered into `w0` and its
// odd-positioned bytes into `w2`; `q1` yields `w1` and `w3` in the same
// way.
@(require_results)
interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) {
	// gather collects bytes 0, 2, 4 and 6 of `v` into a 32-bit word,
	// lowest byte first.
	gather :: #force_inline proc "contextless" (v: u64) -> u32 {
		x := v & 0x00FF00FF00FF00FF
		x = (x | (x >> 8)) & 0x0000FFFF0000FFFF
		return u32(x) | u32(x >> 16)
	}

	w0 = gather(q0)
	w1 = gather(q1)
	// Shifting by one byte lines the odd-positioned bytes up with the
	// even positions that `gather` extracts.
	w2 = gather(q0 >> 8)
	w3 = gather(q1 >> 8)
	return
}

// rotr32 rotates the 64-bit word `x` by 32 bits, i.e. it swaps the
// upper and lower 32-bit halves.
@(private)
rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 {
	upper_to_lower := x >> 32
	lower_to_upper := x << 32
	return lower_to_upper | upper_to_lower
}
Loading

0 comments on commit c07a46a

Please sign in to comment.