//
// Copyright (c) 2003..2012 : Henk van Kampen
//
// This file is part of pBlazASM.
//
// pBlazASM is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// pBlazASM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with pBlazASM.  If not, see <http://www.gnu.org/licenses/>.
//

//
// Rijndael (AES-128) block cipher
// this code assumes 128b data and a 128b key so: Nk = Nc = 4, Nn = 10
// (c) 2003 .. 2012 Henk van Kampen, www.mediatronix.com
//
// based on documents by Dr. Brian Gladman,
// http://gladman.plushost.co.uk/oldsite/cryptography_technology/rijndael/aes.spec.v316.pdf
// or
// http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
//
// test data from [Gladman]
// PLAINTEXT: 3243f6a8885a308d313198a2e0370734 (pi * 2^124)
// KEY:       2b7e151628aed2a6abf7158809cf4f3c ( e * 2^124)
// should result in:
// R[10].k_sch  d014f9a8c9ee2589e13f0cc8b6630ca6
// R[10].output 3925841d02dc09fbdc118597196a0b32
//
// to be done:
//   decrypt
//
// Register usage in this file:
//   s0        loop counter
//   s1, s2    pKey / pState, indirect scratchpad pointers
//   s3        round counter (only touched by Encrypt)
//   s4..s7    one 32b word (key word or state column)
//   s8        SBox argument / result, general temporary
//   s9..sB    MixColumn temporaries (s9 is also used by SBox_Soft)
//   sC..sE    GMul operands / result (only used by SBox_Soft)
//   sF        current round constant Rcon
//

// SBOX I/O device, just a look-up ROM
SBOX_ROM    .EQU    0xF0            // uSBOX.VHD is instantiated at this port address

// key value
inkey       .BYT    0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6, 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C

// plaintext, working copy of the (round) key, and result
plain       .BYT    0x32, 0x43, 0xF6, 0xA8, 0x88, 0x5A, 0x30, 0x8D, 0x31, 0x31, 0x98, 0xA2, 0xE0, 0x37, 0x07, 0x34
key         .BUF    16
result      .BUF    16

// state buffer, byte index = row + 4 * column
state       .BUF    16

// special registers
pKey        .EQU    s1              // key pointer
pState      .EQU    s2              // state pointer

// constants
X           .EQU    0x01            // first round constant, x^0
G           .EQU    0x1B            // low byte of the field polynomial 0x11B
b128        .EQU    128 / 8         // 128 bits / 8 bits per byte = 16 bytes

// Rijndael encrypt entry
// plain is assumed to be in {plain}, the key in {inkey}
// both will be copied, final state will be the result
// in:       {plain}, {inkey}
// out:      {result}; {key} holds the last round key, {state} the ciphertext
// clobbers: s0..s9, sA, sB, sF, flags
Encrypt::
            CALL    InkeyToKey      // key = inkey
            CALL    InToState       // state = in
            CALL    XorRoundKey     // XorRoundKey( state, k[0], Nc )
            MOVE    sF, X           // x^(i-1) (i=1)
            MOVE    s3, 9           // for round = 1 step 1 to Nn - 1
Round:
            CALL    SubBytes        // ..SubBytes( state, Nc )
            CALL    ShiftRows       // ..ShiftRows( state, Nc )
            CALL    MixColumns      // ..MixColumns( state, Nc )
            CALL    NextRoundKey    // ..XorRoundKey( state, k[ round ], Nc )
            CALL    XorRoundKey
            SUB     s3, 1           // ..step 1
            JUMP    NZ, Round       // end for
            CALL    SubBytes        // SubBytes( state, Nc )
            CALL    ShiftRows       // ShiftRows( state, Nc ), the last round has no MixColumns
            CALL    NextRoundKey    // XorRoundKey( state, k[ round ], Nc )
            CALL    XorRoundKey
            CALL    StateToOut      // result = state
            RET

// result is last {state}
// result should be: (Gladman or fips197)
// R[10].k_sch  d014f9a8c9ee2589e13f0cc8b6630ca6
// R[10].result 3925841d02dc09fbdc118597196a0b32

// XorRoundKey( state, k, Nc )
// state[i] ^= key[i] for all 16 bytes
// xor128 is a second entry that expects pKey and pState to be set by the caller
// clobbers: s0, s4, s5, pKey, pState, flags
XorRoundKey:
            MOVE    pKey, key       // get pointer to key
            MOVE    pState, state   // get pointer to state
xor128:
            MOVE    s0, b128        // set up loop count
xornext:
            LD      s4, pKey        // get key byte
            LD      s5, pState      // get state byte
            XOR     s4, s5          // do the xor
            ST      s4, pState      // save new state byte
            ADD     pKey, 1         // increment key pointer
            ADD     pState, 1       // increment state pointer
            SUB     s0, 1           // decrement loop counter
            JUMP    NZ, xornext     // loop back if not done 16 times (128/8)
            RET

// InToState: state = plain
// clobbers: s0, s4, pKey, pState, flags
InToState:
            MOVE    pKey, plain     // source pointer: plaintext
            MOVE    pState, state   // destination pointer: state
            JUMP    ToScratch128

// InkeyToKey: key = inkey
// falls through into the shared copy loop
// clobbers: s0, s4, pKey, pState, flags
InkeyToKey:
            MOVE    pKey, inkey     // source pointer: supplied key
            MOVE    pState, key     // destination pointer: working key

// ToScratch128: copy 16 bytes from [pKey] to [pState]
// in:       pKey = source address, pState = destination address
// clobbers: s0, s4, pKey, pState, flags
ToScratch128:
            MOVE    s0, b128        // set up loop count
putnext:
            LD      s4, pKey        // get source byte
            ST      s4, pState      // save it at the destination
            ADD     pKey, 1         // increment source pointer
            ADD     pState, 1       // increment destination pointer
            SUB     s0, 1           // decrement loop counter
            JUMP    NZ, putnext     // loop back if not done 16 times (128/8)
            RET

// StateToOut: result = state
// shares the copy loop with InToState / InkeyToKey, its RET returns to our caller
// clobbers: s0, s4, pKey, pState, flags
StateToOut:
            MOVE    pKey, state     // source pointer: state
            MOVE    pState, result  // destination pointer: result
            JUMP    ToScratch128

// NextRoundKey: replace {key} in place by the next round key
//   temp = SubWord( RotWord( k[3] ) ) ^ Rcon
//   k[j] = k[j] ^ temp, temp = k[j]        for j = 0 .. 3
// in:       sF = Rcon of this round
// out:      sF = Rcon of the next round
// clobbers: s0, s4..s8, pKey, flags
NextRoundKey:
            LD      s4, key + 13    // temp = RotWord( k[i - 1] ), the last word
            LD      s5, key + 14    // of the previous key is fetched already
            LD      s6, key + 15    // rotated by one byte
            LD      s7, key + 12

            MOVE    s8, s4          // temp = SubWord( RotWord( temp ) )
            CALL    SBox
            MOVE    s4, s8
            XOR     s4, sF          // xor Rcon( i / Nk ), first byte only

            SL0     sF              // x^(i-1) (i+=1)
            JUMP    NC, nowrap      // no bit shifted out: no reduction needed
            XOR     sF, G           // reduce modulo the field polynomial
nowrap:
            MOVE    s8, s5          // SubWord( RotWord( temp ) )
            CALL    SBox
            MOVE    s5, s8

            MOVE    s8, s6          // SubWord( RotWord( temp ) )
            CALL    SBox
            MOVE    s6, s8

            MOVE    s8, s7          // SubWord( RotWord( temp ) )
            CALL    SBox
            MOVE    s7, s8

            MOVE    pKey, key
            MOVE    s0, b128        // 16 bytes, handled one word (4 bytes) per pass
key96:
            LD      s8, pKey        // k[i]=k[i - Nk] ^ temp
            XOR     s4, s8          // s4..s7 now hold the new word, which is
            ST      s4, pKey        // the temp for the next pass
            ADD     pKey, 1

            LD      s8, pKey        // k[i]=k[i - Nk] ^ temp
            XOR     s5, s8
            ST      s5, pKey
            ADD     pKey, 1

            LD      s8, pKey        // k[i]=k[i - Nk] ^ temp
            XOR     s6, s8
            ST      s6, pKey
            ADD     pKey, 1

            LD      s8, pKey        // k[i]=k[i - Nk] ^ temp
            XOR     s7, s8
            ST      s7, pKey
            ADD     pKey, 1

            SUB     s0, 4
            JUMP    NZ, key96
            RET

// Sub bytes of one 32b word pointed at by pKey
// in:       pKey = address of the word
// out:      pKey = address just past the word
// clobbers: s0, s8, flags
SubWord:
            MOVE    s0, 4
SubWord1:
            LD      s8, pKey
            CALL    SBox
            ST      s8, pKey
            ADD     pKey, 1
            SUB     s0, 1
            JUMP    NZ, SubWord1
            RET

// SubBytes( state, Nc )
// state[i] = SBox( state[i] ) for all 16 bytes
// clobbers: s0, s8, pState, flags
SubBytes:
            MOVE    pState, state   // get pointer to state
            MOVE    s0, b128        // set up loop count
sub128:
            LD      s8, pState      // get state byte
            CALL    SBox
            ST      s8, pState      // save new state byte
            ADD     pState, 1       // increment state pointer
            SUB     s0, 1           // decrement loop counter
            JUMP    NZ, sub128      // loop back if not done 16 times (128/8)
            RET

// SBox( s )
// in/out: s8, looked up through the ROM at port SBOX_ROM
SBox:
            OUT     s8, SBOX_ROM    // set index
            IN      s8, SBOX_ROM    // get data
            RET

// soft version of SBOX, very slow
// in/out:   s8
// clobbers: s9, sC, sD, sE, flags
SBox_Soft:
            CALL    MulInverse      // x = sbox_affine( mul_inverse( in ) )
SBoxAffine:
            MOVE    s8, s9          // s = in
            RL      s9              // s = rotate left( s, 1 )
            XOR     s8, s9          // in ^= s
            RL      s9
            XOR     s8, s9
            RL      s9
            XOR     s8, s9
            RL      s9
            XOR     s8, s9
            XOR     s8, 0x63        // in ^= 0x63
            RET                     // return in

// MulInverse by trial and error
// in:       s8
// out:      s9 = first value for which GMul( s8, s9 ) == 1, or 0 if s8 == 0
// clobbers: sC, sD, sE, flags
MulInverse:
            MOVE    s9, 0           // int result = 0
            OR      s8, s8          // if (in == 0)
            RET     Z               // return 0
MulInverse1:
            ADD     s9, 1           // result++
            RET     Z               // wrapped around to 0: all candidates tried
            MOVE    sC, s8          // in
            MOVE    sD, s9          // result
            CALL    GMul            // gmul( in, result, ...)
            SUB     sE, 1           // == 1
            JUMP    NZ, MulInverse1 // == 1?
            RET                     // return result

// GMul: shift-and-add multiply, reduced with G
// in:       sC, sD = factors
// out:      sE = product
// clobbers: sC, sD, flags
GMul:
            MOVE    sE, 0
GMul1:
            SR0     sD
            JUMP    C, GMul2        // last bit was 1
            RET     Z               // i2 was 0 already ?
            JUMP    GMul3
GMul2:
            XOR     sE, sC
GMul3:
            SL0     sC
            JUMP    NC, GMul1
            XOR     sC, G           // i1 ^= field
            JUMP    GMul1

// ShiftRows( state, Nc )
// row r (bytes state + r + 4 * c) is rotated left by r columns, row 0 is untouched
// clobbers: s4..s7
ShiftRows:
            LD      s7, state + 1               // row 1: rotate by 1
            LD      s4, state + 1 + 4
            LD      s5, state + 1 + 4 + 4
            LD      s6, state + 1 + 4 + 4 + 4
            ST      s4, state + 1
            ST      s5, state + 1 + 4
            ST      s6, state + 1 + 4 + 4
            ST      s7, state + 1 + 4 + 4 + 4

            LD      s6, state + 2               // row 2: rotate by 2
            LD      s7, state + 2 + 4
            LD      s4, state + 2 + 4 + 4
            LD      s5, state + 2 + 4 + 4 + 4
            ST      s4, state + 2
            ST      s5, state + 2 + 4
            ST      s6, state + 2 + 4 + 4
            ST      s7, state + 2 + 4 + 4 + 4

            LD      s5, state + 3               // row 3: rotate by 3
            LD      s6, state + 3 + 4
            LD      s7, state + 3 + 4 + 4
            LD      s4, state + 3 + 4 + 4 + 4
            ST      s4, state + 3
            ST      s5, state + 3 + 4
            ST      s6, state + 3 + 4 + 4
            ST      s7, state + 3 + 4 + 4 + 4
            RET

// MixColumns( state, Nc )
// runs MixColumn on each of the four columns of the state
// clobbers: s4..s9, sA, sB, flags
MixColumns:
            LD      s4, state + 0               // column 0
            LD      s5, state + 1
            LD      s6, state + 2
            LD      s7, state + 3
            CALL    MixColumn
            ST      s4, state + 0
            ST      s5, state + 1
            ST      s6, state + 2
            ST      s7, state + 3

            LD      s4, state + 0 + 4           // column 1
            LD      s5, state + 1 + 4
            LD      s6, state + 2 + 4
            LD      s7, state + 3 + 4
            CALL    MixColumn
            ST      s4, state + 0 + 4
            ST      s5, state + 1 + 4
            ST      s6, state + 2 + 4
            ST      s7, state + 3 + 4

            LD      s4, state + 0 + 4 + 4       // column 2
            LD      s5, state + 1 + 4 + 4
            LD      s6, state + 2 + 4 + 4
            LD      s7, state + 3 + 4 + 4
            CALL    MixColumn
            ST      s4, state + 0 + 4 + 4
            ST      s5, state + 1 + 4 + 4
            ST      s6, state + 2 + 4 + 4
            ST      s7, state + 3 + 4 + 4

            LD      s4, state + 0 + 4 + 4 + 4   // column 3
            LD      s5, state + 1 + 4 + 4 + 4
            LD      s6, state + 2 + 4 + 4 + 4
            LD      s7, state + 3 + 4 + 4 + 4
            CALL    MixColumn
            ST      s4, state + 0 + 4 + 4 + 4
            ST      s5, state + 1 + 4 + 4 + 4
            ST      s6, state + 2 + 4 + 4 + 4
            ST      s7, state + 3 + 4 + 4 + 4
            RET

// MixColumn: mix one column
// in/out:   s4..s7 = c[0] .. c[3]
// clobbers: s8, s9, sA, sB, flags
// FFmul(0x02, a) is done as: shift left, xor G when a bit was shifted out
MixColumn:
            MOVE    s9, s4          // t = c[0] ^ c[3]
            XOR     s9, s7
            MOVE    sA, s5          // u = c[1] ^ c[2]
            XOR     sA, s6
            MOVE    sB, s9          // v = t ^ u
            XOR     sB, sA

            MOVE    s8, s4          // c[0] = c[0] ^ v ^ FFmul(0x02, c[0] ^ c[1])
            XOR     s8, s5
            SL0     s8
            JUMP    NC, mcf1
            XOR     s8, G
mcf1:
            XOR     s8, sB
            XOR     s4, s8

            MOVE    s8, sA          // c[1] = c[1] ^ v ^ FFmul(0x02, u)
            SL0     s8
            JUMP    NC, mcf2
            XOR     s8, G
mcf2:
            XOR     s8, sB
            XOR     s5, s8

            MOVE    s8, s6          // c[2] = c[2] ^ v ^ FFmul(0x02, c[2] ^ c[3])
            XOR     s8, s7
            SL0     s8
            JUMP    NC, mcf3
            XOR     s8, G
mcf3:
            XOR     s8, sB
            XOR     s6, s8

            MOVE    s8, s9          // c[3] = c[3] ^ v ^ FFmul(0x02, t)
            SL0     s8
            JUMP    NC, mcf4
            XOR     s8, G
mcf4:
            XOR     s8, sB
            XOR     s7, s8
            RET