How to Write a Program: SHA-1 in Lisp and other languages

Lisp is a fascinating language and I want to learn it so that I "get it."

Recently I looked for a Common Lisp implementation of the Secure Hash Algorithm - 1 and this is the only one I could find. It's from ironclad.

;;;; This is an implementation of the US Secure Hash Algorithm 1 (SHA1),
;;;; defined in RFC 3174, written by D. Eastlake and P. Jones, September
;;;; 2001.  The RFC was based on the document "Secure Hash Standard",
;;;; United States of America, National Institute of Science and Technology,
;;;; Federal Information Processing Standard (FIPS) 180-1, April 1993.
;;;;
;;;; It was written by Nathan J. Froyd, with many of the main ideas and
;;;; functions grabbed from Pierre R. Mai's CL implementation of MD5,
;;;; available at http://www.pmsf.de/pmai/MD5.html.
;;;;
;;;; This implementation should work on any conforming Common Lisp
;;;; implementation, but it has been optimized for CMU CL and SBCL.
;;;;
;;;; The implementation makes heavy use of (UNSIGNED-BYTE 32) arithmetic;
;;;; if your CL implementation does not implement unboxed arithmetic on
;;;; such numbers, performance will likely be greater in a 16-bit
;;;; implementation. 
;;;;
;;;; This software is "as is", and has no warranty of any kind.  The
;;;; authors assume no responsibility for the consequences of any use
;;;; of this software.

(in-package :crypto)

;;; nonlinear functions

(defconstant +k1+ #x5a827999)
(defconstant +k2+ #x6ed9eba1)
(defconstant +k3+ #x8f1bbcdc)
(defconstant +k4+ #xca62c1d6)

;;; working set

(define-digest-registers (sha1 :endian :big)
  (a #x67452301)
  (b #xefcdab89)
  (c #x98badcfe)
  (d #x10325476)
  (e #xc3d2e1f0))

(defconst +pristine-sha1-registers+ (initial-sha1-regs))

(macrolet ((sha1-rounds (block func constant low high &rest initial-order)
             ;; Yay for "implementation-dependent" behavior (6.1.1.4).
             (let ((xvars (apply #'make-circular-list initial-order)))
               (loop for i from low upto high
                     for vars on xvars by #'cddddr
                     collect (let ((a-var (first vars))
                                   (b-var (second vars))
                                   (c-var (third vars))
                                   (d-var (fourth vars))
                                   (e-var (fifth vars)))
                               `(setf ,e-var 
                                      (mod32+ (rol32 ,a-var 5)
                                              (mod32+ (mod32+ (,func ,b-var ,c-var ,d-var) ,e-var)
                                                      (mod32+ (aref ,block ,i) ,constant)))
                                      ,b-var (rol32 ,b-var 30))) into forms
                     finally (return `(progn ,@forms))))))
(defun update-sha1-block (regs block)
  (declare (type sha1-regs regs)
           (type (simple-array (unsigned-byte 32) (80)) block)
           #.(burn-baby-burn))
  (let ((a (sha1-regs-a regs)) (b (sha1-regs-b regs))
 (c (sha1-regs-c regs)) (d (sha1-regs-d regs))
        (e (sha1-regs-e regs)))
    (flet ((f1 (x y z)
             (declare (type (unsigned-byte 32) x y z))
             #+cmu
             (kernel:32bit-logical-xor z
                                       (kernel:32bit-logical-and x
                                                                 (kernel:32bit-logical-xor y z)))
             #-cmu
             (logxor z (logand x (logxor y z))))
           (f2 (x y z)
             (declare (type (unsigned-byte 32) x y z))
             #+cmu
             (kernel:32bit-logical-xor x (kernel:32bit-logical-xor y z))
             #-cmu
             (ldb (byte 32 0) (logxor x y z)))
           (f3 (x y z)
             (declare (type (unsigned-byte 32) x y z))
             #+cmu
             (kernel:32bit-logical-or (kernel:32bit-logical-or
                                       (kernel:32bit-logical-and x y)
                                       (kernel:32bit-logical-and x z))
                                      (kernel:32bit-logical-and y z))
             #-cmu
             (ldb (byte 32 0)
                  (logior (logand x y) (logand x z) (logand y z)))))
      #+ironclad-fast-mod32-arithmetic
      (declare (inline f1 f2 f3))
      ;; core of the algorithm
      (sha1-rounds block f1 +k1+ 0 19 a b c d e)
      (sha1-rounds block f2 +k2+ 20 39 a b c d e)
      (sha1-rounds block f3 +k3+ 40 59 a b c d e)
      (sha1-rounds block f2 +k4+ 60 79 a b c d e)
      ;; update and return
      (setf (sha1-regs-a regs) (mod32+ (sha1-regs-a regs) a)
            (sha1-regs-b regs) (mod32+ (sha1-regs-b regs) b)
            (sha1-regs-c regs) (mod32+ (sha1-regs-c regs) c)
            (sha1-regs-d regs) (mod32+ (sha1-regs-d regs) d)
            (sha1-regs-e regs) (mod32+ (sha1-regs-e regs) e))
      regs)))
) ; MACROLET

(declaim (inline expand-block))
(defun expand-block (block)
  "Expand the first 16 words in BLOCK to fill the entire 80 word space
available."
  (declare (type (simple-array (unsigned-byte 32) (80)) block)
           #.(burn-baby-burn))
  (loop for i of-type (integer 16 80) from 16 below 80
        do (setf (aref block i)
                 (rol32 #+cmu
                        (kernel:32bit-logical-xor
                         (kernel:32bit-logical-xor (aref block (- i 3))
                                                   (aref block (- i 8)))
                         (kernel:32bit-logical-xor (aref block (- i 14))
                                                   (aref block (- i 16))))
                        #-cmu
                        (ldb (byte 32 0)
                             (logxor (aref block (- i 3))
                                     (aref block (- i 8))
                                     (aref block (- i 14))
                                     (aref block (- i 16))))
                        1))))

;;; mid-level

(defstruct (sha1
             (:constructor %make-sha1-digest)
             (:constructor %make-sha1-state (regs amount block buffer buffer-index))
             (:copier nil))
  (regs (initial-sha1-regs) :type sha1-regs :read-only t)
  (amount 0 :type (unsigned-byte 64))
  (block (make-array 80 :element-type '(unsigned-byte 32))
    :type (simple-array (unsigned-byte 32) (80)) :read-only t)
  (buffer (make-array 64 :element-type '(unsigned-byte 8))
          :type (simple-array (unsigned-byte 8) (64)) :read-only t)
  (buffer-index 0 :type (integer 0 63)))

(defmethod reinitialize-instance ((state sha1) &rest initargs)
  (declare (ignore initargs))
  (replace (sha1-regs state) +pristine-sha1-registers+)
  (setf (sha1-amount state) 0
        (sha1-buffer-index state) 0)
  state)

(defmethod copy-digest ((state sha1) &optional copy)
  (declare (type (or cl:null sha1) copy))
  (cond
    (copy
     (replace (sha1-regs copy) (sha1-regs state))
     (replace (sha1-buffer copy) (sha1-buffer state))
     (setf (sha1-amount copy) (sha1-amount state)
           (sha1-buffer-index copy) (sha1-buffer-index state))
     copy)
    (t
     (%make-sha1-state (copy-seq (sha1-regs state))
                       (sha1-amount state)
                       (copy-seq (sha1-block state))
                       (copy-seq (sha1-buffer state))
                       (sha1-buffer-index state)))))

(define-digest-updater sha1
  (let ((regs (sha1-regs state))
        (block (sha1-block state))
        (buffer (sha1-buffer state))
        (buffer-index (sha1-buffer-index state))
        (length (- end start)))
    (declare (type sha1-regs regs) (type fixnum length)
      (type (integer 0 63) buffer-index)
      (type (simple-array (unsigned-byte 32) (80)) block)
      (type (simple-array (unsigned-byte 8) (64)) buffer))
    ;; Handle old rest
    (unless (zerop buffer-index)
      (let ((amount (min (- 64 buffer-index) length)))
 (declare (type (integer 0 63) amount))
 (copy-to-buffer sequence start amount buffer buffer-index)
 (setq start (the fixnum (+ start amount)))
        (let ((new-index (mod (+ buffer-index amount) 64)))
          (when (zerop new-index)
            (fill-block-ub8-be block buffer 0)
            (expand-block block)
            (update-sha1-block regs block))
          (when (>= start end)
            (setf (sha1-buffer-index state) new-index)
            (incf (sha1-amount state) length)
            (return-from update-digest state)))))
    (loop for offset of-type index from start below end by 64
          until (< (- end offset) 64)
          do
          (fill-block-ub8-be block sequence offset)
          (expand-block block)
          (update-sha1-block regs block)
          finally
          (let ((amount (- end offset)))
            (unless (zerop amount)
              (copy-to-buffer sequence offset amount buffer 0))
            (setf (sha1-buffer-index state) amount)))
    (incf (sha1-amount state) length)
    state))

(define-digest-finalizer (sha1 20)
  (let ((regs (sha1-regs state))
        (block (sha1-block state))
        (buffer (sha1-buffer state))
        (buffer-index (sha1-buffer-index state))
        (total-length (* 8 (sha1-amount state))))
    (declare (type sha1-regs regs)
             (type (integer 0 63) buffer-index)
             (type (simple-array (unsigned-byte 32) (80)) block)
             (type (simple-array (unsigned-byte 8) (64)) buffer))
    (setf (aref buffer buffer-index) #x80)
    (when (> buffer-index 55)
      (loop for index of-type (integer 0 64)
         from (1+ buffer-index) below 64
         do (setf (aref buffer index) #x00))
      (fill-block-ub8-be block buffer 0)
      (expand-block block)
      (update-sha1-block regs block)
      (loop for index of-type (integer 0 16)
         from 0 below 16
         do (setf (aref block index) #x00000000)))
    (when (<= buffer-index 55)
      (loop for index of-type (integer 0 64)
         from (1+ buffer-index) below 64
         do (setf (aref buffer index) #x00))
      ;; copy the data to BLOCK prematurely
      (fill-block-ub8-be block buffer 0))
    ;; fill in the remaining block data
    (store-data-length block total-length 14 t)
    (expand-block block)
    (update-sha1-block regs block)
    (finalize-registers state regs)))

(defdigest sha1 :digest-length 20 :block-length 64)

I remember reading in one of Douglas Hofstadter's wonderful Metamagical Themas that "Lisp is crisp." My initial impression is that this fragment of Lisp could be crisper. This could be be because I'm a newcomer to Lisp. Actually, the more I look at it the better it gets.

I've since spotted another Lisp version, but it's not very crisp either.

I think the SHA-1 algorithm is fairly straight-forward to state, if rather harder to reason mathematically about. But this Lisp implementation is a little opaque for my Lisp noobiness level of understanding.

The algorithm was designed to be implemented efficiently on a machine with 32-bit words and there is a little bit of gubbins necessary to get Lisp to do modulo 32-bit arithmetic. But it's not just that that makes this code hard for me to read.

While I only found two examples of SHA-1 in Lisp I found many examples in other languages and I discovered that Linus Torvalds recently spent some time improving the performance of the Git SHA-1 algorithm. Personally. So I thought I should take a closer look at that. Here it is.

/* sha1.h */

/*
 * Based on the Mozilla SHA1 (see mozilla-sha1/sha1.h),
 * optimized to do word accesses rather than byte accesses,
 * and to avoid unnecessary copies into the context array.
 */

typedef struct {
 unsigned int H[5];
 unsigned int W[16];
 unsigned long long size;
} blk_SHA_CTX;

void blk_SHA1_Init(blk_SHA_CTX *ctx);
void blk_SHA1_Update(blk_SHA_CTX *ctx, const void *dataIn, unsigned long len);
void blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx);

#define git_SHA_CTX blk_SHA_CTX
#define git_SHA1_Init blk_SHA1_Init
#define git_SHA1_Update blk_SHA1_Update
#define git_SHA1_Final blk_SHA1_Final



/* sha1.c */

/*
 * Based on the Mozilla SHA1 (see mozilla-sha1/sha1.c),
 * optimized to do word accesses rather than byte accesses,
 * and to avoid unnecessary copies into the context array.
 */

#include <string.h>
#include <arpa/inet.h>

#include "sha1.h"

/* Hash one 64-byte block of data */
static void blk_SHA1Block(blk_SHA_CTX *ctx, const unsigned int *data);

void blk_SHA1_Init(blk_SHA_CTX *ctx)
{
 ctx->size = 0;

 /* Initialize H with the magic constants (see FIPS180 for constants)
  */
 ctx->H[0] = 0x67452301;
 ctx->H[1] = 0xefcdab89;
 ctx->H[2] = 0x98badcfe;
 ctx->H[3] = 0x10325476;
 ctx->H[4] = 0xc3d2e1f0;
}


void blk_SHA1_Update(blk_SHA_CTX *ctx, const void *data, unsigned long len)
{
 int lenW = ctx->size & 63;

 ctx->size += len;

 /* Read the data into W and process blocks as they get full
  */
 if (lenW) {
  int left = 64 - lenW;
  if (len < left)
   left = len;
  memcpy(lenW + (char *)ctx->W, data, left);
  lenW = (lenW + left) & 63;
  len -= left;
  data += left;
  if (lenW)
   return;
  blk_SHA1Block(ctx, ctx->W);
 }
 while (len >= 64) {
  blk_SHA1Block(ctx, data);
  data += 64;
  len -= 64;
 }
 if (len)
  memcpy(ctx->W, data, len);
}


void blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx)
{
 static const unsigned char pad[64] = { 0x80 };
 unsigned int padlen[2];
 int i;

 /* Pad with a binary 1 (ie 0x80), then zeroes, then length
  */
 padlen[0] = htonl(ctx->size >> 29);
 padlen[1] = htonl(ctx->size << 3);

 i = ctx->size & 63;
 blk_SHA1_Update(ctx, pad, 1+ (63 & (55 - i)));
 blk_SHA1_Update(ctx, padlen, 8);

 /* Output hash
  */
 for (i = 0; i < 5; i++)
  ((unsigned int *)hashout)[i] = htonl(ctx->H[i]);
}

#if defined(__i386__) || defined(__x86_64__)

#define SHA_ASM(op, x, n) ({ unsigned int __res; __asm__(op " %1,%0":"=r" (__res):"i" (n), "0" (x)); __res; })
#define SHA_ROL(x,n) SHA_ASM("rol", x, n)
#define SHA_ROR(x,n) SHA_ASM("ror", x, n)

#else

#define SHA_ROT(X,l,r) (((X) << (l)) | ((X) >> (r)))
#define SHA_ROL(X,n) SHA_ROT(X,n,32-(n))
#define SHA_ROR(X,n) SHA_ROT(X,32-(n),n)

#endif

/* This "rolls" over the 512-bit array */
#define W(x) (array[(x)&15])
#define setW(x, val) (*(volatile unsigned int *)&W(x) = (val))

/*
 * Where do we get the source from? The first 16 iterations get it from
 * the input data, the next mix it from the 512-bit array.
 */
#define SHA_SRC(t) htonl(data[t])
#define SHA_MIX(t) SHA_ROL(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)

#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
 unsigned int TEMP = input(t); setW(t, TEMP); \
 E += TEMP + SHA_ROL(A,5) + (fn) + (constant); \
 B = SHA_ROR(B, 2); } while (0)

#define T_0_15(t, A, B, C, D, E)  SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) ,  0xca62c1d6, A, B, C, D, E )

static void blk_SHA1Block(blk_SHA_CTX *ctx, const unsigned int *data)
{
 unsigned int A,B,C,D,E;
 unsigned int array[16];

 A = ctx->H[0];
 B = ctx->H[1];
 C = ctx->H[2];
 D = ctx->H[3];
 E = ctx->H[4];

 /* Round 1 - iterations 0-16 take their input from 'data' */
 T_0_15( 0, A, B, C, D, E);
 T_0_15( 1, E, A, B, C, D);
 T_0_15( 2, D, E, A, B, C);
 T_0_15( 3, C, D, E, A, B);
 T_0_15( 4, B, C, D, E, A);
 T_0_15( 5, A, B, C, D, E);
 T_0_15( 6, E, A, B, C, D);
 T_0_15( 7, D, E, A, B, C);
 T_0_15( 8, C, D, E, A, B);
 T_0_15( 9, B, C, D, E, A);
 T_0_15(10, A, B, C, D, E);
 T_0_15(11, E, A, B, C, D);
 T_0_15(12, D, E, A, B, C);
 T_0_15(13, C, D, E, A, B);
 T_0_15(14, B, C, D, E, A);
 T_0_15(15, A, B, C, D, E);

 /* Round 1 - tail. Input from 512-bit mixing array */
 T_16_19(16, E, A, B, C, D);
 T_16_19(17, D, E, A, B, C);
 T_16_19(18, C, D, E, A, B);
 T_16_19(19, B, C, D, E, A);

 /* Round 2 */
 T_20_39(20, A, B, C, D, E);
 T_20_39(21, E, A, B, C, D);
 T_20_39(22, D, E, A, B, C);
 T_20_39(23, C, D, E, A, B);
 T_20_39(24, B, C, D, E, A);
 T_20_39(25, A, B, C, D, E);
 T_20_39(26, E, A, B, C, D);
 T_20_39(27, D, E, A, B, C);
 T_20_39(28, C, D, E, A, B);
 T_20_39(29, B, C, D, E, A);
 T_20_39(30, A, B, C, D, E);
 T_20_39(31, E, A, B, C, D);
 T_20_39(32, D, E, A, B, C);
 T_20_39(33, C, D, E, A, B);
 T_20_39(34, B, C, D, E, A);
 T_20_39(35, A, B, C, D, E);
 T_20_39(36, E, A, B, C, D);
 T_20_39(37, D, E, A, B, C);
 T_20_39(38, C, D, E, A, B);
 T_20_39(39, B, C, D, E, A);

 /* Round 3 */
 T_40_59(40, A, B, C, D, E);
 T_40_59(41, E, A, B, C, D);
 T_40_59(42, D, E, A, B, C);
 T_40_59(43, C, D, E, A, B);
 T_40_59(44, B, C, D, E, A);
 T_40_59(45, A, B, C, D, E);
 T_40_59(46, E, A, B, C, D);
 T_40_59(47, D, E, A, B, C);
 T_40_59(48, C, D, E, A, B);
 T_40_59(49, B, C, D, E, A);
 T_40_59(50, A, B, C, D, E);
 T_40_59(51, E, A, B, C, D);
 T_40_59(52, D, E, A, B, C);
 T_40_59(53, C, D, E, A, B);
 T_40_59(54, B, C, D, E, A);
 T_40_59(55, A, B, C, D, E);
 T_40_59(56, E, A, B, C, D);
 T_40_59(57, D, E, A, B, C);
 T_40_59(58, C, D, E, A, B);
 T_40_59(59, B, C, D, E, A);

 /* Round 4 */
 T_60_79(60, A, B, C, D, E);
 T_60_79(61, E, A, B, C, D);
 T_60_79(62, D, E, A, B, C);
 T_60_79(63, C, D, E, A, B);
 T_60_79(64, B, C, D, E, A);
 T_60_79(65, A, B, C, D, E);
 T_60_79(66, E, A, B, C, D);
 T_60_79(67, D, E, A, B, C);
 T_60_79(68, C, D, E, A, B);
 T_60_79(69, B, C, D, E, A);
 T_60_79(70, A, B, C, D, E);
 T_60_79(71, E, A, B, C, D);
 T_60_79(72, D, E, A, B, C);
 T_60_79(73, C, D, E, A, B);
 T_60_79(74, B, C, D, E, A);
 T_60_79(75, A, B, C, D, E);
 T_60_79(76, E, A, B, C, D);
 T_60_79(77, D, E, A, B, C);
 T_60_79(78, C, D, E, A, B);
 T_60_79(79, B, C, D, E, A);

 ctx->H[0] += A;
 ctx->H[1] += B;
 ctx->H[2] += C;
 ctx->H[3] += D;
 ctx->H[4] += E;
}

It's interesting to compare Linus's version with this version, or even this version.

After looking at some of the many SHA-1 implementations around the web I decided to write my own version that would have these goals

1. Correct.
2. Crisp (i.e. short and sweet).
3. Aspiring to the efficiency of Linus's version.

So here is my version, which, of course, borrows ideas from many sources

/* sha1.h */

#ifndef SHA1_H_INCLUDED
#define SHA1_H_INCLUDED

/*  calculate SHA-1 digest
    Copyright (c) 2010 Anthony C. Hay
    Distributed under the BSD license, see:
    http://creativecommons.org/licenses/BSD/ */

#include <cstddef>
#include <stdexcept>

namespace nettle_soup {

class sha1 {
public:
    sha1()
    {
        reset();
    }

    // digest 'len' bytes in given 'buf'; may be called zero or more times
    void update(const void * buf, std::size_t len)
    {
        if (finalized_) {
            /*  the correct usage for this class is: c'tor or reset() followed
                by zero or more calls to update() followed by zero or more
                calls to final_value() followed, if required, by zero or more
                calls to reset(), and so on; you may not digest more data after
                the first call to final_value() without an interveining call
                to reset() */
            throw std::logic_error("sha1::update() called after call to sha1::final_value()");
        }

        // first fill block_, if we can, then process it
        const unsigned char * src = reinterpret_cast<const unsigned char *>(buf);
        unsigned char * blk = block_ + (total_ & 63);
        unsigned char * end = block_ + 64;
        total_ += len;
        while (len && blk < end) {
            *blk++ = *src++;
            --len;
        }
        if (blk < end)
            return; // block_ not yet full
        process_block(block_, h_);
        
        // process all remaining full 64-byte blocks in given data
        while (len >= 64) {
            process_block(src, h_);
            src += 64;
            len -= 64;
        }
        
        // copy any unprocessed data to block_ so it can be processed next time
        blk = block_;
        end = block_ + len;
        while (blk < end)
            *blk++ = *src++;
    }
    
    // the digest result is in the form of 5 32-bit unsigned ints
    typedef unsigned digest_type[5];
    
    // return a const ref to the digest result; may be called 0 or more times
    const digest_type & final_value()
    {
        if (!finalized_) {
            unsigned char total_bits[8];
            big_endian(total_ << 3, total_bits);
            
            // write a 1-bit and suficient 0-bits to reach 8 bytes from block end
            static const unsigned char pad[64] = { 0x80 };
            update(pad, 1 + (63 & (55 - (int)(total_ & 63))));
            
            // final 8 bytes contain total number of user data bits digested
            update(total_bits, 8);
            finalized_ = true;
        }        
        return h_;
    }
    
    // call reset() if you want to reuse the object (you may not call update()
    // after the first call to final_value() without an interveining call to
    // reset() - if you do, update() will throw)
    void reset()
    {
        h_[0] = 0x67452301;
        h_[1] = 0xEFCDAB89;
        h_[2] = 0x98BADCFE;
        h_[3] = 0x10325476;
        h_[4] = 0xC3D2E1F0;
        total_ = 0;
        finalized_ = false;
    }
    
private:
    digest_type h_;             // digest accumulated here
    unsigned char block_[64];   // copy of data waiting to be processed
    unsigned long long total_;  // total number of bytes processsed so far
    bool finalized_;            // to ensure we only finalize once

    // return given 'x' rotated left by 'count' bits
    unsigned rol(unsigned x, unsigned count) const
    {
        return (x << count) | (x >> (32 - count));
    }
    
    // write given 'n' to given 8-byte 'buf' in big-endian byte order
    void big_endian(unsigned long long n, unsigned char * buf) const
    {
        for (int i = 7; i >= 0; --i) {
            buf[i] = static_cast<unsigned char>(n);
            n >>= 8;
        }
    }
    
    // apply SHA-1 algorithm to one full 64-byte 'block'; update given 5-word 'h'
    // (lookup "FIPS 180-1" for details of the SHA-1 algorithm)
    void process_block(const unsigned char * block, unsigned * h) const
    {
        unsigned w[80];
        unsigned * p = w;
        for (; p < w+16; ++p, block += 4)
            *p = (block[0] << 24) | (block[1] << 16) | (block[2] << 8) | block[3];
        int i = 16;
        for (; i < 80; ++i)
            *p++ = rol(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);
        
        unsigned a = h[0];
        unsigned b = h[1];
        unsigned c = h[2];
        unsigned d = h[3];
        unsigned e = h[4];
        
        for (i = 0; i < 20; ++i) {
            const unsigned newa = rol(a, 5) + e + w[i] + 0x5A827999 + (((c ^ d) & b) ^ d);
            e = d; d = c; c = rol(b, 30); b = a; a = newa;
        }
        for (; i < 40; ++i) {
            const unsigned newa = rol(a, 5) + e + w[i] + 0x6ED9EBA1 + (b ^ c ^ d);
            e = d; d = c; c = rol(b, 30); b = a; a = newa;
        }
        for (; i < 60; ++i) {
            const unsigned newa = rol(a, 5) + e + w[i] + 0x8F1BBCDC + ((b & c) + (d & (b ^ c)));
            e = d; d = c; c = rol(b, 30); b = a; a = newa;
        }
        for (; i < 80; ++i) {
            const unsigned newa = rol(a, 5) + e + w[i] + 0xCA62C1D6 + (b ^ c ^ d);
            e = d; d = c; c = rol(b, 30); b = a; a = newa;
        }
        
        h[0] += a;
        h[1] += b;
        h[2] += c;
        h[3] += d;
        h[4] += e;
    }
};

}

#endif

Did I achieve those goals?

The code produces correct results for all the sample data used. It is probably ready for some real-world testing. (I tested the code on GCC on OS X and MS VC 2003 on Windows. I also ran the code through Comeau C++ and Gimple Lint.)

The code doesn't get in the way of the algorithm. You could look at the FIPS 180-1 definition and look at this code and see the algorithm in the code. But is it crisp? In the end, this is a matter of taste.

The unit tests I wrote for this code are shown below. They run in ~52 seconds on my computer. If I replace my process_bytes() function with Linus's blk_SHA1Block() the same unit tests run in ~32 seconds - about 60% of the time required by mine. (Not forgetting how slippery a beast a benchmark is.)

There is often a balance between correctness, efficiency and lucidity. Sometimes I don't have the time or the imagination to find a good solution. In this case I spent some time trying different ideas that I thought might improve the performance of my implementation but I couldn't find any way to significantly improve the execution time while not losing the simplicity and comprehensibility of the source code.

Given that the SHA-1 algorithm isn't going to change, you're probably not going to have to work on it, so the simplicity of the source code is probably not so important. If run-time performance is important in your application you'd almost certainly be better off with Linus's code than with mine.

// unit test for nettle_soup::sha1
#include "sha1.h"
using nettle_soup::sha1;

#include <iostream>
#include <sstream>
#include <iomanip>

unsigned g_test_count;      // count of number of unit tests executed
unsigned g_fault_count;     // count of number of unit tests that fail

template <typename T1, typename T2>
void test_equal_(const T1 & value, const T2 & expected_value, const char * file, int line)
{
    ++g_test_count;
    if (value != expected_value) {
        std::cout
        << file << '(' << line << ") : "
        << " expected " << expected_value
        << ", got " << value
        << '\n';
        ++g_fault_count;
    }
}

// write a message to std::cout if value != expected_value
#define TEST_EQUAL(expected_value, value) test_equal_(value, expected_value, __FILE__, __LINE__)

// return the hex ASCII string representation of the given array of 5 unsigned ints
std::string to_string(const unsigned int (&d)[5])
{
    std::ostringstream s;
    s << std::hex << std::setfill('0');
    for (int i = 0; i < 5; ++i)
        s << std::setw(8) << d[i];
    return s.str();
}

// return the SHA-1 digest of the given string as a hex ASCII string
std::string sha1_str(const std::string & s)
{
    sha1 digest;
    digest.update(s.c_str(), s.size());
    return to_string(digest.final_value());
}

void test_assumptions()
{
    // the implementation requires these types be these sizes
    TEST_EQUAL(32, sizeof(unsigned) * CHAR_BIT);
    TEST_EQUAL(64, sizeof(unsigned long long) * CHAR_BIT);
}

void test_known_values()
{
    TEST_EQUAL("da39a3ee5e6b4b0d3255bfef95601890afd80709", sha1_str(""));
    TEST_EQUAL("356a192b7913b04c54574d18c28d46e6395428ab", sha1_str("1"));
    TEST_EQUAL("a9993e364706816aba3e25717850c26c9cd0d89d", sha1_str("abc"));
    TEST_EQUAL("9b56d519ccd9e1e5b2a725e186184cdc68de0731", sha1_str("Hello."));
    TEST_EQUAL("32d10c7b8cf96570ca04ce37f2a19d84240d3a89", sha1_str("abcdefghijklmnopqrstuvwxyz"));
    TEST_EQUAL("2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", sha1_str("The quick brown fox jumps over the lazy dog"));
    TEST_EQUAL("de9f2c7fd25e1b3afad3e85a0bd17d9b100db4b3", sha1_str("The quick brown fox jumps over the lazy cog"));
    TEST_EQUAL("f042dc98a62cbad68dbe21f11bbc1e9d416d2bf6", sha1_str("9b56d519ccd9e1e5b2a725e186184cdc68de0731"));

    TEST_EQUAL("84983e441c3bd26ebaae4aa1f95129e5e54670f1",
        sha1_str("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"));

    TEST_EQUAL("761c457bf73b14d27e9e9265c46f4b4dda11f940",
        sha1_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"));

    TEST_EQUAL("50abf5706a150990a08b2c5ea40fa0e585554732",
        sha1_str("12345678901234567890123456789012345678901234567890123456789012345678901234567890"));
}

void test_big()
{
#   define BIG_LEN 1000000
    char big[BIG_LEN];
    for (int i = 0; i < BIG_LEN; ++i)
        big[i] = 'a';
    const char expected_result[] = "34aa973cd4c4daa4f61eeb2bdbad27316534016f";

    // one call to update() with 1,000,000 bytes
    sha1 digest;
    digest.update(big, BIG_LEN);
    TEST_EQUAL(expected_result, to_string(digest.final_value()));

    // 100 calls to update() with 10,000 bytes each
    digest.reset();
    for (int j = 0; j < 100; ++j)
        digest.update(big + BIG_LEN/100 * j, BIG_LEN/100);
    TEST_EQUAL(expected_result, to_string(digest.final_value()));

    // 1,000 calls to update() with 1,000 bytes each
    digest.reset();
    for (int j = 0; j < 1000; ++j)
        digest.update(big + BIG_LEN/1000 * j, BIG_LEN/1000);
    TEST_EQUAL(expected_result, to_string(digest.final_value()));
    
    // 1,000,000 calls to update() with 1 byte each
    digest.reset();
    for (int j = 0; j < BIG_LEN; ++j)
        digest.update(big + j, 1);
    TEST_EQUAL(expected_result, to_string(digest.final_value()));
    
    // digest 8 GiB in 64 KiB bites
    digest.reset();
    for (int j = 0; j < 128 * 1024; ++j)
        digest.update(big, 64 * 1024);
    TEST_EQUAL("971a7246e08be6e4d11ccdf362e19b19ca358792", to_string(digest.final_value()));
}

void demonstrate_usage()
{
    sha1 digest;
    
    // call update() as many times as you need to digest the data
    digest.update("abc", 3);
    digest.update("def", 3);
    digest.update("ghijklmnopqrstuvwxyz", 20);

    // call final_value() as many times as you need to access the result
    TEST_EQUAL("32d10c7b8cf96570ca04ce37f2a19d84240d3a89", to_string(digest.final_value()));
    TEST_EQUAL(0x240d3a89, digest.final_value()[4]);
    unsigned d[5];
    const unsigned * p = &digest.final_value()[0];
    std::copy(p, p + 5, d);
    TEST_EQUAL("32d10c7b8cf96570ca04ce37f2a19d84240d3a89", to_string(d));

    // call reset() to reuse the object
    digest.reset();
    digest.update("abc", 3);
    TEST_EQUAL("a9993e364706816aba3e25717850c26c9cd0d89d", to_string(digest.final_value()));
}

int main ()
{
    test_assumptions();
    demonstrate_usage();
    test_known_values();
    test_big();
    
    std::cout
        << "total tests " << g_test_count
        << ", total failures " << g_fault_count
        << "\n";
    return g_fault_count ? EXIT_FAILURE : EXIT_SUCCESS;
}

The code is available on github here. Thank you for reading.

UPDATE: The namespace nettle_soup came to me in the car. I thought: I like how it's common or garden and a bit quirky and no one will be using nettle_soup for a namespace so it won't clash with anyone's existing code. And lo, I was wrong; there is a library of cryptographic functions, including SHA-1, called Nettle.

index of blog posts

About Me

How to Write a Program

06 October 2010

SHA-1 in Lisp and other languages

No comments:

Post a Comment