Skip to content

Commit 1fe4ce4

Browse files
committed
added hash
1 parent b1b8bad commit 1fe4ce4

6 files changed

Lines changed: 198 additions & 1 deletion

File tree

CHANGELOG.txt

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,66 @@
11
CHANGELOG
22
=========
33

4+
2026-05-14 Synthesis fingerprint (--hash)
5+
- New --hash flag prints a 64-bit deterministic digest of
6+
the synthesised netlist to stdout, formatted as
7+
"hash: %016" PRIx64, so that two runs of takahe on the same
8+
input source produce byte-identical digests and any
9+
divergence between runs surfaces as an immediate hash
10+
mismatch rather than as silent non-determinism that only
11+
becomes visible when somebody tries to reproduce a build
12+
months later.
13+
- The underlying algorithm is FNV-1a 64-bit, chosen because
14+
it fits in twelve lines, has zero external dependencies,
15+
and is more than sufficient for the question being asked,
16+
which is "did these two synthesis runs produce the same
17+
netlist" rather than "can an adversary forge a collision";
18+
a future bump to SHA-256 is a drop-in replacement should
19+
a stronger fingerprint ever become necessary.
20+
- The canonical byte stream consumed by the hash covers
21+
every net's width, radix, port role, and driver index,
22+
followed by every cell's type, width, output net, input
23+
arity, input net indices, and parameter value, walked in
24+
array order with index 0 reserved as the pool sentinel as
25+
is the convention everywhere else in the codebase.
26+
- Net names and cell names are deliberately excluded from
27+
the canonical form because a wire renamed from q to q_r
28+
is the same hardware and the fingerprint should agree;
29+
generation counters and other runtime bookkeeping are
30+
likewise excluded for the same reason.
31+
- Slots vacated by deletion passes (cells whose type has
32+
been overwritten with the RT_CELL_COUNT sentinel) are
33+
folded into the hash as a single zero byte so that
34+
deletions remain distinct from rewrites and so that
35+
reordering passes never accidentally collide.
36+
- The canonical form carries a version word (HS_VER, set to
37+
1 in this release) which must be bumped any time the set
38+
of fields included in the hash or the order in which they
39+
are emitted changes, on the principle that saved digests
40+
become meaningfully invalid the moment the form changes
41+
and the version word, hashed right after the "TKH1"
42+
magic, makes the divergence show up in the header rather
43+
than as a confusing mid-stream collision much later.
44+
- Smoke verification on tests/smoke.sv produces the digest
45+
5e2f63ed79b19097 reproducibly across consecutive runs,
46+
tests/bigger.sv produces 8c4d1dacdbf02ee0 reproducibly
47+
both before and after optimisation since op_opt finds no
48+
work on that particular design, and designs/voyager_fds.sv
49+
correctly diverges from 40a6def1e6c5fc0e pre-optimisation
50+
to fadd81c343371411 after the optimiser applies its 24
51+
transformations, confirming that the hash both holds
52+
steady when the netlist holds steady and changes when the
53+
netlist changes.
54+
- Implementation lives in src/emit/tk_hash.c at roughly 125
55+
lines, uses the hs_ file prefix consistent with the rest
56+
of the emit directory, exposes mp_hash as the public
57+
entry point in include/takahe.h alongside mp_blif and
58+
mp_yosys, and compiles cleanly under the full project
59+
warning gauntlet of -Wall -Wextra -Werror -pedantic plus
60+
twelve additional diagnostics.
61+
- All 76 existing tests continue to pass with no regression
62+
introduced by the new flag or its supporting code.
63+
464
2026-03-28 ABEL-HDL frontend (third language)
565
- Lexer and parser for ABEL-HDL (Data I/O / Synario, 1995)
666
- Definition file: 37 keywords, 36 operators (abel_tok.def)

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ SRCS = src/main.c \
6060
src/map/tk_fpga.c \
6161
src/emit/tk_blif.c \
6262
src/emit/tk_yosys.c \
63-
src/emit/tk_vlog.c
63+
src/emit/tk_vlog.c \
64+
src/emit/tk_hash.c
6465

6566
OBJS = $(SRCS:.c=.o)
6667
TARGET = takahe

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ takahe [flags] <source.sv|.vhd>
166166
--parse dump AST + RTL
167167
--opt optimise (cprop + pattern match + DCE)
168168
--equiv equivalence check (pre-opt vs post-opt)
169+
--hash print 64-bit fingerprint of synthesised netlist
169170
--blif <f> emit BLIF netlist
170171
--yosys <f> emit Yosys JSON netlist
171172
--lib <f> Liberty .lib cell library

include/takahe.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ int op_dce (rt_mod_t *M);
599599
/* Technology mapping / export */
600600
int mp_blif (const rt_mod_t *M, FILE *fp);
601601
int mp_yosys(const rt_mod_t *M, FILE *fp);
602+
uint64_t mp_hash (const rt_mod_t *M);
602603

603604
/* ---- Exact Timing Arithmetic ----
604605
* Femtoseconds and attofarads. Integer. No floats.

src/emit/tk_hash.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/* Copyright (c) 2026 Zane Hambly
2+
* This Source Code Form is subject to the terms of the Mozilla Public
3+
* License, v. 2.0. If a copy of the MPL was not distributed with this
4+
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
5+
6+
/*
7+
* tk_hash.c -- Synthesis fingerprint for Takahe RTL
8+
*
9+
* Hash a netlist deterministically. Two runs on the same
10+
* input produce the same digest. When they don't, something
11+
* has crept in that you didn't intend: a drifted pass
12+
* iteration order, an uninitialised byte, a clock cycle
13+
* the gremlins won at darts.
14+
*
15+
* FNV-1a, 64 bits. Not a cryptographic hash. It won't
16+
* survive an adversary but it will catch the day your
17+
* pipeline turned non-deterministic, which is the one
18+
* thing anyone actually wants to know about a synthesis
19+
* tool. IBM used CRCs to detect tape errors for forty
20+
* years before anyone needed SHA-anything. The principle
21+
* holds.
22+
*
23+
* Canonical form: bump HS_VER any time you change what
24+
* goes into the hash or in what order. Saved digests
25+
* become meaningfully invalid the moment the form changes,
26+
* like postage stamps after a regime change.
27+
*/
28+
29+
#include "takahe.h"
30+
31+
#define HS_VER 1u
32+
#define HS_OFFSET 0xcbf29ce484222325ULL
33+
#define HS_PRIME 0x100000001b3ULL
34+
35+
/* ---- FNV-1a primitives ----
36+
* Twelve lines, no dependencies, faster than fetching
37+
* SHA-256 off the internet. A measurable engineering
38+
* virtue. */
39+
40+
static uint64_t
41+
hs_byte(uint64_t h, uint8_t b)
42+
{
43+
h ^= (uint64_t)b;
44+
h *= HS_PRIME;
45+
return h;
46+
}
47+
48+
/* Fold a 32-bit value into the hash one byte at a time,
 * least significant byte first. The byte order is fixed
 * by the shifts, not by the host, so the digest does not
 * depend on machine endianness.
 *
 *   h  running hash state
 *   v  value to fold in
 *
 * Returns the updated state. */
static uint64_t
hs_u32(uint64_t h, uint32_t v)
{
    unsigned shift;

    for (shift = 0u; shift < 32u; shift += 8u) {
        h = hs_byte(h, (uint8_t)((v >> shift) & 0xffu));
    }
    return h;
}
57+
58+
/* Fold a 64-bit value into the hash as two 32-bit halves:
 * the low word goes in first, then the high word, each
 * through hs_u32.
 *
 *   h  running hash state
 *   v  value to fold in
 *
 * Returns the updated state. */
static uint64_t
hs_u64(uint64_t h, uint64_t v)
{
    const uint32_t lo = (uint32_t)(v & 0xffffffffULL);
    const uint32_t hi = (uint32_t)(v >> 32);

    return hs_u32(hs_u32(h, lo), hi);
}
65+
66+
/* ---- Public: hash a module ----
67+
* Walks nets then cells in array order. The walk order
68+
* is part of the canonical form: anyone changing pool
69+
* allocation strategy must bump HS_VER. Cell and net
70+
* indices are deterministic per run on the same input,
71+
* which is all a fingerprint needs to be useful.
72+
*
73+
* Names are NOT hashed. A wire renamed from q to q_r is
74+
* the same hardware. The hash agrees. */
75+
76+
uint64_t
77+
mp_hash(const rt_mod_t *M)
78+
{
79+
uint64_t h = HS_OFFSET;
80+
uint32_t i;
81+
uint8_t j;
82+
83+
if (!M) return 0;
84+
85+
/* Magic + version. Later versions diverge from byte one,
86+
* so a v1 digest will never collide with a v2 digest by
87+
* accident. Belt, braces, and a length of fencing wire. */
88+
h = hs_byte(h, (uint8_t)'T');
89+
h = hs_byte(h, (uint8_t)'K');
90+
h = hs_byte(h, (uint8_t)'H');
91+
h = hs_byte(h, (uint8_t)'1');
92+
h = hs_u32(h, HS_VER);
93+
94+
/* Nets. Index 0 is the sentinel; every pool reserves it
95+
* for "nobody's home". Skip it deliberately. */
96+
h = hs_u32(h, M->n_net);
97+
for (i = 1; i < M->n_net; i++) {
98+
const rt_net_t *n = &M->nets[i];
99+
h = hs_u32 (h, n->width);
100+
h = hs_byte(h, n->radix);
101+
h = hs_byte(h, n->is_port);
102+
h = hs_u32 (h, n->driver);
103+
}
104+
105+
/* Cells. RT_CELL_COUNT marks a slot vacated by some
106+
* pass — record the gap as a single zero byte so a
107+
* deletion doesn't collide with an unrelated rewrite. */
108+
h = hs_u32(h, M->n_cell);
109+
for (i = 1; i < M->n_cell; i++) {
110+
const rt_cell_t *c = &M->cells[i];
111+
if (c->type == RT_CELL_COUNT) {
112+
h = hs_byte(h, 0);
113+
continue;
114+
}
115+
h = hs_byte(h, (uint8_t)c->type);
116+
h = hs_u32 (h, c->width);
117+
h = hs_u32 (h, c->out);
118+
h = hs_byte(h, c->n_in);
119+
for (j = 0; j < c->n_in; j++)
120+
h = hs_u32(h, c->ins[j]);
121+
h = hs_u64(h, (uint64_t)c->param);
122+
}
123+
124+
return h;
125+
}

src/main.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ usage(const char *prog)
3232
printf(" --parse dump AST + RTL IR\n");
3333
printf(" --opt optimise (constant propagation + DCE)\n");
3434
printf(" --equiv equivalence check (pre-opt vs post-opt)\n");
35+
printf(" --hash print 64-bit fingerprint of synthesised netlist\n");
3536
printf(" --tmr radiation hardening (triplicate DFFs + voters)\n");
3637
printf(" --tmr-full radiation hardening (triplicate everything)\n");
3738
printf(" --fpga <f> emit nextpnr JSON for iCE40 FPGA\n");
@@ -66,6 +67,7 @@ main(int argc, char **argv)
6667
int mode_vhdl = 0;
6768
int mode_abel = 0;
6869
int mode_equiv = 0;
70+
int mode_hash = 0;
6971
const char *fpga_path = NULL;
7072
int mode_tmr = 0;
7173
int tmr_full = 0;
@@ -126,6 +128,9 @@ main(int argc, char **argv)
126128
mode_equiv = 1;
127129
mode_opt = 1;
128130
mode_parse = 1;
131+
} else if (strcmp(argv[i], "--hash") == 0) {
132+
mode_hash = 1;
133+
mode_parse = 1;
129134
} else if (strcmp(argv[i], "--vhdl") == 0) {
130135
mode_vhdl = 1;
131136
def_path = "defs/vhdl_tok.def";
@@ -503,6 +508,10 @@ main(int argc, char **argv)
503508
fpga_path);
504509
}
505510
}
511+
if (mode_hash) {
512+
printf("hash: %016" PRIx64 "\n",
513+
mp_hash(rtl));
514+
}
506515
if (map_path) {
507516
if (!lib_path) {
508517
fprintf(stderr,

0 commit comments

Comments
 (0)