first reported by watzon on IRC:
git clone https://github.com/Leandros/PackCC
cd PackCC
zig cc -o packcc src/packcc.c
./packcc test.peg
letter <- [a-z]
upper_letter <- [A-Z]
digit <- [0-9]
hex_digit <- [a-fA-F0-9]
./packcc test.peg
zsh: illegal hardware instruction ./packcc < test.peg

zig targets "native": {
"triple": "x86_64-macosx.10.15.4...10.15.4-gnu",
"cpu": {
"arch": "x86_64",
"name": "skylake",
"features": [
"64bit",
"adx",
"aes",
"avx",
"avx2",
"bmi",
"bmi2",
"clflushopt",
"cmov",
"cx16",
"cx8",
"ermsb",
"f16c",
"false_deps_popcnt",
"fast_gather",
"fast_scalar_fsqrt",
"fast_shld_rotate",
"fast_variable_shuffle",
"fast_vector_fsqrt",
"fma",
"fsgsbase",
"fxsr",
"idivq_to_divl",
"invpcid",
"lzcnt",
"macrofusion",
"merge_to_threeway_branch",
"mmx",
"movbe",
"nopl",
"pclmul",
"popcnt",
"prfchw",
"rdrnd",
"rdseed",
"rtm",
"sahf",
"sgx",
"slow_3ops_lea",
"sse",
"sse2",
"sse3",
"sse4_1",
"sse4_2",
"ssse3",
"vzeroupper",
"x87",
"xsave",
"xsavec",
"xsaveopt",
"xsaves"
]
},
"os": "macosx",
"abi": "gnu"
}
}
zig cc -### -o packcc src/packcc.c/Users/mike/project/zig/work/main/_build/zig clang -c -MD -MV -MF zig-cache/tmp/IxXTZWm1VAPe-packcc.o.d -nostdinc -fno-spell-checking -isystem /Users/mike/project/zig/work/main/lib/include -isystem /Applications/Xcode_11.4.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -target x86_64-unknown-macosx-gnu -Xclang -target-cpu -Xclang skylake -Xclang -target-feature -Xclang -3dnow,-3dnowa,+64bit,+adx,+aes,+avx,+avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,+bmi,+bmi2,-branchfusion,-cldemote,+clflushopt,-clwb,-clzero,+cmov,+cx16,+cx8,-enqcmd,+ermsb,+f16c,-false-deps-lzcnt-tzcnt,+false-deps-popcnt,-fast-11bytenop,-fast-15bytenop,-fast-bextr,+fast-gather,-fast-hops,-fast-lzcnt,+fast-scalar-fsqrt,-fast-scalar-shift-masks,+fast-shld-rotate,+fast-variable-shuffle,+fast-vector-fsqrt,-fast-vector-shift-masks,+fma,-fma4,+fsgsbase,+fxsr,-gfni,-idivl-to-divb,+idivq-to-divl,+invpcid,-lea-sp,-lea-uses-ag,-lwp,+lzcnt,+macrofusion,+merge-to-threeway-branch,+mmx,+movbe,-movdir64b,-movdiri,-mpx,-mwaitx,+nopl,-pad-short-functions,+pclmul,-pconfig,-pku,+popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefetchwt1,+prfchw,-ptwrite,-rdpid,+rdrnd,+rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,+rtm,+sahf,+sgx,-sha,-shstk,+slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-soft-float,+sse,-sse-unaligned-mem,+sse2,+sse3,+sse4.1,+sse4.2,-sse4a,+ssse3,-tbm,-use-aa,-use-glm-div-sqrt-costs,-vaes,-vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,+x87,-xop,+xsave,+xsavec,+xsaveopt,+xsaves -fno-omit-frame-pointer -fsanitize=undefined -fsanitize-trap=undefined -D_DEBUG -Og -fstack-protector-strong --param ssp-buffer-size=4 -fPIC -o zig-cache/tmp/IxXTZWm1VAPe-packcc.o src/packcc.c
ar rcs /Users/mike/Library/Application Support/zig/stage1/o/GvmuHNTPAJzWv4hHlwxAEBIeh15etRIXXmlsOcHFEOZ061si1Hhb4A02b2UnKWcO/libcompiler_rt.a /Users/mike/Library/Application Support/zig/stage1/o/GvmuHNTPAJzWv4hHlwxAEBIeh15etRIXXmlsOcHFEOZ061si1Hhb4A02b2UnKWcO/compiler_rt.o
lld -demangle -dynamic -arch x86_64 -macosx_version_min 10.15.4 -sdk_version 10.15.4 -pie -o zig-cache/o/xJW7ZQWEqb9z7llI0wHLzdlmW8QF1hAv0PAfNnSefVEjBrp42BCdVeEZDXKfEhii/packcc zig-cache/o/H6HlXLAQ3Ic2o5f8hlLVAOk1a4sTAYHY2s6FwsovThZM9Jbpmzbu7BI1KFVmn90R/packcc.o /Users/mike/Library/Application Support/zig/stage1/o/GvmuHNTPAJzWv4hHlwxAEBIeh15etRIXXmlsOcHFEOZ061si1Hhb4A02b2UnKWcO/libcompiler_rt.a -lSystem
zig targets from watzon on fedora"native": {
"triple": "x86_64-linux.5.5.10...5.5.10-gnu.2.17",
"cpu": {
"arch": "x86_64",
"name": "skylake",
"features": [
"64bit",
"adx",
"aes",
"avx",
"avx2",
"bmi",
"bmi2",
"clflushopt",
"cmov",
"cx16",
"cx8",
"ermsb",
"f16c",
"false_deps_popcnt",
"fast_gather",
"fast_scalar_fsqrt",
"fast_shld_rotate",
"fast_variable_shuffle",
"fast_vector_fsqrt",
"fma",
"fsgsbase",
"fxsr",
"idivq_to_divl",
"invpcid",
"lzcnt",
"macrofusion",
"merge_to_threeway_branch",
"mmx",
"movbe",
"nopl",
"pclmul",
"popcnt",
"prfchw",
"rdrnd",
"rdseed",
"sahf",
"sgx",
"slow_3ops_lea",
"sse",
"sse2",
"sse3",
"sse4_1",
"sse4_2",
"ssse3",
"vzeroupper",
"x87",
"xsave",
"xsavec",
"xsaveopt",
"xsaves"
]
},
"os": "linux",
"abi": "gnu"
}
I posted a workaround for this on the IRC channel. This doesn't fix the underlying issue however. This is what I posted:
(02:25:09 AM) wilsonk: watzon: it appears that zig cc is perhaps defaulting to some stricter casting checks or something. There is a 'ud2' illegal instruction being inserted into the function 'hash_string' in packcc.c when zig cc is run. This illegal instruction has two ops after it that designate the __file__ and __line__ of the issue (I can't seem to see those values in gdb though)...anyways, after hash_string iterates over 5 incoming letters of the string to hash then the hash exceeds the size of an int. This causes the 'ud2' instruction to run.
(02:27:38 AM) wilsonk: _The fix: change the 'h' variable in hash_string to a long and then cast to an int for the return (ie. int i=0; long h=0; ... return (int)h; ). That should fix things but I am not exactly sure how to fix the overall problem or if andrew would still prefer this stricter check on casting?_
(02:28:30 AM) wilsonk: watzon: then your test grammar should work and the example at the bottom of the packcc documentation also works
So the fixed code would be:
static int hash_string(const char *str) {
int i = 0;
long h = 0;
for (i = 0; str[i]; i++) {
h = h * 31 + str[i];
}
return (int)h;
}
Zig is compiling the c code with clang's undefined behavior sanitizer when built in debug mode. So when you overflow h as a signed type, an illegal instruction is executed.
Is the hash depending on wrap around semantics? If it is, then making h unsigned is the correct fix, since unsigned overflow is well defined to wrap around.
hash_string invokes undefined behavior as the h variable used to hold the hash value is signed and, as y'all already know, that's UB in C.
The ud2 is inserted by LLVM in order to catch this error together with many other. If you compile the C file with the following invocation you'll get the same ASM code and the same SIGILL error.
clang-10 -fno-omit-frame-pointer -O1 -fsanitize=undefined -fsanitize-undefined-trap-on-error -o packcc src/packcc.c
If you check out the ASM produced by zig cc you can easily see that many safety checks are inserted:
216550: 55 pushq %rbp
216551: 48 89 e5 movq %rsp, %rbp
216554: 48 85 ff testq %rdi, %rdi
# Here it checks that `str` != NULL
216557: 74 58 je 88 <hash_string+0x61>
216559: 45 31 c0 xorl %r8d, %r8d
21655c: 48 89 fa movq %rdi, %rdx
21655f: 31 c0 xorl %eax, %eax
216561: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
21656b: 0f 1f 44 00 00 nopl (%rax,%rax)
216570: 0f be 12 movsbl (%rdx), %edx
216573: 85 d2 testl %edx, %edx
216575: 74 3c je 60 <hash_string+0x63>
216577: 6b c0 1f imull $31, %eax, %eax
# Check for signed integer overflow (h * 31)
21657a: 70 35 jo 53 <hash_string+0x61>
21657c: 01 d0 addl %edx, %eax
# Check for signed integer overflow (prev_value + str[i])
21657e: 70 31 jo 49 <hash_string+0x61>
216580: 41 ff c0 incl %r8d
# Check for signed integer overflow (i++)
216583: 70 2c jo 44 <hash_string+0x61>
216585: 4d 63 c8 movslq %r8d, %r9
216588: 31 c9 xorl %ecx, %ecx
21658a: 4c 89 ca movq %r9, %rdx
21658d: 48 01 fa addq %rdi, %rdx
216590: 0f 92 c1 setb %cl
216593: 31 f6 xorl %esi, %esi
216595: 48 39 fa cmpq %rdi, %rdx
216598: 40 0f 93 c6 setae %sil
21659c: 45 85 c9 testl %r9d, %r9d
21659f: 0f 48 f1 cmovsl %ecx, %esi
2165a2: 48 85 ff testq %rdi, %rdi
# More overflow/wraparound checks
2165a5: 74 0a je 10 <hash_string+0x61>
2165a7: 48 85 d2 testq %rdx, %rdx
# More overflow/wraparound checks
2165aa: 74 05 je 5 <hash_string+0x61>
2165ac: 40 84 f6 testb %sil, %sil
2165af: 75 bf jne -65 <hash_string+0x20>
2165b1: 0f 0b ud2
2165b3: 5d popq %rbp
2165b4: c3 retq
2165b5: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
2165bf: 90 nop
Figuring out why such checks are always enabled in zig cc is left as an exercise for the reader.
Figuring out why such checks are always enabled in zig cc is left as an exercise for the reader.
I can speak to this. zig cc is intentionally an interface to the zig compiler, a bit higher level than using clang directly. With no optimization flags specified, zig cc infers debug mode. As you know from writing Zig code, debug mode has safety checks to prevent undefined behavior at runtime. This applies for C code as well, taking advantage of clang's UBSAN. The fact that debug mode is "default" is entirely intentional. I expect this to identify many bugs in existing codebases as people use zig cc out of convenience or curiosity and their code gets vetted by UBSAN for the first time.
Note that the presence of -O2,-O3 will cause zig to select release-fast, -Os will cause zig to select release-small, and optimization flags plus -fsanitize=undefined will cause zig to select release-safe.
Most helpful comment
I can speak to this.
zig ccis intentionally an interface to the zig compiler, a bit higher level than using clang directly. With no optimization flags specified,zig ccinfers debug mode. As you know from writing Zig code, debug mode has safety checks to prevent undefined behavior at runtime. This applies for C code as well, taking advantage of clang's UBSAN. The fact that debug mode is "default" is entirely intentional. I expect this to identify many bugs in existing codebases as people usezig ccout of convenience or curiosity and their code gets vetted by UBSAN for the first time.Note that the presence of
-O2,-O3will cause zig to select release-fast,-Oswill cause zig to select release-small, and optimization flags plus-fsanitize=undefinedwill cause zig to select release-safe.