Just to make some use of my cross compiler, here’s how the generated x86_64 assembly stacks up for the restrict example which I posted before.
Without restrict:
# fibincr2 as compiled without restrict (GAS / AT&T syntax: op src, dst).
# On entry: rdi = a, rsi = b. A and B denote the initial values of *a and *b.
# The value after "=>" is the result of each line, assuming a and b point to
# distinct 32-bit objects.
# Net effect: *a = 2A + B, *b = A + B.
fibincr2:
movl (%rdi), %eax # eax = *a => A
addl (%rsi), %eax # eax += *b => A + B
movl %eax, (%rdi) # *a = eax => A + B
movl (%rsi), %edx # edx = *b => B (read from memory again, after the store to *a)
subl %eax, %edx # edx -= eax => -A
movl %edx, (%rsi) # *b = edx => -A
movl (%rdi), %eax # eax = *a => A + B (read from memory again, after the store to *b)
subl %edx, %eax # eax -= edx => 2A + B
movl %eax, (%rdi) # *a = eax => 2A + B
addl %eax, (%rsi) # *b += eax => A + B (read-modify-write directly on memory)
ret
6 mov, 2 sub (2 register only), 2 add
With restrict:
# fibincr2 as compiled with restrict (GAS / AT&T syntax: op src, dst).
# On entry: rdi = a, rsi = b. A and B denote the initial values of *a and *b.
# The value after "=>" is the result of each line.
# *a is loaded once at the top and stored once at the bottom; its intermediate
# values live only in registers.
# Net effect: *a = 2A + B, *b = A + B.
fibincr2:
movl (%rdi), %eax # eax = *a => A (the only load of *a)
movl %eax, %edx # edx = eax => A
addl (%rsi), %edx # edx += *b => A + B
negl %eax # eax = -eax => -A
movl %eax, (%rsi) # *b = eax => -A
subl %eax, %edx # edx -= eax => 2A + B
addl %edx, (%rsi) # *b += edx => A + B (read-modify-write directly on memory)
movl %edx, (%rdi) # *a = edx => 2A + B (the only store to *a)
ret
4 mov (1 register only), 1 sub (1 register only), 2 add, 1 neg (1 register only)
Alternate algorithm:
# fibincr2, alternate algorithm (GAS / AT&T syntax: op src, dst).
# On entry: rdi = a, rsi = b. A and B denote the initial values of *a and *b.
# The value after "=>" is the result of each line, assuming a and b point to
# distinct 32-bit objects.
# Net effect: *a = 2A + B, *b = A + B.
fibincr2:
movl (%rdi), %edx # edx = *a => A
leal (%rdx,%rdx), %eax # eax = edx + edx => 2A (lea used as a plain add)
addl (%rsi), %eax # eax += *b => 2A + B
addl %edx, (%rsi) # *b += edx => A + B (read-modify-write directly on memory)
movl %eax, (%rdi) # *a = eax => 2A + B
ret
2 mov, 2 add, 1 lea.
So the instruction counts for the algorithms stay exactly the same, but because the x86_64 calling convention passes arguments in registers, there is no additional overhead from stack-access instructions.