本文尝试从汇编的角度给出有符号整数比较与无符号整数比较的区别所在。 在《深入理解计算机系统》(英文版第二版)一书中的Page#77,有下面一个练习题:
将该练习题中的示例代码(补上一个main函数)写入foo1.c文件,运行并找出产生bug的代码行。
1. foo1.c
1 #include <stdio.h> 2 3 float sum_elements(float a[], unsigned length) 4 { 5 int i; 6 float result = 0; 7 for (i = 0; i <= length-1; i++) 8 result += a[i]; 9 return result; 10 } 11 12 int main(int argc, char *argv[]) 13 { 14 float a[] = {1.0, 2.0, 3.0}; 15 float m = sum_elements(a, 0); 16 printf("%.1f\n", m); 17 return 0; 18 }
编译并运行,发现存在着非法内存访问,
$ ulimit -c unlimited $ gcc -g -Wall -std=c99 -o foo1 foo1.c $ ./foo1 Segmentation fault (core dumped)
用gdb查看一下core文件,
$ gdb foo1 core GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1 ...<snip>.................................... Reading symbols from foo1...done. [New LWP 3403] Core was generated by `./foo1'. Program terminated with signal SIGSEGV, Segmentation fault. #0 0x08048446 in sum_elements (a=0xbfdd50a4, length=0) at foo1.c:8 8 result += a[i]; (gdb) bt #0 0x08048446 in sum_elements (a=0xbfdd50a4, length=0) at foo1.c:8 #1 0x080484a1 in main (argc=1, argv=0xbfdd5154) at foo1.c:15 (gdb) l 6,8 6 float result = 0; 7 for (i = 0; i <= length-1; i++) 8 result += a[i]; (gdb)
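我们可以看出,程序崩溃(产生core)的位置在第8行,但有bug的代码则是第7行。 (第6行不可能有bug) 注意length是一个无符号整数,而i则是一个有符号整数。我们期望的结果是,当length等于0的时候,length-1为-1,其实则不然: length-1按无符号数运算,结果回绕为UINT_MAX(在32位unsigned下即0xFFFFFFFF); 并且在比较i <= length-1时,i也会先被隐式转换为无符号整数。于是实际运行的时候,i <= length-1的条件恒成立,代码运行到第8行。当i>=3的时候即发生数组越界访问,随着i继续增大,最终会访问到未映射的内存,从而触发段错误。 从C语言编程的角度,修复这一行很简单,有两种方法: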
- for (i = 0; i < length; i++)
- for (i = 0; i <= (int)length - 1; i++)
但这还不足以说明问题的本质。下面使用第二种修复方法给出foo2.c,然后通过反汇编比较foo1.c和foo2.c,从而给出有符号整数比较与无符号整数比较的区别所在。
2. foo2.c
1 #include <stdio.h> 2 3 float sum_elements(float a[], unsigned length) 4 { 5 int i; 6 float result = 0; 7 for (i = 0; i <= (int)length-1; i++) 8 result += a[i]; 9 return result; 10 } 11 12 int main(int argc, char *argv[]) 13 { 14 float a[] = {1.0, 2.0, 3.0}; 15 float m = sum_elements(a, 0); 16 printf("%.1f\n", m); 17 return 0; 18 }
编译并运行
$ rm -f core $ ulimit -c unlimited $ gcc -g -Wall -std=c99 -o foo2 foo2.c $ ./foo2 0.0
将foo1里的函数sum_elements反汇编存入foo1.gdb.out,
1 (gdb) disas /m sum_elements 2 Dump of assembler code for function sum_elements: 3 4 { 4 0x0804841d <+0>: push ebp 5 0x0804841e <+1>: mov ebp,esp 6 0x08048420 <+3>: sub esp,0x18 7 8 5 int i; 9 6 float result = 0; 10 0x08048423 <+6>: mov eax,ds:0x8048558 11 0x08048428 <+11>: mov DWORD PTR [ebp-0x4],eax 12 13 7 for (i = 0; i <= length-1; i++) 14 0x0804842b <+14>: mov DWORD PTR [ebp-0x8],0x0 15 0x08048432 <+21>: jmp 0x8048451 <sum_elements+52> 16 0x0804844d <+48>: add DWORD PTR [ebp-0x8],0x1 17 0x08048451 <+52>: mov eax,DWORD PTR [ebp-0x8] 18 0x08048454 <+55>: mov edx,DWORD PTR [ebp+0xc] 19 0x08048457 <+58>: sub edx,0x1 20 0x0804845a <+61>: cmp eax,edx 21 0x0804845c <+63>: jbe 0x8048434 <sum_elements+23> 22 23 8 result += a[i]; 24 0x08048434 <+23>: fld DWORD PTR [ebp-0x4] 25 0x08048437 <+26>: mov eax,DWORD PTR [ebp-0x8] 26 0x0804843a <+29>: lea edx,[eax*4+0x0] 27 0x08048441 <+36>: mov eax,DWORD PTR [ebp+0x8] 28 0x08048444 <+39>: add eax,edx 29 0x08048446 <+41>: fld DWORD PTR [eax] 30 0x08048448 <+43>: faddp st(1),st 31 0x0804844a <+45>: fstp DWORD PTR [ebp-0x4] 32 33 9 return result; 34 0x0804845e <+65>: mov eax,DWORD PTR [ebp-0x4] 35 0x08048461 <+68>: mov DWORD PTR [ebp-0x18],eax 36 0x08048464 <+71>: fld DWORD PTR [ebp-0x18] 37 38 10 } 39 0x08048467 <+74>: leave 40 0x08048468 <+75>: ret 41 42 End of assembler dump.
将foo2里的函数sum_elements反汇编存入foo2.gdb.out,
1 (gdb) disas /m sum_elements 2 Dump of assembler code for function sum_elements: 3 4 { 4 0x0804841d <+0>: push ebp 5 0x0804841e <+1>: mov ebp,esp 6 0x08048420 <+3>: sub esp,0x18 7 8 5 int i; 9 6 float result = 0; 10 0x08048423 <+6>: mov eax,ds:0x8048558 11 0x08048428 <+11>: mov DWORD PTR [ebp-0x4],eax 12 13 7 for (i = 0; i <= (int)length-1; i++) 14 0x0804842b <+14>: mov DWORD PTR [ebp-0x8],0x0 15 0x08048432 <+21>: jmp 0x8048451 <sum_elements+52> 16 0x0804844d <+48>: add DWORD PTR [ebp-0x8],0x1 17 0x08048451 <+52>: mov eax,DWORD PTR [ebp+0xc] 18 0x08048454 <+55>: sub eax,0x1 19 0x08048457 <+58>: cmp eax,DWORD PTR [ebp-0x8] 20 0x0804845a <+61>: jge 0x8048434 <sum_elements+23> 21 22 8 result += a[i]; 23 0x08048434 <+23>: fld DWORD PTR [ebp-0x4] 24 0x08048437 <+26>: mov eax,DWORD PTR [ebp-0x8] 25 0x0804843a <+29>: lea edx,[eax*4+0x0] 26 0x08048441 <+36>: mov eax,DWORD PTR [ebp+0x8] 27 0x08048444 <+39>: add eax,edx 28 0x08048446 <+41>: fld DWORD PTR [eax] 29 0x08048448 <+43>: faddp st(1),st 30 0x0804844a <+45>: fstp DWORD PTR [ebp-0x4] 31 32 9 return result; 33 0x0804845c <+63>: mov eax,DWORD PTR [ebp-0x4] 34 0x0804845f <+66>: mov DWORD PTR [ebp-0x18],eax 35 0x08048462 <+69>: fld DWORD PTR [ebp-0x18] 36 37 10 } 38 0x08048465 <+72>: leave 39 0x08048466 <+73>: ret 40 41 End of assembler dump.
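使用meld对比foo1.gdb.out和foo2.gdb.out,可以看到两者的关键差别在于第7行循环条件所对应的比较与跳转指令: foo1在cmp eax,edx之后使用jbe(below or equal,按无符号数判断小于等于时跳转),而foo2在cmp eax,DWORD PTR [ebp-0x8]之后使用jge(greater or equal,按有符号数判断大于等于时跳转)。也就是说,cmp指令本身并不区分有符号数与无符号数,区别在于编译器根据操作数的类型选用了不同的条件跳转指令。对比如下,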
时间: 2024-10-11 18:18:27