我们来考察下pthread中锁的实现。
首先看下初始化宏:PTHREAD_MUTEX_INITIALIZER。
# define PTHREAD_MUTEX_INITIALIZER { { 0, 0, 0, 0, 0, __PTHREAD_SPINS, { 0, 0 } } }
/* Data structures for mutex handling. The structure of the attribute type is not exposed on purpose. *//*删减了32位的代码*/ typedef union { struct __pthread_mutex_s { int __lock; unsigned int __count; int __owner; #ifdef __x86_64__ unsigned int __nusers; #endif /* KIND must stay at this position in the structure to maintain binary compatibility with static initializers. */ int __kind; #ifdef __x86_64__ short __spins; short __elision; __pthread_list_t __list; # define __PTHREAD_MUTEX_HAVE_PREV 1 /* Mutex __spins initializer used by PTHREAD_MUTEX_INITIALIZER. */ # define __PTHREAD_SPINS 0, 0 #else #endif } __data; char __size[__SIZEOF_PTHREAD_MUTEX_T]; long int __align; } pthread_mutex_t;
注意PTHREAD_MUTEX_INITIALIZER 是8个成员的结构体,与pthread_mutex_t定义相符。并且所有成员初始化为0。 初始化之后,我们接着看看pthread_mutex_lock操作:
1 #ifndef __pthread_mutex_lock 2 strong_alias (__pthread_mutex_lock, pthread_mutex_lock) 3 hidden_def (__pthread_mutex_lock) 4 #endif 5 6 int 7 __pthread_mutex_lock (pthread_mutex_t *mutex) 8 { 9 assert (sizeof (mutex->__size) >= sizeof (mutex->__data)); 10 11 unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex); 12 13 LIBC_PROBE (mutex_entry, 1, mutex); 14 15 if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP 16 | PTHREAD_MUTEX_ELISION_FLAGS_NP), 0)) 17 return __pthread_mutex_lock_full (mutex); 18 19 if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_NP)) 20 { 21 FORCE_ELISION (mutex, goto elision); 22 simple: 23 /* Normal mutex. */ 24 LLL_MUTEX_LOCK (mutex); 25 assert (mutex->__data.__owner == 0); 26 } 27 #ifdef HAVE_ELISION 28 else if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_ELISION_NP)) 29 { 30 elision: __attribute__((unused)) 31 /* This case can never happen on a system without elision, 32 as the mutex type initialization functions will not 33 allow to set the elision flags. */ 34 /* Don‘t record owner or users for elision case. This is a 35 tail call. */ 36 return LLL_MUTEX_LOCK_ELISION (mutex); 37 } 38 #endif 39 else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) 40 == PTHREAD_MUTEX_RECURSIVE_NP, 1)) 41 { 42 /* Recursive mutex. */ 43 pid_t id = THREAD_GETMEM (THREAD_SELF, tid); 44 45 /* Check whether we already hold the mutex. */ 46 if (mutex->__data.__owner == id) 47 { 48 /* Just bump the counter. */ 49 if (__glibc_unlikely (mutex->__data.__count + 1 == 0)) 50 /* Overflow of the counter. */ 51 return EAGAIN; 52 53 ++mutex->__data.__count; 54 55 return 0; 56 } 57 58 /* We have to get the mutex. */ 59 LLL_MUTEX_LOCK (mutex); 60 61 assert (mutex->__data.__owner == 0); 62 mutex->__data.__count = 1; 63 } 64 else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) 65 == PTHREAD_MUTEX_ADAPTIVE_NP, 1)) 66 { 67 if (! 
__is_smp) 68 goto simple; 69 70 if (LLL_MUTEX_TRYLOCK (mutex) != 0) 71 { 72 int cnt = 0; 73 int max_cnt = MIN (MAX_ADAPTIVE_COUNT, 74 mutex->__data.__spins * 2 + 10); 75 do 76 { 77 if (cnt++ >= max_cnt) 78 { 79 LLL_MUTEX_LOCK (mutex); 80 break; 81 } 82 atomic_spin_nop (); 83 } 84 while (LLL_MUTEX_TRYLOCK (mutex) != 0); 85 86 mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8; 87 } 88 assert (mutex->__data.__owner == 0); 89 } 90 else 91 { 92 pid_t id = THREAD_GETMEM (THREAD_SELF, tid); 93 assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP); 94 /* Check whether we already hold the mutex. */ 95 if (__glibc_unlikely (mutex->__data.__owner == id)) 96 return EDEADLK; 97 goto simple; 98 } 99 100 pid_t id = THREAD_GETMEM (THREAD_SELF, tid); 101 102 /* Record the ownership. */ 103 mutex->__data.__owner = id; 104 #ifndef NO_INCR 105 ++mutex->__data.__nusers; 106 #endif 107 108 LIBC_PROBE (mutex_acquired, 1, mutex); 109 110 return 0; 111 }
首先看下第一句
assert (sizeof (mutex->__size) >= sizeof (mutex->__data));这句的意思是成员__size所占内存不小于__data所占内存(在这里两者恰好相等),我们来验证下。
char __size[__SIZEOF_PTHREAD_MUTEX_T]的字节数:40.
1 #ifdef __x86_64__ 2 # if __WORDSIZE == 64 3 # define __SIZEOF_PTHREAD_ATTR_T 56 4 # define __SIZEOF_PTHREAD_MUTEX_T 40
另一方面__data中的字节数是int、short、unsigned、__pthread_list_t这些个加起来:5个int/unsigned int共20字节,2个short共4字节,__pthread_list_t(两个指针)共16字节,刚好为40字节.
所以这个union在64位计算机上最大的空间为40个字节。
接着是:
1 unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
1 #define PTHREAD_MUTEX_TYPE_ELISION(m) 2 ((m)->__data.__kind & (127|PTHREAD_MUTEX_ELISION_NP))
因为__kind为0,所以这里的type显然为0;
1 if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP 2 | PTHREAD_MUTEX_ELISION_FLAGS_NP), 0)) 3 return __pthread_mutex_lock_full (mutex);
这里的结果为0,所以显然不走这个分支。
PTHREAD_MUTEX_TIMED_NP值为0,所以我们的代码显然是进入如下第一行的分支。
根据注释/* Normal mutex. */,很可能是通过这里得到锁。我们继续探索下,
/* Mutex types. */ enum { PTHREAD_MUTEX_TIMED_NP, PTHREAD_MUTEX_RECURSIVE_NP, PTHREAD_MUTEX_ERRORCHECK_NP, PTHREAD_MUTEX_ADAPTIVE_NP #if defined __USE_UNIX98 || defined __USE_XOPEN2K8
1 if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_NP)) 2 { 3 FORCE_ELISION (mutex, goto elision); 4 simple: 5 /* Normal mutex. */ 6 LLL_MUTEX_LOCK (mutex); 7 assert (mutex->__data.__owner == 0); 8 } 9 #ifdef HAVE_ELISION 10 else if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_ELISION_NP)) 11 { 12 elision: __attribute__((unused)) 13 /* This case can never happen on a system without elision, 14 as the mutex type initialization functions will not 15 allow to set the elision flags. */ 16 /* Don‘t record owner or users for elision case. This is a 17 tail call. */ 18 return LLL_MUTEX_LOCK_ELISION (mutex); 19 } 20 #endif 21 else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) 22 == PTHREAD_MUTEX_RECURSIVE_NP, 1)) 23 { 24 /* Recursive mutex. */ 25 pid_t id = THREAD_GETMEM (THREAD_SELF, tid); 26 27 /* Check whether we already hold the mutex. */ 28 if (mutex->__data.__owner == id) 29 { 30 /* Just bump the counter. */ 31 if (__glibc_unlikely (mutex->__data.__count + 1 == 0)) 32 /* Overflow of the counter. */ 33 return EAGAIN; 34 35 ++mutex->__data.__count; 36 37 return 0; 38 } 39 40 /* We have to get the mutex. */ 41 LLL_MUTEX_LOCK (mutex); 42 43 assert (mutex->__data.__owner == 0); 44 mutex->__data.__count = 1; 45 } 46 else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex) 47 == PTHREAD_MUTEX_ADAPTIVE_NP, 1)) 48 { 49 if (! __is_smp) 50 goto simple; 51 52 if (LLL_MUTEX_TRYLOCK (mutex) != 0) 53 { 54 int cnt = 0; 55 int max_cnt = MIN (MAX_ADAPTIVE_COUNT, 56 mutex->__data.__spins * 2 + 10); 57 do 58 { 59 if (cnt++ >= max_cnt) 60 { 61 LLL_MUTEX_LOCK (mutex); 62 break; 63 } 64 atomic_spin_nop (); 65 } 66 while (LLL_MUTEX_TRYLOCK (mutex) != 0); 67 68 mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8; 69 } 70 assert (mutex->__data.__owner == 0); 71 } 72 else 73 { 74 pid_t id = THREAD_GETMEM (THREAD_SELF, tid); 75 assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP); 76 /* Check whether we already hold the mutex. 
*/ 77 if (__glibc_unlikely (mutex->__data.__owner == id)) 78 return EDEADLK; 79 goto simple; 80 }
这里LLL_MUTEX_LOCK的意思:将__data中的__lock作为参数填入lll_lock,注意,这里是宏定义。
#ifndef LLL_MUTEX_LOCK # define LLL_MUTEX_LOCK(mutex) lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
1 #if LLL_PRIVATE == 0 && LLL_SHARED == 128 2 # define PTHREAD_MUTEX_PSHARED(m) 3 ((m)->__data.__kind & 128) 4 #else
这里的PTHREAD_MUTEX_PSHARED将__kind字段和128做&操作,推测是第8个标志位用来标识该锁是否共享。
既然如此, 我们这里两者填入的都是0,但是第一个__lock在后续使用中有取地址的可能。
我们接着看看lll_lock:
1 #define lll_lock(futex, private) 2 __lll_lock (&(futex), private)
取了地址, 那么这里就是原mutex中__lock字段的地址和数值0.
1 #define __lll_lock(futex, private) 2 ((void) 3 ({ 4 int *__futex = (futex); 5 if (__glibc_unlikely 6 (atomic_compare_and_exchange_bool_acq (__futex, 1, 0))) 7 { 8 if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) 9 __lll_lock_wait_private (__futex); 10 else 11 __lll_lock_wait (__futex, private); 12 } 13 }))
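__lll_lock先用CAS尝试把*futex从0改为1,成功即在无竞争的情况下直接获得锁;失败时,由于private是运行时才算出的表达式而不是编译期常量,__builtin_constant_p (private)为假,所以走__lll_lock_wait: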
1 /* Note that we need no lock prefix. */ 2 #define atomic_exchange_acq(mem, newvalue) 3 ({ __typeof (*mem) result; 4 if (sizeof (*mem) == 1) 5 __asm __volatile ("xchgb %b0, %1" 6 : "=q" (result), "=m" (*mem) 7 : "0" (newvalue), "m" (*mem)); 8 else if (sizeof (*mem) == 2) 9 __asm __volatile ("xchgw %w0, %1" 10 : "=r" (result), "=m" (*mem) 11 : "0" (newvalue), "m" (*mem)); 12 else if (sizeof (*mem) == 4) 13 __asm __volatile ("xchgl %0, %1" 14 : "=r" (result), "=m" (*mem) 15 : "0" (newvalue), "m" (*mem)); 16 else 17 __asm __volatile ("xchgq %q0, %1" 18 : "=r" (result), "=m" (*mem) 19 : "0" ((atomic64_t) cast_to_integer (newvalue)), 20 "m" (*mem)); 21 result; })
1 /* This function doesn‘t get included in libc. */ 2 #if IS_IN (libpthread) 3 void 4 __lll_lock_wait (int *futex, int private) 5 { 6 if (*futex == 2) 7 lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */ 8 9 while (atomic_exchange_acq (futex, 2) != 0) 10 lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */ 11 } 12 #endif
所以到了关键的地方, 这里用atomic_exchange_acq把futex(&__lock)的值原子地置为2,并取回旧值:旧值为0说明锁原先空闲,加锁成功;否则调用lll_futex_wait阻塞,被唤醒后再重试。这里的atomic_exchange_acq是一个返回旧值的原子操作,直接采用了内联汇编(xchg)的方式,并且根据操作数的大小(1、2、4、8字节)选取不同的x86指令(xchgb/xchgw/xchgl/xchgq)。
到了这里,只要这个原子xchg操作是正确的,并且阻塞与唤醒(wake up)之间的协议是正确的,那么这个mutex的语义就得到保证了。
我们接着看看lll_futex_wait是怎么样的(val = 2, private = 0):
1 /* Wait while *FUTEXP == VAL for an lll_futex_wake call on FUTEXP. */ 2 #define lll_futex_wait(futexp, val, private) 3 lll_futex_timed_wait (futexp, val, NULL, private)
参数多了个NULL(val = 2, timeout = NULL, private = 0),
1 #define lll_futex_timed_wait(futexp, val, timeout, private) 2 lll_futex_syscall (4, futexp, 3 __lll_private_flag (FUTEX_WAIT, private), 4 val, timeout)
展开__lll_private_flag
1 # else 2 # define __lll_private_flag(fl, private) 3 ((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex)) 4 # endif
1 # define THREAD_SELF 2 ({ struct pthread *__self; 3 asm ("mov %%fs:%c1,%0" : "=r" (__self) 4 : "i" (offsetof (struct pthread, header.self))); 5 __self;})
这里是从struct pthread中取得private_futex来计算的,值为0。这里实际上只保留了FUTEX_WAIT的值,同样为0.
1 #define FUTEX_WAIT 0 2 #define FUTEX_WAKE 1 3 #define FUTEX_REQUEUE 3 4 #define FUTEX_CMP_REQUEUE 4 5 #define FUTEX_WAKE_OP 5 6 #define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE ((4 << 24) | 1) 7 #define FUTEX_LOCK_PI 6 8 #define FUTEX_UNLOCK_PI 7 9 #define FUTEX_TRYLOCK_PI 8 10 #define FUTEX_WAIT_BITSET 9 11 #define FUTEX_WAKE_BITSET 10 12 #define FUTEX_WAIT_REQUEUE_PI 11 13 #define FUTEX_CMP_REQUEUE_PI 12 14 #define FUTEX_PRIVATE_FLAG 128 15 #define FUTEX_CLOCK_REALTIME 256 16 17 #define FUTEX_BITSET_MATCH_ANY 0xffffffff
所以这里的lll_futex_syscall调用简化为:
lll_futex_syscall (4, futexp, 0, 2, NULL)
我们接着看:
#define lll_futex_syscall(nargs, futexp, op, ...) \ ({ INTERNAL_SYSCALL_DECL (__err); long int __ret = INTERNAL_SYSCALL (futex, __err, nargs, futexp, op, __VA_ARGS__); (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (__ret, __err)) ? -INTERNAL_SYSCALL_ERRNO (__ret, __err) : 0); })
这里的futex并不是字符串字面量,而是一个预处理记号,后续会通过##拼接成__NR_futex;__VA_ARGS__指代了2和NULL。
我们看一下INTERNAL_SYSCALL:
# define INTERNAL_SYSCALL(name, err, nr, args...) INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args) # define INTERNAL_SYSCALL_NCS(name, err, nr, args...) ({ unsigned long int resultvar; LOAD_ARGS_##nr (args) LOAD_REGS_##nr asm volatile ( "syscall\n\t" : "=a" (resultvar) : "0" (name) ASM_ARGS_##nr : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); (long int) resultvar; })
那么这里的INTERNAL_SYSCALL_NCS调用, 参数为( __NR_futex,err,4, futexp,0, 2, NULL)。第四个参数开始为futexp,0, 2, NULL。
# define LOAD_ARGS_4(a1, a2, a3, a4) LOAD_ARGS_TYPES_4 (long int, a1, long int, a2, long int, a3, long int, a4) # define LOAD_REGS_4 LOAD_REGS_TYPES_4 (long int, a1, long int, a2, long int, a3, long int, a4) # define ASM_ARGS_4 ASM_ARGS_3, "r" (_a4)
将LOAD_ARGS_##nr (args)、LOAD_REGS_##nr、ASM_ARGS_##nr、REGISTERS_CLOBBERED_BY_SYSCALL展开带入,之后可将INTERNAL_SYSCALL_NCS转换为如下:
unsigned long long int resultvar; long int __arg4 = (long int) (NULL); long int __arg3 = (long int) (2); long int __arg2 = (long int) (0); long int __arg1 = (long int) (futexp); \ register long int _a4 asm ("r10") = __arg4; register long int _a3 asm ("rdx") = __arg3; register long int _a2 asm ("rsi") = __arg2; register long int _a1 asm ("rdi") = __arg1; asm volatile ( \ "syscall\n\t" : "=a" (resultvar) : "0" (__NR_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); (long long int) resultvar; })
这里的__NR_futex在上面的代码中找不到定义,它应该是Linux系统定义的系统调用号,并且SYS_futex的值也是由它来定义的。
#define SYS_futex __NR_futex
那么上面的那段代码真的确定是使用(FUTEX_WAIT)futex来陷入了阻塞吗? 让我尝试将之前写的一段直接采用futex做同步区块的代码修改下做检验。原代码:
1 #include <stdio.h> 2 #include <pthread.h> 3 #include <linux/futex.h> 4 #include <syscall.h> 5 #include <unistd.h> 6 #include <sys/time.h> 7 8 9 #define NUM 1000 10 11 12 int num = 0; 13 int futex_addr = 0; 14 15 int futex_wait(void* addr, int val){ 16 return syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0); 17 } 18 int futex_wake(void* addr, int val){ 19 return syscall(SYS_futex, addr, FUTEX_WAKE, val, NULL, NULL, 0); 20 } 21 22 void* thread_f(void* par){ 23 int id = (int) par; 24 25 /*go to sleep*/ 26 for(int i = 0; i < 1000; ++i){ 27 while(1 == __sync_val_compare_and_swap(&futex_addr, 0, 1) ){ 28 futex_wait(&futex_addr,1); 29 } 30 ++num; 31 futex_addr = 0; 32 futex_wake(&futex_addr, NUM); 33 } 34 // printf("Thread %d starting to work!\n",id); 35 return NULL; 36 } 37 38 int main(){ 39 pthread_t threads[NUM]; 40 int i; 41 42 printf("Everyone go...\n"); 43 float time_use=0; 44 struct timeval start; 45 struct timeval end; 46 gettimeofday(&start,NULL); 47 48 49 50 for (i=0;i<NUM;i++){ 51 pthread_create(&threads[i],NULL,thread_f,(void *)i); 52 } 53 54 /*wake threads*/ 55 56 /*give the threads time to complete their tasks*/ 57 for (i=0;i<NUM;i++){ 58 pthread_join(*(threads + i), NULL); 59 } 60 61 62 printf("Main is quitting...\n"); 63 printf("and num is %d\n", num); 64 65 gettimeofday(&end,NULL); 66 time_use=(end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec) / 1000000.0;//微秒 67 printf("time_use is %f \n",time_use); 68 return 0; 69 }
执行输出为:
Everyone go...
Main is quitting...
and num is 1000000
time_use is 0.283753
1000个线程执行1000次+1,答案为1000000正确。
我们尝试将futex_wait中的syscall调用做一下修改:
int futex_wait(void* addr, int val){ // return syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0); return INTERNAL_SYSCALL_NCS(addr, FUTEX_WAIT, val, NULL); }
然后添加宏INTERNAL_SYSCALL_NCS:
#define INTERNAL_SYSCALL_NCS(a1, a2, a3, a4) \ ({ unsigned long long int resultvar; long int __arg4 = (long int) (a4); long int __arg3 = (long int) (a3); long int __arg2 = (long int) (a2); long int __arg1 = (long int) (a1); \ register long int _a4 asm ("r10") = __arg4; register long int _a3 asm ("rdx") = __arg3; register long int _a2 asm ("rsi") = __arg2; register long int _a1 asm ("rdi") = __arg1; asm volatile ( "syscall\n\t" : "=a" (resultvar) : "0" (SYS_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); (long long int) resultvar; })
得到如下代码:
1 #include <stdio.h> 2 #include <pthread.h> 3 #include <linux/futex.h> 4 #include <syscall.h> 5 #include <unistd.h> 6 #include <sys/time.h> 7 8 9 #define NUM 1000 10 11 #define INTERNAL_SYSCALL_NCS(a1, a2, a3, a4) 12 ({ 13 unsigned long long int resultvar; 14 long int __arg4 = (long int) (a4); 15 long int __arg3 = (long int) (a3); 16 long int __arg2 = (long int) (a2); 17 long int __arg1 = (long int) (a1); \ 18 register long int _a4 asm ("r10") = __arg4; 19 register long int _a3 asm ("rdx") = __arg3; 20 register long int _a2 asm ("rsi") = __arg2; 21 register long int _a1 asm ("rdi") = __arg1; 22 asm volatile ( 23 "syscall\n\t" 24 : "=a" (resultvar) 25 : "0" (SYS_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); 26 (long long int) resultvar; }) 27 28 29 int num = 0; 30 int futex_addr = 0; 31 32 int futex_wait(void* addr, int val){ 33 // return syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0); 34 return INTERNAL_SYSCALL_NCS(addr, FUTEX_WAIT, val, NULL); 35 } 36 int futex_wake(void* addr, int val){ 37 return syscall(SYS_futex, addr, FUTEX_WAKE, val, NULL, NULL, 0); 38 } 39 40 void* thread_f(void* par){ 41 int id = (int) par; 42 43 /*go to sleep*/ 44 for(int i = 0; i < 1000; ++i){ 45 while(1 == __sync_val_compare_and_swap(&futex_addr, 0, 1) ){ 46 futex_wait(&futex_addr,1); 47 } 48 ++num; 49 futex_addr = 0; 50 futex_wake(&futex_addr, NUM); 51 } 52 // printf("Thread %d starting to work!\n",id); 53 return NULL; 54 } 55 56 int main(){ 57 pthread_t threads[NUM]; 58 int i; 59 60 printf("Everyone go...\n"); 61 float time_use=0; 62 struct timeval start; 63 struct timeval end; 64 gettimeofday(&start,NULL); 65 66 67 68 for (i=0;i<NUM;i++){ 69 pthread_create(&threads[i],NULL,thread_f,(void *)i); 70 } 71 72 /*wake threads*/ 73 74 /*give the threads time to complete their tasks*/ 75 for (i=0;i<NUM;i++){ 76 pthread_join(*(threads + i), NULL); 77 } 78 79 80 printf("Main is quitting...\n"); 81 printf("and num is %d\n", num); 82 83 
gettimeofday(&end,NULL); 84 time_use=(end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec) / 1000000.0;//微秒 85 printf("time_use is %f \n",time_use); 86 return 0; 87 }
注意到我们这里与pthread不一样的地方在于
1 == __sync_val_compare_and_swap(&futex_addr, 0, 1)
也就是说,我们这里和pthread_mutex不一样的地方在于:我们是原子地将futex_addr的值从0改为1,并在值为1时等待;而pthread_mutex在竞争路径上是把值置为2,并在值为2时等待。
执行如上代码,输出为:
Everyone go...
Main is quitting...
and num is 1000000
time_use is 0.254833
答案同样是1000000,所以这个采用汇编形式的调用符合了我们的预期,应该是和系统调用一致的。
我们之后接着来看看pthread_mutex_unlock的实现。