OpenMP 并行化处理测试
`#pragma omp parallel for` 这条语句用来指定其后的 for 循环并行执行。把 for 循环里的语句变成并行执行后,效率会不会提高呢?还是测试一下吧。测试的时候,循环次数最好大一些,以增加计算量,不然程序耗时太短,很难区分。
#include <omp.h>
#include <stdio.h>
#include <sys/time.h>

/* Benchmark sizes: work per call, number of calls, and OpenMP thread count. */
enum { INNER_ITERATIONS = 10000000, OUTER_ITERATIONS = 100, NUM_THREADS = 4 };

/*
 * Busy-loop that serves as the unit of work being timed.
 *
 * The sink is volatile so that the stores count as observable behaviour and
 * the compiler cannot delete the loop as dead code when optimising.
 */
void test(void)
{
    volatile int a = 0;
    for (int i = 0; i < INNER_ITERATIONS; i++) {
        a = i + 1;
    }
}

/*
 * Returns the whole milliseconds elapsed between two gettimeofday() samples.
 *
 * The seconds and microseconds differences are combined before dividing, so a
 * negative microsecond difference is absorbed by the seconds term.
 */
static long long elapsed_ms(const struct timeval *start, const struct timeval *end)
{
    long long usec = 1000000LL * (end->tv_sec - start->tv_sec)
                   + (end->tv_usec - start->tv_usec);
    return usec / 1000;
}

/*
 * Runs the same workload twice -- first serially, then under
 * "#pragma omp parallel for" -- and prints the elapsed time of each run in
 * milliseconds.
 *
 * Returns 0 on completion.
 */
int main(void)
{
    struct timeval t_start, t_end;

    omp_set_num_threads(NUM_THREADS);

    /* Serial baseline. */
    gettimeofday(&t_start, NULL);
    for (int i = 0; i < OUTER_ITERATIONS; i++) {
        test();
    }
    gettimeofday(&t_end, NULL);
    printf("Time = %lld\n", elapsed_ms(&t_start, &t_end));

    /* Same work with the outer loop's iterations divided among the threads. */
    gettimeofday(&t_start, NULL);
#pragma omp parallel for
    for (int i = 0; i < OUTER_ITERATIONS; i++) {
        test();
    }
    gettimeofday(&t_end, NULL);
    printf("Time = %lld\n", elapsed_ms(&t_start, &t_end));

    return 0;
}
result:
[user@host zlt]# g++ omp.c -o omp -fopenmp
[user@host zlt]# ./omp
Time = 2859
Time = 718
[user@host zlt]#
在四核的机器上,开了四个线程,加速比基本上是4,果然不错。
此外,BS一下clock()函数,我向来觉得它一无是处。。。
- #include <omp.h>
- #include <stdio.h>
- #include <time.h>
/*
 * Busy-loop that serves as the unit of work for the timing comparison.
 *
 * The sink is volatile so that the stores count as observable behaviour and
 * the compiler cannot delete the loop as dead code when optimising.
 */
void test(void)
{
    volatile int a = 0;
    for (int i = 0; i < 10000000; i++) {
        a = i + 1;
    }
}
/*
 * Runs the same workload serially and then under "#pragma omp parallel for",
 * measuring both runs with clock() and printing the raw tick difference.
 *
 * clock() is kept on purpose: it reports processor time consumed by the
 * program rather than elapsed wall-clock time. On implementations where that
 * figure covers every thread of the process, the parallel run reports roughly
 * the same value as the serial one even though it finishes sooner.
 *
 * Returns 0 on completion.
 */
int main(void)
{
    int nthrds = 4;
    omp_set_num_threads(nthrds);

    /* Serial baseline. */
    clock_t start = clock();
    for (int i = 0; i < 100; i++) {
        test();
    }
    clock_t end = clock();
    /* clock_t has no printf specifier of its own; print the difference as long. */
    printf("Time = %ld\n", (long)(end - start));

    /* Same work with the loop iterations divided among the threads. */
    start = clock();
#pragma omp parallel for
    for (int i = 0; i < 100; i++) {
        test();
    }
    end = clock();
    printf("Time = %ld\n", (long)(end - start));

    return 0;
}
结果:
[user@host zlt]# g++ omp.c -o omp -fopenmp
[user@host zlt]# ./omp
Time = 2860000
Time = 2860000
[user@host zlt]#
上面两次输出相同,是因为 clock() 返回的是整个进程消耗的 CPU 时间(多线程时各线程的 CPU 时间会累加),而不是实际经过的时间。此外,clock 还有三个问题:
1)在 clock_t 为 32 位且 CLOCKS_PER_SEC 为 1000000 的系统上,计时超过一个小时左右(约 72 分钟)就会溢出回绕.
2)函数clock没有考虑CPU被子进程使用的情况.
3)也不能区分用户空间和内核空间.
时间: 2024-10-12 03:34:44