Smith-waterman算法 openmp+mpi实现

//此Smith-Waterman 算法分别用mpi与openmp实现是没问题的,但是两个混合编程的时候就会出各种问题,希望懂的能够给指条明路。。。万分感谢
#include <omp.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mpi.h>
/*File pointers*/
FILE *ptr_file_1, *ptr_file_2,*ptr_file_3;	//test file pointers

/*Definitions*/
#define TRUE 1
#define FALSE 0
#define Match 1
#define MissMatch -1
#define GapPenalty 2
#define MAXLEN 20

/*Global Variables*/
char inputC[5];		//user input character
int inputI;
int StrLen1,StrLen2;
int intcheck = TRUE;

char holder, ch;
int filelen1 = 0;
int filelen2 = 0;
int filelen3 = 0;
int i,j,k,l,m,n,lenA,lenB,compval,top,left,left_top,rows,columns,contrl;
char FASTA1[MAXLEN],FASTA2[MAXLEN];
char dash = '-';

char strA[MAXLEN];			//holds 1st string to be aligned in character array
char strB[MAXLEN];			//holds 2nd string to be aligned in character array
int HiScore;			//holds value of highest scoring alignment(s).
int HiScorePos[2];		//holds the position of the HiScore
int SWArray[MAXLEN][MAXLEN];	//S-W Matrix

char MaxA[MAXLEN];
char MaxB[MAXLEN];
char OptA[MAXLEN];
char OptB[MAXLEN];

int MaxAcounter = 1;	//MaxA counter
int MaxBcounter = 1;	//MaxB counter
int cont = TRUE;
int check;

int rank,size;
MPI_Status status;
int buff[3];

/*ALIGNMENT FUNCTION*/
int Align(int PosA,int PosB) {

	/*Function Variables*/
	int relmax = -1;		//hold highest value in sub columns and rows
	int relmaxpos[2];		//holds position of relmax

	if(SWArray[PosA-1][PosB-1] == 0) {
		cont = FALSE;
	}

	while(cont == TRUE) {	//until the diagonal of the current cell has a value of zero

		/*Find relmax in sub columns and rows*/
		for(i=PosA; i>0; --i) {

			if(relmax < SWArray[i-1][PosB-1]) {

				relmax = SWArray[i-1][PosB-1];
				relmaxpos[0] = i-1;
				relmaxpos[1] = PosB-1;
			}
		}

		for(j=PosB; j>0; --j) {

			if(relmax < SWArray[PosA-1][j-1]) {

				relmax = SWArray[PosA-1][j-1];
				relmaxpos[0]=PosA-1;
				relmaxpos[1]=j-1;
			}
		}

		/*Align strings to relmax*/
		if((relmaxpos[0] == PosA-1) && (relmaxpos[1] == PosB-1)) {	//if relmax position is diagonal from current position simply align and increment counters

			MaxA[MaxAcounter] = strA[relmaxpos[0]-1];
			++MaxAcounter;
			MaxB[MaxBcounter] = strB[relmaxpos[1]-1];
			++MaxBcounter;

		}

		else {

			if((relmaxpos[1] == PosB-1) && (relmaxpos[0] != PosA-1)) {	//maxB needs at least one '-'

				for(i=PosA-1; i>relmaxpos[0]-1; --i) {	//for all elements of strA between PosA and relmaxpos[0]

						MaxA[MaxAcounter]= strA[i-1];
						++MaxAcounter;
				}
				for(j=PosA-1; j>relmaxpos[0]; --j) {	//set dashes to MaxB up to relmax

					MaxB[MaxBcounter] = dash;
					++MaxBcounter;
				}

				MaxB[MaxBcounter] = strB[relmaxpos[1]-1];	//at relmax set pertinent strB value to MaxB
				++MaxBcounter;
			}

			if((relmaxpos[0] == PosA-1) && (relmaxpos[1] != PosB-1)) {	//MaxA needs at least one '-'

				for(j=PosB-1; j>relmaxpos[1]-1; --j) {	//for all elements of strB between PosB and relmaxpos[1]

					MaxB[MaxBcounter] = strB[j-1];
					++MaxBcounter;
				}
				for(i=PosB-1; i>relmaxpos[1]; --i) {		//set dashes to strA

					MaxA[MaxAcounter] = dash;
					++MaxAcounter;
				}

				MaxA[MaxAcounter] = strA[relmaxpos[0]-1];
				++MaxAcounter;
			}
		}

		//printf("(%i,%i)",relmaxpos[0],relmaxpos[1]);
		Align(relmaxpos[0],relmaxpos[1]);
	}

	return(cont);
}

int SWMax(int num1,int num2,int num3)
{
	int max=-1;
	if(num1>num2)
	{
		max = num1;
	}
	else if(num2>num3)
	{
		max = num2;
	}else
	{
		max =num3;
	}
	return max;
}

/*MAIN FUNCTIONS*/
int main(int argc,char ** argv)
{

	MPI_Init(&argc,&argv);//MPI初始化
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);//得到当前进程标识
	MPI_Comm_size(MPI_COMM_WORLD,&size);//总的进程个数

	printf("size=%d\n",size);

	/*open first file*/
	ptr_file_1 = fopen("d:/mpi/str1.fa", "r");
	/*check to see if it opened okay*/
	if(ptr_file_1 == NULL) {
		printf("Error opening 'str1.fa'\n");
		system("PAUSE");
		exit(8);
	}

	/*open second file*/
	ptr_file_2 = fopen("d:/mpi/str2.fa", "r");
	/*check to see if it opened okay*/
	if(ptr_file_2 == NULL) {
		printf("Error opening 'str2.fa'\n");
		system("PAUSE");
		exit(8);
	}
	/*measure file1 length*/
	while(holder != EOF) {
		holder = fgetc(ptr_file_1);
		if(filelen1 < MAXLEN && holder !=EOF) {
			strA[filelen1] = holder;
			++filelen1;
		}
	}
	strA[filelen1] = -1;
	holder = '0';
	/*measure file2 length*/
	while(holder != EOF) {
		holder = fgetc(ptr_file_2);
		if(filelen2 < MAXLEN && holder !=EOF) {
			strB[filelen2] = holder;
			++filelen2;
		}
	}
	strB[filelen2] = -1;
	fclose(ptr_file_1);
	fclose(ptr_file_2);								

	lenA = strlen(strA)-1;
	lenB = strlen(strB)-1;

	time_t  t1 = time(NULL);
	printf("t1=%d\n",t1);

	/*----------------------------问题代码处*---------------------------*/
	for(contrl=0;contrl<lenA+lenB+1;contrl++)
	{
		if(contrl<=lenA)
		{
			rows = contrl;
			j = 0;
		}
		else
		{
			rows = lenA;
			j = contrl-lenA;
		}

		if(rank==0)
		{

			//#pragma omp parallel for
			for(columns = j;columns <= lenB;columns++)
			{
				//printf("%d\n",columns);
				if(rows == 0||columns == 0)
				{
					SWArray[rows][columns]=0;
				}
				else
				{
					left = SWArray[rows][columns-1] - GapPenalty;
					top = SWArray[rows-1][columns] - GapPenalty;
					if(strA[rows-1] == strB[columns-1])
					{
						left_top = (SWArray[rows-1][columns-1] + Match);
					}else
					{
						left_top = SWArray[rows-1][columns-1] + MissMatch;
					}
					compval = SWMax(left,top,left_top);
					if(compval<0) compval = 0;
					SWArray[rows][columns] = compval;
					buff[0]=compval;
					buff[1]=rows;
					buff[2]=columns;
					MPI_Send(buff,4,MPI_INT,rank+1,99,MPI_COMM_WORLD);
					MPI_Recv(buff,4,MPI_INT,rank+1,99,MPI_COMM_WORLD,&status);
					SWArray[buff[1]][buff[2]]=buff[0];
					compval = 0;
				}
			}
		}
		else
		{
			//#pragma omp parallel for
			for(columns = j;columns <= lenB;columns++)
			{
				//printf("%d\n",columns);
				if(rows == 0||columns == 0)
				{
					SWArray[rows][columns]=0;
				}
				else
				{
					left = SWArray[rows][columns-1] - GapPenalty;
					top = SWArray[rows-1][columns] - GapPenalty;
					if(strA[rows-1] == strB[columns-1])
					{
						left_top = (SWArray[rows-1][columns-1] + Match);
					}else
					{
						left_top = SWArray[rows-1][columns-1] + MissMatch;
					}
					compval = SWMax(left,top,left_top);
					if(compval<0) compval = 0;
					SWArray[rows][columns] = compval;
					buff[0]=compval;
					buff[1]=rows;
					buff[2]=columns;
					MPI_Send(buff,4,MPI_INT,rank-1,99,MPI_COMM_WORLD);
					MPI_Recv(buff,4,MPI_INT,rank-1,99,MPI_COMM_WORLD,&status);
					SWArray[buff[1]][buff[2]]=buff[0];
					compval = 0;
				}
			}

		}
		MPI_Barrier(MPI_COMM_WORLD);
	}

	/*PRINT S-W Table*/
	ptr_file_3 = fopen("str3.fa", "w+");
	if(ptr_file_3 == NULL) {
		printf("Error opening 'str3.fa'\n");
		system("PAUSE");
		exit(8);
	}
	fprintf(ptr_file_3,"   0");
	for(i = 0; i <= lenB; ++i) {
		fprintf(ptr_file_3,"  %c",strB[i]);
	}
	fprintf(ptr_file_3,"\n");

	for(i = 0; i <= lenA; ++i) {
		if(i < 1) {
			fprintf(ptr_file_3,"0");
		}

		if(i > 0) {
			fprintf(ptr_file_3,"%c",strA[i-1]);
		}

		for(j = 0; j <= lenB; ++j) {
				fprintf(ptr_file_3,"%3i",SWArray[i][j]);
		}
		fprintf(ptr_file_3,"\n");
	}
	fclose(ptr_file_3);
	/*MAKE ALIGNMENTT*/
	for(i=0; i<=lenA; ++i) {	//find highest score in matrix: this is the starting point of an optimal local alignment

		for(j=0; j<=lenB; ++j) {

			if(SWArray[i][j] > HiScore) {

				HiScore = SWArray[i][j];
				HiScorePos[0] = i;
				HiScorePos[1] = j;
			}
		}
	}

	//send Position to alignment function
	MaxA[0] = strA[HiScorePos[0]-1];
	MaxB[0] = strB[HiScorePos[1]-1];

	check = Align(HiScorePos[0],HiScorePos[1]);

	//in the end reverse Max A and B
	k=0;
	for(i = strlen(MaxA)-1; i > -1; --i) {
		OptA[k] = MaxA[i];
		++k;
	}

	k=0;
	for(j=strlen(MaxB)-1; j > -1; --j) {
		OptB[k] = MaxB[j];
		++k;
	}

	printf("%s\n%s	\n",OptA,OptB);

	time_t  t2 = time(NULL);
	printf("t2=%d\n",t2);
	printf("Time-consuming is:%ds\n",(t2-t1));

	MPI_Finalize();

	return(0);

}

Smith-waterman算法 openmp+mpi实现,布布扣,bubuko.com

时间: 2024-11-11 18:57:03

Smith-waterman算法 openmp+mpi实现的相关文章

DNA序列局部比对(Smith–Waterman algorithm)

生物信息原理作业第三弹:DNA序列局部比对,利用Smith–Waterman算法,python3.6代码实现. 实例以及原理均来自https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm. 1 import numpy as np 2 import pandas as pd 3 sequence1 = 'TGTTACGG' 4 sequence2 = 'GGTTGACTA' 5 s1 = '' 6 s2 = '' 7 gap =

[Sequence Alignment Methods] Smith–Waterman algorithm

Smith–Waterman algorithm 首先需要澄清一个事实,Smith–Waterman algorithm是求两个序列的最佳subsequence匹配,与之对应的算法但是求两个序列整体匹配的算法是Needleman-Wusch algorithm,即 Smith–Waterman algorithm:Local Needleman-Wusch algorithm: Global Needleman-Wusch algorithm与longest common subsequence

中文句子相似度之計算與應用

原文:http://www.aclweb.org/anthology/O05-1008 中文句子相似度之计算与应用 郑守益 梁婷国立交通大学信息科学系 摘要 近年來受惠于国内外各项语料库资源的建置及网际网路上的大量中文语料,使计算机语文辅助教材的涵盖层面日趋广泛.因此如何产生大量且具高质量之辅助教材日益受到许多自然语言处理研究者的重视.有鉴于此,本論文提出以中文句子相似度为基础的研究与应用.相似度的计算乃考虑句子的组合及聚合性.我们实作此一应用,并提出解决未知词的语意计算问题的方法.实验结果显示

有了OpenMP,MPI,为什么还要MapReduce? (转载)

OpenMP和MPI是并行编程的两个手段,对比如下: OpenMP:线程级(并行粒度):共享存储:隐式(数据分配方式):可扩展性差: MPI:进程级:分布式存储:显式:可扩展性好. OpenMP采用共享存储,意味着它只适应于SMP,DSM机器,不适合于集群.MPI虽适合于各种机器,但它的编程模型复杂: 需要分析及划分应用程序问题,并将问题映射到分布式进程集合: 需要解决通信延迟大和负载不平衡两个主要问题: 调试MPI程序麻烦: MPI程序可靠性差,一个进程出问题,整个程序将错误: 其中第2个问题

微博估计要火一阵的SleepSort之Shell及C实现

今日在微博看到如此神奇的代码,居然还有新的sort算法,对于我这种渣渣必须研究一下,代码如下: #!/bin.bash function f() { sleep "$1" //sleep 这么多s echo "$1" } while [ -n "$1" ] //第一个参数不为空 do f "$1" & //后台运行,相当于fork一个进程去执行f, 父进程同时继续下去 shift //输入参数左移,也即覆盖掉第一个参数

7种深度学习工具介绍

1)TensorFlow TensorFlow是Google基于DistBelief进行研发的第二代人工智能学习系统,其命名来源于本身的运行原理. –Tensor(张量)意味着N维数组,Flow(流)意味着基于数据流图的计算,TensorFlow为张量从图像的一端流动到另一端的计算过程. –TensorFlow是将复杂的数据结构,传输至人工智能神经网中进行分析和处理过程的系统. TensorFlow表达了高层次的机器学习计算,可被用于语音识别或图像识别等多项机器深度学习领域. –TensorFl

MIC C编程(offload模式)

MIC C编程(offload模式) 编程特点 简单---隐藏大量细节,语法与OpenMPI类似(不需要开辟空间) 灵活---OpenMP MPI(但是用的不多)pThread等多种方式 传统---与CPU编程一脉相承 MIC C扩展语言结构 编译指导方式(#pragma) offload --表示之后的代码段将使用offload模式运行 运行在其他设备上(MIC) --标识内存传递参数,传递过程对用户透明 不需要手动写代码控制何时传入.何时传出 不需要手动申请卡上内存空间 不需要讲卡上内存与主

[Sequence Alignment Methods] Dynamic time warping (DTW)

本系列介绍几种序列对齐方法,包括Dynamic time warping (DTW),Smith–Waterman algorithm,Cross-recurrence plot Dynamic time warping (DTW) is a well-known technique to find an optimal alignment between two given (time-dependent) sequences under certain restrictions. ——Mei

ImageMagick的使用

关于ImageMagick ImageMagick (TM) 是一个免费的创建.编辑.合成图片的软件.它可以读取.转换.写入多种格式的图片.图片切割.颜色替换.各种效果的应用,图片的旋转.组合,文本,直线, 多边形,椭圆,曲线,附加到图片伸展旋转.ImageMagick是免费软件:全部源码开放,可以自由使用,复制,修改,发布.它遵守GPL许可协议.它 可以运行于大多数的操作系统.ImageMagick的大多数功能的使用都来源于命令行工具.通常来说,它可以支持以下程序语言: Perl, C, C+