#!/usr/bin/env python # coding:utf-8 #import pandas as pd, numpy as np; ‘‘‘ 将csv文件转换为对应的邻接矩阵mat ‘‘‘ from numpy import *; def protein_complexes_trans(): file = open(‘protein_complexes.csv‘); filePro = open(‘complexes‘, ‘a‘); fileTarget = open(‘targets(complexes)‘,‘a‘); fileInter = open(‘protein_complexes_interaction_matrix‘, ‘a‘); proteins = []; targets = []; while 1: line = file.readline(); if not line: break; token = line.split(‘;‘); targ = token[1].split(‘,‘); for i in range(0,len(targ)): targets.append(targ[i]); proteins.append(token[0]); file.seek(0); #将指针重置回第一行 proArr = unique(array(proteins)).tolist(); #去重,并删去最后一项(标题) tarArr = unique(array(targets)).tolist(); #删去最后 #mat为邻接矩阵 mat = zeros((len(proArr),len(tarArr)), dtype = int16); while 1: line = file.readline(); if not line: break; token = line.split(‘;‘); targ = token[1].split(‘,‘); row = proArr.index(token[0]); for i in range(0, len(targ)): col = tarArr.index(targ[i]); mat[row][col] = 1; #输出药物和靶向的列表 for i in proArr: filePro.write(‘%s\n‘ % i); for i in tarArr: fileTarget.write(‘%s\n‘ % i); print "%d&%d" % (len(proArr), len(tarArr)); interMat = mat.tolist(); #将矩阵写入文件 for i in range(0, len(proArr)): fileInter.write(‘%s\n‘ % (str(interMat[i]))); return 0; def drugs_targets_trans(): #print ‘hello world‘; file = open(‘drugs_targets_sum‘); fileDrug = open(‘drugs‘, ‘a‘); fileTarget = open(‘targets‘,‘a‘); fileInter = open(‘drugs_targets_interaction_matrix‘, ‘a‘); drugs = []; targets = []; while 1: line = file.readline(); if not line: break; token = line.split(‘;‘); targ = token[1].split(‘,‘); for i in range(0,len(targ)): targets.append(targ[i]); drugs.append(token[0]); file.seek(0); #将指针重置回第一行 drugArr = unique(array(drugs)).tolist(); #去重,并删去最后一项(标题) tarArr = unique(array(targets)).tolist(); #删去最后 #mat为邻接矩阵 mat = zeros((len(drugArr),len(tarArr)), dtype = int16); while 1: line = file.readline(); if not line: break; token = line.split(‘;‘); targ = token[1].split(‘,‘); row = drugArr.index(token[0]); for i in range(0, 
len(targ)): col = tarArr.index(targ[i]); mat[row][col] = 1; #输出药物和靶向的列表 for i in drugArr: fileDrug.write(‘%s\n‘ % i); for i in tarArr: fileTarget.write(‘%s\n‘ % i); interMat = mat.tolist(); for i in range(0, len(drugArr)): fileInter.write(‘%s\n‘ % (str(interMat[i]))); #protein_complexes_trans(); drugs_targets_trans();
# Time: 2024-10-26 04:34:39