# -*- coding: utf-8 -*-
"""
Created on Mon Jul 11 18:12:11 2016

@author: hugo

Analyze Gaussian input (.gjf) files: extract the molecular formula and the
molecular mass of each one and write them to a tab-separated report.
"""

import io
import linecache  # no longer used by this script; kept so the name stays importable
import os
import re

# Integer atomic masses of the supported elements.
PeriodicTable = {'H': 1, 'C': 12, 'O': 16, 'N': 14, 'F': 19}

# Elements that are always printed in the formula, in this order, even when
# their count is zero.  Any other supported element (currently only F) is
# appended only when it actually occurs.
_ALWAYS_LISTED = ('C', 'H', 'O', 'N')

# Leading letters of an atom token, e.g. 'C' in 'C', 'C1' or 'C(Iso=13)'.
_LEADING_LETTERS = re.compile(r'[A-Za-z]+')


def getFileName(path):
    """Return the names of all '.gjf' files found directly inside *path*.

    Only the bare file names are returned (not joined with *path*).  The
    extension comparison is case-sensitive.  Names are sorted so that the
    result does not depend on the arbitrary order of ``os.listdir``.

    Raises:
        OSError: if *path* cannot be listed.
    """
    return sorted(
        name for name in os.listdir(path)
        if os.path.splitext(name)[1] == '.gjf'
    )


def _isChargeMultiplicityLine(line):
    """Return True if *line* consists of two or more integers (e.g. '0 1')."""
    fields = line.replace(',', ' ').split()
    if len(fields) < 2:
        return False
    try:
        for field in fields:
            int(field)
    except ValueError:
        return False
    return True


def readAtomSymbols(lines):
    """Return the element symbols of the molecule specification in *lines*.

    *lines* is any iterable of text lines of a .gjf file.  The molecule
    specification is taken to start right after the first line made up only
    of integers (the charge/multiplicity line) and to end at the next blank
    line or at the end of the input.  The symbol of each atom is the leading
    run of letters of the first token on its line, so the result does not
    depend on how many header lines the file has or on how the atom lines
    are indented.

    Raises:
        ValueError: if no charge/multiplicity line is found, or if an atom
            line does not start with a letter.
    """
    symbols = []
    in_molecule = False
    for line in lines:
        if not in_molecule:
            # Still in the header (link-0 lines, route, title).
            in_molecule = _isChargeMultiplicityLine(line)
            continue
        fields = line.split()
        if not fields:
            # A blank line terminates the molecule specification.
            break
        match = _LEADING_LETTERS.match(fields[0])
        if match is None:
            raise ValueError('cannot read element symbol from line %r'
                             % (line,))
        symbols.append(match.group().capitalize())
    if not in_molecule:
        raise ValueError('no charge/multiplicity line found')
    return symbols


def formulaAndMass(symbols):
    """Return ``(formula, mass)`` for an iterable of element symbols.

    The formula lists C, H, O and N with their counts (zero counts included),
    followed by every other element of ``PeriodicTable`` that occurs at least
    once, e.g. ``'C4H4O14N8'`` or ``'C1H0O0N0F2'``.  The mass is the sum of
    the ``PeriodicTable`` masses of all atoms.

    Raises:
        ValueError: if a symbol is not a key of ``PeriodicTable``.
    """
    counts = dict.fromkeys(PeriodicTable, 0)
    mass = 0
    for symbol in symbols:
        if symbol not in PeriodicTable:
            # Fail loudly instead of miscounting the atom as another element.
            raise ValueError('unsupported element symbol %r' % (symbol,))
        counts[symbol] += 1
        mass += PeriodicTable[symbol]

    parts = [symbol + str(counts.get(symbol, 0)) for symbol in _ALWAYS_LISTED]
    optional = sorted(s for s in PeriodicTable if s not in _ALWAYS_LISTED)
    parts.extend(s + str(counts[s]) for s in optional if counts[s])
    return ''.join(parts), mass


def writeReport(path, report_name):
    """Write file name, formula and mass of every .gjf file in *path*.

    The report *report_name* is created or overwritten; it holds one header
    line followed by one tab-separated line per input file.

    Raises:
        ValueError: if an input file cannot be parsed; the message is
            prefixed with the offending file name.
    """
    # Mode 'w' empties a report left over from an earlier run, and the
    # ``with`` block closes the file even if an input file fails to parse.
    with open(report_name, 'w') as report:
        report.write('Filename\t\tMolecular Formula\tMass of Molecule\n')
        for filename in getFileName(path):
            # Only ASCII tokens are inspected, and latin-1 can decode any
            # byte sequence, so a title in another encoding cannot abort
            # the run with a decoding error.
            full_name = os.path.join(path, filename)
            with io.open(full_name, encoding='latin-1') as gjf:
                try:
                    formula, mass = formulaAndMass(readAtomSymbols(gjf))
                except ValueError as err:
                    raise ValueError('%s: %s' % (filename, err))
            report.write('%s\t\t%s\t\t%s\n' % (filename, formula, mass))


def main():
    """Analyze the .gjf files of the current directory."""
    writeReport('.', 'filename-formula-mass.txt')


if __name__ == '__main__':
    main()
输入文件例子:
%chk=sep1_c.chk
%mem=8GB
%nprocshared=8
#p nosymm Freq RB3LYP 6-31G(d)

N30:F398:A98:B98 bis-(2,2,2-trinitroethyl)-nitramine

0 1
N 0.050166 0.606549 0.111024
C -0.991352 -0.153881 0.759816
C -2.359635 -0.271483 0.040587
N -2.266367 -0.291738 -1.482893
O -1.713944 0.679205 -1.977460
O -2.729313 -1.259234 -2.061533
N -3.053844 -1.605171 0.444658
O -2.312353 -2.554503 0.637319
O -4.267284 -1.582700 0.492355
N -3.361456 0.825265 0.460123
O -3.970323 1.397169 -0.419621
O -3.464790 0.953113 1.667286
N -0.101221 2.000887 0.065310
O 0.778708 2.630096 -0.501944
O -1.102134 2.442779 0.611345
C 1.066123 0.033761 -0.743528
C 2.418849 -0.242480 -0.056897
N 3.024094 0.998601 0.606765
O 2.415534 1.388378 1.588008
O 4.019495 1.471328 0.093119
N 2.340188 -1.339446 1.017288
O 1.494052 -2.203660 0.811024
O 3.132751 -1.282849 1.932600
N 3.427657 -0.754862 -1.111580
O 3.311667 -0.266010 -2.222400
O 4.236583 -1.582004 -0.736132
H -0.617446 -1.170900 0.881856
H -1.206334 0.262848 1.746634
H 0.714386 -0.922014 -1.136239
H 1.261770 0.705098 -1.578729