# -*- coding:utf-8 -*-
"""Concatenate the text files of a directory into one CSV under ./new/.

Two modes are provided:

* sort_joint(): concatenate and prefix every data row with an identifier
  built from a tag found in the source file name plus a per-file row number.
* joint_only(): plain concatenation, dropping lines that are exactly four
  characters long.

updata() post-processes a file in ./new/ by prefixing matching rows with a
four-digit field extracted from the row itself.

All paths are relative to the current working directory.
"""
import os
import re

# Tag in a file name: two digits followed by 'A' or 'B', located directly
# before a dot and a three-character extension at the end of the name.
p1 = r"([0-9][0-9][AB])\.\w{3}$"

# Four digits that directly follow a comma, with at least one character
# before the comma and at least one character after the digits.
p2 = r"^.+\,(\d{4}).+"

pattern1 = re.compile(p1)
pattern2 = re.compile(p2)


def _open_output(zz):
    """Create ./new/ if it is missing and open ./new/<zz>.csv for appending."""
    if not os.path.exists('./new/'):
        os.makedirs('./new/')
    return open('./new/%s.csv' % (zz), 'a')


def get_dir(zz):
    """Return the names of the entries in directory ./<zz>.

    The order is whatever os.listdir() yields; it is not sorted here.
    """
    return os.listdir('./%s' % zz)


def joint_b0(listdir, zz):
    """Append the numbered rows of every tagged file to ./new/<zz>.csv.

    For each name in *listdir* (read from ./<zz>/) the first line is skipped
    and every following line is written as ``<tag>_<n>,<line>``, where <tag>
    is the pattern1 match in the file name and <n> counts from 1 within that
    file.

    Names without a tag are skipped explicitly and are not opened. The
    original produced no output for them either, but only because the
    resulting IndexError was swallowed by a bare ``except``.

    Args:
        listdir: file names located inside ./<zz>/.
        zz: source directory name, also used as the output file stem.
    """
    with _open_output(zz) as fw:
        for name in listdir:
            found = pattern1.search(name)
            if found is None:
                continue
            tag = found.group(1)
            with open('./%s/%s' % (zz, name)) as fr:
                for row_no, line in enumerate(fr):
                    if row_no == 0:
                        # Skip the first line of every source file.
                        continue
                    fw.write('%s_%s,%s\n' % (tag, row_no, line.strip('\n')))


def joint_b1(listdir, zz):
    """Append the files in *listdir* to ./new/<zz>.csv without numbering.

    Lines whose length is exactly 4 (counting the trailing newline) are
    dropped; everything else is copied unchanged.
    NOTE(review): the meaning of the 4-character lines is not visible here;
    confirm against the input data.

    Args:
        listdir: file names located inside ./<zz>/.
        zz: source directory name, also used as the output file stem.
    """
    with _open_output(zz) as fw:
        for name in listdir:
            with open('./%s/%s' % (zz, name)) as fr:
                for line in fr:
                    if len(line) != 4:
                        fw.write(line)


def sort_joint():
    """Number and concatenate each configured directory into ./new/."""
    # Other directory names that appeared in an earlier revision of this
    # list: 'b0', 'b3', 'b4', 'b2_idsd', 'b3_idcd'.
    dir_list = ['b1']
    for dir_name in dir_list:
        joint_b0(get_dir(dir_name), dir_name)


def joint_only():
    """Concatenate each configured directory into ./new/ without numbering.

    Put the files to merge into the listed directory; the merged file is
    created under ./new/ with the directory name as its stem and can be
    renamed afterwards as needed.
    """
    dir_list = ['stock2']
    for dir_name in dir_list:
        joint_b1(get_dir(dir_name), dir_name)


def updata(zz):
    """Copy matching rows of ./new/<zz> to ./new/new.csv with a prefix.

    Every line that matches pattern2 is appended to ./new/new.csv, preceded
    by the captured four digits and a comma. Lines that do not match are
    dropped.

    Args:
        zz: name of the input file inside ./new/ (including its extension).
    """
    with open('./new/%s' % (zz)) as fr, open('./new/new.csv', 'a') as fw:
        for line in fr:
            found = pattern2.search(line)
            if found:
                fw.write(found.group(1) + ',' + line)


if __name__ == "__main__":
    sort_joint()
    # Alternative entry points:
    # joint_only()
    # updata('b4.csv')
# Time: 2024-10-26 11:51:30