ETL应用场景中,若对端接口文件未能提供,任务会一直循环等待,直到对端提供为止,这种方式极大地消耗了系统资源。为此想到了一种方法:一次获取一个平台的文件,实现思路如下:
1、第一次获取对端平台提供目录下给定日期的所有接口文件,并保存文件列表;
2、后续每隔n分钟重启获取任务,每次先获取文件列表,和上次列表进行对比,当发生如下情况时,会重新获取:
A、有新文件产生;
B、有文件大小发生变化。
实现方法如下:
[ftp.properties]
ipaddress = 10.25.xxx.xxx
username = xxxxx
password = xxxxx
#当 encryption 有值时,将进行密码解析
encryption =
#当resolve 为 False时,需要替换远程目录和当前目录的参数
resolve = 1
remoteDir = /bosscdr/tobak/jf_bass
localDir = /interface/cyg/[SDT_YYYYMMDD]
#上次保存的文件获取列表
lastFileList = /interface/cyg/lastfilelist.txt
# -*- coding:utf-8 -*-
'''
Purpose : fetch remote interface files over FTP
Written : 2015-5-5
Author  : chenyangang
----------------------------------------
How it works:
1. Read the FTP server settings from the configuration file; the password
   may be stored in encrypted form.
2. List the files in the remote directory; if a previously saved listing
   exists, compare against it and extract the files that differ.
3. Download the files that differ.
'''
import datetime
import ftplib
import os

try:  # Python 2 module names, as used by the original script
    import ConfigParser
    import cPickle
except ImportError:  # Python 3 renamed both modules
    import configparser as ConfigParser
    import pickle as cPickle


class GetDataBaseDiff(object):
    '''Incremental FTP fetcher driven by a saved {file name: size} snapshot.

    Each run lists the remote directory, compares the listing with the
    snapshot written by the previous run, and downloads only the files that
    are new or whose size changed.
    '''

    def __init__(self, config, interfaceID=None, interfaceDate=None, delay=0):
        '''Remember the configuration path and the interface to fetch.

        config        -- path of the configuration file
        interfaceID   -- optional text that wanted file names must contain
        interfaceDate -- date as YYYYMMDD; when None, today minus `delay`
                         days is used
        delay         -- day offset applied only when interfaceDate is None
        '''
        self.config = config
        self.interfaceID = interfaceID
        if interfaceDate is None:
            targetDay = datetime.date.today() - datetime.timedelta(delay)
            interfaceDate = targetDay.strftime("%Y%m%d")
        # Always set the attribute, also when the caller supplied the date.
        self.interfaceDate = interfaceDate

    def getConfig(self, interfaceDate=None):
        '''Read the [ftp.properties] section of the configuration file.

        interfaceDate -- YYYYMMDD value substituted into the directory
                         templates; defaults to self.interfaceDate

        Returns the tuple (hostaddr, username, password, remoteDir, localDir,
        lastFileList).
        '''
        if interfaceDate is None:
            interfaceDate = self.interfaceDate

        section = 'ftp.properties'
        readConfig = ConfigParser.ConfigParser()
        with open(self.config, 'r') as configFile:
            # read_file() superseded readfp(), which Python 3.12 removed.
            loader = getattr(readConfig, 'read_file', None) or readConfig.readfp
            loader(configFile)

        hostaddr = readConfig.get(section, 'ipaddress')
        username = readConfig.get(section, 'username')
        password = readConfig.get(section, 'password')
        # Switches: placeholder resolution and password decryption.
        resolve = readConfig.get(section, 'resolve')
        encryption = readConfig.get(section, 'encryption')
        # Directory settings.
        remoteDir = readConfig.get(section, 'remoteDir')
        localDir = readConfig.get(section, 'localDir')
        # File holding the listing saved by the previous run.
        lastFileList = readConfig.get(section, 'lastFileList')

        if encryption != '':
            # `encryption` names an external command that is given the stored
            # password as its argument; the password is what the command
            # prints, so read the pipe instead of keeping the pipe object.
            command = encryption + ' ' + password
            pipe = os.popen(command)
            try:
                password = pipe.read().strip()
            finally:
                pipe.close()

        if resolve == '1':
            month = interfaceDate[0:6]
            remoteDir = remoteDir.replace("[SDT_YYYYMMDD]", interfaceDate)
            remoteDir = remoteDir.replace("[SDT_YYYYMM]", month)
            localDir = localDir.replace("[SDT_YYYYMMDD]", interfaceDate)
            localDir = localDir.replace("[SDT_YYYYMM]", month)

        return hostaddr, username, password, remoteDir, localDir, lastFileList

    def connect(self, hostaddr, username, password):
        '''Open an FTP connection and log in.

        Returns the logged-in ftplib.FTP object. A failure is reported on
        stdout and then re-raised, because no usable connection exists to
        return in that case.
        '''
        try:
            connftp = ftplib.FTP(hostaddr)
        except ftplib.all_errors:
            print("The ipaddress %(ipaddress)s refused!"
                  % {'ipaddress': hostaddr})
            raise
        try:
            connftp.login(username, password)
        except ftplib.error_perm:
            print("This username %(username)s refuse connect, please check "
                  "your username or password!" % {'username': username})
            connftp.close()
            raise
        return connftp

    def getFileList(self, connftp, remoteDir):
        '''Return {file name: size} for the entries of remoteDir.

        The long listing carries permissions, owner, size and so on; the size
        is taken from the fifth whitespace-separated field and the name from
        the last one. Both values are kept as strings.
        '''
        connftp.cwd(remoteDir)
        filesDetail = connftp.nlst('-l')
        fileList = {}
        for fileDetail in filesDetail:
            fields = fileDetail.strip().split()
            # Skip lines too short to hold both a size and a separate name
            # (for example a "total N" summary line).
            if len(fields) < 6:
                continue
            fileList[fields[-1]] = fields[4]
        return fileList

    def comparisonFileList(self, lastFileList, newFileList):
        '''Compare newFileList with the saved snapshot, then save newFileList.

        lastFileList -- path of the pickle file holding the previous snapshot
        newFileList  -- {file name: size} of the current remote listing

        Returns the new file names (sorted) followed by the names whose size
        changed (sorted). A missing, empty or unreadable snapshot is treated
        as "nothing fetched yet", so every file is reported.

        NOTE: the snapshot is rewritten here, before any download happens, so
        a file whose download fails later is not reported again unless its
        size changes.
        '''
        previous = {}
        if os.path.isfile(lastFileList) and os.path.getsize(lastFileList) > 0:
            with open(lastFileList, "rb") as fp:
                try:
                    # NOTE: pickle is only safe on trusted data; this file is
                    # expected to be written solely by this method.
                    loaded = cPickle.load(fp)
                except (EOFError, cPickle.UnpicklingError):
                    print("Load %(filename)s was failed"
                          % {'filename': lastFileList})
                else:
                    if isinstance(loaded, dict):
                        previous = loaded

        currentNames = set(newFileList)
        previousNames = set(previous)
        # Files that did not exist in the previous listing.
        diffFileList = sorted(currentNames - previousNames)
        # Files present in both listings whose size differs.
        diffFileList.extend(
            name for name in sorted(currentNames & previousNames)
            if newFileList[name] != previous[name])

        # Save the latest listing for the next run.
        with open(lastFileList, "wb") as fp:
            cPickle.dump(newFileList, fp)
        return diffFileList

    def machedFileList(self, diffFileList, interfaceID, interfaceDate):
        '''Keep the names containing both interfaceID and interfaceDate.'''
        return [flist for flist in diffFileList
                if interfaceID in flist and interfaceDate in flist]

    def download(self, connftp, localDir, getFileList):
        '''Download every name in getFileList into localDir, then quit.

        localDir is created when missing and becomes the current working
        directory. When it cannot be created or entered nothing is
        downloaded. The FTP session is closed in every case.
        '''
        try:
            try:
                if not os.path.isdir(localDir):
                    os.makedirs(localDir)
                os.chdir(localDir)
            except OSError:
                print('Dose\'t enter the directory , mybe you have not authority!')
                return
            # Fetch the latest files.
            for remotefile in getFileList:
                try:
                    with open(remotefile, "wb") as localFile:
                        connftp.retrbinary("RETR %s" % remotefile,
                                           localFile.write)
                except ftplib.error_perm:
                    print('ERROR: cannot read file "%s"' % remotefile)
        finally:
            connftp.quit()


if __name__ == '__main__':
    interfaceDate = '20150520'
    interfaceID = None
    # Keyword arguments keep the date out of the interfaceID position.
    getDataBaseDiff = GetDataBaseDiff('./config.properties',
                                      interfaceID=interfaceID,
                                      interfaceDate=interfaceDate,
                                      delay=0)
    (hostaddr, username, password,
     remoteDir, localDir, lastFileList) = getDataBaseDiff.getConfig(interfaceDate)
    connectionFtp = getDataBaseDiff.connect(hostaddr, username, password)
    fileList = getDataBaseDiff.getFileList(connectionFtp, remoteDir)
    diffFileList = getDataBaseDiff.comparisonFileList(lastFileList, fileList)
    if interfaceID is not None:
        # Restrict the download to the requested interface and date.
        diffFileList = getDataBaseDiff.machedFileList(
            diffFileList, interfaceID, interfaceDate)
    getDataBaseDiff.download(connectionFtp, localDir, diffFileList)
如上,是学习 Python 后尝试编写的代码。后续可修改为在配置文件中配置多个平台,以获取多平台的接口数据。
时间: 2024-10-25 23:58:22