文件读写操作
fileUtils.py
# -*- coding: utf-8 -*-
import os
def getFileList(dir, fileList=None):
    """
    Recursively collect the paths of all files found under a path.

    param dir: path to walk; a regular file is recorded directly, a directory
               is descended into, anything else contributes nothing
    param fileList: optional list that receives the paths; a new list is
                    created when it is omitted
    return fileList: list of collected file paths (the same object that was
                     passed in, when one was given)
    """
    # Create the accumulator per call: a mutable default ([]) is built once at
    # definition time and would keep growing across unrelated calls.
    if fileList is None:
        fileList = []
    if os.path.isfile(dir):
        fileList.append(dir)
    elif os.path.isdir(dir):
        for s in os.listdir(dir):
            # To ignore certain entries, test `s` here and `continue`.
            getFileList(os.path.join(dir, s), fileList)
    return fileList
def readStrFromFile(filePath):
    """
    Read the entire content of a file in a single call.

    param filePath: path of the file to read
    return string : the file content, as read from a binary-mode ("rb") handle
    """
    with open(filePath, "rb") as src:
        return src.read()
def readLinesFromFile(filePath):
    """
    Read a file and return its content split into lines.

    param filePath: path of the file to read
    return lines : list of lines from a binary-mode ("rb") handle; each line
                   keeps its own line terminator
    """
    with open(filePath, "rb") as src:
        # Iterating a file object yields the same lines readlines() returns.
        return list(src)
def writeStrToFile(filePath, string):
    """
    Write a string to a file, replacing any existing content.

    The file is opened in binary mode ("wb"). Byte strings are written
    unchanged; text is encoded as UTF-8 first, because a binary handle
    rejects text on Python 3.

    param filePath: path of the file to write
    param string : data to write (text or bytes)
    """
    # type(u"") is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(string, type(u"")):
        string = string.encode("utf-8")
    with open(filePath, "wb") as f:
        f.write(string)
def appendStrToFile(filePath, string):
    """
    Append a string to the end of a file, creating the file if needed.

    The file is opened in binary append mode ("ab"). Byte strings are written
    unchanged; text is encoded as UTF-8 first, because a binary handle
    rejects text on Python 3.

    param filePath: path of the file to append to
    param string : data to append (text or bytes)
    """
    # type(u"") is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(string, type(u"")):
        string = string.encode("utf-8")
    with open(filePath, "ab") as f:
        f.write(string)
def dumpToFile(filePath, content):
    """
    Serialize an object with pickle and store it in a local file.

    param filePath: path of the file to write (opened in "wb" mode)
    param content : object to save (list, dict, tuple, ...)
    """
    import pickle
    with open(filePath, "wb") as sink:
        pickle.Pickler(sink).dump(content)
def loadFromFile(filePath):
    """
    Load a pickled object back from a local file.

    param filePath: path of the file to read
    return content: the deserialized object (e.g. list, dict, tuple, ...)
    """
    import pickle
    # NOTE: unpickling can execute arbitrary code; only load files that come
    # from a trusted source.
    # Pickle data is binary, so the file must be opened in "rb" mode: a
    # text-mode handle fails on Python 3 and can corrupt data on Windows.
    with open(filePath, "rb") as f:
        content = pickle.load(f)
    return content
字符串操作
zhuanma.py
# -*- coding: utf-8 -*-
import os
import sys
# get_module_res(*res) opens a resource stored next to this module and returns
# a binary stream. pkg_resources is used when it is installed; otherwise the
# file is opened directly from this module's directory.
try:
    import pkg_resources
except ImportError:
    def get_module_res(*res):
        """Open the resource at path components *res* relative to this file, in 'rb' mode."""
        return open(os.path.normpath(os.path.join(
            os.getcwd(), os.path.dirname(__file__), *res)), 'rb')
else:
    def get_module_res(*res):
        """Return pkg_resources' stream for the resource at path components *res*."""
        return pkg_resources.resource_stream(__name__, os.path.join(*res))
# Python 2 / Python 3 compatibility names used by the rest of the module.
PY2 = sys.version_info[0] == 2

default_encoding = sys.getfilesystemencoding()

if not PY2:
    text_type = str
    string_types = (str,)
    xrange = range

    def iterkeys(d):
        """Return an iterator over the keys of d."""
        return iter(d.keys())

    def itervalues(d):
        """Return an iterator over the values of d."""
        return iter(d.values())

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of d."""
        return iter(d.items())
else:
    text_type = unicode  # noqa: F821 - builtin on Python 2 only
    string_types = (str, unicode)  # noqa: F821

    def iterkeys(d):
        """Return an iterator over the keys of d."""
        return d.iterkeys()

    def itervalues(d):
        """Return an iterator over the values of d."""
        return d.itervalues()

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of d."""
        return d.iteritems()
def strdecode(sentence):
    """
    Decode an encoded string to text, trying UTF-8 first and then GBK.

    param sentence: text or an encoded byte string; values that are already
                    text_type are returned unchanged
    return : the decoded text
    """
    if not isinstance(sentence, text_type):
        try:
            sentence = sentence.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: fall back to GBK and drop any bytes that
            # still cannot be decoded.
            sentence = sentence.decode('gbk', 'ignore')
    return sentence
def resolve_filename(f):
    """
    Return a printable name for a file-like object.

    param f: any object
    return : f.name when the attribute exists, otherwise repr(f)
    """
    missing = object()
    name = getattr(f, "name", missing)
    return repr(f) if name is missing else name
stringUtils.py
# -*- coding: utf-8 -*-
import zhuanma
def jiema(string):
    """
    Decode a string to unicode text by delegating to zhuanma.strdecode.

    param string: the string to decode
    return : the unicode string produced by zhuanma.strdecode
    """
    import zhuanma as _codec
    return _codec.strdecode(string)
def filterReturnChar(string):
    """
    Remove every carriage-return ("\\r") character from a string.

    :param string: the string to clean
    :return: the string with all "\\r" characters removed
    """
    carriage_return = "\r"
    cleaned = string.replace(carriage_return, "")
    return cleaned
def encodeUTF8(string):
    """
    Re-encode a string as UTF-8.

    :param string: the string to convert; it is first decoded with jiema()
    :return: the UTF-8 encoded string
    """
    decoded = jiema(string)
    return decoded.encode("utf-8")
def filterCChar(string):
    """
    Keep only the Chinese characters of a string.

    Characters in the range U+4E00 to U+9FA5 are kept, in their original
    order; everything else is dropped.

    :param string: the string to filter
    :return: a string made of the matching characters only
    """
    import re
    hanzi = re.compile(u"[\u4e00-\u9fa5]+", re.U)
    return "".join(match.group() for match in hanzi.finditer(string))