Python 字串处理

#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import sys
import re
import shutil
import xlrd
import xlwt
import getopt
import math
from xlutils.copy import copy

'''
脚本使用：
    设置 strUiPorject（UI 项目名称），取值如下："mstar"/"mstar_atv"/"formal"/"formal_grey"/"haier"/"videocon"/"bbk"/"atv_project"
    删除无用字串：                                        ./genstr -d
    特殊标记的字串优先排序：                              ./genstr -p
    给已整理好优先级高的字串添加 strMark：                ./genstr -a
    读取 优先级字串整理.h 中的字串进行比对添加标记（未整理）：./genstr -c

脚本功能：
    1、根据 strUiPorject 设置的 UI 名称，迭代过滤 UI 目录的所有源文件和头文件，获取项目使用字串总数，并删除 UIL 多余字串
    2、对某一种语言做特殊标记 strMark，标记的字串会放在翻译的最后面
    3、mstar 优先级字串整理在 mstar优先字串整理.h，已使用字串整理在 mstar已使用字串整理.h 中，
       formal 优先级字串整理在 formal优先字串整理.h
执行过程：
    1、设置 UI 项目名称
    2、根据 UI 名称，配置过滤路径和 UIL 删除路径（filterPath/strUilPath）
    3、再根据配置的路径执行过滤和删除动作

注意：
    SourceCode 中，有一些是以 TV_IDS_String_ID+Offset 方式获取新字串的，这些字串要手动加到脚本，以防误删
        如：TV_IDS_String_GMT_0 字串
'''

# ---- Select the UI project here --------------------------------------------
# Accepted values: "mstar" / "formal" / "formal_grey" / "mstar_atv" /
#                  "haier" / "videocon" / "bbk" / "atv_project"
strUiPorject = "formal_grey"
# -----------------------------------------------------------------------------

# Mode switches; all start disabled.
g_deleteMode, g_priorityMode, g_AddmarkMode, g_CompareMode = 0, 0, 0, 0

setStr = set()      # string IDs collected from the project sources
tupleStr = ()       # module-level placeholder; starts out empty
strMark = "aaaa"    # marker text used to tag priority strings

# ---- Derived paths; nothing below needs to be edited ------------------------
# Projects that do not follow the default "<name>" / "TV_UIProject" layout:
# project name -> (source directory name, UI project directory name).
_UI_LAYOUT = {
    "bbk": ("formal", "TV_UIProject"),
    "formal": ("formal", "TV_UIProject_new"),
    "formal_grey": ("formal", "TV_UIProject_grey"),
    "mstar_atv": ("mstar", "TV_UIProject_atv"),
}
_appDir, _uiDir = _UI_LAYOUT.get(strUiPorject, (strUiPorject, "TV_UIProject"))

filterPath = "aps/application/radisson/%s" % _appDir
strUilPath = "%s/UI_Project/%s/Languages" % (filterPath, _uiDir)
gamePath = "aps/game"

# Directories that are scanned for string usage.
listPath = [filterPath, gamePath]

def filterUsefulString():
    """Collect every string ID referenced by the project sources.

    Walks each directory in the module-level ``listPath`` and scans every
    file whose name contains ``.c`` or ``.h``, except ``TV_strid.h``.
    Each ``TV_IDS_String...`` token found is added to the module-level
    ``setStr`` with its ``TV_`` prefix removed; a line that mentions
    ``TV_IDS_Game_Menu_OSD_String`` adds ``IDS_Game_Menu_OSD_String``.

    The collected IDs are then written, one per line and sorted, to
    ``<strUiPorject>已使用字串整理.h`` in the current directory, and a summary
    is printed.
    """
    # findall() returns every ID on the line, however many there are.  The
    # previous two-group regex skipped lines holding more than two IDs, so
    # those IDs were later treated as unused and deleted from the UIL files.
    idPattern = re.compile(r"TV_(IDS_String\w*)")

    for rootPath in listPath:
        for dirPath, _, fileNames in os.walk(rootPath):
            for sourceFile in fileNames:
                if sourceFile == "TV_strid.h":
                    continue
                if ".c" not in sourceFile and ".h" not in sourceFile:
                    continue
                with open(os.path.join(dirPath, sourceFile), "r") as srcFile:
                    for line in srcFile:
                        setStr.update(idPattern.findall(line))
                        # Checked on its own so it is not missed when the
                        # same line also carries a TV_IDS_String token.
                        if "TV_IDS_Game_Menu_OSD_String" in line:
                            setStr.add("IDS_Game_Menu_OSD_String")

    UsedStrfilename = strUiPorject + "已使用字串整理.h"
    print("\n\n程序中共使用 %d 个字串。\n保存在当前目录 %s-文件中\n\n" % (len(setStr), UsedStrfilename))
    # Sorted so that repeated runs produce a stable, diff-friendly report.
    with open(UsedStrfilename, "w") as outFile:
        outFile.writelines([strId + "\n" for strId in sorted(setStr)])

'''
    #读EXCEL到映射表
def excelSetting():
    setElStr = set()
    mapStr = {}
    listFirst = []
    setDiff = set()
    book = xlrd.open_workbook(r'Languages.xls')
    sheet = book.sheet_by_index(0)

    listFirst = sheet.row_values(0)

    for row in range(sheet.nrows):
        cellStr = str(sheet.cell(row,0).value)
        cellStr.rstrip()
        if cellStr in setStr:
            mapStr[cellStr] = sheet.row_values(row)

    #setElStr = set(mapStr.keys())
    #setDiff = setElStr - setStr

    #写EXCEL
    wboot = xlwt.Workbook()
    sheet = wboot.add_sheet("Language")
    #操作第一行，抬头
    for col in range(len(listFirst)):
        sheet.write(0,col,listFirst[col])

    #其它行
    row = 1
    for (k,v) in mapStr.items():
        for col in range(len(v)):
            sheet.write(row,col,v[col])
        row = row + 1
    wboot.save(r'Language_.xls')

'''
#处理UIL文件，对比setStr集合，删除无用字串
def deleteString():
    """Delete unused ``<String ID=...>`` entries from the UIL files.

    Every file under the module-level ``strUilPath`` is read line by line.
    A ``<String ID="...">`` line is dropped when its ID is not in the
    module-level ``setStr``.  All other lines are kept, as are lines that:

    * contain ``IDS_String_GMT_`` or ``IDS_String_LNB``, or an ID of the
      form ``IDS_String_<one or two digits>"`` -- these strings are reached
      from the code through an offset and must never be deleted;
    * contain ``<String ID=`` but whose ID cannot be parsed (kept instead
      of crashing).

    A file is rewritten only when at least one entry was removed.  The
    number of removed entries is printed for every file.
    """
    offsetIdPattern = re.compile(r'IDS_String_\d{1,2}"')
    entryIdPattern = re.compile(r'<String ID="(\w*)"')

    for dirPath, _, fileNames in os.walk(strUilPath):
        for sourceFile in fileNames:
            filePath = os.path.join(dirPath, sourceFile)
            keptLines = []
            delCount = 0
            with open(filePath, "r") as uilFile:
                for line in uilFile:
                    # Strings addressed by offset in the code: always keep.
                    isProtected = (
                        "IDS_String_GMT_" in line
                        or "IDS_String_LNB" in line
                        or offsetIdPattern.search(line) is not None
                    )
                    if not isProtected and "<String ID=" in line:
                        entry = entryIdPattern.search(line)
                        if entry is not None and entry.group(1) not in setStr:
                            delCount += 1
                            continue
                    keptLines.append(line)
            if delCount:
                with open(filePath, "w") as uilFile:
                    uilFile.writelines(keptLines)
            print(sourceFile + "删除 %s" % delCount)

#处理UIL文件，迭代lanFist集合，标记字串放在UIL文件后面
def priorityString():
    """Move the strings tagged with ``strMark`` to the end of every UIL file.

    Step 1: read ``English.uil`` under the module-level ``strUilPath`` and
    collect the quoted ``IDS_String...`` ID of every line that contains
    ``strMark``.  The IDs are appended to ``<strUiPorject>优先字串整理.h`` in
    the current directory and printed.

    Step 2: for every file under ``strUilPath``, split its lines into
    priority lines (ID collected in step 1; ``strMark`` is stripped from
    them), ``IDS_String_spliteLine`` lines, and everything else.  When the
    file has at least one priority line and at least three other lines, it
    is rewritten as: other lines except the last, then the spliteLine
    lines, then the priority lines, then the last other line.  Files that
    do not meet that condition are left untouched.
    """
    pat = re.compile(r'.*"(IDS_String\w*)".*')

    lanFist = []
    with open(os.path.join(strUilPath, "English.uil"), "r") as englishFile:
        for line in englishFile:
            if strMark not in line:
                continue
            found = pat.search(line)
            if found:
                lanFist.append(found.group(1))

    PrStrfilename = strUiPorject + "优先字串整理.h"
    with open(PrStrfilename, "a") as outFile:
        outFile.writelines([strId + "\n" for strId in lanFist])
    print("优先级字串共%d，如下：" % len(lanFist))
    print(lanFist)
    print("\n\n优先字串共 %d 个。\n保存在当前目录 %s-文件中\n\n" % (len(lanFist), PrStrfilename))

    priorityIds = set(lanFist)  # O(1) membership for the per-line test below
    for dirPath, _, fileNames in os.walk(strUilPath):
        for sourceFile in fileNames:
            filePath = os.path.join(dirPath, sourceFile)
            # All three buckets start empty for every file.  Previously the
            # separator bucket was cleared only after a rewrite, so separator
            # lines of a skipped file were injected into the next file.
            priorityLines = []
            separatorLines = []
            otherLines = []
            with open(filePath, "r") as uilFile:
                for line in uilFile:
                    found = pat.search(line)
                    if found and found.group(1) in priorityIds:
                        priorityLines.append(line.replace(strMark, ""))
                    elif "IDS_String_spliteLine" in line:
                        separatorLines.append(line)
                    else:
                        otherLines.append(line)
            if priorityLines and len(otherLines) >= 3:
                # The last "other" line stays last in the rewritten file.
                reordered = (otherLines[:-1] + separatorLines
                             + priorityLines + otherLines[-1:])
                with open(filePath, "w") as uilFile:
                    uilFile.writelines(reordered)

#给已整理好优先级高的字串添加strMark
def AddMark():
    """Tag every English string that follows the separator with ``strMark``.

    Reads ``English.uil`` under the module-level ``strUilPath``.  Once a
    ``<String ID=`` line containing ``IDS_String_spliteLine`` has been seen,
    every later ``<String ID=`` line gets ``strMark`` inserted right after
    each ``Value="``.  The file is then rewritten in place and a completion
    message is printed.
    """
    englishPath = os.path.join(strUilPath, "English.uil")
    markedLines = []
    afterSeparator = False
    with open(englishPath, "r") as uilFile:
        for line in uilFile:
            if "<String ID=" in line:
                if "IDS_String_spliteLine" in line:
                    afterSeparator = True
                elif afterSeparator:
                    line = line.replace('Value="', 'Value="' + strMark)
            markedLines.append(line)
    # The read handle is closed before the same path is reopened for writing.
    with open(englishPath, "w") as uilFile:
        uilFile.writelines(markedLines)
    print("\n添加StrMark完成\n")

#读取 优先级字串整理.h 中的字串进行比对添加标记（未整理）
def CompareAddMark(priorityFile='formal优先字串整理.h'):
    """Tag the English strings whose IDs are listed in a priority file.

    Args:
        priorityFile: path of a text file holding ``IDS_String_...`` IDs,
            one per line.  Defaults to the file name that used to be
            hard-coded here.

    Each ID found in ``priorityFile`` is collected into a set, which is
    printed.  Then ``English.uil`` under the module-level ``strUilPath`` is
    rewritten in place: every ``<String ID="...">`` line whose ID is in the
    set gets ``strMark`` inserted right after each ``Value="``.

    Lines of ``priorityFile`` without an ID, and ``<String ID=`` lines whose
    ID cannot be parsed, are ignored rather than raising.
    """
    # \w* stops at the end of the identifier; the old ".*" capture also
    # swallowed trailing characters (e.g. "\r"), so such IDs never matched.
    idPattern = re.compile(r"IDS_String_\w*")
    entryIdPattern = re.compile(r'<String ID="(\w*)"')

    strSet = set()
    with open(priorityFile, "r") as listFile:
        for line in listFile:
            found = idPattern.search(line)
            if found:
                strSet.add(found.group(0))
    print(strSet)

    englishPath = os.path.join(strUilPath, "English.uil")
    strList = []
    with open(englishPath, "r") as uilFile:
        for line in uilFile:
            if "<String ID=" in line:
                entry = entryIdPattern.search(line)
                if entry and entry.group(1) in strSet:
                    line = line.replace('Value="', 'Value="' + strMark)
            strList.append(line)
    with open(englishPath, "w") as uilFile:
        uilFile.writelines(strList)

def fun_parse_InputParam():
    """Parse ``sys.argv`` and switch on the requested mode flags.

    Recognised options (none takes a value):
        -d  sets ``g_deleteMode`` to 1
        -p  sets ``g_priorityMode`` to 1
        -a  sets ``g_AddmarkMode`` to 1
        -c  sets ``g_CompareMode`` to 1

    Raises:
        SystemExit: with status 2, after writing the getopt error message
            to stderr, when an unknown option is given.
    """
    global g_deleteMode
    global g_priorityMode
    global g_AddmarkMode
    global g_CompareMode
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'dpac')
    except getopt.GetoptError as err:
        # Report the problem and fail, instead of exiting silently with
        # status 0 as if the run had succeeded.
        sys.stderr.write("%s\n" % err)
        sys.exit(2)

    # getopt only returns options named in 'dpac', so no fallback branch
    # is needed here.
    for op, value in opts:
        if op == "-d":
            g_deleteMode = 1
        elif op == "-p":
            g_priorityMode = 1
        elif op == "-a":
            g_AddmarkMode = 1
        elif op == "-c":
            g_CompareMode = 1

if __name__ == "__main__":
    fun_parse_InputParam()

    # -d: gather the IDs still referenced by the sources, then prune the rest.
    if g_deleteMode:
        filterUsefulString()
        deleteString()

    # -p, -a and -c each map to one action; they run in this fixed order.
    for enabled, action in (
        (g_priorityMode, priorityString),
        (g_AddmarkMode, AddMark),
        (g_CompareMode, CompareAddMark),
    ):
        if enabled:
            action()

原文地址：https://www.cnblogs.com/jiangzhaowei/p/9278403.html

时间： 2024-10-12 19:59:34

Python 字串处理的相关文章

python之字串

python字串声明: 单引('), 双引("), 三引(''' 或 """). python字串前缀: r表示原生字串, 字串内容: (1)不能包含声明符(除非转义, 但转义符也会作为字串内容); (2)不能以奇数个转义符结尾(实质也是避免最后的声明符被转义). 简言之, 原生字串的写法一般用于正则表达式. u表示Unicode字串. 注意: python 2.x中默认不是Unicode字符串, python 3.x默认全是Unicode字符串.

leetcode无重复字符的最长字串 python实现

无重复字符的最长字串是一道字符串处理算法的题目,在日常编程中,处理字符串是常见任务.用Python来实现leetcode这道算法题,该题目会涉及到一个概念"滑动窗口". 一.题目描述给定一个字符串,请你找出其中不含有重复字符的最长子串的长度(Longest substring without repeating characters). 示例 1: 输入: "abcabcbb" 输出: 3 解释: 因为无重复字符的最长子串是 "abc",所

动态规划--求最大连续子数组的和（Python实现）&求解最大连续乘积字串（Python实现）

def MaxSum(self,array,n): sum=array[0] result=array[0] for i in range(0,n): if sum<0: sum=a[i] else: sum=sum+a[i] start1=i if sum>result: result=sum end=i start=start1 print result,start,end 上述采用的是动态规划思想:假设sum[i]表示以第i个元素结尾的最大连续字串,那么sum[i]=max{sum[i-

c#调用dll接口传递utf-8字串方法

1. 起源: VCU10之视频下载模块,采用纯python实现,c++代码调用pythonrun.h配置python运行环境启动python模块,以使界面UI能够使用其中功能. 棘手问题!用去一天时间反复打印日志,验证所传字串区别,以期望发现问题定位问题,直至下班前始有灵感. 验证发现,非中文字符可以正常下载,中文字符下载解析失败,当时即想到可能是字串不统一所致,就在python代码中做字串转换处理,均不奏效. 原接口封装代码如下: [DllImport("VideoDownloader&quo

NOIP2002 字串变换

题二字串变换 (存盘名: NOIPG2) [问题描述]: 已知有两个字串 A$, B$ 及一组字串变换的规则(至多6个规则): A1$ -> B1$ A2$ -> B2$ 规则的含义为:在 A＄中的子串 A1$ 可以变换为 B1$.A2$ 可以变换为 B2$ …. 例如:A$＝'abcd' B$＝'xyz' 变换规则为: ‘abc’->‘xu’ ‘ud’->‘y’ ‘y’->‘yz’ 则此时,A$ 可以经过一系列的变换变为 B$,其变换的过程为: ‘abcd’->‘x

hdu 4333 扩展kmp+kmp重复字串去重

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4333 关于kmp next数组求最短重复字串问题请看:http://www.cnblogs.com/z1141000271/p/7406198.html 扩展kmp请看:http://www.cnblogs.com/z1141000271/p/7404717.html 题目大意:一个数字,依次将第一位放到最后一位,问小于本身的数的个数及等于本身的个数和大于本身的个数,但是要注意重复的不再计算题解:

poj2406 kmp 求最小循环字串

Power Strings Time Limit: 3000MS Memory Limit: 65536K Total Submissions: 47748 Accepted: 19902 Description Given two strings a and b we define a*b to be their concatenation. For example, if a = "abc" and b = "def" then a*b = "

字串符相关 split() 字串符分隔 substring() 提取字符串 substr()提取指定数目的字符

split() 方法将字符串分割为字符串数组,并返回此数组. stringObject.split(separator,limit) 我们将按照不同的方式来分割字符串: 使用指定符号分割字符串,代码如下: var mystr = "www.imooc.com"; document.write(mystr.split(".")+"<br>"); document.write(mystr.split(".", 2)+&

C语言:自定义一个查找字串的功能函数，类似于<string.h>中的strstr()

//自定义一个字符串字串查找标准库函数strstr() #include<stdio.h> #include<string.h> char* myStrstr(char *str1,char *str2); int main() { char *str1 = "hello worl world ld"; char *str2 = " world "; puts(myStrstr(str1,str2)); return 0; } char *m