1.基本用法
#!/usr/bin/env python
# coding=utf-8
import re

# example 1: split a string on runs of whitespace (spaces and tabs).
text = "fjsk test\t fjskd bar\t \ttest"
regex = re.compile(r'\s+')
print(regex.split(text))

# example 2: find and replace e-mail addresses.
# NOTE: the sample addresses had been replaced by the placeholder
# "[email protected]" (e-mail obfuscation on the web page); they are restored
# here from the (user, domain, suffix) tuples printed by the later examples.
email = """
jfksdfasm@qq.com
test@gamil.com
jfdskf@163.com
jkmiao@yahoo.com.cn
"""
pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z.0-9]{2,6}'
regex = re.compile(pattern, flags=re.IGNORECASE)

# get all: findall() returns every non-overlapping match as a list of strings
print(regex.findall(email))

# get the first one
# NOTE(review): this searches `text`, which contains no e-mail address, so `m`
# is None (matching the published output). Search `email` instead to get a
# match object; email[m.start():m.end()] would then be the matched address.
m = regex.search(text)
print(m)

# replace: substitute every matched address with the literal 'RECORD'
print(regex.sub('RECORD', email))
# Output:
[email protected]:~/workplace/python/test$ python regex.py
['fjsk', 'test', 'fjskd', 'bar', 'test']
['jfksdfasm@qq.com', 'test@gamil.com', 'jfdskf@163.com', 'jkmiao@yahoo.com.cn']
None
RECORD RECORD RECORD RECORD
2. 分组,返回元组
# example 3: capture groups. match().groups() returns the groups of one match
# as a tuple; findall() returns a list with one tuple of groups per match.
# Uses `re` and `email` from example 2.
pattern = r'([A-Z0-9._%+=]+)@([A-Z0-9.-]+)\.([A-Z.]{2,5})'
regex = re.compile(pattern, flags=re.IGNORECASE)
# The address had been replaced by "[email protected]"; restored from the
# published output. The last group accepts at most 5 characters, so "suffix"
# is captured as "suffi".
m = regex.match('name@domain.suffix')
print(m.groups())
print(regex.findall(email))
# output
('name', 'domain', 'suffi')
[('jfksdfasm', 'qq', 'com'), ('test', 'gamil', 'com'), ('jfdskf', '163', 'com'), ('jkmiao', 'yahoo.com', 'cn')]
3.给分组加名称,返回字典
# example 4: named groups. match().groupdict() returns a dict keyed by group
# name. re.VERBOSE lets the pattern be laid out over several lines because
# whitespace inside it is ignored. Uses `re` and `email` from example 2.
# NOTE: the group name "userame" (sic) is kept as published, since it is the
# key that appears in the printed dict.
regex = re.compile(r"""
    (?P<userame>[A-Z0-9._%+-]+)
    @(?P<domain>[A-Z0-9.-]+)
    \.
    (?P<suffix>[A-Z0-9.]{2,4})
    """, flags=re.IGNORECASE | re.VERBOSE)
# The address had been replaced by "[email protected]"; restored from the
# published groupdict() output.
m = regex.match("jkmaio@sysu.com")
print(m.groupdict())
print(regex.findall(email))
# output
[email protected]:~/workplace/python/test$ python regex.py
{'domain': 'sysu', 'userame': 'jkmaio', 'suffix': 'com'}
[('jfksdfasm', 'qq', 'com'), ('test', 'gamil', 'com'), ('jfdskf', '163', 'com'), ('jkmiao', 'yahoo.com', 'cn')]
时间: 2024-11-13 23:24:49