iOS 使用正则表达式去掉 HTML 中的标签元素，获得纯文本
content 是根据网址获得的网页源码字符串；下面的代码返回去掉标签和换行后剩下的非空文本片段数组。
// Strips HTML tags and line breaks from `content` and returns the remaining
// non-empty text fragments, in document order, as an NSMutableArray of
// NSString.
//
// `content` is expected to be an NSString holding the page source; it is
// declared outside this fragment and is left unmodified here.
//
// The fragments are taken directly from the gaps between regex matches.
// Substituting a "-" placeholder for every tag and then splitting on "-"
// would also split (and drop) any hyphen that is part of the real text,
// e.g. "well-known" or "2020-01-01".
NSMutableArray *marr = [NSMutableArray array];

// Matches either a complete tag ("<" ... ">") or a single newline.
NSError *patternError = nil;
NSRegularExpression *tagOrNewline =
    [NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|\n"
                                              options:0
                                                error:&patternError];

// Nothing to extract from nil/empty input, or if the pattern failed to compile.
if (tagOrNewline == nil || content.length == 0) {
    return marr;
}

NSArray *matches = [tagOrNewline matchesInString:content
                                         options:0
                                           range:NSMakeRange(0, content.length)];

// `cursor` is the index just past the previous tag/newline; everything
// between it and the next match is plain text.
NSUInteger cursor = 0;
for (NSTextCheckingResult *match in matches) {
    NSRange hit = match.range;
    if (hit.location > cursor) {
        NSRange gap = NSMakeRange(cursor, hit.location - cursor);
        [marr addObject:[content substringWithRange:gap]];
    }
    cursor = NSMaxRange(hit);
}

// Trailing text after the last tag/newline (or the whole string if there
// were no matches at all).
if (cursor < content.length) {
    [marr addObject:[content substringFromIndex:cursor]];
}

return marr;