use strict;
use warnings;

use LWP::Simple;
use HTML::LinkExtor;

# Fetch a Baidu Tieba thread page and print every link attribute found on
# its <img> tags, one "attribute -> url" pair per line.

# The thread page to scan for image links.
my $page_url = "https://tieba.baidu.com/p/4929234512";

# get() returns undef when the download fails; stop with a clear message
# instead of handing undef to the parser.
my $html_code = get($page_url);
die "Failed to fetch $page_url\n" unless defined $html_code;

# IMG is registered as the link callback; the parser calls it while it
# walks the document.
my $img_link = HTML::LinkExtor->new(\&IMG);
$img_link->parse($html_code);

# Link callback: collect image links.
#
# Arguments:
#   $tag   - name of the tag the links were found on
#   %links - attribute name => link value pairs for that tag
#
# Prints "attribute -> value" for every link attribute of an <img> tag and
# ignores all other tags. Returns nothing.
sub IMG {
    my ($tag, %links) = @_;

    # Only image tags are of interest.
    return unless $tag eq 'img';

    # Sort the attribute names so the output order is stable between runs.
    for my $key (sort keys %links) {
        print "$key -> $links{$key}\n";
    }

    return;
}

# Sample output:
#
# src -> https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/343a66656e6768756f7069616e323031af7c
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> https://ss0.bdstatic.com/9r-1bjml2gcT8tyhnq/fc-feed/0/pic/51d89e69dd318a8c2bcb07341879ac64.jpg
# src -> https://ss0.bdstatic.com/9r-1bjml2gcT8tyhnq/fc-feed/0/pic/223a419756a2209b84f8f306d021a4a5.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon25.png
# src -> https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon25.png
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
# src -> https://imgsa.baidu.com/forum/pic/item/d933c895d143ad4bcf1ab5478b025aafa40f0604.jpg
# src -> https://imgsa.baidu.com/forum/pic/item/78f0f736afc379319921ed85e2c4b74542a911d4.jpg
# src -> https://imgsa.baidu.com/forum/pic/item/2f2eb9389b504fc23bf50aaaecdde71191ef6df3.jpg
# src -> https://imgsa.baidu.com/forum/pic/item/d100baa1cd11728ba5c4656bc1fcc3cec2fd2c8a.jpg
# src -> https://imgsa.baidu.com/forum/pic/item/2df5e0fe9925bc31b71993f157df8db1cb137017.jpg
当然，你还可以加一个正则，把不是以 http:// 或 https:// 开头的链接过滤掉。
时间: 2024-10-14 15:08:43