PHP 采集 Windows 10 App 的信息

<?php
/**
 * test new page crawler
 */

// Store page to crawl. The commented-out line is an alternative sample URL.
#$url = 'http://apps.microsoft.com/windows/en-us/app/fotor/6f797ba2-500d-4dee-9c5a-13c2d818c958';
$url = 'https://www.microsoft.com/en-us/store/apps/adobe-photoshop-express/9wzdncrfj27n';

$url = trim($url);
// Collects extracted values; only the pfn is appended to it further down.
$d = array();

// get() returns the response body followed by a "[url:<final URL>]" marker.
// Entities are decoded once here so the patterns below match plain text.
// NOTE(review): ENT_HTML5 is passed without a quote flag, so per the PHP manual
// &quot; / &#039; should stay encoded -- confirm this is intended.
$content = html_entity_decode(get($url), ENT_HTML5, 'UTF-8');

// pfn: value of the data-pfn attribute (required, the script stops without it).
$pfn = '';
if (preg_match('/data-pfn="(.*)">/isU', $content, $match)) {
    $pfn = $match[1];
    $d[] = $pfn;
    echo "pfn:" . $pfn . "\n";
} else {
    echo 'pfn error:', $url, "\n";
    exit();
}

// new url: the final URL that get() appended to the body as "[url:...]".
// The match is greedy with the "s" flag, so it runs from the first "[url:" to
// the last "]" in the content.
if (preg_match('/\[url:(.*)\]/is', $content, $match)) {
    $newurl = $match[1];
    echo "url:" . $newurl . "\n";
} else {
    echo 'get no new url' . "\n";
    exit();
}

// icon: logo image source plus its background colour (required).
if (preg_match('/class="pull-left ph-logo">.*src="(.*)".*style="background-color:(.*);.*"/isU', $content, $match)) {
    $icon = $match[1];
    $backgroundcolor = $match[2];
    echo "icon:" . $icon . "\n";
    echo "backgroundcolor:" . $backgroundcolor . "\n";
} else {
    echo 'get no icon' . "\n";
    exit();
}

// name: text of the page-title element (required).
// No "s" flag here, so the opening attributes and the text must share one line.
if (preg_match('/id="page-title".*itemprop="name">(.*)<\//', $content, $match)) {
    $name = $match[1];
    echo "name:" . $name . "\n";
} else {
    echo 'get no name' . "\n";
    exit();
}

// alias: the path segment that follows "apps/" in the final URL (required).
if (preg_match('/apps\/(.*)\//isU', $newurl, $match)) {
    $alias = $match[1];
    echo "alias:" . $alias . "\n";
} else {
    echo 'get no alias' . "\n";
    exit();
}

// rating: average score (required).
if (preg_match('/class="srv_ratingsScore win-rating-average">(.*)<\//', $content, $match)) {
    $rating = $match[1];
    echo "rating:" . $rating . "\n";
} else {
    echo 'get no rating' . "\n";
    exit();
}

// rating num: total number of ratings (required).
// Commas and the word "ratings" are stripped from the captured text.
if (preg_match('/class="win-rating-total">(.*)<\//', $content, $match)) {
    $ratingcount = trim(str_replace(array(',', 'ratings'), '', $match[1]));
    echo "Rating num:" . $ratingcount . "\n";
} else {
    echo 'get no rating num' . "\n";
    exit();
}

// price (required).
if (preg_match('/class="price srv_price"><span class="header-sub">(.*)<\//', $content, $match)) {
    $price = $match[1];
    echo "price:" . $price . "\n";
} else {
    echo 'get no price' . "\n";
    exit();
}

// category (required): prefer the ms.prod_sbcat meta tag and fall back to
// ms.prod_cat. The second preg_match only runs when the first one fails, and
// $match always holds the result of the last pattern tried.
if (
    preg_match('/<meta name="ms.prod_sbcat" content="(.*)" \/>/isU', $content, $match)
    || preg_match('/<meta name="ms.prod_cat" content="(.*)" \/>/isU', $content, $match)
) {
    $category = trim($match[1]);
    echo "category:" . $category . "\n";
} else {
    echo 'get no category' . "\n";
    exit();
}

// content rating (optional): defaults to 'all' when the page has none.
if (preg_match('/Content Rating: <a .*>(.*)<\//isU', $content, $match)) {
    $contentRating = trim($match[1]);
    echo "content rating:" . $contentRating . "\n";
} else {
    echo 'get no content rating' . "\n";
    $contentRating = 'all';
}

// publisher (required).
if (preg_match('/Publisher<\/dt>.*<div class="content.*".*>(.+)<\//isU', $content, $match)) {
    $publisher = trim($match[1]);
    echo "publisher:" . $publisher . "\n";
} else {
    echo 'get no publisher' . "\n";
    exit();
}

// works on (optional: a miss is reported and the crawl continues).
if (preg_match('/Works on: (.*)</isU', $content, $match)) {
    $workson = trim($match[1]);
    echo 'works on:' . $workson . "\n";
} else {
    echo 'get no works platform' . "\n";
}

// size (optional).
if (preg_match('/Approximate size<\/dt>.*<div class="content.*".*>(.+)<\//isU', $content, $match)) {
    $size = trim($match[1]);
    echo "size:" . $size . "\n";
} else {
    echo 'get no size' . "\n";
}

// supported processors (optional).
if (preg_match('/Supported processors<\/dt>.*<div class="content.*".*>(.+)<\//isU', $content, $match)) {
    $processors = trim($match[1]);
    echo "processors:" . $processors . "\n";
} else {
    echo 'get no processors' . "\n";
}

// age rating (optional).
if (preg_match('/Age rating<\/dt>.*<div class="content.*".*>(.+)<\//isU', $content, $match)) {
    $age = trim($match[1]);
    echo "age:" . $age . "\n";
} else {
    echo 'get no age' . "\n";
}

// languages (optional): isolate the <dd> that follows "Supported languages",
// then collect every <div> entry inside it into a comma-separated list.
// A <dd> without any <div> entries is reported as a miss instead of being
// skipped silently.
if (
    preg_match('/Supported languages<\/dt>.*<dd .*>(.*)<\/dd>/isU', $content, $match)
    && preg_match_all('/<div>([^<].*)<\/div>/', $match[1], $temp)
) {
    $languages = implode(",", $temp[1]);
    echo "languages:" . $languages . "\n";
} else {
    echo 'get no languages' . "\n";
}

// features (optional): isolate the <ul> under the "Features" section title,
// then collect its <li class="avoid-break"> items. A list without any such
// items is reported as a miss instead of being skipped silently.
if (
    preg_match('/class="section-title.*">Features.*<ul>(.*)<\/ul>/isU', $content, $match)
    && preg_match_all('/<li class="avoid-break">(.*)<\/li>/isU', $match[1], $temp)
) {
    $features = $temp[1];
    echo 'features:';
    print_r($features);
    echo "\n";
} else {
    echo 'get no features' . "\n";
}

// release notes (optional): first <p> after the "Version Notes" section title.
if (preg_match('/class="section-title.*">Version Notes.*<p>(.*)<\/p>/isU', $content, $match)) {
    $releasenotes = $match[1];
    echo "release Notes:" . $releasenotes . "\n";
} else {
    echo 'no version notes' . "\n";
}

// screenshots (required): src of every image in a 16:9 media container.
if (preg_match_all('/class="media-img ratio-16-9">.*<img src="(.*)".*\/>/isU', $content, $match)) {
    $screenshots = $match[1];
    echo "screenshots:";
    print_r($screenshots);
    echo "\n";
} else {
    echo 'get no screenshots' . "\n";
    exit();
}

// description (required): first paragraph inside the "showmore" container.
if (preg_match('/<div class="showmore m-t-pdp">.*<p.*>(.*)<\//isU', $content, $match)) {
    $description = $match[1];
    echo 'description:' . $description . "\n";
} else {
    echo "get no description content\n";
    exit();
}

// Every field has been printed; stop here.
exit();

/**
 * Download a page with cURL, following redirects.
 *
 * The effective (post-redirect) URL is appended to the body as a
 * "[url:<effective URL>]" marker so the caller can recover it from the
 * returned string.
 *
 * @param string $url Address to request.
 * @return string Response body (empty when the transfer failed) followed by
 *                the "[url:...]" marker.
 */
function get($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        // No handle means no request was made; keep the return shape and
        // report the requested URL in the marker.
        error_log('curl_init failed for ' . $url);
        return "[url:$url]";
    }

    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11');
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    $output = curl_exec($ch);
    if ($output === false) {
        // Transfer failed: log the reason and fall back to an empty body, which
        // is what concatenating false produced before.
        error_log('curl error: ' . curl_error($ch));
        $output = '';
    }

    $curlinfo = curl_getinfo($ch);
    $lasturl = $curlinfo['url'];
    curl_close($ch);

    return $output . "[url:$lasturl]";
}

成品站:www.topwindata.com。Windows 10 一发布,流量就翻了一番,不过还是只有 1000 左右 IP。

时间: 2024-10-02 09:47:49

php采集windows 10 app的信息的相关文章

打造理想的Windows 10 APP开发环境的5个步骤

(此文章同时发表在本人微信公众号"dotNET每日精华文章",欢迎右边二维码来关注.) 题记:微软即将发布Windows 10手机版,实际上很多人现在已经开始在开发Windows 10 APP了.今天推荐的文章简单介绍了如何搭建一个理想的开发环境. 周末一个大学同学到访成都,所以停更了2天.今天时间也不多,就推荐一个简单的文章,作者Michael Crump在这篇文章中分享了如何打造理想的Windows 10 APP开发环境的5个步骤. 分别是: 安装一个干净的系统.要更好的使用Win

TroubleShoot: Enable Developer Mode in Windows 10 Insider Preview Build 10074

There is a known issue in Windows 10 Insider Preview build 10074 (see here). Developers cannot enable Developer Mode in the Settings app for installing and testing apps on this build. We’ll enable this in an upcoming build. In the meantime, you will

背水一战 Windows 10 (64) - 控件(WebView): 加载指定 HttpMethod 的请求, 自定义请求的 http header, app 与 js 的交互

[源码下载] 作者:webabcd 介绍背水一战 Windows 10 之 控件(WebView) 加载指定 HttpMethod 的请求 自定义请求的 http header app 与 js 的交互 示例1.演示 WebView 如何加载指定 HttpMethod 的请求以及如何自定义请求的 http headerWebApi/Controllers/WebViewPostController.cs /* * 用于 WebView 演示“如何加载指定 HttpMethod 的请求,以及如何自

[转载]在 Windows 10 中, 如何卸载和重新安装 OneNote App

在 Windows 10 中, 如何卸载和重新安装 OneNote App 15/8/2015 使用 PowerShell 命令卸载 OneNote App 开始菜单 -> 输入 "Powershell" -> 右键点击菜单 -> 用管理员运行 -> 输入以下命令行 Get-AppxPackage *microsoft.office.onenote* | Remove-AppxPackage 使用 Windows 商店重新安装 OneNote App 打开 Wi

Windows 10 版本信息

原文 https://technet.microsoft.com/zh-cn/windows/release-info Windows 10 版本信息 Microsoft 已更新其服务模型. 半年频道每年发布两次功能更新,时间大概在 3 月和 9 月,每个版本的服务时间线为 18 个月. 从 Windows 10 版本 1703 开始,半年频道取代了 Current Branch (CB) 和 Current Branch for Business (CBB) 概念,它已于 2017 年 7 月

背水一战 Windows 10 (82) - 用户和账号: 获取用户的信息, 获取用户的同意

原文:背水一战 Windows 10 (82) - 用户和账号: 获取用户的信息, 获取用户的同意 [源码下载] 作者:webabcd 介绍背水一战 Windows 10 之 用户和账号 获取用户的信息 获取用户的同意 示例1.演示如何获取用户的信息UserAndAccount/UserInfo.xaml <Page x:Class="Windows10.UserAndAccount.UserInfo" xmlns="http://schemas.microsoft.c

背水一战 Windows 10 (122) - 其它: 通过 Windows.System.Profile 命名空间下的类获取信息, 查找指定类或接口的所在程序集的所有子类和子接口

[源码下载] 作者:webabcd 介绍背水一战 Windows 10 之 其它 通过 Windows.System.Profile 命名空间下的类获取信息 查找指定类或接口的所在程序集的所有子类和子接口 示例1.演示如何通过 Windows.System.Profile 命名空间下的类获取信息Information/ProfileInfo.xaml <Page x:Class="Windows10.Information.ProfileInfo" xmlns="htt

信息世界新纪元,Windows 10开启

以下是关于Windows 10名称,仅仅是个人观点. 最近朋友圈里都是关于下一个Windows版本名称的讨论,其实我觉得没必要太纠结这个事,经过几天的试用,已经改变了试用之初的无力吐槽的观点,感觉还是确实不错的,缺点当然还是蛮多的,毕竟是技术预览版本,不用过于挑剔啦. 前两天还在和朋友讨论,为什么桌面下还要留下Metro的应用,这是多么鸡肋啊!当时讨论无解.但是现在换个思路想想,微软希望提供的是,一个一致性的设备操作系统,它可以覆盖台式机(或工作站).笔记本(或超极本).平板电脑.手机.以及嵌入

微软 Windows 10 物联网版系统 IoT 介绍 树莓派2 可以装一下

微软在发布Win10的同时,还发布了 微软 Windows 10 物联网版系统  IoT . 大家可以下载用虚拟机玩一下,很不错哟,看来Android和IOS也有新的竞争对手了,有智能家居的可以试试. 有玩树莓派2 的也可以下载装一下. 官方地址:https://dev.windows.com/zh-cn/iot 下载地址:https://dev.windows.com/zh-cn/downloads 物联网 物联网 (IoT) 将设备.传感器.云.数据和你的想象力集合在一起.构构建你最关注的内