使用.NET实现一个从大众点评抓取一些基础数据(商家名称,地址,电话,经纬度)的小程序。
实现逻辑:
1、以异步方式从列表上获取前三项(商家名称、地址、电话)(注:下文示例代码实际使用的是同步的 HttpWebRequest.GetResponse 调用);
2、把获取下来的数据保存到数据库表里;
3、把存在数据表里的地址信息读取出来,通过调用QQ地图API把地址转化成经纬度;
4、按行更新GIS信息。
代码如下:
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Xml;namespace DianPing_MeiFa
{
/// <summary>
/// Web Forms page that scrapes shop listings (name, address, phone) from
/// dianping.com into the <c>t_meifa</c> table and then geocodes the stored
/// addresses through the QQ Map geocoder web service.
/// </summary>
public partial class test : System.Web.UI.Page
{
    /// <summary>
    /// Runs the geocoding pass on every page load. The scraping pass is kept
    /// below as commented-out code and has to be enabled by hand.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        // Scraping pass (steps 1-2): walk the paged listing and store each page.
        //for (int i = 1; i < 51; i++) // iterate over the result pages
        //{
        //    string url = "http://www.dianping.com/search/category/2/50/p";
        //    url += i;
        //    this.SaveMeiFaData(url);
        //}

        // Geocoding pass (steps 3-4).
        this.setMapLocation();
    }

    /// <summary>
    /// Loads every row of <c>t_meifa</c>.
    /// </summary>
    /// <returns>The first table of the result set.</returns>
    private DataTable GetMeifa()
    {
        string sql = "SELECT * FROM t_meifa";
        DataSet ds = SqlHelper.ExecuteDataset(SqlHelper.GetConnection(), CommandType.Text, sql);
        return ds.Tables[0];
    }

    /// <summary>
    /// Geocodes the address of every stored shop and writes the coordinates
    /// back row by row. Rows that cannot be geocoded are logged and skipped so
    /// that one bad address does not abort the whole run.
    /// </summary>
    private void setMapLocation()
    {
        DataTable dt = this.GetMeifa();
        IList<MeiFa> mfList = new List<MeiFa>();

        // Convert the DataTable rows into objects.
        foreach (DataRow dr in dt.Rows)
        {
            MeiFa mf = new MeiFa
            {
                Id = int.Parse(dr["id"].ToString()),
                Name = dr["name"].ToString(),
                Address = dr["address"].ToString()
            };
            mfList.Add(mf);
        }

        // Resolve latitude / longitude for each shop.
        foreach (MeiFa mf in mfList)
        {
            if (string.IsNullOrEmpty(mf.Address))
            {
                continue;
            }

            QQMapGeocoder qmg;
            try
            {
                qmg = this.GetGeocoder(mf.Address);
            }
            catch (WebException ex)
            {
                Debug.WriteLine(string.Format("Geocoding request failed for id {0}: {1}", mf.Id, ex.Message));
                continue;
            }
            catch (JsonException ex)
            {
                Debug.WriteLine(string.Format("Geocoding response could not be parsed for id {0}: {1}", mf.Id, ex.Message));
                continue;
            }

            // The service answers without a result/location when it cannot
            // resolve the address; dereferencing it blindly would throw.
            if (qmg == null || qmg.result == null || qmg.result.location == null)
            {
                Debug.WriteLine(string.Format(
                    "No location returned for id {0} (status {1}, message {2})",
                    mf.Id,
                    qmg == null ? (object)"n/a" : qmg.status,
                    qmg == null ? "n/a" : qmg.message));
                continue;
            }

            mf.lat = qmg.result.location.lat;
            mf.lng = qmg.result.location.lng;
            this.UpdateMeiFaLocation(mf);
        }
    }

    /// <summary>
    /// Writes the coordinates of one shop back to the database.
    /// </summary>
    /// <param name="mf">Shop whose <c>Id</c>, <c>lng</c> and <c>lat</c> are persisted.</param>
    private void UpdateMeiFaLocation(MeiFa mf)
    {
        // NOTE(review): rows are read from t_meifa but updated in t_meifa_bak - confirm this is intended.
        // NOTE(review): column names lng/lat/id are inferred from the parameter names - verify against the schema.
        string sql = "UPDATE t_meifa_bak SET lng=@lng,lat=@lat WHERE id=@id";
        SqlParameter[] sps =
        {
            new SqlParameter("@lng", DbValue(mf.lng)),
            new SqlParameter("@lat", DbValue(mf.lat)),
            new SqlParameter("@id", mf.Id),
        };
        SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
    }

    /// <summary>
    /// Looks up the coordinates of an address through the QQ Map geocoder.
    /// </summary>
    /// <param name="address">Address to resolve.</param>
    /// <returns>The deserialized geocoder response.</returns>
    private QQMapGeocoder GetGeocoder(string address)
    {
        // NOTE(review): the API key is hard-coded; consider moving it to configuration.
        string apiMapUrl = "http://apis.map.qq.com/ws/geocoder/v1/?region=北京&address={0}&key=Y5QBZ-DEDR4-3W3U7-XL37W-VVMT6-3KB6K";

        // Escape the address so characters such as '&', '#' or spaces cannot
        // break the query string.
        apiMapUrl = string.Format(apiMapUrl, Uri.EscapeDataString(address ?? string.Empty));

        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(apiMapUrl);
        request.Method = "GET";

        string json = ReadResponseText(request);
        return QQMapGeocoder.DeserializeGeocoder(json);
    }

    /// <summary>
    /// Downloads one listing page, extracts the shops and stores them.
    /// </summary>
    /// <param name="url">URL of the listing page.</param>
    private void SaveMeiFaData(string url)
    {
        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
        request.Method = "GET";
        request.KeepAlive = true;
        request.ContentType = "application/x-www-form-urlencoded";
        request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8";

        string outString = ReadResponseText(request);
        IList<MeiFa> mfList = this.getMeiFaList(outString);
        this.InsertDb(mfList);
    }

    /// <summary>
    /// Executes the request and returns the response body decoded as UTF-8,
    /// disposing the response, stream and reader.
    /// </summary>
    private static string ReadResponseText(HttpWebRequest request)
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Maps a null value to <see cref="DBNull.Value"/> so it can be sent as a
    /// SQL parameter value.
    /// </summary>
    private static object DbValue(object value)
    {
        return value ?? DBNull.Value;
    }

    /// <summary>
    /// Concatenates all strings of the list in order.
    /// </summary>
    private string GetContent(IList<string> list)
    {
        return string.Concat(list);
    }

    /// <summary>
    /// Extracts every <c>&lt;ul class="detail"&gt;</c> fragment from the page
    /// and converts each one into a <see cref="MeiFa"/>.
    /// </summary>
    /// <param name="html">Raw HTML of a listing page.</param>
    /// <returns>One entry per matched fragment; entries that failed to parse are empty.</returns>
    public IList<MeiFa> getMeiFaList(string html)
    {
        IList<MeiFa> mfList = new List<MeiFa>();
        string reg = @"<ul[^>]*class=""detail""[^>]*>[\s\S]*?</ul>";
        MatchCollection mc = Regex.Matches(html, reg);
        foreach (Match m in mc)
        {
            // &nbsp; is not a predefined XML entity, so the XML parser would
            // reject the fragment; normalise it (and a literal non-breaking
            // space) to a plain space first.
            // NOTE(review): the published source showed Replace(" ", " "),
            // presumably a rendered "&nbsp;" - confirm against the real page.
            string strDom = m.Value
                .Replace("&nbsp;", " ")
                .Replace("\u00A0", " ");
            mfList.Add(MeiFa.CreateMeifa(strDom));
        }
        return mfList;
    }

    /// <summary>
    /// Removes everything that looks like an HTML tag from the string.
    /// </summary>
    public string ReplaceHtml(string HTMLStr)
    {
        return Regex.Replace(HTMLStr, "<[^>]*>", "");
    }

    /// <summary>
    /// Inserts every shop that has a name. A failing insert is logged and
    /// does not stop the remaining inserts.
    /// </summary>
    /// <param name="mfs">Shops to store.</param>
    public void InsertDb(IList<MeiFa> mfs)
    {
        foreach (var mf in mfs)
        {
            if (mf == null || string.IsNullOrEmpty(mf.Name))
            {
                continue;
            }

            try
            {
                this.InsertDb(mf);
            }
            catch (Exception ex)
            {
                // Best effort: keep going, but leave a trace of what failed.
                Debug.WriteLine(string.Format("Insert failed for '{0}': {1}", mf.Name, ex.Message));
            }
        }
    }

    /// <summary>
    /// Inserts one shop into <c>t_meifa</c>.
    /// </summary>
    /// <param name="mf">Shop to store.</param>
    public void InsertDb(MeiFa mf)
    {
        string sql = "INSERT INTO t_meifa(name,address,tel) values(@name,@address,@tel)";
        SqlParameter[] sps =
        {
            new SqlParameter("@name", DbValue(mf.Name)),
            new SqlParameter("@address", DbValue(mf.Address)),
            new SqlParameter("@tel", DbValue(mf.Tel)),
        };
        SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
    }

    /// <summary>
    /// One scraped shop record.
    /// </summary>
    public class MeiFa
    {
        public int Id { get; set; }
        public string Name { get; set; }
        public string Address { get; set; }
        public string Tel { get; set; }
        public float? lng { get; set; }
        public float? lat { get; set; }

        public MeiFa()
        {
        }

        /// <summary>
        /// Builds a record from one <c>&lt;ul class="detail"&gt;</c> fragment.
        /// </summary>
        /// <param name="domStr">The fragment; it must be well-formed XML.</param>
        /// <returns>
        /// The parsed record, or an empty record (all properties unset) when
        /// the fragment cannot be parsed or lacks a name or an address.
        /// A missing phone number leaves <c>Tel</c> null.
        /// </returns>
        public static MeiFa CreateMeifa(string domStr)
        {
            if (string.IsNullOrEmpty(domStr))
            {
                return new MeiFa();
            }

            try
            {
                Debug.WriteLine(domStr);
                XmlDocument dom = new XmlDocument();
                dom.LoadXml(domStr);

                XmlNode nameNode = dom.SelectSingleNode("//ul/li[@class=\"shopname\"]/a[@data-hippo-type=\"shop\"]");
                XmlNode addressNode = dom.SelectSingleNode("//ul/*/li[@class=\"address\"]");
                if (nameNode == null || addressNode == null)
                {
                    return new MeiFa();
                }

                // The address node holds "<address> <phone>" separated by spaces.
                string[] ats = getArr(ReplaceAddress(addressNode.InnerText));
                if (ats.Length == 0)
                {
                    return new MeiFa();
                }

                MeiFa m = new MeiFa();
                m.Name = nameNode.InnerText;
                m.Address = ats[0];
                if (ats.Length > 1)
                {
                    m.Tel = ats[1];
                }
                return m;
            }
            catch (Exception ex)
            {
                // Callers rely on this method never throwing; scraped HTML is
                // frequently not well-formed XML.
                Debug.WriteLine("Could not parse listing fragment: " + ex.Message);
                return new MeiFa();
            }
        }

        /// <summary>
        /// Splits the string on spaces and drops empty entries.
        /// </summary>
        /// <param name="str">Text to split.</param>
        /// <returns>The non-empty tokens in order.</returns>
        private static string[] getArr(string str)
        {
            return str.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>
        /// Strips the "地址:" label and line feeds from the address text.
        /// </summary>
        private static string ReplaceAddress(string s)
        {
            s = s.Replace("地址:", "");
            s = s.Replace("\n", "");
            return s;
        }
    }
}
}
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;namespace DianPing_MeiFa
{
//[JsonProperty]
/// <summary>
/// Top-level object of a QQ Map geocoder JSON response.
/// </summary>
public class QQMapGeocoder
{
    // NOTE(review): presumably 0 means success - confirm against the API documentation.
    public int status { get; set; }

    public string message { get; set; }

    public QQMapResult result { get; set; }

    /// <summary>
    /// Deserializes a geocoder JSON response using default serializer settings.
    /// </summary>
    /// <param name="jsonStr">JSON text returned by the service.</param>
    /// <returns>The deserialized response object.</returns>
    public static QQMapGeocoder DeserializeGeocoder(string jsonStr)
    {
        return JsonConvert.DeserializeObject<QQMapGeocoder>(jsonStr, new JsonSerializerSettings());
    }
}

/// <summary>
/// The <c>result</c> member of a geocoder response.
/// </summary>
public class QQMapResult
{
    public QQMapLocation location { get; set; }

    public QQMapAddressComponents address_components { get; set; }

    public string similarity { get; set; }
}

/// <summary>
/// Longitude / latitude pair of a geocoder result.
/// </summary>
public class QQMapLocation
{
    public float? lng { get; set; }

    public float? lat { get; set; }
}

/// <summary>
/// Structured address parts of a geocoder result.
/// </summary>
public class QQMapAddressComponents
{
    public string province { get; set; }

    public string city { get; set; }

    public string district { get; set; }

    public string street { get; set; }

    public string street_number { get; set; }
}
}
注:仅供学习使用!