Mytophome Deal

using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for Mytophome listing pages: walks every paged result, makes sure the
    /// housing estate referenced by each row exists in the repository, and stores the
    /// row as a <c>HouselistingEntity</c>.
    /// </summary>
    internal class Mytophome : AnalyzerBase
    {
        /// <summary>Minimum number of span columns a row needs (the 5-column layout).</summary>
        private const int MinColumns = 5;

        /// <summary>Column count of the layout that additionally carries a building name.</summary>
        private const int ColumnsWithBuildingName = 6;

        /// <summary>
        /// Processes one landed page. Only depth 0 (the listing page) is handled: paging
        /// is queued and every <c>.deD_ctt li</c> row is saved as a house listing.
        /// </summary>
        /// <param name="current">The page being analyzed.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);
            switch (current.Depth)
            {
                case 0:
                    {
                        var dom = lander.GetDocument(pHandler);
                        // The parent of the <nobr> element gets a synthetic id so that
                        // DoPerPaging can address it with an id selector.
                        var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
                        nextNode.SetAttributeValue("id", PagingHack);
                        DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack));

                        foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
                        {
                            var spans = QueryNodes(node, "span").ToArray();
                            // Rows with fewer columns than the smallest known layout
                            // cannot be mapped; skip them instead of failing the whole
                            // page with an IndexOutOfRangeException.
                            if (spans.Length < MinColumns)
                            {
                                Crawler.OutWrite("跳过无效成交记录 列数={0}", spans.Length);
                                continue;
                            }

                            // Keep the link exactly as found on the page so it can be
                            // reported if the estate page turns out to be unusable.
                            var orgUrl = GetHref(QueryNode(spans[1], "a"), current.Url);
                            var query = System.Web.HttpUtility.ParseQueryString(orgUrl.Query);
                            string shid = query["estateId"];
                            if (string.IsNullOrEmpty(shid))
                            {
                                // Without an estate id the detail URL would be malformed.
                                Crawler.OutWrite("跳过缺少estateId的成交记录 {0}", orgUrl);
                                continue;
                            }

                            var housesUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", orgUrl.Authority, shid));
                            Guid housesID;
                            try
                            {
                                CheckHouses(housesUrl, out housesID);
                            }
                            catch (HtmlNodeMissingException ex)
                            {
                                App.LogError(ex, "OrgUrl={0} HousesUrl={1}", orgUrl, housesUrl);
                                continue;
                            }

                            var vals = spans.Select(p => p.InnerText.HtmlTrim()).ToArray();
                            DateTime? transactionDate = null;
                            DateTime parsed;
                            // The last column is treated as the transaction date when it parses.
                            if (DateTime.TryParse(vals.Last(), out parsed))
                            {
                                transactionDate = parsed;
                            }

                            Repository.SaveHouselisting(BuildListing(housesID, transactionDate, vals));
                            Crawler.OutWrite("保存小区出售记录 {0}", housesID);
                        }
                    }
                    break;
            }
        }

        /// <summary>
        /// Maps the trimmed column texts of one row to a listing entity. A 6-column row
        /// carries the building name at index 2 and shifts area/price/unit price by one;
        /// any other row uses indexes 2..4 and leaves the building name untouched.
        /// </summary>
        /// <param name="housesID">Key of the estate the listing belongs to.</param>
        /// <param name="transactionDate">Parsed transaction date, or null when unavailable.</param>
        /// <param name="vals">Trimmed column texts; must contain at least 5 items.</param>
        /// <returns>The populated listing entity.</returns>
        private static HouselistingEntity BuildListing(Guid housesID, DateTime? transactionDate, string[] vals)
        {
            bool hasBuildingName = vals.Length == ColumnsWithBuildingName;
            int first = hasBuildingName ? 3 : 2;

            var listing = new HouselistingEntity()
            {
                HousesID = housesID,
                TransactionDate = transactionDate,
                Area = string.Format("{0}平方", vals[first]),
                SoldPriceOrRent = string.Format("{0}万", vals[first + 1]),
                UnitPriceOrLease = string.Format("{0}元/平方", vals[first + 2]),
            };
            if (hasBuildingName)
            {
                listing.BuildingName = vals[2];
            }
            return listing;
        }

        /// <summary>
        /// Fetches the estate detail page, derives the estate key from its URL, and
        /// saves the estate unless the repository already holds one with a SiteID.
        /// </summary>
        /// <param name="housesUrl">URL of the estate detail page.</param>
        /// <param name="housesID">Receives the hash key generated from <paramref name="housesUrl"/>.</param>
        /// <remarks>
        /// The page is fetched and queried before the repository lookup; the caller
        /// catches <c>HtmlNodeMissingException</c> from this method to skip rows, so the
        /// order is kept as is.
        /// </remarks>
        private void CheckHouses(Uri housesUrl, out Guid housesID)
        {
            var pHandler = CreateContentHandler(new PageLandEntity()
            {
                Url = housesUrl,
                Depth = DataDepth.Houses
            });
            pHandler.AjaxBlocks.Add(HACK);
            var dom = Crawler.Lander.GetDocument(pHandler);
            var attrs = new AttributeFiller();

            attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li"));

            housesID = GenHashKey(housesUrl.OriginalString);
            var bo = Crawler.Repository.LoadHouses(housesID);
            // A non-empty SiteID marks an estate that has already been stored.
            if (!string.IsNullOrEmpty(bo.SiteID))
            {
                return;
            }
            bo.SiteID = "Mytophome.com";
            bo.PageUrl = housesUrl.OriginalString;
            bo.CityName = Crawler.Config.CityName;
            // Translate the labels used on the page to the entity's field names.
            attrs.FillEntity(bo, new Dictionary<string, string>()
            {
                {"楼盘名称", "小区名称"},
                {"楼盘地址", "小区地址"},
                {"发展商", "开发商"},
                {"物管公司", "物业公司"},
                {"物管电话", "物业办公电话"},
            });
            MapMark(bo);
            Crawler.Repository.Save(bo);
            Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
        }
    }
}
时间: 2024-11-08 21:53:13

Mytophome Deal的相关文章

DevOps is dirty work - What's the deal

什么是DevOps?终于又回到这个最初的问题. 第一次看到这个词的时候,还身陷于各种敏捷概念轰炸中.用“身陷”这个词其实并不准确,因为那个年代的我也是那些热情洋溢地无处不宣传敏捷的热血文艺青年中的一员.就像天生的一样,我从未接触或真正实践过瀑布模型.瀑布开发对我来说一直是书里的概念,各种流程背得滚瓜烂熟都是应付考试用的东西.打从第一脚踏入老东家N记,Scrum Master骄傲地带着我各楼层领略五颜六色的进度小纸条和大小各异的手写燃尽图的那一刻开始,我就被敏捷浸淫而无法自拔.N记也不愧为国内敏捷

“Software Architect” Has Only Lowercase a’s; Deal with It

"Software Architect" Has Only Lowercase a's; Deal with It. Barry Hawkins. A disappointing trend has been in bloom for some time now within software development: the attempt to professionalize the practice of software architecture as one on par w

How to deal with the Unmerged paths

如果新提交的分支代码和master有冲突,先checkout到分支merge master,解决冲突, 然后再checkout master,merge 分支. (1) git checkout edit_package_page git merge master --no-ff 这时候会看到很多CONFLICT (add/add): Merge conflict in  <filename> git status 查看所有的冲突文件 vim 每一个文件解决一下冲突 git add . gi

XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法

1 <?xml version="1.0"?> 2 <ModifyFiles> 3 <_Layout.cshtml>123456</_Layout.cshtml> 4 <Contact.cshtml>@section MasterPart{"/Home/About"}</Contact.cshtml> 5 </ModifyFiles> XML节点中有小数点怎么办?怎么引用它的xpat

OK335xS canutils deal with compile error

/************************************************************************************** * OK335xS canutils deal with compile error * 声明: * 本文主要是记录解决编译can测试工具(canutils)时遇到的一些编译错误. * * 2015-9-4 晴 深圳 南山平山村 曾剑锋 *******************************************

Dooioo Deal

using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using System.Net; using System.Text; using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer { internal class Dooioo : AnalyzerBase { protected ov

Sustain Broadsheet Bags Affordability As A Good Deal

The final classified generally as Honma Beres place in Japan EMPLOYEES bags for sale but ?? I love the ability to jump in and take the set. For someone who is 10-15 HCP, I think it could be a game? .Torba There are more players than teams JAPAN Berc,

What is "found.000"? How to deal with it?

最近在ubuntu系统中发现双系统的win盘中有一些文件夹,名字是"found.000",甚是疑惑,遂查而记之. found.000文件夹里面的一些后缀名为CHK的文件是你在使用"磁盘碎片整理程序"整理硬盘后所产生的"丢失簇的恢复文件". what is "found.000"? 在c:\windows下有很多以fff开头的文件是由Mdm.exe(Machine Debug Manager)这个程序产生的.Mdm.exe的主要

android how to deal with data when listview refresh

如何解决listview数据刷新,下拉刷新,上拉加载更多时,图片不闪烁. 在Activity的onResume()方法中将adaper和listView重新再绑定一次. listView.setAdapter(adapter); adapter.notifyDataSetChanged(); http://www.eoeandroid.com/forum.php?mod=viewthread&tid=541113&extra=&ordertype=1 public class Ho