ITPub博客

首页 > 大数据 > 数据挖掘 > Python抓取淘宝IP地址数据

Python抓取淘宝IP地址数据

数据挖掘 作者:阿布多abu 时间:2019-04-26 17:19:17 0 删除 编辑
def fetch(ip):
    """Query the IP-information HTTP service for one address.

    Args:
        ip: The IP address to look up, as a string (it is concatenated
            directly onto the request URL).

    Returns:
        A ``(ok, fields)`` tuple. ``ok`` is 1 when the service answered with
        ``code == 0`` and every expected field was present, otherwise 0.
        On success ``fields`` is a list of UTF-8 encoded values in this
        order: ip, country, country_id, area, area_id, region, region_id,
        city, city_id, county, county_id, isp, isp_id. On failure it is an
        empty list.
    """
    # NOTE(review): the URL literal was truncated in the published source
    # (only the opening quote survived). It is reconstructed here as the
    # Taobao IP lookup endpoint, which matches the article title and the
    # JSON fields read below -- confirm against the original script.
    url = 'http://ip.taobao.com/service/getIpInfo.php?ip=' + ip

    # Keys of the response's ``data`` object, in output order.
    keys = (
        u'ip',
        u'country', u'country_id',
        u'area', u'area_id',
        u'region', u'region_id',
        u'city', u'city_id',
        u'county', u'county_id',
        u'isp', u'isp_id',
    )

    try:
        handle = urllib.urlopen(url)
        try:
            response = handle.read()
        finally:
            # Always release the connection, even if read() fails.
            handle.close()

        jsondata = json.loads(response)
        if jsondata[u'code'] != 0:
            return 0, []

        data = jsondata[u'data']
        # Build the whole list first so a missing key cannot leak a
        # half-filled result to the caller.
        return 1, [data[key].encode('utf-8') for key in keys]
    except Exception:
        # Covers network errors as well as malformed JSON / missing keys;
        # the traceback logged here tells them apart.
        logging.exception("Url open failed:" + url)
        return 0, []
def worker(ratelimit, jobs, results, progress):
    """Consume IP addresses from ``jobs`` and publish lookup results.

    Runs until the module-level ``cancel`` flag becomes true.

    Args:
        ratelimit: Object whose ``ratecontrol()`` method is called before
            each dequeue attempt.
        jobs: Queue of IP addresses to look up; every item taken from it is
            acknowledged with ``task_done()``.
        results: Queue that receives one space-joined line per successful
            lookup.
        progress: Queue that receives an empty string when a lookup fails.
            Nothing is put here on success in this function.
    """
    global cancel
    while not cancel:
        try:
            ratelimit.ratecontrol()
            ip = jobs.get(timeout=2)  # Wait 2 seconds
        except Queue.Empty:
            continue
        except Exception:
            logging.exception("Unknown Error!")
            continue

        try:
            ok, result = fetch(ip)
            if not ok:
                logging.error("Fetch information failed, ip:{}".format(ip))
                progress.put("")  # Notify the progress even it failed
            elif result is not None:
                results.put(" ".join(result))
        except Exception:
            logging.exception("Unknown Error!")
        finally:
            # Acknowledge the item even when processing raised; otherwise
            # anything blocked on jobs.join() would wait forever.
            jobs.task_done()
def process(target, results, progress):
    """Write finished result lines to ``target`` until cancelled.

    Polls ``results`` with a 5-second timeout while the module-level
    ``cancel`` flag is false. Each line taken from the queue is printed to
    ``target``, an empty string is put on ``progress``, and the item is
    acknowledged on ``results``.
    """
    global cancel
    while not cancel:
        try:
            line = results.get(timeout=5)
        except Queue.Empty:
            # Nothing arrived in time; go back and re-check the cancel flag.
            continue
        print >>target, line
        progress.put("")
        results.task_done()
def progproc(progressbar, count, progress):
    """Advance the progress bar once per notification taken from ``progress``.

    Per the original author's note, ProgressBar is not a thread-safe class,
    so the other threads only post to a queue and this thread alone calls
    ``progressbar.update``. The author also notes that the bar prints to
    stderr and therefore does not conflict with the default result output
    (stdout).

    Args:
        progressbar: Object with an ``update(value)`` method.
        count: Not used by this function.
        progress: Queue of notifications; the item values are ignored.

    This loop has no exit condition of its own; it ends only when an
    exception other than ``Queue.Empty`` propagates or the hosting thread
    or process goes away.
    """
    idx = 1
    while True:
        try:
            progress.get(timeout=5)
        except Queue.Empty:
            # No notification within the timeout; keep waiting.
            continue
        progressbar.update(idx)
        idx += 1


来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/69903461/viewspace-2642675/,如需转载,请注明出处,否则将追究法律责任。

请登录后发表评论 登录
全部评论

注册时间:2018-12-21

  • 博文量
    24
  • 访问量
    18912