通过scrapyd定时调度爬虫,post远程开启服务器上的scrapy爬虫

wylc123 1年前 ⋅ 1078 阅读
#!usr/bin/env python
# -*- coding: utf-8 -*-
#!文件类型: python
#!创建时间: 2021-08-05 17:18
#!作者: SongBin
#!文件名称: sipderSchedule.py
#!简介:通过scrapyd定时调度爬虫
#!来源:https://www.daxueyiwu.com
import datetime
import threading
import time
import urllib.parse  # explicit: urlencode lives here; don't rely on urllib.request importing it
import urllib.request

import schedule


def aiqSpider():
    """Kick off the 'aiq' spider on the remote scrapyd server, then print the
    project's job list.

    Performs an HTTP POST to scrapyd's ``schedule.json`` endpoint followed by a
    GET to ``listjobs.json``. Side effects only (network + console output);
    returns None. Raises ``urllib.error.URLError`` if the server at
    192.168.20.150:6800 is unreachable.
    """
    # Start the spider
    print((datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "#############AiqSipder调度程序开始运行###########"))
    url = "http://192.168.20.150:6800/schedule.json"
    header = {"User-Agent": "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.7.62 Version/11.01"}
    formData = {'project': 'aiqSpider', 'spider': 'aiq'}
    # scrapyd expects a form-encoded POST body; urlencode gives str, so encode to bytes
    data = urllib.parse.urlencode(formData).encode("utf-8")
    request = urllib.request.Request(url, data=data, headers=header)
    # Context managers ensure the HTTP connections are closed even on error
    # (the original leaked both response objects).
    with urllib.request.urlopen(request) as schedule_resp:
        print(schedule_resp.read().decode("utf-8"))

    # Check the job log
    # listjobs.json is a plain GET request
    myproject = "aiqSpider"
    requrl = "http://192.168.20.150:6800/listjobs.json?project=" + myproject
    with urllib.request.urlopen(requrl) as jobs_resp:
        # read() returns bytes; decode so the JSON prints readably instead of a bytes repr
        res = jobs_resp.read().decode("utf-8")
    print(res)
    print((datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "#############AiqSipder调度程序运行结束###########"))
def job():
    """Scheduled-job entry point: run one spider dispatch cycle."""
    aiqSpider()
def run_threaded(job_func):
    """Run *job_func* on a fresh background thread.

    Keeps the scheduler's polling loop responsive: a long-running spider
    dispatch does not block subsequent ``schedule.run_pending()`` calls.
    """
    threading.Thread(target=job_func).start()

# Register the spider dispatch twice daily; run_threaded keeps the polling
# loop below free while a job executes. NOTE: these registrations run at
# import time, not only when executed as a script.
schedule.every().day.at("00:01").do(run_threaded, job)
schedule.every().day.at("10:00").do(run_threaded, job)
if __name__ == '__main__':
    # aiqSpider()  # uncomment for a one-off manual run
  # Poll every 10 s; schedule fires any registered job whose time has come.
  while True:
    schedule.run_pending()
    time.sleep(10)

相关文章推荐

全部评论: 0

    我有话说: