#!/usr/bin/env python
# -*- coding: utf-8 -*-
#!文件类型: python
#!创建时间: 2021-08-05 17:18
#!作者: SongBin
#!文件名称: sipderSchedule.py
#!简介:通过scrapyd定时调度爬虫
#!来源:https://www.daxueyiwu.com
import datetime
import threading
import time
import urllib.parse
import urllib.request

import schedule as schedule
def aiqSpider():
    """Trigger the 'aiq' spider via scrapyd's HTTP API and print the job list.

    POSTs the project/spider names to scrapyd's ``schedule.json`` endpoint to
    start a crawl, then GETs ``listjobs.json`` for the same project so the
    current job status appears in the log output.

    Raises:
        urllib.error.URLError: if the scrapyd host is unreachable.
    """
    print((datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "#############AiqSipder调度程序开始运行###########"))
    url = "http://192.168.20.150:6800/schedule.json"
    header = {"User-Agent": "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.7.62 Version/11.01"}
    formData = {'project': 'aiqSpider', 'spider': 'aiq'}
    # Encode the form fields from str to bytes for the POST body.
    data = urllib.parse.urlencode(formData).encode("utf-8")
    # POST request: ask scrapyd to schedule the spider.
    request = urllib.request.Request(url, data=data, headers=header)
    # Context managers close the HTTP connections even if read() fails.
    with urllib.request.urlopen(request) as resp:
        print(resp.read().decode("utf-8"))
    # GET request: list this project's jobs to inspect the run we started.
    myproject = "aiqSpider"
    requrl = "http://192.168.20.150:6800/listjobs.json?project=" + myproject
    with urllib.request.urlopen(requrl) as req:
        res = req.read()  # res is bytes (raw JSON), not str
        print(res)
    print((datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "#############AiqSipder调度程序运行结束###########"))
def job():
    """Scheduler entry point: delegate to the spider-trigger routine."""
    aiqSpider()
def run_threaded(job_func):
    """Start *job_func* on its own thread so the scheduler loop is never
    blocked by a long-running job."""
    threading.Thread(target=job_func).start()
# Register two daily runs (00:01 and 10:00). Each fires run_threaded(job),
# so the crawl executes on its own thread and never blocks the scheduler.
schedule.every().day.at("00:01").do(run_threaded, job)
schedule.every().day.at("10:00").do(run_threaded, job)
if __name__ == '__main__':
    # Poll the scheduler forever. Jobs run on their own threads, so a
    # 10-second polling granularity is fine for minute-level schedules.
    while True:
        schedule.run_pending()
        time.sleep(10)
# 注意:本文归作者所有,未经作者允许,不得转载
# (Note: this article belongs to the author; do not reproduce without permission.)