-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
106 lines (77 loc) · 2.93 KB
/
main.py
File metadata and controls
106 lines (77 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import requests
import os
import re
import time
import random
from threading import Thread
from CrawlerStruct import SearchResult
from CrawlerAbs import CrawlerAbs, movie_queue
from requests.packages import urllib3
from lxml import etree
# 关闭警告
urllib3.disable_warnings()
class SakulaCrawler(CrawlerAbs):
    """Crawler for yinghuacd.com.

    Workflow (driven by CrawlerAbs.start, defined elsewhere): search for a
    show by keyword, let the user pick one result and one or more episodes,
    then resolve each episode's m3u8 playlist URL and push it onto the
    shared ``movie_queue`` for download.
    """

    def __init__(self) -> None:
        CrawlerAbs.__init__(self)

    def Search(self, keyWord):
        """POST the search form and collect result names/links.

        keyWord: search term entered by the user.
        Populates ``self.searchResult`` via SearchResult(names, hrefs).data
        (presumably a list of {"index", "name", "href"} dicts, judging by
        how Select_Movie reads it — defined in CrawlerStruct).
        """
        # NOTE(review): the keyword is sent both in the URL path and as a
        # JSON body; the site apparently accepts this. `payload` avoids
        # shadowing the common module name `json`.
        payload = {"m": "search", "c": "index", "a": "init", "q": keyWord}
        res = requests.post(
            "http://www.yinghuacd.com/search/{}".format(keyWord), json=payload
        )
        result = etree.HTML(res.text)
        resultHrefs = result.xpath('.//div[@class="lpic"]//li/a/@href')
        resultNames = result.xpath('.//div[@class="lpic"]//li/a/img/@alt')
        self.searchResult = SearchResult(resultNames, resultHrefs).data

    def Select_Movie(self):
        """Print the search results and let the user pick one (1-based).

        Stores the chosen entry on self.targetHref / self.targetName.
        Loops until the user enters a valid numeric index.
        """
        for result in self.searchResult:
            print("{}.{}".format(result["index"], result["name"]))
        while True:
            try:
                index = int(input()) - 1
                # BUGFIX: original check was `index > len(self.searchResult)`,
                # which accepted index == len(...) (user typed len+1, later
                # raising IndexError) and negative indices (user typed 0 or
                # less, silently selecting from the end of the list).
                if not 0 <= index < len(self.searchResult):
                    print("请输入存在的序号")
                    continue
                break
            except ValueError:
                print("请输入数字序号")
        self.targetHref = self.searchResult[index]["href"]
        self.targetName = self.searchResult[index]["name"]

    def Select_Ep(self):
        """List the chosen show's episodes, ask which to fetch, and resolve
        each selected episode's m3u8 URL concurrently.

        Each resolved URL is put on ``movie_queue`` as
        {"ep": <1-based index>, "m3u8": <url>}. A user selection containing
        -1 (see get_list_from_input, defined in CrawlerAbs) means "all
        episodes".
        """
        print("选择的电影为:{}".format(self.targetName))
        detail_page = requests.get("http://www.yinghuacd.com/" + self.targetHref)
        detail_page.encoding = "utf-8"
        Ep_page_res = etree.HTML(detail_page.text)
        Ep_hrefs = Ep_page_res.xpath('//div[@class="movurl"]/ul/li/a/@href')
        # Map 1-based episode number -> absolute episode page URL.
        Ep_dic = {}
        for index, ep in enumerate(Ep_hrefs):
            Ep_dic.update({index + 1: "http://www.yinghuacd.com" + ep})
        for item in Ep_dic:
            print("EP" + str(item))
        targetEp = self.get_list_from_input()

        def get_m3u8_url(index):
            # Fetch the episode page, read its player video id, then ask the
            # player endpoint for the page that embeds the m3u8 URL.
            ep_page = requests.get(Ep_dic[index])
            ep_page.encoding = "utf-8"
            ep_m3u8_page_url = etree.HTML(ep_page.text).xpath(
                '//div[@id="playbox"]/@data-vid'
            )[0]
            a = requests.get(
                "https://tup.yinghuacd.com/", params={"vid": ep_m3u8_page_url}
            )
            a.encoding = "utf-8"
            m3u8_url = re.findall('url: "(.*?)",', a.text)[0]
            movie_queue.put({"ep": index, "m3u8": m3u8_url}, timeout=5)

        threads = []
        if -1 in targetEp:
            # -1 sentinel: download every episode.
            indexs = range(len(Ep_dic))
        else:
            indexs = targetEp
        for i in indexs:
            # Ep_dic keys are 1-based, hence i + 1.
            t = Thread(target=get_m3u8_url, args=(i + 1,))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
if __name__ == "__main__":
    # Entry point: build the crawler and run the full pipeline
    # (start() is inherited from CrawlerAbs). Renamed the local from
    # PascalCase `MovieDownloader` to PEP 8 snake_case.
    downloader = SakulaCrawler()
    downloader.start()