午夜剧场伦理_日本一道高清_国产又黄又硬_91黄色网战_女同久久另类69精品国产_妹妹的朋友在线

您的位置:首頁 > 技術文章
文章詳情頁

python 爬取影視網站下載鏈接

瀏覽:154 日期:2022-06-18 09:31:11
目錄:項目地址、運行效果、導入模塊、爬蟲主代碼、完整代碼

項目地址:

https://github.com/GriffinLewis2001/Python_movie_links_scraper

運行效果

python 爬取影視網站下載鏈接

python 爬取影視網站下載鏈接

導入模塊

import requests,re
from requests.cookies import RequestsCookieJar
from fake_useragent import UserAgent
import os,pickle,threading,time
import concurrent.futures
from goto import with_goto

爬蟲主代碼

# NOTE(review): the regular expressions in this snippet were damaged when the
# article was published (href="..." attributes inside the patterns were
# rewritten and backslashes were dropped).  They are reconstructed here from
# the surviving fragments and from how the callers index the results; verify
# each pattern against the upstream repository before relying on it.

# Browser-like headers sent with every request.
_REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
    ),
    'Connection': 'keep-alive',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Language': 'zh-CN,zh;q=0.8',
}

# requests never times out by default; bound every fetch so the interactive
# loop cannot hang forever on a dead connection.
_TIMEOUT_SECONDS = 10

_SEARCH_URL = 'http://www.yikedy.co/search?query='

# Typing this number at the "pick an entry" prompt skips the current search.
_SKIP_CODE = 100

_NOT_FOUND_MESSAGE = '沒找到或不想看\n'

# presumably captures the href of each detail-page link -- TODO confirm
_DETAIL_LINK_RE = re.compile(r'<a href="(.*?)"')

# Two groups, (href, link text): main() reads entry[0] as the URL and
# entry[1] as the title.  Exact attribute list is an assumption -- TODO confirm
_SEARCH_RESULT_RE = re.compile(r'<a target="_blank" href="(.*?)">(.*?)</a>')

# The hex escapes spell "decriptParam"; the single capture group is the token
# that is appended to the download-list URL.
_DECRIPT_PARAM_RE = re.compile(
    r"\{'\x64\x65\x63\x72\x69\x70\x74\x50\x61\x72\x61\x6d':'(.*?)'\}"
)

# presumably captures the href that follows each title="..." attribute on the
# download-list page -- TODO confirm
_DOWNLOAD_LINK_RE = re.compile(r'title=".*?" href="(.*?)"')


def get_content(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    The request carries the browser-like headers above and the ``mttp``
    cookie scoped to ``www.yikedy.co``.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    cookie_jar = RequestsCookieJar()
    cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co')
    # headers= must be passed by keyword: the second positional parameter of
    # requests.get is ``params``, so the original call sent the header dict
    # as query-string parameters and never set the headers at all.
    response = requests.get(
        url,
        headers=_REQUEST_HEADERS,
        cookies=cookie_jar,
        timeout=_TIMEOUT_SECONDS,
    )
    response.encoding = 'utf-8'
    return response.text


def get_content_url_name(url):
    """Return every match of the detail-link pattern found in the page at *url*."""
    return _DETAIL_LINK_RE.findall(get_content(url))


def search_durl(url):
    """Return the download links for the title page at *url*.

    The title page is fetched first to extract the ``decriptParam`` token,
    which is then used to request the download-list page.

    Returns:
        A list of matches from the download-list page; an empty list when the
        title page contains no token (the original raised ``IndexError``).
    """
    tokens = _DECRIPT_PARAM_RE.findall(get_content(url))
    if not tokens:
        return []
    # url[:-5] drops the last five characters -- presumably a trailing
    # '.html'; verify against real result URLs.
    download_url = url[:-5] + '/downloadList?decriptParam=' + tokens[0]
    return _DOWNLOAD_LINK_RE.findall(get_content(download_url))


def get_page(url):
    """Return ``(href, title)`` tuples for each result link on the search page at *url*."""
    return _SEARCH_RESULT_RE.findall(get_content(url))


def main():
    """Run one interactive search: prompt for a title, list matches, print links.

    Entering ``quit`` at the first prompt exits the program.  Entering 100 (or
    anything that is not an integer) at the second prompt skips the search.
    """
    from urllib.parse import quote

    print('=========================================================')
    name = input('請輸入劇名(輸入quit退出):')
    if name == 'quit':
        raise SystemExit

    # Percent-encode the user's text so characters such as '&', '#' or spaces
    # cannot break the query string.
    dlist = get_page(_SEARCH_URL + quote(name))
    print('\n')

    # Only results whose title contains the search text are offered.
    matches = [entry for entry in dlist if name in entry[1]]
    if not matches:
        print(_NOT_FOUND_MESSAGE)
        return

    for num, entry in enumerate(matches):
        print(f'{num} {entry[1]}')

    try:
        dest = int(input('\n\n請輸入劇的編號(輸100跳過此次搜尋):'))
    except ValueError:
        # Non-numeric input used to crash the program; treat it as "skip".
        dest = _SKIP_CODE
    if dest == _SKIP_CODE:
        print(_NOT_FOUND_MESSAGE)
        return

    print('\n以下為下載鏈接:\n')
    # An out-of-range number prints the header and nothing else, as before.
    if 0 <= dest < len(matches):
        for durl in search_durl(matches[dest][0]):
            print(f'{durl}\n')
        print('\n')


# --- Article section heading: 完整代碼 ("Complete code") ---

"""Interactive scraper that searches a movie site and prints download links."""

import concurrent.futures
import os
import pickle
import re
import threading
import time
from urllib.parse import quote

import requests
from fake_useragent import UserAgent
from goto import with_goto  # kept from the original import list; no longer used
from requests.cookies import RequestsCookieJar

# NOTE(review): the regular expressions in this script were damaged when the
# article was published (href="..." attributes inside the patterns were
# rewritten and backslashes were dropped).  They are reconstructed here from
# the surviving fragments and from how the callers index the results; verify
# each pattern against the upstream repository before relying on it.

# Browser-like headers sent with every request.
_REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
    ),
    'Connection': 'keep-alive',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Language': 'zh-CN,zh;q=0.8',
}

# requests never times out by default; bound every fetch so the interactive
# loop cannot hang forever on a dead connection.
_TIMEOUT_SECONDS = 10

# NOTE(review): 'www.xxx.com' looks like a placeholder, while the cookie below
# is scoped to www.yikedy.co -- confirm the intended search host.
_SEARCH_URL = 'http://www.xxx.com/search?query='

# Typing this number at the "pick an entry" prompt skips the current search.
_SKIP_CODE = 100

_NOT_FOUND_MESSAGE = '沒找到或不想看\n'

# presumably captures the href of each detail-page link -- TODO confirm
_DETAIL_LINK_RE = re.compile(r'<a href="(.*?)"')

# Two groups, (href, link text): main() reads entry[0] as the URL and
# entry[1] as the title.  Exact attribute list is an assumption -- TODO confirm
_SEARCH_RESULT_RE = re.compile(r'<a target="_blank" href="(.*?)">(.*?)</a>')

# The hex escapes spell "decriptParam"; the single capture group is the token
# that is appended to the download-list URL.
_DECRIPT_PARAM_RE = re.compile(
    r"\{'\x64\x65\x63\x72\x69\x70\x74\x50\x61\x72\x61\x6d':'(.*?)'\}"
)

# presumably captures the href that follows each title="..." attribute on the
# download-list page -- TODO confirm
_DOWNLOAD_LINK_RE = re.compile(r'title=".*?" href="(.*?)"')


def get_content(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    The request carries the browser-like headers above and the ``mttp``
    cookie scoped to ``www.yikedy.co``.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    cookie_jar = RequestsCookieJar()
    cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co')
    # headers= must be passed by keyword: the second positional parameter of
    # requests.get is ``params``, so the original call sent the header dict
    # as query-string parameters and never set the headers at all.
    response = requests.get(
        url,
        headers=_REQUEST_HEADERS,
        cookies=cookie_jar,
        timeout=_TIMEOUT_SECONDS,
    )
    response.encoding = 'utf-8'
    return response.text


def get_content_url_name(url):
    """Return every match of the detail-link pattern found in the page at *url*."""
    return _DETAIL_LINK_RE.findall(get_content(url))


def search_durl(url):
    """Return the download links for the title page at *url*.

    The title page is fetched first to extract the ``decriptParam`` token,
    which is then used to request the download-list page.

    Returns:
        A list of matches from the download-list page; an empty list when the
        title page contains no token (the original raised ``IndexError``).
    """
    tokens = _DECRIPT_PARAM_RE.findall(get_content(url))
    if not tokens:
        return []
    # url[:-5] drops the last five characters -- presumably a trailing
    # '.html'; verify against real result URLs.
    download_url = url[:-5] + '/downloadList?decriptParam=' + tokens[0]
    return _DOWNLOAD_LINK_RE.findall(get_content(download_url))


def get_page(url):
    """Return ``(href, title)`` tuples for each result link on the search page at *url*."""
    return _SEARCH_RESULT_RE.findall(get_content(url))


def main():
    """Run one interactive search: prompt for a title, list matches, print links.

    Entering ``quit`` at the first prompt exits the program.  Entering 100 (or
    anything that is not an integer) at the second prompt skips the search.
    """
    print('=========================================================')
    name = input('請輸入劇名(輸入quit退出):')
    if name == 'quit':
        raise SystemExit

    # Percent-encode the user's text so characters such as '&', '#' or spaces
    # cannot break the query string.
    dlist = get_page(_SEARCH_URL + quote(name))
    print('\n')

    # Only results whose title contains the search text are offered.
    matches = [entry for entry in dlist if name in entry[1]]
    if not matches:
        print(_NOT_FOUND_MESSAGE)
        return

    for num, entry in enumerate(matches):
        print(f'{num} {entry[1]}')

    try:
        dest = int(input('\n\n請輸入劇的編號(輸100跳過此次搜尋):'))
    except ValueError:
        # Non-numeric input used to crash the program; treat it as "skip".
        dest = _SKIP_CODE
    if dest == _SKIP_CODE:
        print(_NOT_FOUND_MESSAGE)
        return

    print('\n以下為下載鏈接:\n')
    # An out-of-range number prints the header and nothing else, as before.
    if 0 <= dest < len(matches):
        for durl in search_durl(matches[dest][0]):
            print(f'{durl}\n')
        print('\n')


if __name__ == '__main__':
    print('本軟件由CLY.所有\n\n')
    # Keep prompting until the user types "quit" (main() raises SystemExit).
    while True:
        main()

以上就是 Python 爬取影視網站下載鏈接的詳細內容,更多關於 Python 爬取下載鏈接的資料請關注好吧啦網其它相關文章!

標簽: Python 編程
相關文章:
主站蜘蛛池模板: 亚洲欧美日韩综合 | 亚洲欧美高清 | 国产午夜亚洲精品午夜鲁丝片 | 日韩一级黄色 | 在线观看视频亚洲 | 成年在线观看 | 一区二区三区中文字幕在线观看 | 狠狠狠操 | 男人的天堂免费视频 | 另类视频在线 | 欧美日韩国产不卡 | www国产视频 | 成人一区二区视频 | 日日夜夜伊人 | 成人久久久久久久 | 久久久久国产视频 | 91精品国产综合久久福利 | 在线观看黄av | www.一区二区三区 | 黄色在线观看 | 久草手机在线视频 | 国产三级在线 | 可以在线观看的av | 国产精品麻豆入口 | 第一色综合 | 久久久三区 | 五月天婷婷在线播放 | 国产精品久久久精品 | 91青青草原 | 毛片毛片毛片毛片 | 精品九九视频 | 久久精品区 | 在线中文字日产幕 | 午夜在线视频观看 | 91精品综合久久久久久五月天 | 激情欧美一区二区三区中文字幕 | 国产精品久久一区二区三区 | 国产女主播福利 | 日本一区二区精品视频 | 午夜在线成人 | 国产精品久久国产精品 |