import os
import re
import time

import cloudscraper
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Fill in your own browser User-Agent and, if the site requires login, your session cookies.
USER_AGENT = ''
COOKIES = {"cookies1": ""}

year = 2018
scraper = cloudscraper.create_scraper()

for month in range(4, 13):
    # Search results for one year/month; the genre parameter is URL-encoded.
    base_url = ("https://hanime1.me/search?query=&type=&genre=%E8%A3%8F%E7%95%AA"
                "&sort=&year={}&month={}")
    url = base_url.format(year, month)
    response = scraper.get(url, headers={'User-Agent': USER_AGENT}, cookies=COOKIES)
    soup = BeautifulSoup(response.text, "html.parser")

    # Collect every watch-page link on the search results page.
    pattern = r'"(https://hanime1\.me/watch\?[^\s"]+)"'
    matches = re.findall(pattern, str(soup))

    download_dir = f"{year}/{month:02}"
    print(download_dir)
    os.makedirs(download_dir, exist_ok=True)

    for watch_url in matches:
        # Render the watch page in headless Chrome so the JavaScript-injected <source> tags are present.
        chrome_options = Options()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument(f'--user-agent={USER_AGENT}')
        chrome_options.add_argument('--disable-web-security')

        driver = webdriver.Chrome(options=chrome_options)
        driver.get(watch_url)
        time.sleep(5)  # give the player time to load the video sources
        page_content = driver.page_source
        driver.quit()

        soup2 = BeautifulSoup(page_content, 'html.parser')

        # Prefer 1080p, then fall back to 720p, then 480p.
        src = None
        source_1080 = soup2.find('source', {'size': '1080'})
        if source_1080:
            src = source_1080.get('src')
            print(f"1080p URL: {src}")
        else:
            source_720 = soup2.find('source', {'size': '720'})
            if source_720:
                src = source_720.get('src')
                print(f"720p URL: {src}")
            else:
                source_480 = soup2.find('source', {'size': '480'})
                if source_480:
                    src = source_480.get('src')
                    print(f"480p URL: {src}")
                else:
                    print("No matching video URL found")
        if not src:
            continue

        # The hidden #video-sd input carries a clean URL whose last path segment serves as the file name.
        video_sd = soup2.find('input', {'id': 'video-sd'})
        filename = video_sd['value'].split("?")[0].split('/')[-1]
        save_path = os.path.join(download_dir, filename)

        # Stream the video to disk in chunks rather than holding the whole file in memory.
        video_response = requests.get(src, stream=True, headers={'User-Agent': USER_AGENT})
        with open(save_path, 'wb') as video_file:
            for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    video_file.write(chunk)
        print(f"Downloaded: {save_path}")

    print('Finished downloading month {}'.format(month))

print('Finished downloading year {}'.format(year))
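
If the nested 1080p/720p/480p checks grow unwieldy, the same preference order can be expressed as a small helper that walks a list of sizes. This is only a sketch, assuming the watch page exposes <source size="..."> tags as in the script above; pick_source is a hypothetical name, not part of any library.

def pick_source(soup, sizes=('1080', '720', '480')):
    # Return the first available <source src> in preference order, or None if nothing matches.
    for size in sizes:
        tag = soup.find('source', {'size': size})
        if tag and tag.get('src'):
            print(f"{size}p URL: {tag['src']}")
            return tag['src']
    return None

# Usage inside the per-video loop above: src = pick_source(soup2)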