1. 更新月月抓取规则,支持抓取月月首页促销种子

This commit is contained in:
ngfchl
2022-09-02 17:33:55 +08:00
parent 59a84da2b9
commit 27ce6d8cdd
6 changed files with 124 additions and 39 deletions

16
pt.json
View File

@@ -3170,7 +3170,7 @@
"pk": 44,
"fields": {
"created_at": "2022-08-23T16:41:38.669",
"updated_at": "2022-09-02T16:41:47.215",
"updated_at": "2022-09-02T17:24:52.002",
"url": "https://pt.keepfrds.com/",
"name": "月月Frds",
"logo": "https://pt.keepfrds.com/static/favicon-64x64.png",
@@ -3206,16 +3206,16 @@
"detail_url_rule": ".//td[@class=\"embedded\"]/a[contains(@href,\"detail\")]/@href",
"category_rule": ".//td[@class=\"rowfollow nowrap\"][1]/a[1]/img/@title",
"poster_rule": ".//table/tr/td[1]/img/@src",
"magnet_url_rule": ".//input[@id=\"download_link\"]/@value",
"download_url_rule": ".//a[contains(@href,\"download.php\")]/@href",
"size_rule": "//td[5]/text()",
"magnet_url_rule": ".//a[contains(@href,\"download.php?id=\")]/@href",
"download_url_rule": ".//input[@id=\"download_link\"]/@value",
"size_rule": ".//td[5]/text()",
"hr_rule": ".//table/tr/td/img[@class=\"hitandrun\"]/@title",
"sale_rule": ".//div/img[contains(@class,\"pro_\")]/@alt",
"sale_expire_rule": ".//div/img[contains(@class,\"pro_\")]/@onmouseover",
"release_rule": ".//td[4]/span/@title",
"seeders_rule": "//a[contains(@href,\"#seeders\")]/text()",
"leechers_rule": "//a[contains(@href,\"#leechers\")]/text()",
"completers_rule": "//a[contains(@href,\"viewsnatches.php?id=\")]//text()",
"seeders_rule": ".//a[contains(@href,\"#seeders\")]/text()",
"leechers_rule": ".//a[contains(@href,\"#leechers\")]/text()",
"completers_rule": ".//a[contains(@href,\"viewsnatches.php?id=\")]//text()",
"viewfilelist_rule": ".//td/text()",
"viewpeerlist_rule": ".//tr/td[9]/nobr/text()",
"peer_speed_rule": ".//tr/td[7]/nobr/text()",
@@ -3233,7 +3233,7 @@
"my_hr_rule": "//tr[14]/td[2]/a/text()",
"leech_rule": "//img[@class=\"arrowdown\"]/following-sibling::text()[1]",
"seed_rule": "//img[@class=\"arrowup\"]/following-sibling::text()[1]",
"record_count_rule": "//td[3]/text()",
"record_count_rule": ".//td[3]/text()",
"seed_vol_rule": "//tr/td[3]",
"mailbox_rule": "//a[@href=\"messages.php\"]/following-sibling::text()[1]",
"hash_rule": "//td[@class=\"no_border_wide\"]/b[text()=\"Hash 码:\"]/following::text()[1]"

View File

@@ -618,27 +618,29 @@ class PtSpider:
if not sale_status:
continue
sale_status = ''.join(re.split(r'[^\x00-\xff]', sale_status))
sale_status = sale_status.upper().replace('FREE', 'Free').replace(' ', '')
sale_status = sale_status.upper().replace('FREE', 'Free').title().replace(' ', '')
# # 下载链接,下载链接已存在则跳过
href = ''.join(tr.xpath(site.magnet_url_rule))
# print(href)
print('href', href)
magnet_url = site.url + href.replace('&type=zip', '').replace(site.url, '')
if href.count('passkey') <= 0 and href.count('&sign=') <= 0:
download_url = magnet_url + '&passkey=' + my_site.passkey
else:
download_url = magnet_url
# print(download_url)
# print(magnet_url)
print('download_url', download_url)
print('magnet_url', magnet_url)
title_list = tr.xpath(site.title_rule)
print(title_list)
title = ''.join(title_list).strip().strip('剩余时间:').strip('剩餘時間:').strip('()')
# if sale_status == '2X':
# sale_status = '2XFree'
# # H&R 如果设置为不下载HR种子且种子HR为真,跳过
hr = True if ''.join(tr.xpath(site.hr_rule)) else False
# 如果种子有HR则为否 HR绿色表示无需红色表示未通过HR考核
hr = False if ''.join(tr.xpath(site.hr_rule)) else True
# print(torrent.hr)
if hr and not site.hr:
# H&R 种子有HR且站点设置不下载HR种子,跳过,
if not hr and not site.hr:
continue
# # 促销到期时间
sale_expire = ''.join(tr.xpath(site.sale_expire_rule))
@@ -647,6 +649,7 @@ class PtSpider:
'http://www.oshen.win/',
'https://www.hitpt.com/',
'https://hdsky.me/',
'https://pt.keepfrds.com/',
]:
"""
由于备胎等站优惠结束日期格式特殊,所以做特殊处理,使用正则表达式获取字符串中的时间
@@ -692,9 +695,9 @@ class PtSpider:
print('leechers', leechers)
print('H&R', hr)
print('completers', completers)
result = TorrentInfo.objects.update_or_create(download_url=download_url, defaults={
result = TorrentInfo.objects.update_or_create(magnet_url=magnet_url, defaults={
'category': category,
'magnet_url': magnet_url,
'download_url': download_url,
'site': site,
'name': name,
'title': title,

View File

@@ -0,0 +1,63 @@
# Generated by Django 4.1 on 2022-09-02 17:01
from django.db import migrations, models
class Migration(migrations.Migration):
    """Set the field definitions of ten rule columns on the ``site`` model.

    Each operation is an ``AlterField`` that declares the target column as a
    ``CharField`` with ``max_length=128`` and the default XPath expression and
    ``verbose_name`` shown below.
    """

    # Must run after the previous pt_site migration.
    dependencies = [
        ('pt_site', '0003_signin_sign_in_info'),
    ]

    operations = [
        migrations.AlterField(
            model_name='site',
            name='category_rule',
            field=models.CharField(
                default='.//td[@class="rowfollow nowrap"][1]/a[1]/img/@title',
                max_length=128,
                verbose_name='分类',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='completers_rule',
            field=models.CharField(
                default='.//a[contains(@href,"viewsnatches.php?id=")]//text()',
                max_length=128,
                verbose_name='完成人数',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='download_url_rule',
            field=models.CharField(
                default='.//a[contains(@href,"download.php?id=") and contains(@href,"passkey")]/@href',
                max_length=128,
                verbose_name='详情页种子链接',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='leechers_rule',
            field=models.CharField(
                default='.//a[contains(@href,"#leechers")]/text()',
                max_length=128,
                verbose_name='下载人数',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='magnet_url_rule',
            field=models.CharField(
                default='.//td/a[contains(@href,"download.php?id=")]/@href',
                max_length=128,
                verbose_name='主页下载链接',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='peer_speed_rule',
            field=models.CharField(
                default='.//tr/td[7]/nobr/text()',
                max_length=128,
                verbose_name='平均下载速度',
            ),
        ),
        # The only field in this migration that also carries help_text.
        migrations.AlterField(
            model_name='site',
            name='record_count_rule',
            field=models.CharField(
                default='.//td[3]/text()',
                help_text='提取做种列表中文件大小计算总量',
                max_length=128,
                verbose_name='做种大小列表',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='sale_rule',
            field=models.CharField(
                default='.//div/img[contains(@class,"pro_")]/@alt',
                max_length=128,
                verbose_name='促销信息',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='seeders_rule',
            field=models.CharField(
                default='.//a[contains(@href,"#seeders")]/text()',
                max_length=128,
                verbose_name='做种人数',
            ),
        ),
        migrations.AlterField(
            model_name='site',
            name='title_rule',
            field=models.CharField(
                default='.//td[@class="embedded"]/a/following::text()[1]',
                max_length=128,
                verbose_name='种子标题',
            ),
        ),
    ]

View File

@@ -0,0 +1,18 @@
# Generated by Django 4.1 on 2022-09-02 17:09
from django.db import migrations, models
class Migration(migrations.Migration):
    """Declare ``torrentinfo.hr`` as a ``BooleanField`` that defaults to ``True``."""

    # Must run after the 0004 migration of the same app.
    dependencies = [
        (
            'pt_site',
            '0004_alter_site_category_rule_alter_site_completers_rule_and_more',
        ),
    ]

    operations = [
        migrations.AlterField(
            model_name='torrentinfo',
            name='hr',
            # The help_text reads roughly "green means passed or no H&R
            # assessment required".
            field=models.BooleanField(
                default=True,
                help_text='绿色为通过或无需HR考核',
                verbose_name='H&R考核',
            ),
        ),
    ]

View File

@@ -77,7 +77,7 @@ class Site(BaseEntity):
default='.//td[@class="embedded"]/a/b/text()',
max_length=128)
title_rule = models.CharField(verbose_name='种子标题',
default='.//tr/td[1]/text()',
default='.//td[@class="embedded"]/a/following::text()[1]',
max_length=128)
detail_url_rule = models.CharField(
verbose_name='种子详情',
@@ -85,18 +85,18 @@ class Site(BaseEntity):
max_length=128)
category_rule = models.CharField(
verbose_name='分类',
default='.//td[@class="rowfollow nowrap"][1]/a[1]/img/@class',
default='.//td[@class="rowfollow nowrap"][1]/a[1]/img/@title',
max_length=128)
poster_rule = models.CharField(
verbose_name='海报',
default='.//table/tr/td[1]/img/@src',
max_length=128)
magnet_url_rule = models.CharField(
verbose_name='下载链接',
default='.//td/a[contains(@href,"download")]/@href',
verbose_name='主页下载链接',
default='.//td/a[contains(@href,"download.php?id=")]/@href',
max_length=128)
download_url_rule = models.CharField(
verbose_name='种子链接',
verbose_name='详情页种子链接',
default='.//a[contains(@href,"download.php?id=") and contains(@href,"passkey")]/@href',
max_length=128)
size_rule = models.CharField(verbose_name='文件大小',
@@ -108,7 +108,7 @@ class Site(BaseEntity):
max_length=128)
sale_rule = models.CharField(
verbose_name='促销信息',
default='.//table/tr/td/img[contains(@class,"pro_")]/@alt',
default='.//div/img[contains(@class,"pro_")]/@alt',
max_length=128
)
sale_expire_rule = models.CharField(
@@ -121,15 +121,15 @@ class Site(BaseEntity):
max_length=128)
seeders_rule = models.CharField(
verbose_name='做种人数',
default='.//td[6]/b/a/text()',
default='.//a[contains(@href,"#seeders")]/text()',
max_length=128)
leechers_rule = models.CharField(
verbose_name='下载人数',
default='.//td[7]/b/a/text()',
default='.//a[contains(@href,"#leechers")]/text()',
max_length=128)
completers_rule = models.CharField(
verbose_name='完成人数',
default='.//td[8]/a/b/text()',
default='.//a[contains(@href,"viewsnatches.php?id=")]//text()',
max_length=128)
viewfilelist_rule = models.CharField(
verbose_name='解析文件结构',
@@ -140,8 +140,8 @@ class Site(BaseEntity):
default='.//tr/td[9]/nobr/text()',
max_length=128)
peer_speed_rule = models.CharField(
verbose_name='平均上传速度',
default='.//tr/td[5]/nobr/text()',
verbose_name='平均下载速度',
default='.//tr/td[7]/nobr/text()',
max_length=128)
remark = models.TextField(verbose_name='备注', default='', null=True, blank=True)
# 状态信息XPath
@@ -199,8 +199,9 @@ class Site(BaseEntity):
default='//img[@class="arrowup"]/following-sibling::text()[1]',
max_length=128)
record_count_rule = models.CharField(verbose_name='种子记录数',
default='/html/body/b/text()',
record_count_rule = models.CharField(verbose_name='做种大小列表',
help_text='提取做种列表中文件大小计算总量',
default='.//td[3]/text()',
max_length=128)
seed_vol_rule = models.CharField(verbose_name='做种大小',
@@ -334,7 +335,7 @@ class TorrentInfo(BaseEntity):
size = models.IntegerField(verbose_name='文件大小', default=0)
state = models.BooleanField(max_length=16, verbose_name='推送状态', default=False)
save_path = models.FilePathField(verbose_name='保存路径', default='/downloads/brush')
hr = models.BooleanField(verbose_name='H&R', default=False)
hr = models.BooleanField(verbose_name='H&R考核', default=True, help_text='绿色为通过或无需HR考核')
sale_status = models.CharField(verbose_name='优惠状态', default='无促销', max_length=16)
sale_expire = models.CharField(verbose_name='到期时间', default='无限期', max_length=32)
on_release = models.CharField(verbose_name='发布时间', default='', max_length=32)

View File

@@ -2964,7 +2964,7 @@
"pk": 44,
"fields": {
"created_at": "2022-08-23T16:41:38.669",
"updated_at": "2022-09-02T16:41:47.215",
"updated_at": "2022-09-02T17:24:52.002",
"url": "https://pt.keepfrds.com/",
"name": "月月Frds",
"logo": "https://pt.keepfrds.com/static/favicon-64x64.png",
@@ -3000,16 +3000,16 @@
"detail_url_rule": ".//td[@class=\"embedded\"]/a[contains(@href,\"detail\")]/@href",
"category_rule": ".//td[@class=\"rowfollow nowrap\"][1]/a[1]/img/@title",
"poster_rule": ".//table/tr/td[1]/img/@src",
"magnet_url_rule": ".//input[@id=\"download_link\"]/@value",
"download_url_rule": ".//a[contains(@href,\"download.php\")]/@href",
"size_rule": "//td[5]/text()",
"magnet_url_rule": ".//a[contains(@href,\"download.php?id=\")]/@href",
"download_url_rule": ".//input[@id=\"download_link\"]/@value",
"size_rule": ".//td[5]/text()",
"hr_rule": ".//table/tr/td/img[@class=\"hitandrun\"]/@title",
"sale_rule": ".//div/img[contains(@class,\"pro_\")]/@alt",
"sale_expire_rule": ".//div/img[contains(@class,\"pro_\")]/@onmouseover",
"release_rule": ".//td[4]/span/@title",
"seeders_rule": "//a[contains(@href,\"#seeders\")]/text()",
"leechers_rule": "//a[contains(@href,\"#leechers\")]/text()",
"completers_rule": "//a[contains(@href,\"viewsnatches.php?id=\")]//text()",
"seeders_rule": ".//a[contains(@href,\"#seeders\")]/text()",
"leechers_rule": ".//a[contains(@href,\"#leechers\")]/text()",
"completers_rule": ".//a[contains(@href,\"viewsnatches.php?id=\")]//text()",
"viewfilelist_rule": ".//td/text()",
"viewpeerlist_rule": ".//tr/td[9]/nobr/text()",
"peer_speed_rule": ".//tr/td[7]/nobr/text()",
@@ -3027,7 +3027,7 @@
"my_hr_rule": "//tr[14]/td[2]/a/text()",
"leech_rule": "//img[@class=\"arrowdown\"]/following-sibling::text()[1]",
"seed_rule": "//img[@class=\"arrowup\"]/following-sibling::text()[1]",
"record_count_rule": "//td[3]/text()",
"record_count_rule": ".//td[3]/text()",
"seed_vol_rule": "//tr/td[3]",
"mailbox_rule": "//a[@href=\"messages.php\"]/following-sibling::text()[1]",
"hash_rule": "//td[@class=\"no_border_wide\"]/b[text()=\"Hash 码:\"]/following::text()[1]"