Files
proxypool/pkg/getter/tgchannel.go
2020-08-29 14:10:10 +08:00

88 lines
2.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package getter
import (
"fmt"
"log"
"sync"
"github.com/gocolly/colly"
"github.com/zu1k/proxypool/pkg/proxy"
"github.com/zu1k/proxypool/pkg/tool"
)
func init() {
Register("tgchannel", NewTGChannelGetter)
}
type TGChannelGetter struct {
c *colly.Collector
NumNeeded int
results []string
Url string
}
func NewTGChannelGetter(options tool.Options) (getter Getter, err error) {
num, found := options["num"]
t := 200
switch num.(type) {
case int:
t = num.(int)
case float64:
t = int(num.(float64))
}
if !found || t <= 0 {
t = 200
}
urlInterface, found := options["channel"]
if found {
url, err := AssertTypeStringNotNull(urlInterface)
if err != nil {
return nil, err
}
return &TGChannelGetter{
c: tool.GetColly(),
NumNeeded: t,
Url: "https://t.me/s/" + url,
}, nil
}
return nil, ErrorUrlNotFound
}
func (g *TGChannelGetter) Get() proxy.ProxyList {
result := make(proxy.ProxyList, 0)
g.results = make([]string, 0)
// 找到所有的文字消息
g.c.OnHTML("div.tgme_widget_message_text", func(e *colly.HTMLElement) {
g.results = append(g.results, GrepLinksFromString(e.Text)...)
// 抓取到http链接有可能是订阅链接或其他链接无论如何试一下
subUrls := urlRe.FindAllString(e.Text, -1)
for _, url := range subUrls {
result = append(result, (&Subscribe{Url: url}).Get()...)
}
})
// 找到之前消息页面的链接,加入访问队列
g.c.OnHTML("link[rel=prev]", func(e *colly.HTMLElement) {
if len(g.results) < g.NumNeeded {
_ = e.Request.Visit(e.Attr("href"))
}
})
g.results = make([]string, 0)
err := g.c.Visit(g.Url)
if err != nil {
_ = fmt.Errorf("%s", err.Error())
}
return append(result, StringArray2ProxyArray(g.results)...)
}
func (g *TGChannelGetter) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
defer wg.Done()
nodes := g.Get()
log.Printf("STATISTIC: TGChannel\tcount=%d\turl=%s\n", len(nodes), g.Url)
for _, node := range nodes {
pc <- node
}
}