From 6f70ee0b7be7bc2c43a54cfa1e617dbdee8ce630 Mon Sep 17 00:00:00 2001
From: zu1k
Date: Fri, 21 Aug 2020 09:00:26 +0800
Subject: [PATCH] update web fanqiangdang

---
 pkg/getter/web_fanqiangdang.go | 82 +++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 7 deletions(-)

diff --git a/pkg/getter/web_fanqiangdang.go b/pkg/getter/web_fanqiangdang.go
index c8c93b6..fd990f5 100644
--- a/pkg/getter/web_fanqiangdang.go
+++ b/pkg/getter/web_fanqiangdang.go
@@ -2,6 +2,7 @@ package getter
 
 import (
 	"fmt"
+	"strings"
 	"sync"
 
 	"github.com/gocolly/colly"
@@ -10,6 +11,7 @@ import (
 )
 
 func init() {
+	Register("web-fanqiangdang-rss", NewWebFanqiangdangRSSGetter)
 	Register("web-fanqiangdang", NewWebFanqiangdangGetter)
 }
 
@@ -17,7 +19,7 @@ type WebFanqiangdang struct {
 	c         *colly.Collector
 	NumNeeded int
 	Url       string
-	results   []string
+	results   proxy.ProxyList
 }
 
 func NewWebFanqiangdangGetter(options tool.Options) (getter Getter, err error) {
@@ -50,17 +52,83 @@ func NewWebFanqiangdangGetter(options tool.Options) (getter Getter, err error) {
 }
 
 func (w *WebFanqiangdang) Get() proxy.ProxyList {
+	w.results = make(proxy.ProxyList, 0)
+	w.c.OnHTML("td.t_f", func(e *colly.HTMLElement) {
+		w.results = append(w.results, FuzzParseProxyFromString(e.Text)...)
+		subUrls := urlRe.FindAllString(e.Text, -1)
+		for _, url := range subUrls {
+			w.results = append(w.results, (&Subscribe{Url: url}).Get()...)
+		}
+	})
+
+	w.c.OnHTML("th.new>a[href]", func(e *colly.HTMLElement) {
+		url := e.Attr("href")
+		if strings.HasPrefix(url, "https://fanqiangdang.com/thread") {
+			fmt.Println(url)
+			_ = e.Request.Visit(url)
+		}
+	})
+
+	w.results = make(proxy.ProxyList, 0)
+	err := w.c.Visit(w.Url)
+	if err != nil {
+		_ = fmt.Errorf("%s", err.Error())
+	}
+
+	return w.results
+}
+
+func (w *WebFanqiangdang) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
+	defer wg.Done()
+	nodes := w.Get()
+	for _, node := range nodes {
+		pc <- node
+	}
+}
+
+
+type WebFanqiangdangRSS struct {
+	c       *colly.Collector
+	Url     string
+	results []string
+}
+
+func NewWebFanqiangdangRSSGetter(options tool.Options) (getter Getter, err error) {
+	num, found := options["num"]
+
+	t := 200
+	switch num.(type) {
+	case int:
+		t = num.(int)
+	case float64:
+		t = int(num.(float64))
+	}
+
+	if !found || t <= 0 {
+		t = 200
+	}
+	urlInterface, found := options["url"]
+	if found {
+		url, err := AssertTypeStringNotNull(urlInterface)
+		if err != nil {
+			return nil, err
+		}
+		return &WebFanqiangdangRSS{
+			c:   colly.NewCollector(),
+			Url: url,
+		}, nil
+	}
+	return nil, ErrorUrlNotFound
+}
+
+func (w *WebFanqiangdangRSS) Get() proxy.ProxyList {
 	w.results = make([]string, 0)
-	// 找到所有的文字消息
 	w.c.OnHTML("td.t_f", func(e *colly.HTMLElement) {
 		w.results = append(w.results, GrepLinksFromString(e.Text)...)
 	})
 
-	// 从订阅中取出每一页,因为是订阅,所以都比较新
 	w.c.OnXML("//item//link", func(e *colly.XMLElement) {
-		if len(w.results) < w.NumNeeded {
-			_ = e.Request.Visit(e.Text)
-		}
+		_ = e.Request.Visit(e.Text)
 	})
 
 	w.results = make([]string, 0)
@@ -72,7 +140,7 @@ func (w *WebFanqiangdang) Get() proxy.ProxyList {
 	return StringArray2ProxyArray(w.results)
 }
 
-func (w *WebFanqiangdang) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
+func (w *WebFanqiangdangRSS) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
 	defer wg.Done()
 	nodes := w.Get()
 	for _, node := range nodes {