From db47de2747ac05315dfb7a24ba737c82fbfb6cb4 Mon Sep 17 00:00:00 2001 From: zu1k Date: Wed, 12 Aug 2020 17:54:22 +0800 Subject: [PATCH] add coroutine --- app/cron.go | 2 +- app/task.go | 62 +++++++++++++++++++++++++++++++++++++- getter/base.go | 2 ++ getter/subscribe.go | 10 ++++++ getter/tgchannel.go | 10 ++++++ getter/web_fanqiangdang.go | 59 ++++++++++++++++++++++++++++++++++++ getter/web_free_ssr_xyz.go | 10 ++++++ getter/web_fuzz.go | 10 ++++++ getter/web_lucn_org.go | 10 ++++++ getter/web_test.go | 23 -------------- main.go | 2 +- proxy/vmess.go | 3 -- 12 files changed, 174 insertions(+), 29 deletions(-) create mode 100644 getter/web_fanqiangdang.go delete mode 100644 getter/web_test.go diff --git a/app/cron.go b/app/cron.go index 15b5cbd..9f78915 100644 --- a/app/cron.go +++ b/app/cron.go @@ -5,6 +5,6 @@ import ( ) func Cron() { - _ = gocron.Every(10).Minutes().Do(CrawlTGChannel) + _ = gocron.Every(10).Minutes().Do(CrawlGo) <-gocron.Start() } diff --git a/app/task.go b/app/task.go index 77c6622..eac30fb 100644 --- a/app/task.go +++ b/app/task.go @@ -1,8 +1,10 @@ package app import ( + "log" "math/rand" "strconv" + "sync" "github.com/zu1k/proxypool/provider" @@ -12,7 +14,7 @@ import ( "github.com/zu1k/proxypool/proxy" ) -func CrawlTGChannel() { +func Crawl() { proxies := make([]proxy.Proxy, 0) // tg上各种节点分享频道 @@ -35,6 +37,10 @@ func CrawlTGChannel() { proxies = append(proxies, getter.NewWebFuzz("https://zfjvpn.gitbook.io/").Get()...) proxies = append(proxies, getter.NewWebFuzz("https://www.freefq.com/d/file/free-ssr/20200811/1f3e9d0d0064f662457062712dcf1b66.txt").Get()...) proxies = append(proxies, getter.NewWebFuzz("https://merlinblog.xyz/wiki/freess.html").Get()...) + // 翻墙党 + proxies = append(proxies, getter.NewWebFanqiangdangGetter("https://fanqiangdang.com/forum.php?mod=rss&fid=50&auth=0", 200).Get()...) + proxies = append(proxies, getter.NewWebFanqiangdangGetter("https://fanqiangdang.com/forum.php?mod=rss&fid=2&auth=0", 200).Get()...) 
+ proxies = append(proxies, getter.NewWebFanqiangdangGetter("https://fanqiangdang.com/forum.php?mod=rss&fid=36&auth=0", 200).Get()...) // 订阅链接 proxies = append(proxies, getter.NewSubscribe("https://raw.githubusercontent.com/ssrsub/ssr/master/v2ray").Get()...) @@ -51,3 +57,57 @@ func CrawlTGChannel() { cache.SetProxies(proxies) cache.SetString("clashproxies", provider.Clash{Proxies: proxies}.Provide()) } + +func CrawlGo() { + wg := sync.WaitGroup{} + var pc = make(chan proxy.Proxy) + // tg上各种节点分享频道 + go getter.NewTGChannelGetter("https://t.me/s/ssrList", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/SSRSUB", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/FreeSSRNode", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/ssrlists", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/ssrshares", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/V2List", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/ssrtool", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/vmessr", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/FreeSSR666", 200).Get2Chan(pc, &wg) + go getter.NewTGChannelGetter("https://t.me/s/fanqiang666", 200).Get2Chan(pc, &wg) + + // 各种网站上公开的 + go getter.WebFreessrXyz{}.Get2Chan(pc, &wg) + go getter.WebLucnOrg{}.Get2Chan(pc, &wg) + + // 从web页面模糊获取 + go getter.NewWebFuzz("https://zfjvpn.gitbook.io/").Get2Chan(pc, &wg) + go getter.NewWebFuzz("https://www.freefq.com/d/file/free-ssr/20200811/1f3e9d0d0064f662457062712dcf1b66.txt").Get2Chan(pc, &wg) + go getter.NewWebFuzz("https://merlinblog.xyz/wiki/freess.html").Get2Chan(pc, &wg) + // 翻墙党 + go getter.NewWebFanqiangdangGetter("https://fanqiangdang.com/forum.php?mod=rss&fid=50&auth=0", 200).Get2Chan(pc, &wg) + go getter.NewWebFanqiangdangGetter("https://fanqiangdang.com/forum.php?mod=rss&fid=2&auth=0", 200).Get2Chan(pc, &wg) + go 
getter.NewWebFanqiangdangGetter("https://fanqiangdang.com/forum.php?mod=rss&fid=36&auth=0", 200).Get2Chan(pc, &wg) + + // 订阅链接 + go getter.NewSubscribe("https://raw.githubusercontent.com/ssrsub/ssr/master/v2ray").Get2Chan(pc, &wg) + go getter.NewSubscribe("https://raw.githubusercontent.com/ssrsub/ssr/master/ssrsub").Get2Chan(pc, &wg) + go getter.NewSubscribe("https://raw.githubusercontent.com/ssrsub/ssr/master/ss-sub").Get2Chan(pc, &wg) + + proxies := cache.GetProxies() + go func() { + wg.Wait() + close(pc) + }() + for node := range pc { + if node != nil { + proxies = append(proxies, node) + } + } + proxies = proxy.Deduplication(proxies) + + num := len(proxies) + for i := 0; i < num; i++ { + proxies[i].SetName(strconv.Itoa(rand.Int())) + } + log.Println("CrawlGo node count:", num) + cache.SetProxies(proxies) + cache.SetString("clashproxies", provider.Clash{Proxies: proxies}.Provide()) +} diff --git a/getter/base.go b/getter/base.go index 34cb807..2bc00d2 100644 --- a/getter/base.go +++ b/getter/base.go @@ -2,12 +2,14 @@ package getter import ( "strings" + "sync" "github.com/zu1k/proxypool/proxy" ) type Getter interface { Get() []proxy.Proxy + Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) } func String2Proxy(link string) proxy.Proxy { diff --git a/getter/subscribe.go b/getter/subscribe.go index 8d5cd11..a5cc54a 100644 --- a/getter/subscribe.go +++ b/getter/subscribe.go @@ -4,6 +4,7 @@ import ( "io/ioutil" "net/http" "strings" + "sync" "github.com/zu1k/proxypool/proxy" "github.com/zu1k/proxypool/tool" @@ -34,6 +35,15 @@ func (s Subscribe) Get() []proxy.Proxy { return StringArray2ProxyArray(nodes) } +func (s Subscribe) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) { + wg.Add(1) + nodes := s.Get() + for _, node := range nodes { + pc <- node + } + wg.Done() +} + func NewSubscribe(url string) *Subscribe { return &Subscribe{ Url: url, diff --git a/getter/tgchannel.go b/getter/tgchannel.go index 3f3b91f..c601786 100644 --- a/getter/tgchannel.go +++ 
b/getter/tgchannel.go @@ -2,6 +2,7 @@ package getter import ( "fmt" + "sync" "github.com/gocolly/colly" "github.com/zu1k/proxypool/proxy" @@ -47,3 +48,12 @@ func (g TGChannelGetter) Get() []proxy.Proxy { return StringArray2ProxyArray(g.Results) } + +func (g TGChannelGetter) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) { + wg.Add(1) + nodes := g.Get() + for _, node := range nodes { + pc <- node + } + wg.Done() +} diff --git a/getter/web_fanqiangdang.go b/getter/web_fanqiangdang.go new file mode 100644 index 0000000..638011b --- /dev/null +++ b/getter/web_fanqiangdang.go @@ -0,0 +1,59 @@ +package getter + +import ( + "fmt" + "sync" + + "github.com/gocolly/colly" + "github.com/zu1k/proxypool/proxy" +) + +type WebFanqiangdang struct { + c *colly.Collector + NumNeeded int + Results []string + Url string +} + +func NewWebFanqiangdangGetter(url string, numNeeded int) *WebFanqiangdang { + if numNeeded <= 0 { + numNeeded = 200 + } + return &WebFanqiangdang{ + c: colly.NewCollector(), + NumNeeded: numNeeded, + Results: make([]string, 0), + Url: url, + } +} + +func (w WebFanqiangdang) Get() []proxy.Proxy { + // 找到所有的文字消息 + w.c.OnHTML("td.t_f", func(e *colly.HTMLElement) { + w.Results = append(w.Results, GrepLinksFromString(e.Text)...) 
+ }) + + // 从订阅中取出每一页,因为是订阅,所以都比较新 + w.c.OnXML("//item//link", func(e *colly.XMLElement) { + if len(w.Results) < w.NumNeeded { + _ = e.Request.Visit(e.Text) + } + }) + + w.Results = make([]string, 0) + err := w.c.Visit(w.Url) + if err != nil { + _ = fmt.Errorf("%s", err.Error()) + } + + return StringArray2ProxyArray(w.Results) +} + +func (w WebFanqiangdang) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) { + wg.Add(1) + nodes := w.Get() + for _, node := range nodes { + pc <- node + } + wg.Done() +} diff --git a/getter/web_free_ssr_xyz.go b/getter/web_free_ssr_xyz.go index 61778f9..66ecfcd 100644 --- a/getter/web_free_ssr_xyz.go +++ b/getter/web_free_ssr_xyz.go @@ -4,6 +4,7 @@ import ( "encoding/json" "io/ioutil" "net/http" + "sync" "github.com/zu1k/proxypool/proxy" ) @@ -22,6 +23,15 @@ func (w WebFreessrXyz) Get() []proxy.Proxy { return results } +func (w WebFreessrXyz) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) { + wg.Add(1) + nodes := w.Get() + for _, node := range nodes { + pc <- node + } + wg.Done() +} + func freessrxyzFetch(link string) []proxy.Proxy { resp, err := http.Get(link) if err != nil { diff --git a/getter/web_fuzz.go b/getter/web_fuzz.go index 478c5e8..c74ff5e 100644 --- a/getter/web_fuzz.go +++ b/getter/web_fuzz.go @@ -3,6 +3,7 @@ package getter import ( "io/ioutil" "net/http" + "sync" "github.com/zu1k/proxypool/proxy" ) @@ -24,6 +25,15 @@ func (w WebFuzz) Get() []proxy.Proxy { return FuzzParseProxyFromString(string(body)) } +func (w WebFuzz) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) { + wg.Add(1) + nodes := w.Get() + for _, node := range nodes { + pc <- node + } + wg.Done() +} + func NewWebFuzz(url string) *WebFuzz { return &WebFuzz{Url: url} } diff --git a/getter/web_lucn_org.go b/getter/web_lucn_org.go index 9af5f7f..68a0856 100644 --- a/getter/web_lucn_org.go +++ b/getter/web_lucn_org.go @@ -5,6 +5,7 @@ import ( "encoding/json" "io/ioutil" "net/http" + "sync" "github.com/zu1k/proxypool/tool" @@ -57,6 +58,15 @@ func (w 
WebLucnOrg) Get() []proxy.Proxy { return StringArray2ProxyArray(result) } +func (w WebLucnOrg) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) { + wg.Add(1) + nodes := w.Get() + for _, node := range nodes { + pc <- node + } + wg.Done() +} + func decryptAesForLucn(code string, c string) []byte { if code == "" { code = "abclnv561cqqfg30" diff --git a/getter/web_test.go b/getter/web_test.go deleted file mode 100644 index 6d4e6c8..0000000 --- a/getter/web_test.go +++ /dev/null @@ -1,23 +0,0 @@ -package getter - -import ( - "fmt" - "testing" -) - -func TestWebLucnOrg_Get(t *testing.T) { - fmt.Println(WebLucnOrg{}.Get()) -} - -func TestWebFreessrXyz_Get(t *testing.T) { - fmt.Println(WebFreessrXyz{}.Get()) -} - -func TestWebFuzz_Get(t *testing.T) { - fmt.Println(NewWebFuzz("https://merlinblog.xyz/wiki/freess.html").Get()) -} - -func TestSubscribe_Get(t *testing.T) { - fmt.Println(NewSubscribe("https://raw.githubusercontent.com/ssrsub/ssr/master/v2ray").Get()) - fmt.Println(NewSubscribe("https://raw.githubusercontent.com/ssrsub/ssr/master/ssrsub").Get()) -} diff --git a/main.go b/main.go index 1c58bdf..bce0fbe 100644 --- a/main.go +++ b/main.go @@ -10,6 +10,6 @@ import ( func main() { go app.Cron() fmt.Println("Do the first crawl...") - app.CrawlTGChannel() + app.CrawlGo() api.Run() } diff --git a/proxy/vmess.go b/proxy/vmess.go index adcf42f..25e4141 100644 --- a/proxy/vmess.go +++ b/proxy/vmess.go @@ -3,7 +3,6 @@ package proxy import ( "encoding/json" "errors" - "fmt" "math/rand" "net" "net/url" @@ -103,7 +102,6 @@ func ParseVmessLink(link string) (*Vmess, error) { if err != nil { return nil, ErrorVmessPayloadParseFail } - fmt.Println(baseInfo) baseInfoPath := strings.Split(baseInfo, ":") if len(baseInfoPath) < 3 { return nil, ErrorPathNotComplete @@ -123,7 +121,6 @@ func ParseVmessLink(link string) (*Vmess, error) { } moreInfo, _ := url.ParseQuery(infoPayloads[1]) - fmt.Println(moreInfo) remarks := moreInfo.Get("remarks") obfs := moreInfo.Get("obfs") network := "tcp"