Files
proxypool/pkg/getter/web_fuzz_sub.go
2020-08-29 14:10:10 +08:00

92 lines
3.0 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package getter
import (
"io/ioutil"
"log"
"regexp"
"sync"
"github.com/zu1k/proxypool/pkg/proxy"
"github.com/zu1k/proxypool/pkg/tool"
)
// init hands the WebFuzzSub constructor to Register under the source type
// name "webfuzzsub".
func init() {
	const sourceType = "webfuzzsub"
	Register(sourceType, NewWebFuzzSubGetter)
}
// WebFuzzSub is a getter that downloads a web page and scans its text for
// anything that looks like a URL, treating every hit as a subscription link.
type WebFuzzSub struct {
// Url is the address of the page that Get downloads and scans.
Url string
}
// Get downloads the page at w.Url, collects every substring of the body that
// matches urlRe, and fetches each distinct match as a subscription through
// (&Subscribe{Url: match}).Get(). The proxies of all subscriptions are
// concatenated in order of first appearance of their link on the page.
//
// It returns nil when the request fails, the server answers with a non-2xx
// status, or the body cannot be read. Otherwise it returns a non-nil list,
// which is empty when no link produced any proxy.
func (w *WebFuzzSub) Get() proxy.ProxyList {
	resp, err := tool.GetHttpClient().Get(w.Url)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()

	// The HTTP client reports only transport failures as errors; an error
	// page (404, 500, ...) still arrives with err == nil, and scanning it
	// would yield bogus links. Accept 2xx responses only.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil
	}

	// A page typically repeats the same link (href attribute plus link text),
	// so remember what has been fetched already: every subscription is
	// downloaded once and its proxies are not appended twice.
	seen := make(map[string]struct{})
	result := make(proxy.ProxyList, 0)
	for _, subURL := range urlRe.FindAllString(string(body), -1) {
		if _, dup := seen[subURL]; dup {
			continue
		}
		seen[subURL] = struct{}{}
		result = append(result, (&Subscribe{Url: subURL}).Get()...)
	}
	return result
}
// Get2Chan runs Get, logs how many proxies it produced for w.Url, and sends
// them one by one on pc. wg.Done is called when the method returns, i.e. after
// the last proxy has been sent.
func (w *WebFuzzSub) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
	defer wg.Done()

	proxies := w.Get()
	log.Printf("STATISTIC: WebFuzzSub\tcount=%d\turl=%s\n", len(proxies), w.Url)

	for i := range proxies {
		pc <- proxies[i]
	}
}
// NewWebFuzzSubGetter builds a WebFuzzSub from options.
//
// The "url" option is required: when the key is absent the function returns
// ErrorUrlNotFound, and when AssertTypeStringNotNull rejects its value that
// error is returned unchanged. On success the getter's Url is set to the
// validated string.
func NewWebFuzzSubGetter(options tool.Options) (getter Getter, err error) {
	rawURL, ok := options["url"]
	if !ok {
		return nil, ErrorUrlNotFound
	}

	pageURL, err := AssertTypeStringNotNull(rawURL)
	if err != nil {
		return nil, err
	}
	return &WebFuzzSub{Url: pageURL}, nil
}
// urlRe finds URL-like substrings in free text. It is compiled once at package
// initialisation from urlPattern. The pattern is deliberately loose: the
// scheme is optional, so bare hosts such as "example.com/path" match as well.
var urlRe = regexp.MustCompile(urlPattern)

const (
	// ip4Pattern matches a dotted-quad IPv4 address.
	ip4Pattern = `((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)`

	// ip6Pattern matches an IPv6 address, including the "::" shorthand and an
	// embedded IPv4 tail. Adapted from
	// http://blog.csdn.net/jiangfeng08/article/details/7642018
	ip6Pattern = `(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|` +
		`(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
		`(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
		`(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
		`(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
		`(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
		`(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
		`(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))`

	// ipPattern matches either an IPv4 or an IPv6 address.
	ipPattern = "(" + ip4Pattern + ")|(" + ip6Pattern + ")"

	// domainPattern matches a host name: one or more dot-separated labels
	// followed by a top-level label that starts with a letter.
	domainPattern = `[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}(\.[a-zA-Z0-9][a-zA-Z0-9_-]{0,62})*(\.[a-zA-Z][a-zA-Z0-9]{0,10}){1}`

	// urlPattern matches a URL whose scheme, credentials, port, path and
	// query are all optional; only the host is required.
	//
	// A query value runs up to the next whitespace, quote or angle bracket.
	// It must not be written as `.*`: that extends to the end of the line, so
	// a link embedded in HTML would drag the closing quote and the markup
	// behind it into the match, and a second URL on the same line would be
	// swallowed by the first one.
	urlPattern = `((https|http)?://)?` + // scheme
		`(([0-9a-zA-Z]+:)?[0-9a-zA-Z_-]+@)?` + // user:pwd@
		"(" + ipPattern + "|(" + domainPattern + "))" + // IP address or domain
		`(:\d{1,5})?` + // port
		`(/+[a-zA-Z0-9][a-zA-Z0-9_.-]*)*/*` + // path
		`(\?([a-zA-Z0-9_-]+(=[^\s"'<>]*&?)*)*)*` // query
)