92 lines
3.0 KiB
Go
92 lines
3.0 KiB
Go
package getter
|
||
|
||
import (
|
||
"io/ioutil"
|
||
"log"
|
||
"regexp"
|
||
"sync"
|
||
|
||
"github.com/zu1k/proxypool/pkg/proxy"
|
||
"github.com/zu1k/proxypool/pkg/tool"
|
||
)
|
||
|
||
func init() {
|
||
Register("webfuzzsub", NewWebFuzzSubGetter)
|
||
}
|
||
|
||
type WebFuzzSub struct {
|
||
Url string
|
||
}
|
||
|
||
func (w *WebFuzzSub) Get() proxy.ProxyList {
|
||
resp, err := tool.GetHttpClient().Get(w.Url)
|
||
if err != nil {
|
||
return nil
|
||
}
|
||
defer resp.Body.Close()
|
||
body, err := ioutil.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return nil
|
||
}
|
||
text := string(body)
|
||
subUrls := urlRe.FindAllString(text, -1)
|
||
result := make(proxy.ProxyList, 0)
|
||
for _, url := range subUrls {
|
||
result = append(result, (&Subscribe{Url: url}).Get()...)
|
||
}
|
||
return result
|
||
}
|
||
|
||
func (w *WebFuzzSub) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
|
||
defer wg.Done()
|
||
nodes := w.Get()
|
||
log.Printf("STATISTIC: WebFuzzSub\tcount=%d\turl=%s\n", len(nodes), w.Url)
|
||
for _, node := range nodes {
|
||
pc <- node
|
||
}
|
||
}
|
||
|
||
func NewWebFuzzSubGetter(options tool.Options) (getter Getter, err error) {
|
||
urlInterface, found := options["url"]
|
||
if found {
|
||
url, err := AssertTypeStringNotNull(urlInterface)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return &WebFuzzSub{Url: url}, nil
|
||
}
|
||
return nil, ErrorUrlNotFound
|
||
}
|
||
|
||
var urlRe = regexp.MustCompile(urlPattern)
|
||
|
||
const (
|
||
// 匹配 IP4
|
||
ip4Pattern = `((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)`
|
||
|
||
// 匹配 IP6,参考以下网页内容:
|
||
// http://blog.csdn.net/jiangfeng08/article/details/7642018
|
||
ip6Pattern = `(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|` +
|
||
`(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
|
||
`(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
|
||
`(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
|
||
`(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
|
||
`(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
|
||
`(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
|
||
`(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))`
|
||
|
||
// 同时匹配 IP4 和 IP6
|
||
ipPattern = "(" + ip4Pattern + ")|(" + ip6Pattern + ")"
|
||
|
||
// 匹配域名
|
||
domainPattern = `[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}(\.[a-zA-Z0-9][a-zA-Z0-9_-]{0,62})*(\.[a-zA-Z][a-zA-Z0-9]{0,10}){1}`
|
||
|
||
// 匹配 URL
|
||
urlPattern = `((https|http)?://)?` + // 协议
|
||
`(([0-9a-zA-Z]+:)?[0-9a-zA-Z_-]+@)?` + // pwd:user@
|
||
"(" + ipPattern + "|(" + domainPattern + "))" + // IP 或域名
|
||
`(:\d{1,5})?` + // 端口
|
||
`(/+[a-zA-Z0-9][a-zA-Z0-9_.-]*)*/*` + // path
|
||
`(\?([a-zA-Z0-9_-]+(=.*&?)*)*)*` // query
|
||
)
|