@@ -0,0 +1,89 @@
package getter
import (
"io/ioutil"
"regexp"
"sync"
"github.com/zu1k/proxypool/proxy"
"github.com/zu1k/proxypool/tool"
)
func init ( ) {
Register ( "webfuzzsub" , NewWebFuzzSubGetter )
}
type WebFuzzSub struct {
Url string
}
func ( w * WebFuzzSub ) Get ( ) proxy . ProxyList {
resp , err := tool . GetHttpClient ( ) . Get ( w . Url )
if err != nil {
return nil
}
defer resp . Body . Close ( )
body , err := ioutil . ReadAll ( resp . Body )
if err != nil {
return nil
}
text := string ( body )
subUrls := urlRe . FindAllString ( text , - 1 )
result := make ( proxy . ProxyList , 0 )
for _ , url := range subUrls {
result = append ( result , ( & Subscribe { Url : url } ) . Get ( ) ... )
}
return result
}
func ( w * WebFuzzSub ) Get2Chan ( pc chan proxy . Proxy , wg * sync . WaitGroup ) {
defer wg . Done ( )
nodes := w . Get ( )
for _ , node := range nodes {
pc <- node
}
}
func NewWebFuzzSubGetter ( options tool . Options ) ( getter Getter , err error ) {
urlInterface , found := options [ "url" ]
if found {
url , err := AssertTypeStringNotNull ( urlInterface )
if err != nil {
return nil , err
}
return & WebFuzzSub { Url : url } , nil
}
return nil , ErrorUrlNotFound
}
var urlRe = regexp . MustCompile ( urlPattern )
const (
// 匹配 IP4
ip4Pattern = ` ((25[0-5]|2[0-4]\d|[01]?\d\d?)\.) { 3}(25[0-5]|2[0-4]\d|[01]?\d\d?) `
// 匹配 IP6, 参考以下网页内容:
// http://blog.csdn.net/jiangfeng08/article/details/7642018
ip6Pattern = ` (([0-9A-Fa-f] { 1,4}:) { 7}([0-9A-Fa-f] { 1,4}|:))| ` +
` (([0-9A-Fa-f] { 1,4}:) { 6}(:[0-9A-Fa-f] { 1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)) { 3})|:))| ` +
` (([0-9A-Fa-f] { 1,4}:) { 5}(((:[0-9A-Fa-f] { 1,4}) { 1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)) { 3})|:))| ` +
` (([0-9A-Fa-f] { 1,4}:) { 4}(((:[0-9A-Fa-f] { 1,4}) { 1,3})|((:[0-9A-Fa-f] { 1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)) { 3}))|:))| ` +
` (([0-9A-Fa-f] { 1,4}:) { 3}(((:[0-9A-Fa-f] { 1,4}) { 1,4})|((:[0-9A-Fa-f] { 1,4}) { 0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)) { 3}))|:))| ` +
` (([0-9A-Fa-f] { 1,4}:) { 2}(((:[0-9A-Fa-f] { 1,4}) { 1,5})|((:[0-9A-Fa-f] { 1,4}) { 0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)) { 3}))|:))| ` +
` (([0-9A-Fa-f] { 1,4}:) { 1}(((:[0-9A-Fa-f] { 1,4}) { 1,6})|((:[0-9A-Fa-f] { 1,4}) { 0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)) { 3}))|:))| ` +
` (:(((:[0-9A-Fa-f] { 1,4}) { 1,7})|((:[0-9A-Fa-f] { 1,4}) { 0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)) { 3}))|:)) `
// 同时匹配 IP4 和 IP6
ipPattern = "(" + ip4Pattern + ")|(" + ip6Pattern + ")"
// 匹配域名
domainPattern = ` [a-zA-Z0-9][a-zA-Z0-9_-] { 0,62}(\.[a-zA-Z0-9][a-zA-Z0-9_-] { 0,62})*(\.[a-zA-Z][a-zA-Z0-9] { 0,10}) { 1} `
// 匹配 URL
urlPattern = ` ((https|http|ftp|rtsp|mms)?://)? ` + // 协议
` (([0-9a-zA-Z]+:)?[0-9a-zA-Z_-]+@)? ` + // pwd:user@
"(" + ipPattern + "|(" + domainPattern + "))" + // IP 或域名
` (:\d { 1,5})? ` + // 端口
` (/+[a-zA-Z0-9][a-zA-Z0-9_.-]*)*/* ` + // path
` (\?([a-zA-Z0-9_-]+(=.*&?)*)*)* ` // query
)