This repository has been archived by the owner on Sep 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.go
101 lines (89 loc) · 1.71 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package main
import (
"bufio"
"flag"
"os"
"runtime"
"sync"
"sync/atomic"
)
// Command-line flags. Each variable points at the value populated by
// flag.Parse, which main calls before reading any of them.

// maxDepthFlag is the -d flag: the maximum crawling depth (default 1).
var maxDepthFlag = flag.Int("d", 1, "Maximum depth for crawling")

// verboseFlag is the -v flag: enables verbose logging (default off).
var verboseFlag = flag.Bool("v", false, "Write verbose logs")

// formatFlag is the -f flag: the output format name (default "table").
var formatFlag = flag.String("f", "table", "Format of the output (table|csv|tsv)")

// id is the shared counter from which processUrl draws scraper IDs. It is
// only modified through atomic.AddInt32.
var id int32
// NewReporter returns the Reporter selected by the -f flag.
//
// "csv" and "tsv" select their dedicated reporters. "table" and every
// unrecognised value select the table reporter.
func NewReporter() Reporter {
	format := *formatFlag

	if format == "csv" {
		return NewCSVReporter()
	}
	if format == "tsv" {
		return NewTSVReporter()
	}

	// "table" and anything unknown share the same reporter.
	return NewTableReporter()
}
// main reads URLs from standard input, one per line, scrapes every distinct
// URL concurrently and renders the collected page statistics with the
// reporter selected by the -f flag.
//
// The number of URLs processed at once is bounded by a semaphore sized by
// concurrencyNumber. A single collector goroutine drains the reports channel,
// so reporter.Append is only ever called from that one goroutine.
//
// WaitGroup accounting: main adds one unit per URL, released when that URL's
// worker returns; processUrl adds one unit per report it sends, released by
// the collector after the report has been appended. wg.Wait therefore returns
// only once every report has been handed to the reporter.
//
// If reading standard input fails (for example because a line exceeds the
// scanner's buffer), the results gathered so far are still rendered, the
// error is written to standard error and the process exits with status 1.
func main() {
	flag.Parse()

	var wg sync.WaitGroup
	sem := make(chan struct{}, concurrencyNumber())
	reporter := NewReporter()
	reports := make(chan *PageStats)

	// Collector: the only goroutine that touches the reporter until Render.
	go func() {
		for r := range reports {
			reporter.Append(r)
			wg.Done()
		}
	}()

	scanner := bufio.NewScanner(os.Stdin)
	visited := make(map[string]struct{})
	for scanner.Scan() {
		next := scanner.Text()
		if _, seen := visited[next]; seen {
			continue
		}
		visited[next] = struct{}{}

		wg.Add(1)
		sem <- struct{}{} // blocks while the maximum number of workers is busy
		go func(url string, rc chan *PageStats) {
			defer wg.Done()
			// Deferred so the slot is released even if processUrl panics.
			// Runs before wg.Done, matching the original ordering.
			defer func() { <-sem }()
			processUrl(url, rc, &wg)
		}(next, reports)
	}

	wg.Wait()
	close(reports)
	reporter.Render()

	// Scan returning false can mean EOF or a failure; only Err tells which.
	if err := scanner.Err(); err != nil {
		os.Stderr.WriteString("reading standard input: " + err.Error() + "\n")
		os.Exit(1)
	}
}
// concurrencyNumber returns how many URLs may be processed at the same time:
// the number of logical CPUs divided by the -d (maximum depth) flag, and
// never less than 1.
//
// A depth below 1 is treated as 1, so that "-d 0" cannot trigger an integer
// division by zero at startup.
func concurrencyNumber() int {
	depth := *maxDepthFlag
	if depth < 1 {
		depth = 1
	}

	count := runtime.NumCPU() / depth
	if count < 1 {
		count = 1
	}
	return count
}
// processUrl scrapes one website and forwards each resulting page report to
// the reports channel.
//
// The scraper gets a fresh ID from the shared atomic counter and is
// configured from the -d and -v flags. If scraping fails, the error is passed
// to the scraper's Log method and nothing is sent.
//
// wg is incremented once per report just before it is sent. The matching
// Done is issued by the goroutine in main that drains the channel.
func processUrl(url string, reports chan *PageStats, wg *sync.WaitGroup) {
	scraper := &Scraper{
		ID:        atomic.AddInt32(&id, 1),
		Website:   url,
		MaxDepth:  *maxDepthFlag,
		PrintLogs: *verboseFlag,
		Async:     true,
		mutex:     &sync.Mutex{},
		stats:     make(map[string]*PageStats),
	}

	if err := scraper.Scrape(); err != nil {
		scraper.Log("Error while scraping:", err)
		return
	}

	for _, page := range scraper.Report() {
		wg.Add(1)
		reports <- page
	}
}