-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser_test.go
119 lines (100 loc) · 2.17 KB
/
parser_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package crawler
import (
"strings"
"testing"
"golang.org/x/net/html"
)
// TestExtractTextReturnsTextWithOrder parses three sibling paragraphs
// ("a", "b", "c") and expects extractText to return "abc" for the
// resulting tree.
func TestExtractTextReturnsTextWithOrder(t *testing.T) {
	const markup = `
<p>a</p>
<p>b</p>
<p>c</p>
`
	root, parseErr := html.Parse(strings.NewReader(markup))
	if parseErr != nil {
		t.Fatal(parseErr)
	}

	got := extractText(root)
	expectEqualInTest(t, got, "abc")
}
// TestExtractTextReplaceNewlineToSpace feeds a paragraph whose text contains
// a newline between "a" and "b" and expects extractText to return "a b".
func TestExtractTextReplaceNewlineToSpace(t *testing.T) {
	const markup = "<p>a\nb</p>"

	root, parseErr := html.Parse(strings.NewReader(markup))
	if parseErr != nil {
		t.Fatal(parseErr)
	}

	got := extractText(root)
	expectEqualInTest(t, got, "a b")
}
// TestExtractTextReturnsTextOfAnchor parses a single anchor element and
// expects extractText to return the anchor's inner text, "text".
func TestExtractTextReturnsTextOfAnchor(t *testing.T) {
	const markup = `
<a href="example.com">text</a>
`
	root, parseErr := html.Parse(strings.NewReader(markup))
	if parseErr != nil {
		t.Fatal(parseErr)
	}

	got := extractText(root)
	expectEqualInTest(t, got, "text")
}
func TestParseLinksReturnsLinksWithText(t *testing.T) {
links, err := parseLinks(strings.NewReader(`
<a href="example.com">text</a>
`))
if err != nil {
t.Fatal(err)
}
expectEqualInTest(t, links, []Link{{Url: "example.com", Text: "text"}})
}
func TestParseLinksTrimsWhitespaces(t *testing.T) {
links, err := parseLinks(strings.NewReader(`
<a href=" example.com "> text </a>
`))
if err != nil {
t.Fatal(err)
}
expectEqualInTest(t, links, []Link{{Url: "example.com", Text: "text"}})
}
func TestParseLinksReturnsMultipleLinkWithText(t *testing.T) {
links, err := parseLinks(strings.NewReader(`
<div>
<a href="a.com">a</a>
<div>
<a href="b.com">b</a>
</div>
</div>
`))
if err != nil {
t.Fatal(err)
}
expectEqualInTest(t, links, []Link{
{Url: "a.com", Text: "a"},
{Url: "b.com", Text: "b"},
})
}
// TestParsePageReturnsTitleAndContent runs parsePage over a full document
// with a <title>, an <h1> and a <p>, and expects the title "Title" and the
// content "Header Content".
func TestParsePageReturnsTitleAndContent(t *testing.T) {
	const markup = `
<html>
<head>
<title>Title</title>
</head>
<body>
<h1>Header</h1>
<p>Content</p>
</body>
</html>
`
	gotTitle, gotContent, parseErr := parsePage(strings.NewReader(markup))
	if parseErr != nil {
		t.Fatal(parseErr)
	}

	// The title is checked before the content, as in the original test.
	expectEqualInTest(t, gotTitle, "Title")
	expectEqualInTest(t, gotContent, "Header Content")
}
// TestParsePageIgnoresNonContentTags runs parsePage over a body containing
// only a <span> and a <button>, and expects the returned content to be empty.
// The title is not inspected.
func TestParsePageIgnoresNonContentTags(t *testing.T) {
	const markup = `
<html>
<body>
<span>span</span>
<button>Click Me!</button>
</body>
</html>
`
	_, gotContent, parseErr := parsePage(strings.NewReader(markup))
	if parseErr != nil {
		t.Fatal(parseErr)
	}

	expectEqualInTest(t, gotContent, "")
}