-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.go
165 lines (141 loc) · 3.39 KB
/
scraper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
package scraper
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"os"
"path/filepath"
"time"
)
// Result holds the JSON payload decoded from a successful
// Prompt API - Scraper response.
type Result struct {
// Data is decoded from the "data" key. Save writes it to an .html file
// when DataSelector is empty.
Data string `json:"data,omitempty"`
// DataSelector is decoded from the "data-selector" key. When it is
// non-empty, Save writes it (JSON encoded) to a .json file instead of Data.
DataSelector []string `json:"data-selector,omitempty"`
// URL is decoded from the "url" key.
// NOTE(review): presumably the URL that was scraped - confirm against the API docs.
URL string `json:"url"`
// Headers is decoded from the "headers" key.
// NOTE(review): presumably the response headers of the scraped page - confirm.
Headers map[string]interface{} `json:"headers"`
}
// ErrorResponse is the JSON body Scrape decodes when the Prompt API - Scraper
// answers with a non-2xx status code.
type ErrorResponse struct {
// Message is decoded from the "message" key; Scrape returns it as the error text.
Message string `json:"message"`
}
// Params carries the arguments of the Scrape method. Scrape sends each field
// as a query parameter of the API request; every field except URL is optional
// and is sent only when it is non-empty.
type Params struct {
// URL is required; it must parse with url.ParseRequestURI. Sent as "url".
URL string
// AuthPassword is sent as "auth_password".
AuthPassword string
// AuthUsername is sent as "auth_username".
AuthUsername string
// Cookie is sent as "cookie".
Cookie string
// Country is sent as "country".
Country string
// Referer is sent as "referer".
Referer string
// Selector is sent as "selector".
Selector string
}
// ExtraHeader is one additional HTTP header that Scrape sets on the request
// it sends to the API (via req.Header.Set, so a later entry with the same
// name replaces an earlier one).
type ExtraHeader struct {
// Name is the header name.
Name string
// Value is the header value.
Value string
}
// promptAPIEndpoint is the base URL Scrape sends its GET request to; the
// encoded query string is appended after a "?".
// NOTE(review): declared as a var rather than a const, presumably so tests can
// point it at a local server - confirm.
var promptAPIEndpoint = "https://api.promptapi.com/scraper"
// PromptAPI is the receiver type for the Scrape and Save methods. It has no
// fields, so the zero value is ready to use.
type PromptAPI struct{}
// Scrape fetches params.URL through the Prompt API - Scraper endpoint and
// decodes the JSON response body into result.
//
// The API key is read from the PROMPTAPI_TOKEN environment variable and sent
// in the "apikey" request header. params.URL is required and must parse with
// url.ParseRequestURI; the remaining Params fields are forwarded as query
// parameters only when they are non-empty. Every non-nil entry of headers is
// set on the outgoing request after the API key, so an entry named "apikey"
// replaces it.
//
// An error is returned when:
//   - PROMPTAPI_TOKEN is unset or empty,
//   - params or result is nil,
//   - params.URL is not a valid request URI,
//   - the request cannot be built, fails, or exceeds the 5 second timeout,
//   - the API answers with a non-2xx status (the error text is the "message"
//     field of the response body, or the HTTP status code when the body
//     carries no usable message),
//   - the response body cannot be decoded into result.
func (pa PromptAPI) Scrape(params *Params, headers []*ExtraHeader, result *Result) error {
	apiKey, ok := os.LookupEnv("PROMPTAPI_TOKEN")
	if !ok || apiKey == "" {
		return errors.New("You need to set PROMPTAPI_TOKEN environment variable")
	}

	// Reject unusable arguments before spending an API call on them.
	if params == nil {
		return errors.New("params must not be nil")
	}
	if result == nil {
		return errors.New("result must not be nil")
	}
	if _, err := url.ParseRequestURI(params.URL); err != nil {
		return err
	}

	query := url.Values{}
	query.Set("url", params.URL)

	// Optional parameters are only sent when the caller filled them in.
	optional := []struct{ key, value string }{
		{"auth_password", params.AuthPassword},
		{"auth_username", params.AuthUsername},
		{"cookie", params.Cookie},
		{"country", params.Country},
		{"referer", params.Referer},
		{"selector", params.Selector},
	}
	for _, opt := range optional {
		if opt.value != "" {
			query.Set(opt.key, opt.value)
		}
	}

	endpoint := fmt.Sprintf("%s?%s", promptAPIEndpoint, query.Encode())
	req, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return err
	}

	req.Header.Set("apikey", apiKey)
	for _, h := range headers {
		if h == nil {
			// A nil entry carries no header; skip it instead of panicking.
			continue
		}
		req.Header.Set(h.Name, h.Value)
	}

	client := &http.Client{Timeout: 5 * time.Second}
	res, err := client.Do(req)
	if err != nil {
		return err
	}
	// net/http guarantees a non-nil Body whenever Do returns a nil error.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return err
	}

	if res.StatusCode < 200 || res.StatusCode >= 300 {
		var apiErr ErrorResponse
		// Gateways and proxies may answer with non-JSON bodies; fall back to
		// the status code so the caller never receives an empty or unrelated
		// (JSON syntax) error.
		if err := json.Unmarshal(body, &apiErr); err != nil || apiErr.Message == "" {
			return fmt.Errorf("prompt api request failed with HTTP status %d", res.StatusCode)
		}
		return errors.New(apiErr.Message)
	}

	return json.Unmarshal(body, result)
}
// Save writes the scraped content held in result to a file derived from
// filename and returns the number of bytes written.
//
// The extension of filename (as reported by filepath.Ext) is replaced:
//   - with ".json" when result.DataSelector is non-empty; the file then holds
//     the JSON encoding of DataSelector (HTML characters left unescaped,
//     followed by the newline json.Encoder appends),
//   - with ".html" otherwise; the file then holds result.Data verbatim.
//
// An existing file of the same name is truncated. A non-nil error is returned
// when result is nil, when DataSelector cannot be encoded, or when the file
// cannot be created, written or closed.
func (pa PromptAPI) Save(filename string, result *Result) (n int, err error) {
	if result == nil {
		return 0, errors.New("result must not be nil")
	}

	ext := ".html"
	payload := result.Data

	if len(result.DataSelector) > 0 {
		ext = ".json"

		var buf bytes.Buffer
		enc := json.NewEncoder(&buf)
		// Keep "<", ">" and "&" readable in the saved selector output.
		enc.SetEscapeHTML(false)
		if encErr := enc.Encode(result.DataSelector); encErr != nil {
			return 0, encErr
		}
		payload = buf.String()
	}

	base := filename[:len(filename)-len(filepath.Ext(filename))]
	file, createErr := os.Create(base + ext)
	if createErr != nil {
		return 0, createErr
	}
	defer func() {
		// A failed Close can mean the data never reached the disk; report it
		// unless an earlier write error is already being returned.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	// Return the write error together with the byte count instead of
	// discarding it.
	return file.WriteString(payload)
}