From 20ea99dbb2087bb799ce1afe118a0cfe4bab0d04 Mon Sep 17 00:00:00 2001 From: Kamiar Bahri Date: Mon, 31 Jul 2023 00:07:10 +0100 Subject: [PATCH] retry on failed attempts --- README.md | 6 +- czds-dowload.go | 16 ++-- czds.go | 190 +++++++++++++++++++++++++++++++++++++++++++----- defs.go | 18 ++++- icann-api.go | 11 ++- init.go | 6 +- util.go | 5 ++ 7 files changed, 219 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 25943fa..7cef994 100755 --- a/README.md +++ b/README.md @@ -5,16 +5,16 @@ Runs all needed to download zone-files periodically -- via one call. ## Automation Features -### Renews the OAuth 2.0 JWT every 24 hours +### Renews the OAuth 2.0 JWT A new JWT token can be issued (per IP Addr) every 2 minutes; and each token is valid for 24 hours. -A separate instance keeps track of the JWT token issuance to ensure that no API request takes place with an +A separate instance keeps track of the JWT token to ensure that no API request takes place with an expired token. ### Downloads approved TLDs every 48 hours According to ICANN terms, each tld must be downloaded no more than once in 24 hours. Considering the large amount of information to process + the time it takes to prepare the results for a purpose (i.e. indexing), 48 hours is deemed to be the minimum time between downloads. -## Security +## Config ### Required args The following variables must be set via the environment variables: diff --git a/czds-dowload.go b/czds-dowload.go index 6be0552..28ad580 100755 --- a/czds-dowload.go +++ b/czds-dowload.go @@ -36,7 +36,7 @@ func (wm *TeeWriter) Write(p []byte) (int, error) { } // DownloadZoneFile downloads a zone file from an assigned link. -func (c *CzdsAPI) DownloadZoneFile(localFilePath string, downloadLink string, wg *sync.WaitGroup) error { +func (c *CzdsAPI) DownloadZoneFile(localFilePath string, downloadLink string, wg *sync.WaitGroup) (int, error) { if wg != nil { defer wg.Done() @@ -53,7 +53,7 @@ func (c *CzdsAPI) DownloadZoneFile(localFilePath string, downloadLink string, wg if FileOrDirExists(localFilePath) { errTxt := fmt.Sprintf("%s already exist", localFilePath) fmt.Println(errTxt) - return errors.New(errTxt) + return -1, errors.New(errTxt) } headers := c.icann.GetCommonHeaders() @@ -93,16 +93,16 @@ func (c *CzdsAPI) DownloadZoneFile(localFilePath string, downloadLink string, wg ioOutput, err := os.Create(tempFilePath) if err != nil { - return err + return -1, err } - client := &http.Client{Timeout: 5 * time.Hour} + client := &http.Client{} req, _ := http.NewRequest(http.MethodGet, downloadLink, nil) req.Header = headers resp, err := client.Do(req) if err != nil { ioOutput.Close() - return err + return -1, err } defer resp.Body.Close() @@ -113,17 +113,17 @@ func (c *CzdsAPI) DownloadZoneFile(localFilePath string, downloadLink string, wg if _, err = io.Copy(ioOutput, io.TeeReader(resp.Body, teeWriter)); err != nil { ioOutput.Close() - return err + return resp.StatusCode, err } // Close the file, before renaming it. ioOutput.Close() if err = os.Rename(tempFilePath, localFilePath); err != nil { - return err + return resp.StatusCode, err } fmt.Println("") - return nil + return -1, nil } diff --git a/czds.go b/czds.go index 0ebfa54..b3cf09f 100755 --- a/czds.go +++ b/czds.go @@ -5,6 +5,9 @@ import ( "encoding/json" "fmt" "log" + "os" + "path/filepath" + "quenubes/icann/lib/util" "strconv" "strings" "sync" @@ -18,7 +21,7 @@ type CzdsAPI struct { // ICzdsAPI is the interface for CzdsAPI. type ICzdsAPI interface { - DownloadZoneFile(localFilePath string, downloadLink string, wg *sync.WaitGroup) error + DownloadZoneFile(localFilePath string, downloadLink string, wg *sync.WaitGroup) (int, error) ICANN() *IcannAPI Run() } @@ -35,29 +38,24 @@ lblAgain: log.Fatal("unable to get download-links") } - // tldUnq is an array to keep track items already downloaded + // tldUnq is an array to keep track items already downloaded. + // This list avoid any originated duplicates (i.e. net,net,com) var tldUnq []interface{} // go through the loop from the bottom so that the latest // gets downloaded first. for i := (len(dlinks) - 1); i >= 0; i-- { - // still check for authentication; becuase if the token - // expires in the middle of download, icann api will terminate - // the connection! - if !c.icann.Authenticated { - log.Println("waiting for authentication...") - for { - if c.icann.Authenticated { - break - } - time.Sleep(time.Second) - } - } + // still check for authentication between downloads + c.waitUntilAutenticated() link := dlinks[i] localFilePath := c.getDownloadLocalFilePath(link) + if c.todayZoneFileExistsOnDisk(localFilePath) { + continue + } + // Always get the latest (one download for each tld) v := strings.Split(link, "/") oneTLD := v[len(v)-1] @@ -67,24 +65,182 @@ lblAgain: if alreadyDowloaded { continue } + tldUnq = append(tldUnq, oneTLD) // wait for each download to finish // (one file at a time per ip addr; as it's not a good idea // to download files simultaneousely from the same ip addr!). - err := c.DownloadZoneFile(localFilePath, link, nil) + statusCode, err := c.DownloadZoneFile(localFilePath, link, nil) if err != nil { - fmt.Println("c.DownloadZoneFile()=>", oneTLD, err) + fmt.Println(" c.DownloadZoneFile()=>", oneTLD, err) + // remove from the downloaded-list (success list) + tldUnq = util.RemoveFromArray(tldUnq, oneTLD) + + c.downloadZoneFilePostErr(localFilePath, link, oneTLD, statusCode, err) + + // // it's a good idea to halt the download a bit time.Sleep(time.Minute) } - } + // download loop is done. Now see if there are any failures + c.downloadFailedTLDs() + + c.cleanup() + c.keepIdlUntilNextInternval() goto lblAgain } +// todayZoneFileExistsOnDisk determins if a zone file +// for the current session is present on disk; so that +// if this app is turned off/on, and a successfully downloaded +// zone file exists; it will not be re-downloaded. +func (c *CzdsAPI) todayZoneFileExistsOnDisk(fp string) bool { + + fileName := filepath.Base(fp) + + // com => ~ 5 GB + // net => ~ 500 MB + files, err := os.ReadDir(c.icann.AppDataDir) + if err != nil { + log.Fatal(err) + } + + for i := 0; i < len(files); i++ { + fn := files[i].Name() + if fileName == fn { + fi, _ := files[i].Info() + if strings.Contains(fileName, "-com.zone.gz") { + gb := fi.Size() / 1024 / 1024 / 1024 + if gb > 4 { + return true + } + } + if strings.Contains(fileName, "-net.zone.gz") { + mb := fi.Size() / 1024 / 1024 + if mb > 490 { + return true + } + } + } + } + + return false +} +func (c *CzdsAPI) cleanup() { + + // remove lingering partially downloaded files + files, err := os.ReadDir(c.icann.AppDataDir) + if err != nil { + log.Fatal(err) + } + + for i := 0; i < len(files); i++ { + fn := files[i].Name() + + if strings.HasSuffix(fn, ".part") { + fp := fmt.Sprintf("%s/%s", c.icann.AppDataDir, fn) + os.Remove(fp) + } + } +} +func (c *CzdsAPI) downloadFailedTLDs() { + if len(c.icann.failedDownloadQueue) == 0 { + return + } + + c.removeMaxedoutItemsFromFailedDownloadList() + + for i := 0; i < len(c.icann.failedDownloadQueue); i++ { + statusCode, err := c.DownloadZoneFile(c.icann.failedDownloadQueue[i].LocalFilePath, + c.icann.failedDownloadQueue[i].DownloadURL, nil) + + if err != nil { + fmt.Println(" c.DownloadZoneFile()=>", c.icann.failedDownloadQueue[i].TLD, err) + c.downloadZoneFilePostErr(c.icann.failedDownloadQueue[i].LocalFilePath, + c.icann.failedDownloadQueue[i].DownloadURL, c.icann.failedDownloadQueue[i].TLD, statusCode, err) + + time.Sleep(time.Minute) + } + + c.removeMaxedoutItemsFromFailedDownloadList() + if len(c.icann.failedDownloadQueue) == 0 { + break + } + } +} +func (c *CzdsAPI) removeItemFromItemArry(s []failedDownloadItem, i int) []failedDownloadItem { + s[len(s)-1], s[i] = s[i], s[len(s)-1] + return s[:len(s)-1] +} + +func (c *CzdsAPI) removeMaxedoutItemsFromFailedDownloadList() { +lblAgain: + for i := 0; i < len(c.icann.failedDownloadQueue); i++ { + + // if error is: "...connection reset by peer", it won't do any good to re-try + // successively; hopfuly download for the next session will be successfull. + if c.icann.failedDownloadQueue[i].AttempCount > 2 || + strings.Contains(c.icann.failedDownloadQueue[i].ErrTxt, "connection reset by peer") { + + c.icann.failedDownloadQueue = c.removeItemFromItemArry(c.icann.failedDownloadQueue, i) + + if len(c.icann.failedDownloadQueue) == 0 { + return + } + + goto lblAgain + } + + } +} +func (c *CzdsAPI) downloadZoneFilePostErr(localFilePath string, link string, oneTLD string, statusCode int, err error) { + if err == nil { + return + } + + for i := 0; i < len(c.icann.failedDownloadQueue); i++ { + if c.icann.failedDownloadQueue[i].TLD == oneTLD { + c.icann.failedDownloadQueue[i].AttempCount = c.icann.failedDownloadQueue[i].AttempCount + 1 + return + } + } + + var item = failedDownloadItem{ + TLD: oneTLD, + DateTimeAborted: time.Now(), + LocalFilePath: localFilePath, + DownloadURL: link, + AttempCount: c.getFailedAttempCount(oneTLD) + 1, + StatusCode: statusCode, + ErrTxt: err.Error()} + + c.icann.failedDownloadQueue = append(c.icann.failedDownloadQueue, item) +} + +// getFailedAttempCount returns the attemp-count of an item +// in the failed-attmpe queue. +func (c *CzdsAPI) getFailedAttempCount(tld string) uint8 { + + var attpCnt uint8 + lenx := len(c.icann.failedDownloadQueue) + + if lenx == 0 { + return attpCnt + } + + for i := 0; i < lenx; i++ { + if c.icann.failedDownloadQueue[i].TLD == tld { + return c.icann.failedDownloadQueue[i].AttempCount + } + } + + return attpCnt +} + // ICANN exposes the IcannAPI to outside callers (public). func (c *CzdsAPI) ICANN() *IcannAPI { return c.icann diff --git a/defs.go b/defs.go index 07f6d62..1cbb5e9 100755 --- a/defs.go +++ b/defs.go @@ -33,7 +33,21 @@ type configData struct { HoursToWaitBetweenDownloads int } -// IcannAPI defines the structure of hte IIcannAPI interface. +// failedDownloadItem hold info on a filed download so that +// the download can be tried again; after other files +// are downloaded, since there is one download in-progress +// at a time. +type failedDownloadItem struct { + TLD string // com, net,... + DateTimeAborted time.Time + LocalFilePath string + DownloadURL string + AttempCount uint8 // 1 thru 3 + StatusCode int + ErrTxt string +} + +// IcannAPI defines the structure of the IIcannAPI interface. type IcannAPI struct { // AppDataDir is the directory (on the volume) that zone files will be downloaded to. @@ -63,6 +77,8 @@ type IcannAPI struct { isDirty bool HoursToWaitBetweenDownloads int + + failedDownloadQueue []failedDownloadItem } type JWT struct { diff --git a/icann-api.go b/icann-api.go index 5d52c70..31f10cc 100755 --- a/icann-api.go +++ b/icann-api.go @@ -113,6 +113,7 @@ func (i *IcannAPI) getAccessTokenFromDisk() { } b, _ := os.ReadFile(tokenFilePath) b, _ = hex.DecodeString(string(b)) + json.Unmarshal(b, &i.AccessToken) } @@ -123,8 +124,14 @@ func (i *IcannAPI) Authenticate() { actExp := i.accessTokenExpired() if !actExp { - // token still good - return + // token still good? test the token + // hd := i.GetCommonHeaders() + // hd.Add("Authorization", fmt.Sprintf("Bearer %s", i.AccessToken.Token)) + // res := i.HTTPExec(GET, czdsAPIDownloadLinksURL, hd, nil) + // if res.StatusCode == 200 { + i.Authenticated = true + // return + // } } i.waitForAuthAttemptTimeout() diff --git a/init.go b/init.go index 637c0d9..934077f 100755 --- a/init.go +++ b/init.go @@ -29,17 +29,19 @@ func NewIcannAPIClient() *IcannClient { var authToken JWT var authenticated bool + var fdArry []failedDownloadItem + // Initialize the IcannAPI interface icn.IcannAPI = &IcannAPI{cnf.ZoneFileDir, cnf.UserAgent, cnf.IcannAccountUserName, cnf.IcannAccountPassword, cnf.ApprovedTLD, - authenticated, authToken, false, cnf.HoursToWaitBetweenDownloads} + authenticated, authToken, false, cnf.HoursToWaitBetweenDownloads, fdArry} // CzdsAPI expands the IcannAPI interface with more functionaliy; so its IcannAPI instance // must be initialized accordingly. icn.CzdsAPI = &CzdsAPI{ &IcannAPI{cnf.ZoneFileDir, cnf.UserAgent, cnf.IcannAccountUserName, cnf.IcannAccountPassword, cnf.ApprovedTLD, - authenticated, authToken, false, cnf.HoursToWaitBetweenDownloads}, + authenticated, authToken, false, cnf.HoursToWaitBetweenDownloads, fdArry}, } // Authenticate on the first run; after that -- diff --git a/util.go b/util.go index e517e5f..d0fe9f2 100755 --- a/util.go +++ b/util.go @@ -259,6 +259,11 @@ func getNeutralDate(t time.Time) string { return fmt.Sprintf("%d%02d%02d", time.Now().Year(), time.Now().Month(), time.Now().Day()) } +func RemoveItemFromIntArry(s []interface{}, i int) []interface{} { + s[len(s)-1], s[i] = s[i], s[len(s)-1] + return s[:len(s)-1] +} + // FileOrDirExists checks to see if a file or dir exist. // Note that os.Stat(path) works for any path (file or // directory).