Skip to content

Commit

Permalink
adjust pipe behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
mkadirtan committed May 17, 2024
1 parent 411f0c2 commit a5d2fa1
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 119 deletions.
64 changes: 46 additions & 18 deletions cmd/find/find.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,56 +2,84 @@ package find

import (
"bufio"
"errors"
"fmt"
"io"
"os"
"strings"

"github.com/mkadirtan/feed-toolbelt/pkg/inspect"
)

// FindCMD declares the flags and the single positional argument of the find
// command. The struct tags carry the help text, short flag letter, default
// value and negatable/optional markers for each field.
//
// NOTE(review): this span is an unmarked diff, so every field is listed
// twice. The first six field lines look like the pre-commit declarations and
// the last six (capitalised help texts, optional URL) like the post-commit
// ones — confirm against the repository before relying on either set.
type FindCMD struct {
Pipe bool `help:"use piped input" short:"p"`
StrategyHeader bool `help:"toggle header strategy" negatable:"" short:"l" default:"true"`
StrategyPage bool `help:"toggle page strategy" negatable:"" short:"c" default:"true"`
StrategyCommon bool `help:"toggle common strategy" negatable:"" short:"b" default:"false"`
Validate bool `help:"validate feed urls" negatable:"" short:"g" default:"false"`
URL string `arg:"" help:"target url"`
Pipe bool `help:"Use this flag if you pipe HTML content into this command. Piping without using this flag will result in interpreting piped input as target url" short:"p"`
StrategyHeader bool `help:"Toggle header strategy" negatable:"" short:"l" default:"true"`
StrategyPage bool `help:"Toggle page strategy" negatable:"" short:"c" default:"true"`
StrategyCommon bool `help:"Toggle common strategy" negatable:"" short:"b" default:"false"`
Validate bool `help:"Validate feed URLs contain actual feeds" negatable:"" short:"g" default:"false"`
URL string `arg:"" help:"target url, optional in case piped input is given" optional:""`
}

// Run translates the parsed flags and the positional URL into inspector
// options, constructs an inspector from them and runs it. It returns an error
// when the pipe flag is set without piped input, when no URL can be
// determined, or when constructing the inspector fails.
//
// NOTE(review): this span is an unmarked diff. Lines that use
// inspectorOptions appear to be the pre-commit version and lines that use
// options the post-commit one — confirm against the repository.
func (f *FindCMD) Run() error {
inspectorOptions := []inspect.InspectorOption{
inspect.WithTargetURL(f.URL),
options := []inspect.Option{
inspect.WithOutputHandler(func(o string) { fmt.Println(o) }),
// inspect.WithDebugHandler(func(d string) { a.debugLogger.Println(d) }),
}

// not implemented yet
if false {
options = append(options, inspect.WithDebugHandler(func(d string) { fmt.Println(d) }))
}

if f.Pipe {
// NOTE(review): the error from Stat is discarded and stat is used on the
// next line without a nil check.
stat, _ := os.Stdin.Stat()
// A cleared ModeCharDevice bit is taken to mean stdin is not a terminal,
// i.e. something was piped or redirected into the command.
if (stat.Mode() & os.ModeCharDevice) == 0 {
inspectorOptions = append(inspectorOptions, inspect.WithPipedInput(bufio.NewReader(os.Stdin)))
options = append(options, inspect.WithPipedInput(bufio.NewReader(os.Stdin)))
} else {
return errors.New("pipe option used without piped input")
}
}

// no positional url, pipe option true OK
// no positional url, pipe option false, url from pipe OK
// no positional url, pipe option false, no url from pipe NO
if f.URL == "" {
if !f.Pipe {
// NOTE(review): same discarded Stat error as in the pipe branch above.
stat, _ := os.Stdin.Stat()
if (stat.Mode() & os.ModeCharDevice) == 0 {
// Without the pipe flag, all of stdin is read and, once surrounding
// whitespace is trimmed, used as the target URL.
urlBytes, err := io.ReadAll(os.Stdin)
if err != nil {
return err
}
targetURL := strings.TrimSpace(string(urlBytes))
options = append(options, inspect.WithTargetURL(targetURL))
} else {
return errors.New("no url specified")
}
}
} else {
options = append(options, inspect.WithTargetURL(f.URL))
}

if f.Validate {
inspectorOptions = append(inspectorOptions, inspect.WithValidate())
options = append(options, inspect.WithValidate())
}

if f.StrategyHeader {
inspectorOptions = append(inspectorOptions, inspect.WithStrategyHeader())
options = append(options, inspect.WithStrategyHeader())
}
if f.StrategyPage {
inspectorOptions = append(inspectorOptions, inspect.WithStrategyPage())
options = append(options, inspect.WithStrategyPage())
}
if f.StrategyCommon {
inspectorOptions = append(inspectorOptions, inspect.WithStrategyCommon())
options = append(options, inspect.WithStrategyCommon())
}

// NOTE(review): append with no elements leaves the slice unchanged, so
// these statements have no effect.
inspectorOptions = append(inspectorOptions)
options = append(options)

inspector, err := inspect.NewInspector(inspectorOptions...)
inspector, err := inspect.NewInspector(options...)
if err != nil {
return err
}
inspector.Find()

return nil
// The result of Find is handed straight back to the caller.
return inspector.Find()
}
90 changes: 27 additions & 63 deletions pkg/inspect/inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,43 @@
package inspect

import (
"bufio"
"errors"
"fmt"
"net/http"
"slices"
"strings"

"github.com/mkadirtan/feed-toolbelt/pkg/common_paths"
"github.com/mkadirtan/feed-toolbelt/pkg/util"
)

// Find obtains the document to inspect — the configured piped input when
// there is one, otherwise the response to a GET on the target URL — and then
// applies each enabled strategy in the order header, page, common. In the
// error-returning variant, a failed request or a status outside 200–299 is
// returned as an error.
//
// NOTE(review): this span is an unmarked diff. Two signatures are listed and
// the calls to findTargetHTML / findTargetURL with their bare returns appear
// to be the pre-commit body — confirm against the repository.
func (i *Inspector) Find() {
func (i *Inspector) Find() error {
if i.config.PipedInput != nil {
i.findTargetHTML()
return
i.body = i.config.PipedInput
} else {
// NOTE(review): TargetURL is dereferenced without a nil check — confirm a
// URL is always configured when there is no piped input.
resp, err := http.DefaultClient.Get(*i.config.TargetURL)
if err != nil {
return err
}

if resp.StatusCode < 200 || resp.StatusCode > 299 {
// NOTE(review): errors.New(fmt.Sprintf(...)) builds the same message as
// fmt.Errorf would.
return errors.New(fmt.Sprintf("invalid status code: %d", resp.StatusCode))
}

// NOTE(review): no resp.Body.Close() is visible in this span, including on
// the status-code return above — confirm the body is closed elsewhere.
i.body = bufio.NewReader(resp.Body)
i.header = resp.Header
}

if i.config.TargetURL != nil {
i.findTargetURL()
return
if i.config.Strategies.Header {
i.applyStrategyHeader()
}
if i.config.Strategies.Page {
i.applyStrategyPage()
}
if i.config.Strategies.Common {
i.applyStrategyCommon()
}

return nil
}

func (i *Inspector) processFeedCandidate(feedCandidateURL string, mustValidate bool) {
Expand All @@ -40,13 +59,6 @@ func (i *Inspector) processFeedCandidate(feedCandidateURL string, mustValidate b
i.foundFeeds = append(i.foundFeeds, feedCandidateURL)
}

// findTargetHTML runs the page inspection on the configured piped input and
// forwards every feed URL it yields to processFeedCandidate without
// requesting validation.
func (i *Inspector) findTargetHTML() {
	// Only the slice result of inspectPage is used; the boolean is discarded.
	candidates, _ := inspectPage(*i.config.PipedInput)
	for idx := range candidates {
		i.processFeedCandidate(candidates[idx], false)
	}
}

func (i *Inspector) validateFeedURL(feedURL string) bool {
resp, err := http.DefaultClient.Get(feedURL)
if err != nil {
Expand All @@ -59,51 +71,3 @@ func (i *Inspector) validateFeedURL(feedURL string) bool {

return util.ValidateFeed(resp.Body)
}

// findTargetURL dispatches the URL-based strategies: the combined
// page-and-headers strategy runs when either the header or the page strategy
// is enabled, followed by the common-paths strategy when that one is enabled.
func (i *Inspector) findTargetURL() {
	needsFetch := i.config.Strategies.Header || i.config.Strategies.Page
	if needsFetch {
		i.pageAndHeadersStrategy()
	}

	if !i.config.Strategies.Common {
		return
	}
	i.commonStrategy()
}

// commonStrategy builds one candidate per entry yielded by ranging over
// common_paths.CommonPaths, by appending it to the target URL with one
// trailing slash removed. Candidates not yet present in i.foundFeeds are
// passed to processFeedCandidate with validation requested.
func (i *Inspector) commonStrategy() {
	base := strings.TrimSuffix(*i.config.TargetURL, "/")
	for suffix := range common_paths.CommonPaths {
		candidate := base + suffix
		if !slices.Contains(i.foundFeeds, candidate) {
			i.processFeedCandidate(candidate, true)
		}
	}
}

// pageAndHeadersStrategy fetches the target URL once and feeds the response
// to the header strategy and/or the page strategy, depending on which of the
// two is enabled. Every feed URL found is passed to processFeedCandidate
// without requesting validation.
//
// The function has no error return: a failed request or a status outside
// 200–299 simply ends the strategy.
func (i *Inspector) pageAndHeadersStrategy() {
	resp, err := http.DefaultClient.Get(*i.config.TargetURL)
	if err != nil {
		return
	}
	// The response body must be closed on every path (including the
	// status-code bail-out below) so the transport can release the connection.
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return
	}

	if i.config.Strategies.Header {
		// Only the slice result is used; the boolean is discarded.
		feedsOnHeader, _ := inspectHeaders(resp.Header)
		for _, feed := range feedsOnHeader {
			i.processFeedCandidate(feed, false)
		}
	}

	if i.config.Strategies.Page {
		feedsOnPage, _ := inspectPage(resp.Body)
		for _, feed := range feedsOnPage {
			i.processFeedCandidate(feed, false)
		}
	}
}
21 changes: 21 additions & 0 deletions pkg/inspect/inspect_common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package inspect

import (
"slices"
"strings"

"github.com/mkadirtan/feed-toolbelt/pkg/common_paths"
)

// applyStrategyCommon probes well-known feed locations. For every entry
// yielded by ranging over common_paths.CommonPaths it appends that entry to
// the target URL (with one trailing slash removed) and passes the result to
// processFeedCandidate with validation requested. Candidates already present
// in i.foundFeeds are skipped.
//
// The strategy does nothing when no target URL is configured: there is no
// base to append the paths to, and dereferencing the nil pointer would panic.
func (i *Inspector) applyStrategyCommon() {
	if i.config.TargetURL == nil {
		return
	}

	strippedURL := strings.TrimSuffix(*i.config.TargetURL, "/")
	for path := range common_paths.CommonPaths {
		feedCandidate := strippedURL + path

		if slices.Contains(i.foundFeeds, feedCandidate) {
			continue
		}

		i.processFeedCandidate(feedCandidate, true)
	}
}
16 changes: 4 additions & 12 deletions pkg/inspect/inspect_headers.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,17 @@
package inspect

import (
"net/http"

"github.com/mkadirtan/feed-toolbelt/pkg/header_node"
)

// applyStrategyHeader (listed here together with inspectHeaders) walks the
// values of the "link" header, parses each one with a header_node and keeps
// the ones that IsValidFeed accepts. inspectHeaders collects their feed URLs
// into a slice and returns it with true, or nil and false when nothing was
// found; applyStrategyHeader instead passes each feed URL to
// processFeedCandidate without requesting validation.
//
// NOTE(review): this span is an unmarked diff. The inspectHeaders signature,
// the foundFeeds slice and the trailing returns appear to be the pre-commit
// version — confirm against the repository.
func inspectHeaders(headers http.Header) ([]string, bool) {
linkHeaders := headers.Values("link")
var foundFeeds = make([]string, 0)
func (i *Inspector) applyStrategyHeader() {
linkHeaders := i.header.Values("link")

for _, linkHeader := range linkHeaders {
// A fresh node is created for every header value.
headerNode := header_node.NewHeaderNode()
headerNode.ParseFields(linkHeader)
if headerNode.IsValidFeed() {
foundFeeds = append(foundFeeds, headerNode.FeedURL())
i.processFeedCandidate(headerNode.FeedURL(), false)
}
}

if len(foundFeeds) > 0 {
return foundFeeds, true
}

return nil, false
}
17 changes: 4 additions & 13 deletions pkg/inspect/inspect_page.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
package inspect

import (
"io"

"github.com/mkadirtan/feed-toolbelt/pkg/link_node"
"github.com/mkadirtan/feed-toolbelt/pkg/script_node"

"golang.org/x/net/html"
)

func inspectPage(r io.Reader) ([]string, bool) {
var hrefs = make([]string, 0)
z := html.NewTokenizer(r)
func (i *Inspector) applyStrategyPage() {
z := html.NewTokenizer(i.body)

for {
tt := z.Next()
Expand All @@ -35,20 +32,14 @@ func inspectPage(r io.Reader) ([]string, bool) {
feedNode := link_node.NewLinkNode(tag)
feedNode.ParseFields(z)
if feedNode.IsValidFeed() {
hrefs = append(hrefs, feedNode.FeedURL())
i.processFeedCandidate(feedNode.FeedURL(), false)
}
case "script":
scriptNode := script_node.NewScriptNode(tag)
scriptNode.ParseFields(z)
if scriptNode.IsValidFeed() {
hrefs = append(hrefs, scriptNode.FeedURL())
i.processFeedCandidate(scriptNode.FeedURL(), false)
}
}
}

if len(hrefs) > 0 {
return hrefs, true
}

return nil, false
}
Loading

0 comments on commit a5d2fa1

Please sign in to comment.