From a5d2fa1b08aef67acf23b6c4eb742e3339597f19 Mon Sep 17 00:00:00 2001 From: mkadirtan Date: Fri, 17 May 2024 17:50:44 +0300 Subject: [PATCH] adjust pipe behavior --- cmd/find/find.go | 64 +++++++++++++++++------- pkg/inspect/inspect.go | 90 ++++++++++------------------------ pkg/inspect/inspect_common.go | 21 ++++++++ pkg/inspect/inspect_headers.go | 16 ++---- pkg/inspect/inspect_page.go | 17 ++----- pkg/inspect/inspector.go | 29 ++++++----- 6 files changed, 118 insertions(+), 119 deletions(-) create mode 100644 pkg/inspect/inspect_common.go diff --git a/cmd/find/find.go b/cmd/find/find.go index 6969746..f6e2688 100644 --- a/cmd/find/find.go +++ b/cmd/find/find.go @@ -2,56 +2,84 @@ package find import ( "bufio" + "errors" "fmt" + "io" "os" + "strings" "github.com/mkadirtan/feed-toolbelt/pkg/inspect" ) type FindCMD struct { - Pipe bool `help:"use piped input" short:"p"` - StrategyHeader bool `help:"toggle header strategy" negatable:"" short:"l" default:"true"` - StrategyPage bool `help:"toggle page strategy" negatable:"" short:"c" default:"true"` - StrategyCommon bool `help:"toggle common strategy" negatable:"" short:"b" default:"false"` - Validate bool `help:"validate feed urls" negatable:"" short:"g" default:"false"` - URL string `arg:"" help:"target url"` + Pipe bool `help:"Use this flag if you pipe HTML content into this command. Piping without using this flag will result in interpreting piped input as target url" short:"p"` + StrategyHeader bool `help:"Toggle header strategy" negatable:"" short:"l" default:"true"` + StrategyPage bool `help:"Toggle page strategy" negatable:"" short:"c" default:"true"` + StrategyCommon bool `help:"Toggle common strategy" negatable:"" short:"b" default:"false"` + Validate bool `help:"Validate feed URLs contain actual feeds" negatable:"" short:"g" default:"false"` + URL string `arg:"" help:"target url, optional in case piped input is given" optional:""` } func (f *FindCMD) Run() error { - inspectorOptions := []inspect.InspectorOption{ - inspect.WithTargetURL(f.URL), + options := []inspect.Option{ inspect.WithOutputHandler(func(o string) { fmt.Println(o) }), - // inspect.WithDebugHandler(func(d string) { a.debugLogger.Println(d) }), + } + + // not implemented yet + if false { + options = append(options, inspect.WithDebugHandler(func(d string) { fmt.Println(d) })) } if f.Pipe { stat, _ := os.Stdin.Stat() if (stat.Mode() & os.ModeCharDevice) == 0 { - inspectorOptions = append(inspectorOptions, inspect.WithPipedInput(bufio.NewReader(os.Stdin))) + options = append(options, inspect.WithPipedInput(bufio.NewReader(os.Stdin))) + } else { + return errors.New("pipe option used without piped input") + } + } + + // no positional url, pipe option true OK + // no positional url, pipe option false, url from pipe OK + // no positional url, pipe option false, no url from pipe NO + if f.URL == "" { + if !f.Pipe { + stat, _ := os.Stdin.Stat() + if (stat.Mode() & os.ModeCharDevice) == 0 { + urlBytes, err := io.ReadAll(os.Stdin) + if err != nil { + return err + } + targetURL := strings.TrimSpace(string(urlBytes)) + options = append(options, inspect.WithTargetURL(targetURL)) + } else { + return errors.New("no url specified") + } } + } else { + options = append(options, inspect.WithTargetURL(f.URL)) } if f.Validate { - inspectorOptions = append(inspectorOptions, inspect.WithValidate()) + options = append(options, inspect.WithValidate()) } if f.StrategyHeader { - inspectorOptions = append(inspectorOptions, inspect.WithStrategyHeader()) + options = append(options, inspect.WithStrategyHeader()) } if f.StrategyPage { - inspectorOptions = append(inspectorOptions, inspect.WithStrategyPage()) + options = append(options, inspect.WithStrategyPage()) } if f.StrategyCommon { - inspectorOptions = append(inspectorOptions, inspect.WithStrategyCommon()) + options = append(options, inspect.WithStrategyCommon()) } - inspectorOptions = append(inspectorOptions) + options = append(options) - inspector, err := inspect.NewInspector(inspectorOptions...) + inspector, err := inspect.NewInspector(options...) if err != nil { return err } - inspector.Find() - return nil + return inspector.Find() } diff --git a/pkg/inspect/inspect.go b/pkg/inspect/inspect.go index a488d3a..d42a627 100644 --- a/pkg/inspect/inspect.go +++ b/pkg/inspect/inspect.go @@ -7,24 +7,43 @@ package inspect import ( + "bufio" + "errors" + "fmt" "net/http" "slices" - "strings" - "github.com/mkadirtan/feed-toolbelt/pkg/common_paths" "github.com/mkadirtan/feed-toolbelt/pkg/util" ) -func (i *Inspector) Find() { +func (i *Inspector) Find() error { if i.config.PipedInput != nil { - i.findTargetHTML() - return + i.body = i.config.PipedInput + } else { + resp, err := http.DefaultClient.Get(*i.config.TargetURL) + if err != nil { + return err + } + + if resp.StatusCode < 200 || resp.StatusCode > 299 { + return errors.New(fmt.Sprintf("invalid status code: %d", resp.StatusCode)) + } + + i.body = bufio.NewReader(resp.Body) + i.header = resp.Header } - if i.config.TargetURL != nil { - i.findTargetURL() - return + if i.config.Strategies.Header { + i.applyStrategyHeader() + } + if i.config.Strategies.Page { + i.applyStrategyPage() + } + if i.config.Strategies.Common { + i.applyStrategyCommon() } + + return nil } func (i *Inspector) processFeedCandidate(feedCandidateURL string, mustValidate bool) { @@ -40,13 +59,6 @@ func (i *Inspector) processFeedCandidate(feedCandidateURL string, mustValidate b i.foundFeeds = append(i.foundFeeds, feedCandidateURL) } -func (i *Inspector) findTargetHTML() { - feedsOnPage, _ := inspectPage(*i.config.PipedInput) - for _, feed := range feedsOnPage { - i.processFeedCandidate(feed, false) - } -} - func (i *Inspector) validateFeedURL(feedURL string) bool { resp, err := http.DefaultClient.Get(feedURL) if err != nil { @@ -59,51 +71,3 @@ func (i *Inspector) validateFeedURL(feedURL string) bool { return util.ValidateFeed(resp.Body) } - -func (i *Inspector) findTargetURL() { - if i.config.Strategies.Header || i.config.Strategies.Page { - i.pageAndHeadersStrategy() - } - - if i.config.Strategies.Common { - i.commonStrategy() - } -} - -func (i *Inspector) commonStrategy() { - strippedURL, _ := strings.CutSuffix(*i.config.TargetURL, "/") - for path := range common_paths.CommonPaths { - feedCandidate := strippedURL + path - - if slices.Contains(i.foundFeeds, feedCandidate) { - continue - } - - i.processFeedCandidate(feedCandidate, true) - } -} - -func (i *Inspector) pageAndHeadersStrategy() { - resp, err := http.DefaultClient.Get(*i.config.TargetURL) - if err != nil { - return - } - - if resp.StatusCode < 200 || resp.StatusCode > 299 { - return - } - - if i.config.Strategies.Header { - feedsOnHeader, _ := inspectHeaders(resp.Header) - for _, feed := range feedsOnHeader { - i.processFeedCandidate(feed, false) - } - } - - if i.config.Strategies.Page { - feedsOnPage, _ := inspectPage(resp.Body) - for _, feed := range feedsOnPage { - i.processFeedCandidate(feed, false) - } - } -} diff --git a/pkg/inspect/inspect_common.go b/pkg/inspect/inspect_common.go new file mode 100644 index 0000000..48141fc --- /dev/null +++ b/pkg/inspect/inspect_common.go @@ -0,0 +1,21 @@ +package inspect + +import ( + "slices" + "strings" + + "github.com/mkadirtan/feed-toolbelt/pkg/common_paths" +) + +func (i *Inspector) applyStrategyCommon() { + strippedURL, _ := strings.CutSuffix(*i.config.TargetURL, "/") + for path := range common_paths.CommonPaths { + feedCandidate := strippedURL + path + + if slices.Contains(i.foundFeeds, feedCandidate) { + continue + } + + i.processFeedCandidate(feedCandidate, true) + } +} diff --git a/pkg/inspect/inspect_headers.go b/pkg/inspect/inspect_headers.go index c764e7e..10017a6 100644 --- a/pkg/inspect/inspect_headers.go +++ b/pkg/inspect/inspect_headers.go @@ -1,25 +1,17 @@ package inspect import ( - "net/http" - "github.com/mkadirtan/feed-toolbelt/pkg/header_node" ) -func inspectHeaders(headers http.Header) ([]string, bool) { - linkHeaders := headers.Values("link") - var foundFeeds = make([]string, 0) +func (i *Inspector) applyStrategyHeader() { + linkHeaders := i.header.Values("link") + for _, linkHeader := range linkHeaders { headerNode := header_node.NewHeaderNode() headerNode.ParseFields(linkHeader) if headerNode.IsValidFeed() { - foundFeeds = append(foundFeeds, headerNode.FeedURL()) + i.processFeedCandidate(headerNode.FeedURL(), false) } } - - if len(foundFeeds) > 0 { - return foundFeeds, true - } - - return nil, false } diff --git a/pkg/inspect/inspect_page.go b/pkg/inspect/inspect_page.go index 5322407..75a1e24 100644 --- a/pkg/inspect/inspect_page.go +++ b/pkg/inspect/inspect_page.go @@ -1,17 +1,14 @@ package inspect import ( - "io" - "github.com/mkadirtan/feed-toolbelt/pkg/link_node" "github.com/mkadirtan/feed-toolbelt/pkg/script_node" "golang.org/x/net/html" ) -func inspectPage(r io.Reader) ([]string, bool) { - var hrefs = make([]string, 0) - z := html.NewTokenizer(r) +func (i *Inspector) applyStrategyPage() { + z := html.NewTokenizer(i.body) for { tt := z.Next() @@ -35,20 +32,14 @@ func inspectPage(r io.Reader) ([]string, bool) { feedNode := link_node.NewLinkNode(tag) feedNode.ParseFields(z) if feedNode.IsValidFeed() { - hrefs = append(hrefs, feedNode.FeedURL()) + i.processFeedCandidate(feedNode.FeedURL(), false) } case "script": scriptNode := script_node.NewScriptNode(tag) scriptNode.ParseFields(z) if scriptNode.IsValidFeed() { - hrefs = append(hrefs, scriptNode.FeedURL()) + i.processFeedCandidate(scriptNode.FeedURL(), false) } } } - - if len(hrefs) > 0 { - return hrefs, true - } - - return nil, false } diff --git a/pkg/inspect/inspector.go b/pkg/inspect/inspector.go index b095454..8c61425 100644 --- a/pkg/inspect/inspector.go +++ b/pkg/inspect/inspector.go @@ -1,8 +1,9 @@ package inspect import ( + "bufio" "errors" - "io" + "net/http" ) type HandlerFunc func(string) @@ -11,11 +12,13 @@ type Inspector struct { config InspectorConfig foundFeeds []string visitedURLs []string + body *bufio.Reader + header http.Header } type InspectorConfig struct { // strategies option is ignored in case PipedInput is defined - PipedInput *io.Reader + PipedInput *bufio.Reader TargetURL *string Strategies struct { @@ -34,51 +37,51 @@ type InspectorConfig struct { DebugHandler HandlerFunc } -type InspectorOption func(*InspectorConfig) +type Option func(*InspectorConfig) -func WithPipedInput(htmlBody io.Reader) InspectorOption { +func WithPipedInput(htmlBody *bufio.Reader) Option { return func(c *InspectorConfig) { - c.PipedInput = &htmlBody + c.PipedInput = htmlBody } } -func WithTargetURL(targetURL string) InspectorOption { +func WithTargetURL(targetURL string) Option { return func(c *InspectorConfig) { c.TargetURL = &targetURL } } -func WithStrategyHeader() InspectorOption { +func WithStrategyHeader() Option { return func(c *InspectorConfig) { c.Strategies.Header = true } } -func WithStrategyPage() InspectorOption { +func WithStrategyPage() Option { return func(c *InspectorConfig) { c.Strategies.Page = true } } -func WithStrategyCommon() InspectorOption { +func WithStrategyCommon() Option { return func(c *InspectorConfig) { c.Strategies.Common = true } } -func WithValidate() InspectorOption { +func WithValidate() Option { return func(c *InspectorConfig) { c.Validate = true } } -func WithOutputHandler(outputHandler HandlerFunc) InspectorOption { +func WithOutputHandler(outputHandler HandlerFunc) Option { return func(c *InspectorConfig) { c.OutputHandler = outputHandler } } -func WithDebugHandler(debugHandler HandlerFunc) InspectorOption { +func WithDebugHandler(debugHandler HandlerFunc) Option { return func(c *InspectorConfig) { c.DebugHandler = debugHandler } @@ -89,7 +92,7 @@ var ( errNoOutputHandler = errors.New("no output handler specified") ) -func NewInspector(options ...InspectorOption) (*Inspector, error) { +func NewInspector(options ...Option) (*Inspector, error) { config := &InspectorConfig{} for _, option := range options {