diff --git a/cmd/lassie/fetch.go b/cmd/lassie/fetch.go index c115ccb2..b094f43b 100644 --- a/cmd/lassie/fetch.go +++ b/cmd/lassie/fetch.go @@ -13,8 +13,11 @@ import ( "github.com/filecoin-project/lassie/pkg/storage" "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-cid" + "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/storage/deferred" "github.com/ipld/go-ipld-prime/datamodel" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/urfave/cli/v2" ) @@ -39,9 +42,9 @@ var fetchFlags = []cli.Flag{ Value: "all", Action: func(cctx *cli.Context, v string) error { switch v { - case string(types.DagScopeAll): - case string(types.DagScopeEntity): - case string(types.DagScopeBlock): + case string(trustlessutils.DagScopeAll): + case string(trustlessutils.DagScopeEntity): + case string(trustlessutils.DagScopeBlock): default: return fmt.Errorf("invalid dag-scope parameter, must be of value [all, entity, block]") } @@ -55,7 +58,7 @@ var fetchFlags = []cli.Flag{ " Valid values should be of the form from:to, where from and to are byte offsets and to may be '*'", DefaultText: "defaults to the entire file, 0:*", Action: func(cctx *cli.Context, v string) error { - if _, err := types.ParseByteRange(v); err != nil { + if _, err := trustlessutils.ParseByteRange(v); err != nil { return fmt.Errorf("invalid entity-bytes parameter, must be of the form from:to," + " where from and to are byte offsets and to may be '*'") } @@ -252,14 +255,14 @@ func defaultFetchRun( lassie.RegisterSubscriber(pp.subscriber) } - var carWriter *storage.DeferredCarWriter + var carWriter *deferred.DeferredCarWriter if outfile == stdoutFileString { // we need the onlyWriter because stdout is presented as an os.File, and // therefore pretend to support seeks, so feature-checking in go-car // will make bad assumptions about capabilities unless we hide it - carWriter = storage.NewDeferredCarWriterForStream(rootCid, 
&onlyWriter{dataWriter}) + carWriter = deferred.NewDeferredCarWriterForStream(&onlyWriter{dataWriter}, []cid.Cid{rootCid}) } else { - carWriter = storage.NewDeferredCarWriterForPath(rootCid, outfile) + carWriter = deferred.NewDeferredCarWriterForPath(outfile, []cid.Cid{rootCid}, car.WriteAsCarV1(true)) } tempStore := storage.NewDeferredStorageCar(tempDir, rootCid) @@ -278,12 +281,12 @@ func defaultFetchRun( } }, false) - byteRange, _ := types.ParseByteRange(entityBytes) - var br *types.ByteRange + byteRange, _ := trustlessutils.ParseByteRange(entityBytes) + var br *trustlessutils.ByteRange if !byteRange.IsDefault() { br = &byteRange } - request, err := types.NewRequestForPath(carStore, rootCid, path, types.DagScope(dagScope), br) + request, err := types.NewRequestForPath(carStore, rootCid, path, trustlessutils.DagScope(dagScope), br) if err != nil { return err } diff --git a/cmd/lassie/fetch_test.go b/cmd/lassie/fetch_test.go index 16eba339..5ab8bc70 100644 --- a/cmd/lassie/fetch_test.go +++ b/cmd/lassie/fetch_test.go @@ -10,8 +10,8 @@ import ( "github.com/filecoin-project/lassie/pkg/indexerlookup" l "github.com/filecoin-project/lassie/pkg/lassie" "github.com/filecoin-project/lassie/pkg/retriever" - "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-cid" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" @@ -32,7 +32,7 @@ func TestFetchCommandFlags(t *testing.T) { // fetch specific params require.Equal(t, "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", rootCid.String()) require.Equal(t, "", path) - require.Equal(t, string(types.DagScopeAll), dagScope) + require.Equal(t, string(trustlessutils.DagScopeAll), dagScope) require.Empty(t, entityBytes) require.Equal(t, false, progress) require.Equal(t, "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4.car", outfile) @@ -85,7 +85,7 @@ func TestFetchCommandFlags(t 
*testing.T) { "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { - require.Equal(t, string(types.DagScopeEntity), dagScope) + require.Equal(t, string(trustlessutils.DagScopeEntity), dagScope) return nil }, }, @@ -98,7 +98,7 @@ func TestFetchCommandFlags(t *testing.T) { "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", }, assertRun: func(ctx context.Context, lCfg *l.LassieConfig, erCfg *a.EventRecorderConfig, msgWriter io.Writer, dataWriter io.Writer, rootCid cid.Cid, path string, dagScope string, entityBytes string, tempDir string, progress bool, outfile string) error { - require.Equal(t, string(types.DagScopeBlock), dagScope) + require.Equal(t, string(trustlessutils.DagScopeBlock), dagScope) return nil }, }, diff --git a/go.mod b/go.mod index b98b41c6..f32bb5f3 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,6 @@ go 1.20 require ( github.com/benbjohnson/clock v1.3.5 - github.com/cespare/xxhash/v2 v2.2.0 github.com/dustin/go-humanize v1.0.1 github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7 github.com/filecoin-project/go-retrieval-types v1.2.0 @@ -12,21 +11,22 @@ require ( github.com/google/uuid v1.3.0 github.com/hannahhoward/go-pubsub v1.0.0 github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf - github.com/ipfs/go-block-format v0.1.2 + github.com/ipfs/go-block-format v0.2.0 github.com/ipfs/go-cid v0.4.1 github.com/ipfs/go-datastore v0.6.0 - github.com/ipfs/go-graphsync v0.14.7 + github.com/ipfs/go-graphsync v0.14.8 github.com/ipfs/go-ipfs-blockstore v1.3.0 github.com/ipfs/go-ipfs-blocksutil v0.0.1 github.com/ipfs/go-ipfs-delay v0.0.1 github.com/ipfs/go-ipfs-exchange-interface v0.2.0 - github.com/ipfs/go-ipld-format v0.5.0 + github.com/ipfs/go-ipld-format v0.6.0 
github.com/ipfs/go-log/v2 v2.5.1 - github.com/ipfs/go-unixfsnode v1.7.4 - github.com/ipld/go-car/v2 v2.11.0 + github.com/ipfs/go-unixfsnode v1.8.0 + github.com/ipld/go-car/v2 v2.13.1 github.com/ipld/go-codec-dagpb v1.6.0 github.com/ipld/go-ipld-prime v0.21.0 - github.com/ipld/ipld/specs v0.0.0-20230816230151-73f8fbea1783 + github.com/ipld/go-trustless-utils v0.2.0 + github.com/ipld/ipld/specs v0.0.0-20230907004443-0e4ff95ff474 github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff github.com/libp2p/go-libp2p v0.30.0 github.com/libp2p/go-libp2p-routing-helpers v0.7.0 @@ -46,6 +46,8 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bep/debounce v1.2.0 // indirect + github.com/cespare/xxhash v1.1.0 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/containerd/cgroups v1.1.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect @@ -88,7 +90,7 @@ require ( github.com/ipfs/go-ipfs-ds-help v1.1.0 // indirect github.com/ipfs/go-ipfs-pq v0.0.3 // indirect github.com/ipfs/go-ipfs-util v0.0.2 // indirect - github.com/ipfs/go-ipld-cbor v0.0.6 // indirect + github.com/ipfs/go-ipld-cbor v0.1.0 // indirect github.com/ipfs/go-log v1.0.5 // indirect github.com/ipfs/go-metrics-interface v0.0.1 // indirect github.com/ipfs/go-peertaskqueue v0.8.1 // indirect @@ -147,7 +149,7 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/warpfork/go-testmark v0.12.1 // indirect github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect - github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa // indirect + github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25 // indirect github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect go.opentelemetry.io/otel/sdk v1.14.0 // indirect diff --git a/go.sum b/go.sum index 
d4649f5c..355b78fc 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,8 @@ dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= @@ -38,6 +40,8 @@ github.com/bep/debounce v1.2.0/go.mod h1:H8yggRPQKLUhUoqrJC1bO2xNya7vanpDl7xR3IS github.com/bradfitz/go-smtpd v0.0.0-20170404230938-deb6d6237625/go.mod h1:HYsPBTaaSFSlLx/70C2HPIMNZpVV8+vt/A+FMnYP11g= github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= @@ -235,8 +239,8 @@ github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbG github.com/ipfs/go-bitfield v1.1.0/go.mod 
h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk= -github.com/ipfs/go-block-format v0.1.2 h1:GAjkfhVx1f4YTODS6Esrj1wt2HhrtwTnhEr+DyPUaJo= -github.com/ipfs/go-block-format v0.1.2/go.mod h1:mACVcrxarQKstUU3Yf/RdwbC4DzPV6++rO2a3d+a/KE= +github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= +github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= github.com/ipfs/go-blockservice v0.5.0 h1:B2mwhhhVQl2ntW2EIpaWPwSCxSuqr5fFA93Ms4bYLEY= github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.2/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= @@ -256,8 +260,8 @@ github.com/ipfs/go-datastore v0.6.0 h1:JKyz+Gvz1QEZw0LsX1IBn+JFCJQH4SJVFtM4uWU0M github.com/ipfs/go-datastore v0.6.0/go.mod h1:rt5M3nNbSO/8q1t4LNkLyUwRs8HupMeN/8O4Vn9YAT8= github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps= -github.com/ipfs/go-graphsync v0.14.7 h1:V90NORSdCpUHAgqQhApU/bmPSLOnwtSHM2v7R90k9Do= -github.com/ipfs/go-graphsync v0.14.7/go.mod h1:yT0AfjFgicOoWdAlUJ96tQ5AkuGI4r1taIQX/aHbBQo= +github.com/ipfs/go-graphsync v0.14.8 h1:NFFHquTNnwPi05tJhdpPj4CJMnqRBLxpZd+IfPRauf4= +github.com/ipfs/go-graphsync v0.14.8/go.mod h1:qyHjUvHey6EfKUDMQPwCuVkMOurRG3hcjRm+FaVP6bE= github.com/ipfs/go-hamt-ipld v0.1.1/go.mod h1:1EZCr2v0jlCnhpa+aZ0JZYp8Tt2w16+JJOAVz17YcDk= github.com/ipfs/go-ipfs-blockstore v1.3.0 h1:m2EXaWgwTzAfsmt5UdJ7Is6l4gJcaM/A12XwJyvYvMM= github.com/ipfs/go-ipfs-blockstore v1.3.0/go.mod h1:KgtZyc9fq+P2xJUiCAzbRdhhqJHvsw8u2Dlqy2MyRTE= @@ -284,12 +288,13 @@ github.com/ipfs/go-ipld-cbor v0.0.3/go.mod h1:wTBtrQZA3SoFKMVkp6cn6HMRteIB1VsmHA github.com/ipfs/go-ipld-cbor 
v0.0.4/go.mod h1:BkCduEx3XBCO6t2Sfo5BaHzuok7hbhdMm9Oh8B2Ftq4= github.com/ipfs/go-ipld-cbor v0.0.5/go.mod h1:BkCduEx3XBCO6t2Sfo5BaHzuok7hbhdMm9Oh8B2Ftq4= github.com/ipfs/go-ipld-cbor v0.0.6-0.20211211231443-5d9b9e1f6fa8/go.mod h1:ssdxxaLJPXH7OjF5V4NSjBbcfh+evoR4ukuru0oPXMA= -github.com/ipfs/go-ipld-cbor v0.0.6 h1:pYuWHyvSpIsOOLw4Jy7NbBkCyzLDcl64Bf/LZW7eBQ0= github.com/ipfs/go-ipld-cbor v0.0.6/go.mod h1:ssdxxaLJPXH7OjF5V4NSjBbcfh+evoR4ukuru0oPXMA= +github.com/ipfs/go-ipld-cbor v0.1.0 h1:dx0nS0kILVivGhfWuB6dUpMa/LAwElHPw1yOGYopoYs= +github.com/ipfs/go-ipld-cbor v0.1.0/go.mod h1:U2aYlmVrJr2wsUBU67K4KgepApSZddGRDWBYR0H4sCk= github.com/ipfs/go-ipld-format v0.0.1/go.mod h1:kyJtbkDALmFHv3QR6et67i35QzO3S0dCDnkOJhcZkms= github.com/ipfs/go-ipld-format v0.0.2/go.mod h1:4B6+FM2u9OJ9zCV+kSbgFAZlOrv1Hqbf0INGQgiKf9k= -github.com/ipfs/go-ipld-format v0.5.0 h1:WyEle9K96MSrvr47zZHKKcDxJ/vlpET6PSiQsAFO+Ds= -github.com/ipfs/go-ipld-format v0.5.0/go.mod h1:ImdZqJQaEouMjCvqCe0ORUS+uoBmf7Hf+EO/jh+nk3M= +github.com/ipfs/go-ipld-format v0.6.0 h1:VEJlA2kQ3LqFSIm5Vu6eIlSxD/Ze90xtc4Meten1F5U= +github.com/ipfs/go-ipld-format v0.6.0/go.mod h1:g4QVMTn3marU3qXchwjpKPKgJv+zF+OlaKMyhJ4LHPg= github.com/ipfs/go-ipld-legacy v0.2.1 h1:mDFtrBpmU7b//LzLSypVrXsD8QxkEWxu5qVxN99/+tk= github.com/ipfs/go-libipfs v0.6.0 h1:3FuckAJEm+zdHbHbf6lAyk0QUzc45LsFcGw102oBCZM= github.com/ipfs/go-log v0.0.1/go.mod h1:kL1d2/hzSpI0thNYjiKfjanbVNU+IIGA/WnNESY9leM= @@ -310,18 +315,20 @@ github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j github.com/ipfs/go-peertaskqueue v0.8.1 h1:YhxAs1+wxb5jk7RvS0LHdyiILpNmRIRnZVztekOF0pg= github.com/ipfs/go-peertaskqueue v0.8.1/go.mod h1:Oxxd3eaK279FxeydSPPVGHzbwVeHjatZ2GA8XD+KbPU= github.com/ipfs/go-unixfs v0.4.5 h1:wj8JhxvV1G6CD7swACwSKYa+NgtdWC1RUit+gFnymDU= -github.com/ipfs/go-unixfsnode v1.7.4 h1:iLvKyAVKUYOIAW2t4kDYqsT7VLGj31eXJE2aeqGfbwA= -github.com/ipfs/go-unixfsnode v1.7.4/go.mod h1:PVfoyZkX1B34qzT3vJO4nsLUpRCyhnMuHBznRcXirlk= 
+github.com/ipfs/go-unixfsnode v1.8.0 h1:yCkakzuE365glu+YkgzZt6p38CSVEBPgngL9ZkfnyQU= +github.com/ipfs/go-unixfsnode v1.8.0/go.mod h1:HxRu9HYHOjK6HUqFBAi++7DVoWAHn0o4v/nZ/VA+0g8= github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= -github.com/ipld/go-car/v2 v2.11.0 h1:lkAPwbbTFqbdfawgm+bfmFc8PjGC7D12VcaLXPCLNfM= -github.com/ipld/go-car/v2 v2.11.0/go.mod h1:aDszqev0zjtU8l96g4lwXHaU9bzArj56Y7eEN0q/xqA= +github.com/ipld/go-car/v2 v2.13.1 h1:KnlrKvEPEzr5IZHKTXLAEub+tPrzeAFQVRlSQvuxBO4= +github.com/ipld/go-car/v2 v2.13.1/go.mod h1:QkdjjFNGit2GIkpQ953KBwowuoukoM75nP/JI1iDJdo= github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc= github.com/ipld/go-codec-dagpb v1.6.0/go.mod h1:ANzFhfP2uMJxRBr8CE+WQWs5UsNa0pYtmKZ+agnUw9s= github.com/ipld/go-ipld-prime v0.21.0 h1:n4JmcpOlPDIxBcY037SVfpd1G+Sj1nKZah0m6QH9C2E= github.com/ipld/go-ipld-prime v0.21.0/go.mod h1:3RLqy//ERg/y5oShXXdx5YIp50cFGOanyMctpPjsvxQ= github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd h1:gMlw/MhNr2Wtp5RwGdsW23cs+yCuj9k2ON7i9MiJlRo= -github.com/ipld/ipld/specs v0.0.0-20230816230151-73f8fbea1783 h1:09+y1AqnODibi/a6xvuwEiEbx51XZ21pWzKgeezfNII= -github.com/ipld/ipld/specs v0.0.0-20230816230151-73f8fbea1783/go.mod h1:AfGlAr20WOjV5PyCowEnGY3pAm5x5i+o0R8IUeir6cs= +github.com/ipld/go-trustless-utils v0.2.0 h1:XPP2gJJ2snhWiwP8m5Gw66pmTSMHAERxo/DMV1NVOkU= +github.com/ipld/go-trustless-utils v0.2.0/go.mod h1:nbUA6YuKZqNHYd3mYKALxOQa5aC8fnr2w+OxirSSOqc= +github.com/ipld/ipld/specs v0.0.0-20230907004443-0e4ff95ff474 h1:CiDtcUFyzRwhKyiS4Gn+fzdraMoNVtPHd+wVIekExPc= +github.com/ipld/ipld/specs v0.0.0-20230907004443-0e4ff95ff474/go.mod h1:WcT0DfRe+e2QFY0kcbsOnuT6jL5Q0JNZ83I5DHIdStg= github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff h1:xbKrIvnpQkbF8iHPk/HGcegsypCDpcXWHhzBCLyCWf8= github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff/go.mod h1:paYP9U4N3/vOzGCuN9kU972vtvw9JUcQjOKyiCFGwRk= 
github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52 h1:QG4CGBqCeuBo6aZlGAamSkxWdgWfZGeE49eUOWJPA4c= @@ -560,6 +567,7 @@ github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hg github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM= github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE= github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -601,8 +609,8 @@ github.com/whyrusleeping/cbor-gen v0.0.0-20200812213548-958ddffe352c/go.mod h1:f github.com/whyrusleeping/cbor-gen v0.0.0-20200826160007-0b9f6c5fb163/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/cbor-gen v0.0.0-20210118024343-169e9d70c0c2/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/cbor-gen v0.0.0-20210303213153-67a261a1d291/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= -github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa h1:EyA027ZAkuaCLoxVX4r1TZMPy1d31fM6hbfQ4OU4I5o= -github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= +github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25 h1:yVYDLoN2gmB3OdBXFW8e1UwgVbmCvNlnAKhvHPaNARI= +github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f 
h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc/go.mod h1:bopw91TMyo8J3tvftk8xmU2kPmlrt4nScJQZU2hE5EM= diff --git a/pkg/internal/itest/direct_fetch_test.go b/pkg/internal/itest/direct_fetch_test.go index a03fa60e..2f6b1a77 100644 --- a/pkg/internal/itest/direct_fetch_test.go +++ b/pkg/internal/itest/direct_fetch_test.go @@ -21,6 +21,7 @@ import ( "github.com/ipld/go-car/v2/storage" "github.com/ipld/go-ipld-prime/codec/dagcbor" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + trustlessutils "github.com/ipld/go-trustless-utils" host "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" @@ -116,7 +117,7 @@ func TestDirectFetch(t *testing.T) { }() outCar, err := storage.NewReadableWritable(outFile, []cid.Cid{srcData1.Root}, carv2.WriteAsCarV1(true)) req.NoError(err) - request, err := types.NewRequestForPath(outCar, srcData1.Root, "", types.DagScopeAll, nil) + request, err := types.NewRequestForPath(outCar, srcData1.Root, "", trustlessutils.DagScopeAll, nil) req.NoError(err) _, err = lassie.Fetch(ctx, request, func(types.RetrievalEvent) {}) req.NoError(err) diff --git a/pkg/internal/itest/http_fetch_test.go b/pkg/internal/itest/http_fetch_test.go index 891c9f4f..a602dbf7 100644 --- a/pkg/internal/itest/http_fetch_test.go +++ b/pkg/internal/itest/http_fetch_test.go @@ -23,7 +23,6 @@ import ( "github.com/filecoin-project/lassie/pkg/lassie" "github.com/filecoin-project/lassie/pkg/retriever" httpserver "github.com/filecoin-project/lassie/pkg/server/http" - "github.com/filecoin-project/lassie/pkg/verifiedcar" "github.com/google/uuid" "github.com/ipfs/go-cid" unixfs "github.com/ipfs/go-unixfsnode/testutil" @@ -35,6 +34,8 @@ import ( cidlink "github.com/ipld/go-ipld-prime/linking/cid" 
"github.com/ipld/go-ipld-prime/storage/memstore" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + trustlesshttp "github.com/ipld/go-trustless-utils/http" + "github.com/ipld/go-trustless-utils/traversal" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" @@ -61,6 +62,11 @@ func TestHttpFetch(t *testing.T) { type queryModifier func(url.Values, []testpeer.TestPeer) type bodyValidator func(*testing.T, unixfs.DirEntry, []byte) type lassieOptsGen func(*testing.T, *mocknet.MockRetrievalNet) []lassie.LassieOption + type response struct { + StatusCode int + Header http.Header + Body []byte + } wrapPath := "/want2/want1/want0" testCases := []struct { @@ -79,6 +85,7 @@ func TestHttpFetch(t *testing.T) { modifyQueries []queryModifier validateBodies []bodyValidator lassieOpts lassieOptsGen + expectNoDups bool }{ { name: "graphsync large sharded file", @@ -895,7 +902,7 @@ func TestHttpFetch(t *testing.T) { lsys := cidlink.DefaultLinkSystem() lsys.SetReadStorage(store) lsys.SetWriteStorage(store) - _, _, err := verifiedcar.Config{ + _, err := traversal.Config{ Root: srcData.Root, Selector: selectorparse.CommonSelector_ExploreAllRecursively, ExpectDuplicatesIn: true, @@ -904,9 +911,57 @@ func TestHttpFetch(t *testing.T) { }}, }, { - name: "http large sharded file with dups, no dups response requested", + name: "http large sharded file with dups, no dups response requested", + httpRemotes: 1, + setHeader: noDups, + expectNoDups: true, + generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { + return []unixfs.DirEntry{unixfs.GenerateFile(t, remotes[0].LinkSystem, testutil.ZeroReader{}, 4<<20)} + }, + validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { + wantCids := []cid.Cid{ + srcData.Root, // "/"" + srcData.SelfCids[1], + srcData.SelfCids[len(srcData.SelfCids)-1], + } + validateCarBody(t, body, srcData.Root, 
wantCids, true) + }}, + }, + { + name: "http large sharded file with dups, */* gives dups", httpRemotes: 1, - setHeader: noDups, + setHeader: func(h http.Header) { h.Set("Accept", "*/*") }, + generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { + return []unixfs.DirEntry{unixfs.GenerateFile(t, remotes[0].LinkSystem, testutil.ZeroReader{}, 4<<20)} + }, + validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { + store := &testutil.CorrectedMemStore{ParentStore: &memstore.Store{ + Bag: make(map[string][]byte), + }} + lsys := cidlink.DefaultLinkSystem() + lsys.SetReadStorage(store) + lsys.SetWriteStorage(store) + _, err := traversal.Config{ + Root: srcData.Root, + Selector: selectorparse.CommonSelector_ExploreAllRecursively, + ExpectDuplicatesIn: true, + }.VerifyCar(context.Background(), bytes.NewReader(body), lsys) + require.NoError(t, err) + }}, + }, { + name: "http large sharded file with dups, multiple accept, priority to no dups", + httpRemotes: 1, + expectNoDups: true, + setHeader: func(h http.Header) { + h.Set("Accept", + strings.Join([]string{ + "text/html", + trustlesshttp.DefaultContentType().WithDuplicates(true).WithQuality(0.7).String(), + trustlesshttp.DefaultContentType().WithDuplicates(false).WithQuality(0.8).String(), + "*/*;q=0.1", + }, ", "), + ) + }, generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { return []unixfs.DirEntry{unixfs.GenerateFile(t, remotes[0].LinkSystem, testutil.ZeroReader{}, 4<<20)} }, @@ -919,6 +974,37 @@ func TestHttpFetch(t *testing.T) { validateCarBody(t, body, srcData.Root, wantCids, true) }}, }, + { + name: "http large sharded file with dups, multiple accept, priority to dups", + httpRemotes: 1, + setHeader: func(h http.Header) { + h.Set("Accept", + strings.Join([]string{ + "text/html", + trustlesshttp.DefaultContentType().WithDuplicates(true).WithQuality(0.8).String(), + 
trustlesshttp.DefaultContentType().WithDuplicates(false).WithQuality(0.7).String(), + "*/*;q=0.1", + }, ", "), + ) + }, + generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { + return []unixfs.DirEntry{unixfs.GenerateFile(t, remotes[0].LinkSystem, testutil.ZeroReader{}, 4<<20)} + }, + validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { + store := &testutil.CorrectedMemStore{ParentStore: &memstore.Store{ + Bag: make(map[string][]byte), + }} + lsys := cidlink.DefaultLinkSystem() + lsys.SetReadStorage(store) + lsys.SetWriteStorage(store) + _, err := traversal.Config{ + Root: srcData.Root, + Selector: selectorparse.CommonSelector_ExploreAllRecursively, + ExpectDuplicatesIn: true, + }.VerifyCar(context.Background(), bytes.NewReader(body), lsys) + require.NoError(t, err) + }}, + }, { name: "bitswap nested file, path with special characters", bitswapRemotes: 1, @@ -1077,9 +1163,9 @@ func TestHttpFetch(t *testing.T) { } } - responseChans := make([]chan *http.Response, 0) + responseChans := make([]chan response, 0) for i := 0; i < len(srcData); i++ { - responseChan := make(chan *http.Response, 1) + responseChan := make(chan response, 1) responseChans = append(responseChans, responseChan) go func(i int) { // Make a request for our CID and read the complete CAR bytes @@ -1099,11 +1185,17 @@ func TestHttpFetch(t *testing.T) { t.Log("Fetching", getReq.URL.String()) resp, err := http.DefaultClient.Do(getReq) req.NoError(err) - responseChan <- resp + expectBodyReadError := "" + if testCase.expectUncleanEnd { + expectBodyReadError = "http: unexpected EOF reading trailer" + } + body := readAllBody(t, resp.Body, expectBodyReadError) + req.NoError(resp.Body.Close()) + responseChan <- response{StatusCode: resp.StatusCode, Header: resp.Header, Body: body} }(i) } - responses := make([]*http.Response, 0) + responses := make([]response, 0) for _, responseChan := range responseChans { select { case resp := 
<-responseChan: @@ -1133,15 +1225,13 @@ func TestHttpFetch(t *testing.T) { req.Equal(http.StatusUnauthorized, resp.StatusCode) } else { if resp.StatusCode != http.StatusOK { - body, err := io.ReadAll(resp.Body) - req.NoError(err) - req.Failf("200 response code not received", "got code: %d, body: %s", resp.StatusCode, string(body)) + req.Failf("200 response code not received", "got code: %d, body: %s", resp.StatusCode, string(resp.Body)) } req.Regexp(`^lassie/v\d+\.\d+\.\d+-\w+$`, resp.Header.Get("Server")) req.Equal(fmt.Sprintf(`attachment; filename="%s.car"`, srcData[i].Root.String()), resp.Header.Get("Content-Disposition")) req.Equal("none", resp.Header.Get("Accept-Ranges")) req.Equal("public, max-age=29030400, immutable", resp.Header.Get("Cache-Control")) - req.Equal("application/vnd.ipld.car; version=1", resp.Header.Get("Content-Type")) + req.Equal(trustlesshttp.DefaultContentType().WithDuplicates(!testCase.expectNoDups).String(), resp.Header.Get("Content-Type")) req.Equal("nosniff", resp.Header.Get("X-Content-Type-Options")) etagStart := fmt.Sprintf(`"%s.car.`, srcData[i].Root.String()) etagGot := resp.Header.Get("ETag") @@ -1152,30 +1242,21 @@ func TestHttpFetch(t *testing.T) { require.NotEmpty(t, requestId) _, err := uuid.Parse(requestId) req.NoError(err) - body, err := io.ReadAll(resp.Body) - if !testCase.expectUncleanEnd { - req.NoError(err) - } else { - req.Error(err) - } - err = resp.Body.Close() - req.NoError(err) if DEBUG_DATA { t.Logf("Creating CAR %s in temp dir", fmt.Sprintf("%s_received%d.car", testCase.name, i)) dstf, err := os.CreateTemp("", fmt.Sprintf("%s_received%d.car", testCase.name, i)) req.NoError(err) t.Logf("Writing received data to CAR @ %s", dstf.Name()) - _, err = dstf.Write(body) + _, err = dstf.Write(resp.Body) req.NoError(err) carFiles = append(carFiles, dstf) } if testCase.validateBodies != nil && testCase.validateBodies[i] != nil { - testCase.validateBodies[i](t, srcData[i], body) + testCase.validateBodies[i](t, srcData[i], 
resp.Body) } else { - // gotDir := CarToDirEntry(t, bytes.NewReader(body), srcData[i].Root, true) - gotLsys := CarBytesLinkSystem(t, bytes.NewReader(body)) + gotLsys := CarBytesLinkSystem(t, bytes.NewReader(resp.Body)) gotDir := unixfs.ToDirEntry(t, gotLsys, srcData[i].Root, true) unixfs.CompareDirEntries(t, srcData[i], gotDir) } @@ -1267,3 +1348,31 @@ func debugRemotes(t *testing.T, ctx context.Context, name string, remotes []test } return carFiles } + +func readAllBody(t *testing.T, r io.Reader, expectError string) []byte { + if expectError == "" { + body, err := io.ReadAll(r) + require.NoError(t, err) + return body + } + // expect an error, so let's creep up on it and collect as much of the body + // as we can before the error blocks us + // see readLocked() in src/net/http/transfer.go: + // → b.src.Read(p) + // → followed by b.readTrailer() which should error; we want to capture both + var buf bytes.Buffer + var byt [1]byte + var err error + var n int + for { + n, err = r.Read(byt[:]) + // record the bytes we read, the error should come after the normal body + // read and then it attempts to read trailers where it should fail + buf.Write(byt[:n]) + if err != nil { + require.EqualError(t, err, expectError) + break + } + } + return buf.Bytes() +} diff --git a/pkg/internal/itest/testpeer/peerhttpserver.go b/pkg/internal/itest/testpeer/peerhttpserver.go index a439d408..a1d0d9f0 100644 --- a/pkg/internal/itest/testpeer/peerhttpserver.go +++ b/pkg/internal/itest/testpeer/peerhttpserver.go @@ -9,7 +9,6 @@ import ( "net/http" "strings" - "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-cid" "github.com/ipfs/go-unixfsnode" "github.com/ipld/go-car/v2" @@ -21,6 +20,7 @@ import ( "github.com/ipld/go-ipld-prime/node/basicnode" "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" + trustlessutils "github.com/ipld/go-trustless-utils" servertiming "github.com/mitchellh/go-server-timing" ) @@ -125,21 +125,21 @@ func 
MockIpfsHandler(ctx context.Context, lsys linking.LinkSystem) func(http.Res } // Parse car scope and use it to get selector - var dagScope types.DagScope + var dagScope trustlessutils.DagScope switch req.URL.Query().Get("dag-scope") { case "all": - dagScope = types.DagScopeAll + dagScope = trustlessutils.DagScopeAll case "entity": - dagScope = types.DagScopeEntity + dagScope = trustlessutils.DagScopeEntity case "block": - dagScope = types.DagScopeBlock + dagScope = trustlessutils.DagScopeBlock default: http.Error(res, fmt.Sprintf("Invalid dag-scope parameter: %s", req.URL.Query().Get("dag-scope")), http.StatusBadRequest) return } - var byteRange *types.ByteRange + var byteRange *trustlessutils.ByteRange if req.URL.Query().Get("entity-bytes") != "" { - br, err := types.ParseByteRange(req.URL.Query().Get("entity-bytes")) + br, err := trustlessutils.ParseByteRange(req.URL.Query().Get("entity-bytes")) if err != nil { http.Error(res, fmt.Sprintf("Invalid entity-bytes parameter: %s", req.URL.Query().Get("entity-bytes")), http.StatusBadRequest) return @@ -147,7 +147,7 @@ func MockIpfsHandler(ctx context.Context, lsys linking.LinkSystem) func(http.Res byteRange = &br } - sel, err := selector.CompileSelector(types.PathScopeSelector(unixfsPath, dagScope, byteRange)) + sel, err := selector.CompileSelector(trustlessutils.Request{Path: unixfsPath, Scope: dagScope, Bytes: byteRange}.Selector()) if err != nil { http.Error(res, fmt.Sprintf("Failed to compile selector from dag-scope: %v", err), http.StatusInternalServerError) return diff --git a/pkg/internal/itest/trustless_fetch_test.go b/pkg/internal/itest/trustless_fetch_test.go index a53f827d..9b47cda4 100644 --- a/pkg/internal/itest/trustless_fetch_test.go +++ b/pkg/internal/itest/trustless_fetch_test.go @@ -15,11 +15,11 @@ import ( "github.com/filecoin-project/lassie/pkg/internal/itest/testpeer" "github.com/filecoin-project/lassie/pkg/lassie" httpserver "github.com/filecoin-project/lassie/pkg/server/http" - 
"github.com/filecoin-project/lassie/pkg/types" "github.com/google/uuid" "github.com/ipfs/go-unixfsnode" "github.com/ipld/go-car/v2" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + trustlessutils "github.com/ipld/go-trustless-utils" trustlesspathing "github.com/ipld/ipld/specs/pkg-go/trustless-pathing" "github.com/stretchr/testify/require" ) @@ -27,7 +27,7 @@ import ( func TestTrustlessUnixfsFetch(t *testing.T) { req := require.New(t) - testCases, err := trustlesspathing.Unixfs20mVarietyCases() + testCases, _, err := trustlesspathing.Unixfs20mVarietyCases() req.NoError(err) storage, closer, err := trustlesspathing.Unixfs20mVarietyReadableStorage() req.NoError(err) @@ -112,13 +112,13 @@ func TestTrustlessUnixfsFetch(t *testing.T) { req.Equal(fmt.Sprintf(`attachment; filename="%s.car"`, tc.Root.String()), resp.Header.Get("Content-Disposition")) req.Equal("none", resp.Header.Get("Accept-Ranges")) req.Equal("public, max-age=29030400, immutable", resp.Header.Get("Cache-Control")) - req.Equal("application/vnd.ipld.car; version=1", resp.Header.Get("Content-Type")) + req.Equal("application/vnd.ipld.car;version=1;order=dfs;dups=y", resp.Header.Get("Content-Type")) req.Equal("nosniff", resp.Header.Get("X-Content-Type-Options")) etagStart := fmt.Sprintf(`"%s.car.`, tc.Root.String()) etagGot := resp.Header.Get("ETag") req.True(strings.HasPrefix(etagGot, etagStart), "ETag should start with [%s], got [%s]", etagStart, etagGot) req.Equal(`"`, etagGot[len(etagGot)-1:], "ETag should end with a quote") - req.Equal(fmt.Sprintf("/ipfs/%s%s", tc.Root.String(), types.PathEscape(tc.Path)), resp.Header.Get("X-Ipfs-Path")) + req.Equal(fmt.Sprintf("/ipfs/%s%s", tc.Root.String(), trustlessutils.PathEscape(tc.Path)), resp.Header.Get("X-Ipfs-Path")) requestId := resp.Header.Get("X-Trace-Id") require.NotEmpty(t, requestId) _, err = uuid.Parse(requestId) diff --git a/pkg/internal/testutil/gen.go b/pkg/internal/testutil/gen.go index fca9093a..00e09775 100644 --- 
a/pkg/internal/testutil/gen.go +++ b/pkg/internal/testutil/gen.go @@ -17,6 +17,7 @@ import ( dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/ipni/go-libipni/metadata" crypto "github.com/libp2p/go-libp2p/core/crypto" "github.com/libp2p/go-libp2p/core/peer" @@ -90,7 +91,7 @@ func GenerateRetrievalRequests(t *testing.T, n int) []types.RetrievalRequest { for i := 0; i < n; i++ { requests = append(requests, types.RetrievalRequest{ RetrievalID: rids[i], - Cid: cids[i], + Request: trustlessutils.Request{Root: cids[i]}, LinkSystem: cidlink.DefaultLinkSystem(), }) } diff --git a/pkg/internal/testutil/mockroundtripper.go b/pkg/internal/testutil/mockroundtripper.go index 12cf25f2..0c75ece6 100644 --- a/pkg/internal/testutil/mockroundtripper.go +++ b/pkg/internal/testutil/mockroundtripper.go @@ -12,7 +12,6 @@ import ( "time" "github.com/benbjohnson/clock" - "github.com/filecoin-project/lassie/pkg/types" "github.com/google/uuid" "github.com/ipfs/go-cid" "github.com/ipld/go-car/v2" @@ -25,6 +24,7 @@ import ( "github.com/ipld/go-ipld-prime/node/basicnode" "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/libp2p/go-libp2p/core/peer" "github.com/stretchr/testify/require" ) @@ -43,8 +43,9 @@ type MockRoundTripper struct { clock *clock.Mock remoteBlockDuration time.Duration expectedPath map[cid.Cid]string - expectedScope map[cid.Cid]types.DagScope + expectedScope map[cid.Cid]trustlessutils.DagScope remotes map[cid.Cid][]MockRoundTripRemote + sendDuplicates map[cid.Cid]bool startsCh chan peer.ID statsCh chan RemoteStats endsCh chan peer.ID @@ -59,8 +60,9 @@ func NewMockRoundTripper( clock *clock.Mock, remoteBlockDuration time.Duration, expectedPath map[cid.Cid]string, - expectedScope map[cid.Cid]types.DagScope, + expectedScope 
map[cid.Cid]trustlessutils.DagScope, remotes map[cid.Cid][]MockRoundTripRemote, + sendDuplicates map[cid.Cid]bool, ) *MockRoundTripper { return &MockRoundTripper{ t, @@ -70,6 +72,7 @@ func NewMockRoundTripper( expectedPath, expectedScope, remotes, + sendDuplicates, make(chan peer.ID, 32), make(chan RemoteStats, 32), make(chan peer.ID, 32), @@ -101,16 +104,12 @@ func (mrt *MockRoundTripper) RoundTrip(req *http.Request) (*http.Response, error } else { require.Equal(mrt.t, path, expectedPath) } - expectedScope := types.DagScopeAll + expectedScope := trustlessutils.DagScopeAll if scope, ok := mrt.expectedScope[root]; ok { expectedScope = scope } - legacyScope := string(expectedScope) - if legacyScope == string(types.DagScopeEntity) { - legacyScope = "file" - } - require.Equal(mrt.t, req.URL.RawQuery, fmt.Sprintf("dag-scope=%s&car-scope=%s", expectedScope, legacyScope)) - require.Equal(mrt.t, req.Header["Accept"], []string{"application/vnd.ipld.car;version=1;order=dfs;dups=y"}) + require.Equal(mrt.t, req.URL.RawQuery, fmt.Sprintf("dag-scope=%s", expectedScope)) + require.Equal(mrt.t, []string{"application/vnd.ipld.car;version=1;order=dfs;dups=y"}, req.Header["Accept"]) reqId := req.Header["X-Request-Id"] require.Len(mrt.t, reqId, 1) _, err = uuid.Parse(reqId[0]) @@ -134,9 +133,21 @@ func (mrt *MockRoundTripper) RoundTrip(req *http.Request) (*http.Response, error } } + sendDuplicates := true + if d, ok := mrt.sendDuplicates[root]; ok { + sendDuplicates = d + } + + header := make(http.Header) + dupsyn := "y" + if !sendDuplicates { + dupsyn = "n" + } + header.Add("Content-Type", "application/vnd.ipld.car; version=1; order=dfs; dups="+dupsyn) return &http.Response{ StatusCode: http.StatusOK, - Body: newDeferredBody(mrt, remote, root), + Header: header, + Body: newDeferredBody(mrt, remote, root, sendDuplicates), }, nil } @@ -190,19 +201,21 @@ func (mrt *MockRoundTripper) VerifyRetrievalsCompleted(ctx context.Context, t *t // deferredBody is simply a Reader that lazily starts 
a CAR writer on the first // Read call. type deferredBody struct { - mrt *MockRoundTripper - remote MockRoundTripRemote - root cid.Cid + mrt *MockRoundTripper + remote MockRoundTripRemote + root cid.Cid + duplicates bool r io.ReadCloser once sync.Once } -func newDeferredBody(mrt *MockRoundTripper, remote MockRoundTripRemote, root cid.Cid) *deferredBody { +func newDeferredBody(mrt *MockRoundTripper, remote MockRoundTripRemote, root cid.Cid, duplicates bool) *deferredBody { return &deferredBody{ - mrt: mrt, - remote: remote, - root: root, + mrt: mrt, + remote: remote, + root: root, + duplicates: duplicates, } } @@ -233,7 +246,7 @@ func (d *deferredBody) makeBody() io.ReadCloser { } // instantiating this writes a CARv1 header and waits for more Put()s - carWriter, err := storage.NewWritable(carW, []cid.Cid{d.root}, car.WriteAsCarV1(true), car.AllowDuplicatePuts(false)) + carWriter, err := storage.NewWritable(carW, []cid.Cid{d.root}, car.WriteAsCarV1(true), car.AllowDuplicatePuts(d.duplicates)) req.NoError(err) // intercept the StorageReadOpener of the LinkSystem so that for each @@ -241,6 +254,7 @@ func (d *deferredBody) makeBody() io.ReadCloser { // to the CARv1 writer. 
lsys := d.remote.LinkSystem originalSRO := lsys.StorageReadOpener + seen := make(map[cid.Cid]struct{}) lsys.StorageReadOpener = func(lc linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { r, err := originalSRO(lc, lnk) if err != nil { @@ -250,19 +264,22 @@ func (d *deferredBody) makeBody() io.ReadCloser { if err != nil { return nil, err } - err = carWriter.Put(d.mrt.ctx, lnk.(cidlink.Link).Cid.KeyString(), byts) - req.NoError(err) - stats.Blocks = append(stats.Blocks, lnk.(cidlink.Link).Cid) - stats.ByteCount += uint64(len(byts)) // only the length of the bytes, not the rest of the CAR infrastructure - - // ensure there is blockDuration between each block send - sendAt := d.remote.RespondAt.Add(d.mrt.remoteBlockDuration * time.Duration(len(stats.Blocks))) - if d.mrt.clock.Until(sendAt) > 0 { - select { - case <-d.mrt.ctx.Done(): - return nil, d.mrt.ctx.Err() - case <-d.mrt.clock.After(d.mrt.clock.Until(sendAt)): - time.Sleep(1 * time.Millisecond) // let em goroutines breathe + if _, ok := seen[lnk.(cidlink.Link).Cid]; d.duplicates || !ok { + err = carWriter.Put(d.mrt.ctx, lnk.(cidlink.Link).Cid.KeyString(), byts) + seen[lnk.(cidlink.Link).Cid] = struct{}{} + req.NoError(err) + stats.Blocks = append(stats.Blocks, lnk.(cidlink.Link).Cid) + stats.ByteCount += uint64(len(byts)) // only the length of the bytes, not the rest of the CAR infrastructure + + // ensure there is blockDuration between each block send + sendAt := d.remote.RespondAt.Add(d.mrt.remoteBlockDuration * time.Duration(len(stats.Blocks))) + if d.mrt.clock.Until(sendAt) > 0 { + select { + case <-d.mrt.ctx.Done(): + return nil, d.mrt.ctx.Err() + case <-d.mrt.clock.After(d.mrt.clock.Until(sendAt)): + time.Sleep(1 * time.Millisecond) // let em goroutines breathe + } } } return bytes.NewReader(byts), nil diff --git a/pkg/retriever/assignablecandidatefinder.go b/pkg/retriever/assignablecandidatefinder.go index 9f399282..c8272c37 100644 --- a/pkg/retriever/assignablecandidatefinder.go +++ 
b/pkg/retriever/assignablecandidatefinder.go @@ -36,11 +36,11 @@ func (acf AssignableCandidateFinder) FindCandidates(ctx context.Context, request ctx, cancelCtx := context.WithCancel(ctx) defer cancelCtx() - eventsCallback(events.StartedFindingCandidates(acf.clock.Now(), request.RetrievalID, request.Cid)) + eventsCallback(events.StartedFindingCandidates(acf.clock.Now(), request.RetrievalID, request.Root)) var totalCandidates atomic.Uint64 candidateBuffer := candidatebuffer.NewCandidateBuffer(func(candidates []types.RetrievalCandidate) { - eventsCallback(events.CandidatesFound(acf.clock.Now(), request.RetrievalID, request.Cid, candidates)) + eventsCallback(events.CandidatesFound(acf.clock.Now(), request.RetrievalID, request.Root, candidates)) acceptableCandidates := make([]types.RetrievalCandidate, 0) for _, candidate := range candidates { @@ -58,25 +58,25 @@ func (acf AssignableCandidateFinder) FindCandidates(ctx context.Context, request return } - eventsCallback(events.CandidatesFiltered(acf.clock.Now(), request.RetrievalID, request.Cid, acceptableCandidates)) + eventsCallback(events.CandidatesFiltered(acf.clock.Now(), request.RetrievalID, request.Root, acceptableCandidates)) totalCandidates.Add(uint64(len(acceptableCandidates))) onCandidates(acceptableCandidates) }, acf.clock) err := candidateBuffer.BufferStream(ctx, func(ctx context.Context, onNextCandidate candidatebuffer.OnNextCandidate) error { if len(request.FixedPeers) > 0 { - return sendFixedPeers(request.Cid, request.FixedPeers, onNextCandidate) + return sendFixedPeers(request.Root, request.FixedPeers, onNextCandidate) } - return acf.candidateFinder.FindCandidatesAsync(ctx, request.Cid, onNextCandidate) + return acf.candidateFinder.FindCandidatesAsync(ctx, request.Root, onNextCandidate) }, BufferWindow) if err != nil { - eventsCallback(events.Failed(acf.clock.Now(), request.RetrievalID, types.RetrievalCandidate{RootCid: request.Cid}, err.Error())) - return fmt.Errorf("could not get retrieval candidates 
for %s: %w", request.Cid, err) + eventsCallback(events.Failed(acf.clock.Now(), request.RetrievalID, types.RetrievalCandidate{RootCid: request.Root}, err.Error())) + return fmt.Errorf("could not get retrieval candidates for %s: %w", request.Root, err) } if totalCandidates.Load() == 0 { - eventsCallback(events.Failed(acf.clock.Now(), request.RetrievalID, types.RetrievalCandidate{RootCid: request.Cid}, ErrNoCandidates.Error())) + eventsCallback(events.Failed(acf.clock.Now(), request.RetrievalID, types.RetrievalCandidate{RootCid: request.Root}, ErrNoCandidates.Error())) return ErrNoCandidates } return nil diff --git a/pkg/retriever/assignablecandidatefinder_test.go b/pkg/retriever/assignablecandidatefinder_test.go index dad3ba1d..e21b2243 100644 --- a/pkg/retriever/assignablecandidatefinder_test.go +++ b/pkg/retriever/assignablecandidatefinder_test.go @@ -12,6 +12,7 @@ import ( "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-cid" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/libp2p/go-libp2p/core/peer" "github.com/stretchr/testify/require" ) @@ -186,7 +187,7 @@ func TestAssignableCandidateFinder(t *testing.T) { err = retrievalCandidateFinder.FindCandidates(ctx, types.RetrievalRequest{ RetrievalID: rid1, - Cid: cid1, + Request: trustlessutils.Request{Root: cid1}, LinkSystem: cidlink.DefaultLinkSystem(), FixedPeers: allFixedPeers[cid1], }, retrievalCollector, candidateCollector) @@ -200,7 +201,7 @@ func TestAssignableCandidateFinder(t *testing.T) { candidates = nil err = retrievalCandidateFinder.FindCandidates(ctx, types.RetrievalRequest{ RetrievalID: rid2, - Cid: cid2, + Request: trustlessutils.Request{Root: cid2}, LinkSystem: cidlink.DefaultLinkSystem(), FixedPeers: allFixedPeers[cid2], }, retrievalCollector, candidateCollector) diff --git a/pkg/retriever/bitswapretriever.go b/pkg/retriever/bitswapretriever.go index 526e4bf8..d461a231 100644 --- 
a/pkg/retriever/bitswapretriever.go +++ b/pkg/retriever/bitswapretriever.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "io" - "math" "sync" "sync/atomic" "time" @@ -19,15 +18,11 @@ import ( "github.com/ipfs/boxo/bitswap/network" "github.com/ipfs/boxo/blockservice" "github.com/ipfs/go-cid" - "github.com/ipfs/go-unixfsnode" - dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/linking/preload" - "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/traversal" - "github.com/ipld/go-ipld-prime/traversal/selector" + "github.com/ipld/go-trustless-utils/traversal" "github.com/ipni/go-libipni/metadata" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" @@ -132,7 +127,7 @@ func (br *bitswapRetrieval) RetrieveFromAsyncCandidates(ayncCandidates types.Inb selector := br.request.GetSelector() startTime := br.clock.Now() // this is a hack cause we aren't able to track bitswap fetches per peer for now, so instead we just create a single peer for all events - bitswapCandidate := types.NewRetrievalCandidate(peer.ID(""), nil, br.request.Cid, metadata.Bitswap{}) + bitswapCandidate := types.NewRetrievalCandidate(peer.ID(""), nil, br.request.Root, metadata.Bitswap{}) // setup the linksystem to record bytes & blocks written -- since this isn't automatic w/o go-data-transfer retrievalCtx, cancel := context.WithCancel(br.ctx) @@ -235,14 +230,12 @@ func (br *bitswapRetrieval) RetrieveFromAsyncCandidates(ayncCandidates types.Inb } // run the retrieval - err = easyTraverse( - retrievalCtx, - cidlink.Link{Cid: br.request.Cid}, - selector, - traversalLinkSys, - preloader, - br.request.MaxBlocks, - ) + _, err = traversal.Config{ + Root: br.request.Root, + Selector: selector, + MaxBlocks: br.request.MaxBlocks, + }.Traverse(retrievalCtx, traversalLinkSys, preloader) + if storage != nil { 
storage.Stop() } @@ -291,7 +284,7 @@ func (br *bitswapRetrieval) RetrieveFromAsyncCandidates(ayncCandidates types.Inb // return stats return &types.RetrievalStats{ StorageProviderId: peer.ID(""), - RootCid: br.request.Cid, + RootCid: br.request.Root, Size: totalWritten.Load(), Blocks: blockCount.Load(), Duration: duration, @@ -322,70 +315,3 @@ func loaderForSession(retrievalID types.RetrievalID, inProgressCids InProgressCi return bytes.NewReader(blk.RawData()), nil } } - -func easyTraverse( - ctx context.Context, - root datamodel.Link, - traverseSelector datamodel.Node, - lsys linking.LinkSystem, - preloader preload.Loader, - maxBlocks uint64, -) error { - - lsys, ecr := newErrorCapturingReader(lsys) - protoChooser := dagpb.AddSupportToChooser(basicnode.Chooser) - - // retrieve first node - prototype, err := protoChooser(root, linking.LinkContext{Ctx: ctx}) - if err != nil { - return err - } - node, err := lsys.Load(linking.LinkContext{Ctx: ctx}, root, prototype) - if err != nil { - return err - } - - progress := traversal.Progress{ - Cfg: &traversal.Config{ - Ctx: ctx, - LinkSystem: lsys, - LinkTargetNodePrototypeChooser: protoChooser, - Preloader: preloader, - }, - } - if maxBlocks > 0 { - progress.Budget = &traversal.Budget{ - LinkBudget: int64(maxBlocks) - 1, // first block is already loaded - NodeBudget: math.MaxInt64, - } - } - progress.LastBlock.Link = root - compiledSelector, err := selector.ParseSelector(traverseSelector) - if err != nil { - return err - } - - if err := progress.WalkMatching(node, compiledSelector, unixfsnode.BytesConsumingMatcher); err != nil { - return err - } - return ecr.Error -} - -type errorCapturingReader struct { - sro linking.BlockReadOpener - Error error -} - -func newErrorCapturingReader(lsys linking.LinkSystem) (linking.LinkSystem, *errorCapturingReader) { - ecr := &errorCapturingReader{sro: lsys.StorageReadOpener} - lsys.StorageReadOpener = ecr.StorageReadOpener - return lsys, ecr -} - -func (ecr *errorCapturingReader) 
StorageReadOpener(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { - r, err := ecr.sro(lc, l) - if err != nil { - ecr.Error = err - } - return r, err -} diff --git a/pkg/retriever/bitswapretriever_test.go b/pkg/retriever/bitswapretriever_test.go index 286aae39..1a06085b 100644 --- a/pkg/retriever/bitswapretriever_test.go +++ b/pkg/retriever/bitswapretriever_test.go @@ -26,6 +26,7 @@ import ( "github.com/ipld/go-ipld-prime/storage/memstore" "github.com/ipld/go-ipld-prime/traversal/selector" "github.com/ipld/go-ipld-prime/traversal/selector/builder" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/stretchr/testify/require" ) @@ -336,7 +337,7 @@ func TestBitswapRetriever(t *testing.T) { } req1 := types.RetrievalRequest{ RetrievalID: rid1, - Cid: cid1, + Request: trustlessutils.Request{Root: cid1}, LinkSystem: *linkSystemForCid(cid1, localLinkSystems), PreloadLinkSystem: preloadLinkSys1, Selector: sel, @@ -351,7 +352,7 @@ func TestBitswapRetriever(t *testing.T) { } req2 := types.RetrievalRequest{ RetrievalID: rid2, - Cid: cid2, + Request: trustlessutils.Request{Root: cid2}, LinkSystem: *linkSystemForCid(cid2, localLinkSystems), PreloadLinkSystem: preloadLinkSys2, Selector: sel, diff --git a/pkg/retriever/graphsyncretriever_test.go b/pkg/retriever/graphsyncretriever_test.go index 91d7c30b..b574082e 100644 --- a/pkg/retriever/graphsyncretriever_test.go +++ b/pkg/retriever/graphsyncretriever_test.go @@ -17,6 +17,7 @@ import ( cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/ipni/go-libipni/metadata" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" @@ -711,7 +712,7 @@ func TestRetrievalRacing(t *testing.T) { tc.cancelAfter, []testutil.RunRetrieval{func(cb func(types.RetrievalEvent)) (*types.RetrievalStats, error) { return 
cfg.Retrieve(retCtx, types.RetrievalRequest{ - Cid: cid.Undef, + Request: trustlessutils.Request{Root: cid.Undef}, RetrievalID: retrievalID, LinkSystem: cidlink.DefaultLinkSystem(), }, cb).RetrieveFromAsyncCandidates(makeAsyncCandidates(t, candidates)) @@ -884,7 +885,7 @@ func TestMultipleRetrievals(t *testing.T) { }.RunWithVerification(ctx, t, clock, mockClient, nil, session, nil, 0, []testutil.RunRetrieval{ func(cb func(types.RetrievalEvent)) (*types.RetrievalStats, error) { return cfg.Retrieve(context.Background(), types.RetrievalRequest{ - Cid: cid1, + Request: trustlessutils.Request{Root: cid1}, RetrievalID: retrievalID1, LinkSystem: cidlink.DefaultLinkSystem(), }, cb).RetrieveFromAsyncCandidates(makeAsyncCandidates(t, []types.RetrievalCandidate{ @@ -895,7 +896,7 @@ func TestMultipleRetrievals(t *testing.T) { }, func(cb func(types.RetrievalEvent)) (*types.RetrievalStats, error) { return cfg.Retrieve(context.Background(), types.RetrievalRequest{ - Cid: cid2, + Request: trustlessutils.Request{Root: cid2}, RetrievalID: retrievalID2, LinkSystem: cidlink.DefaultLinkSystem(), }, cb).RetrieveFromAsyncCandidates(makeAsyncCandidates(t, []types.RetrievalCandidate{ @@ -943,7 +944,7 @@ func TestRetrievalSelector(t *testing.T) { selector := selectorparse.CommonSelector_MatchPoint retrieval := cfg.Retrieve(context.Background(), types.RetrievalRequest{ - Cid: cid1, + Request: trustlessutils.Request{Root: cid1}, RetrievalID: retrievalID, LinkSystem: cidlink.DefaultLinkSystem(), Selector: selector, @@ -1074,7 +1075,7 @@ func TestDuplicateRetreivals(t *testing.T) { }.RunWithVerification(ctx, t, clock, mockClient, nil, session, nil, 0, []testutil.RunRetrieval{ func(cb func(types.RetrievalEvent)) (*types.RetrievalStats, error) { return cfg.Retrieve(context.Background(), types.RetrievalRequest{ - Cid: cid1, + Request: trustlessutils.Request{Root: cid1}, RetrievalID: retrievalID, LinkSystem: cidlink.DefaultLinkSystem(), }, cb).RetrieveFromAsyncCandidates(makeAsyncCandidates(t, 
[]types.RetrievalCandidate{ diff --git a/pkg/retriever/httpretriever.go b/pkg/retriever/httpretriever.go index 0d85f951..c7f6221b 100644 --- a/pkg/retriever/httpretriever.go +++ b/pkg/retriever/httpretriever.go @@ -13,8 +13,9 @@ import ( "github.com/filecoin-project/lassie/pkg/build" "github.com/filecoin-project/lassie/pkg/events" "github.com/filecoin-project/lassie/pkg/types" - "github.com/filecoin-project/lassie/pkg/verifiedcar" "github.com/ipfs/go-cid" + trustlesshttp "github.com/ipld/go-trustless-utils/http" + "github.com/ipld/go-trustless-utils/traversal" "github.com/ipni/go-libipni/metadata" "github.com/multiformats/go-multicodec" ) @@ -116,31 +117,41 @@ func (ph *ProtocolHttp) Retrieve( if resp.StatusCode < 200 || resp.StatusCode >= 300 { return nil, ErrHttpRequestFailure{Code: resp.StatusCode} } + + var expectDuplicates = trustlesshttp.DefaultIncludeDupes + if contentType, valid := trustlesshttp.ParseContentType(resp.Header.Get("Content-Type")); valid { + expectDuplicates = contentType.Duplicates + } // else be permissive and just expect duplicates (DefaultIncludeDupes) + var ttfb time.Duration rdr := newTimeToFirstByteReader(resp.Body, func() { ttfb = retrieval.Clock.Since(retrievalStart) shared.sendEvent(ctx, events.FirstByte(retrieval.Clock.Now(), retrieval.request.RetrievalID, candidate, ttfb, multicodec.TransportIpfsGatewayHttp)) }) - cfg := verifiedcar.Config{ - Root: retrieval.request.Cid, + cfg := traversal.Config{ + Root: retrieval.request.Root, Selector: retrieval.request.GetSelector(), - ExpectDuplicatesIn: true, + ExpectDuplicatesIn: expectDuplicates, + // write out the same as we get in so we're not causing waste here, + // dealing with the actual output duplicates requirements can be done + // in a parent + WriteDuplicatesOut: expectDuplicates, MaxBlocks: retrieval.request.MaxBlocks, } - blockCount, byteCount, err := cfg.VerifyCar(ctx, rdr, retrieval.request.LinkSystem) + traversalResult, err := cfg.VerifyCar(ctx, rdr, 
retrieval.request.LinkSystem) if err != nil { return nil, err } duration := retrieval.Clock.Since(retrievalStart) - speed := uint64(float64(byteCount) / duration.Seconds()) + speed := uint64(float64(traversalResult.BytesIn) / duration.Seconds()) return &types.RetrievalStats{ RootCid: candidate.RootCid, StorageProviderId: candidate.MinerPeer.ID, - Size: byteCount, - Blocks: blockCount, + Size: traversalResult.BytesIn, + Blocks: traversalResult.BlocksIn, Duration: duration, AverageSpeed: speed, TotalPayment: big.Zero(), @@ -166,19 +177,19 @@ func makeRequest(ctx context.Context, request types.RetrievalRequest, candidate return nil, fmt.Errorf("%w: %v", ErrNoHttpForPeer, err) } - path, err := request.GetUrlPath() + path, err := request.Request.UrlPath() if err != nil { logger.Warnf("Couldn't construct a url path for request: %v", err) return nil, fmt.Errorf("%w: %v", ErrBadPathForRequest, err) } - reqURL := fmt.Sprintf("%s/ipfs/%s%s", candidateURL, request.Cid, path) + reqURL := fmt.Sprintf("%s/ipfs/%s%s", candidateURL, request.Root, path) req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) if err != nil { logger.Warnf("Couldn't construct a http request %s: %v", candidate.MinerPeer.ID, err) return nil, fmt.Errorf("%w for peer %s: %v", ErrBadPathForRequest, candidate.MinerPeer.ID, err) } - req.Header.Add("Accept", request.Scope.AcceptHeader()) + req.Header.Add("Accept", trustlesshttp.DefaultContentType().String()) // prefer duplicates req.Header.Add("X-Request-Id", request.RetrievalID.String()) req.Header.Add("User-Agent", build.UserAgent) diff --git a/pkg/retriever/httpretriever_test.go b/pkg/retriever/httpretriever_test.go index b90aed9c..5b7897c9 100644 --- a/pkg/retriever/httpretriever_test.go +++ b/pkg/retriever/httpretriever_test.go @@ -2,6 +2,7 @@ package retriever_test import ( "context" + "fmt" "io" "math" "net/http" @@ -25,6 +26,7 @@ import ( "github.com/ipld/go-ipld-prime/node/basicnode" "github.com/ipld/go-ipld-prime/storage/memstore" 
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/ipni/go-libipni/metadata" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" @@ -62,6 +64,10 @@ func TestHTTPRetriever(t *testing.T) { funkyPath, funkyLinks := mkFunky(lsys) funkyCands := testutil.GenerateRetrievalCandidatesForCID(t, 1, funkyLinks[0], metadata.IpfsGatewayHttp{}) + // testing our ability to handle duplicates or not + dupyLinks, dupyLinksDeduped := mkDupy(lsys) + dupyCands := testutil.GenerateRetrievalCandidatesForCID(t, 1, dupyLinks[0], metadata.IpfsGatewayHttp{}) + rid1 := types.RetrievalID(uuid.New()) rid2 := types.RetrievalID(uuid.New()) remoteBlockDuration := 50 * time.Millisecond @@ -73,8 +79,9 @@ func TestHTTPRetriever(t *testing.T) { name string requests map[cid.Cid]types.RetrievalID requestPath map[cid.Cid]string - requestScope map[cid.Cid]types.DagScope + requestScope map[cid.Cid]trustlessutils.DagScope remotes map[cid.Cid][]testutil.MockRoundTripRemote + sendDuplicates map[cid.Cid]bool // will default to true expectedStats map[cid.Cid]*types.RetrievalStats expectedErrors map[cid.Cid]struct{} expectedCids map[cid.Cid][]cid.Cid // expected in this order @@ -662,6 +669,166 @@ func TestHTTPRetriever(t *testing.T) { }, }, }, + { + name: "dag with duplicates, peer sending duplicates", + requests: map[cid.Cid]types.RetrievalID{dupyLinks[0]: rid1}, + sendDuplicates: map[cid.Cid]bool{dupyLinks[0]: true}, + remotes: map[cid.Cid][]testutil.MockRoundTripRemote{ + dupyLinks[0]: { + { + Peer: dupyCands[0].MinerPeer, + LinkSystem: lsys, + Selector: allSelector, + RespondAt: startTime.Add(initialPause + time.Millisecond*40), + }, + }, + }, + expectedStats: map[cid.Cid]*types.RetrievalStats{ + dupyLinks[0]: { + RootCid: dupyLinks[0], + StorageProviderId: dupyCands[0].MinerPeer.ID, + Size: sizeOfStored(lsys, dupyLinks), + Blocks: uint64(len(dupyLinks)), + Duration: 40*time.Millisecond + 
remoteBlockDuration*time.Duration(len(dupyLinks)), + AverageSpeed: uint64(float64(sizeOfStored(lsys, dupyLinks)) / (40*time.Millisecond + remoteBlockDuration*time.Duration(len(dupyLinks))).Seconds()), + TimeToFirstByte: 40 * time.Millisecond, + TotalPayment: big.Zero(), + AskPrice: big.Zero(), + }, + }, + expectedCids: map[cid.Cid][]cid.Cid{dupyLinks[0]: dupyLinks}, + expectSequence: []testutil.ExpectedActionsAtTime{ + { + AfterStart: 0, + ExpectedEvents: []types.RetrievalEvent{ + events.StartedRetrieval(startTime, rid1, toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), multicodec.TransportIpfsGatewayHttp), + events.ConnectedToProvider(startTime, rid1, toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), multicodec.TransportIpfsGatewayHttp), + }, + ExpectedMetrics: []testutil.SessionMetric{ + {Type: testutil.SessionMetric_Connect, Provider: dupyCands[0].MinerPeer.ID}, + }, + }, + { + AfterStart: initialPause, + ReceivedRetrievals: []peer.ID{dupyCands[0].MinerPeer.ID}, + }, + { + AfterStart: initialPause + time.Millisecond*40, + ExpectedEvents: []types.RetrievalEvent{ + events.FirstByte(startTime.Add(initialPause+time.Millisecond*40), rid1, toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), time.Millisecond*40, multicodec.TransportIpfsGatewayHttp), + }, + ExpectedMetrics: []testutil.SessionMetric{ + {Type: testutil.SessionMetric_FirstByte, Provider: dupyCands[0].MinerPeer.ID, Duration: time.Millisecond * 40}, + }, + }, + { + AfterStart: initialPause + time.Millisecond*40 + remoteBlockDuration*time.Duration(len(dupyLinks)), + ExpectedEvents: []types.RetrievalEvent{ + events.Success( + startTime.Add(initialPause+time.Millisecond*40+remoteBlockDuration*time.Duration(len(dupyLinks))), + rid1, + toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), + sizeOfStored(lsys, dupyLinks), + uint64(len(dupyLinks)), + time.Millisecond*40+remoteBlockDuration*time.Duration(len(dupyLinks)), + multicodec.TransportIpfsGatewayHttp, + ), + }, + CompletedRetrievals: 
[]peer.ID{dupyCands[0].MinerPeer.ID}, + ServedRetrievals: []testutil.RemoteStats{ + { + Peer: dupyCands[0].MinerPeer.ID, + Root: dupyLinks[0], + ByteCount: sizeOfStored(lsys, dupyLinks), + Blocks: dupyLinks, + }, + }, + ExpectedMetrics: []testutil.SessionMetric{ + {Type: testutil.SessionMetric_Success, Provider: dupyCands[0].MinerPeer.ID, Value: math.Trunc(float64(sizeOfStored(lsys, dupyLinks)) / (time.Millisecond*40 + remoteBlockDuration*time.Duration(len(dupyLinks))).Seconds())}, + }, + }, + }, + }, + { + name: "dag with duplicates, peer not sending duplicates", + requests: map[cid.Cid]types.RetrievalID{dupyLinks[0]: rid1}, + sendDuplicates: map[cid.Cid]bool{dupyLinks[0]: false}, + remotes: map[cid.Cid][]testutil.MockRoundTripRemote{ + dupyLinks[0]: { + { + Peer: dupyCands[0].MinerPeer, + LinkSystem: lsys, + Selector: allSelector, + RespondAt: startTime.Add(initialPause + time.Millisecond*40), + }, + }, + }, + expectedStats: map[cid.Cid]*types.RetrievalStats{ + dupyLinks[0]: { + RootCid: dupyLinks[0], + StorageProviderId: dupyCands[0].MinerPeer.ID, + Size: sizeOfStored(lsys, dupyLinksDeduped), + Blocks: uint64(len(dupyLinksDeduped)), + Duration: 40*time.Millisecond + remoteBlockDuration*time.Duration(len(dupyLinksDeduped)), + AverageSpeed: uint64(float64(sizeOfStored(lsys, dupyLinksDeduped)) / (40*time.Millisecond + remoteBlockDuration*time.Duration(len(dupyLinksDeduped))).Seconds()), + TimeToFirstByte: 40 * time.Millisecond, + TotalPayment: big.Zero(), + AskPrice: big.Zero(), + }, + }, + expectedCids: map[cid.Cid][]cid.Cid{dupyLinks[0]: dupyLinksDeduped}, + expectSequence: []testutil.ExpectedActionsAtTime{ + { + AfterStart: 0, + ExpectedEvents: []types.RetrievalEvent{ + events.StartedRetrieval(startTime, rid1, toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), multicodec.TransportIpfsGatewayHttp), + events.ConnectedToProvider(startTime, rid1, toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), multicodec.TransportIpfsGatewayHttp), + }, + ExpectedMetrics: 
[]testutil.SessionMetric{ + {Type: testutil.SessionMetric_Connect, Provider: dupyCands[0].MinerPeer.ID}, + }, + }, + { + AfterStart: initialPause, + ReceivedRetrievals: []peer.ID{dupyCands[0].MinerPeer.ID}, + }, + { + AfterStart: initialPause + time.Millisecond*40, + ExpectedEvents: []types.RetrievalEvent{ + events.FirstByte(startTime.Add(initialPause+time.Millisecond*40), rid1, toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), time.Millisecond*40, multicodec.TransportIpfsGatewayHttp), + }, + ExpectedMetrics: []testutil.SessionMetric{ + {Type: testutil.SessionMetric_FirstByte, Provider: dupyCands[0].MinerPeer.ID, Duration: time.Millisecond * 40}, + }, + }, + { + AfterStart: initialPause + time.Millisecond*40 + remoteBlockDuration*time.Duration(len(dupyLinksDeduped)), + ExpectedEvents: []types.RetrievalEvent{ + events.Success( + startTime.Add(initialPause+time.Millisecond*40+remoteBlockDuration*time.Duration(len(dupyLinksDeduped))), + rid1, + toCandidate(dupyLinks[0], dupyCands[0].MinerPeer), + sizeOfStored(lsys, dupyLinksDeduped), + uint64(len(dupyLinksDeduped)), + time.Millisecond*40+remoteBlockDuration*time.Duration(len(dupyLinksDeduped)), + multicodec.TransportIpfsGatewayHttp, + ), + }, + CompletedRetrievals: []peer.ID{dupyCands[0].MinerPeer.ID}, + ServedRetrievals: []testutil.RemoteStats{ + { + Peer: dupyCands[0].MinerPeer.ID, + Root: dupyLinks[0], + ByteCount: sizeOfStored(lsys, dupyLinksDeduped), + Blocks: dupyLinksDeduped, + }, + }, + ExpectedMetrics: []testutil.SessionMetric{ + {Type: testutil.SessionMetric_Success, Provider: dupyCands[0].MinerPeer.ID, Value: math.Trunc(float64(sizeOfStored(lsys, dupyLinksDeduped)) / (time.Millisecond*40 + remoteBlockDuration*time.Duration(len(dupyLinksDeduped))).Seconds())}, + }, + }, + }, + }, } for _, testCase := range testCases { @@ -676,7 +843,16 @@ func TestHTTPRetriever(t *testing.T) { clock := clock.NewMock() clock.Set(startTime) - roundTripper := testutil.NewMockRoundTripper(t, ctx, clock, remoteBlockDuration, 
testCase.requestPath, testCase.requestScope, testCase.remotes) + roundTripper := testutil.NewMockRoundTripper( + t, + ctx, + clock, + remoteBlockDuration, + testCase.requestPath, + testCase.requestScope, + testCase.remotes, + testCase.sendDuplicates, + ) client := &http.Client{Transport: roundTripper} mockSession := testutil.NewMockSession(ctx) @@ -704,10 +880,12 @@ func TestHTTPRetriever(t *testing.T) { retrievals = append(retrievals, func(eventsCb func(types.RetrievalEvent)) (*types.RetrievalStats, error) { request := types.RetrievalRequest{ RetrievalID: rid, - Cid: c, - LinkSystem: *lsys, - Path: testCase.requestPath[c], - Scope: testCase.requestScope[c], + Request: trustlessutils.Request{ + Root: c, + Path: testCase.requestPath[c], + Scope: testCase.requestScope[c], + }, + LinkSystem: *lsys, } candidates := toCandidates(c, testCase.remotes[c]) return retriever.Retrieve(context.Background(), request, eventsCb). @@ -828,3 +1006,35 @@ func mkFunky(lsys linking.LinkSystem) (string, []cid.Cid) { slices.Reverse(funkyLinks) return funkyPath, funkyLinks } + +func mkDupy(lsys linking.LinkSystem) ([]cid.Cid, []cid.Cid) { + dupy := mkBlockWithBytes(lsys, []byte("duplicate data")) + + n, err := qp.BuildMap(dagpb.Type.PBNode, 1, func(ma datamodel.MapAssembler) { + qp.MapEntry(ma, "Links", qp.List(100, func(la datamodel.ListAssembler) { + for i := 0; i < 100; i++ { + qp.ListEntry(la, qp.Map(2, func(ma datamodel.MapAssembler) { + qp.MapEntry(ma, "Name", qp.String(fmt.Sprintf("%03d", i))) + qp.MapEntry(ma, "Hash", qp.Link(cidlink.Link{Cid: dupy})) + })) + } + })) + }) + if err != nil { + panic(err) + } + l, err := lsys.Store(linking.LinkContext{}, pblp, n) + if err != nil { + panic(err) + } + + // dupyLinks contains the duplicates + dupyLinks := []cid.Cid{l.(cidlink.Link).Cid} + for i := 0; i < 100; i++ { + dupyLinks = append(dupyLinks, dupy) + } + // dupyLinksDeduped contains just the unique links + dupyLinksDeduped := []cid.Cid{l.(cidlink.Link).Cid, dupy} + + return 
dupyLinks, dupyLinksDeduped +} diff --git a/pkg/retriever/parallelpeerretriever.go b/pkg/retriever/parallelpeerretriever.go index 10d9542d..a2ba414c 100644 --- a/pkg/retriever/parallelpeerretriever.go +++ b/pkg/retriever/parallelpeerretriever.go @@ -210,7 +210,7 @@ func (retrieval *retrieval) RetrieveFromAsyncCandidates(asyncCandidates types.In logger.Errorf( "Possible leak: unable to successfully cancel all %s retrieval attempts for %s within 100ms", retrieval.Protocol.Code().String(), - retrieval.request.Cid.String(), + retrieval.request.Root.String(), ) } } @@ -258,7 +258,7 @@ func (retrieval *retrieval) filterCandidates(ctx context.Context, asyncCandidate // update or add new candidate metadata currMetadata, seenCandidate := retrieval.candidateMetadata[candidate.MinerPeer.ID] newMetadata := candidate.Metadata.Get(multicodec.Code(retrieval.Protocol.Code())) - candidateMetadata := retrieval.Protocol.GetMergedMetadata(retrieval.request.Cid, currMetadata, newMetadata) + candidateMetadata := retrieval.Protocol.GetMergedMetadata(retrieval.request.Root, currMetadata, newMetadata) retrieval.candidateMetadata[candidate.MinerPeer.ID] = candidateMetadata // if it's a new candidate, include it, otherwise don't start a new retrieval for it if !seenCandidate { diff --git a/pkg/retriever/retriever.go b/pkg/retriever/retriever.go index 1a6298b5..12fdabab 100644 --- a/pkg/retriever/retriever.go +++ b/pkg/retriever/retriever.go @@ -135,12 +135,12 @@ func (retriever *Retriever) Retrieve( if !retriever.eventManager.IsStarted() { return nil, ErrRetrieverNotStarted } - if !retriever.session.RegisterRetrieval(request.RetrievalID, request.Cid, request.GetSelector()) { - return nil, fmt.Errorf("%w: %s", ErrRetrievalAlreadyRunning, request.Cid) + if !retriever.session.RegisterRetrieval(request.RetrievalID, request.Root, request.GetSelector()) { + return nil, fmt.Errorf("%w: %s", ErrRetrievalAlreadyRunning, request.Root) } defer func() { if err := 
retriever.session.EndRetrieval(request.RetrievalID); err != nil { - logger.Errorf("failed to end retrieval tracking for %s: %s", request.Cid, err.Error()) + logger.Errorf("failed to end retrieval tracking for %s: %s", request.Root, err.Error()) } }() @@ -149,7 +149,7 @@ func (retriever *Retriever) Retrieve( onRetrievalEvent := makeOnRetrievalEvent(ctx, retriever.eventManager, retriever.session, - request.Cid, + request.Root, request.RetrievalID, eventStats, eventsCB, @@ -159,10 +159,10 @@ func (retriever *Retriever) Retrieve( if err != nil { return nil, err } - descriptor = strings.TrimPrefix(descriptor, "/ipfs/"+request.Cid.String()) + descriptor = strings.TrimPrefix(descriptor, "/ipfs/"+request.Root.String()) // Emit a StartedFetch event signaling that the Lassie fetch has started - onRetrievalEvent(events.StartedFetch(retriever.clock.Now(), request.RetrievalID, request.Cid, descriptor, request.GetSupportedProtocols(retriever.protocols)...)) + onRetrievalEvent(events.StartedFetch(retriever.clock.Now(), request.RetrievalID, request.Root, descriptor, request.GetSupportedProtocols(retriever.protocols)...)) // retrieve, note that we could get a successful retrieval // (retrievalStats!=nil) _and_ also an error return because there may be @@ -175,7 +175,7 @@ func (retriever *Retriever) Retrieve( ) // Emit a Finished event denoting that the entire fetch has finished - onRetrievalEvent(events.Finished(retriever.clock.Now(), request.RetrievalID, types.RetrievalCandidate{RootCid: request.Cid})) + onRetrievalEvent(events.Finished(retriever.clock.Now(), request.RetrievalID, types.RetrievalCandidate{RootCid: request.Root})) if err != nil && retrievalStats == nil { return nil, err @@ -188,7 +188,7 @@ func (retriever *Retriever) Retrieve( "\tBytes Received: %s\n"+ "\tTotal Payment: %s", retrievalStats.StorageProviderId, - request.Cid, + request.Root, retrievalStats.Duration, humanize.IBytes(retrievalStats.Size), types.FIL(retrievalStats.TotalPayment), diff --git 
a/pkg/retriever/retriever_test.go b/pkg/retriever/retriever_test.go index 45d7861b..d74f2516 100644 --- a/pkg/retriever/retriever_test.go +++ b/pkg/retriever/retriever_test.go @@ -20,6 +20,7 @@ import ( "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/ipni/go-libipni/metadata" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" @@ -40,7 +41,7 @@ func TestRetrieverStart(t *testing.T) { result, err := ret.Retrieve(context.Background(), types.RetrievalRequest{ LinkSystem: cidlink.DefaultLinkSystem(), RetrievalID: types.RetrievalID(uuid.New()), - Cid: cid.MustParse("bafkqaalb"), + Request: trustlessutils.Request{Root: cid.MustParse("bafkqaalb")}, }, func(types.RetrievalEvent) {}) require.ErrorIs(t, err, ErrRetrieverNotStarted) require.Nil(t, result) @@ -60,7 +61,7 @@ func TestRetriever(t *testing.T) { candidates []types.RetrievalCandidate path string dups bool - scope types.DagScope + scope trustlessutils.DagScope returns_connected map[string]testutil.DelayedConnectReturn returns_retrievals map[string]testutil.DelayedClientReturn cancelAfter time.Duration @@ -141,7 +142,7 @@ func TestRetriever(t *testing.T) { }, path: "some/path/to/request", dups: true, - scope: types.DagScopeBlock, + scope: trustlessutils.DagScopeBlock, returns_connected: map[string]testutil.DelayedConnectReturn{ string(peerA): {Err: nil, Delay: time.Millisecond * 20}, }, @@ -829,10 +830,12 @@ func TestRetriever(t *testing.T) { return ret.Retrieve(retCtx, types.RetrievalRequest{ LinkSystem: cidlink.DefaultLinkSystem(), RetrievalID: rid, - Cid: cid1, - Path: tc.path, - Scope: tc.scope, - Duplicates: tc.dups, + Request: trustlessutils.Request{ + Root: cid1, + Path: tc.path, + Scope: tc.scope, + Duplicates: tc.dups, + }, }, cb) }}, ) @@ -974,7 +977,7 @@ func TestLinkSystemPerRequest(t *testing.T) { return 
ret.Retrieve(context.Background(), types.RetrievalRequest{ LinkSystem: lsA, RetrievalID: rid, - Cid: cid1, + Request: trustlessutils.Request{Root: cid1}, }, cb) }, }) @@ -1040,7 +1043,7 @@ func TestLinkSystemPerRequest(t *testing.T) { return ret.Retrieve(context.Background(), types.RetrievalRequest{ LinkSystem: lsB, RetrievalID: rid, - Cid: cid1, + Request: trustlessutils.Request{Root: cid1}, }, cb) }, }) diff --git a/pkg/server/http/ipfs.go b/pkg/server/http/ipfs.go index 75ffa297..a774f2d7 100644 --- a/pkg/server/http/ipfs.go +++ b/pkg/server/http/ipfs.go @@ -12,127 +12,48 @@ import ( "github.com/filecoin-project/lassie/pkg/storage" "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-cid" - "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipfs/go-unixfsnode" + "github.com/ipld/go-car/v2/storage/deferred" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + trustlessutils "github.com/ipld/go-trustless-utils" + trustlesshttp "github.com/ipld/go-trustless-utils/http" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" ) -const ( - MimeTypeCar = "application/vnd.ipld.car" // The only accepted MIME type - MimeTypeCarVersion = "1" // We only accept version 1 of the MIME type - FormatParameterCar = "car" // The only valid format parameter value - FilenameExtCar = ".car" // The only valid filename extension - DefaultIncludeDupes = true // The default value for an unspecified "dups" parameter. See https://github.com/ipfs/specs/pull/412. 
- ResponseAcceptRangesHeader = "none" // We currently don't accept range requests - ResponseCacheControlHeader = "public, max-age=29030400, immutable" // Magic cache control values -) - -var ( - ResponseChunkDelimeter = []byte("0\r\n") // An http/1.1 chunk delimeter, used for specifying an early end to the response - ResponseContentTypeHeader = fmt.Sprintf("%s; version=%s", MimeTypeCar, MimeTypeCarVersion) -) - func IpfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.ResponseWriter, *http.Request) { return func(res http.ResponseWriter, req *http.Request) { statusLogger := newStatusLogger(req.Method, req.URL.Path) - path := datamodel.ParsePath(req.URL.Path) - _, path = path.Shift() // remove /ipfs - - // filter out everything but GET requests - switch req.Method { - case http.MethodGet: - break - default: - res.Header().Add("Allow", http.MethodGet) - errorResponse(res, statusLogger, http.StatusMethodNotAllowed, errors.New("method not allowed")) - return - } - - // check if CID path param is missing - if path.Len() == 0 { - // not a valid path to hit - errorResponse(res, statusLogger, http.StatusNotFound, errors.New("not found")) - return - } - - includeDupes, err := CheckFormat(req) - if err != nil { - errorResponse(res, statusLogger, http.StatusBadRequest, err) - return - } - - fileName, err := ParseFilename(req) - if err != nil { - errorResponse(res, statusLogger, http.StatusBadRequest, err) - return - } - - // validate CID path parameter - var cidSeg datamodel.PathSegment - cidSeg, path = path.Shift() - rootCid, err := cid.Parse(cidSeg.String()) - if err != nil { - errorResponse(res, statusLogger, http.StatusInternalServerError, errors.New("failed to parse CID path parameter")) - return - } - - dagScope, err := ParseScope(req) - if err != nil { - errorResponse(res, statusLogger, http.StatusBadRequest, err) - return - } - - byteRange, err := ParseByteRange(req) - if err != nil { - errorResponse(res, statusLogger, http.StatusBadRequest, err) - return 
- } - protocols, err := parseProtocols(req) - if err != nil { - errorResponse(res, statusLogger, http.StatusBadRequest, err) + if !checkGet(req, res, statusLogger) { return } - fixedPeers, err := parseProviders(req) - if err != nil { - errorResponse(res, statusLogger, http.StatusBadRequest, err) + ok, request := decodeRetrievalRequest(cfg, res, req, statusLogger) + if !ok { return } - // for setting Content-Disposition header based on filename url parameter - if fileName == "" { - fileName = fmt.Sprintf("%s%s", rootCid.String(), FilenameExtCar) - } - - retrievalId, err := types.NewRetrievalID() - if err != nil { - errorResponse(res, statusLogger, http.StatusInternalServerError, fmt.Errorf("failed to generate retrieval ID: %w", err)) + ok, fileName := decodeFilename(res, req, statusLogger, request.Root) + if !ok { return } - // TODO: we should propogate this value throughout logs so - // that we can correlate specific requests to related logs. - // For now just using to log the corrolation and return the - // X-Trace-Id header. + // TODO: this needs to be propagated through the request, perhaps on + // RetrievalRequest or we decode it as a UUID and override RetrievalID? requestId := req.Header.Get("X-Request-Id") if requestId == "" { - requestId = retrievalId.String() + requestId = request.RetrievalID.String() } else { - logger.Debugw("Corrolating provided request ID with retrieval ID", "request_id", requestId, "retrieval_id", retrievalId) + logger.Debugw("custom X-Request-Id for retrieval", "request_id", requestId, "retrieval_id", request.RetrievalID) } - // bytesWritten will be closed once we've started writing CAR content to - // the response writer. Once closed, no other content should be written.
- bytesWritten := make(chan struct{}, 1) - - tempStore := storage.NewDeferredStorageCar(cfg.TempDir, rootCid) + tempStore := storage.NewDeferredStorageCar(cfg.TempDir, request.Root) var carWriter storage.DeferredWriter - if includeDupes { - carWriter = storage.NewDuplicateAdderCarForStream(req.Context(), rootCid, path.String(), dagScope, byteRange, tempStore, res) + if request.Duplicates { + carWriter = storage.NewDuplicateAdderCarForStream(req.Context(), res, request.Root, request.Path, request.Scope, request.Bytes, tempStore) } else { - carWriter = storage.NewDeferredCarWriterForStream(rootCid, res) + carWriter = deferred.NewDeferredCarWriterForStream(res, []cid.Cid{request.Root}) } carStore := storage.NewCachingTempStore(carWriter.BlockWriteOpener(), tempStore) defer func() { @@ -140,75 +61,47 @@ func IpfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.Response logger.Errorf("error closing temp store: %s", err) } }() - var store types.ReadableWritableStorage = carStore - request, err := types.NewRequestForPath(store, rootCid, path.String(), dagScope, byteRange) - if err != nil { - errorResponse(res, statusLogger, http.StatusInternalServerError, fmt.Errorf("failed to create request: %w", err)) - return - } - request.Protocols = protocols - request.FixedPeers = fixedPeers - request.RetrievalID = retrievalId - request.Duplicates = includeDupes // needed for etag + request.LinkSystem.SetWriteStorage(carStore) + request.LinkSystem.SetReadStorage(carStore) + + // setup preload storage for bitswap, the temporary CAR store can set up a + // separate preload space in its storage + request.PreloadLinkSystem = cidlink.DefaultLinkSystem() + preloadStore := carStore.PreloadStore() + request.PreloadLinkSystem.SetReadStorage(preloadStore) + request.PreloadLinkSystem.SetWriteStorage(preloadStore) + request.PreloadLinkSystem.TrustedStorage = true + + // bytesWritten will be closed once we've started writing CAR content to + // the response writer. 
Once closed, no other content should be written. + bytesWritten := make(chan struct{}, 1) carWriter.OnPut(func(int) { // called once we start writing blocks into the CAR (on the first Put()) res.Header().Set("Server", build.UserAgent) // "lassie/vx.y.z-" res.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", fileName)) - res.Header().Set("Accept-Ranges", ResponseAcceptRangesHeader) - res.Header().Set("Cache-Control", ResponseCacheControlHeader) - res.Header().Set("Content-Type", ResponseContentTypeHeader) + res.Header().Set("Accept-Ranges", "none") + res.Header().Set("Cache-Control", trustlesshttp.ResponseCacheControlHeader) + res.Header().Set("Content-Type", trustlesshttp.DefaultContentType().WithDuplicates(request.Duplicates).String()) res.Header().Set("Etag", request.Etag()) res.Header().Set("X-Content-Type-Options", "nosniff") - res.Header().Set("X-Ipfs-Path", types.PathEscape(req.URL.Path)) - // TODO: set X-Ipfs-Roots header when we support root+path - // see https://github.com/ipfs/kubo/pull/8720 + res.Header().Set("X-Ipfs-Path", trustlessutils.PathEscape(req.URL.Path)) res.Header().Set("X-Trace-Id", requestId) statusLogger.logStatus(200, "OK") close(bytesWritten) }, true) - // setup preload storage for bitswap, the temporary CAR store can set up a - // separate preload space in its storage - request.PreloadLinkSystem = cidlink.DefaultLinkSystem() - preloadStore := carStore.PreloadStore() - request.PreloadLinkSystem.SetReadStorage(preloadStore) - request.PreloadLinkSystem.SetWriteStorage(preloadStore) - request.PreloadLinkSystem.TrustedStorage = true - - // extract block limit from query param as needed - var blockLimit uint64 - if req.URL.Query().Has("blockLimit") { - if parsedBlockLimit, err := strconv.ParseUint(req.URL.Query().Get("blockLimit"), 10, 64); err == nil { - blockLimit = parsedBlockLimit - } - } - if cfg.MaxBlocksPerRequest > 0 || blockLimit > 0 { - // use the lowest non-zero value for block limit - if blockLimit == 0 || 
(cfg.MaxBlocksPerRequest > 0 && blockLimit > cfg.MaxBlocksPerRequest) { - blockLimit = cfg.MaxBlocksPerRequest - } - request.MaxBlocks = blockLimit - } - - // servertiming metrics - logger.Debugw("fetching CID", - "retrievalId", - retrievalId, - "CID", - rootCid.String(), - "path", - path.String(), - "dagScope", - dagScope, - "byteRange", - byteRange, - "includeDupes", - includeDupes, - "blockLimit", - blockLimit, + logger.Debugw("fetching", + "retrieval_id", request.RetrievalID, + "root", request.Root.String(), + "path", request.Path, + "dag-scope", request.Scope, + "entity-bytes", request.Bytes, + "dups", request.Duplicates, + "maxBlocks", request.MaxBlocks, ) + stats, err := fetcher.Fetch(req.Context(), request, servertimingsSubscriber(req)) // force all blocks to flush @@ -219,7 +112,7 @@ func IpfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.Response if err != nil { select { case <-bytesWritten: - logger.Debugw("unclean close", "cid", request.Cid, "retrievalID", request.RetrievalID) + logger.Debugw("unclean close", "cid", request.Root, "retrievalID", request.RetrievalID) if err := closeWithUnterminatedChunk(res); err != nil { logger.Infow("unable to send early termination", "err", err) } @@ -227,21 +120,140 @@ func IpfsHandler(fetcher types.Fetcher, cfg HttpServerConfig) func(http.Response default: } if errors.Is(err, retriever.ErrNoCandidates) { - errorResponse(res, statusLogger, http.StatusNotFound, errors.New("no candidates found")) + errorResponse(res, statusLogger, http.StatusBadGateway, errors.New("no candidates found")) } else { errorResponse(res, statusLogger, http.StatusGatewayTimeout, fmt.Errorf("failed to fetch CID: %w", err)) } return } - logger.Debugw("successfully fetched CID", - "retrievalId", retrievalId, - "CID", rootCid, + + logger.Debugw("successfully fetched", + "retrieval_id", request.RetrievalID, + "root", request.Root.String(), + "path", request.Path, + "dag-scope", request.Scope, + "entity-bytes", request.Bytes, + 
"dups", request.Duplicates, + "maxBlocks", request.MaxBlocks, "duration", stats.Duration, "bytes", stats.Size, ) } } +func checkGet(req *http.Request, res http.ResponseWriter, statusLogger *statusLogger) bool { + // filter out everything but GET requests + if req.Method == http.MethodGet { + return true + } + res.Header().Add("Allow", http.MethodGet) + errorResponse(res, statusLogger, http.StatusMethodNotAllowed, errors.New("method not allowed")) + return false +} + +func decodeRequest(res http.ResponseWriter, req *http.Request, statusLogger *statusLogger) (bool, trustlessutils.Request) { + rootCid, path, err := trustlesshttp.ParseUrlPath(req.URL.Path) + if err != nil { + if errors.Is(err, trustlesshttp.ErrPathNotFound) { + errorResponse(res, statusLogger, http.StatusNotFound, err) + } else if errors.Is(err, trustlesshttp.ErrBadCid) { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + } else { + errorResponse(res, statusLogger, http.StatusInternalServerError, err) + } + return false, trustlessutils.Request{} + } + + accept, err := trustlesshttp.CheckFormat(req) + if err != nil { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + return false, trustlessutils.Request{} + } + + dagScope, err := trustlesshttp.ParseScope(req) + if err != nil { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + return false, trustlessutils.Request{} + } + + byteRange, err := trustlesshttp.ParseByteRange(req) + if err != nil { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + return false, trustlessutils.Request{} + } + + return true, trustlessutils.Request{ + Root: rootCid, + Path: path.String(), + Scope: dagScope, + Bytes: byteRange, + Duplicates: accept.Duplicates, + } +} + +func decodeRetrievalRequest(cfg HttpServerConfig, res http.ResponseWriter, req *http.Request, statusLogger *statusLogger) (bool, types.RetrievalRequest) { + ok, request := decodeRequest(res, req, statusLogger) + if !ok { + return false, 
types.RetrievalRequest{} + } + + protocols, err := parseProtocols(req) + if err != nil { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + return false, types.RetrievalRequest{} + } + + fixedPeers, err := parseProviders(req) + if err != nil { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + return false, types.RetrievalRequest{} + } + + // extract block limit from query param as needed + var maxBlocks uint64 + if req.URL.Query().Has("blockLimit") { + if parsedBlockLimit, err := strconv.ParseUint(req.URL.Query().Get("blockLimit"), 10, 64); err == nil { + maxBlocks = parsedBlockLimit + } + } + // use the lowest non-zero value for block limit + if maxBlocks == 0 || (cfg.MaxBlocksPerRequest > 0 && maxBlocks > cfg.MaxBlocksPerRequest) { + maxBlocks = cfg.MaxBlocksPerRequest + } + + retrievalId, err := types.NewRetrievalID() + if err != nil { + errorResponse(res, statusLogger, http.StatusInternalServerError, fmt.Errorf("failed to generate retrieval ID: %w", err)) + return false, types.RetrievalRequest{} + } + + linkSystem := cidlink.DefaultLinkSystem() + linkSystem.TrustedStorage = true + unixfsnode.AddUnixFSReificationToLinkSystem(&linkSystem) + + return true, types.RetrievalRequest{ + Request: request, + RetrievalID: retrievalId, + LinkSystem: linkSystem, + Protocols: protocols, + FixedPeers: fixedPeers, + MaxBlocks: maxBlocks, + } +} + +func decodeFilename(res http.ResponseWriter, req *http.Request, statusLogger *statusLogger, root cid.Cid) (bool, string) { + fileName, err := trustlesshttp.ParseFilename(req) + if err != nil { + errorResponse(res, statusLogger, http.StatusBadRequest, err) + return false, "" + } + // for setting Content-Disposition header based on filename url parameter + if fileName == "" { + fileName = fmt.Sprintf("%s%s", root, trustlesshttp.FilenameExtCar) + } + return true, fileName +} + // statusLogger is a logger for logging response statuses for a given request type statusLogger struct { method string @@ -291,7 
+303,7 @@ func closeWithUnterminatedChunk(res http.ResponseWriter) error { if err != nil { return fmt.Errorf("unable to access conn through hijack interface: %w", err) } - if _, err := buf.Write(ResponseChunkDelimeter); err != nil { + if _, err := buf.Write(trustlesshttp.ResponseChunkDelimeter); err != nil { return fmt.Errorf("writing response chunk delimiter: %w", err) } if err := buf.Flush(); err != nil { diff --git a/pkg/server/http/ipfs_test.go b/pkg/server/http/ipfs_test.go index 32950197..63a4c455 100644 --- a/pkg/server/http/ipfs_test.go +++ b/pkg/server/http/ipfs_test.go @@ -10,6 +10,7 @@ import ( "github.com/filecoin-project/lassie/pkg/internal/mockfetcher" "github.com/filecoin-project/lassie/pkg/retriever" "github.com/filecoin-project/lassie/pkg/types" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/stretchr/testify/require" ) @@ -46,7 +47,7 @@ func TestIpfsHandler(t *testing.T) { path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", headers: map[string]string{"Accept": "application/json"}, wantStatus: http.StatusBadRequest, - wantBody: "no acceptable content type\n", + wantBody: "invalid Accept header; unsupported: \"application/json\"\n", }, { name: "400 on invalid Accept header - bad dups", @@ -54,7 +55,7 @@ func TestIpfsHandler(t *testing.T) { path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", headers: map[string]string{"Accept": "application/vnd.ipld.car;dups=invalid"}, wantStatus: http.StatusBadRequest, - wantBody: "no acceptable content type\n", + wantBody: "invalid Accept header; unsupported: \"application/vnd.ipld.car;dups=invalid\"\n", }, { name: "400 on invalid Accept header - bad version", @@ -62,7 +63,7 @@ func TestIpfsHandler(t *testing.T) { path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", headers: map[string]string{"Accept": "application/vnd.ipld.car;version=2"}, wantStatus: http.StatusBadRequest, - wantBody: "no acceptable content type\n", + wantBody: 
"invalid Accept header; unsupported: \"application/vnd.ipld.car;version=2\"\n", }, { name: "400 on invalid Accept header - bad order", @@ -70,21 +71,21 @@ func TestIpfsHandler(t *testing.T) { path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", headers: map[string]string{"Accept": "application/vnd.ipld.car;order=invalid"}, wantStatus: http.StatusBadRequest, - wantBody: "no acceptable content type\n", + wantBody: "invalid Accept header; unsupported: \"application/vnd.ipld.car;order=invalid\"\n", }, { name: "400 on invalid format query param", method: "GET", path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4?format=invalid", wantStatus: http.StatusBadRequest, - wantBody: "requested non-supported format invalid\n", + wantBody: "invalid format parameter; unsupported: \"invalid\"\n", }, { name: "400 on missing Accept header and format query param", method: "GET", path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", wantStatus: http.StatusBadRequest, - wantBody: "neither a valid accept header or format parameter were provided\n", + wantBody: "neither a valid Accept header nor format parameter were provided\n", }, { name: "400 on missing extension in filename query param", @@ -92,7 +93,7 @@ func TestIpfsHandler(t *testing.T) { path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4?filename=birb", headers: map[string]string{"Accept": "application/vnd.ipld.car"}, wantStatus: http.StatusBadRequest, - wantBody: "filename missing extension\n", + wantBody: "invalid filename parameter; missing extension\n", }, { name: "400 on non-supported extension in filename query param", @@ -100,15 +101,15 @@ func TestIpfsHandler(t *testing.T) { path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4?filename=birb.tar", headers: map[string]string{"Accept": "application/vnd.ipld.car"}, wantStatus: http.StatusBadRequest, - wantBody: "filename uses non-supported extension .tar\n", + wantBody: 
"invalid filename parameter; unsupported extension: \".tar\"\n", }, { - name: "500 when we fail to parse the CID path param", + name: "400 when we fail to parse the CID path param", method: "GET", path: "/ipfs/bafyfoo", headers: map[string]string{"Accept": "application/vnd.ipld.car"}, - wantStatus: http.StatusInternalServerError, - wantBody: "failed to parse CID path parameter\n", + wantStatus: http.StatusBadRequest, + wantBody: "failed to parse root CID\n", }, { name: "400 on invalid dag-scope query parameter", @@ -150,7 +151,7 @@ func TestIpfsHandler(t *testing.T) { fetchFunc: func(ctx context.Context, r types.RetrievalRequest, cb func(types.RetrievalEvent)) (*types.RetrievalStats, error) { return nil, retriever.ErrNoCandidates }, - wantStatus: http.StatusNotFound, + wantStatus: http.StatusBadGateway, wantBody: "no candidates found\n", }, { @@ -170,9 +171,9 @@ func TestIpfsHandler(t *testing.T) { path: "/ipfs/bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4/birb.mp4?dag-scope=entity", headers: map[string]string{"Accept": "application/vnd.ipld.car"}, fetchFunc: func(ctx context.Context, r types.RetrievalRequest, cb func(types.RetrievalEvent)) (*types.RetrievalStats, error) { - require.Equal(t, "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", r.Cid.String()) + require.Equal(t, "bafybeic56z3yccnla3cutmvqsn5zy3g24muupcsjtoyp3pu5pm5amurjx4", r.Root.String()) require.Equal(t, "birb.mp4", r.Path) - require.Equal(t, types.DagScopeEntity, r.Scope) + require.Equal(t, trustlessutils.DagScopeEntity, r.Scope) require.Equal(t, uint64(0), r.MaxBlocks) return &types.RetrievalStats{}, nil }, @@ -264,7 +265,7 @@ func TestIpfsHandler(t *testing.T) { } if tt.wantBody != "" && rr.Body.String() != tt.wantBody { - t.Errorf("handler returned unexpected body: got %v want %v", + t.Errorf("handler returned unexpected body: got [%v] want [%v]", rr.Body.String(), tt.wantBody) } }) diff --git a/pkg/server/http/util.go b/pkg/server/http/util.go deleted file mode 100644 
index 93d033b0..00000000 --- a/pkg/server/http/util.go +++ /dev/null @@ -1,165 +0,0 @@ -package httpserver - -import ( - "errors" - "fmt" - "net/http" - "path/filepath" - "strings" - - "github.com/filecoin-project/lassie/pkg/types" -) - -// ParseScope returns the dag-scope query parameter or an error if the dag-scope -// parameter is not one of the supported values. -func ParseScope(req *http.Request) (types.DagScope, error) { - if req.URL.Query().Has("dag-scope") { - if ds, err := types.ParseDagScope(req.URL.Query().Get("dag-scope")); err != nil { - return ds, errors.New("invalid dag-scope parameter") - } else { - return ds, nil - } - } - // check for legacy param name -- to do -- delete once we confirm this isn't used any more - if req.URL.Query().Has("car-scope") { - switch req.URL.Query().Get("car-scope") { - case "all": - return types.DagScopeAll, nil - case "file": - return types.DagScopeEntity, nil - case "block": - return types.DagScopeBlock, nil - default: - return types.DagScopeAll, errors.New("invalid car-scope parameter") - } - } - return types.DagScopeAll, nil -} - -// ParseByteRange returns the entity-bytes query parameter if one is set in the -// query string or nil if one is not set. An error is returned if an -// entity-bytes query string is not a valid byte range. -func ParseByteRange(req *http.Request) (*types.ByteRange, error) { - if req.URL.Query().Has("entity-bytes") { - br, err := types.ParseByteRange(req.URL.Query().Get("entity-bytes")) - if err != nil { - return nil, errors.New("invalid entity-bytes parameter") - } - return &br, nil - } - return nil, nil -} - -// ParseFilename returns the filename query parameter or an error if the filename -// extension is not ".car". Lassie only supports returning CAR data. 
-// See https://specs.ipfs.tech/http-gateways/path-gateway/#filename-request-query-parameter -func ParseFilename(req *http.Request) (string, error) { - // check if provided filename query parameter has .car extension - if req.URL.Query().Has("filename") { - filename := req.URL.Query().Get("filename") - ext := filepath.Ext(filename) - if ext == "" { - return "", errors.New("filename missing extension") - } - if ext != FilenameExtCar { - return "", fmt.Errorf("filename uses non-supported extension %s", ext) - } - return filename, nil - } - return "", nil -} - -// CheckFormat validates that the data being requested is of the type CAR. -// We do this validation because the http gateway path spec allows for additional -// response formats that Lassie does not currently support, so we throw an error in -// the cases where the request is requesting one of Lassie's unsupported response -// formats. Lassie only supports returning CAR data. -// -// The spec outlines that the requesting format can be provided -// via the Accept header or the format query parameter. 
-// -// Lassie only allows the application/vnd.ipld.car Accept header -// https://specs.ipfs.tech/http-gateways/path-gateway/#accept-request-header -// -// Lassie only allows the "car" format query parameter -// https://specs.ipfs.tech/http-gateways/path-gateway/#format-request-query-parameter -func CheckFormat(req *http.Request) (bool, error) { - hasAccept := req.Header.Get("Accept") != "" - // check if Accept header includes application/vnd.ipld.car - validAccept, includeDupes := ParseAccept(req.Header.Get("Accept")) - if hasAccept && !validAccept { - return false, fmt.Errorf("no acceptable content type") - } - - // check if format is "car" - hasFormat := req.URL.Query().Has("format") - if hasFormat && req.URL.Query().Get("format") != FormatParameterCar { - return false, fmt.Errorf("requested non-supported format %s", req.URL.Query().Get("format")) - } - - // if neither are provided return - // one of them has to be given with a CAR type since we only return CAR data - if !validAccept && !hasFormat { - return false, fmt.Errorf("neither a valid accept header or format parameter were provided") - } - - return includeDupes, nil -} - -// ParseAccept validates that the request Accept header is of the type CAR and -// returns whether or not duplicate blocks are allowed in the response via -// IPIP-412: https://github.com/ipfs/specs/pull/412. 
-func ParseAccept(acceptHeader string) (validAccept bool, includeDupes bool) { - acceptTypes := strings.Split(acceptHeader, ",") - validAccept = false - includeDupes = DefaultIncludeDupes - for _, acceptType := range acceptTypes { - typeParts := strings.Split(acceptType, ";") - if typeParts[0] == "*/*" || typeParts[0] == "application/*" || typeParts[0] == MimeTypeCar { - validAccept = true - if typeParts[0] == MimeTypeCar { - // parse additional car attributes outlined in IPIP-412: https://github.com/ipfs/specs/pull/412 - for _, nextPart := range typeParts[1:] { - pair := strings.Split(nextPart, "=") - if len(pair) == 2 { - attr := strings.TrimSpace(pair[0]) - value := strings.TrimSpace(pair[1]) - switch attr { - case "dups": - switch value { - case "y": - includeDupes = true - case "n": - includeDupes = false - default: - // don't accept unexpected values - validAccept = false - } - case "version": - switch value { - case MimeTypeCarVersion: - default: - validAccept = false - } - case "order": - switch value { - case "dfs": - case "unk": - default: - // we only do dfs, which also satisfies unk, future extensions are not yet supported - validAccept = false - } - default: - // ignore others - } - } - } - } - // only break if further validation didn't fail - if validAccept { - break - } - } - } - return -} diff --git a/pkg/storage/cachingtempstore_test.go b/pkg/storage/cachingtempstore_test.go index f3bb7fa3..993c4221 100644 --- a/pkg/storage/cachingtempstore_test.go +++ b/pkg/storage/cachingtempstore_test.go @@ -4,14 +4,21 @@ import ( "bytes" "context" "io" + "math/rand" + "sync" "testing" "time" "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/storage/deferred" + mh "github.com/multiformats/go-multihash" "github.com/stretchr/testify/require" ) +var rng = rand.New(rand.NewSource(3333)) +var rngLk sync.Mutex + func TestDeferredCarWriterWritesCARv1(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 
time.Second) defer cancel() @@ -48,7 +55,7 @@ func TestDeferredCarWriterWritesCARv1(t *testing.T) { testCid2, testData2 := randBlock() var buf bytes.Buffer - cw := NewDeferredCarWriterForStream(testCid1, &buf) + cw := deferred.NewDeferredCarWriterForStream(&buf, []cid.Cid{testCid1}) ss := NewCachingTempStore(cw.BlockWriteOpener(), NewDeferredStorageCar("", testCid1)) t.Cleanup(func() { ss.Close() }) @@ -155,3 +162,20 @@ func TestDeferredCarWriterWritesCARv1(t *testing.T) { }) } } + +func randBlock() (cid.Cid, []byte) { + data := make([]byte, 1024) + rngLk.Lock() + rng.Read(data) + rngLk.Unlock() + h, err := mh.Sum(data, mh.SHA2_512, -1) + if err != nil { + panic(err) + } + return cid.NewCidV1(cid.Raw, h), data +} + +func randCid() cid.Cid { + c, _ := randBlock() + return c +} diff --git a/pkg/storage/deferredcarwriter.go b/pkg/storage/deferredcarwriter.go deleted file mode 100644 index 276b3ef1..00000000 --- a/pkg/storage/deferredcarwriter.go +++ /dev/null @@ -1,160 +0,0 @@ -package storage - -import ( - "context" - "io" - "os" - "sync" - - "github.com/ipfs/go-cid" - carv2 "github.com/ipld/go-car/v2" - carstorage "github.com/ipld/go-car/v2/storage" - "github.com/ipld/go-ipld-prime" - "github.com/ipld/go-ipld-prime/linking" - ipldstorage "github.com/ipld/go-ipld-prime/storage" -) - -type putCb struct { - cb func(int) - once bool -} - -var _ ipldstorage.WritableStorage = (*DeferredCarWriter)(nil) -var _ io.Closer = (*DeferredCarWriter)(nil) - -type DeferredWriter interface { - ipldstorage.WritableStorage - io.Closer - BlockWriteOpener() linking.BlockWriteOpener - OnPut(cb func(int), once bool) -} - -// DeferredCarWriter creates a write-only CARv1 either to an existing stream or -// to a file designated by a supplied path. CARv1 content (including header) -// only begins when the first Put() operation is performed. If the output is a -// file, it will be created when the first Put() operation is performed. 
-// DeferredCarWriter is threadsafe, and can be used concurrently. -// Closing the writer will close, but not delete, the underlying file. This -// writer is intended for constructing the final output CARv1 for the user. -type DeferredCarWriter struct { - root cid.Cid - outPath string - outStream io.Writer - - lk sync.Mutex - f *os.File - w carstorage.WritableCar - putCb []putCb - opts []carv2.Option -} - -// NewDeferredCarWriterForPath creates a DeferredCarWriter that will write to a -// file designated by the supplied path. The file will only be created on the -// first Put() operation. -func NewDeferredCarWriterForPath(root cid.Cid, outPath string, opts ...carv2.Option) *DeferredCarWriter { - return &DeferredCarWriter{root: root, outPath: outPath, opts: opts} -} - -// NewDeferredCarWriterForStream creates a DeferredCarWriter that will write to -// the supplied stream. The stream will only be written to on the first Put() -// operation. -func NewDeferredCarWriterForStream(root cid.Cid, outStream io.Writer, opts ...carv2.Option) *DeferredCarWriter { - return &DeferredCarWriter{root: root, outStream: outStream, opts: opts} -} - -// OnPut will call a callback when each Put() operation is started. The argument -// to the callback is the number of bytes being written. If once is true, the -// callback will be removed after the first call. -func (dcw *DeferredCarWriter) OnPut(cb func(int), once bool) { - if dcw.putCb == nil { - dcw.putCb = make([]putCb, 0) - } - dcw.putCb = append(dcw.putCb, putCb{cb: cb, once: once}) -} - -// Has returns false if the key was not already written to the CARv1 output. 
-func (dcw *DeferredCarWriter) Has(ctx context.Context, key string) (bool, error) { - dcw.lk.Lock() - defer dcw.lk.Unlock() - - if dcw.w == nil { // shortcut, haven't written anything, don't even initialise - return false, nil - } - - writer, err := dcw.writer() - if err != nil { - return false, err - } - - return writer.Has(ctx, key) -} - -// Put writes the given content to the CARv1 output stream, creating it if it -// doesn't exist yet. -func (dcw *DeferredCarWriter) Put(ctx context.Context, key string, content []byte) error { - dcw.lk.Lock() - defer dcw.lk.Unlock() - - if dcw.putCb != nil { - // call all callbacks, remove those that were only needed once - for i := 0; i < len(dcw.putCb); i++ { - cb := dcw.putCb[i] - cb.cb(len(content)) - if cb.once { - dcw.putCb = append(dcw.putCb[:i], dcw.putCb[i+1:]...) - i-- - } - } - } - - // first Put() call, initialise writer, which will write a CARv1 header - writer, err := dcw.writer() - if err != nil { - return err - } - - return writer.Put(ctx, key, content) -} - -// writer() -func (dcw *DeferredCarWriter) writer() (carstorage.WritableCar, error) { - if dcw.w == nil { - outStream := dcw.outStream - if outStream == nil { - openedFile, err := os.OpenFile(dcw.outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) - if err != nil { - return nil, err - } - dcw.f = openedFile - outStream = openedFile - } - w, err := carstorage.NewWritable(outStream, []cid.Cid{dcw.root}, append([]carv2.Option{carv2.WriteAsCarV1(true)}, dcw.opts...)...) - if err != nil { - return nil, err - } - dcw.w = w - } - return dcw.w, nil -} - -// Close closes the underlying file, if one was created. -func (dcw *DeferredCarWriter) Close() error { - dcw.lk.Lock() - defer dcw.lk.Unlock() - - if dcw.f != nil { - defer func() { dcw.f = nil }() - return dcw.f.Close() - } - return nil -} - -// BlockWriteOpener returns a BlockWriteOpener that operates on this storage. 
-func (dcw *DeferredCarWriter) BlockWriteOpener() linking.BlockWriteOpener { - return func(lctx linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { - wr, wrcommit, err := ipldstorage.PutStream(lctx.Ctx, dcw) - return wr, func(lnk ipld.Link) error { - return wrcommit(lnk.Binary()) - }, err - } -} diff --git a/pkg/storage/deferredcarwriter_test.go b/pkg/storage/deferredcarwriter_test.go deleted file mode 100644 index 486177c6..00000000 --- a/pkg/storage/deferredcarwriter_test.go +++ /dev/null @@ -1,224 +0,0 @@ -package storage - -import ( - "bytes" - "context" - "io" - "math/rand" - "os" - "sync" - "testing" - - "github.com/ipfs/go-cid" - carv2 "github.com/ipld/go-car/v2" - mh "github.com/multiformats/go-multihash" - "github.com/stretchr/testify/require" -) - -var rng = rand.New(rand.NewSource(3333)) -var rngLk sync.Mutex - -func TestDeferredCarWriterForPath(t *testing.T) { - ctx := context.Background() - testCid1, testData1 := randBlock() - testCid2, testData2 := randBlock() - - tmpFile := t.TempDir() + "/test.car" - - cw := NewDeferredCarWriterForPath(testCid1, tmpFile) - - _, err := os.Stat(tmpFile) - require.True(t, os.IsNotExist(err)) - - require.NoError(t, cw.Put(ctx, testCid1.KeyString(), testData1)) - require.NoError(t, cw.Put(ctx, testCid2.KeyString(), testData2)) - - stat, err := os.Stat(tmpFile) - require.NoError(t, err) - require.True(t, stat.Size() > int64(len(testData1)+len(testData2))) - - require.NoError(t, cw.Close()) - - // shouldn't be deleted - _, err = os.Stat(tmpFile) - require.NoError(t, err) - - r, err := os.Open(tmpFile) - require.NoError(t, err) - t.Cleanup(func() { r.Close() }) - carv2, err := carv2.NewBlockReader(r) - require.NoError(t, err) - - // compare CAR contents to what we wrote - require.Equal(t, carv2.Roots, []cid.Cid{testCid1}) - require.Equal(t, carv2.Version, uint64(1)) - - blk, err := carv2.Next() - require.NoError(t, err) - require.Equal(t, blk.Cid(), testCid1) - require.Equal(t, blk.RawData(), testData1) - 
- blk, err = carv2.Next() - require.NoError(t, err) - require.Equal(t, blk.Cid(), testCid2) - require.Equal(t, blk.RawData(), testData2) - - _, err = carv2.Next() - require.ErrorIs(t, err, io.EOF) -} - -func TestDeferredCarWriterForStream(t *testing.T) { - for _, tc := range []string{"path", "stream"} { - tc := tc - t.Run(tc, func(t *testing.T) { - t.Parallel() - ctx := context.Background() - testCid1, testData1 := randBlock() - testCid2, testData2 := randBlock() - testCid3, _ := randBlock() - - var cw *DeferredCarWriter - var buf bytes.Buffer - tmpFile := t.TempDir() + "/test.car" - - if tc == "path" { - cw = NewDeferredCarWriterForPath(testCid1, tmpFile) - _, err := os.Stat(tmpFile) - require.True(t, os.IsNotExist(err)) - } else { - cw = NewDeferredCarWriterForStream(testCid1, &buf) - require.Equal(t, buf.Len(), 0) - } - - has, err := cw.Has(ctx, testCid3.KeyString()) - require.NoError(t, err) - require.False(t, has) - - require.NoError(t, cw.Put(ctx, testCid1.KeyString(), testData1)) - has, err = cw.Has(ctx, testCid1.KeyString()) - require.NoError(t, err) - require.True(t, has) - require.NoError(t, cw.Put(ctx, testCid2.KeyString(), testData2)) - has, err = cw.Has(ctx, testCid1.KeyString()) - require.NoError(t, err) - require.True(t, has) - has, err = cw.Has(ctx, testCid2.KeyString()) - require.NoError(t, err) - require.True(t, has) - has, err = cw.Has(ctx, testCid3.KeyString()) - require.NoError(t, err) - require.False(t, has) - - if tc == "path" { - stat, err := os.Stat(tmpFile) - require.NoError(t, err) - require.True(t, stat.Size() > int64(len(testData1)+len(testData2))) - } else { - require.True(t, buf.Len() > len(testData1)+len(testData2)) - } - - require.NoError(t, cw.Close()) - - var rdr *carv2.BlockReader - if tc == "path" { - r, err := os.Open(tmpFile) - require.NoError(t, err) - rdr, err = carv2.NewBlockReader(r) - require.NoError(t, err) - t.Cleanup(func() { r.Close() }) - } else { - rdr, err = carv2.NewBlockReader(&buf) - require.NoError(t, err) - } 
- - // compare CAR contents to what we wrote - require.Equal(t, rdr.Roots, []cid.Cid{testCid1}) - require.Equal(t, rdr.Version, uint64(1)) - - blk, err := rdr.Next() - require.NoError(t, err) - require.Equal(t, blk.Cid(), testCid1) - require.Equal(t, blk.RawData(), testData1) - - blk, err = rdr.Next() - require.NoError(t, err) - require.Equal(t, blk.Cid(), testCid2) - require.Equal(t, blk.RawData(), testData2) - - _, err = rdr.Next() - require.ErrorIs(t, err, io.EOF) - }) - } -} - -func TestDeferredCarWriterPutCb(t *testing.T) { - ctx := context.Background() - testCid1, testData1 := randBlock() - testCid2, testData2 := randBlock() - - var buf bytes.Buffer - cw := NewDeferredCarWriterForStream(testCid1, &buf) - - var pc1 int - cw.OnPut(func(ii int) { - switch pc1 { - case 0: - require.Equal(t, buf.Len(), 0) // called before first write - require.Equal(t, len(testData1), ii) - case 1: - require.Equal(t, len(testData2), ii) - default: - require.Fail(t, "unexpected put callback") - } - pc1++ - }, false) - var pc2 int - cw.OnPut(func(ii int) { - switch pc2 { - case 0: - require.Equal(t, buf.Len(), 0) // called before first write - require.Equal(t, len(testData1), ii) - case 1: - require.Equal(t, len(testData2), ii) - default: - require.Fail(t, "unexpected put callback") - } - pc2++ - }, false) - var pc3 int - cw.OnPut(func(ii int) { - switch pc3 { - case 0: - require.Equal(t, buf.Len(), 0) // called before first write - require.Equal(t, len(testData1), ii) - default: - require.Fail(t, "unexpected put callback") - } - pc3++ - }, true) - - require.NoError(t, cw.Put(ctx, testCid1.KeyString(), testData1)) - require.NoError(t, cw.Put(ctx, testCid2.KeyString(), testData2)) - require.NoError(t, cw.Close()) - - require.Equal(t, 2, pc1) - require.Equal(t, 2, pc2) - require.Equal(t, 1, pc3) -} - -func randBlock() (cid.Cid, []byte) { - data := make([]byte, 1024) - rngLk.Lock() - rng.Read(data) - rngLk.Unlock() - h, err := mh.Sum(data, mh.SHA2_512, -1) - if err != nil { - 
panic(err) - } - return cid.NewCidV1(cid.Raw, h), data -} - -func randCid() cid.Cid { - c, _ := randBlock() - return c -} diff --git a/pkg/storage/duplicateaddercar.go b/pkg/storage/duplicateaddercar.go index f5b6a164..07b6f3ae 100644 --- a/pkg/storage/duplicateaddercar.go +++ b/pkg/storage/duplicateaddercar.go @@ -8,23 +8,34 @@ import ( "io" "sync" - "github.com/filecoin-project/lassie/pkg/types" - "github.com/filecoin-project/lassie/pkg/verifiedcar" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/storage/deferred" "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + ipldstorage "github.com/ipld/go-ipld-prime/storage" + trustlessutils "github.com/ipld/go-trustless-utils" + "github.com/ipld/go-trustless-utils/traversal" ) +type DeferredWriter interface { + ipldstorage.WritableStorage + io.Closer + BlockWriteOpener() linking.BlockWriteOpener + OnPut(cb func(int), once bool) +} + +var _ DeferredWriter = (*DuplicateAdderCar)(nil) + type DuplicateAdderCar struct { - *DeferredCarWriter + *deferred.DeferredCarWriter ctx context.Context root cid.Cid path string - scope types.DagScope - bytes *types.ByteRange + scope trustlessutils.DagScope + bytes *trustlessutils.ByteRange store *DeferredStorageCar blockStream *blockStream streamCompletion chan error @@ -33,12 +44,12 @@ type DuplicateAdderCar struct { func NewDuplicateAdderCarForStream( ctx context.Context, + outStream io.Writer, root cid.Cid, path string, - scope types.DagScope, - bytes *types.ByteRange, + scope trustlessutils.DagScope, + bytes *trustlessutils.ByteRange, store *DeferredStorageCar, - outStream io.Writer, ) *DuplicateAdderCar { blockStream := &blockStream{ctx: ctx, seen: make(map[cid.Cid]struct{})} @@ -46,7 +57,7 @@ func NewDuplicateAdderCarForStream( blockStream.cond = sync.NewCond(&blockStream.mu) // create the car writer for the final stream - outgoing 
:= NewDeferredCarWriterForStream(root, outStream, carv2.AllowDuplicatePuts(true)) + outgoing := deferred.NewDeferredCarWriterForStream(outStream, []cid.Cid{root}, carv2.AllowDuplicatePuts(true)) return &DuplicateAdderCar{ DeferredCarWriter: outgoing, ctx: ctx, @@ -64,10 +75,10 @@ func (da *DuplicateAdderCar) addDupes() { defer func() { da.streamCompletion <- err }() - sel := types.PathScopeSelector(da.path, da.scope, da.bytes) + sel := trustlessutils.Request{Path: da.path, Scope: da.scope, Bytes: da.bytes}.Selector() // we're going to do a verified car where we add dupes back in - cfg := verifiedcar.Config{ + cfg := traversal.Config{ Root: da.root, Selector: sel, WriteDuplicatesOut: true, @@ -82,7 +93,7 @@ func (da *DuplicateAdderCar) addDupes() { lsys.TrustedStorage = true // run the verification - _, _, err = cfg.VerifyBlockStream(da.ctx, da.blockStream, lsys) + _, err = cfg.VerifyBlockStream(da.ctx, da.blockStream, lsys) } func (da *DuplicateAdderCar) BlockWriteOpener() linking.BlockWriteOpener { @@ -159,7 +170,7 @@ func (bs *blockStream) WriteBlock(blk blocks.Block) error { return nil } -func (bs *blockStream) Next() (blocks.Block, error) { +func (bs *blockStream) Next(ctx context.Context) (blocks.Block, error) { bs.mu.Lock() defer bs.mu.Unlock() @@ -167,6 +178,8 @@ func (bs *blockStream) Next() (blocks.Block, error) { select { case <-bs.ctx.Done(): return nil, bs.ctx.Err() + case <-ctx.Done(): + return nil, ctx.Err() default: } if e := bs.blockBuffer.Front(); e != nil { diff --git a/pkg/storage/duplicateaddercar_test.go b/pkg/storage/duplicateaddercar_test.go index 29d03261..33c05197 100644 --- a/pkg/storage/duplicateaddercar_test.go +++ b/pkg/storage/duplicateaddercar_test.go @@ -8,13 +8,13 @@ import ( "github.com/filecoin-project/lassie/pkg/internal/testutil" "github.com/filecoin-project/lassie/pkg/storage" - "github.com/filecoin-project/lassie/pkg/types" blocks "github.com/ipfs/go-block-format" unixfs "github.com/ipfs/go-unixfsnode/testutil" carv2 
"github.com/ipld/go-car/v2" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/storage/memstore" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/stretchr/testify/require" ) @@ -33,7 +33,7 @@ func TestDuplicateAdderCar(t *testing.T) { store := storage.NewDeferredStorageCar("", unixfsFileWithDups.Root) ctx := context.Background() - carWriter := storage.NewDuplicateAdderCarForStream(ctx, unixfsFileWithDups.Root, "", types.DagScopeAll, nil, store, buf) + carWriter := storage.NewDuplicateAdderCarForStream(ctx, buf, unixfsFileWithDups.Root, "", trustlessutils.DagScopeAll, nil, store) cachingTempStore := storage.NewCachingTempStore(carWriter.BlockWriteOpener(), store) // write the root block, containing sharding metadata diff --git a/pkg/types/request.go b/pkg/types/request.go index 8a5654d6..81ac4059 100644 --- a/pkg/types/request.go +++ b/pkg/types/request.go @@ -3,22 +3,15 @@ package types import ( "errors" "fmt" - "math" - "net/url" - "strconv" "strings" - "github.com/cespare/xxhash/v2" "github.com/google/uuid" "github.com/ipfs/go-cid" "github.com/ipfs/go-unixfsnode" "github.com/ipld/go-ipld-prime" - "github.com/ipld/go-ipld-prime/datamodel" cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/ipld/go-ipld-prime/node/basicnode" ipldstorage "github.com/ipld/go-ipld-prime/storage" - "github.com/ipld/go-ipld-prime/traversal/selector" - "github.com/ipld/go-ipld-prime/traversal/selector/builder" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" ) @@ -54,12 +47,11 @@ func (id *RetrievalID) UnmarshalText(data []byte) error { // RetrievalRequest describes the parameters of a request. It is intended to be // immutable. type RetrievalRequest struct { + trustlessutils.Request + // RetrievalID is a unique identifier for this request. 
RetrievalID RetrievalID - // Cid is the root CID to fetch. - Cid cid.Cid - // LinkSystem is the destination for the blocks to fetch, it may be // pre-populated with existing blocks in the DAG, in which case they may // be used to satisfy the request (except in the case of an HTTP retrieval, @@ -70,21 +62,6 @@ type RetrievalRequest struct { // Path and Scope will be used to generate a selector. Selector ipld.Node - // Path is the optional path within the DAG to fetch. - Path string - - // Scope describes the scope of the DAG to fetch. If the Selector parameter - // is not set, Scope and Path will be used to construct a selector. - Scope DagScope - - // Bytes is the optional byte range within the DAG to fetch. If not set - // the default byte range will fetch the entire file. - Bytes *ByteRange - - // Duplicates is a flag that indicates whether duplicate blocks should be - // stored into the LinkSystem where they occur in the traversal. - Duplicates bool - // Protocols is an optional list of protocols to use when fetching the DAG. // If nil, the default protocols will be used. Protocols []multicodec.Code @@ -114,8 +91,8 @@ func NewRequestForPath( store ipldstorage.WritableStorage, cid cid.Cid, path string, - dagScope DagScope, - byteRange *ByteRange, + dagScope trustlessutils.DagScope, + byteRange *trustlessutils.ByteRange, ) (RetrievalRequest, error) { retrievalId, err := NewRetrievalID() @@ -132,106 +109,25 @@ func NewRequestForPath( unixfsnode.AddUnixFSReificationToLinkSystem(&linkSystem) return RetrievalRequest{ + Request: trustlessutils.Request{ + Root: cid, + Path: path, + Scope: dagScope, + Bytes: byteRange, + Duplicates: false, + }, RetrievalID: retrievalId, - Cid: cid, - Path: path, - Scope: dagScope, - Bytes: byteRange, LinkSystem: linkSystem, - Duplicates: false, }, nil } -// PathScopeSelector generates a selector for the given path, scope and byte -// range. Use DefaultByteRange() for the default byte range value if none is -// specified. 
-func PathScopeSelector(path string, scope DagScope, bytes *ByteRange) ipld.Node { - // Turn the path / scope into a selector - terminal := scope.TerminalSelectorSpec() - if !bytes.IsDefault() { - var to int64 = math.MaxInt64 - if bytes.To != nil { - to = *bytes.To - if to > 0 { - to++ // selector is exclusive, so increment the end - } - } - ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any) - // if we reach a terminal and it's not a file, then we need to fall-back to the default - // selector for the given scope. We do this with a union of the original terminal. - if scope == DagScopeEntity { - // entity is a special case which we can't just union with our matcher because it - // has its own matcher in it which we need to replace with the subset matcher. - terminal = ssb.ExploreInterpretAs("unixfs", - ssb.ExploreUnion( - ssb.MatcherSubset(bytes.From, to), - ssb.ExploreRecursive( - selector.RecursionLimitDepth(1), - ssb.ExploreAll(ssb.ExploreRecursiveEdge()), - ), - ), - ) - } else { - terminal = ssb.ExploreUnion( - ssb.ExploreInterpretAs("unixfs", ssb.MatcherSubset(bytes.From, to)), - terminal, - ) - } - } - return unixfsnode.UnixFSPathSelectorBuilder(path, terminal, false) -} - // GetSelector will safely return a selector for this request. If none has been // set, it will generate one for the path & scope. func (r RetrievalRequest) GetSelector() ipld.Node { if r.Selector != nil { // custom selector return r.Selector } - return PathScopeSelector(r.Path, r.Scope, r.Bytes) -} - -// GetUrlPath returns a URL path and query string valid with the Trusted HTTP -// Gateway spec by combining the Path and the Scope of this request. -// -// If this request uses an explicit Selector rather than a Path, an error will -// be returned. -// -// The returned value includes a URL escaped form of the originally requested -// path. 
-func (r RetrievalRequest) GetUrlPath() (string, error) { - if r.Selector != nil { - return "", errors.New("RetrievalRequest uses an explicit selector, can't generate a URL path for it") - } - scope := r.Scope - if r.Scope == "" { - scope = DagScopeAll - } - // TODO: remove once relevant endpoints support dag-scope - legacyScope := string(scope) - if legacyScope == string(DagScopeEntity) { - legacyScope = "file" - } - byteRange := "" - if !r.Bytes.IsDefault() { - byteRange = "&entity-bytes=" + r.Bytes.String() - } - path := PathEscape(r.Path) - return fmt.Sprintf("%s?dag-scope=%s&car-scope=%s%s", path, scope, legacyScope, byteRange), nil -} - -func PathEscape(path string) string { - if path == "" { - return path - } - var sb strings.Builder - var ps datamodel.PathSegment - p := datamodel.ParsePath(path) - for p.Len() > 0 { - ps, p = p.Shift() - sb.WriteRune('/') - sb.WriteString(url.PathEscape(ps.String())) - } - return sb.String() + return r.Request.Selector() } // GetDescriptorString returns a URL and query string-style descriptor string @@ -247,9 +143,9 @@ func (r RetrievalRequest) GetDescriptorString() (string, error) { } scope := r.Scope if r.Scope == "" { - scope = DagScopeAll + scope = trustlessutils.DagScopeAll } - path := PathEscape(r.Path) + path := trustlessutils.PathEscape(r.Path) byteRange := "" if !r.Bytes.IsDefault() { byteRange = "&entity-bytes=" + r.Bytes.String() @@ -282,7 +178,7 @@ func (r RetrievalRequest) GetDescriptorString() (string, error) { } providers = "&providers=" + ps } - return fmt.Sprintf("/ipfs/%s%s?dag-scope=%s%s&dups=%s%s%s%s", r.Cid.String(), path, scope, byteRange, dups, blockLimit, protocols, providers), nil + return fmt.Sprintf("/ipfs/%s%s?dag-scope=%s%s&dups=%s%s%s%s", r.Root.String(), path, scope, byteRange, dups, blockLimit, protocols, providers), nil } @@ -306,36 +202,6 @@ func (r RetrievalRequest) GetSupportedProtocols(allSupportedProtocols []multicod return supportedProtocols } -func (r RetrievalRequest) Etag() string { 
- // similar, but extended form of: - // https://github.com/ipfs/boxo/blob/a91e44dbdbd4c36a5b25a1b9df6ee237aa4442d2/gateway/handler_car.go#L167-L184 - sb := strings.Builder{} - sb.WriteString("/ipfs/") - sb.WriteString(r.Cid.String()) - if r.Path != "" { - sb.WriteRune('/') - sb.WriteString(datamodel.ParsePath(r.Path).String()) - } - if r.Scope != DagScopeAll { - sb.WriteRune('.') - sb.WriteString(string(r.Scope)) - } - if !r.Bytes.IsDefault() { - sb.WriteRune('.') - sb.WriteString(strconv.FormatInt(r.Bytes.From, 10)) - if r.Bytes.To != nil { - sb.WriteRune('.') - sb.WriteString(strconv.FormatInt(*r.Bytes.To, 10)) - } - } - if r.Duplicates { - sb.WriteString(".dups") - } - sb.WriteString(".dfs") - suffix := strconv.FormatUint(xxhash.Sum64([]byte(sb.String())), 32) - return `"` + r.Cid.String() + ".car." + suffix + `"` -} - func (r RetrievalRequest) HasPreloadLinkSystem() bool { return r.PreloadLinkSystem.StorageReadOpener != nil && r.PreloadLinkSystem.StorageWriteOpener != nil } diff --git a/pkg/types/request_test.go b/pkg/types/request_test.go index 0d6a97c7..dcee276e 100644 --- a/pkg/types/request_test.go +++ b/pkg/types/request_test.go @@ -1,11 +1,11 @@ package types_test import ( - "fmt" "testing" "github.com/filecoin-project/lassie/pkg/types" "github.com/ipfs/go-cid" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" ) @@ -13,175 +13,9 @@ import ( var testCidV1 = cid.MustParse("bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi") var testCidV0 = cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK") -func TestEtag(t *testing.T) { - // To generate independent fixtures using Node.js, `npm install xxhash` then - // in a REPL: - // - // xx = (s) => require('xxhash').hash64(Buffer.from(s), 0).readBigUInt64LE(0).toString(32) - // - // then generate the suffix with the expected construction: - // - // xx('/ipfs/QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.dfs') - - 
testCases := []struct { - cid cid.Cid - path string - scope types.DagScope - bytes *types.ByteRange - dups bool - expected string - }{ - { - cid: testCidV0, - scope: types.DagScopeAll, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.58mf8vcmd2eo8"`, - }, - { - cid: testCidV0, - scope: types.DagScopeEntity, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.3t6g88g8u04i6"`, - }, - { - cid: testCidV0, - scope: types.DagScopeBlock, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.1fe71ua3km0b5"`, - }, - { - cid: testCidV0, - scope: types.DagScopeAll, - dups: true, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.4mglp6etuagob"`, - }, - { - cid: testCidV0, - scope: types.DagScopeEntity, - dups: true, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.fqhsp0g4l66m1"`, - }, - { - cid: testCidV0, - scope: types.DagScopeBlock, - dups: true, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.8u1ga109k62pp"`, - }, - { - cid: testCidV1, - scope: types.DagScopeAll, - path: "/some/path/to/thing", - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.8q5lna3r43lgj"`, - }, - { - cid: testCidV1, - scope: types.DagScopeEntity, - path: "/some/path/to/thing", - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.e4hni8qqgeove"`, - }, - { - cid: testCidV1, - scope: types.DagScopeBlock, - path: "/some/path/to/thing", - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.7pdc786smhd1n"`, - }, - { - cid: testCidV1, - scope: types.DagScopeAll, - path: "/some/path/to/thing", - dups: true, - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.bdfv1q76a1oem"`, - }, - { - cid: testCidV1, - scope: types.DagScopeEntity, - path: "/some/path/to/thing", - dups: true, - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.790m13mh0recp"`, - }, - { - cid: testCidV1, - scope: 
types.DagScopeBlock, - path: "/some/path/to/thing", - dups: true, - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.972jmjvd3o3"`, - }, - // path variations should be normalised - { - cid: testCidV1, - scope: types.DagScopeAll, - path: "some/path/to/thing", - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.8q5lna3r43lgj"`, - }, - { - cid: testCidV1, - scope: types.DagScopeAll, - path: "///some//path//to/thing/", - expected: `"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.car.8q5lna3r43lgj"`, - }, - { - cid: cid.MustParse("bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk"), - scope: types.DagScopeAll, - expected: `"bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk.car.9lumqv26cg30t"`, - }, - { - cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), - scope: types.DagScopeAll, - bytes: &types.ByteRange{From: 0}, // default, not included - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.58mf8vcmd2eo8"`, - }, - { - cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), - scope: types.DagScopeAll, - bytes: &types.ByteRange{From: 10}, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.560ditjelh0u2"`, - }, - { - cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), - scope: types.DagScopeAll, - bytes: &types.ByteRange{From: 0, To: ptr(200)}, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.faqf14andvfmb"`, - }, - { - cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), - scope: types.DagScopeAll, - bytes: &types.ByteRange{From: 100, To: ptr(200)}, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.bvebrb14stt94"`, - }, - { - cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), - scope: types.DagScopeEntity, - bytes: &types.ByteRange{From: 100, 
To: ptr(200)}, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.bq3u6t9t877t3"`, - }, - { - cid: cid.MustParse("QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK"), - scope: types.DagScopeEntity, - dups: true, - bytes: &types.ByteRange{From: 100, To: ptr(200)}, - expected: `"QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK.car.fhf498an52uqb"`, - }, - } - - for _, tc := range testCases { - br := "" - if tc.bytes != nil { - br = ":" + tc.bytes.String() - } - t.Run(fmt.Sprintf("%s:%s:%s:%v%s", tc.cid.String(), tc.path, tc.scope, tc.dups, br), func(t *testing.T) { - rr := types.RetrievalRequest{ - Cid: tc.cid, - Path: tc.path, - Scope: tc.scope, - Bytes: tc.bytes, - Duplicates: tc.dups, - } - actual := rr.Etag() - if actual != tc.expected { - t.Errorf("expected %s, got %s", tc.expected, actual) - } - }) - } -} - -func TestRequestStringRepresentations(t *testing.T) { + // some of the parts of this test are duplicated in go-trustless-utils/types_test.go + testCases := []struct { name string request types.RetrievalRequest @@ -191,139 +25,140 @@ func TestRequestStringRepresentations(t *testing.T) { { name: "plain", request: types.RetrievalRequest{ - Cid: testCidV1, + Request: trustlessutils.Request{Root: testCidV1}, }, - expectedUrlPath: "?dag-scope=all&car-scope=all", + expectedUrlPath: "?dag-scope=all", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=all&dups=n", }, { name: "path", request: types.RetrievalRequest{ - Cid: testCidV1, - Path: "/some/path/to/thing", + Request: trustlessutils.Request{Root: testCidV1, Path: "/some/path/to/thing"}, }, - expectedUrlPath: "/some/path/to/thing?dag-scope=all&car-scope=all", + expectedUrlPath: "/some/path/to/thing?dag-scope=all", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi/some/path/to/thing?dag-scope=all&dups=n", }, { name: "escaped path", request: types.RetrievalRequest{ - Cid: testCidV1, - Path: "/?/#/;/&/ /!", + Request: 
trustlessutils.Request{Root: testCidV1, Path: "/?/#/;/&/ /!"}, }, - expectedUrlPath: "/%3F/%23/%3B/&/%20/%21?dag-scope=all&car-scope=all", + expectedUrlPath: "/%3F/%23/%3B/&/%20/%21?dag-scope=all", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi/%3F/%23/%3B/&/%20/%21?dag-scope=all&dups=n", }, { name: "entity", request: types.RetrievalRequest{ - Cid: testCidV1, - Scope: types.DagScopeEntity, + Request: trustlessutils.Request{Root: testCidV1, Scope: trustlessutils.DagScopeEntity}, }, - expectedUrlPath: "?dag-scope=entity&car-scope=file", + expectedUrlPath: "?dag-scope=entity", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=entity&dups=n", }, { name: "block", request: types.RetrievalRequest{ - Cid: testCidV1, - Scope: types.DagScopeBlock, + Request: trustlessutils.Request{Root: testCidV1, Scope: trustlessutils.DagScopeBlock}, }, - expectedUrlPath: "?dag-scope=block&car-scope=block", + expectedUrlPath: "?dag-scope=block", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=block&dups=n", }, { name: "protocol", request: types.RetrievalRequest{ - Cid: testCidV0, + Request: trustlessutils.Request{Root: testCidV0}, Protocols: []multicodec.Code{multicodec.TransportGraphsyncFilecoinv1}, }, - expectedUrlPath: "?dag-scope=all&car-scope=all", + expectedUrlPath: "?dag-scope=all", expectedDescriptor: "/ipfs/QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK?dag-scope=all&dups=n&protocols=transport-graphsync-filecoinv1", }, { name: "protocols", request: types.RetrievalRequest{ - Cid: testCidV1, + Request: trustlessutils.Request{Root: testCidV1}, Protocols: []multicodec.Code{multicodec.TransportBitswap, multicodec.TransportIpfsGatewayHttp}, }, - expectedUrlPath: "?dag-scope=all&car-scope=all", + expectedUrlPath: "?dag-scope=all", expectedDescriptor: 
"/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=all&dups=n&protocols=transport-bitswap,transport-ipfs-gateway-http", }, { name: "duplicates", request: types.RetrievalRequest{ - Cid: testCidV0, - Duplicates: true, + Request: trustlessutils.Request{Root: testCidV0, Duplicates: true}, }, - expectedUrlPath: "?dag-scope=all&car-scope=all", + expectedUrlPath: "?dag-scope=all", expectedDescriptor: "/ipfs/QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK?dag-scope=all&dups=y", }, { name: "block limit", request: types.RetrievalRequest{ - Cid: testCidV1, + Request: trustlessutils.Request{Root: testCidV1}, MaxBlocks: 100, }, - expectedUrlPath: "?dag-scope=all&car-scope=all", + expectedUrlPath: "?dag-scope=all", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=all&dups=n&blockLimit=100", }, { name: "fixed peer", request: types.RetrievalRequest{ - Cid: testCidV1, + Request: trustlessutils.Request{Root: testCidV1}, FixedPeers: must(types.ParseProviderStrings("/ip4/127.0.0.1/tcp/5000/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4")), }, - expectedUrlPath: "?dag-scope=all&car-scope=all", + expectedUrlPath: "?dag-scope=all", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=all&dups=n&providers=/ip4/127.0.0.1/tcp/5000/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4", }, { name: "fixed peers", request: types.RetrievalRequest{ - Cid: testCidV1, + Request: trustlessutils.Request{Root: testCidV1}, FixedPeers: must(types.ParseProviderStrings("/dns/beep.boop.com/tcp/3747/p2p/12D3KooWDXAVxjSTKbHKpNk8mFVQzHdBDvR4kybu582Xd4Zrvagg,/ip4/127.0.0.1/tcp/5000/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4")), }, - expectedUrlPath: "?dag-scope=all&car-scope=all", + expectedUrlPath: "?dag-scope=all", expectedDescriptor: 
"/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=all&dups=n&providers=/dns/beep.boop.com/tcp/3747/p2p/12D3KooWDXAVxjSTKbHKpNk8mFVQzHdBDvR4kybu582Xd4Zrvagg,/ip4/127.0.0.1/tcp/5000/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4", }, { name: "byte range", request: types.RetrievalRequest{ - Cid: testCidV1, - Bytes: &types.ByteRange{From: 100, To: ptr(200)}, + Request: trustlessutils.Request{ + Root: testCidV1, + Bytes: &trustlessutils.ByteRange{From: 100, To: ptr(200)}, + }, }, - expectedUrlPath: "?dag-scope=all&car-scope=all&entity-bytes=100:200", + expectedUrlPath: "?dag-scope=all&entity-bytes=100:200", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=all&entity-bytes=100:200&dups=n", }, { name: "byte range -ve", request: types.RetrievalRequest{ - Cid: testCidV1, - Bytes: &types.ByteRange{From: -100}, + Request: trustlessutils.Request{ + Root: testCidV1, + Bytes: &trustlessutils.ByteRange{From: -100}, + }, }, - expectedUrlPath: "?dag-scope=all&car-scope=all&entity-bytes=-100:*", + expectedUrlPath: "?dag-scope=all&entity-bytes=-100:*", expectedDescriptor: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?dag-scope=all&entity-bytes=-100:*&dups=n", }, { name: "all the things", request: types.RetrievalRequest{ - Cid: testCidV0, - Path: "/some/path/to/thing", - Scope: types.DagScopeEntity, - Duplicates: true, - Bytes: &types.ByteRange{From: 100, To: ptr(-200)}, + Request: trustlessutils.Request{ + Root: testCidV0, + Path: "/some/path/to/thing", + Scope: trustlessutils.DagScopeEntity, + Duplicates: true, + Bytes: &trustlessutils.ByteRange{From: 100, To: ptr(-200)}, + }, MaxBlocks: 222, Protocols: []multicodec.Code{multicodec.TransportBitswap, multicodec.TransportIpfsGatewayHttp}, FixedPeers: 
must(types.ParseProviderStrings("/dns/beep.boop.com/tcp/3747/p2p/12D3KooWDXAVxjSTKbHKpNk8mFVQzHdBDvR4kybu582Xd4Zrvagg,/ip4/127.0.0.1/tcp/5000/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4")), }, - expectedUrlPath: "/some/path/to/thing?dag-scope=entity&car-scope=file&entity-bytes=100:-200", + expectedUrlPath: "/some/path/to/thing?dag-scope=entity&entity-bytes=100:-200", expectedDescriptor: "/ipfs/QmVXsSVjwxMsCwKRCUxEkGb4f4B98gXVy3ih3v4otvcURK/some/path/to/thing?dag-scope=entity&entity-bytes=100:-200&dups=y&blockLimit=222&protocols=transport-bitswap,transport-ipfs-gateway-http&providers=/dns/beep.boop.com/tcp/3747/p2p/12D3KooWDXAVxjSTKbHKpNk8mFVQzHdBDvR4kybu582Xd4Zrvagg,/ip4/127.0.0.1/tcp/5000/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4", }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - actual, err := tc.request.GetUrlPath() + actual, err := tc.request.Request.UrlPath() require.NoError(t, err) require.Equal(t, tc.expectedUrlPath, actual) actual, err = tc.request.GetDescriptorString() diff --git a/pkg/types/types.go b/pkg/types/types.go index 99bd87f4..4cf362a1 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -5,15 +5,10 @@ import ( "errors" "fmt" "net/url" - "strconv" - "strings" "time" "github.com/filecoin-project/go-state-types/abi" "github.com/ipfs/go-cid" - "github.com/ipfs/go-unixfsnode" - "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/traversal/selector/builder" "github.com/ipni/go-libipni/maurl" "github.com/ipni/go-libipni/metadata" "github.com/libp2p/go-libp2p/core/peer" @@ -263,93 +258,3 @@ func RetrievalIDFromContext(ctx context.Context) (RetrievalID, error) { } return id, nil } - -type DagScope string - -const DagScopeAll DagScope = "all" -const DagScopeEntity DagScope = "entity" -const DagScopeBlock DagScope = "block" - -var matcherSelector = builder.NewSelectorSpecBuilder(basicnode.Prototype.Any).Matcher() - -func (ds DagScope) TerminalSelectorSpec() 
builder.SelectorSpec { - switch ds { - case DagScopeAll: - return unixfsnode.ExploreAllRecursivelySelector - case DagScopeEntity: - return unixfsnode.MatchUnixFSEntitySelector - case DagScopeBlock: - return matcherSelector - case DagScope(""): - return unixfsnode.ExploreAllRecursivelySelector // default to explore-all for zero-value DagScope - } - panic(fmt.Sprintf("unknown DagScope: [%s]", string(ds))) -} - -func ParseDagScope(s string) (DagScope, error) { - switch s { - case "all": - return DagScopeAll, nil - case "entity": - return DagScopeEntity, nil - case "block": - return DagScopeBlock, nil - default: - return DagScopeAll, errors.New("invalid dag-scope") - } -} - -func (ds DagScope) AcceptHeader() string { - return "application/vnd.ipld.car;version=1;order=dfs;dups=y" -} - -// ByteRange represents a range of bytes in a file. The default value is 0 to -// the end of the file, [0:*]. -// The range is inclusive at both ends, so the case of From==To selects a single -// byte. -// Where the end is * or beyond the end of the file, the end of the file is -// selected. 
-type ByteRange struct { - From int64 - To *int64 -} - -// IsDefault is roughly equivalent to the range matching [0:*] -func (br *ByteRange) IsDefault() bool { - return br == nil || br.From == 0 && br.To == nil -} - -func (br *ByteRange) String() string { - if br.IsDefault() { - return "0:*" - } - to := "*" // default to end of file - if br.To != nil { - to = strconv.FormatInt(*br.To, 10) - } - return fmt.Sprintf("%d:%s", br.From, to) -} - -func ParseByteRange(s string) (ByteRange, error) { - br := ByteRange{} - if s == "" { - return br, nil - } - parts := strings.Split(s, ":") - if len(parts) != 2 { - return br, fmt.Errorf("invalid entity-bytes: %s", s) - } - var err error - br.From, err = strconv.ParseInt(parts[0], 10, 64) - if err != nil { - return br, fmt.Errorf("invalid entity-bytes: %s (%w)", s, err) - } - if parts[1] != "*" { - to, err := strconv.ParseInt(parts[1], 10, 64) - if err != nil { - return br, fmt.Errorf("invalid entity-bytes: %s (%w)", s, err) - } - br.To = &to - } - return br, nil -} diff --git a/pkg/verifiedcar/verifiedcar.go b/pkg/verifiedcar/verifiedcar.go deleted file mode 100644 index cb6d989a..00000000 --- a/pkg/verifiedcar/verifiedcar.go +++ /dev/null @@ -1,281 +0,0 @@ -package verifiedcar - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "math" - - // include all the codecs we care about - dagpb "github.com/ipld/go-codec-dagpb" - _ "github.com/ipld/go-ipld-prime/codec/cbor" - _ "github.com/ipld/go-ipld-prime/codec/dagcbor" - _ "github.com/ipld/go-ipld-prime/codec/dagjson" - _ "github.com/ipld/go-ipld-prime/codec/json" - _ "github.com/ipld/go-ipld-prime/codec/raw" - - blocks "github.com/ipfs/go-block-format" - "github.com/ipfs/go-cid" - format "github.com/ipfs/go-ipld-format" - "github.com/ipfs/go-unixfsnode" - "github.com/ipld/go-car/v2" - "github.com/ipld/go-ipld-prime/datamodel" - "github.com/ipld/go-ipld-prime/linking" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/ipld/go-ipld-prime/node/basicnode" - 
"github.com/ipld/go-ipld-prime/traversal" - "github.com/ipld/go-ipld-prime/traversal/selector" - "go.uber.org/multierr" -) - -var ( - ErrMalformedCar = errors.New("malformed CAR") - ErrBadVersion = errors.New("bad CAR version") - ErrBadRoots = errors.New("CAR root CID mismatch") - ErrUnexpectedBlock = errors.New("unexpected block in CAR") - ErrExtraneousBlock = errors.New("extraneous block in CAR") - ErrMissingBlock = errors.New("missing block in CAR") -) - -type BlockReader interface { - Next() (blocks.Block, error) -} - -var protoChooser = dagpb.AddSupportToChooser(basicnode.Chooser) - -type Config struct { - Root cid.Cid // The single root we expect to appear in the CAR and that we use to run our traversal against - AllowCARv2 bool // If true, allow CARv2 files to be received, otherwise strictly only allow CARv1 - Selector datamodel.Node // The selector to execute, starting at the provided Root, to verify the contents of the CAR - CheckRootsMismatch bool // Check if roots match expected behavior - ExpectDuplicatesIn bool // Handles whether the incoming stream has duplicates - WriteDuplicatesOut bool // Handles whether duplicates should be written a second time as blocks - MaxBlocks uint64 // set a budget for the traversal -} - -// Verify reads a CAR from the provided reader, verifies the contents are -// strictly what is specified by this Config and writes the blocks to the -// provided BlockWriteOpener. It returns the number of blocks and bytes -// written to the BlockWriteOpener. 
-// -// Verification is performed according to the CAR construction rules contained -// within the Trustless, and Path Gateway specifications: -// -// * https://specs.ipfs.tech/http-gateways/trustless-gateway/ -// -// * https://specs.ipfs.tech/http-gateways/path-gateway/ -func (cfg Config) VerifyCar(ctx context.Context, rdr io.Reader, lsys linking.LinkSystem) (uint64, uint64, error) { - cbr, err := car.NewBlockReader(rdr, car.WithTrustedCAR(false)) - if err != nil { - // TODO: post-1.19: fmt.Errorf("%w: %w", ErrMalformedCar, err) - return 0, 0, multierr.Combine(ErrMalformedCar, err) - } - - switch cbr.Version { - case 1: - case 2: - if !cfg.AllowCARv2 { - return 0, 0, ErrBadVersion - } - default: - return 0, 0, ErrBadVersion - } - - if cfg.CheckRootsMismatch && (len(cbr.Roots) != 1 || cbr.Roots[0] != cfg.Root) { - return 0, 0, ErrBadRoots - } - return cfg.VerifyBlockStream(ctx, cbr, lsys) -} - -func (cfg Config) VerifyBlockStream(ctx context.Context, cbr BlockReader, lsys linking.LinkSystem) (uint64, uint64, error) { - sel, err := selector.CompileSelector(cfg.Selector) - if err != nil { - return 0, 0, err - } - - cr := &carReader{ - cbr: cbr, - } - bt := &writeTracker{} - lsys.TrustedStorage = true // we can rely on the CAR decoder to check CID integrity - unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) - - nbls, lsys := NewNextBlockLinkSystem(ctx, cfg, cr, bt, lsys) - - // run traversal in this goroutine - progress := traversal.Progress{ - Cfg: &traversal.Config{ - Ctx: ctx, - LinkSystem: lsys, - LinkTargetNodePrototypeChooser: protoChooser, - }, - } - if cfg.MaxBlocks > 0 { - progress.Budget = &traversal.Budget{ - LinkBudget: int64(cfg.MaxBlocks) - 1, // first block is already loaded - NodeBudget: math.MaxInt64, - } - } - - rootNode, err := loadNode(ctx, cfg.Root, lsys) - if err != nil { - return 0, 0, fmt.Errorf("failed to load root node: %w", err) - } - if err := progress.WalkMatching(rootNode, sel, unixfsnode.BytesConsumingMatcher); err != nil { - return 0, 
0, traversalError(err) - } - - if nbls.Error != nil { - // capture any errors not bubbled up through the traversal, i.e. see - // https://github.com/ipld/go-ipld-prime/pull/524 - return 0, 0, fmt.Errorf("block load failed during traversal: %w", nbls.Error) - } - - // make sure we don't have any extraneous data beyond what the traversal needs - _, err = cbr.Next() - if err == nil { - return 0, 0, ErrExtraneousBlock - } else if !errors.Is(err, io.EOF) { - return 0, 0, err - } - - // wait for parser to finish and provide errors or stats - return bt.blocks, bt.bytes, nil -} - -func loadNode(ctx context.Context, rootCid cid.Cid, lsys linking.LinkSystem) (datamodel.Node, error) { - lnk := cidlink.Link{Cid: rootCid} - lnkCtx := linking.LinkContext{Ctx: ctx} - proto, err := protoChooser(lnk, lnkCtx) - if err != nil { - return nil, fmt.Errorf("failed to choose prototype for CID %s: %w", rootCid.String(), err) - } - rootNode, err := lsys.Load(lnkCtx, lnk, proto) - if err != nil { - return nil, fmt.Errorf("failed to load root CID: %w", err) - } - return rootNode, nil -} - -type NextBlockLinkSystem struct { - Error error -} - -func NewNextBlockLinkSystem( - ctx context.Context, - cfg Config, - cr *carReader, - bt *writeTracker, - lsys linking.LinkSystem, -) (*NextBlockLinkSystem, linking.LinkSystem) { - nbls := &NextBlockLinkSystem{} - seen := make(map[cid.Cid]struct{}) - storageReadOpener := lsys.StorageReadOpener - - nextBlockReadOpener := func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { - cid := l.(cidlink.Link).Cid - var data []byte - var err error - if _, ok := seen[cid]; ok { - if cfg.ExpectDuplicatesIn { - // duplicate block, but in this case we are expecting the stream to have it - data, err = cr.readNextBlock(ctx, cid) - if err != nil { - return nil, err - } - if !cfg.WriteDuplicatesOut { - return bytes.NewReader(data), nil - } - } else { - // duplicate block, rely on the supplied LinkSystem to have stored this - rdr, err := storageReadOpener(lc, l) 
- if !cfg.WriteDuplicatesOut { - return rdr, err - } - data, err = io.ReadAll(rdr) - if err != nil { - return nil, err - } - } - } else { - seen[cid] = struct{}{} - data, err = cr.readNextBlock(ctx, cid) - if err != nil { - return nil, err - } - } - bt.recordBlock(data) - w, wc, err := lsys.StorageWriteOpener(lc) - if err != nil { - return nil, err - } - rdr := bytes.NewReader(data) - if _, err := io.Copy(w, rdr); err != nil { - return nil, err - } - if err := wc(l); err != nil { - return nil, err - } - if _, err := rdr.Seek(0, io.SeekStart); err != nil { - return nil, err - } - return io.NopCloser(rdr), nil - } - - // wrap nextBlockReadOpener in one that captures errors on `nbls` - lsys.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { - rdr, err := nextBlockReadOpener(lc, l) - if err != nil { - nbls.Error = err - return nil, err - } - return rdr, nil - } - - return nbls, lsys -} - -type carReader struct { - cbr BlockReader -} - -func (cr *carReader) readNextBlock(ctx context.Context, expected cid.Cid) ([]byte, error) { - blk, err := cr.cbr.Next() - if err != nil { - if errors.Is(err, io.EOF) { - return nil, format.ErrNotFound{Cid: expected} - } - return nil, multierr.Combine(ErrMalformedCar, err) - } - - // compare by multihash only - if !bytes.Equal(blk.Cid().Hash(), expected.Hash()) { - return nil, fmt.Errorf("%w: %s != %s", ErrUnexpectedBlock, blk.Cid(), expected) - } - return blk.RawData(), nil -} - -type writeTracker struct { - blocks uint64 - bytes uint64 -} - -func (bt *writeTracker) recordBlock(data []byte) { - bt.blocks++ - bt.bytes += uint64(len(data)) -} - -func traversalError(original error) error { - err := original - for { - if v, ok := err.(interface{ NotFound() bool }); ok && v.NotFound() { - // TODO: post-1.19: fmt.Errorf("%w: %w", ErrMissingBlock, err) - return multierr.Combine(ErrMissingBlock, err) - } - if err = errors.Unwrap(err); err == nil { - return original - } - } -} diff --git 
a/pkg/verifiedcar/verifiedcar_test.go b/pkg/verifiedcar/verifiedcar_test.go deleted file mode 100644 index 4b4e38bf..00000000 --- a/pkg/verifiedcar/verifiedcar_test.go +++ /dev/null @@ -1,885 +0,0 @@ -package verifiedcar_test - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "math/rand" - "os" - "testing" - "time" - - "github.com/filecoin-project/lassie/pkg/internal/testutil" - "github.com/filecoin-project/lassie/pkg/types" - "github.com/filecoin-project/lassie/pkg/verifiedcar" - blocks "github.com/ipfs/go-block-format" - "github.com/ipfs/go-cid" - gstestutil "github.com/ipfs/go-graphsync/testutil" - "github.com/ipfs/go-unixfsnode" - unixfs "github.com/ipfs/go-unixfsnode/testutil" - "github.com/ipld/go-car/v2" - "github.com/ipld/go-car/v2/storage" - "github.com/ipld/go-ipld-prime" - "github.com/ipld/go-ipld-prime/codec/dagjson" - "github.com/ipld/go-ipld-prime/datamodel" - "github.com/ipld/go-ipld-prime/linking" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/storage/memstore" - "github.com/ipld/go-ipld-prime/traversal" - "github.com/ipld/go-ipld-prime/traversal/selector" - "github.com/ipld/go-ipld-prime/traversal/selector/builder" - selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" - trustlesspathing "github.com/ipld/ipld/specs/pkg-go/trustless-pathing" - mh "github.com/multiformats/go-multihash" - "github.com/stretchr/testify/require" -) - -func TestUnixfs20mVariety(t *testing.T) { - req := require.New(t) - - testCases, err := trustlesspathing.Unixfs20mVarietyCases() - req.NoError(err) - storage, closer, err := trustlesspathing.Unixfs20mVarietyReadableStorage() - req.NoError(err) - defer closer.Close() - - lsys := cidlink.DefaultLinkSystem() - lsys.TrustedStorage = true - unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) - lsys.SetReadStorage(storage) - - for _, tc := range testCases { - t.Run(tc.Name, func(t *testing.T) { - req := 
require.New(t) - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - t.Logf("query=%s, blocks=%d", tc.AsQuery(), len(tc.ExpectedCids)) - - // tc.ExpectedCids is in the order we expect to see them in a properly - // formed trustless CAR for the given query. So we build our list of - // expected blocks in that order and feed it through makeCarStream to - // produce the expected CAR. - expectedBlocks := make([]expectedBlock, len(tc.ExpectedCids)) - for ii, ec := range tc.ExpectedCids { - byt, err := lsys.LoadRaw(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: ec}) - req.NoError(err) - blk, err := blocks.NewBlockWithCid(byt, ec) - req.NoError(err) - expectedBlocks[ii] = expectedBlock{blk, false} - } - - carStream, errorCh := makeCarStream(t, ctx, []cid.Cid{tc.Root}, expectedBlocks, false, false, false, nil, false, false) - - lsys := cidlink.DefaultLinkSystem() - var writeCounter int - lsys.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { - var buf bytes.Buffer - return &buf, func(l datamodel.Link) error { - req.Equal(expectedBlocks[writeCounter].Cid().String(), l.(cidlink.Link).Cid.String(), "block %d", writeCounter) - req.Equal(expectedBlocks[writeCounter].RawData(), buf.Bytes(), "block %d", writeCounter) - writeCounter++ - return nil - }, nil - } - lsys.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { - return nil, fmt.Errorf("unexpected read of %s", l.String()) - } - - // Run the verifier over the CAR stream to see if we end up with - // the same query. 
- scope, err := types.ParseDagScope(tc.Scope) - req.NoError(err) - var byteRange *types.ByteRange - if tc.ByteRange != "" { - br, err := types.ParseByteRange(tc.ByteRange) - req.NoError(err) - byteRange = &br - } - cfg := verifiedcar.Config{ - Root: tc.Root, - Selector: types.PathScopeSelector(tc.Path, scope, byteRange), - } - { - selBytes, _ := ipld.Encode(cfg.Selector, dagjson.Encode) - t.Logf("selector=%s, entity-bytes=%s", string(selBytes), tc.ByteRange) - } - blockCount, byteCount, err := cfg.VerifyCar(ctx, carStream, lsys) - - req.NoError(err) - req.Equal(count(expectedBlocks), blockCount) - req.Equal(sizeOf(expectedBlocks), byteCount) - req.Equal(int(count(expectedBlocks)), writeCounter) - - select { - case err := <-errorCh: - req.NoError(err) - default: - } - - // Make sure we consumed the entire stream. - byt, err := io.ReadAll(carStream) - req.NoError(err) - req.Equal(0, len(byt)) - }) - } -} - -func TestVerifiedCar(t *testing.T) { - ctx := context.Background() - - req := require.New(t) - - rndSeed := time.Now().UTC().UnixNano() - t.Logf("random seed: %d", rndSeed) - var rndReader io.Reader = rand.New(rand.NewSource(rndSeed)) - - store := &testutil.CorrectedMemStore{ParentStore: &memstore.Store{ - Bag: make(map[string][]byte), - }} - lsys := cidlink.DefaultLinkSystem() - lsys.TrustedStorage = true - lsys.SetReadStorage(store) - lsys.SetWriteStorage(store) - - tbc1 := gstestutil.SetupBlockChain(ctx, t, lsys, 1000, 100) - root1 := tbc1.TipLink.(cidlink.Link).Cid - allBlocks := tbc1.AllBlocks() - extraneousLnk, err := lsys.Store(linking.LinkContext{}, cidlink.LinkPrototype{Prefix: cid.Prefix{Version: 1, Codec: 0x71, MhType: 0x12, MhLength: 32}}, basicnode.NewString("borp")) - req.NoError(err) - extraneousByts, err := lsys.LoadRaw(linking.LinkContext{}, extraneousLnk) - req.NoError(err) - extraneousBlk, err := blocks.NewBlockWithCid(extraneousByts, extraneousLnk.(cidlink.Link).Cid) - req.NoError(err) - - allSelector := 
selectorparse.CommonSelector_ExploreAllRecursively - - wrapPath := "/some/path/to/content" - - unixfsFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.GenerateFile(t, &lsys, rndReader, 4<<20) }) - unixfsFileBlocks := testutil.ToBlocks(t, lsys, unixfsFile.Root, allSelector) - - unixfsFileRange0_1048576Blocks := unixfsFileBlocks[0:6] - ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any) - ss := ssb.ExploreInterpretAs("unixfs", ssb.MatcherSubset(0, 1<<20)) - unixfsFileRange0_1048576Selector := ss.Node() - - // need the root plus the byte range of 1M->2M, which happens to include the - // block of the 0->1M range because of overlapping data - unixfsFileRange1048576_2097152Blocks := append(append([]blocks.Block{}, unixfsFileBlocks[0]), unixfsFileBlocks[5:10]...) - ss = ssb.ExploreInterpretAs("unixfs", ssb.MatcherSubset(1<<20, 2<<20)) - unixfsFileRange1048576_2097152Selector := ss.Node() - - unixfsFileWithDups := unixfs.GenerateFile(t, &lsys, testutil.ZeroReader{}, 4<<20) - unixfsFileWithDupsBlocks := testutil.ToBlocks(t, lsys, unixfsFileWithDups.Root, allSelector) - var unixfsDir unixfs.DirEntry - var unixfsDirBlocks []blocks.Block - for { - unixfsDir = testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.GenerateDirectory(t, &lsys, rndReader, 8<<20, false) }) - unixfsDirBlocks = testutil.ToBlocks(t, lsys, unixfsDir.Root, allSelector) - if len(unixfsDir.Children) > 2 { // we want at least 3 children to test the path subset selector - break - } - } - - unixfsShardedDir := testutil.GenerateNoDupes(func() unixfs.DirEntry { - return testutil.GenerateStrictlyNestedShardedDir(t, &lsys, rndReader, 8<<20) - }) - unixfsShardedDirBlocks := testutil.ToBlocks(t, lsys, unixfsShardedDir.Root, allSelector) - - unixfsPreloadSelector := unixfsnode.MatchUnixFSEntitySelector.Node() - - unixfsPreloadDirBlocks := testutil.ToBlocks(t, lsys, unixfsDir.Root, unixfsPreloadSelector) - unixfsPreloadShardedDirBlocks := testutil.ToBlocks(t, lsys, 
unixfsShardedDir.Root, unixfsPreloadSelector) - - unixfsDirSubsetSelector := unixfsnode.UnixFSPathSelectorBuilder(unixfsDir.Children[1].Path, unixfsnode.MatchUnixFSEntitySelector, false) - - unixfsWrappedPathSelector := unixfsnode.UnixFSPathSelectorBuilder(wrapPath, unixfsnode.ExploreAllRecursivelySelector, false) - unixfsWrappedPreloadPathSelector := unixfsnode.UnixFSPathSelectorBuilder(wrapPath, unixfsnode.MatchUnixFSEntitySelector, false) - preloadSubst := ssb.ExploreInterpretAs("unixfs", ssb.ExploreRecursive( - selector.RecursionLimitDepth(1), - ssb.ExploreAll(ssb.ExploreRecursiveEdge()), - )) - unixfsWrappedPreloadPathSelectorSubst := unixfsnode.UnixFSPathSelectorBuilder(wrapPath, preloadSubst, false) - - unixfsWrappedFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.WrapContent(t, rndReader, &lsys, unixfsFile, wrapPath, false) }) - unixfsWrappedFileBlocks := testutil.ToBlocks(t, lsys, unixfsWrappedFile.Root, allSelector) - // "trimmed" is similar to "exclusive" except that "trimmed" is a subset - // of a larger DAG, whereas "exclusive" is a complete DAG. - unixfsTrimmedWrappedFileBlocks := testutil.ToBlocks(t, lsys, unixfsWrappedFile.Root, unixfsWrappedPathSelector) - unixfsExclusiveWrappedFile := testutil.GenerateNoDupes(func() unixfs.DirEntry { return unixfs.WrapContent(t, rndReader, &lsys, unixfsFile, wrapPath, true) }) - unixfsExclusiveWrappedFileBlocks := testutil.ToBlocks(t, lsys, unixfsExclusiveWrappedFile.Root, allSelector) - - unixfsWrappedShardedDir := testutil.GenerateNoDupes(func() unixfs.DirEntry { - return unixfs.WrapContent(t, rndReader, &lsys, unixfsShardedDir, wrapPath, false) - }) - unixfsWrappedShardedDirBlocks := testutil.ToBlocks(t, lsys, unixfsWrappedShardedDir.Root, allSelector) - // "trimmed" is similar to "exclusive" except that "trimmed" is a subset - // of a larger DAG, whereas "exclusive" is a complete DAG. 
- unixfsTrimmedWrappedShardedDirBlocks := testutil.ToBlocks(t, lsys, unixfsWrappedShardedDir.Root, unixfsWrappedPathSelector) - unixfsTrimmedWrappedShardedDirOnlyBlocks := testutil.ToBlocks(t, lsys, unixfsWrappedShardedDir.Root, unixfsWrappedPreloadPathSelector) - unixfsExclusiveWrappedShardedDir := testutil.GenerateNoDupes(func() unixfs.DirEntry { - return unixfs.WrapContent(t, rndReader, &lsys, unixfsShardedDir, wrapPath, true) - }) - unixfsExclusiveWrappedShardedDirBlocks := testutil.ToBlocks(t, lsys, unixfsExclusiveWrappedShardedDir.Root, allSelector) - unixfsExclusiveWrappedShardedDirOnlyBlocks := testutil.ToBlocks(t, lsys, unixfsExclusiveWrappedShardedDir.Root, unixfsWrappedPreloadPathSelector) - - mismatchedCidBlk, _ := blocks.NewBlockWithCid(extraneousByts, allBlocks[99].Cid()) - testCases := []struct { - name string - skip bool - blocks []expectedBlock - roots []cid.Cid - carv2 bool - expectErr string - streamErr error - blockWriteErr error - cfg verifiedcar.Config - incomingHasDups bool - carAsCIDv0 bool - carAsRawBlocks bool - }{ - { - name: "complete carv1", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1}, - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "carv2 without AllowCARv2 errors", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1}, - carv2: true, - expectErr: "bad CAR version", - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "complete carv2 with AllowCARv2", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1}, - carv2: true, - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - AllowCARv2: true, - }, - }, - { - name: "carv1 with multiple roots errors", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1, root1}, - expectErr: "root CID mismatch", - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - CheckRootsMismatch: true, - }, - }, - { - name: "carv1 with multiple roots errors, no root 
cid mismatch", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1, root1}, - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "carv1 with wrong root errors", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{tbc1.AllBlocks()[1].Cid()}, - expectErr: "root CID mismatch", - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - CheckRootsMismatch: true, - }, - }, - { - name: "carv1 with extraneous trailing block errors", - blocks: append(consumedBlocks(append([]blocks.Block{}, allBlocks...)), expectedBlock{extraneousBlk, true}), - roots: []cid.Cid{root1}, - expectErr: "extraneous block in CAR", - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "carv1 with extraneous leading block errors", - blocks: append(consumedBlocks([]blocks.Block{extraneousBlk}), consumedBlocks(allBlocks)...), - roots: []cid.Cid{root1}, - expectErr: "unexpected block in CAR: " + extraneousLnk.(cidlink.Link).Cid.String() + " != " + allBlocks[0].Cid().String(), - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "carv1 with out-of-order blocks errors", - blocks: consumedBlocks(append(append([]blocks.Block{}, allBlocks[50:]...), allBlocks[0:50]...)), - roots: []cid.Cid{root1}, - expectErr: "unexpected block in CAR: " + allBlocks[50].Cid().String() + " != " + allBlocks[0].Cid().String(), - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "carv1 with mismatching CID errors", - blocks: consumedBlocks(append(append([]blocks.Block{}, allBlocks[0:99]...), mismatchedCidBlk)), - roots: []cid.Cid{root1}, - expectErr: "mismatch in content integrity", - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "carv1 over budget errors", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1}, - expectErr: (&traversal.ErrBudgetExceeded{ - BudgetKind: "link", - Path: 
datamodel.ParsePath("Parents/0/Parents/0/Parents/0"), - Link: tbc1.LinkTipIndex(3), - }).Error(), - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - MaxBlocks: 3, - }, - }, - { - name: "unixfs: large sharded file", - blocks: consumedBlocks(unixfsFileBlocks), - roots: []cid.Cid{unixfsFile.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFile.Root, - Selector: allSelector, - }, - }, - { - name: "unixfs: large directory", - blocks: consumedBlocks(unixfsDirBlocks), - roots: []cid.Cid{unixfsDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsDir.Root, - Selector: allSelector, - }, - }, - { - name: "unixfs: large sharded directory", - blocks: consumedBlocks(unixfsShardedDirBlocks), - roots: []cid.Cid{unixfsShardedDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsShardedDir.Root, - Selector: allSelector, - }, - }, - { - name: "unixfs: large sharded file with file scope", - blocks: consumedBlocks(unixfsFileBlocks), - roots: []cid.Cid{unixfsFile.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFile.Root, - Selector: unixfsPreloadSelector, - }, - }, - { - name: "unixfs: all of large directory with file scope, errors", - blocks: consumedBlocks(unixfsDirBlocks), - roots: []cid.Cid{unixfsDir.Root}, - expectErr: "extraneous block in CAR", - cfg: verifiedcar.Config{ - Root: unixfsDir.Root, - Selector: unixfsPreloadSelector, - }, - }, - { - name: "unixfs: all of large sharded directory with file scope, errors", - blocks: consumedBlocks(unixfsShardedDirBlocks), - roots: []cid.Cid{unixfsShardedDir.Root}, - expectErr: "unexpected block in CAR:", - cfg: verifiedcar.Config{ - Root: unixfsShardedDir.Root, - Selector: unixfsPreloadSelector, - }, - }, - { - name: "unixfs: all of large directory with file scope", - blocks: consumedBlocks(unixfsPreloadDirBlocks), - roots: []cid.Cid{unixfsDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsDir.Root, - Selector: unixfsPreloadSelector, - }, - }, - { - name: "unixfs: all of large sharded directory with file scope", - blocks: 
consumedBlocks(unixfsPreloadShardedDirBlocks), - roots: []cid.Cid{unixfsShardedDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsShardedDir.Root, - Selector: unixfsPreloadSelector, - }, - }, - { - name: "unixfs: pathed subset inside large directory with file scope, errors", - blocks: consumedBlocks(unixfsDirBlocks), - roots: []cid.Cid{unixfsDir.Root}, - expectErr: "unexpected block in CAR", - cfg: verifiedcar.Config{ - Root: unixfsDir.Root, - Selector: unixfsDirSubsetSelector, - }, - }, - { - name: "unixfs: large sharded file wrapped in directories", - blocks: consumedBlocks(unixfsWrappedFileBlocks), - roots: []cid.Cid{unixfsWrappedFile.Root}, - cfg: verifiedcar.Config{ - Root: unixfsWrappedFile.Root, - Selector: allSelector, - }, - }, - { - // our wrapped file has additional in the nested directories - name: "unixfs: large sharded file wrapped in directories, pathed, errors", - blocks: consumedBlocks(unixfsWrappedFileBlocks), - roots: []cid.Cid{unixfsWrappedFile.Root}, - expectErr: "unexpected block in CAR", - cfg: verifiedcar.Config{ - Root: unixfsWrappedFile.Root, - Selector: unixfsWrappedPathSelector, - }, - }, - { - name: "unixfs: large sharded file wrapped in directories, trimmed, pathed", - blocks: consumedBlocks(unixfsTrimmedWrappedFileBlocks), - roots: []cid.Cid{unixfsWrappedFile.Root}, - cfg: verifiedcar.Config{ - Root: unixfsWrappedFile.Root, - Selector: unixfsWrappedPathSelector, - }, - }, - { - name: "unixfs: large sharded file wrapped in directories, trimmed, all, errors", - blocks: consumedBlocks(unixfsTrimmedWrappedFileBlocks), - roots: []cid.Cid{unixfsWrappedFile.Root}, - expectErr: "unexpected block in CAR", - cfg: verifiedcar.Config{ - Root: unixfsWrappedFile.Root, - Selector: allSelector, - }, - }, - { - name: "unixfs: large sharded file wrapped in directories, exclusive, pathed", - blocks: consumedBlocks(unixfsExclusiveWrappedFileBlocks), - roots: []cid.Cid{unixfsExclusiveWrappedFile.Root}, - cfg: verifiedcar.Config{ - Root: 
unixfsExclusiveWrappedFile.Root, - Selector: unixfsWrappedPathSelector, - }, - }, - { - name: "unixfs: large sharded dir wrapped in directories", - blocks: consumedBlocks(unixfsWrappedShardedDirBlocks), - roots: []cid.Cid{unixfsWrappedShardedDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsWrappedShardedDir.Root, - Selector: allSelector, - }, - }, - { - // our wrapped dir has additional in the nested directories - name: "unixfs: large sharded dir wrapped in directories, pathed, errors", - blocks: consumedBlocks(unixfsWrappedShardedDirBlocks), - roots: []cid.Cid{unixfsWrappedShardedDir.Root}, - expectErr: "unexpected block in CAR", - cfg: verifiedcar.Config{ - Root: unixfsWrappedShardedDir.Root, - Selector: unixfsWrappedPathSelector, - }, - }, - { - name: "unixfs: large sharded dir wrapped in directories, trimmed, pathed", - blocks: consumedBlocks(unixfsTrimmedWrappedShardedDirBlocks), - roots: []cid.Cid{unixfsWrappedShardedDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsWrappedShardedDir.Root, - Selector: unixfsWrappedPathSelector, - }, - }, - { - name: "unixfs: large sharded dir wrapped in directories, trimmed, preload, pathed", - blocks: consumedBlocks(unixfsTrimmedWrappedShardedDirOnlyBlocks), - roots: []cid.Cid{unixfsWrappedShardedDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsWrappedShardedDir.Root, - Selector: unixfsWrappedPreloadPathSelector, - }, - }, - { - name: "unixfs: large sharded dir wrapped in directories, trimmed, all, errors", - blocks: consumedBlocks(unixfsTrimmedWrappedShardedDirBlocks), - roots: []cid.Cid{unixfsWrappedShardedDir.Root}, - expectErr: "unexpected block in CAR", - cfg: verifiedcar.Config{ - Root: unixfsWrappedShardedDir.Root, - Selector: allSelector, - }, - }, - { - name: "unixfs: large sharded dir wrapped in directories, exclusive, pathed", - blocks: consumedBlocks(unixfsExclusiveWrappedShardedDirBlocks), - roots: []cid.Cid{unixfsExclusiveWrappedShardedDir.Root}, - cfg: verifiedcar.Config{ - Root: 
unixfsExclusiveWrappedShardedDir.Root, - Selector: unixfsWrappedPathSelector, - }, - }, - { - name: "unixfs: large sharded dir wrapped in directories, exclusive, preload, pathed", - blocks: consumedBlocks(unixfsExclusiveWrappedShardedDirOnlyBlocks), - roots: []cid.Cid{unixfsExclusiveWrappedShardedDir.Root}, - cfg: verifiedcar.Config{ - Root: unixfsExclusiveWrappedShardedDir.Root, - Selector: unixfsWrappedPreloadPathSelectorSubst, - }, - }, - { - name: "unixfs: file with dups", - blocks: append(append(consumedBlocks(unixfsFileWithDupsBlocks[:2]), skippedBlocks(unixfsFileWithDupsBlocks[2:len(unixfsFileWithDupsBlocks)-1])...), consumedBlocks(unixfsFileWithDupsBlocks[len(unixfsFileWithDupsBlocks)-1:])...), - roots: []cid.Cid{unixfsFileWithDups.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFileWithDups.Root, - Selector: allSelector, - }, - }, - { - name: "unixfs: file with dups, incoming has dups, not allowed", - blocks: append(append(consumedBlocks(unixfsFileWithDupsBlocks[:2]), skippedBlocks(unixfsFileWithDupsBlocks[2:len(unixfsFileWithDupsBlocks)-1])...), consumedBlocks(unixfsFileWithDupsBlocks[len(unixfsFileWithDupsBlocks)-1:])...), - expectErr: "unexpected block in CAR: " + unixfsFileWithDupsBlocks[2].Cid().String() + " != " + unixfsFileWithDupsBlocks[len(unixfsFileWithDupsBlocks)-1].Cid().String(), - roots: []cid.Cid{unixfsFileWithDups.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFileWithDups.Root, - Selector: allSelector, - }, - incomingHasDups: true, - }, - { - name: "unixfs: file with dups, incoming has dups, allowed", - blocks: append(append(consumedBlocks(unixfsFileWithDupsBlocks[:2]), skippedBlocks(unixfsFileWithDupsBlocks[2:len(unixfsFileWithDupsBlocks)-1])...), consumedBlocks(unixfsFileWithDupsBlocks[len(unixfsFileWithDupsBlocks)-1:])...), - roots: []cid.Cid{unixfsFileWithDups.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFileWithDups.Root, - Selector: allSelector, - ExpectDuplicatesIn: true, - }, - incomingHasDups: true, - }, - { - name: "unixfs: 
file with dups, duplicate writes on", - blocks: consumedBlocks(unixfsFileWithDupsBlocks), - roots: []cid.Cid{unixfsFileWithDups.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFileWithDups.Root, - Selector: allSelector, - WriteDuplicatesOut: true, - }, - }, - { - name: "unixfs: file with dups, duplicate writes on, incoming dups", - blocks: consumedBlocks(unixfsFileWithDupsBlocks), - roots: []cid.Cid{unixfsFileWithDups.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFileWithDups.Root, - Selector: allSelector, - WriteDuplicatesOut: true, - ExpectDuplicatesIn: true, - }, - incomingHasDups: true, - }, - { - name: "premature stream end errors", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1}, - expectErr: "something wicked this way comes", - streamErr: errors.New("something wicked this way comes"), - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - name: "block write error errors", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1}, - expectErr: "something wicked this way comes", - blockWriteErr: errors.New("something wicked this way comes"), - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - }, - { - // strip out codec from the CIDs coming over the wire - name: "complete carv1, raw blocks", - blocks: consumedBlocks(allBlocks), - roots: []cid.Cid{root1}, - cfg: verifiedcar.Config{ - Root: root1, - Selector: allSelector, - }, - carAsRawBlocks: true, - }, - { - // strip out codec from the CIDs coming over the wire - name: "unixfs: large sharded file, raw blocks", - blocks: consumedBlocks(unixfsFileBlocks), - roots: []cid.Cid{unixfsFile.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFile.Root, - Selector: allSelector, - }, - carAsRawBlocks: true, - }, - { - // switch to CIDv0 for the CIDs coming over the wire, internally the DAG still has CIDv1 - // links - name: "unixfs: large sharded file, CIDv0", - blocks: consumedBlocks(unixfsFileBlocks), - roots: []cid.Cid{unixfsFile.Root}, - cfg: 
verifiedcar.Config{ - Root: unixfsFile.Root, - Selector: allSelector, - }, - carAsCIDv0: true, - }, - { - name: "unixfs: large sharded file byte range [0:1M]", - blocks: consumedBlocks(unixfsFileRange0_1048576Blocks), - roots: []cid.Cid{unixfsFile.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFile.Root, - Selector: unixfsFileRange0_1048576Selector, - }, - }, - { - name: "unixfs: large sharded file byte range [1M:2M]", - blocks: consumedBlocks(unixfsFileRange1048576_2097152Blocks), - roots: []cid.Cid{unixfsFile.Root}, - cfg: verifiedcar.Config{ - Root: unixfsFile.Root, - Selector: unixfsFileRange1048576_2097152Selector, - }, - }, - } - - for _, testCase := range testCases { - testCase := testCase - t.Run(testCase.name, func(t *testing.T) { - if testCase.skip { - t.Skip() - } - t.Parallel() - - ctx, cancel := context.WithTimeout(ctx, 2*time.Second) - defer cancel() - - req := require.New(t) - - store := &testutil.CorrectedMemStore{ParentStore: &memstore.Store{ - Bag: make(map[string][]byte), - }} - lsys := cidlink.DefaultLinkSystem() - lsys.SetReadStorage(store) - lsys.SetWriteStorage(store) - bwo := lsys.StorageWriteOpener - var writeCounter int - var skipped int - lsys.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { - var buf bytes.Buffer - return &buf, func(l datamodel.Link) error { - if testCase.blockWriteErr != nil && writeCounter+skipped == len(testCase.blocks)/2 { - return testCase.blockWriteErr - } - for testCase.blocks[writeCounter+skipped].skipped { - skipped++ - } - req.Equal(testCase.blocks[writeCounter+skipped].Cid().String(), l.(cidlink.Link).Cid.String(), "block %d", writeCounter) - req.Equal(testCase.blocks[writeCounter+skipped].RawData(), buf.Bytes(), "block %d", writeCounter) - writeCounter++ - w, wc, err := bwo(lc) - if err != nil { - return err - } - buf.WriteTo(w) - return wc(l) - }, nil - } - - carStream, errorCh := makeCarStream(t, ctx, testCase.roots, testCase.blocks, testCase.carv2, 
testCase.expectErr != "", testCase.incomingHasDups, testCase.streamErr, testCase.carAsRawBlocks, testCase.carAsCIDv0) - blockCount, byteCount, err := testCase.cfg.VerifyCar(ctx, carStream, lsys) - - // read the rest of data - io.ReadAll(carStream) - - select { - case err := <-errorCh: - req.NoError(err) - default: - } - - if testCase.expectErr != "" { - req.ErrorContains(err, testCase.expectErr) - req.Equal(uint64(0), blockCount) - req.Equal(uint64(0), byteCount) - } else { - req.NoError(err) - req.Equal(count(testCase.blocks), blockCount) - req.Equal(sizeOf(testCase.blocks), byteCount) - req.Equal(int(count(testCase.blocks)), writeCounter) - } - }) - } -} - -func makeCarStream( - t *testing.T, - ctx context.Context, - roots []cid.Cid, - blocks []expectedBlock, - carv2 bool, - expectErrors bool, - allowDuplicatePuts bool, - streamError error, - carAsRawBlocks bool, - carAsCIDv0 bool, -) (io.Reader, chan error) { - - r, w := io.Pipe() - - errorCh := make(chan error, 1) - go func() { - var carW io.Writer = w - - var v2f *os.File - if carv2 { - // if v2 we have to write to a temp file and stream that out since we - // can't create a streaming v2 - var err error - v2f, err = os.CreateTemp(t.TempDir(), "carv2") - if err != nil { - errorCh <- err - return - } - t.Cleanup(func() { - v2f.Close() - os.Remove(v2f.Name()) - }) - carW = v2f - } - - carWriter, err := storage.NewWritable(carW, roots, car.WriteAsCarV1(!carv2), car.AllowDuplicatePuts(allowDuplicatePuts)) - if err != nil { - errorCh <- err - return - } - for ii, block := range blocks { - if streamError != nil && ii == len(blocks)/2 { - w.CloseWithError(streamError) - return - } - c := block.Cid() - if carAsCIDv0 && c.Prefix().MhType == mh.SHA2_256 && c.Prefix().Codec == cid.DagProtobuf { - c = cid.NewCidV0(c.Hash()) - } else if carAsRawBlocks { - c = cid.NewCidV1(cid.Raw, c.Hash()) - } - err := carWriter.Put(ctx, c.KeyString(), block.RawData()) - if !expectErrors && err != nil { - errorCh <- err - return - } - if 
ctx.Err() != nil { - return - } - } - if err := carWriter.Finalize(); err != nil { - errorCh <- err - return - } - - if carv2 { - v2f.Seek(0, io.SeekStart) - // ignore error because upstream will strictly stop and close after - // reading the carv1 payload so we'll get an error here - io.Copy(w, v2f) - } - - if err := w.Close(); err != nil { - errorCh <- err - } - }() - - go func() { - <-ctx.Done() - if ctx.Err() != nil { - r.CloseWithError(ctx.Err()) - } - }() - - return r, errorCh -} - -type expectedBlock struct { - blocks.Block - skipped bool -} - -func consumedBlocks(blocks []blocks.Block) []expectedBlock { - expectedBlocks := make([]expectedBlock, 0, len(blocks)) - for _, block := range blocks { - expectedBlocks = append(expectedBlocks, expectedBlock{block, false}) - } - return expectedBlocks -} - -func skippedBlocks(blocks []blocks.Block) []expectedBlock { - expectedBlocks := make([]expectedBlock, 0, len(blocks)) - for _, block := range blocks { - expectedBlocks = append(expectedBlocks, expectedBlock{block, true}) - } - return expectedBlocks -} - -func count(blocks []expectedBlock) uint64 { - total := uint64(0) - for _, block := range blocks { - if !block.skipped { - total++ - } - } - return total -} - -func sizeOf(blocks []expectedBlock) uint64 { - total := uint64(0) - for _, block := range blocks { - if !block.skipped { - total += uint64(len(block.RawData())) - } - } - return total -}