Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use range properly in sda-download and fix coordinate handling when using encryption to comply with documented behaviour #1252

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
49 changes: 36 additions & 13 deletions sda-download/api/sda/sda.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,16 +291,18 @@ func Download(c *gin.Context) {
}

wholeFile := true
if start != 0 || end != 0 {
wholeFile = false
}

start, end, err = calculateCoords(start, end, c.GetHeader("Range"), fileDetails, c.Param("type"))
if err != nil {
log.Errorf("Byte range coordinates invalid! %v", err)

return
}

if start != 0 || end != 0 {
wholeFile = false
}

if c.Param("type") != "encrypted" {
// set the content-length for unencrypted files
if start == 0 && end == 0 {
Expand Down Expand Up @@ -563,17 +565,38 @@ var calculateCoords = func(start, end int64, htsget_range string, fileDetails *d
return start, end, nil
}

// Adapt end coordinate to follow the crypt4gh block boundaries
headlength := bytes.NewReader(fileDetails.Header)
bodyEnd := int64(fileDetails.ArchiveSize)
if end > 0 {
var packageSize float64 = 65564 // 64KiB+28, 28 is for chacha20_ietf_poly1305
togo := end - start
bodysize := math.Max(float64(togo-headlength.Size()), 0)
endCoord := packageSize * math.Ceil(bodysize/packageSize)
bodyEnd = int64(math.Min(float64(bodyEnd), endCoord))
if start == 0 && end == 0 {
// Defaults, read the whole file
return 0, 0, nil
}

return start, headlength.Size() + bodyEnd, nil
// If passing coordinates through query, adapt coordinates to follow the
// crypt4gh block boundaries each block is 65536 data + MAC/others
// (currently 28 bytes since chacha20 is the only supported crypto)
headLength := bytes.NewReader(fileDetails.Header).Size()

if end <= headLength {
// Not trying to read any actual data, let them!
// We know from before that start is less than end
return start, end, nil
}

const packageSize float64 = 65536 + 28 // 64KiB+28, 28 is for chacha20_ietf_poly1305

var serveStart int64 = start
bodySize := int64(fileDetails.ArchiveSize)
dataRequestEndOffset := end - headLength
dataRequestOffset := start - headLength

blockEndCoord := packageSize * math.Ceil(float64(dataRequestEndOffset)/packageSize)
serveEnd := int64(math.Min(float64(bodySize), blockEndCoord)) + headLength

// Only use adjusted offset if we request data from after the header
// (otherwise, adding the length of the header would mean we miss out
// on the header itself)
if start > headLength {
serveStart = int64(packageSize*math.Floor(float64(dataRequestOffset)/packageSize)) + headLength
}

return serveStart, serveEnd, nil
}
55 changes: 49 additions & 6 deletions sda-download/api/sda/sda_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,9 +670,9 @@ func Test_CalucalateCoords(t *testing.T) {
assert.GreaterOrEqual(t, end, from)
assert.NoError(t, err)

// end should not be smaller than a header
// no "padding" if requesting part of the header only
_, end, err = calculateCoords(from, headerSize-10, "", fileDetails, "encrypted")
assert.GreaterOrEqual(t, end, headerSize)
assert.GreaterOrEqual(t, end, headerSize-10)
assert.NoError(t, err)

// end should not be larger than file length + header
Expand All @@ -682,7 +682,8 @@ func Test_CalucalateCoords(t *testing.T) {

// param range 0-0 should give whole file
start, end, err = calculateCoords(0, 0, "", fileDetails, "encrypted")
assert.Equal(t, end-start, fullSize)
assert.Equal(t, int64(0), start)
assert.Equal(t, int64(0), end)
assert.NoError(t, err)

// byte range 0-1000 should return the range size, end coord inclusive
Expand Down Expand Up @@ -824,7 +825,7 @@ func TestDownload_Whole_Range_Encrypted(t *testing.T) {
assert.NoError(t, err, "Could not make crypt4gh writer for test")

// Write some data to the file
for i := 0; i < 1000; i++ {
for i := 0; i < 40000; i++ {
_, err = dataWriter.Write([]byte("data"))
assert.NoError(t, err, "Could not write to crypt4gh writer for test")
}
Expand Down Expand Up @@ -852,7 +853,7 @@ func TestDownload_Whole_Range_Encrypted(t *testing.T) {
database.GetFile = func(_ string) (*database.FileDownload, error) {
fileDetails := &database.FileDownload{
ArchivePath: datafileName,
ArchiveSize: 0,
ArchiveSize: 4 * 40000,
Header: headerBytes,
}

Expand All @@ -871,7 +872,7 @@ func TestDownload_Whole_Range_Encrypted(t *testing.T) {

assert.Equal(t, 200, response.StatusCode, "Unexpected status code from download")
// We only check
assert.Equal(t, []byte(strings.Repeat("data", 1000)), body, "Unexpected body from download")
assert.Equal(t, []byte(strings.Repeat("data", 40000)), body, "Unexpected body from download")

// Test download with specified coordinates, should return a small bit of the file
w = httptest.NewRecorder()
Expand Down Expand Up @@ -904,6 +905,47 @@ func TestDownload_Whole_Range_Encrypted(t *testing.T) {

assert.Equal(t, 200, response.StatusCode, "Unexpected status code from download")
assert.Equal(t, []byte("crypt4gh"), body[:8], "Unexpected body from download")
assert.Equal(t, 11, len(body), "Unexpected body from download")

// Test encrypted download not from the start, should work and give output
// that is crypt4gh encrypted
w = httptest.NewRecorder()
c, _ = gin.CreateTestContext(w)
c.Request = &http.Request{Method: "GET", URL: &url.URL{Path: "/mocks3/somepath", RawQuery: "filename=somepath"}}
c.Request.Header = http.Header{"Client-Public-Key": []string{config.Config.C4GH.PublicKeyB64},
"Range": []string{"bytes=4-1404"}}

c.Params = make(gin.Params, 1)
c.Params[0] = gin.Param{Key: "type", Value: "encrypted"}

Download(c)
response = w.Result()
defer response.Body.Close()
body, _ = io.ReadAll(response.Body)

assert.Equal(t, 200, response.StatusCode, "Unexpected status code from download")
assert.Equal(t, []byte("t4gh"), body[:4], "Unexpected body from download")
assert.Equal(t, 1401, len(body), "Unexpected body from download")

// Test encrypted download of a part late in the file, should work and give
// output that is crypt4gh encrypted (but those bits)
w = httptest.NewRecorder()
c, _ = gin.CreateTestContext(w)
c.Request = &http.Request{Method: "GET", URL: &url.URL{Path: "/mocks3/somepath", RawQuery: "filename=somepath&startCoordinate=70000&endCoordinate=70200"}}
c.Request.Header = http.Header{"Client-Public-Key": []string{config.Config.C4GH.PublicKeyB64}}

c.Params = make(gin.Params, 1)
c.Params[0] = gin.Param{Key: "type", Value: "encrypted"}

Download(c)
response = w.Result()
defer response.Body.Close()
body, _ = io.ReadAll(response.Body)

assert.Equal(t, 200, response.StatusCode, "Unexpected status code from download")
assert.Equal(t, 65536+28, len(body), "Unexpected body from download")
// TODO: is it worth it to grab a header and construct a crypt4gh stream
// to verify that we see expected content?

// Test encrypted download, should work even when AllowedUnencryptedDownload is false
w = httptest.NewRecorder()
Expand All @@ -922,6 +964,7 @@ func TestDownload_Whole_Range_Encrypted(t *testing.T) {

assert.Equal(t, 200, response.StatusCode, "Unexpected status code from download")
assert.Equal(t, []byte("crypt4gh"), body[:8], "Unexpected body from download")
assert.Equal(t, 11, len(body), "Unexpected body from download")

// Test encrypted download without passing the key, should fail
w = httptest.NewRecorder()
Expand Down
Loading