Skip to content

Commit

Permalink
fix: prefer audiovisual file types in mime type sort
Browse files Browse the repository at this point in the history
now that invenio previewer supports some audio and video types
closes #4 again
  • Loading branch information
phette23 committed Aug 13, 2024
1 parent 3af4790 commit 2eedf27
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
4 changes: 2 additions & 2 deletions migrate/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,12 @@ def test_to_edtf(input, expect):
[
{"filename": "img.tiff"},
{"filename": "doc.pdf"},
{"filename": "movie.mp4"},
{"filename": "song.mp3"},
{"folder": "zip.zip"},
{"filename": "unknown"},
{"filename": "plain.txt"},
{"filename": "img.webp"},
{"filename": "movie.mp4"},
{"filename": "song.mp3"},
{"filename": "app.exe"},
],
),
Expand Down
11 changes: 8 additions & 3 deletions migrate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,24 +72,29 @@ def to_edtf(s) -> str | None:
def visual_mime_type_sort(attachment) -> int:
# Sort EQUELLA attachment dicts by MIME type, preferring types previewable in Invenio,
# which are (according to the readme): PDF, ZIP, CSV, MARKDOWN, XML, JSON, PNG, JPG, GIF
# but also includes some audio and video types (don't know exactly which)
# https://github.com/inveniosoftware/invenio-previewer
# Order: TIFF > Non-HEIC/WEBP Images > PDF > Markdown, CSV, XML > JSON > ZIP > Everything else (not previewable)
# Order: TIFF > Non-HEIC/WEBP Images > PDF > Video > Markdown, CSV, XML > JSON > Audio > ZIP > others
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types#types
# type=zip attachments have a "folder" but no "filename"
fn = attachment.get("filename") or attachment["folder"]
mt: str | None = mimetypes.guess_type(fn)[0]
type, subtype = mt.split("/") if mt else ("unknown", "unknown")
guess: str | None = mimetypes.guess_type(fn)[0]
type, subtype = guess.split("/") if guess else ("unknown", "unknown")
match type, subtype:
case "image", "tiff":
return 0
case "image", _ if subtype not in ["heic", "webp"]:
return 10
case "application", "pdf":
return 20
case "video", _:
return 25
case "text", _ if subtype in ["csv", "markdown", "xml"]:
return 30
case "application", "json":
return 40
case "audio", _:
return 45
case "application", _ if subtype in ["zip", "x-zip-compressed"]:
return 50
case _, _: # model, font types, subtypes not covered above
Expand Down

0 comments on commit 2eedf27

Please sign in to comment.