diff --git a/paper/colm_camera/README.md b/paper/colm_camera/README.md
new file mode 100644
index 0000000..6e685db
--- /dev/null
+++ b/paper/colm_camera/README.md
@@ -0,0 +1,3 @@
+# Template
+
+Template and style files for CoLM 2024
diff --git a/paper/colm_camera/colm2024_conference.bib b/paper/colm_camera/colm2024_conference.bib
new file mode 100644
index 0000000..95744c2
--- /dev/null
+++ b/paper/colm_camera/colm2024_conference.bib
@@ -0,0 +1,11 @@
+@inproceedings{Vaswani+2017,
+ author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
+ booktitle = {Advances in Neural Information Processing Systems},
+ pages = {},
+ publisher = {Curran Associates, Inc.},
+ title = {Attention is All you Need},
+ url = {https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
+ volume = {30},
+ year = {2017}
+}
+
diff --git a/paper/colm_camera/colm2024_conference.bst b/paper/colm_camera/colm2024_conference.bst
new file mode 100644
index 0000000..a85a008
--- /dev/null
+++ b/paper/colm_camera/colm2024_conference.bst
@@ -0,0 +1,1440 @@
+%% File: `iclr2024.bst'
+%% A copy of iclm2010.bst, which is a modification of `plainnl.bst' for use with natbib package
+%%
+%% Copyright 2010 Hal Daum\'e III
+%% Modified by J. Fürnkranz
+%% - Changed labels from (X and Y, 2000) to (X & Y, 2000)
+%%
+%% Copyright 1993-2007 Patrick W Daly
+%% Max-Planck-Institut f\"ur Sonnensystemforschung
+%% Max-Planck-Str. 2
+%% D-37191 Katlenburg-Lindau
+%% Germany
+%% E-mail: daly@mps.mpg.de
+%%
+%% This program can be redistributed and/or modified under the terms
+%% of the LaTeX Project Public License Distributed from CTAN
+%% archives in directory macros/latex/base/lppl.txt; either
+%% version 1 of the License, or any later version.
+%%
+ % Version and source file information:
+ % \ProvidesFile{icml2010.mbs}[2007/11/26 1.93 (PWD)]
+ %
+ % BibTeX `plainnat' family
+ %   version 0.99b for BibTeX versions 0.99a or later,
+ %   for LaTeX versions 2.09 and 2e.
+ %
+ % For use with the `natbib.sty' package; emulates the corresponding
+ % member of the `plain' family, but with author-year citations.
+ %
+ % With version 6.0 of `natbib.sty', it may also be used for numerical
+ % citations, while retaining the commands \citeauthor, \citefullauthor,
+ % and \citeyear to print the corresponding information.
+ %
+ % For version 7.0 of `natbib.sty', the KEY field replaces missing
+ % authors/editors, and the date is left blank in \bibitem.
+ %
+ % Includes field EID for the sequence/citation number of electronic journals
+ % which is used instead of page numbers.
+ %
+ % Includes fields ISBN and ISSN.
+ %
+ % Includes field URL for Internet addresses.
+ %
+ % Includes field DOI for Digital Object Identifiers.
+ %
+ % Works best with the url.sty package of Donald Arseneau.
+ %
+ % Entries with identical authors and year are further sorted by
+ % citation key, to preserve any natural sequence.
+ % +ENTRY + { address + author + booktitle + chapter + doi + eid + edition + editor + howpublished + institution + isbn + issn + journal + key + month + note + number + organization + pages + publisher + school + series + title + type + url + volume + year + } + {} + { label extra.label sort.label short.list } + +INTEGERS { output.state before.all mid.sentence after.sentence after.block } + +FUNCTION {init.state.consts} +{ #0 'before.all := + #1 'mid.sentence := + #2 'after.sentence := + #3 'after.block := +} + +STRINGS { s t } + +FUNCTION {output.nonnull} +{ 's := + output.state mid.sentence = + { ", " * write$ } + { output.state after.block = + { add.period$ write$ + newline$ + "\newblock " write$ + } + { output.state before.all = + 'write$ + { add.period$ " " * write$ } + if$ + } + if$ + mid.sentence 'output.state := + } + if$ + s +} + +FUNCTION {output} +{ duplicate$ empty$ + 'pop$ + 'output.nonnull + if$ +} + +FUNCTION {output.check} +{ 't := + duplicate$ empty$ + { pop$ "empty " t * " in " * cite$ * warning$ } + 'output.nonnull + if$ +} + +FUNCTION {fin.entry} +{ add.period$ + write$ + newline$ +} + +FUNCTION {new.block} +{ output.state before.all = + 'skip$ + { after.block 'output.state := } + if$ +} + +FUNCTION {new.sentence} +{ output.state after.block = + 'skip$ + { output.state before.all = + 'skip$ + { after.sentence 'output.state := } + if$ + } + if$ +} + +FUNCTION {not} +{ { #0 } + { #1 } + if$ +} + +FUNCTION {and} +{ 'skip$ + { pop$ #0 } + if$ +} + +FUNCTION {or} +{ { pop$ #1 } + 'skip$ + if$ +} + +FUNCTION {new.block.checka} +{ empty$ + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.block.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.sentence.checka} +{ empty$ + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {new.sentence.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {field.or.null} +{ duplicate$ empty$ + { pop$ "" } + 'skip$ + if$ +} + +FUNCTION {emphasize} +{ duplicate$ empty$ + { pop$ "" } + { "\emph{" swap$ * "}" * } + if$ +} + +INTEGERS { nameptr namesleft numnames } + +FUNCTION {format.names} +{ 's := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't := + nameptr #1 > + { namesleft #1 > + { ", " * t * } + { numnames #2 > + { "," * } + 'skip$ + if$ + t "others" = + { " et~al." 
* } + { " and " * t * } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {format.key} +{ empty$ + { key field.or.null } + { "" } + if$ +} + +FUNCTION {format.authors} +{ author empty$ + { "" } + { author format.names } + if$ +} + +FUNCTION {format.editors} +{ editor empty$ + { "" } + { editor format.names + editor num.names$ #1 > + { " (eds.)" * } + { " (ed.)" * } + if$ + } + if$ +} + +FUNCTION {format.isbn} +{ isbn empty$ + { "" } + { new.block "ISBN " isbn * } + if$ +} + +FUNCTION {format.issn} +{ issn empty$ + { "" } + { new.block "ISSN " issn * } + if$ +} + +FUNCTION {format.url} +{ url empty$ + { "" } + { new.block "URL \url{" url * "}" * } + if$ +} + +FUNCTION {format.doi} +{ doi empty$ + { "" } + { new.block "\doi{" doi * "}" * } + if$ +} + +FUNCTION {format.title} +{ title empty$ + { "" } + { title "t" change.case$ } + if$ +} + +FUNCTION {format.full.names} +{'s := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}" format.name$ 't := + nameptr #1 > + { + namesleft #1 > + { ", " * t * } + { + numnames #2 > + { "," * } + 'skip$ + if$ + t "others" = + { " et~al." * } + { " and " * t * } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {author.editor.full} +{ author empty$ + { editor empty$ + { "" } + { editor format.full.names } + if$ + } + { author format.full.names } + if$ +} + +FUNCTION {author.full} +{ author empty$ + { "" } + { author format.full.names } + if$ +} + +FUNCTION {editor.full} +{ editor empty$ + { "" } + { editor format.full.names } + if$ +} + +FUNCTION {make.full.names} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.full + { type$ "proceedings" = + 'editor.full + 'author.full + if$ + } + if$ +} + +FUNCTION {output.bibitem} +{ newline$ + "\bibitem[" write$ + label write$ + ")" make.full.names duplicate$ short.list = + { pop$ } + { * } + if$ + "]{" * write$ + cite$ write$ + "}" write$ + newline$ + "" + before.all 'output.state := +} + +FUNCTION {n.dashify} +{ 't := + "" + { t empty$ not } + { t #1 #1 substring$ "-" = + { t #1 #2 substring$ "--" = not + { "--" * + t #2 global.max$ substring$ 't := + } + { { t #1 #1 substring$ "-" = } + { "-" * + t #2 global.max$ substring$ 't := + } + while$ + } + if$ + } + { t #1 #1 substring$ * + t #2 global.max$ substring$ 't := + } + if$ + } + while$ +} + +FUNCTION {format.date} +{ year duplicate$ empty$ + { "empty year in " cite$ * warning$ + pop$ "" } + 'skip$ + if$ + month empty$ + 'skip$ + { month + " " * swap$ * + } + if$ + extra.label * +} + +FUNCTION {format.btitle} +{ title emphasize +} + +FUNCTION {tie.or.space.connect} +{ duplicate$ text.length$ #3 < + { "~" } + { " " } + if$ + swap$ * * +} + +FUNCTION {either.or.check} +{ empty$ + 'pop$ + { "can't use both " swap$ * " fields in " * cite$ * warning$ } + if$ +} + +FUNCTION {format.bvolume} +{ volume empty$ + { "" } + { "volume" volume tie.or.space.connect + series empty$ + 'skip$ + { " of " * series emphasize * } + if$ + "volume and number" number either.or.check + } + if$ +} + +FUNCTION {format.number.series} +{ volume empty$ + { number empty$ + { series field.or.null } + { output.state mid.sentence = + { "number" } + { "Number" } + if$ + number tie.or.space.connect + series empty$ + { "there's a number but no series in " cite$ * warning$ } + { " in " * series * } + if$ + } + if$ + } + { "" } + if$ +} + +FUNCTION {format.edition} +{ edition empty$ + { "" } + { 
output.state mid.sentence = + { edition "l" change.case$ " edition" * } + { edition "t" change.case$ " edition" * } + if$ + } + if$ +} + +INTEGERS { multiresult } + +FUNCTION {multi.page.check} +{ 't := + #0 'multiresult := + { multiresult not + t empty$ not + and + } + { t #1 #1 substring$ + duplicate$ "-" = + swap$ duplicate$ "," = + swap$ "+" = + or or + { #1 'multiresult := } + { t #2 global.max$ substring$ 't := } + if$ + } + while$ + multiresult +} + +FUNCTION {format.pages} +{ pages empty$ + { "" } + { pages multi.page.check + { "pp.\ " pages n.dashify tie.or.space.connect } + { "pp.\ " pages tie.or.space.connect } + if$ + } + if$ +} + +FUNCTION {format.eid} +{ eid empty$ + { "" } + { "art." eid tie.or.space.connect } + if$ +} + +FUNCTION {format.vol.num.pages} +{ volume field.or.null + number empty$ + 'skip$ + { "\penalty0 (" number * ")" * * + volume empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + } + if$ + pages empty$ + 'skip$ + { duplicate$ empty$ + { pop$ format.pages } + { ":\penalty0 " * pages n.dashify * } + if$ + } + if$ +} + +FUNCTION {format.vol.num.eid} +{ volume field.or.null + number empty$ + 'skip$ + { "\penalty0 (" number * ")" * * + volume empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + } + if$ + eid empty$ + 'skip$ + { duplicate$ empty$ + { pop$ format.eid } + { ":\penalty0 " * eid * } + if$ + } + if$ +} + +FUNCTION {format.chapter.pages} +{ chapter empty$ + 'format.pages + { type empty$ + { "chapter" } + { type "l" change.case$ } + if$ + chapter tie.or.space.connect + pages empty$ + 'skip$ + { ", " * format.pages * } + if$ + } + if$ +} + +FUNCTION {format.in.ed.booktitle} +{ booktitle empty$ + { "" } + { editor empty$ + { "In " booktitle emphasize * } + { "In " format.editors * ", " * booktitle emphasize * } + if$ + } + if$ +} + +FUNCTION {empty.misc.check} +{ author empty$ title empty$ howpublished empty$ + month empty$ year empty$ note empty$ + and and and and and + key empty$ not and + { "all relevant fields are empty in " cite$ * warning$ } + 'skip$ + if$ +} + +FUNCTION {format.thesis.type} +{ type empty$ + 'skip$ + { pop$ + type "t" change.case$ + } + if$ +} + +FUNCTION {format.tr.number} +{ type empty$ + { "Technical Report" } + 'type + if$ + number empty$ + { "t" change.case$ } + { number tie.or.space.connect } + if$ +} + +FUNCTION {format.article.crossref} +{ key empty$ + { journal empty$ + { "need key or journal for " cite$ * " to crossref " * crossref * + warning$ + "" + } + { "In \emph{" journal * "}" * } + if$ + } + { "In " } + if$ + " \citet{" * crossref * "}" * +} + +FUNCTION {format.book.crossref} +{ volume empty$ + { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ + "In " + } + { "Volume" volume tie.or.space.connect + " of " * + } + if$ + editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { series empty$ + { "need editor, key, or series for " cite$ * " to crossref " * + crossref * warning$ + "" * + } + { "\emph{" * series * "}" * } + if$ + } + 'skip$ + if$ + } + 'skip$ + if$ + " \citet{" * crossref * "}" * +} + +FUNCTION {format.incoll.inproc.crossref} +{ editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { booktitle empty$ + { "need editor, key, or booktitle for " cite$ * " to crossref " * + crossref * warning$ + "" + } + { "In \emph{" booktitle * "}" * } + if$ + } + { "In " } + if$ + } + { "In " } + if$ + " \citet{" * crossref * "}" * +} + +FUNCTION {article} +{ output.bibitem + format.authors 
"author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + crossref missing$ + { journal emphasize "journal" output.check + eid empty$ + { format.vol.num.pages output } + { format.vol.num.eid output } + if$ + format.date "year" output.check + } + { format.article.crossref output.nonnull + eid empty$ + { format.pages output } + { format.eid output } + if$ + } + if$ + format.issn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {book} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {booklet} +{ output.bibitem + format.authors output + author format.key output + new.block + format.title "title" output.check + howpublished address new.block.checkb + howpublished output + address output + format.date output + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {inbook} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + format.chapter.pages "chapter and pages" output.check + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { format.chapter.pages "chapter and pages" output.check + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {incollection} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.chapter.pages output + new.sentence + publisher "publisher" output.check + address output + format.edition output + format.date "year" output.check + } + { format.incoll.inproc.crossref output.nonnull + format.chapter.pages output + } + if$ + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {inproceedings} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.pages output + address empty$ + { organization publisher new.sentence.checkb + organization output + publisher output + format.date "year" output.check + } + { address output.nonnull + format.date "year" output.check + 
new.sentence + organization output + publisher output + } + if$ + } + { format.incoll.inproc.crossref output.nonnull + format.pages output + } + if$ + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {conference} { inproceedings } + +FUNCTION {manual} +{ output.bibitem + format.authors output + author format.key output + new.block + format.btitle "title" output.check + organization address new.block.checkb + organization output + address output + format.edition output + format.date output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {mastersthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + "Master's thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + format.url output + new.block + note output + fin.entry +} + +FUNCTION {misc} +{ output.bibitem + format.authors output + author format.key output + title howpublished new.block.checkb + format.title output + howpublished new.block.checka + howpublished output + format.date output + format.issn output + format.url output + new.block + note output + fin.entry + empty.misc.check +} + +FUNCTION {phdthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.btitle "title" output.check + new.block + "PhD thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + format.url output + new.block + note output + fin.entry +} + +FUNCTION {proceedings} +{ output.bibitem + format.editors output + editor format.key output + new.block + format.btitle "title" output.check + format.bvolume output + format.number.series output + address output + format.date "year" output.check + new.sentence + organization output + publisher output + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {techreport} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + format.tr.number output.nonnull + institution "institution" output.check + address output + format.date "year" output.check + format.url output + new.block + note output + fin.entry +} + +FUNCTION {unpublished} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + note "note" output.check + format.date output + format.url output + fin.entry +} + +FUNCTION {default.type} { misc } + + +MACRO {jan} {"January"} + +MACRO {feb} {"February"} + +MACRO {mar} {"March"} + +MACRO {apr} {"April"} + +MACRO {may} {"May"} + +MACRO {jun} {"June"} + +MACRO {jul} {"July"} + +MACRO {aug} {"August"} + +MACRO {sep} {"September"} + +MACRO {oct} {"October"} + +MACRO {nov} {"November"} + +MACRO {dec} {"December"} + + + +MACRO {acmcs} {"ACM Computing Surveys"} + +MACRO {acta} {"Acta Informatica"} + +MACRO {cacm} {"Communications of the ACM"} + +MACRO {ibmjrd} {"IBM Journal of Research and Development"} + +MACRO {ibmsj} {"IBM Systems Journal"} + +MACRO {ieeese} {"IEEE Transactions on Software Engineering"} + +MACRO {ieeetc} {"IEEE Transactions on Computers"} + +MACRO {ieeetcad} + {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} + +MACRO {ipl} {"Information Processing Letters"} + +MACRO {jacm} 
{"Journal of the ACM"} + +MACRO {jcss} {"Journal of Computer and System Sciences"} + +MACRO {scp} {"Science of Computer Programming"} + +MACRO {sicomp} {"SIAM Journal on Computing"} + +MACRO {tocs} {"ACM Transactions on Computer Systems"} + +MACRO {tods} {"ACM Transactions on Database Systems"} + +MACRO {tog} {"ACM Transactions on Graphics"} + +MACRO {toms} {"ACM Transactions on Mathematical Software"} + +MACRO {toois} {"ACM Transactions on Office Information Systems"} + +MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} + +MACRO {tcs} {"Theoretical Computer Science"} + + +READ + +FUNCTION {sortify} +{ purify$ + "l" change.case$ +} + +INTEGERS { len } + +FUNCTION {chop.word} +{ 's := + 'len := + s #1 len substring$ = + { s len #1 + global.max$ substring$ } + 's + if$ +} + +FUNCTION {format.lab.names} +{ 's := + s #1 "{vv~}{ll}" format.name$ + s num.names$ duplicate$ + #2 > + { pop$ " et~al." * } + { #2 < + 'skip$ + { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = + { " et~al." * } + { " \& " * s #2 "{vv~}{ll}" format.name$ * } + if$ + } + if$ + } + if$ +} + +FUNCTION {author.key.label} +{ author empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {author.editor.key.label} +{ author empty$ + { editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.lab.names } + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {author.key.organization.label} +{ author empty$ + { key empty$ + { organization empty$ + { cite$ #1 #3 substring$ } + { "The " #4 organization chop.word #3 text.prefix$ } + if$ + } + 'key + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {editor.key.organization.label} +{ editor empty$ + { key empty$ + { organization empty$ + { cite$ #1 #3 substring$ } + { "The " #4 organization chop.word #3 text.prefix$ } + if$ + } + 'key + if$ + } + { editor format.lab.names } + if$ +} + +FUNCTION {calc.short.authors} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.key.label + { type$ "proceedings" = + 'editor.key.organization.label + { type$ "manual" = + 'author.key.organization.label + 'author.key.label + if$ + } + if$ + } + if$ + 'short.list := +} + +FUNCTION {calc.label} +{ calc.short.authors + short.list + "(" + * + year duplicate$ empty$ + short.list key field.or.null = or + { pop$ "" } + 'skip$ + if$ + * + 'label := +} + +FUNCTION {sort.format.names} +{ 's := + #1 'nameptr := + "" + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { + s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't := + nameptr #1 > + { + " " * + namesleft #1 = t "others" = and + { "zzzzz" * } + { numnames #2 > nameptr #2 = and + { "zz" * year field.or.null * " " * } + 'skip$ + if$ + t sortify * + } + if$ + } + { t sortify * } + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {sort.format.title} +{ 't := + "A " #2 + "An " #3 + "The " #4 t chop.word + chop.word + chop.word + sortify + #1 global.max$ substring$ +} + +FUNCTION {author.sort} +{ author empty$ + { key empty$ + { "to sort, need author or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {author.editor.sort} +{ author empty$ + { editor empty$ + { key empty$ + { "to sort, need author, editor, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION 
{author.organization.sort} +{ author empty$ + { organization empty$ + { key empty$ + { "to sort, need author, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {editor.organization.sort} +{ editor empty$ + { organization empty$ + { key empty$ + { "to sort, need editor, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { editor sort.format.names } + if$ +} + + +FUNCTION {presort} +{ calc.label + label sortify + " " + * + type$ "book" = + type$ "inbook" = + or + 'author.editor.sort + { type$ "proceedings" = + 'editor.organization.sort + { type$ "manual" = + 'author.organization.sort + 'author.sort + if$ + } + if$ + } + if$ + " " + * + year field.or.null sortify + * + " " + * + cite$ + * + #1 entry.max$ substring$ + 'sort.label := + sort.label * + #1 entry.max$ substring$ + 'sort.key$ := +} + +ITERATE {presort} + +SORT + +STRINGS { longest.label last.label next.extra } + +INTEGERS { longest.label.width last.extra.num number.label } + +FUNCTION {initialize.longest.label} +{ "" 'longest.label := + #0 int.to.chr$ 'last.label := + "" 'next.extra := + #0 'longest.label.width := + #0 'last.extra.num := + #0 'number.label := +} + +FUNCTION {forward.pass} +{ last.label label = + { last.extra.num #1 + 'last.extra.num := + last.extra.num int.to.chr$ 'extra.label := + } + { "a" chr.to.int$ 'last.extra.num := + "" 'extra.label := + label 'last.label := + } + if$ + number.label #1 + 'number.label := +} + +FUNCTION {reverse.pass} +{ next.extra "b" = + { "a" 'extra.label := } + 'skip$ + if$ + extra.label 'next.extra := + extra.label + duplicate$ empty$ + 'skip$ + { "{\natexlab{" swap$ * "}}" * } + if$ + 'extra.label := + label extra.label * 'label := +} + +EXECUTE {initialize.longest.label} + +ITERATE {forward.pass} + +REVERSE {reverse.pass} + +FUNCTION {bib.sort.order} +{ sort.label 'sort.key$ := +} + +ITERATE {bib.sort.order} + +SORT + +FUNCTION {begin.bib} +{ preamble$ empty$ + 'skip$ + { preamble$ write$ newline$ } + if$ + "\begin{thebibliography}{" number.label int.to.str$ * "}" * + write$ newline$ + "\providecommand{\natexlab}[1]{#1}" + write$ newline$ + "\providecommand{\url}[1]{\texttt{#1}}" + write$ newline$ + "\expandafter\ifx\csname urlstyle\endcsname\relax" + write$ newline$ + " \providecommand{\doi}[1]{doi: #1}\else" + write$ newline$ + " \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi" + write$ newline$ +} + +EXECUTE {begin.bib} + +EXECUTE {init.state.consts} + +ITERATE {call.type$} + +FUNCTION {end.bib} +{ newline$ + "\end{thebibliography}" write$ newline$ +} + +EXECUTE {end.bib} diff --git a/paper/colm_camera/colm2024_conference.pdf b/paper/colm_camera/colm2024_conference.pdf new file mode 100644 index 0000000..75d7e65 Binary files /dev/null and b/paper/colm_camera/colm2024_conference.pdf differ diff --git a/paper/colm_camera/colm2024_conference.sty b/paper/colm_camera/colm2024_conference.sty new file mode 100644 index 0000000..32461dc --- /dev/null +++ b/paper/colm_camera/colm2024_conference.sty @@ -0,0 +1,251 @@ +%%%% COLM Macros (LaTex) +%%%% Adapted by Hugo Larochelle from the NIPS stylefile Macros +%%%% Style File +%%%% Dec 12, 1990 Rev Aug 14, 1991; Sept, 1995; April, 1997; April, 1999; October 2014 + +% This file can be used with Latex2e whether running in main mode, or +% 2.09 compatibility mode. 
+% +% If using main mode, you need to include the commands +% \documentclass{article} +% \usepackage{colm14submit_e} +% + +% Palatino font +\RequirePackage{tgpagella} % text only +\RequirePackage{mathpazo} % math & text +\RequirePackage{inconsolata} % for tt font + +% Change the overall width of the page. If these parameters are +% changed, they will require corresponding changes in the +% maketitle section. +% +\usepackage{eso-pic} % used by \AddToShipoutPicture +\RequirePackage{fancyhdr} +\RequirePackage{natbib} + +% modification to natbib citations +\setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}} + +\renewcommand{\topfraction}{0.95} % let figure take up nearly whole page +\renewcommand{\textfraction}{0.05} % let figure take up nearly whole page + +% Define colmfinal, set to true if colmfinalcopy is defined +\newif\ifcolmfinal +\colmfinalfalse +\def\colmfinalcopy{\colmfinaltrue} +\font\colmtenhv = phvb at 8pt + +% Specify the dimensions of each page + +\setlength{\paperheight}{11in} +\setlength{\paperwidth}{8.5in} + + +\oddsidemargin .5in % Note \oddsidemargin = \evensidemargin +\evensidemargin .5in +\marginparwidth 0.07 true in +%\marginparwidth 0.75 true in +%\topmargin 0 true pt % Nominal distance from top of page to top of +%\topmargin 0.125in +\topmargin -0.625in +\addtolength{\headsep}{0.25in} +\textheight 9.0 true in % Height of text (including footnotes & figures) +\textwidth 5.5 true in % Width of text line. +\widowpenalty=10000 +\clubpenalty=10000 + +% \thispagestyle{empty} \pagestyle{empty} +\flushbottom \sloppy + +% We're never going to need a table of contents, so just flush it to +% save space --- suggested by drstrip@sandia-2 +\def\addcontentsline#1#2#3{} + +% Title stuff, taken from deproc. +\def\maketitle{\par +\begingroup + \def\thefootnote{\fnsymbol{footnote}} + \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} % for perfect author + % name centering +% The footnote-mark was overlapping the footnote-text, +% added the following to fix this problem (MK) + \long\def\@makefntext##1{\parindent 1em\noindent + \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1} + \@maketitle \@thanks +\endgroup +\setcounter{footnote}{0} +\let\maketitle\relax \let\@maketitle\relax +\gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} + +% The toptitlebar has been raised to top-justify the first page + +\usepackage{fancyhdr} +\pagestyle{fancy} +\renewcommand{\headrulewidth}{1.5pt} +\fancyhead{} + +% Title (includes both anonimized and non-anonimized versions) +\def\@maketitle{\vbox{\hsize\textwidth +%\linewidth\hsize \vskip 0.1in \toptitlebar \centering +{\Large\bf \@title\par} +%\bottomtitlebar % \vskip 0.1in % minus +\ifcolmfinal + \lhead{Published as a conference paper at COLM 2024} + \def\And{\end{tabular}\hfil\linebreak[0]\hfil + \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% + \def\AND{\end{tabular}\hfil\linebreak[4]\hfil + \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% + \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\@author\end{tabular}% +\else + \lhead{Under review as a conference paper at COLM 2024} + \def\And{\end{tabular}\hfil\linebreak[0]\hfil + \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% + \def\AND{\end{tabular}\hfil\linebreak[4]\hfil + \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% + \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}Anonymous authors\\Paper under double-blind review\end{tabular}% +\fi +\vskip 0.3in minus 0.1in}} + +\renewenvironment{abstract}{\vskip.075in\centerline{\large\bf 
+Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex} + +% sections with less space +\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus + -0.5ex minus -.2ex}{1.5ex plus 0.3ex +minus0.2ex}{\large\bf\raggedright}} + +\def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus +-0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}} +\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex +plus -0.5ex minus -.2ex}{0.5ex plus +.2ex}{\normalsize\bf\raggedright}} +\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus +0.5ex minus .2ex}{-1em}{\normalsize\bf}} +\def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus + 0.5ex minus .2ex}{-1em}{\normalsize}} +\def\subsubsubsection{\vskip +5pt{\noindent\normalsize\rm\raggedright}} + + +% Footnotes +\footnotesep 6.65pt % +\skip\footins 9pt plus 4pt minus 2pt +\def\footnoterule{\kern-3pt \hrule width 12pc \kern 2.6pt } +\setcounter{footnote}{0} + +% Lists and paragraphs +\parindent 0pt +\topsep 4pt plus 1pt minus 2pt +\partopsep 1pt plus 0.5pt minus 0.5pt +\itemsep 2pt plus 1pt minus 0.5pt +\parsep 2pt plus 1pt minus 0.5pt +\parskip .5pc + + +%\leftmargin2em +\leftmargin3pc +\leftmargini\leftmargin \leftmarginii 2em +\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em + +%\labelsep \labelsep 5pt + +\def\@listi{\leftmargin\leftmargini} +\def\@listii{\leftmargin\leftmarginii + \labelwidth\leftmarginii\advance\labelwidth-\labelsep + \topsep 2pt plus 1pt minus 0.5pt + \parsep 1pt plus 0.5pt minus 0.5pt + \itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii\advance\labelwidth-\labelsep + \topsep 1pt plus 0.5pt minus 0.5pt + \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt + \itemsep \topsep} +\def\@listiv{\leftmargin\leftmarginiv + \labelwidth\leftmarginiv\advance\labelwidth-\labelsep} +\def\@listv{\leftmargin\leftmarginv + \labelwidth\leftmarginv\advance\labelwidth-\labelsep} +\def\@listvi{\leftmargin\leftmarginvi + \labelwidth\leftmarginvi\advance\labelwidth-\labelsep} + +\abovedisplayskip 7pt plus2pt minus5pt% +\belowdisplayskip \abovedisplayskip +\abovedisplayshortskip 0pt plus3pt% +\belowdisplayshortskip 4pt plus3pt minus3pt% + +% Less leading in most fonts (due to the narrow columns) +% The choices were between 1-pt and 1.5-pt leading +%\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} % got rid of @ (MK) +\def\normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} +\def\small{\@setsize\small{10pt}\ixpt\@ixpt} +\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt} +\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt} +\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt} +\def\large{\@setsize\large{14pt}\xiipt\@xiipt} +\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt} +\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt} +\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt} +\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt} + +\def\toptitlebar{\hrule height4pt\vskip .25in\vskip-\parskip} + +\def\bottomtitlebar{\vskip .29in\vskip-\parskip\hrule height1pt\vskip +.09in} % +%Reduced second vskip to compensate for adding the strut in \@author + + +%% % Vertical Ruler +%% % This code is, largely, from the CVPR 2010 conference style file +%% % ----- define vruler +%% \makeatletter +%% \newbox\colmrulerbox +%% \newcount\colmrulercount +%% \newdimen\colmruleroffset +%% \newdimen\cv@lineheight +%% \newdimen\cv@boxheight +%% \newbox\cv@tmpbox +%% \newcount\cv@refno +%% \newcount\cv@tot +%% % NUMBER with left flushed zeros \fillzeros[] +%% \newcount\cv@tmpc@ \newcount\cv@tmpc +%% 
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi +%% \cv@tmpc=1 % +%% \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi +%% \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat +%% \ifnum#2<0\advance\cv@tmpc1\relax-\fi +%% \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat +%% \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}% +%% % \makevruler[][][][][] +%% \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip +%% \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt% +%% \global\setbox\colmrulerbox=\vbox to \textheight{% +%% {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight +%% \cv@lineheight=#1\global\colmrulercount=#2% +%% \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2% +%% \cv@refno1\vskip-\cv@lineheight\vskip1ex% +%% \loop\setbox\cv@tmpbox=\hbox to0cm{{\colmtenhv\hfil\fillzeros[#4]\colmrulercount}}% +%% \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break +%% \advance\cv@refno1\global\advance\colmrulercount#3\relax +%% \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}% +%% \makeatother +%% % ----- end of vruler + +%% % \makevruler[][][][][] +%% \def\colmruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\colmrulerbox}} +%% \AddToShipoutPicture{% +%% \ifcolmfinal\else +%% \colmruleroffset=\textheight +%% \advance\colmruleroffset by -3.7pt +%% \color[rgb]{.7,.7,.7} +%% \AtTextUpperLeft{% +%% \put(\LenToUnit{-35pt},\LenToUnit{-\colmruleroffset}){%left ruler +%% \colmruler{\colmrulercount}} +%% } +%% \fi +%% } +%%% To add a vertical bar on the side +%\AddToShipoutPicture{ +%\AtTextLowerLeft{ +%\hspace*{-1.8cm} +%\colorbox[rgb]{0.7,0.7,0.7}{\small \parbox[b][\textheight]{0.1cm}{}}} +%} diff --git a/paper/colm_camera/colm2024_conference.tex b/paper/colm_camera/colm2024_conference.tex new file mode 100644 index 0000000..2739a4c --- /dev/null +++ b/paper/colm_camera/colm2024_conference.tex @@ -0,0 +1,868 @@ + +\documentclass{article} % For LaTeX2e +\usepackage{colm2024_conference} + +\usepackage{microtype} +% \usepackage{hyperref} +\usepackage{url} +\usepackage{booktabs} + +\usepackage{graphicx} +\usepackage{array} +\usepackage{xspace} +\usepackage{pdflscape} +\usepackage{multirow} +\usepackage{multicol} +\usepackage{listings} +\usepackage{verbatim} +\usepackage{caption} +\usepackage{bbm} +\usepackage{fontawesome} +\usepackage{fancyvrb} + +% For theorems and such +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{amsthm} +\usepackage{mathtools} + +\definecolor{darkblue}{rgb}{0, 0, 0.5} +% \hypersetup{colorlinks=true, citecolor=darkblue, linkcolor=darkblue, urlcolor=darkblue} + + +\definecolor{xlinkcolor}{rgb}{0.7752941176470588, 0.22078431372549023, 0.2262745098039215} + +% For model colours and code +\definecolor{deepblue}{rgb}{0.29411765 0.45882353 0.61960784} +\definecolor{deepred}{rgb}{0.74509804 0.21176471 0.23921569} +\definecolor{deepgreen}{rgb}{0,0.5,0} +\definecolor{deeppurple}{rgb}{0.52941176 0.32941176 0.56470588} +\definecolor{codegray}{rgb}{0.5,0.5,0.5} +\definecolor{backcolour}{rgb}{0.95,0.95,0.92} + +\newcommand{\githubmaster}{\href{https://www.github.com/smsharma/PAPERCLIP-Hubble}{\faGithub}\xspace} + +\newcommand{\package}[1]{\textsl{#1}\xspace} +\newcommand{\hubble}{\emph{Hubble}\xspace} +\newcommand{\eqrefb}[1]{(\ref{#1})} + +\newcommand{\SM}[1]{\textcolor{blue}{[SM: #1]}} +\newcommand{\changes}[1]{\textcolor{red}{#1}} + +\usepackage[ +pdfnewwindow=true, % links in new window 
+colorlinks=true, % false: boxed links; true: colored links +linkcolor=xlinkcolor, % color of internal links +citecolor=xlinkcolor, % color of links to bibliography +filecolor=xlinkcolor, % color of file links +urlcolor=xlinkcolor, % color of external links +final=true, +]{hyperref} + +% Define a new fancy page style +% Insert correct preprint number +\def\preprintno{5690} +\fancypagestyle{firstpage}{ + \rhead{MIT-CTP/\preprintno} +} + +% Listings style +\lstdefinestyle{mystyle}{ + backgroundcolor=\color{backcolour}, + commentstyle=\color{deepgreen}, + keywordstyle=\color{deepred}, + numberstyle=\tiny\color{codegray}, + stringstyle=\color{deepgreen}, + basicstyle=\ttfamily\footnotesize\linespread{1.1}, + breakatwhitespace=false, + breaklines=true, captionpos=b, + keepspaces=true, numbers=left, + numbersep=8pt, showspaces=false, + showstringspaces=false, showtabs=false, + frame=single, + framerule=0.2pt, + rulecolor=\color{codegray}, + tabsize=2, + aboveskip=1.5ex, + belowskip=1.5ex, + xleftmargin=15pt, + xrightmargin=15pt, + extendedchars=true, + columns=flexible, + linewidth=\textwidth + } + +\lstset{style=mystyle} + +\newcommand{\datafolder}[1]{\def\thedatafolder{#1}} + +% Define Verbatim environment with custom style +\DefineVerbatimEnvironment{jsoncode}{Verbatim}{ + commandchars=\\\{\}, + rulecolor=\color{codegray}, + fillcolor=\color{codegray}, + labelposition=topline, + fontsize=\small, + baselinestretch=1.1, + formatcom=\color{deepgreen}, + xleftmargin=15pt, + xrightmargin=15pt, + tabsize=2 +} + + +\title{\textsc{PAPERCLIP}: Associating Astronomical Observations and Natural Language with Multi-Modal Models} + +% Authors must not appear in the submitted version. They should be hidden +% as long as the \colmfinalcopy macro remains commented out below. +% Non-anonymous submissions will be rejected without review. + +\author{Siddharth Mishra-Sharma, Yiding Song, \& Jesse Thaler \\ +MIT \& IAIFI \\ +\texttt{\{smsharma,ydsong,jthaler\}@mit.edu} +} + +%\author{Siddharth Mishra-Sharma \\ +%The NSF AI Institute for Artificial Intelligence and Fundamental Interactions \\ +%Center for Theoretical Physics, Massachusetts Institute of Technology, Cambridge, MA 02139, USA \\ +%Department of Physics, Harvard University, Cambridge, MA 02138, USA \\ +%\texttt{\href{mailto:smsharma@mit.edu}{smsharma@mit.edu}} +%\And +%Yiding Song \\ +%The NSF AI Institute for Artificial Intelligence and Fundamental Interactions \\ +%Department of Physics, Massachusetts Institute of Technology, Cambridge, MA 02139, USA \\ +%\texttt{\href{mailto:ydsong@mit.edu}{ydsong@mit.edu}} +%\And +%Jesse Thaler \\ +%The NSF AI Institute for Artificial Intelligence and Fundamental Interactions \\ +%Center for Theoretical Physics, Massachusetts Institute of Technology, Cambridge, MA 02139, USA \\ +%\texttt{\href{mailto:jthaler@mit.edu}{jthaler@mit.edu}} +%} + +% The \author macro works with any number of authors. There are two commands +% used to separate the names and addresses of multiple authors: \And and \AND. +% +% Using \And between authors leaves it to \LaTeX{} to determine where to break +% the lines. Using \AND forces a linebreak at that point. So, if \LaTeX{} +% puts 3 of 4 authors names on the first line, and the last on the second +% line, try using \AND instead of \And before the third author name. + +\newcommand{\fix}{\marginpar{FIX}} +\newcommand{\new}{\marginpar{NEW}} + +\colmfinalcopy % Uncomment for camera-ready version, but NOT for submission. 
+\begin{document}
+
+
+\maketitle
+
+\begin{abstract}
+  We present PAPERCLIP (Proposal Abstracts Provide an Effective Representation for Contrastive Language-Image Pre-training), a method which associates astronomical observations imaged by telescopes with natural language using a neural network model. The model is fine-tuned from a pre-trained Contrastive Language–Image Pre-training (CLIP) model using successful observing proposal abstracts and corresponding downstream observations, with the abstracts optionally summarized via guided generation using large language models (LLMs). Using observations from the \hubble Space Telescope (HST) as an example, we show that the fine-tuned model embodies a meaningful joint representation between observations and natural language through quantitative evaluation as well as tests targeting image retrieval (i.e., finding the most relevant observations using natural language queries)
+  and description retrieval (i.e., querying for astrophysical object classes and use cases most relevant to a given observation).
+  Our study demonstrates the potential for using generalist foundation models rather than task-specific models for interacting with astronomical data by leveraging text as an interface.
+\end{abstract}
+
+\section{Introduction}
+\label{sec:intro}
+
+Machine learning (ML) is starting to have a significant impact in the sciences, with astrophysics being no exception.
+%
+ML methods have demonstrated promise at every stage of the research pipeline, from instrument design to data acquisition and analysis \citep{huertas2022dawes}.
+%
+Most applications of ML within astrophysics have focused on augmenting traditional techniques in order to improve performance on specific tasks.
+%
+The {foundation model} paradigm, in contrast, seeks to develop generalist models which can be deployed to simultaneously tackle a wide range of tasks \citep{bommasani2021opportunities}.
+%
+These models are typically pre-trained on massive amounts of unlabeled data using self-supervised or weakly-supervised learning techniques, enabling them to learn powerful representations which can then be used downstream.
+%
+Foundation models can often benefit from additional training ({fine-tuning}) using a relatively small amount of domain-specific data in order to increase their usefulness when applied to specialized domains.
+
+There is considerable interest in developing custom foundation models for the sciences \citep[e.g., ][]{batatia2023foundation,subramanian2023towards,mccabe2023multiple,Birk:2024knn,vig2024finetuning,heinrich2024masked}, with astrophysics being ripe for such an effort given the large amounts of publicly available data and diverse ways of interacting with it.
+%
+The multi-modality inherent to astrophysical observations, with different types of data (e.g., images, spectra, light curves, textual descriptions) often available for a given target object, presents a unique opportunity.
+%
+% This multi-modality was recently exploited in \textsc{AstroCLIP}~\citep{lanusse2023astroclip} to construct a joint physically-informative embedding space between multi-band images and optical spectra from the Dark Energy Spectroscopic Instrument (DESI).
+% %
+
+In this paper, we describe \textsc{PAPERCLIP} (Proposal Abstracts Provide an Effective Representation for Contrastive Language-Image Pre-training),
+% \footnote{Technically, we fine tune rather than pre train, but ``PAPERCLIFT'' was rejected by the senior author of this paper.}),
+a method that connects, for the first time, astronomical image observations with natural language by leveraging the association between abstracts of successful observing proposals written by astronomers and images corresponding to downstream observations imaged by telescopes.
+%
+\changes{This approach demonstrates the potential of adapting generalist multi-modal foundation models to astronomy, complementing task-specific models in the domain by providing a flexible, language-based interface for interacting with observational data.}
+%
+Concretely, we showcase the method using observations imaged by the \hubble Space Telescope (HST).
+%
+We show that fine-tuning a pre-trained CLIP~\citep[Contrastive Language-Image Pre-training; ][]{radford2021learning} image-text model on observation-abstract pairs results in meaningful joint representations through quantitative and qualitative evaluation tests.
+%
+Our method opens up the possibility of interacting with astronomical survey data using free-form natural language as an interface, which is a cornerstone of the success of the modern foundation model paradigm. A high-level overview of the method is shown in Fig.~\ref{fig:overview}.
+%
+
+\begin{figure*}[!t]
+  \centering
+  \includegraphics[width=0.97\textwidth]{plots/figure.pdf}
+  \caption{Overview of the PAPERCLIP method. (Left) A pre-trained CLIP model is fine-tuned using a dataset of \hubble observations and corresponding proposal abstracts. The proposal abstracts are optionally summarized using guided large language model generation. (Right) The fine-tuned model can then be used for downstream tasks such as observation retrieval, i.e., finding the observations most relevant to a given text query. The proposal abstract snippet shown here corresponds to proposal ID \href{https://archive.stsci.edu/proposal_search.php?id=16914&mission=hst}{16914}.}
+  \label{fig:overview}
+  \end{figure*}
+
+  % \section{Related Work}
+  % \label{sec:related}
+
+  \paragraph*{Related Work}
+
+  The concept of learning task-agnostic representations via self-supervised and contrastive learning has been applied within astrophysics \citep{slijepcevic2024radio,stein2021self,hayat2021self,slijepcevic2022learning} and used for downstream tasks like object similarity search \citep{stein2021self}, gravitational lens finding \citep{stein2022mining}, estimation of Galactic distances \citep{hayat2021estimating}, identification of rare galaxies \citep{walmsley2023rare}, and data compression \citep{akhmetzhanova2024data}. For a recent review of contrastive learning in astrophysics, see \citet{huertas2023brief}.
+  %
+  Beyond applications to a single modality, \textsc{AstroCLIP}~\citep{lanusse2023astroclip} recently used contrastive learning to learn a joint representation between galaxy images and associated spectra, showing that the learned representation embodies relevant physical properties and can be effectively used for downstream tasks like redshift and mass estimation.
+  %
+  \citet{bowles2023radio,bowles2022new} introduced a method to associate radio galaxy images with a natural language description of their morphology by using human-generated descriptions, with the goal of deriving semantic morphology classes and using them for classification.
+  % %
+  In contrast with previous work, our application is the first to associate astronomical observations with the text modality in a task-agnostic manner, showcasing the potential of language models in specialized scientific domains like astronomy.
+  %
+
+  % Associating diverse modalities via contrastive learning has been employed in many other scientific domains~\citep[e.g.,][]{liu2023text,Sanchez-Fernandez2022.11.17.516915,lanusse2023astroclip,cepeda2023geoclip}, and has been shown to be effective in learning semantically meaningful joint representations.
+
+  % In this paper, we present for the first time an application associating target-agnostic astronomical data with the text modality, showing that this can be effectively accomplished through contrastive learning by leveraging observing proposal abstracts to inform text captions.
+  % %
+
+  The rest of this paper is organized as follows.
+%
+% We review related work in Sec.~\ref{sec:related}.
+% %
+In Sec.~\ref{sec:dataset}, we describe the \hubble dataset used in this work, including the curation and processing of observations as well as text captions.
+%
+In Sec.~\ref{sec:methodology}, we describe the methodology used to train and evaluate the model.
+%
+In Sec.~\ref{sec:results}, we present quantitative and qualitative results of our experiments on retrieval tasks.
+%
+We discuss future prospects and conclude in Sec.~\ref{sec:conclusion}.
+
+ \section{Dataset Construction}
+ \label{sec:dataset}
+
+ We curate a dataset of \hubble Space Telescope (HST) image observations and corresponding text descriptions from publicly available sources.
+ %
+ We rely on proposal abstracts from the Proposal Abstracts Catalog\footnote{\url{https://archive.stsci.edu/hst/proposal_abstracts.html}} -- a catalog of successful HST proposals -- to generate captions for the observations, optionally summarizing them via guided generation using LLMs (described in Sec.~\ref{sec:summarization} below).
+ %
+ The HST has been operational since its launch on April 24, 1990, and we use available proposals and observations up to the Cycle 30 science program, which commenced data-taking in 2022.
+ %
+
+ Table \ref{tab:dataset} shows examples of images and their corresponding (clipped) proposal abstracts.
+ %
+ The images in this dataset exhibit specific characteristics, as well as artifacts particular to HST data-taking and processing, which distinguish them from the distribution of natural images typically used for large-scale pre-training of foundation models.
+ %
+ This further motivates fine-tuning on domain-specific data.
+
+ \datafolder{./plots/data/}
+
+ \begin{table}[h!]
+ \centering
+ \begin{tabular}{m{0.20\textwidth} p{1.9cm} p{1.9cm} m{5.5cm}}
+ \toprule
+ \centering \bfseries \hubble image & \centering \bfseries Obs. cycle \\ (Year) & \centering \bfseries Prop. ID & \centering \bfseries Proposal abstract (clipped) \tabularnewline
+ \midrule
+ \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_2.pdf} & \centering \input{\thedatafolder/cycle_2.txt} \\ (1999) & \centering \input{\thedatafolder/id_2.txt} & {\scriptsize \input{\thedatafolder/abs1_2.txt}} \tabularnewline
+ \midrule
+ % \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_1.pdf} & \centering \input{\thedatafolder/cycle_1.txt} \\ (2013) & \centering \input{\thedatafolder/id_1.txt} & {\scriptsize \input{\thedatafolder/abs1_1.txt}} \tabularnewline
+ % \midrule
+ \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_3.pdf} & \centering \input{\thedatafolder/cycle_3.txt} \\ (2016) & \centering \input{\thedatafolder/id_3.txt} & {\scriptsize \input{\thedatafolder/abs1_3.txt}} \tabularnewline
+ % \midrule
+ % \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_0.pdf} & \centering \input{\thedatafolder/cycle_0.txt} \\ (2019) & \centering \input{\thedatafolder/id_0.txt} & {\scriptsize \input{\thedatafolder/abs1_0.txt}} \tabularnewline
+ \bottomrule
+ \end{tabular}
+ \caption{Examples of \hubble images (left-most column) and corresponding clipped proposal abstracts (right-most column). The observation cycle and corresponding year, as well as proposal ID, are shown in the second and third columns, respectively. The proposal IDs link to the Mikulski Archive for Space Telescopes (MAST) page corresponding to the proposal.}
+ \label{tab:dataset}
+ \end{table}
+
+ \begin{table}[h!]
+ \renewcommand{\arraystretch}{2}
+ \centering
+ \begin{tabular}{m{1.8cm} m{3.6cm} m{7.2cm}}
+ \toprule
+ \bfseries Prop. ID & \multicolumn{2}{c}{\bfseries LLM-extracted summary} \tabularnewline
+ \cmidrule(r){2-3}
+ & \centering\arraybackslash \bfseries \small{Objects and phenomena} & \centering\arraybackslash \bfseries \small{Science use cases} \tabularnewline
+ \midrule
+ \input{\thedatafolder/id1_2.txt} & {\scriptsize \input{\thedatafolder/obj1_2.txt}} & {\scriptsize \input{\thedatafolder/sci1_2.txt}} \tabularnewline
+ \midrule
+ % \input{\thedatafolder/id1_1.txt} & {\scriptsize \input{\thedatafolder/obj1_1.txt}} & {\scriptsize \input{\thedatafolder/sci1_1.txt}} \tabularnewline
+ % \midrule
+ \input{\thedatafolder/id1_3.txt} & {\scriptsize \input{\thedatafolder/obj1_3.txt}} & {\scriptsize \input{\thedatafolder/sci1_3.txt}} \tabularnewline
+ % \midrule
+ % \input{\thedatafolder/id1_0.txt} & {\scriptsize \input{\thedatafolder/obj1_0.txt}} & {\scriptsize \input{\thedatafolder/sci1_0.txt}} \tabularnewline
+ \bottomrule
+ \end{tabular}
+ \caption{For the \hubble proposal abstracts shown in Tab.~\ref{tab:dataset}, the LLM (\textsc{Mixtral-8x7B})-extracted summaries showing objects and phenomena (middle column) as well as potential downstream science use cases (last column) separately. The proposal IDs (left column) contain hyperlinks to the MAST page corresponding to the proposal.}
+ \label{tab:datasetsumm}
+ \end{table}
+
+ \subsection{\hubble Data Selection and Pre-processing}
+
+ Observations corresponding to individual proposal IDs are queried through the Mikulski Archive for Space Telescopes (MAST)\footnote{\url{https://mast.stsci.edu/}} via the \package{Astroquery} \citep{2019AJ....157...98G} API.
+ %
+ Only products of type \texttt{PREVIEW}, corresponding to preview postcard images, are retained.
+ %
+ We note that these are not science-grade observations, but rather lower-resolution images useful for diagnostic or preview purposes.
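+ %
+ As an illustration, the following minimal \package{Astroquery} sketch retrieves \texttt{PREVIEW} products for a single proposal ID (here \href{https://archive.stsci.edu/proposal_search.php?id=16914&mission=hst}{16914}, the proposal shown in Fig.~\ref{fig:overview}); the exact query criteria and download options used to build our dataset may differ:
+\begin{lstlisting}[language=Python]
+from astroquery.mast import Observations
+
+# Query all HST observations associated with a single proposal ID
+obs = Observations.query_criteria(obs_collection="HST", proposal_id="16914")
+
+# List the associated data products and keep only preview postcard images
+products = Observations.get_product_list(obs)
+previews = Observations.filter_products(products, productType="PREVIEW")
+
+# Download the preview images locally
+manifest = Observations.download_products(previews)
+\end{lstlisting}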
+ %
+ A maximum of 20 images are downloaded per proposal ID, selected at random, in order to avoid biasing the model towards proposals with a larger number of observations and survey-style campaigns.
+ %
+ Images are centered and resized to a resolution-per-side of 512 pixels.
+ %
+ Color previews (i.e., observations taken with multiple wavelength filters assigned to individual RGB channels) are manually excluded via a filename filter in order to maintain consistency across the samples.
+ % models trained on datasets with color images included were observed to show worse performance on evaluation metrics.
+ %
+ If no appropriate images corresponding to an abstract are found, the abstract is excluded from the dataset.
+
+ In total, 31,859 images corresponding to 4,438 abstracts are included in the fine-tuning dataset.
+ %
+ 3,194 images are held out for validation, with no abstract being common between training and validation sets in order to ensure an independent set of image-text pairs for evaluation. The held-out images correspond to 429 unique abstracts. % Double check numbers in final version.
+ %
+ \changes{Due to practical limitations associated with the small size of the fine-tuning dataset, we did not use different datasets for validation and testing, deeming the current approach sufficient for a proof-of-principle exposition.}
+
+ We note that some fraction of the image-caption pairs in the constructed dataset will primarily concern instrumentation and/or calibration rather than scientific content.
+ %
+ We choose not to filter out these pairs, in order to have a larger sample of HST observations that the model can leverage to adapt to the distinctive characteristics of \hubble images.
+
+ \subsection{Abstract Summarization via Guided Generation}
+ \label{sec:summarization}
+
+ Raw proposal abstracts summarize the corresponding successful HST observing proposals, which make the case for allocating \hubble telescope time towards a particular set of observations.
+ %
+ These abstracts are written in a diversity of styles, formats, and lengths, and are highly variable in their content.
+ %
+ Although the abstracts can be used as-is as image captions, we experiment with summarizing them via guided large language model (LLM) generation to standardize the captions used for fine-tuning the CLIP model.
+ %
+ Captions are summarized by extracting a list of objects and phenomena, as well as potential downstream science use cases, corresponding to the eventual imaged observation.
+ The goal of the summarization process is to increase the strength of the association signal between text and images.
+
+ The method from \cite{willard2023efficient} is used to produce an LLM-generated summary of the abstract conforming to a particular schema, specified in JSON format.
+ %
+ The schema is designed to represent a list of the objects (e.g., `Type Ia supernova') and phenomena (e.g., `gravitational lensing'), as well as potential downstream science use cases (e.g., `set constraints on supernova explosion models') that could correspond to the eventual imaged observation given the abstract text, with a minimum of 1 and a maximum of 5 elements per list.
+ %
+ The procedure guides the generation of LLM outputs while ensuring that the schema is respected at every step in the generation process by masking out tokens that would violate the intended format.
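+ %
+ As an illustration, a minimal sketch of schema-guided generation with the \package{Outlines} package (introduced below) might look as follows; the schema here is a simplified stand-in for the one used in this work, and the model identifier is the corresponding Hugging Face weights name:
+\begin{lstlisting}[language=Python]
+import outlines
+from pydantic import BaseModel, Field
+
+# Simplified stand-in for the summarization schema: 1--5 items per list
+class Summary(BaseModel):
+    objects_and_phenomena: list[str] = Field(min_length=1, max_length=5)
+    science_use_cases: list[str] = Field(min_length=1, max_length=5)
+
+model = outlines.models.transformers("mistralai/Mixtral-8x7B-Instruct-v0.1")
+generator = outlines.generate.json(model, Summary)
+
+abstract = "..."  # raw proposal abstract text
+summary = generator("Summarize this HST proposal abstract: " + abstract)
+\end{lstlisting}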
+ %
+ By framing the problem in terms of transitions between a set of finite states (i.e., a finite-state machine), \cite{willard2023efficient} showed that guided generation can be performed with negligible overhead compared to unconstrained generation.
+ %
+ See App.~\ref{app:guided-generation} for a more detailed description of the guided generation method used here, including an overview of technical details.
+ %
+ While the schema-guided generation ensures the \emph{format} of the output, the prompt and choice of LLM will dictate the \emph{content} of the generated summaries.
+ %
+ We use the open-weights, instruction-tuned model \textsc{Mixtral-8x7B-Instruct}~\citep{jiang2024mixtral} to generate the summaries, with guided generation performed using the \package{Outlines}\footnote{\url{https://github.com/outlines-dev/outlines}} package.
+ %
+ Further details on the summarization procedure, including the prompts and schema used, are provided in App.~\ref{app:summarization}.
+
+ % The guided generation process ensures that, in this case, the generated output of the LLM strictly conforms to the format of the following example:\\
+ % \begin{center}
+ % \begin{jsoncode}
+ % \centering
+ % \color{black}\{
+ % \color{deepgreen}'objects_and_phenomena'\color{black}: [\color{deepgreen}'star forming galaxy', 'lensed galaxy'\color{black}, ...],
+ % \color{deepgreen}'science_use_cases'\color{black}: [\color{deepgreen}'measure lensing magnification'\color{black}, ...]
+ % \color{black}\}
+ % \end{jsoncode}
+ % \end{center}
+ % which is then used to construct the summarized caption by combining the two key elements.
+ %
+ Examples of LLM-generated abstract summaries are shown in Tab.~\ref{tab:datasetsumm}, for the same set of abstracts as shown in Tab.~\ref{tab:dataset}.
+ %
+ We train separate models using the raw abstracts and the LLM-generated summaries, and compare their performance on downstream tasks in Sec.~\ref{sec:results}.
+ %
+ We note that, even after summarization, the association signal is expected to be noisy, since parts of the summarized caption may not be directly descriptive of the observed images. The goal of the fine-tuning process is to leverage the signal contained in this noisy association.
+
+ \section{Methodology}
+ \label{sec:methodology}
+
+ % Our goal is to learn a semantically meaningful joint representation between images corresponding to HST observation and natural (English) language.
+ % %
+ % With PAPERCLIP, we leverage the strong generalization capabilities demonstrated by pre-trained CLIP models and adapt these to work with domain-specific \hubble data via fine-tuning.
+
+ \subsection{Contrastive Language-Image Pre-training}
+
+ Contrastive Language-Image Pre-training \citep[CLIP;][]{radford2021learning} is a multi-modal neural network model pre-trained on a large corpus of image-text pairs via weak supervision using a contrastive loss.
+ %
+ Given a minibatch $\mathcal{B}$ of $|\mathcal{B}|$ image-text pairs $\{(I_i, T_i)\}$, the goal is to align the learned representations of corresponding (positive) pairs $(I_i, T_i)$ while repelling the representations of unaligned (negative) pairs $(I_i, T_{j\neq i})$.
+ %
+ Image and text encoders $f: I \rightarrow \mathbb R^{n_\text{emb}}$ and $g: T \rightarrow \mathbb R^{n_\text{emb}}$ are used to map images and text to a common embedding space of dimension $n_\text{emb}$.
+ % + We use the standard bidirectional variant of the InfoNCE~\citep{oord2018representation} contrastive loss function introduced for training CLIP-style architectures \citep{radford2021learning}, + % % + % \begin{equation} + % \label{eq:softmax_loss} + % \mathcal{L}(\mathcal{B})=-\frac{1}{2|\mathcal{B}|} \sum_{i=1}^{|\mathcal{B}|}\left(\log \frac{e^{x_i \cdot y_i / \tau}}{\sum_{j=1}^{|\mathcal{B}|} e^{x_i \cdot y_j / \tau}}+\log \frac{e^{x_i \cdot y_i / \tau}}{\sum_{j=1}^{|\mathcal{B}|} e^{x_j \cdot y_i / \tau}}\right) + % \end{equation} + $\mathcal{L}(\mathcal{B})=-\frac{1}{2|\mathcal{B}|} \sum_{i=1}^{|\mathcal{B}|}\left(\log \frac{e^{x_i \cdot y_i / \tau}}{\sum_{j=1}^{|\mathcal{B}|} e^{x_i \cdot y_j / \tau}}+\log \frac{e^{x_i \cdot y_i / \tau}}{\sum_{j=1}^{|\mathcal{B}|} e^{x_j \cdot y_i / \tau}}\right)$, + % + where ${x}_i={f\left(I_i\right)}/{\left\|f\left(I_i\right)\right\|}$ and ${y}_i={g\left(T_i\right)}/{\left\|g\left(T_i\right)\right\|}$ are the normalized representations of the $i$-th image and text caption, respectively, and $\tau$ is a learnable temperature hyperparameter. + % + Note that this loss treats the image and text representations symmetrically, ensuring that the two modalities are considered on the same footing. + + % We use the \texttt{CLIP-ViT-B/16} \citep{radford2021learning} variant as the base pre-trained CLIP model. + % % + % This model uses a 12-layer, 12-head, 768-embedding dimension vision transformer with patch size $16\times16$ as the image encoder \citep{dosovitskiy2020image} and a 12-layer, 8-head, 512-embedding dimension text sequence transformer as the text backbone \citep{vaswani2017attention}. + % % + % The text encoder has a maximum length of 77 tokens and the image encoder has a native resolution of $224\times224$ pixels. + % % + % Linear projection layers map the outputs of the image and text encoders to a common embedding space of dimension $n_\text{emb}=512$. + % % + % In total, the model contains $\sim 149$ million trainable parameters. + % % + % This model was originally pre-trained on $\sim 400$ million image-text pairs from internet data. + % % + + \subsection{Fine-tuning Procedure} + + The base CLIP model is fine-tuned using the dataset described in Sec.~\ref{sec:dataset}, using either the LLM-summarized abstracts or raw proposal abstracts paired with observations. + % + When using raw proposal abstracts, random chunks of the text delimited by periods are selected on the fly to fit within the maximum token length of the text encoder. + % + Images are augmented via random four-fold rotations (increments of $90^\circ$) and randomly cropped to the native resolution of the image encoder, maintaining $\sim 20\%$ of the area of the original image, at each training step. + % + Given the relatively modest size of the fine-tuning dataset, a batch size $|\mathcal B| = 32$ is used throughout; larger batch sizes were observed to be susceptible to overfitting. + % + The temperature hyperparameter $\tau$ was initialized to its pre-trained value. + % + We emphasize that the positive and negative image-text association is noisy and imperfect, since multiple images can be associated with the same abstract, and the goal of the fine-tuning process is to leverage the signal contained in this noisy association. + + We use the \texttt{CLIP-ViT-B/16} \citep{radford2021learning} variant as the base pre-trained CLIP model. 
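+ %
+ For concreteness, a minimal \package{Jax} sketch of the bidirectional contrastive objective minimized during fine-tuning, assuming pre-normalized embeddings $x_i$ and $y_i$, is given below.
+\begin{lstlisting}[language=Python]
+import jax
+import jax.numpy as jnp
+
+def clip_loss(x, y, tau):
+    # x, y: (B, n_emb) L2-normalized image and text embeddings; tau: temperature.
+    logits = x @ y.T / tau  # pairwise scaled cosine similarities
+    log_p_i2t = jax.nn.log_softmax(logits, axis=1)  # image -> text direction
+    log_p_t2i = jax.nn.log_softmax(logits, axis=0)  # text -> image direction
+    # Positive pairs lie on the diagonal; average over both directions.
+    return -0.5 * (jnp.diag(log_p_i2t).mean() + jnp.diag(log_p_t2i).mean())
+\end{lstlisting}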
+ % + We explore three different methods of training the model on our domain dataset: \emph{(1)} Fine-tuning the entire network starting from the pre-trained base model; \emph{(2)} Freezing the base image/text encoders and training a small projection head; and \emph{(3)} Training the entire model from scratch. + % + For \emph{(2)}, we use a 2-layer MLP with 1024 hidden units and a GELU activation layer, projecting onto the 512-dimensional common embedding space. Additional details on the CLIP model and fine-tuning procedure are provided in App.~\ref{app:model_details}. + + \subsection{Evaluation Metrics} + \label{sec:eval} + + The model is evaluated by tracking the contrastive loss as well as the top-$k\%$ retrieval accuracy on the held out validation set over the course of training. + % + The retrieval accuracy is defined as the fraction of associated captions (either raw or LLM-summarized abstracts) which fall within the top $k\%$ of captions by cosine similarity of the normalized image and caption embeddings, averaged over the images in the validation set: + $\frac{1}{|\mathcal V|} \sum_{i=1}^{|\mathcal V|} \mathbbm{1}\left[\operatorname{rank}\left({x}_i \cdot {y}_{i}; \{{x}_i \cdot {y}_{j}\}_{j=1}^{|\mathcal V|}\right) \leq \left\lfloor\frac{k}{100}|\mathcal V|\right\rfloor\right]$ + % \begin{equation} + % \text{Retrieval accuracy}_k = \frac{1}{|\mathcal V|} \sum_{i=1}^{|\mathcal V|} \mathbbm{1}\left[\operatorname{rank}\left({x}_i \cdot {y}_{i}; \{{x}_i \cdot {y}_{j}\}_{j=1}^{|\mathcal V|}\right) \leq \left\lfloor\frac{k}{100}|\mathcal V|\right\rfloor\right] + % \label{eq:retrieval_accuracy} + % \end{equation} + where $|\mathcal V|$ is the total number of images in the validation set, $\mathbbm{1}[\cdot]$ is the indicator function that returns 1 if the condition inside the brackets is true and 0 otherwise, $\operatorname{rank}\left({x}_i \cdot {y}_{i}; \{{x}_i \cdot {y}_{j}\}_{j=1}^{|\mathcal V|}\right)$ is a function that returns the rank of the cosine similarity between ${x}_i$ and ${y}_{i}$ among the cosine similarities between ${x}_i$ and all captions ${y}_j$ in the validation set, and $k$ is the percentage of top captions considered for the retrieval accuracy. Note that this metric is symmetric in the image and text modalities. + + We also qualitatively evaluate the learned embeddings through image retrieval (i.e., retrieving the most relevant images from the validation set using natural language queries). % and description retrieval (i.e., querying the astrophysical object classes and science use cases most relevant to a given observation, akin to zero-shot classification) experiments. + % + % For the description/text retrieval evaluation, we define a list of possible text associations (i.e., classes), which we show in App.~\ref{app:categories}, by querying the \textsc{Claude 2}\footnote{\url{https://claude.ai/}} large language followed by manual curation. + + \section{Results and Discussion} + \label{sec:results} + + \subsection{Quantitative Evaluation} + + \paragraph*{Validation metrics during training} + + Figure~\ref{fig:retrieval_acc} shows the contrastive loss (left) and the top-10\% retrieval accuracy (right) evaluated on the held out validation set over the course of training, for different training configurations considered. + % + The dashed orange lines show the metrics evaluated when training with batches where the image-text associations are randomly shuffled. 
+ %
+ This randomized baseline is seen to perform on par with random expectation (i.e., a 10\% retrieval accuracy), unlike the other configurations, validating the presence of a significant association signal between images and text in the dataset.
+ %
+ Interestingly, the base pre-trained model performs better than random expectation, with a top-10\% retrieval accuracy of $\sim 15\%$ (as seen from the left-most datum in Fig.~\ref{fig:retrieval_acc} right, for the curves corresponding to fine-tuned models).
+ %
+ We therefore also compare the qualitative performance of the base model with the fine-tuned models on downstream retrieval tasks.
+
+ The model trained using LLM-summarized abstracts (red lines) is seen to perform slightly worse than the model using raw abstracts as captions (blue lines), despite the curation of the summarized-abstract dataset intended to provide a stronger image-text association signal.
+ %
+ Fine-tuning a small MLP head over frozen vision and text backbones (dotted green lines) and training from scratch with summarized abstracts as captions (yellow lines) show a non-trivial improvement compared to the base model, though still underperforming fine-tuning with either summarized or raw abstracts.
+
+ \begin{figure*}[!h]
+ \centering
+ \includegraphics[width=0.85\textwidth]{plots/val_metrics.pdf}
+ \caption{The CLIP contrastive loss (left) and the top-10\% retrieval accuracy (right) computed on the validation set over the course of training. Shown for the dataset with summarized abstracts as captions (red), the dataset using raw proposal abstracts as captions (blue), only fine-tuning a small MLP head (dotted green), training from scratch with summarized abstracts as captions (yellow), and training with shuffled image-text pairs (dashed orange).}
+ \label{fig:retrieval_acc}
+ \end{figure*}
+
+ \paragraph*{Distribution of text-image cosine similarities}
+
+ Figure~\ref{fig:sim_valtrain} (left) shows the distribution of cosine similarities between corresponding image and text embeddings, $x_i$ and $y_i$, for the base CLIP model (purple line), and for the LLM-summarized abstracts using the fine-tuned CLIP model (red line).
+ %
+ Distributions evaluated for a shuffled order of text embeddings -- therefore randomizing the image-text correspondence during evaluation -- are shown as dashed lines. We note that the shuffling here is performed at the evaluation stage, and not the training stage.
+ %
+ The distribution for the base model is sharply peaked at a specific value, showing little diversity and being very similar between the shuffled (dashed purple) and non-shuffled (solid purple) versions.
+ %
+ Distributions for the fine-tuned model, on the other hand, show a clear separation when evaluated on shuffled (dashed red) and corresponding (solid red) text-image pairs.
+
+ \paragraph*{Retrieval accuracy}
+
+ Figure~\ref{fig:sim_valtrain} (right) shows the retrieval accuracy as a function of the retrieval fraction $k\%$.
+ %
+ In this case, we evaluate all four models (fine-tuned on raw abstracts (blue), fine-tuned on LLM-summarized abstracts (red), trained on LLM-summarized abstracts from scratch (yellow), and the base model (purple)) on the same captions dataset -- the summarized abstracts -- for a direct comparison.
+ %
+ Remarkably, the model trained on raw abstracts shows very similar performance when evaluated on the summarized abstracts compared to that trained on the summarized abstracts themselves, indicating that \emph{(1)} the image-text association signal is preserved in the summarization process, and \emph{(2)} the model is able to effectively leverage meaningful concepts in the noisy raw abstracts through weak supervision. The significantly worse performance of the model trained from scratch, compared to the fine-tuned models, highlights the crucial role of the inductive bias inherited from the base pre-trained model, which effectively captures rich associations between images and language.
+
+ \begin{figure*}[!h]
+ \centering
+ \includegraphics[width=0.42\textwidth]{plots/sim_val.pdf}
+ \includegraphics[width=0.42\textwidth]{plots/retrieval.pdf}
+ \caption{(Left) Distribution of cosine similarities between corresponding image and text embeddings, $x_i$ and $y_i$, shown when using the base CLIP model (purple lines), and the summary fine-tuned CLIP model (red line). Dashed lines correspond to models evaluated on image-text pairs with associations shuffled. (Right) Retrieval accuracy as a function of the retrieval fraction $k$ for the fine-tuned model on the summarized abstracts (red), fine-tuned on raw abstracts (blue), trained on summarized abstracts from scratch (yellow), and the base model (purple).}
+ \label{fig:sim_valtrain}
+ \end{figure*}
+
+ We show retrieval accuracy performance for additional variations on the model and training configuration in App.~\ref{app:ablations}.
+
+ \subsection{Image Retrieval}
+
+ Having aligned the image and text representations, we can embed a natural language query using the model and show the closest images by embedding from the validation set when ranked by cosine similarity. A sketch of this procedure is shown in Fig.~\ref{fig:overview} (right), and a minimal code sketch is given below.
+ %
+ We show these in Tabs.~\ref{tab:tti_base} and \ref{tab:tti} for the base and fine-tuned models, respectively, using two simple curated queries: \texttt{Jupiter} and \texttt{SN1987A} (a specific supernova). The proposal ID corresponding to the retrieved images is shown below each image, and contains a hyperlink to the MAST page corresponding to the proposal for further details.
+
+ While the base model shows some signs of meaningful retrieval (e.g., the image of Jupiter in the first row of Tab.~\ref{tab:tti_base}),
+ % it is challenging to discern meaningful, strong associations between the retrieved images and corresponding query.
+ \changes{associations between the retrieved images and corresponding queries are not consistent.}
+
+ The model fine-tuned with summarized abstracts, meanwhile, shows strikingly different behavior (Tab.~\ref{tab:tti}).
+ %
+ % The \texttt{dwarf galaxy}-queried images correspond to proposals aiming to measure the kinematics of the stellar cores of dwarf galaxies.
+ % %
+ Images resembling Jupiter are returned for the \texttt{Jupiter} query.
+ %
+ However, this example also illustrates the model's potential to misidentify objects, with the first and third images actually showing Saturn with artifacts on the planet and partially obscured rings.
+ %
+ Supernova SN1987A itself can be seen in the three closest images for the \texttt{SN1987A} query. % with the fourth image being a supernova remnant.
+ % %
+ % Cluster-scale as well as galaxy-scale gravitational lenses are returned by the \texttt{strong lensing} query, with lensing patterns visible in the images.
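+
+ A minimal sketch of the retrieval procedure follows; the encoder wrappers \texttt{embed\_text} and \texttt{embed\_images} are hypothetical stand-ins for the fine-tuned CLIP text and image towers, assumed to return L2-normalized embeddings.
+\begin{lstlisting}[language=Python]
+import jax.numpy as jnp
+
+# Hypothetical wrappers around the fine-tuned CLIP text/image encoders.
+query_emb = embed_text("SN1987A")              # shape (n_emb,)
+image_embs = embed_images(validation_images)   # shape (N, n_emb)
+
+# For normalized embeddings, cosine similarity reduces to a dot product.
+sims = image_embs @ query_emb                  # shape (N,)
+
+# Indices of the top-3 most similar validation images.
+top3 = jnp.argsort(sims)[::-1][:3]
+\end{lstlisting}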
+
+ \changes{We also evaluate the observation retrieval task more quantitatively. We design a prompt which asks whether the abstract corresponding to a retrieved observation is relevant to a given query, with the output constrained to be a boolean using \package{Outlines}. We then evaluate this prompt, for the base as well as fine-tuned models, on the top 10 closest images by cosine similarity returned for 10 different queries. 38\% and 77\% of the retrieved observations are deemed relevant when using the base and fine-tuned models, respectively. The fine-tuned model is thus significantly more likely to return images relevant to the query. The prompt and curated queries for this test are described in App.~\ref{app:quanteval_prompt}.}
+
+ Note that we chose to illustrate qualitative performance on image retrieval using the model fine-tuned on summarized abstracts, rather than raw abstracts.
+ % We show analogous results for the model fine-tuned on raw abstracts in App.~\ref{app:eval_raw}.
+ Although the two models show very similar quantitative performance on retrieval metrics (as shown in Fig.~\ref{fig:sim_valtrain}), they exhibit characteristically different behaviors in terms of the images retrieved, with the summary fine-tuned model generally retrieving images that look more visually ``relevant'' to a domain expert. We emphasize that for scientific usefulness, the goal is not necessarily to correctly retrieve the most relevant objects, but rather to identify a diverse set of interesting candidates for manual follow-up and further analysis. % ; both models are seen to perform sensibly, even if differently, in this regard.
+ %
+ \changes{By diverse, we mean that retrieved observations may contain different types of objects or phenomena, which may be relevant to the query in distinct ways.}
+
+ The fine-tuned model can similarly be used for description/text retrieval, akin to the traditional zero-shot classification setting, where the closest text snippets from a curated list are returned given an observed astronomical image. We show examples of the text retrieval task in App.~\ref{app:text_retrieval}.
+
+ \begin{table}[h!]
+ \centering + \begin{tabular}{m{2.7cm} p{2.9cm} p{2.9cm} p{2.9cm}} + \toprule + \centering \bfseries Query & \multicolumn{3}{c}{\bfseries{Top-3 most similar images using \textcolor{deeppurple}{base off-the-shelf CLIP model}}} \tabularnewline + % \midrule + % \texttt{\input{\thedatafolder/query_tti_base_1.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_1_0.pdf} \\ \input{\thedatafolder/propid_tti_base_1_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_1_1.pdf} \\ \input{\thedatafolder/propid_tti_base_1_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_1_2.pdf} \\ \input{\thedatafolder/propid_tti_base_1_2.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_1_3.pdf} \\ \input{\thedatafolder/propid_tti_base_1_3.txt} \tabularnewline + \midrule + \texttt{\input{\thedatafolder/query_tti_base_0.txt}} \vspace{20mm} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_base_0_0.pdf} \\ \input{\thedatafolder/propid_tti_base_0_0.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_base_0_1.pdf} \\ \input{\thedatafolder/propid_tti_base_0_1.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_base_0_2.pdf} \\ \input{\thedatafolder/propid_tti_base_0_2.txt} \tabularnewline + \midrule + \texttt{\input{\thedatafolder/query_tti_base_2.txt}} \vspace{20mm} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_base_2_0.pdf} \\ \input{\thedatafolder/propid_tti_base_2_0.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_base_2_1.pdf} \\ \input{\thedatafolder/propid_tti_base_2_1.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_base_2_2.pdf} \\ \input{\thedatafolder/propid_tti_base_2_2.txt} \tabularnewline + % \midrule + % \texttt{\input{\thedatafolder/query_tti_base_3.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_3_0.pdf} \\ \input{\thedatafolder/propid_tti_base_3_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_3_1.pdf} \\ \input{\thedatafolder/propid_tti_base_3_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_3_2.pdf} \\ \input{\thedatafolder/propid_tti_base_3_2.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_base_3_3.pdf} \\ \input{\thedatafolder/propid_tti_base_3_3.txt} \tabularnewline + \bottomrule + \end{tabular} + \caption{For two text queries (left-most column), the three most similar images from the validation dataset by cosine similarity when using the \textbf{\textcolor{deeppurple}{base (off-the-shelf) CLIP model}} (\texttt{CLIP-ViT-B/16}). The proposal ID associated with each image is given below the image and contains a hyperlink to the MAST page corresponding to the proposal.} + \label{tab:tti_base} + \end{table} + + \begin{table}[h!] 
+ \centering + \begin{tabular}{m{2.7cm} p{2.9cm} p{2.9cm} p{2.9cm}} + \toprule + \centering \bfseries Query & \multicolumn{3}{c}{\bfseries{Top-3 most similar images using \textcolor{deepred}{summary fine-tuned CLIP model}}} \tabularnewline + \midrule + % \texttt{\input{\thedatafolder/query_tti_1.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_1_0.pdf} \\ \input{\thedatafolder/propid_tti_1_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_1_1.pdf} \\ \input{\thedatafolder/propid_tti_1_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_1_2.pdf} \\ \input{\thedatafolder/propid_tti_1_2.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_1_3.pdf} \\ \input{\thedatafolder/propid_tti_1_3.txt} \tabularnewline + % \midrule + \texttt{\input{\thedatafolder/query_tti_0.txt}} \vspace{20mm} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_0_0.pdf} \\ \input{\thedatafolder/propid_tti_0_0.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_0_1.pdf} \\ \input{\thedatafolder/propid_tti_0_1.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_0_2.pdf} \\ \input{\thedatafolder/propid_tti_0_2.txt} \tabularnewline + \midrule + \texttt{\input{\thedatafolder/query_tti_2.txt}} \vspace{20mm} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_2_0.pdf} \\ \input{\thedatafolder/propid_tti_2_0.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_2_1.pdf} \\ \input{\thedatafolder/propid_tti_2_1.txt} & \centering \includegraphics[width=0.16\textwidth]{\thedatafolder/img_tti_2_2.pdf} \\ \input{\thedatafolder/propid_tti_2_2.txt} \tabularnewline + % \midrule + % \texttt{\input{\thedatafolder/query_tti_3.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_3_0.pdf} \\ \input{\thedatafolder/propid_tti_3_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_3_1.pdf} \\ \input{\thedatafolder/propid_tti_3_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_3_2.pdf} \\ \input{\thedatafolder/propid_tti_3_2.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_3_3.pdf} \\ \input{\thedatafolder/propid_tti_3_3.txt} \tabularnewline + \bottomrule + \end{tabular} + \caption{Same as Tab.~\ref{tab:tti_base}, but using the \textbf{\textcolor{deepred}{summary fine-tuned CLIP model}}.} + \label{tab:tti} + \end{table} + + + + \section{Outlook and Conclusions} + \label{sec:conclusion} + + We present \textsc{PAPERCLIP}, a method for training domain-specific multi-modal models for astrophysics that associates observations imaged by telescopes with natural language in a common embedding space. + % + We showcase an application to \hubble Space Telescope (HST) observations, where the model is fine-tuned from a pre-trained CLIP model using abstracts of successful \hubble proposals, optionally summarized, leveraging a noisy association signal between text and images. + % + We show that \textsc{PAPERCLIP} significantly outperforms the base CLIP model in quantitative metrics, such as retrieval accuracy, as well as quality of text-to-image and image-to-text retrieval. + % + We also introduce a novel LLM summarization process which leverages guided generation to distill the content of proposal abstracts while preserving salient information. 
+ %
+ Overall, the procedure demonstrates the efficacy of fine-tuning generalist pre-trained models on small amounts of domain-specific data, in particular astronomical datasets, and of leveraging text as an interface for interacting with the data.
+
+ Although the model explored here is fine-tuned using postage stamp images (i.e., preview-quality and not science-grade data), we highlight potential immediate as well as downstream use cases.
+ %
+ A model trained using weakly-supervised image-text pairs can be used to query large amounts of unlabeled survey data (e.g., PHANGS~\citep{lee2022phangs} or COSMOS~\citep{scoville2007cosmic}) for objects or use cases of interest using natural language, as well as to efficiently find patterns in such data that may not be apparent using specialized models or manual inspection.
+ %
+ The learned representations, having been shown to correlate with physical characteristics of imaged objects, can also be fine-tuned via transfer learning to adapt either to specific tasks, e.g., classification~\citep{wei2020deep} or segmentation~\citep{hausen2020morpheus}, or to observations imaged by other telescopes.
+
+ Finally, while the CLIP model is restricted to retrieving nearest-neighbor associations within and across text/image modalities, the learned embeddings can be used as a starting point for training or fine-tuning multi-modal large language models for interacting with survey data and receiving responses in natural language form, as well as grounding the responses based on an existing set of observations.
+
+\subsubsection*{Acknowledgments}
+
+We thank Michael Brenner, Fran\c{c}ois Lanusse, and Julian Mu\~{n}oz for helpful conversations.
+%
+This work is supported by the National Science Foundation under Cooperative Agreement PHY-2019786 (The NSF AI Institute for Artificial Intelligence and Fundamental Interactions, \url{http://iaifi.org/}).
+%
+This material is based upon work supported by the U.S. Department of Energy, Office of Science, Office of High Energy Physics under grant Contract Number DE-SC0012567.
+%
+YS was supported by the Research Science Institute (RSI) program at MIT.
+%
+This research was supported by an award from Google, ``Interpretation of Multimodal Images from Astronomy''.
+%
+This research was supported by the Munich Institute for Astro-, Particle and BioPhysics (MIAPbP), which is funded by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under Germany's Excellence Strategy – EXC-2094 – 390783311.
+%
+The computations in this paper were run on the FASRC Cannon cluster supported by the FAS Division of Science Research Computing Group at Harvard University.
+
+This research is based on observations made with the NASA/ESA Hubble Space Telescope obtained from the Space Telescope Science Institute, which is operated by the Association of Universities for Research in Astronomy, Inc., under NASA contract NAS 5-26555.
+%
+Based on observations made with the NASA/ESA Hubble Space Telescope, and obtained from the Hubble Legacy Archive, which is a collaboration between the Space Telescope Science Institute (STScI/NASA), the Space Telescope European Coordinating Facility (ST-ECF/ESAC/ESA) and the Canadian Astronomy Data Centre (CADC/NRC/CSA).
+
+This work relied on the \package{Astroquery} \citep{2019AJ....157...98G}, \package{BitsAndBytes} \citep{dettmers2022llmint8}, \package{Flax} \citep{flax2020github}, \package{Jax} \citep{jax2018github}, \package{Jupyter} \citep{Kluyver2016jupyter}, \package{Matplotlib} \citep{Hunter:2007}, \package{Numpy} \citep{harris2020array}, \package{Optax} \citep{deepmind2020jax}, \package{Outlines}, \package{Pandas} \citep{2020SciPy-NMeth}, \package{Pydantic}, \package{PyTorch} \citep{paszke2019pytorch}, \package{SciPy} \citep{2020SciPy-NMeth}, \package{Transformers} \citep{wolf2019huggingface}, and \package{Wandb} \citep{wandb} software packages.
+
+ \subsubsection*{Reproducibility Statement}
+
+ Code used to reproduce the results in this work is available at \url{https://github.com/smsharma/PAPERCLIP-Hubble/tree/main}.
+
+\subsubsection*{Ethics Statement}
+This work relies on using abstracts from successful \hubble Space Telescope observing proposals as part of a dataset for training and evaluating machine learning models. While these abstracts are publicly available, the authors likely did not anticipate their text being used in this manner, raising questions around consent, attribution, and appropriate use of data. Since this research intends to develop methods to aid astronomical research and does not use sensitive personal information or target commercial gain, we believe that the scientific benefits outweigh the potential concerns in this case, while acknowledging good-faith arguments to the contrary. As the use of foundation models in the sciences increases, it will be important for the community to consider norms and guidelines around the appropriate use and attribution of various data sources for model training and evaluation, including qualitative textual data, to ensure transparency and maintain trust.
+
+
+\bibliography{hubble_paperclip}
+\bibliographystyle{colm2024_conference}
+
+\appendix
+
+\section{Details on the Abstract Summarization Procedure}
+
+We provide additional details of the abstract summarization procedure, including a brief overview of the guided generation method used, as well as the prompts and schema used for the summarization task.
+
+\subsection{Guided LLM Generation with \package{Outlines}}
+\label{app:guided-generation}
+
+As mentioned in Sec.~\ref{sec:summarization}, we employ the guided generation method introduced by \citet{willard2023efficient} and implemented in \package{Outlines} to ensure that the LLM summarization of the raw proposal abstracts adheres to a specific pattern, specified in JSON format (Sec.~\ref{app:summarization} below); we briefly describe the method here. This approach represents the desired output format as a finite-state machine (FSM): the JSON schema constraint is first converted into a regular expression, which is in turn compiled into an FSM.
+
+The key idea then is to pre-compute an index that maps each state of the FSM to the subset of tokens from the LLM's vocabulary that can be generated from that state while still allowing for a valid completion of the pattern.
By doing so, we can efficiently determine the valid next tokens at each step of the generation process without having to check the entire vocabulary.
+
+Formally, let $\mathcal{M} = (Q, \Sigma, \delta, q_0, F)$ be the FSM representing the regular expression, where $Q$ is the set of states, $\Sigma$ is the alphabet of the regular expression, $\delta: Q \times \Sigma \rightarrow Q$ is the transition function between states, $q_0$ is the start state, and $F\subseteq Q$ is the set of accept states which terminate the generation. An index $\sigma: Q \rightarrow \mathcal{P}(V)$ is first constructed, where $V$ is the LLM's token vocabulary and $\mathcal{P}(V)$ denotes the power set of $V$. For each state $q \in Q$, $\sigma(q)$ contains the allowed tokens that can be generated from state $q$ while maintaining the possibility of reaching an accept state. The construction of $\sigma$ involves finding all token sequences that, when processed by the FSM starting from each state $q$, lead to an accept state.
+
+During the sequential generation process, we keep track of the current FSM state $q_t$ after sampling each token $v_t$. At each step $t$, the LLM's output logits are masked based on the valid next tokens $\sigma(q_t)$, setting the logits of invalid tokens to $-\infty$. The next token is then sampled from the categorical distribution defined by the unmasked logits, and the FSM transitions to the next state $q_{t+1} = \delta(q_t, v_{t+1})$, where $v_{t+1} \in \Sigma$ is the token in the regular expression alphabet corresponding to the sampled token. This process continues until an accept state with no outgoing transitions is reached, indicating a valid completion of the pattern.
+
+\subsection{Prompts and Schema Used for Summarization}
+\label{app:summarization}
+
+We list here the prompts and schema (i.e., desired output formats) used for guided text generation via the \package{Outlines} package interfacing with the \textsc{Mixtral-8x7B-Instruct} open-weights large language model.
+
+The following schema, specified using the data-validation package \package{Pydantic}, is used to guide the generation of the summaries, intended to produce between one and five objects and phenomena, as well as between one and five science use cases, given a raw proposal abstract. Both fields are of type \texttt{conlist}, a \package{Pydantic} type that represents a constrained list. \\
+
+\begin{lstlisting}[language=Python]
+from pydantic import BaseModel, conlist
+
+class ConstrainedResponseHST(BaseModel):
+    objects_and_phenomena: conlist(str, min_length=1, max_length=5)
+    science_use_cases: conlist(str, min_length=1, max_length=5)
+\end{lstlisting}
+
+The following prompt function is used to produce a list of one to five possible objects and phenomena shown in HST observations downstream of a proposal abstract, as well as one to five possible science use cases, in the format native to \package{Outlines}. \textcolor{deepgreen}{\lstinline{"[INST]"}} and \textcolor{deepgreen}{\lstinline{"[/INST]"}} are start and end instruction delimiters, respectively, for the \textsc{Mixtral-8x7B} model.\\
+
+\begin{lstlisting}[language=Python]
+import outlines
+
+@outlines.prompt
+def prompt_fn(abstract):
+    """[INST] You are an expert astrophysicist, with broad expertise across observational and theoretical astrophysics. You are able to extract core information from astrophysical texts.
+
+Abstract: "{{abstract}}"
+
+Based on the above observational proposal abstract, your task is to summarize the nature of the eventual observations.
You will identify the astrophysical objects and phenomena, as well as the potential science use cases described in the abstract. + +Follow these instructions exactly: +- Mention up to 5 items for both categories; do not mention more than 5 items in either category. +- Choose the most relevant ones if there are more than 5 items in a category. +- Never mention the Hubble Space Telescope, HST, or the HST archive. +- Mention the class (e.g., barred spiral galaxy) and not just the specific instance (e.g., Andromeda). +- Name the objects in the science use cases, if appropriate. +- Write out full names of objects in addition to acronyms. +- Do not list irrelevant objects which do not describe the eventual observation, such as units or proposal Cycle numbers. List fewer but more relevant objects, if in doubt. +- Each science case listed must be self-contained but succinct. +- Only write in English. +- Do not list items that are too generic (e.g., galaxy, faint object, kinematics) +- The total length of text should not exceed 80 words. +- Present your lists in a comma-separated format; no dashed or numbered lists. + +Example output: {'objects_and_phenomena':'spiral galaxies, galaxy clusters, supernova remnants', 'science_use_cases':'model galactic structure and evolution, characterize dark matter distribution in clusters, analyze expansion rates of supernova remnants'} + +Answer in JSON format. The JSON should be a dictionary with keys "objects_and_phenomena" and "science_use_cases". + +[/INST] +""" +\end{lstlisting} + +\subsection{Prompt Used for Quantitative Evaluation of Observation Retrieval} +\label{app:quanteval_prompt} + +\changes{The following prompt was used to evaluate the relevance of abstracts corresponding to retrieved images to a query, when quantitatively assessing the observation retrieval task.} + +\begin{lstlisting}[language=Python] +import outlines + +@outlines.prompt +def prompt_fn(abstract, query): + """[INST] +You are an expert astrophysicist, with broad expertise across observational and theoretical astrophysics. + +Abstract: "{{abstract}}" +Query: "{{query}}" + +The above is an abstract for a proposed observation taken by the Hubble Space Telescope (labeled "Abstract"), and an object or concept (labeled "Query"). + +Could the observations corresponding to the abstract contain the query? Be precise, and do not contain related concepts or objects. + +Your response should be either True or False. Only return True if the query is closely related to the abstract, and the downstream observation could be relevant to the query. +[/INST] +""" +\end{lstlisting} + +\changes{The queries used in the evaluation were \texttt{["globular cluster", "dwarf galaxy", "SN1987A", "strong lensing", "galaxy clusters", "interstellar medium", "dark matter", "spiral galaxies", "lyman alpha", "comets"].}} + +% \section{Evaluation of Model Trained on Raw Abstracts} +% \label{app:eval_raw} + +% In the main text, we illustrated qualitative evaluation (image and text retrieval) for the model fine-tuned on summarized abstracts. Here, we show the same for the model fine-tuned on raw proposal abstracts. + +% Table~\ref{tab:tti_abs} shows the Top-3 most similar images for the abstract fine-tuned CLIP model on the same curated queries as in Tab.~\ref{tab:tti} for the summary fine-tuned model. Table~\ref{tab:itt_abs} shows text associations from the curated list most closely matching the image queries, for the base and abstract fine-tuned models, as well as the summary fine-tuned model, for comparison. 
Although qualitatively different behavior is observed for both tasks, the objects retrieved are seen to, in most cases, meaningfully correspond to the given image/text queries. + +% \begin{table}[h!] +% \centering +% \begin{tabular}{m{2.7cm} p{2.9cm} p{2.9cm} p{2.9cm}} +% \toprule +% \centering \bfseries Query & \multicolumn{3}{c}{\bfseries{Top-3 most similar images using \textcolor{deepblue}{abstract fine-tuned CLIP model}}} \tabularnewline +% \midrule +% % \texttt{\input{\thedatafolder/query_tti_abs_1.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_1_0.pdf} \\ \input{\thedatafolder/propid_tti_abs_1_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_1_1.pdf} \\ \input{\thedatafolder/propid_tti_abs_1_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_1_2.pdf} \\ \input{\thedatafolder/propid_tti_abs_1_2.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_1_3.pdf} \\ \input{\thedatafolder/propid_tti_abs_1_3.txt} \tabularnewline +% % \midrule +% \texttt{\input{\thedatafolder/query_tti_abs_0.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_0_0.pdf} \\ \input{\thedatafolder/propid_tti_abs_0_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_0_1.pdf} \\ \input{\thedatafolder/propid_tti_abs_0_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_0_2.pdf} \\ \input{\thedatafolder/propid_tti_abs_0_2.txt} \tabularnewline +% \midrule +% \texttt{\input{\thedatafolder/query_tti_abs_2.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_2_0.pdf} \\ \input{\thedatafolder/propid_tti_abs_2_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_2_1.pdf} \\ \input{\thedatafolder/propid_tti_abs_2_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_2_2.pdf} \\ \input{\thedatafolder/propid_tti_abs_2_2.txt} \tabularnewline +% % \midrule +% % \texttt{\input{\thedatafolder/query_tti_abs_3.txt}} \vspace{20mm} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_3_0.pdf} \\ \input{\thedatafolder/propid_tti_abs_3_0.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_3_1.pdf} \\ \input{\thedatafolder/propid_tti_abs_3_1.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_3_2.pdf} \\ \input{\thedatafolder/propid_tti_abs_3_2.txt} & \centering \includegraphics[width=0.18\textwidth]{\thedatafolder/img_tti_abs_3_3.pdf} \\ \input{\thedatafolder/propid_tti_abs_3_3.txt} \tabularnewline +% \bottomrule +% \end{tabular} +% \caption{Same as Tabs.~\ref{tab:tti_base} and \ref{tab:tti}, but using the \textbf{\textcolor{deepblue}{abstract fine-tuned CLIP model}}.} +% \label{tab:tti_abs} +% \end{table} + +% \begin{table}[t!] 
+% \centering +% \renewcommand{\arraystretch}{0.1} +% \begin{tabular}{m{3cm} m{3.9cm} m{3.9cm} m{3.9cm}} +% \toprule +% \centering \bfseries \hubble image & \centering \textbf{Top-3 text} \\ \textbf{\textcolor{deeppurple}{(base off-the-shelf)}} & \centering \textbf{Top-3 text} \\ \textbf{\textcolor{deepblue}{(abstract fine-tuned)}} & \centering \textbf{Top-3 text} \\ \textbf{\textcolor{deepred}{(summary fine-tuned)}} \tabularnewline +% \midrule +% \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_abs_0.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_0.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_abs_0.txt} & {\scriptsize \verbatiminput{\thedatafolder/sci_itt_0.txt}} \tabularnewline +% \midrule +% \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_abs_1.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_1.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_abs_1.txt} & {\scriptsize \verbatiminput{\thedatafolder/sci_itt_1.txt}} \tabularnewline +% \midrule +% \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_abs_2.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_2.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_abs_2.txt} & {\scriptsize \verbatiminput{\thedatafolder/sci_itt_2.txt}} \tabularnewline +% \midrule +% \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_abs_3.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_3.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_abs_3.txt} & {\scriptsize \verbatiminput{\thedatafolder/sci_itt_3.txt}} \tabularnewline +% \bottomrule +% \end{tabular} +% \caption{Text associations from a curated list most closely matching four image queries (first column, the same as in Tab.~\ref{tab:itt}), for the \textcolor{deeppurple}{base off-the-shelf} (\texttt{CLIP-ViT-B/16}), \textcolor{deepblue}{abstract fine-tuned}, and \textcolor{deepred}{summary fine-tuned} models.} +% \label{tab:itt_abs} +% \end{table} + +\section{Additional Model and Training Details} +\label{app:model_details} + +We use the \texttt{CLIP-ViT-B/16} \citep{radford2021learning} variant as the base pre-trained CLIP model. +% +This model uses a 12-layer, 12-head, 768-embedding dimension vision transformer with patch size $16\times16$ as the image encoder \citep{dosovitskiy2020image} and a 12-layer, 8-head, 512-embedding dimension text sequence transformer as the text backbone \citep{vaswani2017attention}. +% +The text encoder has a maximum length of 77 tokens and the image encoder has a native resolution of $224\times224$ pixels. +% +Linear projection layers map the outputs of the image and text encoders to a common embedding space of dimension $n_\text{emb}=512$. +% +In total, the model contains $\sim 149$ million trainable parameters. +% +This model was originally pre-trained on $\sim 400$ million image-text pairs from internet data. +% + +All models were trained over 20,000 steps with 2000 linear warmup steps +using the AdamW optimizer \citep{DBLP:conf/iclr/LoshchilovH19,DBLP:journals/corr/KingmaB14} with %peak +learning rate $10^{-5}$ and weight decay $10^{-3}$. +% +Training takes approximately 3 hours on 4 Nvidia A100 GPUs. +Models were instantiated using the \package{Transformers} \citep{wolf2019huggingface} library and trained using packages from the \package{Jax} \citep{jax2018github} ecosystem. 
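+
+As an illustration, a minimal \package{Optax} sketch of this optimizer and warmup schedule is given below; the construction is ours and shown for illustration only.
+
+\begin{lstlisting}[language=Python]
+import optax
+
+# Linear warmup over 2,000 steps to the peak learning rate, constant thereafter.
+schedule = optax.join_schedules(
+    schedules=[
+        optax.linear_schedule(init_value=0.0, end_value=1e-5, transition_steps=2_000),
+        optax.constant_schedule(1e-5),
+    ],
+    boundaries=[2_000],
+)
+
+# AdamW with decoupled weight decay, matching the settings above.
+optimizer = optax.adamw(learning_rate=schedule, weight_decay=1e-3)
+\end{lstlisting}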
+% + +\section{Additional Variations on Model and Training} +\label{app:ablations} + +Figure~\ref{fig:sim_app} shows the retrieval accuracy as a function of the retrieval fraction for further variations of the model or training, evaluated and trained on summarized abstracts. The red line corresponds to the model trained on summarized abstract described in the main text (fine-tuned on \texttt{CLIP-ViT-B/16} with constant learning rate $\mathrm{LR}=10^{-5}$ after linear warmup). The purple line corresponds to the base \texttt{CLIP-ViT-B/16} model. + +Curves for the model fine-tuned on the larger base CLIP model \texttt{CLIP-ViT-L/14} (dotted red), with a smaller learning rate $\mathrm{LR}=10^{-6}$ (dashed green), and with a cosine learning rate schedule (green) are also shown. All these models are seen to perform similarly, with the exception of the model trained with smaller learning rate showing degraded performance. Given the similar performance between \texttt{CLIP-ViT-L/14} ($\sim 428$ million parameters) and \texttt{CLIP-ViT-B/16} ($\sim 149$ million parameters), we chose the latter as the base model in the main text for computational efficiency. + + +\begin{figure*}[!h] + \centering + \includegraphics[width=0.62\textwidth]{plots/retrieval_app.pdf} + \caption{Same as Fig.~\ref{fig:sim_valtrain} (right) -- retrieval accuracy as a function of the retrieval fraction -- for further variations on the model or training. The red and purple lines correspond to the model trained on summarized abstract, described in the main text, and the base \texttt{CLIP-ViT-B/16} model, respectively. Curves for the model fine-tuned on the larger base CLIP model \texttt{CLIP-ViT-L/14} (dotted red), with a smaller learning rate $\mathrm{LR}=10^{-6}$ (dashed green), and with a cosine learning rate schedule (green) are also shown.} + \label{fig:sim_app} + \end{figure*} + + \section{Text Retrieval Task} + \label{app:text_retrieval} + + We can use images from the validation set as queries and retrieve the most relevant text chunks (e.g., objects and use cases) from a curated list. + % + + The following curated categories are used in the text retrieval experiment in Sec.~\ref{sec:results}. +% +These are derived by initially prompting \textsc{Claude 2}\footnote{\url{https://claude.ai/}}, having attached a subsample of 30 proposal abstracts in the online interface to be used as context, to produce a list of categories corresponding to typical HST observations. The list is then manually curated to remove similar entries and ensure a representative sample of categories. 
\\ \\
+
+\begin{lstlisting}[language=Python]
+["star forming galaxies", "lyman alpha", "dust", "crowded stellar field", "core-collapse supernova", "cosmology", "gravitational lensing", "supernovae", "diffuse galaxies", "globular clusters", "stellar populations", "interstellar medium", "black holes", "dark matter", "galaxy clusters", "galaxy evolution", "galaxy formation", "quasars", "circumstellar disks", "exoplanets", "Kuiper Belt objects", "solar system objects", "cosmic web structure", "distant galaxies", "galaxy mergers", "galaxy interactions", "star formation", "stellar winds", "brown dwarfs", "white dwarfs", "nebulae", "star clusters", "galaxy archeology", "galactic structure", "active galactic nuclei", "gamma-ray bursts", "stellar nurseries", "intergalactic medium", "dark energy", "dwarf galaxies", "barred spiral galaxies", "irregular galaxies", "starburst galaxies", "low surface brightness galaxies", "ultra diffuse galaxies", "circumgalactic medium", "intracluster medium", "cosmic dust", "interstellar chemistry", "star formation histories", "initial mass function", "stellar proper motions", "binary star systems", "open clusters", "pre-main sequence stars", "protostars", "protoplanetary disks", "jets and outflows", "interstellar shocks", "planetary nebulae", "supernova remnants", "red giants", "Cepheid variables", "RR Lyrae variables", "stellar abundances", "stellar dynamics", "compact stellar remnants", "Einstein rings", "trans-Neptunian objects", "cosmic microwave background", "reionization epoch", "first stars", "first galaxies", "high-redshift quasars", "primordial black holes", "resolved binaries", "binary stars"]
+\end{lstlisting}
+
+The following prompt is used to generate the initial list before manual curation: \emph{``Here is a list of Hubble proposals. Base on this, please provide a list of about 100 strings, each describing a science target or use case for observations imaged by the Hubble Space Telescope. You may use these proposals and also rely on your general knowledge. For example, ["gravitational lensing", "supernovae", "diffuse galaxies", ...]''}
+
+ We show the result of image-to-text retrieval in Tab.~\ref{tab:itt}, for the base (second column) as well as summary fine-tuned (third column) models, using four observations (left-most column) from the validation set.
+ %
+
+ The top four text associations are shown for each image query.
+ %
+ The `ground truth' summarized abstract is shown in the right column.
+ %
+ Both the base and fine-tuned models return a mix of relevant and less-relevant associations, although with different qualitative behavior. Purely qualitatively, the fine-tuned model is seen to consistently return more relevant associations compared to the base model.
+ %
+
+ The second row (an image of supernova 1987A) highlights an interesting pattern -- the base model erroneously identifies the object at the center of the image as a gravitational lens, while the fine-tuned model correctly identifies it as a supernova remnant. This kind of plausible-looking misattribution is common when querying the base model, and largely absent in the fine-tuned model.
+
+ \begin{table}[h!]
+ \centering
+ \renewcommand{\arraystretch}{0.1}
+ \begin{tabular}{m{3cm} m{3.0cm} m{3.0cm} m{3.2cm}}
+ \toprule
+ \centering \bfseries \hubble image & \centering \textbf{Top-4 text} \\ \small\textbf{\textcolor{deeppurple}{(base off-the-shelf)}} & \centering \textbf{Top-4 text} \\ \small\textbf{\textcolor{deepred}{(summary fine-tuned)}} & \centering \textbf{Summarized abstract} \\ (objects; `ground truth') \tabularnewline
+ \midrule
+ \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_0.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_0.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_0.txt} & {\scriptsize \input{\thedatafolder/abs_itt_0.txt}} \tabularnewline
+ \midrule
+ \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_1.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_1.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_1.txt} & {\scriptsize \input{\thedatafolder/abs_itt_1.txt}} \tabularnewline
+ \midrule
+ \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_2.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_2.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_2.txt} & {\scriptsize \input{\thedatafolder/abs_itt_2.txt}} \tabularnewline
+ \midrule
+ \centering \includegraphics[width=0.15\textwidth]{\thedatafolder/img_itt_3.pdf} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_base_3.txt} & \centering \scriptsize \verbatiminput{\thedatafolder/sci_itt_3.txt} & {\scriptsize \input{\thedatafolder/abs_itt_3.txt}} \tabularnewline
+ \bottomrule
+ \end{tabular}
+ \caption{Text snippets from a curated list most closely matching a given image query (left-most column) by cosine similarity of respective embeddings, shown for the \textcolor{deeppurple}{base off-the-shelf} (\texttt{CLIP-ViT-B/16}) and \textcolor{deepred}{summary fine-tuned} models. The `ground truth' LLM-summarized abstract (only objects/phenomena) is shown in the right-most column.}
+ \label{tab:itt}
+ \end{table}
+
+% \section{List of Categories for Text Retrieval Task}
+% \label{app:categories}
+
+
+\end{document}
diff --git a/paper/colm_camera/fancyhdr.sty b/paper/colm_camera/fancyhdr.sty
new file mode 100644
index 0000000..77ed4e3
--- /dev/null
+++ b/paper/colm_camera/fancyhdr.sty
@@ -0,0 +1,485 @@
+% fancyhdr.sty version 3.2
+% Fancy headers and footers for LaTeX.
+% Piet van Oostrum,
+% Dept of Computer and Information Sciences, University of Utrecht,
+% Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
+% Telephone: +31 30 2532180. Email: piet@cs.uu.nl
+% ========================================================================
+% LICENCE:
+% This file may be distributed under the terms of the LaTeX Project Public
+% License, as described in lppl.txt in the base LaTeX distribution.
+% Either version 1 or, at your option, any later version.
+% ======================================================================== +% MODIFICATION HISTORY: +% Sep 16, 1994 +% version 1.4: Correction for use with \reversemargin +% Sep 29, 1994: +% version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands +% Oct 4, 1994: +% version 1.6: Reset single spacing in headers/footers for use with +% setspace.sty or doublespace.sty +% Oct 4, 1994: +% version 1.7: changed \let\@mkboth\markboth to +% \def\@mkboth{\protect\markboth} to make it more robust +% Dec 5, 1994: +% version 1.8: corrections for amsbook/amsart: define \@chapapp and (more +% importantly) use the \chapter/sectionmark definitions from ps@headings if +% they exist (which should be true for all standard classes). +% May 31, 1995: +% version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage... +% construction in the doc did not work properly with the fancyplain style. +% June 1, 1995: +% version 1.91: The definition of \@mkboth wasn't restored on subsequent +% \pagestyle{fancy}'s. +% June 1, 1995: +% version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain} +% \pagestyle{fancy} would erroneously select the plain version. +% June 1, 1995: +% version 1.93: \fancypagestyle command added. +% Dec 11, 1995: +% version 1.94: suggested by Conrad Hughes +% CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule +% position (old hardcoded value of .3\normalbaselineskip is far too high +% when used with very small footer fonts). +% Jan 31, 1996: +% version 1.95: call \@normalsize in the reset code if that is defined, +% otherwise \normalsize. +% this is to solve a problem with ucthesis.cls, as this doesn't +% define \@currsize. Unfortunately for latex209 calling \normalsize doesn't +% work as this is optimized to do very little, so there \@normalsize should +% be called. Hopefully this code works for all versions of LaTeX known to +% mankind. +% April 25, 1996: +% version 1.96: initialize \headwidth to a magic (negative) value to catch +% most common cases that people change it before calling \pagestyle{fancy}. +% Note it can't be initialized when reading in this file, because +% \textwidth could be changed afterwards. This is quite probable. +% We also switch to \MakeUppercase rather than \uppercase and introduce a +% \nouppercase command for use in headers. and footers. +% May 3, 1996: +% version 1.97: Two changes: +% 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults +% for the chapter and section marks. The current version of amsbook and +% amsart classes don't seem to need them anymore. Moreover the standard +% latex classes don't use \markboth if twoside isn't selected, and this is +% confusing as \leftmark doesn't work as expected. +% 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem +% in the amsbook and amsart classes, that make global changes to \topskip, +% which are reset in \ps@empty. Hopefully this doesn't break other things. +% May 7, 1996: +% version 1.98: +% Added % after the line \def\nouppercase +% May 7, 1996: +% version 1.99: This is the alpha version of fancyhdr 2.0 +% Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf. +% Changed \headrulewidth, \footrulewidth, \footruleskip to +% macros rather than length parameters, In this way they can be +% conditionalized and they don't consume length registers. There is no need +% to have them as length registers unless you want to do calculations with +% them, which is unlikely. 
Note that this may make some uses of them +% incompatible (i.e. if you have a file that uses \setlength or \xxxx=) +% May 10, 1996: +% version 1.99a: +% Added a few more % signs +% May 10, 1996: +% version 1.99b: +% Changed the syntax of \f@nfor to be resistent to catcode changes of := +% Removed the [1] from the defs of \lhead etc. because the parameter is +% consumed by the \@[xy]lhead etc. macros. +% June 24, 1997: +% version 1.99c: +% corrected \nouppercase to also include the protected form of \MakeUppercase +% \global added to manipulation of \headwidth. +% \iffootnote command added. +% Some comments added about \@fancyhead and \@fancyfoot. +% Aug 24, 1998 +% version 1.99d +% Changed the default \ps@empty to \ps@@empty in order to allow +% \fancypagestyle{empty} redefinition. +% Oct 11, 2000 +% version 2.0 +% Added LPPL license clause. +% +% A check for \headheight is added. An errormessage is given (once) if the +% header is too large. Empty headers don't generate the error even if +% \headheight is very small or even 0pt. +% Warning added for the use of 'E' option when twoside option is not used. +% In this case the 'E' fields will never be used. +% +% Mar 10, 2002 +% version 2.1beta +% New command: \fancyhfoffset[place]{length} +% defines offsets to be applied to the header/footer to let it stick into +% the margins (if length > 0). +% place is like in fancyhead, except that only E,O,L,R can be used. +% This replaces the old calculation based on \headwidth and the marginpar +% area. +% \headwidth will be dynamically calculated in the headers/footers when +% this is used. +% +% Mar 26, 2002 +% version 2.1beta2 +% \fancyhfoffset now also takes h,f as possible letters in the argument to +% allow the header and footer widths to be different. +% New commands \fancyheadoffset and \fancyfootoffset added comparable to +% \fancyhead and \fancyfoot. +% Errormessages and warnings have been made more informative. +% +% Dec 9, 2002 +% version 2.1 +% The defaults for \footrulewidth, \plainheadrulewidth and +% \plainfootrulewidth are changed from \z@skip to 0pt. In this way when +% someone inadvertantly uses \setlength to change any of these, the value +% of \z@skip will not be changed, rather an errormessage will be given. + +% March 3, 2004 +% Release of version 3.0 + +% Oct 7, 2004 +% version 3.1 +% Added '\endlinechar=13' to \fancy@reset to prevent problems with +% includegraphics in header when verbatiminput is active. + +% March 22, 2005 +% version 3.2 +% reset \everypar (the real one) in \fancy@reset because spanish.ldf does +% strange things with \everypar between << and >>. + +\def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty} + +\def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else + \fancy@gbl\def#1{#2\strut}\fi} + +\let\fancy@gbl\global + +\def\@fancyerrmsg#1{% + \ifx\PackageError\undefined + \errmessage{#1}\else + \PackageError{Fancyhdr}{#1}{}\fi} +\def\@fancywarning#1{% + \ifx\PackageWarning\undefined + \errmessage{#1}\else + \PackageWarning{Fancyhdr}{#1}{}\fi} + +% Usage: \@forc \var{charstring}{command to be executed for each char} +% This is similar to LaTeX's \@tfor, but expands the charstring. 
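+% For instance (a commented sketch only): \@forc\tmp{abc}{\message{\tmp}}
+% would run \message three times, with \tmp bound to a, b, and then c.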
+ +\def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}} +\def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else + \f@@rc#1#2\f@@rc{#3}\fi} +\def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}} + +% Usage: \f@nfor\name:=list\do{body} +% Like LaTeX's \@for but an empty list is treated as a list with an empty +% element + +\newcommand{\f@nfor}[3]{\edef\@fortmp{#2}% + \expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}} + +% Usage: \def@ult \cs{defaults}{argument} +% sets \cs to the characters from defaults appearing in argument +% or defaults if it would be empty. All characters are lowercased. + +\newcommand\def@ult[3]{% + \edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a + \def#1{}% + \@forc\tmpf@ra{#2}% + {\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}% + \ifx\@empty#1\def#1{#2}\fi} +% +% \if@in +% +\newcommand{\if@in}[4]{% + \edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}% + \expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi} + +\newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}% + {\f@ncyhf\fancyhead h[]}} +\newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}% + {\f@ncyhf\fancyfoot f[]}} +\newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}% + {\f@ncyhf\fancyhf{}[]}} + +% New commands for offsets added + +\newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}% + {\f@ncyhfoffs\fancyheadoffset h[]}} +\newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}% + {\f@ncyhfoffs\fancyfootoffset f[]}} +\newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}% + {\f@ncyhfoffs\fancyhfoffset{}[]}} + +% The header and footer fields are stored in command sequences with +% names of the form: \f@ncy with for [eo], from [lcr] +% and from [hf]. + +\def\f@ncyhf#1#2[#3]#4{% + \def\temp@c{}% + \@forc\tmpf@ra{#3}% + {\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}% + {}{\edef\temp@c{\temp@c\tmpf@ra}}}% + \ifx\@empty\temp@c\else + \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument: + [#3]}% + \fi + \f@nfor\temp@c{#3}% + {\def@ult\f@@@eo{eo}\temp@c + \if@twoside\else + \if\f@@@eo e\@fancywarning + {\string#1's `E' option without twoside option is useless}\fi\fi + \def@ult\f@@@lcr{lcr}\temp@c + \def@ult\f@@@hf{hf}{#2\temp@c}% + \@forc\f@@eo\f@@@eo + {\@forc\f@@lcr\f@@@lcr + {\@forc\f@@hf\f@@@hf + {\expandafter\fancy@def\csname + f@ncy\f@@eo\f@@lcr\f@@hf\endcsname + {#4}}}}}} + +\def\f@ncyhfoffs#1#2[#3]#4{% + \def\temp@c{}% + \@forc\tmpf@ra{#3}% + {\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}% + {}{\edef\temp@c{\temp@c\tmpf@ra}}}% + \ifx\@empty\temp@c\else + \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument: + [#3]}% + \fi + \f@nfor\temp@c{#3}% + {\def@ult\f@@@eo{eo}\temp@c + \if@twoside\else + \if\f@@@eo e\@fancywarning + {\string#1's `E' option without twoside option is useless}\fi\fi + \def@ult\f@@@lcr{lr}\temp@c + \def@ult\f@@@hf{hf}{#2\temp@c}% + \@forc\f@@eo\f@@@eo + {\@forc\f@@lcr\f@@@lcr + {\@forc\f@@hf\f@@@hf + {\expandafter\setlength\csname + f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname + {#4}}}}}% + \fancy@setoffs} + +% Fancyheadings version 1 commands. These are more or less deprecated, +% but they continue to work. 
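+% For example (a commented sketch), the two calls below are equivalent:
+% \lhead{Draft} % version 1 command, kept for compatibility
+% \fancyhead[L]{Draft} % preferred version 2 form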
+ +\newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}} +\def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}} +\def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}} + +\newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}} +\def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}} +\def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}} + +\newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}} +\def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}} +\def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}} + +\newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}} +\def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}} +\def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}} + +\newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}} +\def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}} +\def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}} + +\newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}} +\def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}} +\def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}} + +\newlength{\fancy@headwidth} +\let\headwidth\fancy@headwidth +\newlength{\f@ncyO@elh} +\newlength{\f@ncyO@erh} +\newlength{\f@ncyO@olh} +\newlength{\f@ncyO@orh} +\newlength{\f@ncyO@elf} +\newlength{\f@ncyO@erf} +\newlength{\f@ncyO@olf} +\newlength{\f@ncyO@orf} +\newcommand{\headrulewidth}{0.4pt} +\newcommand{\footrulewidth}{0pt} +\newcommand{\footruleskip}{.3\normalbaselineskip} + +% Fancyplain stuff shouldn't be used anymore (rather +% \fancypagestyle{plain} should be used), but it must be present for +% compatibility reasons. + +\newcommand{\plainheadrulewidth}{0pt} +\newcommand{\plainfootrulewidth}{0pt} +\newif\if@fancyplain \@fancyplainfalse +\def\fancyplain#1#2{\if@fancyplain#1\else#2\fi} + +\headwidth=-123456789sp %magic constant + +% Command to reset various things in the headers: +% a.o. single spacing (taken from setspace.sty) +% and the catcode of ^^M (so that epsf files in the header work if a +% verbatim crosses a page boundary) +% It also defines a \nouppercase command that disables \uppercase and +% \Makeuppercase. It can only be used in the headers and footers. +\let\fnch@everypar\everypar% save real \everypar because of spanish.ldf +\def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13 + \def\baselinestretch{1}% + \def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax + \expandafter\let\csname MakeUppercase \endcsname\relax##1}}% + \ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e + \ifx\@normalsize\undefined \normalsize % for ucthesis.cls + \else \@normalsize \fi + \else% NFSS (2.09) present + \@newbaseline% + \fi} + +% Initialization of the head and foot text. + +% The default values still contain \fancyplain for compatibility. +\fancyhf{} % clear all +% lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages +% evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages +\if@twoside + \fancyhead[el,or]{\fancyplain{}{\sl\rightmark}} + \fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}} +\else + \fancyhead[l]{\fancyplain{}{\sl\rightmark}} + \fancyhead[r]{\fancyplain{}{\sl\leftmark}} +\fi +\fancyfoot[c]{\rm\thepage} % page number + +% Use box 0 as a temp box and dimen 0 as temp dimen. +% This can be done, because this code will always +% be used inside another box, and therefore the changes are local. 
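+% (Worked example: if \headheight is 12pt but the assembled header needs
+% 14.5pt, \@fancyvbox below warns once and then globally enlarges
+% \headheight to 14.5pt for the rest of the run.)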
+ +\def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning + {\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J + We now make it that large for the rest of the document.^^J + This may cause the page layout to be inconsistent, however\@gobble}% + \dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi + \box0} + +% Put together a header or footer given the left, center and +% right text, fillers at left and right and a rule. +% The \lap commands put the text into an hbox of zero size, +% so overlapping text does not generate an errormessage. +% These macros have 5 parameters: +% 1. LEFTSIDE BEARING % This determines at which side the header will stick +% out. When \fancyhfoffset is used this calculates \headwidth, otherwise +% it is \hss or \relax (after expansion). +% 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component. +% 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp. +% 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component. +% 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion). + +\def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset + \@fancyvbox\headheight{\hbox + {\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill + \parbox[b]{\headwidth}{\centering#3}\hfill + \llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5} + +\def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset + \@fancyvbox\footskip{\footrule + \hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill + \parbox[t]{\headwidth}{\centering#3}\hfill + \llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5} + +\def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi + \hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}} + +\def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi + \vskip-\footruleskip\vskip-\footrulewidth + \hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}} + +\def\ps@fancy{% +\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook +% +% Define \MakeUppercase for old LaTeXen. +% Note: we used \def rather than \let, so that \let\uppercase\relax (from +% the version 1 documentation) will still work. +% +\@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}% +\@ifundefined{chapter}{\def\sectionmark##1{\markboth +{\MakeUppercase{\ifnum \c@secnumdepth>\z@ + \thesection\hskip 1em\relax \fi ##1}}{}}% +\def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne + \thesubsection\hskip 1em\relax \fi ##1}}}% +{\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne + \@chapapp\ \thechapter. \ \fi ##1}}{}}% +\def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@ + \thesection. \ \fi ##1}}}}% +%\csname ps@headings\endcsname % use \ps@headings defaults if they exist +\ps@@fancy +\gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}% +% Initialize \headwidth if the user didn't +% +\ifdim\headwidth<0sp +% +% This catches the case that \headwidth hasn't been initialized and the +% case that the user added something to \headwidth in the expectation that +% it was initialized to \textwidth. We compensate this now. This loses if +% the user intended to multiply it by a factor. But that case is more +% likely done by saying something like \headwidth=1.2\textwidth. +% The doc says you have to change \headwidth after the first call to +% \pagestyle{fancy}. This code is just to catch the most common cases were +% that requirement is violated. 
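+% (Worked example: after the magic initialization to -123456789sp, a user
+% who wrote \addtolength{\headwidth}{1cm} before \pagestyle{fancy} reaches
+% this branch with a negative value; adding back 123456789sp and then
+% \textwidth leaves \headwidth = \textwidth + 1cm, as intended.)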
+% + \global\advance\headwidth123456789sp\global\advance\headwidth\textwidth +\fi} +\def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy} +\def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy} +\let\ps@@empty\ps@empty +\def\ps@@fancy{% +\ps@@empty % This is for amsbook/amsart, which do strange things with \topskip +\def\@mkboth{\protect\markboth}% +\def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}% +\def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}% +\def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}% +\def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}% +} +% Default definitions for compatibility mode: +% These cause the header/footer to take the defined \headwidth as width +% And to shift in the direction of the marginpar area + +\def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi} +\def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi} +\let\fancy@Oelh\fancy@Oorh +\let\fancy@Oerh\fancy@Oolh + +\let\fancy@Oolf\fancy@Oolh +\let\fancy@Oorf\fancy@Oorh +\let\fancy@Oelf\fancy@Oelh +\let\fancy@Oerf\fancy@Oerh + +% New definitions for the use of \fancyhfoffset +% These calculate the \headwidth from \textwidth and the specified offsets. + +\def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh + \advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh} +\def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh + \advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh} + +\def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf + \advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf} +\def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf + \advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf} + +\def\fancy@setoffs{% +% Just in case \let\headwidth\textwidth was used + \fancy@gbl\let\headwidth\fancy@headwidth + \fancy@gbl\let\fancy@Oolh\fancy@offsolh + \fancy@gbl\let\fancy@Oelh\fancy@offselh + \fancy@gbl\let\fancy@Oorh\hss + \fancy@gbl\let\fancy@Oerh\hss + \fancy@gbl\let\fancy@Oolf\fancy@offsolf + \fancy@gbl\let\fancy@Oelf\fancy@offself + \fancy@gbl\let\fancy@Oorf\hss + \fancy@gbl\let\fancy@Oerf\hss} + +\newif\iffootnote +\let\latex@makecol\@makecol +\def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi +\let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol} +\def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi} +\def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi} +\def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi} + +\newcommand{\fancypagestyle}[2]{% + \@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}} diff --git a/paper/colm_camera/hubble_paperclip.bib b/paper/colm_camera/hubble_paperclip.bib new file mode 100644 index 0000000..b2c6092 --- /dev/null +++ b/paper/colm_camera/hubble_paperclip.bib @@ -0,0 +1,506 @@ +@article{lanusse2023astroclip, + title = {AstroCLIP: Cross-Modal Pre-Training for Astronomical Foundation Models}, + author = {Lanusse, Francois and Parker, Liam and Golkar, Siavash and Cranmer, Miles and Bietti, Alberto and Eickenberg, Michael and Krawezik, Geraud and McCabe, Michael and Ohana, Ruben and Pettee, Mariel and others}, + journal = {arXiv preprint arXiv:2310.03024}, + year = {2023} +} + +@article{nguyen2023astrollama, + title = {AstroLLaMA: Towards Specialized Foundation Models in Astronomy}, + author = {Nguyen, Tuan Dung and Ting, Yuan-Sen and Ciuc{\u{a}}, Ioana and O'Neill, Charlie and Sun, Ze-Chang and Jab{\l}o{\'n}ska, Maja and Kruk, Sandor and Perkowski, Ernest and Miller, Jack and Li, Jason and others}, + journal = 
{arXiv preprint arXiv:2309.06126}, + year = {2023} +} + +@article{willard2023efficient, + title = {Efficient Guided Generation for LLMs}, + author = {Willard, Brandon T and Louf, R{\'e}mi}, + journal = {arXiv preprint arXiv:2307.09702}, + year = {2023} +} + + +@inproceedings{radford2021learning, + title = {Learning transferable visual models from natural language supervision}, + author = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others}, + booktitle = {International conference on machine learning}, + pages = {8748--8763}, + year = {2021}, + organization = {PMLR} +} + +@article{cepeda2023geoclip, + title = {GeoCLIP: Clip-Inspired Alignment between Locations and Images for Effective Worldwide Geo-localization}, + author = {Cepeda, Vicente Vivanco and Nayak, Gaurav Kumar and Shah, Mubarak}, + journal = {arXiv preprint arXiv:2309.16020}, + year = {2023} +} + +@article{oord2018representation, + title = {Representation learning with contrastive predictive coding}, + author = {Oord, Aaron van den and Li, Yazhe and Vinyals, Oriol}, + journal = {arXiv preprint arXiv:1807.03748}, + year = {2018} +} + +@inproceedings{DBLP:journals/corr/KingmaB14, + author = {Diederik P. Kingma and Jimmy Ba}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/KingmaB14.bib}, + booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings}, + date-added = {2021-10-12 10:47:38 -0400}, + date-modified = {2021-10-12 10:47:38 -0400}, + editor = {Yoshua Bengio and Yann LeCun}, + timestamp = {Thu, 25 Jul 2019 14:25:37 +0200}, + title = {Adam: {A} Method for Stochastic Optimization}, + url = {http://arxiv.org/abs/1412.6980}, + year = {2015}, + bdsk-url-1 = {http://arxiv.org/abs/1412.6980} +} + +@inproceedings{DBLP:conf/iclr/LoshchilovH19, + author = {Ilya Loshchilov and Frank Hutter}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/conf/iclr/LoshchilovH19.bib}, + booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019}, + date-added = {2021-10-12 10:46:54 -0400}, + date-modified = {2021-10-12 10:48:48 -0400}, + timestamp = {Thu, 25 Jul 2019 14:26:04 +0200}, + title = {Decoupled Weight Decay Regularization}, + url = {https://openreview.net/forum?id=Bkg6RiCqY7}, + year = {2019}, + bdsk-url-1 = {https://openreview.net/forum?id=Bkg6RiCqY7} +} + + +@software{flax2020github, + author = {Jonathan Heek and Anselm Levskaya and Avital Oliver and Marvin Ritter and Bertrand Rondepierre and Andreas Steiner and Marc van {Z}ee}, + title = {{F}lax: A neural network library and ecosystem for {JAX}}, + url = {http://github.com/google/flax}, + version = {0.7.0}, + year = {2023} +} +@software{jax2018github, + author = {James Bradbury and Roy Frostig and Peter Hawkins and Matthew James Johnson and Chris Leary and Dougal Maclaurin and George Necula and Adam Paszke and Jake Vander{P}las and Skye Wanderman-{M}ilne and Qiao Zhang}, + title = {{JAX}: composable transformations of {P}ython+{N}um{P}y programs}, + url = {http://github.com/google/jax}, + version = {0.3.13}, + year = {2018} +} +@article{harris2020array, + title = {Array programming with {NumPy}}, + author = {Charles R. Harris and K. Jarrod Millman and St{\'{e}}fan J. 
+ van der Walt and Ralf Gommers and Pauli Virtanen and David + Cournapeau and Eric Wieser and Julian Taylor and Sebastian + Berg and Nathaniel J. Smith and Robert Kern and Matti Picus + and Stephan Hoyer and Marten H. van Kerkwijk and Matthew + Brett and Allan Haldane and Jaime Fern{\'{a}}ndez del + R{\'{i}}o and Mark Wiebe and Pearu Peterson and Pierre + G{\'{e}}rard-Marchant and Kevin Sheppard and Tyler Reddy and + Warren Weckesser and Hameer Abbasi and Christoph Gohlke and + Travis E. Oliphant}, + year = {2020}, + month = sep, + journal = {Nature}, + volume = {585}, + number = {7825}, + pages = {357--362}, + doi = {10.1038/s41586-020-2649-2}, + publisher = {Springer Science and Business Media {LLC}}, + url = {https://doi.org/10.1038/s41586-020-2649-2} +} +@article{Hunter:2007, + author = {Hunter, J. D.}, + title = {Matplotlib: A 2D graphics environment}, + journal = {Computing in Science \& Engineering}, + volume = {9}, + number = {3}, + pages = {90--95}, + abstract = {Matplotlib is a 2D graphics package used for Python for + application development, interactive scripting, and publication-quality + image generation across user interfaces and operating systems.}, + publisher = {IEEE COMPUTER SOC}, + doi = {10.1109/MCSE.2007.55}, + year = 2007 +} +@software{deepmind2020jax, + title = {The {D}eep{M}ind {JAX} {E}cosystem}, + author = {Babuschkin, Igor and Baumli, Kate and Bell, Alison and Bhupatiraju, Surya and Bruce, Jake and Buchlovsky, Peter and Budden, David and Cai, Trevor and Clark, Aidan and Danihelka, Ivo and Dedieu, Antoine and Fantacci, Claudio and Godwin, Jonathan and Jones, Chris and Hemsley, Ross and Hennigan, Tom and Hessel, Matteo and Hou, Shaobo and Kapturowski, Steven and Keck, Thomas and Kemaev, Iurii and King, Michael and Kunesch, Markus and Martens, Lena and Merzic, Hamza and Mikulik, Vladimir and Norman, Tamara and Papamakarios, George and Quan, John and Ring, Roman and Ruiz, Francisco and Sanchez, Alvaro and Schneider, Rosalia and Sezener, Eren and Spencer, Stephen and Srinivasan, Srivatsan and Stokowiec, Wojciech and Wang, Luyu and Zhou, Guangyao and Viola, Fabio}, + url = {http://github.com/deepmind}, + year = {2020} +} +@misc{wandb, + title = {Experiment Tracking with Weights and Biases}, + year = {2020}, + note = {Software available from wandb.com}, + url = {https://www.wandb.com/}, + author = {Biewald, Lukas} +} + +@conference{Kluyver2016jupyter, + title = {Jupyter Notebooks -- a publishing format for reproducible computational workflows}, + author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando P{\'e}rez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Dami{\'a}n Avila and Safia Abdalla and Carol Willing}, + booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, + editor = {F. Loizides and B. Schmidt}, + organization = {IOS Press}, + pages = {87 - 90}, + year = {2016} +} + +@article{2020SciPy-NMeth, + author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and + Haberland, Matt and Reddy, Tyler and Cournapeau, David and + Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and + Bright, Jonathan and {van der Walt}, St{\'e}fan J. and + Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and + Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and + Kern, Robert and Larson, Eric and Carey, C J and + Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. 
and + {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and + Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and + Harris, Charles R. and Archibald, Anne M. and + Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and + {van Mulbregt}, Paul and {SciPy 1.0 Contributors}}, + title = {{{SciPy} 1.0: Fundamental Algorithms for Scientific + Computing in Python}}, + journal = {Nature Methods}, + year = {2020}, + volume = {17}, + pages = {261--272}, + adsurl = {https://rdcu.be/b08Wh}, + doi = {10.1038/s41592-019-0686-2} +} + +@article{wolf2019huggingface, + title = {Huggingface's transformers: State-of-the-art natural language processing}, + author = {Wolf, Thomas and Debut, Lysandre and Sanh, Victor and Chaumond, Julien and Delangue, Clement and Moi, Anthony and Cistac, Pierric and Rault, Tim and Louf, R{\'e}mi and Funtowicz, Morgan and others}, + journal = {arXiv preprint arXiv:1910.03771}, + year = {2019} +} + +@article{dettmers2022llmint8, + title = {LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale}, + author = {Dettmers, Tim and Lewis, Mike and Belkada, Younes and Zettlemoyer, Luke}, + journal = {arXiv preprint arXiv:2208.07339}, + year = {2022} +} + +@article{2019AJ....157...98G, + author = {{Ginsburg}, A. and {Sip{\H o}cz}, B.~M. and {Brasseur}, C.~E. and + {Cowperthwaite}, P.~S. and {Craig}, M.~W. and {Deil}, C. and + {Guillochon}, J. and {Guzman}, G. and {Liedtke}, S. and {Lian Lim}, P. and + {Lockhart}, K.~E. and {Mommert}, M. and {Morris}, B.~M. and + {Norman}, H. and {Parikh}, M. and {Persson}, M.~V. and {Robitaille}, T.~P. and + {Segovia}, J.-C. and {Singer}, L.~P. and {Tollerud}, E.~J. and + {de Val-Borro}, M. and {Valtchanov}, I. and {Woillez}, J. and + {The Astroquery collaboration} and {a subset of the astropy collaboration} + }, + title = {{astroquery: An Astronomical Web-querying Package in Python}}, + journal = {Astrophysical Journal}, + archiveprefix = {arXiv}, + eprint = {1901.04520}, + primaryclass = {astro-ph.IM}, + keywords = {astronomical databases: miscellaneous, virtual observatory tools}, + year = 2019, + month = mar, + volume = 157, + eid = {98}, + pages = {98}, + doi = {10.3847/1538-3881/aafc33}, + adsurl = {https://adsabs.harvard.edu/abs/2019AJ....157...98G}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@article{paszke2019pytorch, + title = {Pytorch: An imperative style, high-performance deep learning library}, + author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others}, + journal = {Advances in neural information processing systems}, + volume = {32}, + year = {2019} +} + +@inproceedings{mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56 - 61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } +} + +@article{batatia2023foundation, + title = {A foundation model for atomistic materials chemistry}, + author = {Batatia, Ilyes and Benner, Philipp and Chiang, Yuan and Elena, Alin M and Kov{\'a}cs, D{\'a}vid P and Riebesell, Janosh and Advincula, Xavier R and Asta, Mark and Baldwin, William J and Bernstein, Noam and others}, + journal = {arXiv preprint arXiv:2401.00096}, + year = {2023} +} + +@article{subramanian2023towards, 
+ title = {Towards Foundation Models for Scientific Machine Learning: Characterizing Scaling and Transfer Behavior}, + author = {Subramanian, Shashank and Harrington, Peter and Keutzer, Kurt and Bhimji, Wahid and Morozov, Dmitriy and Mahoney, Michael and Gholami, Amir}, + journal = {arXiv preprint arXiv:2306.00258}, + year = {2023} +} + +@article{huertas2022dawes, + title = {The DAWES review 10: The impact of deep learning for the analysis of galaxy surveys}, + author = {Huertas-Company, Marc and Lanusse, Fran{\c{c}}ois}, + journal = {arXiv preprint arXiv:2210.01813}, + year = {2022} +} + +@article{jiang2024mixtral, + title = {Mixtral of Experts}, + author = {Jiang, Albert Q and Sablayrolles, Alexandre and Roux, Antoine and Mensch, Arthur and Savary, Blanche and Bamford, Chris and Chaplot, Devendra Singh and Casas, Diego de las and Hanna, Emma Bou and Bressand, Florian and others}, + journal = {arXiv preprint arXiv:2401.04088}, + year = {2024} +} + +@article{wei2020deep, + title = {Deep transfer learning for star cluster classification: I. application to the PHANGS--HST survey}, + author = {Wei, Wei and Huerta, EA and Whitmore, Bradley C and Lee, Janice C and Hannon, Stephen and Chandar, Rupali and Dale, Daniel A and Larson, Kirsten L and Thilker, David A and Ubeda, Leonardo and others}, + journal = {Monthly Notices of the Royal Astronomical Society}, + volume = {493}, + number = {3}, + pages = {3178--3193}, + year = {2020}, + publisher = {Oxford University Press} +} + +@article{lee2022phangs, + title = {The PHANGS-HST survey: Physics at high angular resolution in nearby galaxies with the hubble space telescope}, + author = {Lee, Janice C and Whitmore, Bradley C and Thilker, David A and Deger, Sinan and Larson, Kirsten L and Ubeda, Leonardo and Anand, Gagandeep S and Boquien, M{\'e}d{\'e}ric and Chandar, Rupali and Dale, Daniel A and others}, + journal = {The Astrophysical Journal Supplement Series}, + volume = {258}, + number = {1}, + pages = {10}, + year = {2022}, + publisher = {IOP Publishing} +} + +@article{scoville2007cosmic, + title = {The cosmic evolution survey (COSMOS): overview}, + author = {Scoville, Nick and Aussel, H and Brusa, Marcella and Capak, Peter and Carollo, C Marcella and Elvis, M and Giavalisco, M and Guzzo, L and Hasinger, G and Impey, C and others}, + journal = {The Astrophysical Journal Supplement Series}, + volume = {172}, + number = {1}, + pages = {1}, + year = {2007}, + publisher = {IOP Publishing} +} + +@article{perkowski2024astrollama, + title = {AstroLLaMA-Chat: Scaling AstroLLaMA with Conversational and Diverse Datasets}, + author = {Perkowski, Ernest and Pan, Rui and Nguyen, Tuan Dung and Ting, Yuan-Sen and Kruk, Sandor and Zhang, Tong and O’Neill, Charlie and Jablonska, Maja and Sun, Zechang and Smith, Michael J and others}, + journal = {Research Notes of the AAS}, + volume = {8}, + number = {1}, + pages = {7}, + year = {2024}, + publisher = {The American Astronomical Society} +} + + +@article{Sanchez-Fernandez2022.11.17.516915, + abstract = {Currently, bioimaging databases cannot be queried by chemical structures that induce the phenotypic effects captured by an image. Through the advent of the contrastive learning paradigm, images and text could be embedded into the same space. We build on this contrastive learning paradigm, to present a novel retrieval system that is able to identify the correct bioimage given a chemical structure out of a database of \~{}2,000 candidate images with a top-1 accuracy \>70 times higher than a random baseline. 
Additionally, the learned embeddings of our method are highly transferable to various relevant downstream tasks in drug discovery, including activity prediction, microscopy image classification and mechanism of action identification.},
+ author = {Ana Sanchez-Fernandez and Elisabeth Rumetshofer and Sepp Hochreiter and G{\"u}nter Klambauer},
+ doi = {10.1101/2022.11.17.516915},
+ elocation-id = {2022.11.17.516915},
+ eprint = {https://www.biorxiv.org/content/early/2023/06/01/2022.11.17.516915.full.pdf},
+ journal = {bioRxiv},
+ publisher = {Cold Spring Harbor Laboratory},
+ title = {CLOOME: contrastive learning unlocks bioimaging databases for queries with chemical structures},
+ url = {https://www.biorxiv.org/content/early/2023/06/01/2022.11.17.516915},
+ year = {2023},
+ bdsk-url-1 = {https://www.biorxiv.org/content/early/2023/06/01/2022.11.17.516915},
+ bdsk-url-2 = {https://doi.org/10.1101/2022.11.17.516915}
+}
+
+@article{liu2023text,
+ title = {A text-guided protein design framework},
+ author = {Liu, Shengchao and Zhu, Yutao and Lu, Jiarui and Xu, Zhao and Nie, Weili and Gitter, Anthony and Xiao, Chaowei and Tang, Jian and Guo, Hongyu and Anandkumar, Anima},
+ journal = {arXiv preprint arXiv:2302.04611},
+ year = {2023}
+}
+
+@article{hausen2020morpheus,
+ title = {Morpheus: A deep learning framework for the pixel-level analysis of astronomical image data},
+ author = {Hausen, Ryan and Robertson, Brant E},
+ journal = {The Astrophysical Journal Supplement Series},
+ volume = {248},
+ number = {1},
+ pages = {20},
+ year = {2020},
+ publisher = {IOP Publishing}
+}
+
+@article{bommasani2021opportunities,
+ title = {On the opportunities and risks of foundation models},
+ author = {Bommasani, Rishi and Hudson, Drew A and Adeli, Ehsan and Altman, Russ and Arora, Simran and von Arx, Sydney and Bernstein, Michael S and Bohg, Jeannette and Bosselut, Antoine and Brunskill, Emma and others},
+ journal = {arXiv preprint arXiv:2108.07258},
+ year = {2021}
+}
+
+@inproceedings{deng2009imagenet,
+ title = {Imagenet: A large-scale hierarchical image database},
+ author = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
+ booktitle = {2009 IEEE conference on computer vision and pattern recognition},
+ pages = {248--255},
+ year = {2009},
+ organization = {IEEE}
+}
+
+@article{slijepcevic2024radio,
+ title = {Radio galaxy zoo: towards building the first multipurpose foundation model for radio astronomy with self-supervised learning},
+ author = {Slijepcevic, Inigo V and Scaife, Anna MM and Walmsley, Mike and Bowles, Micah and Wong, O Ivy and Shabala, Stanislav S and White, Sarah V},
+ journal = {RAS Techniques and Instruments},
+ volume = {3},
+ number = {1},
+ pages = {19--32},
+ year = {2024},
+ publisher = {Oxford University Press}
+}
+
+@article{vig2024finetuning,
+ title = {Finetuning Foundation Models for Joint Analysis Optimization},
+ author = {Vig, Matthias and Hartman, Nicole and Heinrich, Lukas},
+ journal = {arXiv preprint arXiv:2401.13536},
+ year = {2024}
+}
+
+@article{walmsley2022towards,
+ title = {Towards galaxy foundation models with hybrid contrastive learning},
+ author = {Walmsley, Mike and Slijepcevic, Inigo Val and Bowles, Micah and Scaife, Anna MM},
+ journal = {arXiv preprint arXiv:2206.11927},
+ year = {2022}
+}
+
+@article{stein2021self,
+ title = {Self-supervised similarity search for large scientific datasets},
+ author = {Stein, George and Harrington,
Peter and Blaum, Jacqueline and Medan, Tomislav and Lukic, Zarija},
+ journal = {arXiv preprint arXiv:2110.13151},
+ year = {2021}
+}
+
+@article{stein2022mining,
+ title = {Mining for strong gravitational lenses with self-supervised learning},
+ author = {Stein, George and Blaum, Jacqueline and Harrington, Peter and Medan, Tomislav and Luki{\'c}, Zarija},
+ journal = {The Astrophysical Journal},
+ volume = {932},
+ number = {2},
+ pages = {107},
+ year = {2022},
+ publisher = {IOP Publishing}
+}
+
+@article{hayat2021estimating,
+ title = {Estimating galactic distances from images using self-supervised representation learning},
+ author = {Hayat, Md Abul and Harrington, Peter and Stein, George and Luki{\'c}, Zarija and Mustafa, Mustafa},
+ journal = {arXiv preprint arXiv:2101.04293},
+ year = {2021}
+}
+
+@article{hayat2021self,
+ title = {Self-supervised representation learning for astronomical images},
+ author = {Hayat, Md Abul and Stein, George and Harrington, Peter and Luki{\'c}, Zarija and Mustafa, Mustafa},
+ journal = {The Astrophysical Journal Letters},
+ volume = {911},
+ number = {2},
+ pages = {L33},
+ year = {2021},
+ publisher = {IOP Publishing}
+}
+
+@article{huertas2023brief,
+ title = {A brief review of contrastive learning applied to astrophysics},
+ author = {Huertas-Company, Marc and Sarmiento, Regina and Knapen, Johan H},
+ journal = {RAS Techniques and Instruments},
+ volume = {2},
+ number = {1},
+ pages = {441--452},
+ year = {2023},
+ publisher = {Oxford University Press}
+}
+
+@article{slijepcevic2022learning,
+ title = {Learning useful representations for radio astronomy ``in the wild'' with contrastive learning},
+ author = {Slijepcevic, Inigo Val and Scaife, Anna MM and Walmsley, Mike and Bowles, Micah},
+ journal = {arXiv preprint arXiv:2207.08666},
+ year = {2022}
+}
+
+@article{bowles2022new,
+ title = {A New Task: Deriving Semantic Class Targets for the Physical Sciences},
+ author = {Bowles, Micah and Tang, Hongming and Vardoulaki, Eleni and Alexander, Emma L and Luo, Yan and Rudnick, Lawrence and Walmsley, Mike and Porter, Fiona and Scaife, Anna MM and Slijepcevic, Inigo Val and others},
+ journal = {arXiv preprint arXiv:2210.14760},
+ year = {2022}
+}
+
+@article{bowles2023radio,
+ title = {Radio galaxy zoo EMU: towards a semantic radio galaxy morphology taxonomy},
+ author = {Bowles, Micah and Tang, Hongming and Vardoulaki, Eleni and Alexander, Emma L and Luo, Yan and Rudnick, Lawrence and Walmsley, Mike and Porter, Fiona and Scaife, Anna MM and Slijepcevic, Inigo Val and others},
+ journal = {Monthly Notices of the Royal Astronomical Society},
+ volume = {522},
+ number = {2},
+ pages = {2584--2600},
+ year = {2023},
+ publisher = {Oxford University Press}
+}
+
+@article{walmsley2023rare,
+ title = {Rare Galaxy Classes Identified In Foundation Model Representations},
+ author = {Walmsley, Mike and Scaife, Anna MM},
+ journal = {arXiv preprint arXiv:2312.02910},
+ year = {2023}
+}
+
+@article{akhmetzhanova2024data,
+ title = {Data compression and inference in cosmology with self-supervised machine learning},
+ author = {Akhmetzhanova, Aizhan and Mishra-Sharma, Siddharth and Dvorkin, Cora},
+ journal = {Monthly Notices of the Royal Astronomical Society},
+ volume = {527},
+ number = {3},
+ pages = {7459--7481},
+ year = {2024},
+ publisher = {Oxford University Press}
+}
+
+@article{Birk:2024knn,
+ author = {Birk, Joschka and Hallin, Anna and Kasieczka, Gregor},
+ title = {{OmniJet-$\alpha$: The first cross-task foundation model for particle physics}},
+ 
eprint = {2403.05618},
+ archiveprefix = {arXiv},
+ primaryclass = {hep-ph},
+ month = {3},
+ year = {2024}
+}
+
+@article{heinrich2024masked,
+ title = {Masked Particle Modeling on Sets: Towards Self-Supervised High Energy Physics Foundation Models},
+ author = {Heinrich, Lukas and Kagan, Michael and Klein, Samuel and Leigh, Matthew and Golling, Tobias and Raine, John Andrew and Osadchy, Margarita},
+ journal = {arXiv preprint arXiv:2401.13537},
+ year = {2024}
+}
+
+@article{mccabe2023multiple,
+ title = {Multiple physics pretraining for physical surrogate models},
+ author = {McCabe, Michael and Blancard, Bruno R{\'e}galdo-Saint and Parker, Liam Holden and Ohana, Ruben and Cranmer, Miles and Bietti, Alberto and Eickenberg, Michael and Golkar, Siavash and Krawezik, Geraud and Lanusse, Francois and others},
+ journal = {arXiv preprint arXiv:2310.02994},
+ year = {2023}
+}
+
+@article{vaswani2017attention,
+ title = {Attention is all you need},
+ author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
+ journal = {Advances in neural information processing systems},
+ volume = {30},
+ year = {2017}
+}
+
+@article{dosovitskiy2020image,
+ title = {An image is worth 16x16 words: Transformers for image recognition at scale},
+ author = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
+ journal = {arXiv preprint arXiv:2010.11929},
+ year = {2020}
+} \ No newline at end of file diff --git a/paper/colm_camera/math_commands.tex b/paper/colm_camera/math_commands.tex new file mode 100644 index 0000000..0668f93 --- /dev/null +++ b/paper/colm_camera/math_commands.tex @@ -0,0 +1,508 @@ +%%%%% NEW MATH DEFINITIONS %%%%%
+
+\usepackage{amsmath,amsfonts,bm}
+
+% Mark sections of captions for referring to divisions of figures
+\newcommand{\figleft}{{\em (Left)}}
+\newcommand{\figcenter}{{\em (Center)}}
+\newcommand{\figright}{{\em (Right)}}
+\newcommand{\figtop}{{\em (Top)}}
+\newcommand{\figbottom}{{\em (Bottom)}}
+\newcommand{\captiona}{{\em (a)}}
+\newcommand{\captionb}{{\em (b)}}
+\newcommand{\captionc}{{\em (c)}}
+\newcommand{\captiond}{{\em (d)}}
+
+% Highlight a newly defined term
+\newcommand{\newterm}[1]{{\bf #1}}
+
+
+% Figure reference, lower-case.
+\def\figref#1{figure~\ref{#1}}
+% Figure reference, capital. For start of sentence
+\def\Figref#1{Figure~\ref{#1}}
+\def\twofigref#1#2{figures \ref{#1} and \ref{#2}}
+\def\quadfigref#1#2#3#4{figures \ref{#1}, \ref{#2}, \ref{#3} and \ref{#4}}
+% Section reference, lower-case.
+\def\secref#1{section~\ref{#1}}
+% Section reference, capital.
+\def\Secref#1{Section~\ref{#1}}
+% Reference to two sections.
+\def\twosecrefs#1#2{sections \ref{#1} and \ref{#2}}
+% Reference to three sections.
+\def\secrefs#1#2#3{sections \ref{#1}, \ref{#2} and \ref{#3}}
+% Reference to an equation, lower-case.
+\def\eqref#1{equation~\ref{#1}}
+% Reference to an equation, upper case
+\def\Eqref#1{Equation~\ref{#1}}
+% A raw reference to an equation---avoid using if possible
+\def\plaineqref#1{\ref{#1}}
+% Reference to a chapter, lower-case.
+\def\chapref#1{chapter~\ref{#1}}
+% Reference to a chapter, upper case.
+\def\Chapref#1{Chapter~\ref{#1}}
+% Reference to a range of chapters
+\def\rangechapref#1#2{chapters~\ref{#1}--\ref{#2}}
+% Reference to an algorithm, lower-case.
+\def\algref#1{algorithm~\ref{#1}}
+% Reference to an algorithm, upper case.
+\def\Algref#1{Algorithm~\ref{#1}}
+\def\twoalgref#1#2{algorithms \ref{#1} and \ref{#2}}
+\def\Twoalgref#1#2{Algorithms \ref{#1} and \ref{#2}}
+% Reference to a part, lower case
+\def\partref#1{part~\ref{#1}}
+% Reference to a part, upper case
+\def\Partref#1{Part~\ref{#1}}
+\def\twopartref#1#2{parts \ref{#1} and \ref{#2}}
+
+\def\ceil#1{\lceil #1 \rceil}
+\def\floor#1{\lfloor #1 \rfloor}
+\def\1{\bm{1}}
+\newcommand{\train}{\mathcal{D}}
+\newcommand{\valid}{\mathcal{D_{\mathrm{valid}}}}
+\newcommand{\test}{\mathcal{D_{\mathrm{test}}}}
+
+\def\eps{{\epsilon}}
+
+
+% Random variables
+\def\reta{{\textnormal{$\eta$}}}
+\def\ra{{\textnormal{a}}}
+\def\rb{{\textnormal{b}}}
+\def\rc{{\textnormal{c}}}
+\def\rd{{\textnormal{d}}}
+\def\re{{\textnormal{e}}}
+\def\rf{{\textnormal{f}}}
+\def\rg{{\textnormal{g}}}
+\def\rh{{\textnormal{h}}}
+\def\ri{{\textnormal{i}}}
+\def\rj{{\textnormal{j}}}
+\def\rk{{\textnormal{k}}}
+\def\rl{{\textnormal{l}}}
+% rm is already a command, just don't name any random variables m
+\def\rn{{\textnormal{n}}}
+\def\ro{{\textnormal{o}}}
+\def\rp{{\textnormal{p}}}
+\def\rq{{\textnormal{q}}}
+\def\rr{{\textnormal{r}}}
+\def\rs{{\textnormal{s}}}
+\def\rt{{\textnormal{t}}}
+\def\ru{{\textnormal{u}}}
+\def\rv{{\textnormal{v}}}
+\def\rw{{\textnormal{w}}}
+\def\rx{{\textnormal{x}}}
+\def\ry{{\textnormal{y}}}
+\def\rz{{\textnormal{z}}}
+
+% Random vectors
+\def\rvepsilon{{\mathbf{\epsilon}}}
+\def\rvtheta{{\mathbf{\theta}}}
+\def\rva{{\mathbf{a}}}
+\def\rvb{{\mathbf{b}}}
+\def\rvc{{\mathbf{c}}}
+\def\rvd{{\mathbf{d}}}
+\def\rve{{\mathbf{e}}}
+\def\rvf{{\mathbf{f}}}
+\def\rvg{{\mathbf{g}}}
+\def\rvh{{\mathbf{h}}}
+\def\rvi{{\mathbf{i}}}
+\def\rvj{{\mathbf{j}}}
+\def\rvk{{\mathbf{k}}}
+\def\rvl{{\mathbf{l}}}
+\def\rvm{{\mathbf{m}}}
+\def\rvn{{\mathbf{n}}}
+\def\rvo{{\mathbf{o}}}
+\def\rvp{{\mathbf{p}}}
+\def\rvq{{\mathbf{q}}}
+\def\rvr{{\mathbf{r}}}
+\def\rvs{{\mathbf{s}}}
+\def\rvt{{\mathbf{t}}}
+\def\rvu{{\mathbf{u}}}
+\def\rvv{{\mathbf{v}}}
+\def\rvw{{\mathbf{w}}}
+\def\rvx{{\mathbf{x}}}
+\def\rvy{{\mathbf{y}}}
+\def\rvz{{\mathbf{z}}}
+
+% Elements of random vectors
+\def\erva{{\textnormal{a}}}
+\def\ervb{{\textnormal{b}}}
+\def\ervc{{\textnormal{c}}}
+\def\ervd{{\textnormal{d}}}
+\def\erve{{\textnormal{e}}}
+\def\ervf{{\textnormal{f}}}
+\def\ervg{{\textnormal{g}}}
+\def\ervh{{\textnormal{h}}}
+\def\ervi{{\textnormal{i}}}
+\def\ervj{{\textnormal{j}}}
+\def\ervk{{\textnormal{k}}}
+\def\ervl{{\textnormal{l}}}
+\def\ervm{{\textnormal{m}}}
+\def\ervn{{\textnormal{n}}}
+\def\ervo{{\textnormal{o}}}
+\def\ervp{{\textnormal{p}}}
+\def\ervq{{\textnormal{q}}}
+\def\ervr{{\textnormal{r}}}
+\def\ervs{{\textnormal{s}}}
+\def\ervt{{\textnormal{t}}}
+\def\ervu{{\textnormal{u}}}
+\def\ervv{{\textnormal{v}}}
+\def\ervw{{\textnormal{w}}}
+\def\ervx{{\textnormal{x}}}
+\def\ervy{{\textnormal{y}}}
+\def\ervz{{\textnormal{z}}}
+
+% Random matrices
+\def\rmA{{\mathbf{A}}}
+\def\rmB{{\mathbf{B}}}
+\def\rmC{{\mathbf{C}}}
+\def\rmD{{\mathbf{D}}}
+\def\rmE{{\mathbf{E}}}
+\def\rmF{{\mathbf{F}}}
+\def\rmG{{\mathbf{G}}}
+\def\rmH{{\mathbf{H}}}
+\def\rmI{{\mathbf{I}}}
+\def\rmJ{{\mathbf{J}}}
+\def\rmK{{\mathbf{K}}}
+\def\rmL{{\mathbf{L}}}
+\def\rmM{{\mathbf{M}}}
+\def\rmN{{\mathbf{N}}}
+\def\rmO{{\mathbf{O}}}
+\def\rmP{{\mathbf{P}}}
+\def\rmQ{{\mathbf{Q}}}
+\def\rmR{{\mathbf{R}}}
+\def\rmS{{\mathbf{S}}}
+\def\rmT{{\mathbf{T}}}
+\def\rmU{{\mathbf{U}}}
+\def\rmV{{\mathbf{V}}}
+\def\rmW{{\mathbf{W}}}
+\def\rmX{{\mathbf{X}}}
+\def\rmY{{\mathbf{Y}}} +\def\rmZ{{\mathbf{Z}}} + +% Elements of random matrices +\def\ermA{{\textnormal{A}}} +\def\ermB{{\textnormal{B}}} +\def\ermC{{\textnormal{C}}} +\def\ermD{{\textnormal{D}}} +\def\ermE{{\textnormal{E}}} +\def\ermF{{\textnormal{F}}} +\def\ermG{{\textnormal{G}}} +\def\ermH{{\textnormal{H}}} +\def\ermI{{\textnormal{I}}} +\def\ermJ{{\textnormal{J}}} +\def\ermK{{\textnormal{K}}} +\def\ermL{{\textnormal{L}}} +\def\ermM{{\textnormal{M}}} +\def\ermN{{\textnormal{N}}} +\def\ermO{{\textnormal{O}}} +\def\ermP{{\textnormal{P}}} +\def\ermQ{{\textnormal{Q}}} +\def\ermR{{\textnormal{R}}} +\def\ermS{{\textnormal{S}}} +\def\ermT{{\textnormal{T}}} +\def\ermU{{\textnormal{U}}} +\def\ermV{{\textnormal{V}}} +\def\ermW{{\textnormal{W}}} +\def\ermX{{\textnormal{X}}} +\def\ermY{{\textnormal{Y}}} +\def\ermZ{{\textnormal{Z}}} + +% Vectors +\def\vzero{{\bm{0}}} +\def\vone{{\bm{1}}} +\def\vmu{{\bm{\mu}}} +\def\vtheta{{\bm{\theta}}} +\def\va{{\bm{a}}} +\def\vb{{\bm{b}}} +\def\vc{{\bm{c}}} +\def\vd{{\bm{d}}} +\def\ve{{\bm{e}}} +\def\vf{{\bm{f}}} +\def\vg{{\bm{g}}} +\def\vh{{\bm{h}}} +\def\vi{{\bm{i}}} +\def\vj{{\bm{j}}} +\def\vk{{\bm{k}}} +\def\vl{{\bm{l}}} +\def\vm{{\bm{m}}} +\def\vn{{\bm{n}}} +\def\vo{{\bm{o}}} +\def\vp{{\bm{p}}} +\def\vq{{\bm{q}}} +\def\vr{{\bm{r}}} +\def\vs{{\bm{s}}} +\def\vt{{\bm{t}}} +\def\vu{{\bm{u}}} +\def\vv{{\bm{v}}} +\def\vw{{\bm{w}}} +\def\vx{{\bm{x}}} +\def\vy{{\bm{y}}} +\def\vz{{\bm{z}}} + +% Elements of vectors +\def\evalpha{{\alpha}} +\def\evbeta{{\beta}} +\def\evepsilon{{\epsilon}} +\def\evlambda{{\lambda}} +\def\evomega{{\omega}} +\def\evmu{{\mu}} +\def\evpsi{{\psi}} +\def\evsigma{{\sigma}} +\def\evtheta{{\theta}} +\def\eva{{a}} +\def\evb{{b}} +\def\evc{{c}} +\def\evd{{d}} +\def\eve{{e}} +\def\evf{{f}} +\def\evg{{g}} +\def\evh{{h}} +\def\evi{{i}} +\def\evj{{j}} +\def\evk{{k}} +\def\evl{{l}} +\def\evm{{m}} +\def\evn{{n}} +\def\evo{{o}} +\def\evp{{p}} +\def\evq{{q}} +\def\evr{{r}} +\def\evs{{s}} +\def\evt{{t}} +\def\evu{{u}} +\def\evv{{v}} +\def\evw{{w}} +\def\evx{{x}} +\def\evy{{y}} +\def\evz{{z}} + +% Matrix +\def\mA{{\bm{A}}} +\def\mB{{\bm{B}}} +\def\mC{{\bm{C}}} +\def\mD{{\bm{D}}} +\def\mE{{\bm{E}}} +\def\mF{{\bm{F}}} +\def\mG{{\bm{G}}} +\def\mH{{\bm{H}}} +\def\mI{{\bm{I}}} +\def\mJ{{\bm{J}}} +\def\mK{{\bm{K}}} +\def\mL{{\bm{L}}} +\def\mM{{\bm{M}}} +\def\mN{{\bm{N}}} +\def\mO{{\bm{O}}} +\def\mP{{\bm{P}}} +\def\mQ{{\bm{Q}}} +\def\mR{{\bm{R}}} +\def\mS{{\bm{S}}} +\def\mT{{\bm{T}}} +\def\mU{{\bm{U}}} +\def\mV{{\bm{V}}} +\def\mW{{\bm{W}}} +\def\mX{{\bm{X}}} +\def\mY{{\bm{Y}}} +\def\mZ{{\bm{Z}}} +\def\mBeta{{\bm{\beta}}} +\def\mPhi{{\bm{\Phi}}} +\def\mLambda{{\bm{\Lambda}}} +\def\mSigma{{\bm{\Sigma}}} + +% Tensor +\DeclareMathAlphabet{\mathsfit}{\encodingdefault}{\sfdefault}{m}{sl} +\SetMathAlphabet{\mathsfit}{bold}{\encodingdefault}{\sfdefault}{bx}{n} +\newcommand{\tens}[1]{\bm{\mathsfit{#1}}} +\def\tA{{\tens{A}}} +\def\tB{{\tens{B}}} +\def\tC{{\tens{C}}} +\def\tD{{\tens{D}}} +\def\tE{{\tens{E}}} +\def\tF{{\tens{F}}} +\def\tG{{\tens{G}}} +\def\tH{{\tens{H}}} +\def\tI{{\tens{I}}} +\def\tJ{{\tens{J}}} +\def\tK{{\tens{K}}} +\def\tL{{\tens{L}}} +\def\tM{{\tens{M}}} +\def\tN{{\tens{N}}} +\def\tO{{\tens{O}}} +\def\tP{{\tens{P}}} +\def\tQ{{\tens{Q}}} +\def\tR{{\tens{R}}} +\def\tS{{\tens{S}}} +\def\tT{{\tens{T}}} +\def\tU{{\tens{U}}} +\def\tV{{\tens{V}}} +\def\tW{{\tens{W}}} +\def\tX{{\tens{X}}} +\def\tY{{\tens{Y}}} +\def\tZ{{\tens{Z}}} + + +% Graph +\def\gA{{\mathcal{A}}} +\def\gB{{\mathcal{B}}} +\def\gC{{\mathcal{C}}} +\def\gD{{\mathcal{D}}} +\def\gE{{\mathcal{E}}} 
+\def\gF{{\mathcal{F}}} +\def\gG{{\mathcal{G}}} +\def\gH{{\mathcal{H}}} +\def\gI{{\mathcal{I}}} +\def\gJ{{\mathcal{J}}} +\def\gK{{\mathcal{K}}} +\def\gL{{\mathcal{L}}} +\def\gM{{\mathcal{M}}} +\def\gN{{\mathcal{N}}} +\def\gO{{\mathcal{O}}} +\def\gP{{\mathcal{P}}} +\def\gQ{{\mathcal{Q}}} +\def\gR{{\mathcal{R}}} +\def\gS{{\mathcal{S}}} +\def\gT{{\mathcal{T}}} +\def\gU{{\mathcal{U}}} +\def\gV{{\mathcal{V}}} +\def\gW{{\mathcal{W}}} +\def\gX{{\mathcal{X}}} +\def\gY{{\mathcal{Y}}} +\def\gZ{{\mathcal{Z}}} + +% Sets +\def\sA{{\mathbb{A}}} +\def\sB{{\mathbb{B}}} +\def\sC{{\mathbb{C}}} +\def\sD{{\mathbb{D}}} +% Don't use a set called E, because this would be the same as our symbol +% for expectation. +\def\sF{{\mathbb{F}}} +\def\sG{{\mathbb{G}}} +\def\sH{{\mathbb{H}}} +\def\sI{{\mathbb{I}}} +\def\sJ{{\mathbb{J}}} +\def\sK{{\mathbb{K}}} +\def\sL{{\mathbb{L}}} +\def\sM{{\mathbb{M}}} +\def\sN{{\mathbb{N}}} +\def\sO{{\mathbb{O}}} +\def\sP{{\mathbb{P}}} +\def\sQ{{\mathbb{Q}}} +\def\sR{{\mathbb{R}}} +\def\sS{{\mathbb{S}}} +\def\sT{{\mathbb{T}}} +\def\sU{{\mathbb{U}}} +\def\sV{{\mathbb{V}}} +\def\sW{{\mathbb{W}}} +\def\sX{{\mathbb{X}}} +\def\sY{{\mathbb{Y}}} +\def\sZ{{\mathbb{Z}}} + +% Entries of a matrix +\def\emLambda{{\Lambda}} +\def\emA{{A}} +\def\emB{{B}} +\def\emC{{C}} +\def\emD{{D}} +\def\emE{{E}} +\def\emF{{F}} +\def\emG{{G}} +\def\emH{{H}} +\def\emI{{I}} +\def\emJ{{J}} +\def\emK{{K}} +\def\emL{{L}} +\def\emM{{M}} +\def\emN{{N}} +\def\emO{{O}} +\def\emP{{P}} +\def\emQ{{Q}} +\def\emR{{R}} +\def\emS{{S}} +\def\emT{{T}} +\def\emU{{U}} +\def\emV{{V}} +\def\emW{{W}} +\def\emX{{X}} +\def\emY{{Y}} +\def\emZ{{Z}} +\def\emSigma{{\Sigma}} + +% entries of a tensor +% Same font as tensor, without \bm wrapper +\newcommand{\etens}[1]{\mathsfit{#1}} +\def\etLambda{{\etens{\Lambda}}} +\def\etA{{\etens{A}}} +\def\etB{{\etens{B}}} +\def\etC{{\etens{C}}} +\def\etD{{\etens{D}}} +\def\etE{{\etens{E}}} +\def\etF{{\etens{F}}} +\def\etG{{\etens{G}}} +\def\etH{{\etens{H}}} +\def\etI{{\etens{I}}} +\def\etJ{{\etens{J}}} +\def\etK{{\etens{K}}} +\def\etL{{\etens{L}}} +\def\etM{{\etens{M}}} +\def\etN{{\etens{N}}} +\def\etO{{\etens{O}}} +\def\etP{{\etens{P}}} +\def\etQ{{\etens{Q}}} +\def\etR{{\etens{R}}} +\def\etS{{\etens{S}}} +\def\etT{{\etens{T}}} +\def\etU{{\etens{U}}} +\def\etV{{\etens{V}}} +\def\etW{{\etens{W}}} +\def\etX{{\etens{X}}} +\def\etY{{\etens{Y}}} +\def\etZ{{\etens{Z}}} + +% The true underlying data generating distribution +\newcommand{\pdata}{p_{\rm{data}}} +% The empirical distribution defined by the training set +\newcommand{\ptrain}{\hat{p}_{\rm{data}}} +\newcommand{\Ptrain}{\hat{P}_{\rm{data}}} +% The model distribution +\newcommand{\pmodel}{p_{\rm{model}}} +\newcommand{\Pmodel}{P_{\rm{model}}} +\newcommand{\ptildemodel}{\tilde{p}_{\rm{model}}} +% Stochastic autoencoder distributions +\newcommand{\pencode}{p_{\rm{encoder}}} +\newcommand{\pdecode}{p_{\rm{decoder}}} +\newcommand{\precons}{p_{\rm{reconstruct}}} + +\newcommand{\laplace}{\mathrm{Laplace}} % Laplace distribution + +\newcommand{\E}{\mathbb{E}} +\newcommand{\Ls}{\mathcal{L}} +\newcommand{\R}{\mathbb{R}} +\newcommand{\emp}{\tilde{p}} +\newcommand{\lr}{\alpha} +\newcommand{\reg}{\lambda} +\newcommand{\rect}{\mathrm{rectifier}} +\newcommand{\softmax}{\mathrm{softmax}} +\newcommand{\sigmoid}{\sigma} +\newcommand{\softplus}{\zeta} +\newcommand{\KL}{D_{\mathrm{KL}}} +\newcommand{\Var}{\mathrm{Var}} +\newcommand{\standarderror}{\mathrm{SE}} +\newcommand{\Cov}{\mathrm{Cov}} +% Wolfram Mathworld says $L^2$ is for function spaces and $\ell^2$ is for vectors +% 
But then they seem to use $L^2$ for vectors throughout the site, and so does +% wikipedia. +\newcommand{\normlzero}{L^0} +\newcommand{\normlone}{L^1} +\newcommand{\normltwo}{L^2} +\newcommand{\normlp}{L^p} +\newcommand{\normmax}{L^\infty} + +\newcommand{\parents}{Pa} % See usage in notation.tex. Chosen to match Daphne's book. + +\DeclareMathOperator*{\argmax}{arg\,max} +\DeclareMathOperator*{\argmin}{arg\,min} + +\DeclareMathOperator{\sign}{sign} +\DeclareMathOperator{\Tr}{Tr} +\let\ab\allowbreak diff --git a/paper/colm_camera/natbib.sty b/paper/colm_camera/natbib.sty new file mode 100644 index 0000000..ff0d0b9 --- /dev/null +++ b/paper/colm_camera/natbib.sty @@ -0,0 +1,1246 @@ +%% +%% This is file `natbib.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% natbib.dtx (with options: `package,all') +%% ============================================= +%% IMPORTANT NOTICE: +%% +%% This program can be redistributed and/or modified under the terms +%% of the LaTeX Project Public License Distributed from CTAN +%% archives in directory macros/latex/base/lppl.txt; either +%% version 1 of the License, or any later version. +%% +%% This is a generated file. +%% It may not be distributed without the original source file natbib.dtx. +%% +%% Full documentation can be obtained by LaTeXing that original file. +%% Only a few abbreviated comments remain here to describe the usage. +%% ============================================= +%% Copyright 1993-2009 Patrick W Daly +%% Max-Planck-Institut f\"ur Sonnensystemforschung +%% Max-Planck-Str. 2 +%% D-37191 Katlenburg-Lindau +%% Germany +%% E-mail: daly@mps.mpg.de +\NeedsTeXFormat{LaTeX2e}[1995/06/01] +\ProvidesPackage{natbib} + [2009/07/16 8.31 (PWD, AO)] + + % This package reimplements the LaTeX \cite command to be used for various + % citation styles, both author-year and numerical. It accepts BibTeX + % output intended for many other packages, and therefore acts as a + % general, all-purpose citation-style interface. + % + % With standard numerical .bst files, only numerical citations are + % possible. With an author-year .bst file, both numerical and + % author-year citations are possible. + % + % If author-year citations are selected, \bibitem must have one of the + % following forms: + % \bibitem[Jones et al.(1990)]{key}... + % \bibitem[Jones et al.(1990)Jones, Baker, and Williams]{key}... + % \bibitem[Jones et al., 1990]{key}... + % \bibitem[\protect\citeauthoryear{Jones, Baker, and Williams}{Jones + % et al.}{1990}]{key}... + % \bibitem[\protect\citeauthoryear{Jones et al.}{1990}]{key}... + % \bibitem[\protect\astroncite{Jones et al.}{1990}]{key}... + % \bibitem[\protect\citename{Jones et al., }1990]{key}... + % \harvarditem[Jones et al.]{Jones, Baker, and Williams}{1990}{key}... + % + % This is either to be made up manually, or to be generated by an + % appropriate .bst file with BibTeX. + % Author-year mode || Numerical mode + % Then, \citet{key} ==>> Jones et al. (1990) || Jones et al. [21] + % \citep{key} ==>> (Jones et al., 1990) || [21] + % Multiple citations as normal: + % \citep{key1,key2} ==>> (Jones et al., 1990; Smith, 1989) || [21,24] + % or (Jones et al., 1990, 1991) || [21,24] + % or (Jones et al., 1990a,b) || [21,24] + % \cite{key} is the equivalent of \citet{key} in author-year mode + % and of \citep{key} in numerical mode + % Full author lists may be forced with \citet* or \citep*, e.g. + % \citep*{key} ==>> (Jones, Baker, and Williams, 1990) + % Optional notes as: + % \citep[chap. 
2]{key} ==>> (Jones et al., 1990, chap. 2) + % \citep[e.g.,][]{key} ==>> (e.g., Jones et al., 1990) + % \citep[see][pg. 34]{key}==>> (see Jones et al., 1990, pg. 34) + % (Note: in standard LaTeX, only one note is allowed, after the ref. + % Here, one note is like the standard, two make pre- and post-notes.) + % \citealt{key} ==>> Jones et al. 1990 + % \citealt*{key} ==>> Jones, Baker, and Williams 1990 + % \citealp{key} ==>> Jones et al., 1990 + % \citealp*{key} ==>> Jones, Baker, and Williams, 1990 + % Additional citation possibilities (both author-year and numerical modes) + % \citeauthor{key} ==>> Jones et al. + % \citeauthor*{key} ==>> Jones, Baker, and Williams + % \citeyear{key} ==>> 1990 + % \citeyearpar{key} ==>> (1990) + % \citetext{priv. comm.} ==>> (priv. comm.) + % \citenum{key} ==>> 11 [non-superscripted] + % Note: full author lists depends on whether the bib style supports them; + % if not, the abbreviated list is printed even when full requested. + % + % For names like della Robbia at the start of a sentence, use + % \Citet{dRob98} ==>> Della Robbia (1998) + % \Citep{dRob98} ==>> (Della Robbia, 1998) + % \Citeauthor{dRob98} ==>> Della Robbia + % + % + % Citation aliasing is achieved with + % \defcitealias{key}{text} + % \citetalias{key} ==>> text + % \citepalias{key} ==>> (text) + % + % Defining the citation mode and punctual (citation style) + % \setcitestyle{} + % Example: \setcitestyle{square,semicolon} + % Alternatively: + % Use \bibpunct with 6 mandatory arguments: + % 1. opening bracket for citation + % 2. closing bracket + % 3. citation separator (for multiple citations in one \cite) + % 4. the letter n for numerical styles, s for superscripts + % else anything for author-year + % 5. punctuation between authors and date + % 6. punctuation between years (or numbers) when common authors missing + % One optional argument is the character coming before post-notes. It + % appears in square braces before all other arguments. May be left off. + % Example (and default) \bibpunct[, ]{(}{)}{;}{a}{,}{,} + % + % To make this automatic for a given bib style, named newbib, say, make + % a local configuration file, natbib.cfg, with the definition + % \newcommand{\bibstyle@newbib}{\bibpunct...} + % Then the \bibliographystyle{newbib} will cause \bibstyle@newbib to + % be called on THE NEXT LATEX RUN (via the aux file). + % + % Such preprogrammed definitions may be invoked anywhere in the text + % by calling \citestyle{newbib}. This is only useful if the style specified + % differs from that in \bibliographystyle. + % + % With \citeindextrue and \citeindexfalse, one can control whether the + % \cite commands make an automatic entry of the citation in the .idx + % indexing file. For this, \makeindex must also be given in the preamble. + % + % Package Options: (for selecting punctuation) + % round - round parentheses are used (default) + % square - square brackets are used [option] + % curly - curly braces are used {option} + % angle - angle brackets are used