From 67753cb7c0923e44e6f4c0a66b09ba3bae1fd6f9 Mon Sep 17 00:00:00 2001
From: Marcel Bollmann <marcel@bollmann.me>
Date: Wed, 1 Jan 2025 22:39:53 +0100
Subject: [PATCH 01/16] Move paper+volume BibTeX creation into
 create_hugo_data.py

---
 bin/create_hugo_data.py | 75 +++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 22 deletions(-)

diff --git a/bin/create_hugo_data.py b/bin/create_hugo_data.py
index d46bc7bab8..cd702b9f17 100755
--- a/bin/create_hugo_data.py
+++ b/bin/create_hugo_data.py
@@ -15,13 +15,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Usage: create_hugo_data.py [--importdir=DIR] [--exportdir=DIR] [-c] [--debug] [--dry-run]
+"""Usage: create_hugo_data.py [--importdir=DIR] [--exportdir=DIR] [options]
 
 Creates Hugo data files containing all necessary Anthology data for the website generation.
 
+This will write JSON data files to `{exportdir}/data/` as well as volume-level BibTeX files
+to `{exportdir}/data-export/volumes/`.
+
 Options:
   --importdir=DIR          Directory to import XML files from. [default: {scriptdir}/../data/]
-  --exportdir=DIR          Directory to write data files to.   [default: {scriptdir}/../build/data/]
+  --exportdir=DIR          Directory to write build files to.   [default: {scriptdir}/../build/]
+  --bib-limit=N            Only generate bibliographic information for the first N papers per volume.
+                           Setting the environment variable NOBIB=true is equivalent to --bib-limit=3.
   --debug                  Output debug-level log messages.
   -c, --clean              Delete existing files in target directory before generation.
   -n, --dry-run            Do not write data files (useful for debugging).
@@ -55,6 +60,7 @@
 )
 
 
+BIBLIMIT = False
 ENCODER = msgspec.json.Encoder()
 SCRIPTDIR = os.path.dirname(os.path.realpath(__file__))
 
@@ -126,6 +132,8 @@ def paper_to_dict(paper):
     editors = [
         person_to_dict(paper.root.resolve(ns).id, ns) for ns in paper.get_editors()
     ]
+    if not BIBLIMIT or int(paper.id) <= BIBLIMIT:
+        data["bibtex"] = paper.to_bibtex(with_abstract=True)
     if paper.is_frontmatter:
         # Editors are considered authors for the frontmatter
         if editors:
@@ -273,13 +281,14 @@ def volume_to_dict(volume):
     return data
 
 
-def export_papers_and_volumes(anthology, outdir, dryrun):
+def export_papers_and_volumes(anthology, builddir, dryrun):
     all_volumes = {}
     with make_progress() as progress:
         paper_count = sum(1 for _ in anthology.papers())
         task = progress.add_task("Exporting papers...", total=paper_count)
         for collection in anthology.collections.values():
             collection_papers = {}
+            volume_bibtex = {}
             for volume in collection.volumes():
                 # Compute volume-level information that gets appended to every paper
                 # TODO: Could this be changed in the Hugo templates to
@@ -287,6 +296,7 @@ def export_papers_and_volumes(anthology, outdir, dryrun):
                 # this information on every paper?
                 # --- this also applies to some information from paper_to_dict()
                 # which may be fetched from the volume if not set for the paper
+                volume_bibtex[volume.full_id] = []
                 volume_data = {
                     "booktitle": volume.title.as_text(),
                     "parent_volume_id": volume.full_id,
@@ -307,24 +317,34 @@ def export_papers_and_volumes(anthology, outdir, dryrun):
                     data = paper_to_dict(paper)
                     data.update(volume_data)
                     collection_papers[paper.full_id] = data
+                    if "bibtex" in data:
+                        volume_bibtex[volume.full_id].append(
+                            paper.to_bibtex(with_abstract=False)
+                        )
 
                 # We build the volume data separately since it uses slightly
                 # different fields than what gets attached to papers
                 all_volumes[volume.full_id] = volume_to_dict(volume)
 
             if not dryrun:
-                with open(f"{outdir}/papers/{collection.id}.json", "wb") as f:
+                with open(f"{builddir}/data/papers/{collection.id}.json", "wb") as f:
                     f.write(ENCODER.encode(collection_papers))
 
+                for volume_id, bibtex in volume_bibtex.items():
+                    with open(
+                        f"{builddir}/data-export/volumes/{volume_id}.bib", "w"
+                    ) as f:
+                        print("\n".join(bibtex), file=f)
+
             progress.update(task, advance=len(collection_papers))
 
     # Export volumes
     if not dryrun:
-        with open(f"{outdir}/volumes.json", "wb") as f:
+        with open(f"{builddir}/data/volumes.json", "wb") as f:
             f.write(ENCODER.encode(all_volumes))
 
 
-def export_people(anthology, outdir, dryrun):
+def export_people(anthology, builddir, dryrun):
     with make_progress() as progress:
         # Just to make progress bars nicer
         ppl_count = sum(1 for _ in anthology.people.items())
@@ -379,12 +399,12 @@ def export_people(anthology, outdir, dryrun):
 
         if not dryrun:
             for first_letter, people_list in people.items():
-                with open(f"{outdir}/people/{first_letter}.json", "wb") as f:
+                with open(f"{builddir}/data/people/{first_letter}.json", "wb") as f:
                     f.write(ENCODER.encode(people_list))
             progress.update(task, advance=100)
 
 
-def export_venues(anthology, outdir, dryrun):
+def export_venues(anthology, builddir, dryrun):
     all_venues = {}
     print("Exporting venues...")
     for venue_id, venue in anthology.venues.items():
@@ -416,11 +436,11 @@ def export_venues(anthology, outdir, dryrun):
         all_venues[venue_id] = data
 
     if not dryrun:
-        with open("{}/venues.json".format(outdir), "wb") as f:
+        with open(f"{builddir}/data/venues.json", "wb") as f:
             f.write(ENCODER.encode(all_venues))
 
 
-def export_events(anthology, outdir, dryrun):
+def export_events(anthology, builddir, dryrun):
     # Export events
     all_events = {}
     print("Exporting events...")
@@ -473,11 +493,11 @@ def export_events(anthology, outdir, dryrun):
         all_events[event.id] = data
 
     if not dryrun:
-        with open(f"{outdir}/events.json", "wb") as f:
+        with open(f"{builddir}/data/events.json", "wb") as f:
             f.write(ENCODER.encode(all_events))
 
 
-def export_sigs(anthology, outdir, dryrun):
+def export_sigs(anthology, builddir, dryrun):
     all_sigs = {}
     print("Exporting SIGs...")
     for sig in anthology.sigs.values():
@@ -502,27 +522,32 @@ def export_sigs(anthology, outdir, dryrun):
         all_sigs[sig.acronym] = data
 
     if not dryrun:
-        with open("{}/sigs.json".format(outdir), "wb") as f:
+        with open(f"{builddir}/data/sigs.json", "wb") as f:
             f.write(ENCODER.encode(all_sigs))
 
 
-def export_anthology(anthology, outdir, clean=False, dryrun=False):
+def export_anthology(anthology, builddir, clean=False, dryrun=False):
     """
-    Dumps files in build/data/*.json. These files are used in conjunction with the hugo
-    page stubs created by create_hugo_pages.py to instantiate Hugo templates.
+    Dumps files in build/data/*.json, which are used by Hugo templates
+    to generate the website, as well as build/data-export/volumes/*.bib,
+    which are used later as a basis to generate more bibliographic files.
     """
     # Create directories
     if not dryrun:
         for subdir in ("", "papers", "people"):
-            target_dir = "{}/{}".format(outdir, subdir)
+            target_dir = "{}/data/{}".format(builddir, subdir)
+            if not check_directory(target_dir, clean=clean):
+                return
+        for subdir in ("", "volumes"):
+            target_dir = "{}/data-export/{}".format(builddir, subdir)
             if not check_directory(target_dir, clean=clean):
                 return
 
-    export_papers_and_volumes(anthology, outdir, dryrun)
-    export_people(anthology, outdir, dryrun)
-    export_venues(anthology, outdir, dryrun)
-    export_events(anthology, outdir, dryrun)
-    export_sigs(anthology, outdir, dryrun)
+    export_papers_and_volumes(anthology, builddir, dryrun)
+    export_people(anthology, builddir, dryrun)
+    export_venues(anthology, builddir, dryrun)
+    export_events(anthology, builddir, dryrun)
+    export_sigs(anthology, builddir, dryrun)
 
 
 if __name__ == "__main__":
@@ -540,6 +565,12 @@ def export_anthology(anthology, outdir, clean=False, dryrun=False):
     log_level = log.DEBUG if args["--debug"] else log.INFO
     tracker = setup_rich_logging(level=log_level)
 
+    if limit := args["--bib-limit"]:
+        BIBLIMIT = int(limit)
+    elif os.environ.get("NOBIB", "false") == "true":
+        BIBLIMIT = 3
+        log.info("NOBIB=true, setting --bib-limit=3")
+
     # This "freezes" the config, resulting in a massive speed-up
     OmegaConf.resolve(config)
 

From be873538f041c2cb1fcc893eb6ce3ad7e85d49d8 Mon Sep 17 00:00:00 2001
From: Marcel Bollmann <marcel@bollmann.me>
Date: Wed, 1 Jan 2025 22:40:20 +0100
Subject: [PATCH 02/16] Move MODS+Endnote generation into create_bibtex.py,
 rename to create_bib.py

---
 Makefile             |  58 ++--------
 bin/bib2xml_wrapper  |  20 ----
 bin/create_bib.py    | 264 +++++++++++++++++++++++++++++++++++++++++++
 bin/create_bibtex.py | 194 -------------------------------
 bin/xml2end_wrapper  |  17 ---
 5 files changed, 274 insertions(+), 279 deletions(-)
 delete mode 100755 bin/bib2xml_wrapper
 create mode 100755 bin/create_bib.py
 delete mode 100755 bin/create_bibtex.py
 delete mode 100755 bin/xml2end_wrapper

diff --git a/Makefile b/Makefile
index c1d616d49c..0d0a752ceb 100644
--- a/Makefile
+++ b/Makefile
@@ -97,10 +97,6 @@ ifeq ($(HUGO_VERSION_TOO_LOW),true)
   $(error "incorrect hugo version installed! Need hugo 0.$(HUGO_VERSION_MIN), but only found hugo 0.$(HUGO_VERSION)!")
 endif
 
-# check whether bibtools are installed; used by the endnote and mods targets.
-HAS_XML2END=$(shell which xml2end > /dev/null && echo true || echo false)
-HAS_BIB2XML=$(shell which bib2xml > /dev/null && echo true || echo false)
-
 
 VENV := "venv/bin/activate"
 
@@ -167,63 +163,29 @@ build/.data: build/.basedirs $(sourcefiles) venv/bin/activate
 	. $(VENV) && python3 bin/create_hugo_data.py --clean
 	@touch build/.data
 
-.PHONY: bibtex
-bibtex:	build/.bibtex
-
-.PHONY: mods
-mods: build/.mods
-
-.PHONY: endnote
-endnote: build/.endnote
+.PHONY: bib
+bib:	build/.bib
 
 #######################################################
-build/.bibtex: build/.basedirs $(sourcefiles) venv/bin/activate
-	@echo "INFO     Creating BibTeX files..."
-	. $(VENV) && python3 bin/create_bibtex.py --clean
-	@touch build/.bibtex
-
 # Disable citation targets (except for 3 bibtex per volume) by setting NOBIB=true
 ifeq (true, $(NOBIB))
-$(info WARNING: not creating citation materials; this is not suitable for release!)
-build/.mods: build/.bibtex
-	touch build/.mods
-build/.endnote: build/.bibtex
-	touch build/.endnote
+$(info WARNING: not creating full citation materials; this is not suitable for release!)
+build/.bib:
+	@touch build/.bib
 else
 
-build/.mods: build/.bibtex
-	@if [ $(HAS_BIB2XML) = false ]; then \
-	    echo "bib2xml not found, please install bibtools"; \
-            echo "alternatively, build the site without endnote files by running make hugo"; \
-	    exit 1; \
-	fi
-	@echo "INFO     Converting BibTeX files to MODS XML..."
-	@find build/data-export -name '*.bib' -print0 | \
-	      xargs -0 -n 1 -P 8 bin/bib2xml_wrapper >/dev/null
-	@touch build/.mods
-
-build/.endnote: build/.mods
-	@if [ $(HAS_XML2END) = false ]; then \
-	    echo "xml2end not found, please install bibtools"; \
-            echo "alternatively, build the site without endnote files by running make hugo"; \
-	    exit 1; \
-	fi
-	@echo "INFO     Converting MODS XML files to EndNote..."
-	@find build/data-export -name '*.xml' -print0 | \
-	      xargs -0 -n 1 -P 8 bin/xml2end_wrapper >/dev/null
-	@touch build/.endnote
+build/.bib: build/.basedirs build/.data venv/bin/activate
+	@echo "INFO     Creating extra bibliographic files..."
+	. $(VENV) && python3 bin/create_bib.py --clean
+	@touch build/.bib
 endif
 # end if block to conditionally disable bibtex generation
 #######################################################
 
-
-%.endf: %.xml
-	xml2end $< 2>&1 > $@
-
 .PHONY: hugo
 hugo: build/.hugo
 
-build/.hugo: build/.static build/.data build/.bibtex build/.mods build/.endnote
+build/.hugo: build/.static build/.data build/.bib
 	@echo "INFO     Running Hugo... this may take a while."
 	@cd build && \
 	    hugo -b $(ANTHOLOGYHOST)/$(ANTHOLOGYDIR) \
diff --git a/bin/bib2xml_wrapper b/bin/bib2xml_wrapper
deleted file mode 100755
index 1c39513805..0000000000
--- a/bin/bib2xml_wrapper
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/sh
-#
-# Copyright 2019 Marcel Bollmann <marcel@bollmann.me>
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-bib2xml -nt $1 2>/dev/null > ${1%.bib}.xml
-echo 1
diff --git a/bin/create_bib.py b/bin/create_bib.py
new file mode 100755
index 0000000000..a9bac0288b
--- /dev/null
+++ b/bin/create_bib.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019-2024 Marcel Bollmann <marcel@bollmann.me>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Usage: create_bib.py [--builddir=DIR] [-c] [--max-workers=N] [--debug]
+
+Creates anthology.bib files and MODS/Endnote formats for all papers in the Hugo directory.
+
+Options:
+  --builddir=DIR           Directory with build files; used both for reading and writing. [default: {scriptdir}/../build/]
+  --debug                  Output debug-level log messages.
+  -c, --clean              Delete existing files in target directory before generation.
+  -n, --max-workers=N      Maximum number of subprocesses that will be spawned.
+  -h, --help               Display this helpful text.
+"""
+
+import concurrent.futures
+import datetime
+from docopt import docopt
+import gzip
+import logging as log
+import os
+import msgspec
+from pathlib import Path
+import re
+from rich.progress import track
+import shutil
+import subprocess
+
+from acl_anthology import config
+from acl_anthology.utils.ids import infer_year
+from acl_anthology.utils.logging import setup_rich_logging
+from create_hugo_data import make_progress
+
+
+BIB2XML = None
+XML2END = None
+
+
+def create_bibtex(builddir, clean=False) -> None:
+    """Create full Anthology BibTeX files.
+
+    Requires volume bib files from create_hugo_data.py (for file without abstracts).
+    Requires data files from create_hugo_data.py (for file with abstracts).
+
+    Writes anthology.bib, anthology.bib.gz, and anthology+abstracts.bib.gz to
+    {builddir}/data-export/. The `clean` argument is currently unused.
+    """
+    with (
+        open(
+            f"{builddir}/data-export/anthology.bib", "wt", encoding="utf-8"
+        ) as file_anthology_raw,
+        gzip.open(
+            f"{builddir}/data-export/anthology.bib.gz", "wt", encoding="utf-8"
+        ) as file_anthology,
+    ):
+        # Add a header to each consolidated bibfile
+        for outfh in file_anthology_raw, file_anthology:
+            print(
+                f"% {config.url_prefix}/{Path(outfh.name).name} generated on {datetime.date.today().isoformat()}\n",
+                file=outfh,
+            )
+
+        # Add some shortcuts to the uncompressed consolidated bib file
+        print(
+            "@string{acl = {Association for Computational Linguistics}}",
+            file=file_anthology_raw,
+        )
+        print(f"@string{{anth = {{{config.url_prefix}/}}}}", file=file_anthology_raw)
+        print(file=file_anthology_raw)
+
+        for volume_file in track(
+            sorted(
+                Path(f"{builddir}/data-export/volumes").glob("*.bib"),
+                key=lambda p: (infer_year(p.stem), p.stem),
+                reverse=True,
+            ),
+            description="Create anthology.bib.gz...  ",
+        ):
+            # reset this each time
+            abbrev = None
+            volume_id = volume_file.stem
+
+            with open(volume_file, "r") as f:
+                bibtex = f.read()
+            print(bibtex, file=file_anthology)
+
+            # Space saver (https://github.com/acl-org/acl-anthology/issues/3016) for the
+            # uncompressed consolidated bibfile.
+            # Replace verbose text with abbreviations to get the file under 50 MB for Overleaf
+            concise_contents = bibtex.replace(
+                'publisher = "Association for Computational Linguistics",',
+                "publisher = acl,",
+            )
+            concise_contents = re.sub(
+                rf'url = "{config.url_prefix}/(.*)"',
+                r"url = anth # {\1}",
+                concise_contents,
+            )
+
+            # Abbreviate the booktitle by extracting it and printing it before
+            # the first entry in each volume
+            if concise_contents.startswith("@proceedings"):
+                # Grab the title string and create the alias
+                first_bibkey_comp = re.match(
+                    r'@proceedings{([a-z0-9]*)-', concise_contents
+                ).group(1)
+                abbrev = f"{first_bibkey_comp.upper()}:{infer_year(volume_id)}:{volume_id.split('-')[-1]}"
+                try:
+                    booktitle = re.match(
+                        r"@proceedings{[a-z0-9-]*,\n    title = \"(.*)\",",
+                        concise_contents,
+                    ).group(1)
+                    print(
+                        f"@string{{{abbrev} = {{{booktitle}}}}}",
+                        file=file_anthology_raw,
+                    )
+                except AttributeError:
+                    log.warning(f"Could not find title for {volume_id}")
+                    abbrev = None
+
+            if abbrev is not None and "booktitle" in concise_contents:
+                # substitute the alias for the booktitle
+                concise_contents = re.sub(
+                    r"    booktitle = (\".*\"),",
+                    f"    booktitle = {abbrev},",
+                    concise_contents,
+                )
+
+            # Remove whitespace to save space and keep things under 50 MB
+            concise_contents = re.sub(r",\n +", ",", concise_contents)
+            concise_contents = re.sub(r"  and\n +", " and ", concise_contents)
+            concise_contents = re.sub(r",\n}", "}", concise_contents)
+            # Written for every volume, including ones without a leading @proceedings
+            print(concise_contents, file=file_anthology_raw)
+
+    with gzip.open(
+        f"{builddir}/data-export/anthology+abstracts.bib.gz", "wt", encoding="utf-8"
+    ) as file_anthology_with_abstracts:
+        for collection_file in track(
+            sorted(
+                Path(f"{builddir}/data/papers").glob("*.json"),
+                key=lambda p: (infer_year(p.stem), p.stem),
+                reverse=True,
+            ),
+            description="       +abstracts.bib.gz... ",
+        ):
+            with open(collection_file, "rb") as f:
+                data = msgspec.json.decode(f.read())
+                # Entries without a "bibtex" value (e.g. beyond --bib-limit) are skipped
+                for entry in data.values():
+                    if bibtex := entry.get("bibtex"):
+                        print(bibtex, file=file_anthology_with_abstracts)
+
+
+def convert_bibtex(builddir, max_workers=None):
+    """Convert BibTeX into other bibliographic formats, and add them to the data files.
+
+    Requires data files from create_hugo_data.py. Re-raises any worker exception.
+    """
+    files = list(Path(f"{builddir}/data/papers").glob("*.json"))
+
+    with make_progress() as progress:
+        task = progress.add_task("Convert to MODS & Endnote...", total=len(files))
+        with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
+            futures = [executor.submit(convert_collection_file, file) for file in files]
+            for future in concurrent.futures.as_completed(futures):
+                future.result()  # re-raise worker exceptions instead of dropping them
+                progress.update(task, advance=1)
+
+
+def convert_collection_file(collection_file):
+    """Read a single collection data file, convert its BibTeX entries to MODS and Endnote formats, and save those back into the file.
+
+    Important:
+        This function should not rely on global objects, as it will be executed concurrently for different files with multiprocessing.
+    """
+    with open(collection_file, "rb") as f:
+        data = msgspec.json.decode(f.read())
+
+    entries = [entry for entry in data.values() if entry.get("bibtex")]
+    if not entries:
+        return
+
+    bibtex = "\n".join(entry["bibtex"] for entry in entries)
+    mods_batch, endf_batch = batch_convert_to_mods_and_endf(bibtex, collection_file.name)
+    if not (len(entries) == len(mods_batch) == len(endf_batch)):
+        raise RuntimeError(f"{collection_file.name}: converted entry count mismatch")
+    for entry, mods, endf in zip(entries, mods_batch, endf_batch):
+        entry["mods"] = mods
+        entry["endf"] = endf
+
+    with open(collection_file, "wb") as f:
+        f.write(msgspec.json.encode(data))
+
+
+def batch_convert_to_mods_and_endf(bibtex, context):
+    """Convert a BibTeX string with multiple entries to MODS and Endnote.
+
+    Relies on bibutils to perform the conversion, then returns a list with the individual converted entries.
+    """
+    # In spawn-started workers the globals are still None; fall back to a PATH lookup.
+    mods = subprocess.run(
+        [BIB2XML or "bib2xml", "-nt"],
+        input=bibtex,
+        capture_output=True,
+        text=True,
+    )
+    log.debug(f"{context}: {mods.stderr.strip()}")
+    endf = subprocess.run(
+        [XML2END or "xml2end"],
+        input=mods.stdout,
+        capture_output=True,
+        text=True,
+    )
+    log.debug(f"{context}: {endf.stderr.strip()}")
+
+    mods_header, *mods_entries = re.split(r"<mods ", mods.stdout)
+    mods_header = mods_header.lstrip("\ufeff")
+    mods_footer = "</modsCollection>\n"
+    mods_batch = [
+        f"{mods_header}<mods {entry}{mods_footer}" for entry in mods_entries[:-1]
+    ] + [f"{mods_header}<mods {mods_entries[-1]}"]
+
+    endf_batch = endf.stdout.strip("\ufeff\r\n").split("\n\n")
+    return mods_batch, endf_batch
+
+
+if __name__ == "__main__":
+    args = docopt(__doc__)
+    scriptdir = os.path.dirname(os.path.abspath(__file__))
+    if "{scriptdir}" in args["--builddir"]:
+        args["--builddir"] = os.path.abspath(
+            args["--builddir"].format(scriptdir=scriptdir)
+        )
+
+    log_level = log.DEBUG if args["--debug"] else log.INFO
+    tracker = setup_rich_logging(level=log_level)
+
+    max_workers = int(args["--max-workers"]) if args["--max-workers"] else None
+    if (BIB2XML := shutil.which("bib2xml")) is None:
+        log.error("bib2xml not found; please install bibutils for MODS XML conversion")
+    if (XML2END := shutil.which("xml2end")) is None:
+        log.error("xml2end not found; please install bibutils for Endnote conversion")
+    # BibTeX files are always written; MODS/Endnote conversion needs both bibutils tools
+    create_bibtex(args["--builddir"], clean=args["--clean"])
+    if BIB2XML and XML2END:
+        convert_bibtex(args["--builddir"], max_workers=max_workers)
+    # presumably tracker.highest is the highest level logged so far -- TODO confirm
+    if tracker.highest >= log.ERROR:
+        exit(1)
diff --git a/bin/create_bibtex.py b/bin/create_bibtex.py
deleted file mode 100755
index 05bec2b952..0000000000
--- a/bin/create_bibtex.py
+++ /dev/null
@@ -1,194 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-#
-# Copyright 2019-2024 Marcel Bollmann <marcel@bollmann.me>
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Usage: create_bibtex.py [--importdir=DIR] [--exportdir=DIR] [-c] [--debug]
-
-Creates .bib files for all papers in the Hugo directory.
-
-Options:
-  --importdir=DIR          Directory to import XML files from. [default: {scriptdir}/../data/]
-  --exportdir=DIR          Directory to write exported files to.   [default: {scriptdir}/../build/data-export/]
-  --debug                  Output debug-level log messages.
-  -c, --clean              Delete existing files in target directory before generation.
-  -h, --help               Display this helpful text.
-"""
-
-import re
-import gzip
-import logging as log
-import os
-import datetime
-
-from docopt import docopt
-from omegaconf import OmegaConf
-from pathlib import Path
-from rich.progress import track
-
-from acl_anthology import Anthology, config
-from acl_anthology.utils.logging import setup_rich_logging
-from create_hugo_data import check_directory
-
-
-def create_bibtex(anthology, trgdir, limit=0, clean=False) -> None:
-    """Creates .bib files for all papers.
-
-    :param anthology: The Anthology object.
-    :param trgdir: The target directory to write to
-    :param limit: If nonzero, only generate {limit} entries per volume
-    :param clean: Clean the directory first
-    """
-    if not check_directory("{}/papers".format(trgdir), clean=clean):
-        return
-    if not check_directory("{}/volumes".format(trgdir), clean=clean):
-        return
-
-    log.debug("Creating BibTeX files for all papers...")
-    with (
-        open(
-            "{}/anthology.bib".format(trgdir), "wt", encoding="utf-8"
-        ) as file_anthology_raw,
-        gzip.open(
-            "{}/anthology.bib.gz".format(trgdir), "wt", encoding="utf-8"
-        ) as file_anthology,
-        gzip.open(
-            "{}/anthology+abstracts.bib.gz".format(trgdir), "wt", encoding="utf-8"
-        ) as file_anthology_with_abstracts,
-    ):
-        # Add a header to each consolidated bibfile
-        for outfh in file_anthology_raw, file_anthology, file_anthology_with_abstracts:
-            print(
-                f"% https://aclanthology.org/{Path(outfh.name).name} generated on {datetime.date.today().isoformat()}\n",
-                file=outfh,
-            )
-
-        # Add some shortcuts to the uncompressed consolidated bib file
-        print(
-            "@string{acl = {Association for Computational Linguistics}}",
-            file=file_anthology_raw,
-        )
-        print("@string{anth = {https://aclanthology.org/}}", file=file_anthology_raw)
-        print(file=file_anthology_raw)
-
-        for volume in track(
-            sorted(
-                anthology.volumes(), key=lambda vol: (vol.year, vol.full_id), reverse=True
-            ),
-            description="Creating BibTeX files...",
-        ):
-            # reset this each time
-            abbrev = None
-
-            volume_dir = trgdir
-            if not os.path.exists(volume_dir):
-                os.makedirs(volume_dir)
-            with open(
-                "{}/volumes/{}.bib".format(trgdir, volume.full_id), "w"
-            ) as file_volume:
-                for i, paper in enumerate(volume.values(), 1):
-                    if limit and i > limit:
-                        break
-
-                    with open(
-                        "{}/{}.bib".format(volume_dir, paper.full_id), "w"
-                    ) as file_paper:
-                        contents = paper.to_bibtex(with_abstract=True)
-                        print(contents, file=file_paper)
-                        print(contents, file=file_anthology_with_abstracts)
-
-                        concise_contents = paper.to_bibtex()
-                        print(concise_contents, file=file_volume)
-                        print(concise_contents, file=file_anthology)
-
-                        # Space saver (https://github.com/acl-org/acl-anthology/issues/3016) for the
-                        # uncompressed consolidated bibfile.
-                        # Replace verbose text with abbreviations to get the file under 50 MB for Overleaf
-                        concise_contents = concise_contents.replace(
-                            'publisher = "Association for Computational Linguistics",',
-                            "publisher = acl,",
-                        )
-                        concise_contents = re.sub(
-                            r'url = "https://aclanthology.org/(.*)"',
-                            r"url = anth # {\1}",
-                            concise_contents,
-                        )
-
-                        # Abbreviate the booktitle by extracting it and printing it before
-                        # the first entry in each volume
-                        if concise_contents.startswith("@proceedings"):
-                            # Grab the title string and create the alias
-                            abbrev = (
-                                f"{volume.venue_ids[0].upper()}:{volume.year}:{volume.id}"
-                            )
-                            try:
-                                booktitle = re.search(
-                                    r"    title = \"(.*)\",", concise_contents
-                                ).group(1)
-                                print(
-                                    f"@string{{{abbrev} = {{{booktitle}}}}}",
-                                    file=file_anthology_raw,
-                                )
-                            except AttributeError:
-
-                                log.warning(f"Could not find title for {volume.full_id}")
-                                abbrev = None
-
-                        if abbrev is not None and "booktitle" in concise_contents:
-                            # substitute the alias for the booktitle
-                            concise_contents = re.sub(
-                                r"    booktitle = (\".*\"),",
-                                f"    booktitle = {abbrev},",
-                                concise_contents,
-                            )
-
-                        # Remove whitespace to save space and keep things under 50 MB
-                        concise_contents = re.sub(r",\n +", ",", concise_contents)
-                        concise_contents = re.sub(r"  and\n +", " and ", concise_contents)
-                        concise_contents = re.sub(r",\n}", "}", concise_contents)
-
-                        print(concise_contents, file=file_anthology_raw)
-
-
-if __name__ == "__main__":
-    args = docopt(__doc__)
-    scriptdir = os.path.dirname(os.path.abspath(__file__))
-    if "{scriptdir}" in args["--importdir"]:
-        args["--importdir"] = os.path.abspath(
-            args["--importdir"].format(scriptdir=scriptdir)
-        )
-    if "{scriptdir}" in args["--exportdir"]:
-        args["--exportdir"] = os.path.abspath(
-            args["--exportdir"].format(scriptdir=scriptdir)
-        )
-
-    log_level = log.DEBUG if args["--debug"] else log.INFO
-    tracker = setup_rich_logging(level=log_level)
-
-    # This "freezes" the config, resulting in a massive speed-up
-    OmegaConf.resolve(config)
-
-    # If NOBIB is set, generate only three bibs per volume
-    limit = 0 if os.environ.get("NOBIB", "false") == "false" else 3
-    if limit != 0:
-        log.info(f"NOBIB=true, generating only {limit} BibTEX files per volume")
-
-    anthology = Anthology(datadir=args["--importdir"]).load_all()
-    if tracker.highest >= log.ERROR:
-        exit(1)
-
-    create_bibtex(anthology, args["--exportdir"], limit=limit, clean=args["--clean"])
-    if tracker.highest >= log.ERROR:
-        exit(1)
diff --git a/bin/xml2end_wrapper b/bin/xml2end_wrapper
deleted file mode 100755
index 63f8877768..0000000000
--- a/bin/xml2end_wrapper
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/sh
-#
-# Copyright 2019 Martin Villalba <villalba@7c0h.com>
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-xml2end $1 2>&1 > ${1%.xml}.endf

From 1d74ed44c696fac634b491f466e78bc40f2ef84f Mon Sep 17 00:00:00 2001
From: Marcel Bollmann <marcel@bollmann.me>
Date: Wed, 1 Jan 2025 23:22:14 +0100
Subject: [PATCH 03/16] Change Hugo templates to read bib from data, add
 FileSaver.js

---
 hugo/layouts/_default/baseof.html          |   2 +-
 hugo/layouts/papers/list-entry-author.html |   2 +
 hugo/layouts/papers/list-entry.html        |   2 +
 hugo/layouts/papers/single.html            |  58 ++++---
 hugo/static/js/FileSaver.js                | 188 +++++++++++++++++++++
 5 files changed, 225 insertions(+), 27 deletions(-)
 create mode 100644 hugo/static/js/FileSaver.js

diff --git a/hugo/layouts/_default/baseof.html b/hugo/layouts/_default/baseof.html
index 125c02053e..c2f3c118de 100644
--- a/hugo/layouts/_default/baseof.html
+++ b/hugo/layouts/_default/baseof.html
@@ -48,7 +48,7 @@
 
           // abstract toggling -- used on multiple pages, so defined here
           if ($("#toggle-all-abstracts")) {
-              $("#toggle-all-abstracts").click(
+              $("#toggle-all-abstracts").on("click",
                   function() {
                       var target = $("#toggle-all-abstracts");
                       target.attr("disabled", true);
diff --git a/hugo/layouts/papers/list-entry-author.html b/hugo/layouts/papers/list-entry-author.html
index d32424cc27..06a72fc8b1 100644
--- a/hugo/layouts/papers/list-entry-author.html
+++ b/hugo/layouts/papers/list-entry-author.html
@@ -7,11 +7,13 @@
       pdf
     </a>
     {{- end -}}
+<!--
     {{- if (fileExists (printf "/data-export/%s.bib" .Params.anthology_id)) -}}
     <a class="badge badge-secondary align-middle mr-1" href="{{ (printf "/%s.bib" .Params.anthology_id) | relURL }}" data-toggle="tooltip" data-placement="top" title="Export to BibTeX">
       bib
     </a>
     {{- end -}}
+-->
     {{- with $paper.abstract_html -}}
     <a class="badge badge-info align-middle mr-1" href="#abstract-{{ replace $.Params.anthology_id "." "--" }}" data-toggle="collapse" aria-expanded="false" aria-controls="abstract-{{ $.Params.anthology_id }}" title="Show Abstract">abs</a>
     {{- end -}}
diff --git a/hugo/layouts/papers/list-entry.html b/hugo/layouts/papers/list-entry.html
index bfe19aa249..98abfab33f 100644
--- a/hugo/layouts/papers/list-entry.html
+++ b/hugo/layouts/papers/list-entry.html
@@ -8,11 +8,13 @@
       pdf
     </a>
     {{- end -}}
+<!--
     {{- if and (not $paper.retracted) (fileExists (printf "/data-export/%s.bib" .Params.anthology_id)) -}}
     <a class="badge badge-secondary align-middle mr-1" href="{{ (printf "/%s.bib" .Params.anthology_id) | relURL }}" data-toggle="tooltip" data-placement="top" title="Export to BibTeX">
       bib
     </a>
     {{- end -}}
+-->
     {{- with $paper.abstract_html -}}
     <a class="badge badge-info align-middle mr-1" href="#abstract-{{ replace $.Params.anthology_id "." "--" }}" data-toggle="collapse" aria-expanded="false" aria-controls="abstract-{{ $.Params.anthology_id }}" title="Show Abstract">abs</a>
     {{- end -}}
diff --git a/hugo/layouts/papers/single.html b/hugo/layouts/papers/single.html
index c52134c560..90dc3fb228 100644
--- a/hugo/layouts/papers/single.html
+++ b/hugo/layouts/papers/single.html
@@ -40,6 +40,7 @@
 {{ $volume_id := index (split .Params.anthology_id "-") 0 }}
 {{ $paper := index (index .Site.Data.papers $volume_id) .Params.anthology_id }}
 <script src="{{ "/js/clipboard.min.js" | relURL }}"></script>
+<script src="{{ "/js/FileSaver.js" | relURL }}"></script>
 <script>
   $( document ).ready(function() {
       if (ClipboardJS.isSupported()) {
@@ -74,6 +75,20 @@
       }
   });
 
+  $( document ).ready(function() {
+      if ($(".btn-filesaver")) {
+          $(".btn-filesaver").on("click",
+              function() {
+                  var target = $( this ).attr("data-filesaver-target");
+                  var filename = $( this ).attr("data-filesaver-name");
+                  var blob = new Blob([$(target).text()], {type: "text/plain;charset=utf-8"});
+                  saveAs(blob, filename);
+              }
+          );
+          $(".btn-filesaver").removeClass("disabled");
+      }
+  });
+
   const paper_params = {
     anthology_id: "{{ .Params.anthology_id }}",
     title: "{{ $paper.title_raw }}",
@@ -322,9 +337,6 @@
 {{ $anthology_id := .Params.anthology_id }}
 {{ $volume_id := index (split .Params.anthology_id "-") 0 }}
 {{ $paper := index (index .Site.Data.papers $volume_id) .Params.anthology_id }}
-{{ $has_bib := fileExists (printf "/data-export/%s.bib" $anthology_id) }}
-{{ $has_xml := fileExists (printf "/data-export/%s.xml" $anthology_id) }}
-{{ $has_endf := fileExists (printf "/data-export/%s.endf" $anthology_id) }}
 <section id="main">
   <div>
   <h2 id="title">
@@ -511,14 +523,14 @@ <h5 class="card-title">Abstract</h5>
       {{ end }}
       <dt class="acl-button-row">Copy Citation:</dt>
       <dd class="acl-button-row">
-      {{ if $has_bib }}
+      {{ with $paper.bibtex }}
         <button type="button" class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target="#citeBibtexContent"><i class="far fa-clipboard pr-2"></i>BibTeX</button>
       {{ end }}
         <button type="button" class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target="#citeMarkdownContent"><i class="far fa-clipboard pr-2"></i>Markdown</button>
-      {{ if $has_xml }}
+      {{ with $paper.mods }}
         <button type="button" class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target="#citeModsContent"><i class="far fa-clipboard pr-2"></i>MODS XML</button>
       {{ end }}
-      {{ if $has_endf }}
+      {{ with $paper.endf }}
         <button type="button" class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target="#citeEndnoteContent"><i class="far fa-clipboard pr-2"></i>Endnote</button>
       {{ end }}
         <button type="button" class="btn btn-secondary btn-sm" data-toggle="modal" data-target="#citeModal">More options…</button>
@@ -592,7 +604,7 @@ <h5 class="card-title">Abstract</h5>
         </a>
         {{ end }}
       {{ end }}
-      {{ if and (not $paper.retracted) (not $paper.removed) (or $has_bib $has_xml $has_endf) }}
+      {{ if and (not $paper.retracted) (not $paper.removed) ($paper.bibtex) }}
       <a class="btn btn-secondary" title="Open dialog for exporting citations" data-toggle="modal" data-target="#citeModal" href="#">
         <i class="fas fa-quote-left"></i><span class="pl-2">Cite</span>
       </a>
@@ -640,49 +652,43 @@ <h5 class="modal-title" id="citeModalLabel">Export citation</h5>
         <div class="modal-body">
           <ul class="nav nav-tabs mb-2" id="citeFormats" role="tablist">
             <li class="nav-item">
-              <a class="nav-link {{ if not $has_bib }}disabled{{ else }}active{{ end }}" data-toggle="list" href="#citeBibtex" role="tab" aria-controls="citeBibtex" aria-selected="{{ if $has_bib }}true{{ else }}false{{ end }}">BibTeX</a>
+              <a class="nav-link {{ if not $paper.bibtex }}disabled{{ else }}active{{ end }}" data-toggle="list" href="#citeBibtex" role="tab" aria-controls="citeBibtex" aria-selected="{{ if $paper.bibtex }}true{{ else }}false{{ end }}">BibTeX</a>
             </li>
             <li class="nav-item">
-              <a class="nav-link {{ if not $has_xml }}disabled{{ end }}" data-toggle="list" href="#citeMods" role="tab" aria-controls="citeMods" aria-selected="false">MODS XML</a>
+              <a class="nav-link {{ if not $paper.mods }}disabled{{ end }}" data-toggle="list" href="#citeMods" role="tab" aria-controls="citeMods" aria-selected="false">MODS XML</a>
             </li>
             <li class="nav-item">
-              <a class="nav-link {{ if not $has_endf }}disabled{{ end }}" data-toggle="list" href="#citeEndnote" role="tab" aria-controls="citeEndnote" aria-selected="false">Endnote</a>
+              <a class="nav-link {{ if not $paper.endf }}disabled{{ end }}" data-toggle="list" href="#citeEndnote" role="tab" aria-controls="citeEndnote" aria-selected="false">Endnote</a>
             </li>
             <li class="nav-item">
-              <a class="nav-link {{ if not $has_bib }}active{{ end }}" data-toggle="list" href="#citeMarkdown" role="tab" aria-controls="citeMarkdown" aria-selected="{{ if $has_bib }}false{{ else }}true{{ end }}">Preformatted</a>
+              <a class="nav-link {{ if not $paper.bibtex }}active{{ end }}" data-toggle="list" href="#citeMarkdown" role="tab" aria-controls="citeMarkdown" aria-selected="{{ if $paper.bibtex }}false{{ else }}true{{ end }}">Preformatted</a>
             </li>
           </ul>
 
           <div class="tab-content" id="citeFormatsContent">
             <div class="tab-pane active" id="citeBibtex" role="tabpanel">
-              {{- if $has_bib -}}
-              <pre id="citeBibtexContent" class="bg-light border p-2" style="max-height: 50vh;">
-                {{- readFile (printf "/data-export/%s.bib" $anthology_id) -}}
-              </pre>
+              {{- with $paper.bibtex -}}
+              <pre id="citeBibtexContent" class="bg-light border p-2" style="max-height: 50vh;">{{ . }}</pre>
               <div class="modal-footer pb-1">
-                <a class="btn btn-secondary" href="{{ (printf "/%s.bib" $anthology_id) | relURL }}"><i class="fas fa-download pr-2"></i>Download as File</a>
+                <a class="btn btn-secondary btn-filesaver disabled" data-filesaver-target="#citeBibtexContent" data-filesaver-name="{{ $.Params.anthology_id }}.bib"><i class="fas fa-download pr-2"></i>Download as File</a>
                 <button class="btn btn-clipboard btn-primary d-none" data-clipboard-target="#citeBibtexContent"><i class="far fa-clipboard pr-2"></i>Copy to Clipboard</button>
               </div>
               {{- end -}}
             </div>
             <div class="tab-pane" id="citeMods" role="tabpanel">
-              {{- if $has_xml -}}
-              <pre id="citeModsContent" class="bg-light border p-2" style="max-height: 50vh;">
-                {{- readFile (printf "/data-export/%s.xml" $anthology_id) -}}
-              </pre>
+              {{- with $paper.mods -}}
+              <pre id="citeModsContent" class="bg-light border p-2" style="max-height: 50vh;">{{ . }}</pre>
               <div class="modal-footer pb-1">
-                <a class="btn btn-secondary" href="{{ (printf "/%s.xml" $anthology_id) | relURL }}"><i class="fas fa-download pr-2"></i>Download as File</a>
+                <a class="btn btn-secondary btn-filesaver disabled" data-filesaver-target="#citeModsContent" data-filesaver-name="{{ $.Params.anthology_id }}.xml"><i class="fas fa-download pr-2"></i>Download as File</a>
                 <button class="btn btn-clipboard btn-primary d-none" data-clipboard-target="#citeModsContent"><i class="far fa-clipboard pr-2"></i>Copy to Clipboard</button>
               </div>
               {{- end -}}
             </div>
             <div class="tab-pane" id="citeEndnote" role="tabpanel">
-              {{- if $has_endf -}}
-              <pre id="citeEndnoteContent" class="bg-light border p-2" style="max-height: 50vh;">
-                {{- readFile (printf "/data-export/%s.endf" $anthology_id) -}}
-              </pre>
+              {{- with $paper.endf -}}
+              <pre id="citeEndnoteContent" class="bg-light border p-2" style="max-height: 50vh;">{{ . }}</pre>
               <div class="modal-footer pb-1">
-                <a class="btn btn-secondary" href="{{ (printf "/%s.endf" $anthology_id) | relURL }}"><i class="fas fa-download pr-2"></i>Download as File</a>
+                <a class="btn btn-secondary btn-filesaver disabled" data-filesaver-target="#citeEndnoteContent" data-filesaver-name="{{ $.Params.anthology_id }}.endf"><i class="fas fa-download pr-2"></i>Download as File</a>
                 <button class="btn btn-clipboard btn-primary d-none" data-clipboard-target="#citeEndnoteContent"><i class="far fa-clipboard pr-2"></i>Copy to Clipboard</button>
               </div>
               {{- end -}}
diff --git a/hugo/static/js/FileSaver.js b/hugo/static/js/FileSaver.js
new file mode 100644
index 0000000000..54fc090045
--- /dev/null
+++ b/hugo/static/js/FileSaver.js
@@ -0,0 +1,188 @@
+(function (global, factory) {
+  if (typeof define === "function" && define.amd) {
+    define([], factory);
+  } else if (typeof exports !== "undefined") {
+    factory();
+  } else {
+    var mod = {
+      exports: {}
+    };
+    factory();
+    global.FileSaver = mod.exports;
+  }
+})(this, function () {
+  "use strict";
+
+  /*
+  * FileSaver.js
+  * A saveAs() FileSaver implementation.
+  *
+  * By Eli Grey, http://eligrey.com
+  *
+  * License : https://github.com/eligrey/FileSaver.js/blob/master/LICENSE.md (MIT)
+  * source  : http://purl.eligrey.com/github/FileSaver.js
+  */
+  // The one and only way of getting global scope in all environments
+  // https://stackoverflow.com/q/3277182/1008999
+  var _global = typeof window === 'object' && window.window === window ? window : typeof self === 'object' && self.self === self ? self : typeof global === 'object' && global.global === global ? global : void 0;
+
+  function bom(blob, opts) {
+    if (typeof opts === 'undefined') opts = {
+      autoBom: false
+    };else if (typeof opts !== 'object') {
+      console.warn('Deprecated: Expected third argument to be a object');
+      opts = {
+        autoBom: !opts
+      };
+    } // prepend BOM for UTF-8 XML and text/* types (including HTML)
+    // note: your browser will automatically convert UTF-16 U+FEFF to EF BB BF
+
+    if (opts.autoBom && /^\s*(?:text\/\S*|application\/xml|\S*\/\S*\+xml)\s*;.*charset\s*=\s*utf-8/i.test(blob.type)) {
+      return new Blob([String.fromCharCode(0xFEFF), blob], {
+        type: blob.type
+      });
+    }
+
+    return blob;
+  }
+
+  function download(url, name, opts) {
+    var xhr = new XMLHttpRequest();
+    xhr.open('GET', url);
+    xhr.responseType = 'blob';
+
+    xhr.onload = function () {
+      saveAs(xhr.response, name, opts);
+    };
+
+    xhr.onerror = function () {
+      console.error('could not download file');
+    };
+
+    xhr.send();
+  }
+
+  function corsEnabled(url) {
+    var xhr = new XMLHttpRequest(); // use sync to avoid popup blocker
+
+    xhr.open('HEAD', url, false);
+
+    try {
+      xhr.send();
+    } catch (e) {}
+
+    return xhr.status >= 200 && xhr.status <= 299;
+  } // `a.click()` doesn't work for all browsers (#465)
+
+
+  function click(node) {
+    try {
+      node.dispatchEvent(new MouseEvent('click'));
+    } catch (e) {
+      var evt = document.createEvent('MouseEvents');
+      evt.initMouseEvent('click', true, true, window, 0, 0, 0, 80, 20, false, false, false, false, 0, null);
+      node.dispatchEvent(evt);
+    }
+  } // Detect WebView inside a native macOS app by ruling out all browsers
+  // We just need to check for 'Safari' because all other browsers (besides Firefox) include that too
+  // https://www.whatismybrowser.com/guides/the-latest-user-agent/macos
+
+
+  var isMacOSWebView = /Macintosh/.test(navigator.userAgent) && /AppleWebKit/.test(navigator.userAgent) && !/Safari/.test(navigator.userAgent);
+  var saveAs = _global.saveAs || ( // probably in some web worker
+  typeof window !== 'object' || window !== _global ? function saveAs() {}
+  /* noop */
+  // Use download attribute first if possible (#193 Lumia mobile) unless this is a macOS WebView
+  : 'download' in HTMLAnchorElement.prototype && !isMacOSWebView ? function saveAs(blob, name, opts) {
+    var URL = _global.URL || _global.webkitURL;
+    var a = document.createElement('a');
+    name = name || blob.name || 'download';
+    a.download = name;
+    a.rel = 'noopener'; // tabnabbing
+    // TODO: detect chrome extensions & packaged apps
+    // a.target = '_blank'
+
+    if (typeof blob === 'string') {
+      // Support regular links
+      a.href = blob;
+
+      if (a.origin !== location.origin) {
+        corsEnabled(a.href) ? download(blob, name, opts) : click(a, a.target = '_blank');
+      } else {
+        click(a);
+      }
+    } else {
+      // Support blobs
+      a.href = URL.createObjectURL(blob);
+      setTimeout(function () {
+        URL.revokeObjectURL(a.href);
+      }, 4E4); // 40s
+
+      setTimeout(function () {
+        click(a);
+      }, 0);
+    }
+  } // Use msSaveOrOpenBlob as a second approach
+  : 'msSaveOrOpenBlob' in navigator ? function saveAs(blob, name, opts) {
+    name = name || blob.name || 'download';
+
+    if (typeof blob === 'string') {
+      if (corsEnabled(blob)) {
+        download(blob, name, opts);
+      } else {
+        var a = document.createElement('a');
+        a.href = blob;
+        a.target = '_blank';
+        setTimeout(function () {
+          click(a);
+        });
+      }
+    } else {
+      navigator.msSaveOrOpenBlob(bom(blob, opts), name);
+    }
+  } // Fallback to using FileReader and a popup
+  : function saveAs(blob, name, opts, popup) {
+    // Open a popup immediately do go around popup blocker
+    // Mostly only available on user interaction and the fileReader is async so...
+    popup = popup || open('', '_blank');
+
+    if (popup) {
+      popup.document.title = popup.document.body.innerText = 'downloading...';
+    }
+
+    if (typeof blob === 'string') return download(blob, name, opts);
+    var force = blob.type === 'application/octet-stream';
+
+    var isSafari = /constructor/i.test(_global.HTMLElement) || _global.safari;
+
+    var isChromeIOS = /CriOS\/[\d]+/.test(navigator.userAgent);
+
+    if ((isChromeIOS || force && isSafari || isMacOSWebView) && typeof FileReader !== 'undefined') {
+      // Safari doesn't allow downloading of blob URLs
+      var reader = new FileReader();
+
+      reader.onloadend = function () {
+        var url = reader.result;
+        url = isChromeIOS ? url : url.replace(/^data:[^;]*;/, 'data:attachment/file;');
+        if (popup) popup.location.href = url;else location = url;
+        popup = null; // reverse-tabnabbing #460
+      };
+
+      reader.readAsDataURL(blob);
+    } else {
+      var URL = _global.URL || _global.webkitURL;
+      var url = URL.createObjectURL(blob);
+      if (popup) popup.location = url;else location.href = url;
+      popup = null; // reverse-tabnabbing #460
+
+      setTimeout(function () {
+        URL.revokeObjectURL(url);
+      }, 4E4); // 40s
+    }
+  });
+  _global.saveAs = saveAs.saveAs = saveAs;
+
+  if (typeof module !== 'undefined') {
+    module.exports = saveAs;
+  }
+});

From 0f18af84ae6e5ef13db026e3b318706f20f102d5 Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Thu, 2 Jan 2025 17:57:28 -0500
Subject: [PATCH 04/16] Add .htaccess redirects

---
 hugo/static/.htaccess | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hugo/static/.htaccess b/hugo/static/.htaccess
index bfa7cc5ffc..126416061b 100644
--- a/hugo/static/.htaccess
+++ b/hugo/static/.htaccess
@@ -92,3 +92,8 @@ RewriteRule ^(\d{4})\.([a-zA-Z\d]+).handbook.pdf$ anthology-files/handbooks/$2/$
 
 # Copyright
 RewriteRule acl-copyright-transfer.pdf anthology-files/templates/acl-copyright-transfer.pdf [L,NC]
+
+# Dynamically generate paper-level bib files
+# Needs "Options +ExecCGI" and "AddHandler cgi-script .cgi" for the /cgi-bin directory enabled
+RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+)\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
+RewriteRule ^([A-Za-z]\d{2}\-\d{4})\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
\ No newline at end of file

From b283fb26d722ca7ee86dbc7c4e0b2ec49081560d Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Thu, 2 Jan 2025 17:57:37 -0500
Subject: [PATCH 05/16] Add half-working bash script

---
 hugo/static/generate_bib.cgi | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 hugo/static/generate_bib.cgi

diff --git a/hugo/static/generate_bib.cgi b/hugo/static/generate_bib.cgi
new file mode 100644
index 0000000000..77f17de8b4
--- /dev/null
+++ b/hugo/static/generate_bib.cgi
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+if [[ -z $QUERY_STRING ]]; then
+    QUERY_STRING="?anthology_id=$1"
+fi
+anthid=${QUERY_STRING#anthology_id=}
+
+# Set content type headers for PDF
+echo "Content-Type: text/plain"
+echo ""
+
+# Get volume name
+volume=$(echo $anthid | cut -d. -f1-2)
+
+#echo "QUERY STRING $QUERY_STRING"
+#echo $anthid
+#echo $volume
+
+echo "Looking for $anthid in $volume..."
\ No newline at end of file

From 17bf6d210663e2e0e58f4e383395932a567577b0 Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Thu, 2 Jan 2025 18:56:47 -0500
Subject: [PATCH 06/16] Add CGI script

---
 hugo/static/cgi-bin/generate_bib.cgi | 96 ++++++++++++++++++++++++++++
 hugo/static/generate_bib.cgi         | 19 ------
 2 files changed, 96 insertions(+), 19 deletions(-)
 create mode 100644 hugo/static/cgi-bin/generate_bib.cgi
 delete mode 100644 hugo/static/generate_bib.cgi

diff --git a/hugo/static/cgi-bin/generate_bib.cgi b/hugo/static/cgi-bin/generate_bib.cgi
new file mode 100644
index 0000000000..cd23a42c38
--- /dev/null
+++ b/hugo/static/cgi-bin/generate_bib.cgi
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2024 Matt Post <post@cs.jhu.edu>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+A Python CGI script. Takes the anthology_id parameter, finds the volume,
+and then loads that file from ../volumes/{volume_id}.bib, which it opens to
+search for the appropriate BibTeX entry. This is then printed to STDOUT.
+
+The volume bibtex has lines like the following:
+
+@proceedings{acl-2024-long,
+    title = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
+    editor = "Ku, Lun-Wei  and
+      Martins, Andre  and
+      Srikumar, Vivek",
+    month = aug,
+    year = "2024",
+    address = "Bangkok, Thailand",
+    publisher = "Association for Computational Linguistics",
+    url = "https://preview.aclanthology.org/dont-generate-bib-files/2024.acl-long.0/"
+}
+
+To find the appropriate entry, walk through the file, reading an entry at a time.
+If the entry has a "url" field containing the anthology_id, return that entry.
+
+To test, you can pass the query string in as an environment variable:
+
+    QUERY_STRING="anthology_id=2024.acl-long.1" python generate_bib.cgi
+
+This needs to be done in a sister directory of the volumes/ directory.
+"""
+
+import os
+import sys
+import acl_anthology
+
+
+def parse_query_string(query_string):
+    """
+    Parse the query string into a dictionary.
+    """
+    return dict(q.split("=") for q in query_string.split("&"))
+
+
+def bib_entries(f):
+    """
+    Create an iterator that iterates over bib entries in a file.
+    """
+    entry = ""
+    for line in f:
+        if line.strip() == "}":
+            entry += line
+            yield entry
+            entry = ""
+        else:
+            entry += line
+
+
+def get_bibtex_entry(anthology_id):
+    # Get the volume_id from the anthology_id
+    parsed = acl_anthology.utils.parse_id(anthology_id)
+    volume_id = f"{parsed[0]}-{parsed[1]}"
+    with open(f"../volumes/{volume_id}.bib") as f:
+        # iterate through the file, reading bibtex entries
+        for entry in bib_entries(f):
+            if f'/{anthology_id}/' in entry:
+                return entry
+    return None
+
+
+if __name__ == "__main__":
+    print("Content-Type: text/plain\n")
+
+    # Get the anthology_id from the query string
+    params = parse_query_string(os.environ.get("QUERY_STRING", ""))
+    anthology_id = params.get("anthology_id")
+    if not anthology_id:
+        print("Error: anthology_id not provided")
+        sys.exit(1)
+
+    bibtex_entry = get_bibtex_entry(anthology_id)
+    print(bibtex_entry)
diff --git a/hugo/static/generate_bib.cgi b/hugo/static/generate_bib.cgi
deleted file mode 100644
index 77f17de8b4..0000000000
--- a/hugo/static/generate_bib.cgi
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-if [[ -z $QUERY_STRING ]]; then
-    QUERY_STRING="?anthology_id=$1"
-fi
-anthid=${QUERY_STRING#anthology_id=}
-
-# Set content type headers for PDF
-echo "Content-Type: text/plain"
-echo ""
-
-# Get volume name
-volume=$(echo $anthid | cut -d. -f1-2)
-
-#echo "QUERY STRING $QUERY_STRING"
-#echo $anthid
-#echo $volume
-
-echo "Looking for $anthid in $volume..."
\ No newline at end of file

From 7d1f9b90d7e8467b74f2743e310a21ae9384c4e3 Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Thu, 2 Jan 2025 19:28:16 -0500
Subject: [PATCH 07/16] Add options

---
 hugo/static/.htaccess | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hugo/static/.htaccess b/hugo/static/.htaccess
index 126416061b..73fcbab810 100644
--- a/hugo/static/.htaccess
+++ b/hugo/static/.htaccess
@@ -94,6 +94,7 @@ RewriteRule ^(\d{4})\.([a-zA-Z\d]+).handbook.pdf$ anthology-files/handbooks/$2/$
 RewriteRule acl-copyright-transfer.pdf anthology-files/templates/acl-copyright-transfer.pdf [L,NC]
 
 # Dynamically generate paper-level bib files
-# Needs "Options +ExecCGI" and "AddHandler cgi-script .cgi" for the /cgi-bin directory enabled
+Options +ExecCGI
+AddHandler cgi-script .cgi
 RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+)\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
-RewriteRule ^([A-Za-z]\d{2}\-\d{4})\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
\ No newline at end of file
+RewriteRule ^([A-Za-z]\d{2}\-\d{4})\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]

From 392038c9926a2a3a3d85fc9de6f16ddcf439c647 Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Thu, 2 Jan 2025 19:32:07 -0500
Subject: [PATCH 08/16] Get rid of acl_anthology import for simplicity

---
 hugo/static/cgi-bin/generate_bib.cgi | 46 ++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 hugo/static/cgi-bin/generate_bib.cgi

diff --git a/hugo/static/cgi-bin/generate_bib.cgi b/hugo/static/cgi-bin/generate_bib.cgi
old mode 100644
new mode 100755
index cd23a42c38..c0ff988dc2
--- a/hugo/static/cgi-bin/generate_bib.cgi
+++ b/hugo/static/cgi-bin/generate_bib.cgi
@@ -46,7 +46,44 @@ This needs to be done in a sister directory of the volumes/ directory.
 
 import os
 import sys
-import acl_anthology
+
+
+def parse_id(anthology_id):
+    """
+    Parses an Anthology ID into its constituent collection ID, volume ID, and paper ID
+    parts.
+
+    Copied and trimmed from the Anthology python module to avoid the import.
+    """
+
+    if isinstance(anthology_id, tuple):
+        return anthology_id
+
+    if "-" not in anthology_id:
+        return (anthology_id, None, None)
+
+    collection_id, rest = anthology_id.split("-")
+    if collection_id[0].isdigit():
+        # post-2020 IDs
+        if "." in rest:
+            return (collection_id, *(rest.split(".")))  # type: ignore
+        else:
+            return (collection_id, rest, None)
+    else:
+        # pre-2020 IDs
+        if len(rest) < 4:
+            # probably volume-only identifier
+            return (collection_id, rest.lstrip("0"), None)
+        elif (
+            collection_id.startswith("W")
+            or collection_id == "C69"
+            or (collection_id == "D19" and int(rest[0]) >= 5)
+        ):
+            paper_id = rest[2:].lstrip("0")
+            return (collection_id, rest[0:2].lstrip("0"), paper_id if paper_id else "0")
+        else:
+            paper_id = rest[1:].lstrip("0")
+            return (collection_id, rest[0], paper_id if paper_id else "0")
 
 
 def parse_query_string(query_string):
@@ -71,8 +108,11 @@ def bib_entries(f):
 
 
 def get_bibtex_entry(anthology_id):
-    # Get the volume_id from the anthology_id
-    parsed = acl_anthology.utils.parse_id(anthology_id)
+    """
+    Opens the volumes file and retrieves the bibtex entry corresponding
+    to the requested Anthology ID.
+    """
+    parsed = parse_id(anthology_id)
     volume_id = f"{parsed[0]}-{parsed[1]}"
     with open(f"../volumes/{volume_id}.bib") as f:
         # iterate through the file, reading bibtex entries

From bcabf521a1ee516f276ba1c1dbb9d610374ad6bf Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Fri, 3 Jan 2025 07:57:00 -0500
Subject: [PATCH 09/16] Handle variants for bib files

Fixes https://x.com/zngu/status/1449007350346625024
---
 hugo/static/.htaccess | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hugo/static/.htaccess b/hugo/static/.htaccess
index 73fcbab810..6353f946fd 100644
--- a/hugo/static/.htaccess
+++ b/hugo/static/.htaccess
@@ -96,5 +96,5 @@ RewriteRule acl-copyright-transfer.pdf anthology-files/templates/acl-copyright-t
 # Dynamically generate paper-level bib files
 Options +ExecCGI
 AddHandler cgi-script .cgi
-RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+)\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
-RewriteRule ^([A-Za-z]\d{2}\-\d{4})\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
+RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+?)(v\d+)?\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
+RewriteRule ^([A-Za-z]\d{2}\-\d{4})(v\d+)?\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
\ No newline at end of file

From 78b710f745868f871d9d32b41842101a6734f67a Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Fri, 3 Jan 2025 19:13:03 -0500
Subject: [PATCH 10/16] Fix bug with 404

---
 hugo/static/cgi-bin/generate_bib.cgi | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hugo/static/cgi-bin/generate_bib.cgi b/hugo/static/cgi-bin/generate_bib.cgi
index c0ff988dc2..efd58d8726 100755
--- a/hugo/static/cgi-bin/generate_bib.cgi
+++ b/hugo/static/cgi-bin/generate_bib.cgi
@@ -123,14 +123,14 @@ def get_bibtex_entry(anthology_id):
 
 
 if __name__ == "__main__":
-    print("Content-Type: text/plain\n")
-
     # Get the anthology_id from the query string
     params = parse_query_string(os.environ.get("QUERY_STRING", ""))
     anthology_id = params.get("anthology_id")
-    if not anthology_id:
-        print("Error: anthology_id not provided")
-        sys.exit(1)
-
     bibtex_entry = get_bibtex_entry(anthology_id)
-    print(bibtex_entry)
+    if not bibtex_entry:
+        print("Status: 404 Not Found")
+        print("Content-Type: text/plain")
+        print()
+    else:
+        print("Content-Type: text/plain\n")
+        print(bibtex_entry)
\ No newline at end of file

From 60b9896218de25d81ab0093f2a29715c0c3de2d7 Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Sat, 4 Jan 2025 09:38:05 -0500
Subject: [PATCH 11/16] Generalize htaccess rule and script for all formats

---
 hugo/static/.htaccess                         |  6 ++--
 ...{generate_bib.cgi => extract_citation.cgi} | 29 ++++++++++++++++---
 2 files changed, 28 insertions(+), 7 deletions(-)
 rename hugo/static/cgi-bin/{generate_bib.cgi => extract_citation.cgi} (88%)

diff --git a/hugo/static/.htaccess b/hugo/static/.htaccess
index 6353f946fd..332bd9e444 100644
--- a/hugo/static/.htaccess
+++ b/hugo/static/.htaccess
@@ -93,8 +93,8 @@ RewriteRule ^(\d{4})\.([a-zA-Z\d]+).handbook.pdf$ anthology-files/handbooks/$2/$
 # Copyright
 RewriteRule acl-copyright-transfer.pdf anthology-files/templates/acl-copyright-transfer.pdf [L,NC]
 
-# Dynamically generate paper-level bib files
+# Dynamically generate paper-level bib, endnote, and mods XML files
 Options +ExecCGI
 AddHandler cgi-script .cgi
-RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+?)(v\d+)?\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
-RewriteRule ^([A-Za-z]\d{2}\-\d{4})(v\d+)?\.bib$ /ANTHOLOGYDIR/cgi-bin/generate_bib.cgi?anthology_id=$1 [L,NC]
\ No newline at end of file
+RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+?)(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC]
+RewriteRule ^([A-Za-z]\d{2}\-\d{4})(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC]
\ No newline at end of file
diff --git a/hugo/static/cgi-bin/generate_bib.cgi b/hugo/static/cgi-bin/extract_citation.cgi
similarity index 88%
rename from hugo/static/cgi-bin/generate_bib.cgi
rename to hugo/static/cgi-bin/extract_citation.cgi
index efd58d8726..405cdf515c 100755
--- a/hugo/static/cgi-bin/generate_bib.cgi
+++ b/hugo/static/cgi-bin/extract_citation.cgi
@@ -122,15 +122,36 @@ def get_bibtex_entry(anthology_id):
     return None
 
 
+def get_mods_xml_entry(anthology_id):
+    return None
+
+
+def get_endnote_entry(anthology_id):
+    return None
+
+
+def get_entry(anthology_id, format):
+    if format == "bib":
+        return get_bibtex_entry(anthology_id)
+    elif format == "xml":
+        return get_mods_xml_entry(anthology_id)
+    elif format == "endf":
+        return get_endnote_entry(anthology_id)
+    else:
+        return ""
+
+
 if __name__ == "__main__":
     # Get the anthology_id from the query string
     params = parse_query_string(os.environ.get("QUERY_STRING", ""))
     anthology_id = params.get("anthology_id")
-    bibtex_entry = get_bibtex_entry(anthology_id)
-    if not bibtex_entry:
+    format = params.get("format")
+    entry = get_entry(anthology_id, format)
+    if not entry:
         print("Status: 404 Not Found")
         print("Content-Type: text/plain")
         print()
     else:
-        print("Content-Type: text/plain\n")
-        print(bibtex_entry)
\ No newline at end of file
+        print("Content-Type: text/plain")
+        print()
+        print(entry)
\ No newline at end of file

From 70c2ce746a302abd3316354d4c56171d954c271e Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Sat, 4 Jan 2025 10:01:22 -0500
Subject: [PATCH 12/16] Generalize iterator and format file

---
 hugo/static/cgi-bin/extract_citation.cgi | 58 +++++++++++++-----------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/hugo/static/cgi-bin/extract_citation.cgi b/hugo/static/cgi-bin/extract_citation.cgi
index 405cdf515c..7d33ce820b 100755
--- a/hugo/static/cgi-bin/extract_citation.cgi
+++ b/hugo/static/cgi-bin/extract_citation.cgi
@@ -93,9 +93,9 @@ def parse_query_string(query_string):
     return dict(q.split("=") for q in query_string.split("&"))
 
 
-def bib_entries(f):
+def bib_iterator(f):
     """
-    Create an iterator that iterates over bib entries in a file.
+    An iterator that iterates over bib entries in a file.
     """
     entry = ""
     for line in f:
@@ -107,38 +107,44 @@ def bib_entries(f):
             entry += line
 
 
-def get_bibtex_entry(anthology_id):
+def xml_iterator(anthology_id):
     """
-    Opens the volumes file and retrieves the bibtex entry corresponding
-    to the requested Anthology ID.
+    An iterator for MODS XML entries.
     """
-    parsed = parse_id(anthology_id)
-    volume_id = f"{parsed[0]}-{parsed[1]}"
-    with open(f"../volumes/{volume_id}.bib") as f:
-        # iterate through the file, reading bibtex entries
-        for entry in bib_entries(f):
-            if f'/{anthology_id}/' in entry:
-                return entry
     return None
 
 
-def get_mods_xml_entry(anthology_id):
-    return None
-
-
-def get_endnote_entry(anthology_id):
-    return None
+def endf_iterator(f):
+    """
+    An iterator for EndNote XML entries.
+    """
+    entry = ""
+    for line in f:
+        if line.strip() == "":
+            yield entry
+            entry = ""
+        else:
+            entry += line
+    if entry:
+        yield entry
 
 
 def get_entry(anthology_id, format):
-    if format == "bib":
-        return get_bibtex_entry(anthology_id)
-    elif format == "xml":
-        return get_mods_xml_entry(anthology_id)
-    elif format == "endf":
-        return get_endnote_entry(anthology_id)
-    else:
-        return ""
+    """
+    Opens the volumes file corresponding to the format, then grab the
+    associated iterator, and look for an entry matching the anthology_id.
+    """
+    # The iterator is the function {format}_iterator
+    iterator = globals().get(f"{format}_iterator")
+    if iterator:
+        parsed_id = parse_id(anthology_id)
+        volume_id = f"{parsed_id[0]}-{parsed_id[1]}"
+        with open(f"../volumes/{volume_id}.{format}") as f:
+            # iterate through the file, reading bibtex entries
+            for entry in iterator(f):
+                if "https://aclanthology.rog" in entry and f'/{anthology_id}/' in entry:
+                    return entry
+    return None
 
 
 if __name__ == "__main__":

From 35c31f5dd2cba976d914822b305e2325615ce8e5 Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Sat, 4 Jan 2025 10:09:15 -0500
Subject: [PATCH 13/16] Add iterator for mods volumes

---
 hugo/static/cgi-bin/extract_citation.cgi | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/hugo/static/cgi-bin/extract_citation.cgi b/hugo/static/cgi-bin/extract_citation.cgi
index 7d33ce820b..252ae931c3 100755
--- a/hugo/static/cgi-bin/extract_citation.cgi
+++ b/hugo/static/cgi-bin/extract_citation.cgi
@@ -107,11 +107,31 @@ def bib_iterator(f):
             entry += line
 
 
-def xml_iterator(anthology_id):
+def xml_iterator(f):
     """
     An iterator for MODS XML entries.
+
+    Format:
+        <?xml version="1.0" encoding="UTF-8"?>
+        <modsCollection xmlns="http://www.loc.gov/mods/v3">
+        <mods ID="acl-2024-long">
+            <titleInfo>
+                <title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
+            </titleInfo>
+        ...
+        </modsCollection>
     """
-    return None
+    header = ""
+    entry = ""
+    for line in f:
+        if line.startswith("<?xml") or line.startswith("<modsCollection"):
+            header += line
+        elif line.strip() == "</mods>":
+            entry += line
+            yield header + entry + "</modsCollection>"
+            entry = ""
+        else:
+            entry += line
 
 
 def endf_iterator(f):

From 8ba296fbcb5332f29ce03c3a0917c1f47de736ac Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Sat, 4 Jan 2025 12:08:51 -0500
Subject: [PATCH 14/16] Fix typo; handle missing volume file

---
 hugo/static/cgi-bin/extract_citation.cgi | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/hugo/static/cgi-bin/extract_citation.cgi b/hugo/static/cgi-bin/extract_citation.cgi
index 252ae931c3..0ea62404e6 100755
--- a/hugo/static/cgi-bin/extract_citation.cgi
+++ b/hugo/static/cgi-bin/extract_citation.cgi
@@ -159,11 +159,14 @@ def get_entry(anthology_id, format):
     if iterator:
         parsed_id = parse_id(anthology_id)
         volume_id = f"{parsed_id[0]}-{parsed_id[1]}"
-        with open(f"../volumes/{volume_id}.{format}") as f:
-            # iterate through the file, reading bibtex entries
-            for entry in iterator(f):
-                if "https://aclanthology.rog" in entry and f'/{anthology_id}/' in entry:
-                    return entry
+        try:
+            with open(f"../volumes/{volume_id}.{format}") as f:
+                # iterate through the file, reading bibtex entries
+                for entry in iterator(f):
+                    if "https://aclanthology.org" in entry and f'/{anthology_id}/' in entry:
+                        return entry
+        except FileNotFoundError:
+            pass
     return None
 
 

From 9e7a7ba45b206d532db633190f5ea3cb346892ab Mon Sep 17 00:00:00 2001
From: Matt Post <post@cs.jhu.edu>
Date: Sat, 4 Jan 2025 17:12:47 -0500
Subject: [PATCH 15/16] Make pattern work for previews

---
 hugo/static/cgi-bin/extract_citation.cgi | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hugo/static/cgi-bin/extract_citation.cgi b/hugo/static/cgi-bin/extract_citation.cgi
index 0ea62404e6..85f6ae1f85 100755
--- a/hugo/static/cgi-bin/extract_citation.cgi
+++ b/hugo/static/cgi-bin/extract_citation.cgi
@@ -163,7 +163,9 @@ def get_entry(anthology_id, format):
             with open(f"../volumes/{volume_id}.{format}") as f:
                 # iterate through the file, reading bibtex entries
                 for entry in iterator(f):
-                    if "https://aclanthology.org" in entry and f'/{anthology_id}/' in entry:
+                    # match either https://preview.aclanthology.org/{branch}/{anthology_id}/
+                    # or https://aclanthology.org/{anthology_id}/ without using regex (good enough)
+                    if "https://" in entry and "aclanthology.org" in entry and f'/{anthology_id}/' in entry:
                         return entry
         except FileNotFoundError:
             pass

From 9c161b4940582fd5f9f6caab00ecf64b52da3a36 Mon Sep 17 00:00:00 2001
From: Marcel Bollmann <marcel@bollmann.me>
Date: Sun, 5 Jan 2025 14:22:45 +0100
Subject: [PATCH 16/16] Create volume-level MODS and Endnote files, log
 exceptions properly

---
 bin/create_bib.py | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/bin/create_bib.py b/bin/create_bib.py
index a9bac0288b..4a2ec1982d 100755
--- a/bin/create_bib.py
+++ b/bin/create_bib.py
@@ -167,19 +167,26 @@ def create_bibtex(builddir, clean=False) -> None:
 
 
 def convert_bibtex(builddir, max_workers=None):
-    """Convert BibTeX into other bibliographic formats, and add them to the data files.
+    """Convert BibTeX into other bibliographic formats, for both data files and volume-level bibliography files.
 
     Requires data files from create_hugo_data.py.
     """
-    files = list(Path(f"{builddir}/data/papers").glob("*.json"))
+    data_files = list(Path(f"{builddir}/data/papers").glob("*.json"))
+    bib_files = list(Path(f"{builddir}/data-export/volumes").glob("*.bib"))
 
     with make_progress() as progress:
-        task = progress.add_task("Convert to MODS & Endnote...", total=len(files))
+        task = progress.add_task(
+            "Convert to MODS & Endnote...", total=len(data_files) + len(bib_files)
+        )
 
         with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
-            futures = [executor.submit(convert_collection_file, file) for file in files]
-            for _ in concurrent.futures.as_completed(futures):
+            futures = [
+                executor.submit(convert_collection_file, file) for file in data_files
+            ] + [executor.submit(convert_volume_bib_file, file) for file in bib_files]
+            for future in concurrent.futures.as_completed(futures):
                 progress.update(task, advance=1)
+                if (exc := future.exception()) is not None:
+                    log.error(exc, exc_info=exc)
 
 
 def convert_collection_file(collection_file):
@@ -207,6 +214,34 @@ def convert_collection_file(collection_file):
         f.write(msgspec.json.encode(data))
 
 
+def convert_volume_bib_file(volume_bib_file):
+    """Read a single volume bib file, and convert it to MODS and Endnote formats.
+
+    Important:
+        This function should not rely on global objects, as it will be executed concurrently for different files with multiprocessing.
+    """
+
+    volume_mods_file = volume_bib_file.with_suffix(".xml")
+    volume_endf_file = volume_bib_file.with_suffix(".endf")
+
+    with open(volume_bib_file, "rb") as bib, open(volume_mods_file, "wb") as mods:
+        subprocess.run(
+            [BIB2XML, "-nt"],
+            stdin=bib,
+            stdout=mods,
+            stderr=subprocess.PIPE,
+            check=True,
+        )
+    with open(volume_mods_file, "rb") as mods, open(volume_endf_file, "wb") as endf:
+        subprocess.run(
+            [XML2END],
+            stdin=mods,
+            stdout=endf,
+            stderr=subprocess.PIPE,
+            check=True,
+        )
+
+
 def batch_convert_to_mods_and_endf(bibtex, context):
     """Convert a BibTeX string with multiple entries to MODS and Endnote.
 
@@ -217,6 +252,7 @@ def batch_convert_to_mods_and_endf(bibtex, context):
         input=bibtex,
         capture_output=True,
         text=True,
+        check=True,
     )
     log.debug(f"{context}: {mods.stderr.strip()}")
     endf = subprocess.run(
@@ -224,6 +260,7 @@ def batch_convert_to_mods_and_endf(bibtex, context):
         input=mods.stdout,
         capture_output=True,
         text=True,
+        check=True,
     )
     log.debug(f"{context}: {endf.stderr.strip()}")