diff --git a/chew/data/GRCh37_sites.bed.gz b/chew/data/GRCh37_sites.bed.gz
index 0d006b8..e4fd174 100644
Binary files a/chew/data/GRCh37_sites.bed.gz and b/chew/data/GRCh37_sites.bed.gz differ
diff --git a/chew/data/GRCh37_sitesX.bed.gz b/chew/data/GRCh37_sitesX.bed.gz
index 5320024..8705564 100644
Binary files a/chew/data/GRCh37_sitesX.bed.gz and b/chew/data/GRCh37_sitesX.bed.gz differ
diff --git a/chew/data/GRCh38_sites.bed.gz b/chew/data/GRCh38_sites.bed.gz
index 323f38a..c5dddbc 100644
Binary files a/chew/data/GRCh38_sites.bed.gz and b/chew/data/GRCh38_sites.bed.gz differ
diff --git a/chew/data/GRCh38_sitesX.bed.gz b/chew/data/GRCh38_sitesX.bed.gz
index 555372b..b879b60 100644
Binary files a/chew/data/GRCh38_sitesX.bed.gz and b/chew/data/GRCh38_sitesX.bed.gz differ
diff --git a/misc/sites/README.md b/misc/sites/README.md
new file mode 100644
index 0000000..eeacd03
--- /dev/null
+++ b/misc/sites/README.md
@@ -0,0 +1,32 @@
+# NGS Chew Sites Related Computation
+
+This directory contains the documentation on the `ngs-chew` sites are computed and verified as well as the generation of the PCA projection.
+
+## Site Selection
+
+The details can be found in the `select-auto` and `select-xy` directories.
+To rerun it, obtain dbSNP GRCh37/GRCh38 files and `pip -r install /requirements/misc.txt`.
+Then run the following:
+
+```
+PATH_DBSNP37=path/to/dbsnp-b151-grch37.vcf.gz \
+PATH_DBSNP38=path/to/dbsnp-b151-grch38.vcf.gz \
+    snakemake -c -p work/label/grch37.common.bed
+```
+
+## Autosomal Site Selection
+
+We base the selection of our autosomal sites on the GRCh37 sites from [peddy](https://github.com/brentp/peddy).
+However, we limit the sites to those that have an RSID and are also in GRCh38.
+The sites are in the order of the rsid.
+This allows to re-use the fingerprints for both releases with only small limitations.
+
+## Gonosomal Site Selection
+
+See [this issue](https://github.com/bihealth/ngs-chew/issues/20) for the original selection of GRCh37 sites.
+We use a similar approach as for autosomal sites.
+However, these sites are only used for prediction of gonomosomal karyotypes and not fingerprints.
+
+## Population Group Analysis
+
+The folder `pca` contains a Snakemake workflow that infers population groups / clusters from the thousand genotypes data at our selected sites.
diff --git a/misc/sites/pca/.gitignore b/misc/sites/pca/.gitignore
new file mode 100644
index 0000000..38e3a4a
--- /dev/null
+++ b/misc/sites/pca/.gitignore
@@ -0,0 +1,6 @@
+/*.tbi
+.snakemake/*
+output/*
+raw_data/*
+work/*
+by_id/*
diff --git a/misc/sites/pca/PCA.ipynb b/misc/sites/pca/PCA.ipynb
new file mode 100644
index 0000000..63b3f42
--- /dev/null
+++ b/misc/sites/pca/PCA.ipynb
@@ -0,0 +1,132 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "import matplotlib.pyplot as plt\n",
+    "import plotly.express as px\n",
+    "import polars as pl\n",
+    "import pickle\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labels = pl.read_csv(\n",
+    "    \"raw_data/g1k/founders.tsv\",\n",
+    "    separator=\" \",\n",
+    "    has_header=False,\n",
+    ")\n",
+    "labels.columns = (\"FAM\", \"NAME\", \"FATHER\", \"MOTHER\", \"SEX\", \"GROUP\", \"SUPERGROUP\")\n",
+    "groups = labels.get_column(\"GROUP\")\n",
+    "y = labels.get_column(\"SUPERGROUP\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_pl = pl.read_csv(\n",
+    "    \"by_id/genotypes.tsv\",\n",
+    "    separator=\"\\t\",\n",
+    "    has_header=True,\n",
+    ")\n",
+    "\n",
+    "X = data_pl.drop(\"CHROM\", \"POS\", \"ID\").fill_null(0).to_numpy()\n",
+    "#data_np.transpose()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pca = PCA(n_components=2)\n",
+    "pca.fit(X.transpose())\n",
+    "X_reduced = pca.transform(X.transpose())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('pca.pkl', 'wb') as pickle_file:\n",
+    "        pickle.dump(pca, pickle_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig = px.scatter(X_reduced, x=0, y=1, color=y)\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf = RandomForestClassifier()\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_reduced, y)\n",
+    "rf.fit(X_train, y_train)\n",
+    "rf.predict_proba(X_test)\n",
+    "pca.transform(X_test.transpose())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ngs-chew",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/misc/sites/pca/Snakefile b/misc/sites/pca/Snakefile
new file mode 100644
index 0000000..dde37b4
--- /dev/null
+++ b/misc/sites/pca/Snakefile
@@ -0,0 +1,106 @@
+rule obtain_calls:
+    output:
+        vcf="raw_data/g1k/genotypes.vcf",
+    params:
+        bed="../../../chew/data/GRCh38_sites.bed.gz",
+    threads: 100
+    shell:
+        r"""
+        set -x
+
+        url()
+        {{
+            echo http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_raw_GT_with_annot/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr$1.recalibrated_variants.vcf.gz
+        }}
+        export -f url
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        tabix --only-header $(url 1) \
+        > {output.vcf}
+
+        for i in {{1..22}}; do
+            zgrep "^$i\s" {params.bed} \
+            | sed -e 's/^/chr/' \
+            | sort -k2,2n \
+            > $TMPDIR/chr.$i.bed
+            split -l 10 $TMPDIR/chr.$i.bed $TMPDIR/chr.$i.bed-
+        done
+
+        parallel -k -j {threads} 'i=$(basename {{}} | cut -d . -f 2); tabix -R {{}} $(url $i)' ::: $TMPDIR/chr.*.bed-* \
+        >> {output.vcf}
+        """
+
+
+rule obtain_pedigrees:
+    output:
+        txt="raw_data/g1k/20130606_g1k_3202_samples_ped_population.tsv",
+        txt_founders="raw_data/g1k/founders.tsv",
+    shell:
+        r"""
+        wget -O {output.txt} \
+            http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/20130606_g1k_3202_samples_ped_population.txt
+
+        awk -F ' ' '
+            ((NR > 1) && ($3 == 0) && ($4 == 0))' \
+            {output.txt} \
+        > {output.txt_founders}
+        """
+
+
+rule vcf_to_tsv:
+    input:
+        # vcf="raw_data/g1k/genotypes.vcf",
+        txt_founders="raw_data/g1k/founders.tsv",
+    output:
+        tsv="raw_data/g1k/genotypes.tsv",
+    shell:
+        r"""
+        (
+            echo -en "CHROM\tPOS\tREF\tALT\t"; \
+            cut -d ' ' -f 2 raw_data/g1k/founders.tsv | tr '\n' '\t' | sed -e 's/\t$//g'; \
+            echo; \
+        ) > {output.tsv}
+
+        set +o pipefail
+        bcftools +missing2ref raw_data/g1k/genotypes.vcf \
+        | bcftools view -S <(cut -d ' ' -f 2 raw_data/g1k/founders.tsv) -i '(type="snp")' \
+        | bcftools query -f '%CHROM\t%POS\t%REF\t%ALT[\t%GT]\n' - \
+        | sed -e 's|[2-9]/|1/|g' \
+        | sed -e 's|/[2-9]|/1|g' \
+        | sed -e 's|,.||g' \
+        | sed -e 's|,.||g' \
+        | sed -e 's|,.||g' \
+        | sed -e 's|0/0|0|g' \
+        | sed -e 's|0/1|1|g' \
+        | sed -e 's|1/0|1|g' \
+        | sed -e 's|1/1|2|g' \
+        >> {output.tsv}
+        """
+
+rule join_tsv:
+    input:
+        tsv="raw_data/g1k/genotypes.tsv",
+    output:
+        by_id_tsv="by_id/ids.tsv",
+        tsv="by_id/genotypes.tsv",
+    params:
+        bed="../../../chew/data/GRCh38_sites.bed.gz",
+    shell:
+        r"""
+        echo -e "CHROM_\tPOS0_\tPOS_\tID" \
+        > {output.by_id_tsv}
+        zcat {params.bed} \
+        | sed -e 's/^/chr/' \
+        >> {output.by_id_tsv}
+        cp  {output.by_id_tsv} {output.by_id_tsv}.bak
+
+        qsv join --delimiter $'\t' --left \
+            CHROM_,POS_ {output.by_id_tsv} \
+            CHROM,POS {input.tsv} \
+        | qsv select '!CHROM,POS0_,POS,REF,ALT' - \
+        | sed -e 's/CHROM_/CHROM/g' -e 's/POS_/POS/g' \
+        | tr , '\t' \
+        > {output.tsv}
+        """
diff --git a/misc/sites/select-auto/.gitignore b/misc/sites/select-auto/.gitignore
new file mode 100644
index 0000000..f422783
--- /dev/null
+++ b/misc/sites/select-auto/.gitignore
@@ -0,0 +1,4 @@
+.snakemake/*
+output/*
+raw_data/*
+work/*
diff --git a/misc/sites/select-auto/Snakefile b/misc/sites/select-auto/Snakefile
new file mode 100644
index 0000000..d9655c9
--- /dev/null
+++ b/misc/sites/select-auto/Snakefile
@@ -0,0 +1,218 @@
+envvars:
+    "PATH_DBSNP37",  # GRCh37 dbSNP VCF
+    "PATH_DBSNP38",  # GRCh38 dbSNP VCF
+
+
+rule default:
+    input:
+        "output/common_ids.txt",
+        "output/grch37.common.bed",
+        "output/grch38.common.bed",
+
+
+rule raw_data_peddy:
+    output:
+        "raw_data/peddy/{filename}.sites",
+    shell:
+        """
+        wget -O {output} https://github.com/brentp/peddy/raw/master/peddy/{wildcards.filename}.sites
+        """
+
+
+rule sites_to_bed:
+    input:
+        "raw_data/peddy/{filename}.sites",
+    output:
+        bed_header="raw_data/peddy/{filename}.header.bed",
+        bed_noheader="raw_data/peddy/{filename}.bed",
+    shell:
+        r"""
+        echo -e "CHROM\tBEGIN\tEND\tREF\tALT" \
+        > {output.bed_header}
+
+        awk -F : '
+            BEGIN {{
+                OFS="\t";
+            }}
+            {{
+                print $1, $2 - 1, $2, $3, $4
+            }}
+        ' {input} \
+        | LC_ALL=C sort -k1,1V -k2,2n \
+        | tee -a {output.bed_header} \
+        > {output.bed_noheader}
+        """
+
+
+rule extract_dbsnp:
+    input:
+        bed="raw_data/peddy/{filename}.bed",
+    output:
+        vcf="work/dbsnp/{filename}.vcf",
+    params:
+        path_dbsnp=os.environ["PATH_DBSNP37"],
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        tabix --only-header {params.path_dbsnp} \
+        > $TMPDIR/dbsnp.vcf
+        (tabix -R {input.bed} {params.path_dbsnp} || true) \
+        | sort -k1,1V -k2,2n \
+        >> $TMPDIR/dbsnp.vcf
+        bgzip $TMPDIR/dbsnp.vcf
+        tabix -f $TMPDIR/dbsnp.vcf.gz
+
+        bcftools sort -T $TMPDIR/bcftools.XXXXXX $TMPDIR/dbsnp.vcf.gz \
+        | bcftools norm -d both \
+        | bcftools norm -m - \
+        > {output.vcf}
+        """
+
+
+rule dbsnp_to_bed:
+    input:
+        vcf="work/dbsnp/{filename}.vcf",
+    output:
+        bed="work/dbsnp/{filename}.bed",
+    shell:
+        r"""
+        echo -e "CHROM_\tBEGIN_\tEND_\tREF_\tALT_\tID" \
+        > {output.bed}
+        bcftools query \
+            -f '%CHROM\t%POS0\t%POS\t%REF\t%ALT\t%ID\n' \
+            {input.vcf} \
+        >> {output.bed}
+        """
+
+
+rule label_sites:
+    input:
+        bed_sites="raw_data/peddy/GRCH37.header.bed",
+        bed_dbsnp="work/dbsnp/GRCH37.bed",
+    output:
+        bed="work/label/grch37.bed",
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        qsv join --delimiter $'\t' --left \
+            CHROM,BEGIN,REF,ALT {input.bed_sites} \
+            CHROM_,BEGIN_,REF_,ALT_ {input.bed_dbsnp} \
+        | qsv select CHROM,BEGIN,END,REF,ALT,ID \
+        | tr ',' '\t' \
+        > {output.bed}
+        """
+
+
+rule extract_dbsnp38_vcf:
+    input:
+        bed="work/label/grch37.bed",
+    output:
+        vcf="work/dbsnp/GRCH38.vcf",
+        vcf_gz="work/dbsnp/GRCH38.vcf.gz",
+        tbi="work/dbsnp/GRCH38.vcf.gz.tbi",
+    params:
+        path_dbsnp=os.environ["PATH_DBSNP38"],
+    threads: 8
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        tail -n +2 {input.bed} \
+        | cut -f 6 \
+        | sed -e 's/^/\t/' -e 's/$/\t/' \
+        > $TMPDIR/rsids.txt
+
+        tabix --only-header {params.path_dbsnp} \
+        > {output.vcf}
+        parallel -k -j {threads} \
+            'tabix {params.path_dbsnp} {{}} | (fgrep -f $TMPDIR/rsids.txt || true)' ::: $(tabix -l {params.path_dbsnp}) \
+        >> {output.vcf}
+
+        bgzip -c {output.vcf} \
+        > {output.vcf_gz}
+        tabix -f {output.vcf_gz}
+        """
+
+
+rule convert_dbsnp38_vcf_to_bed:
+    input:
+        vcf="work/dbsnp/GRCH38.vcf.gz",
+    output:
+        bed="work/label/grch38.bed",
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        set +o pipefail
+
+        echo -e "CHROM\tBEGIN\tEND\tREF\tALT\tID" \
+        > {output.bed}
+        bcftools sort -T $TMPDIR/sort.1.XXXXXX {input.vcf} \
+        | bcftools norm -m -any \
+        | bcftools query \
+            -f '%CHROM\t%POS0\t%POS\t%REF\t%ALT\t%ID\n' \
+        | sort -k1,1V -k2,2n \
+        | uniq \
+        >> {output.bed}
+        """
+
+
+rule extract_common_dbsnp_entries:
+    input:
+        grch37="work/label/grch37.bed",
+        grch38="work/label/grch38.bed",
+    output:
+        common_ids="output/common_ids.txt",
+        grch37="output/grch37.common.bed",
+        grch38="output/grch38.common.bed",
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        echo -e "REF_\tID_" \
+        >{output.common_ids}
+        qsv join --delimiter $'\t' \
+            REF,ID {input.grch37} \
+            REF,ID {input.grch38} \
+        | qsv select REF,ID \
+        | tr ',' '\t' \
+        | uniq \
+        | tail -n +2 \
+        >> {output.common_ids}
+
+        qsv join --delimiter $'\t' \
+            REF,ID {input.grch37} \
+            REF_,ID_ {output.common_ids} \
+        | qsv select CHROM,BEGIN,END,ID \
+        | tr ',' '\t' \
+        | tail -n +2 \
+        | LC_ALL=C sort -k4,4 \
+        > {output.grch37}
+
+        qsv join --delimiter $'\t' \
+            REF,ID {input.grch38} \
+            REF_,ID_ {output.common_ids} \
+        | qsv select CHROM,BEGIN,END,ID \
+        | tr ',' '\t' \
+        | tail -n +2 \
+        | uniq \
+        | LC_ALL=C sort -k4,4 \
+        > {output.grch38}
+        """
diff --git a/misc/sites/select-xy/.gitattributes b/misc/sites/select-xy/.gitattributes
new file mode 100644
index 0000000..cbf6593
--- /dev/null
+++ b/misc/sites/select-xy/.gitattributes
@@ -0,0 +1 @@
+data/Agilent_V6.bed filter=lfs diff=lfs merge=lfs -text
diff --git a/misc/sites/select-xy/.gitignore b/misc/sites/select-xy/.gitignore
new file mode 100644
index 0000000..f422783
--- /dev/null
+++ b/misc/sites/select-xy/.gitignore
@@ -0,0 +1,4 @@
+.snakemake/*
+output/*
+raw_data/*
+work/*
diff --git a/misc/sites/select-xy/Snakefile b/misc/sites/select-xy/Snakefile
new file mode 100644
index 0000000..45f3e9b
--- /dev/null
+++ b/misc/sites/select-xy/Snakefile
@@ -0,0 +1,145 @@
+envvars:
+    "PATH_DBSNP37",  # GRCh37 dbSNP VCF
+    "PATH_DBSNP38",  # GRCh38 dbSNP VCF
+
+
+rule default:
+    input:
+        "output/common_ids.txt",
+        "output/grch37.common.bed",
+        "output/grch38.common.bed",
+
+
+rule select_sites:
+    output:
+        "raw_data/selected/GRCh37_sitesX.bed",
+    params:
+        path_dbsnp=os.environ["PATH_DBSNP37"],
+    shell:
+        r"""
+        set -x
+
+        (tabix \
+            --print-header \
+            --regions data/Agilent_V6.bed \
+            {params.path_dbsnp} \
+            || true) \
+        | bcftools view -M 2 -i '(CAF != ".")' \
+        | bcftools query -f '%CHROM\t%POS\t%REF\t%ALT\t%ID\t%CAF\n' \
+        | tr ',' '\t' \
+        | awk -F $'\t' 'BEGIN {{
+            OFS=FS;
+            print "CHROM_", "BEGIN_", "END_", "REF_", "ALT_", "ID"
+        }}
+        ($6 <= 0.95) {{
+            print $1, $2 - 1, $2, $3, $4, $5
+        }}' \
+        > {output}
+        """
+
+
+rule extract_dbsnp38_vcf:
+    input:
+        bed="raw_data/selected/GRCh37_sitesX.bed",
+    output:
+        vcf="work/dbsnp/GRCH38.vcf",
+        vcf_gz="work/dbsnp/GRCH38.vcf.gz",
+        tbi="work/dbsnp/GRCH38.vcf.gz.tbi",
+    params:
+        path_dbsnp=os.environ["PATH_DBSNP38"],
+    threads: 8
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        tail -n +2 {input.bed} \
+        | cut -f 6 \
+        | sed -e 's/^/\t/' -e 's/$/\t/' \
+        > $TMPDIR/rsids.txt
+
+        tabix --only-header {params.path_dbsnp} \
+        > {output.vcf}
+        parallel -k -j {threads} \
+            'tabix {params.path_dbsnp} {{}} | (fgrep -f $TMPDIR/rsids.txt || true)' ::: X Y \
+        >> {output.vcf}
+
+        bgzip -c {output.vcf} \
+        > {output.vcf_gz}
+        tabix -f {output.vcf_gz}
+        """
+
+
+rule convert_dbsnp38_vcf_to_bed:
+    input:
+        vcf="work/dbsnp/GRCH38.vcf.gz",
+    output:
+        bed="work/label/grch38.bed",
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        set +o pipefail
+
+        echo -e "CHROM\tBEGIN\tEND\tREF\tALT\tID" \
+        > {output.bed}
+        bcftools sort -T $TMPDIR/sort.1.XXXXXX {input.vcf} \
+        | bcftools norm -m -any \
+        | bcftools query \
+            -f '%CHROM\t%POS0\t%POS\t%REF\t%ALT\t%ID\n' \
+        | sort -k1,1V -k2,2n \
+        | uniq \
+        >> {output.bed}
+        """
+
+
+rule extract_common_dbsnp_entries:
+    input:
+        grch37="raw_data/selected/GRCh37_sitesX.bed",
+        grch38="work/label/grch38.bed",
+    output:
+        common_ids="output/common_ids.txt",
+        grch37="output/grch37.common.bed",
+        grch38="output/grch38.common.bed",
+    shell:
+        r"""
+        set -x
+
+        export TMPDIR=$(mktemp -d)
+        trap "rm -rf $TMPDIR" EXIT
+
+        echo -e "REF_\tID_" \
+        >{output.common_ids}
+        qsv join --delimiter $'\t' \
+            REF_,ID {input.grch37} \
+            REF,ID {input.grch38} \
+        | qsv select REF,ID \
+        | tr ',' '\t' \
+        | uniq \
+        | tail -n +2 \
+        >> {output.common_ids}
+
+        qsv join --delimiter $'\t' \
+            REF_,ID {input.grch37} \
+            REF_,ID_ {output.common_ids} \
+        | qsv select CHROM_,BEGIN_,END_,ID \
+        | tr ',' '\t' \
+        | tail -n +2 \
+        | LC_ALL=C sort -k4,4 \
+        > {output.grch37}
+
+        qsv join --delimiter $'\t' \
+            REF,ID {input.grch38} \
+            REF_,ID_ {output.common_ids} \
+        | qsv select CHROM,BEGIN,END,ID \
+        | tr ',' '\t' \
+        | tail -n +2 \
+        | uniq \
+        | LC_ALL=C sort -k4,4 \
+        > {output.grch38}
+        """
diff --git a/misc/sites/select-xy/data/Agilent_V6.bed b/misc/sites/select-xy/data/Agilent_V6.bed
new file mode 100644
index 0000000..9e0f9f9
--- /dev/null
+++ b/misc/sites/select-xy/data/Agilent_V6.bed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e476a794fba6ef5f5ce0212b87628877d3006a34552acbb0a33e4386910baa3
+size 176404
diff --git a/requirements/misc.txt b/requirements/misc.txt
new file mode 100644
index 0000000..f3fe7c3
--- /dev/null
+++ b/requirements/misc.txt
@@ -0,0 +1,9 @@
+-r dev.txt
+
+snakemake
+snakefmt
+jupyterlab
+scikit-learn
+polars
+matplotlib
+plotly-express