Skip to content

Commit

Permalink
Fix genome index generation; Reimplementation of count_matrix buildin…
Browse files Browse the repository at this point in the history
…g; Fix column order matching
  • Loading branch information
fsantilli committed Nov 28, 2024
1 parent 574a508 commit 5043d01
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 13 deletions.
2 changes: 1 addition & 1 deletion workflow/rules/coverage_tRNA.smk
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ rule mk_genome_tsv:
mem_mb=2048,
shell:
"""
samtools view -H {input} | grep SQ | cut -f 2,3 | sed -r 's/(SN|LN)://g' | tr " " "\t" > {output}
samtools view -H {input} | grep '@SQ' | cut -f 2,3 | sed -r 's/(SN|LN)://g' | tr " " "\t" > {output}
"""


Expand Down
24 changes: 16 additions & 8 deletions workflow/scripts/build_tRNA_coverage_matrix_v1.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,21 @@ print(coverage_files)
coverage_files <- coverage_files[match(sample_sheet$name, names(coverage_files))]
print(coverage_files)


out <- coverage_files %>%
map(read_tsv, col_names = FALSE) %>%
map(select, c(X4, X14)) %>%
purrr::reduce(inner_join, by = "X4") %>%
rename_with(function(x) c("Name", basename(as.character(coverage_files))))

print(head(out), file = stderr())

write_tsv(x = out, file = as.character(snakemake@output))
map(dplyr::select, c(X4, X14))

ncol <- length(out)
nrow <- nrow(out[[1]]) # assume that all elements in out have the same number of rows

m <- matrix(0, nrow, ncol, dimnames = list(out[[1]]$X4, basename(as.character(coverage_files))))
for (i in seq(length(out))) {
df <- out[[i]]
if (nrow(df) != nrow(m)) stop(sprintf("Incorrect dimensions! Element %d has %d rows instead of %d.",
i, nrow(df), nrow(m)))
#df <- df[match(rownames(m), df$X4),]
m[,i] <- df$X14
}
m <- as_tibble(m, rownames="Name")

write_tsv(x = m, file = as.character(snakemake@output))
4 changes: 2 additions & 2 deletions workflow/scripts/coverage_trna.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ for C in $CHROMOSOMES; do
awk -v j="$C" '$1~j' "${snakemake_input[annotation]}" >> $T
done

bedtools sort -faidx "${snakemake_input[genome]}" -i $T > $SORTED 2> ${snakemake_log}
bedtools sort -faidx "${snakemake_input[genome]}" -i $T > $SORTED

bedtools coverage \
-g "${snakemake_input[genome]}" \
-sorted \
-a $SORTED \
-b "${snakemake_input[bam]}" | \
sort -k1,1 -k2,2n > "${snakemake_output}" 2>> ${snakemake_log}
sort -k1,1 -k2,2n > "${snakemake_output}"

rm $SORTED
3 changes: 1 addition & 2 deletions workflow/scripts/deseq2_trna_v1.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ sample_sheet <- read.csv(
header=TRUE
)


colnames_order <- sapply(colnames(count_matrix), grep, x = sample_sheet$name )
colnames_order <- match(sample_sheet$name,colnames(count_matrix))
colnames(count_matrix)[colnames_order] <- rownames(sample_sheet)

sample_sheet <- sample_sheet[match(colnames(count_matrix), rownames(sample_sheet)),]
Expand Down

0 comments on commit 5043d01

Please sign in to comment.