diff --git a/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf b/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf index 4052ec1..0537245 100644 Binary files a/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf and b/docs/devel/Orchestrating-Hi-C-analysis-with-Bioconductor.pdf differ diff --git a/docs/devel/index.html b/docs/devel/index.html index e9d22ce..dab9267 100644 --- a/docs/devel/index.html +++ b/docs/devel/index.html @@ -798,7 +798,7 @@

Orchestrating Hi-C analysis with Bioconductor

## zip 2.3.0 2023-04-17 [2] CRAN (R 4.4.0) ## zlibbioc 1.49.0 2023-10-24 [2] Bioconductor ## -## [1] /tmp/RtmpVQDQnT/Rinst55b2b1d77 +## [1] /tmp/RtmpyjIh7u/Rinst5327a02592 ## [2] /usr/local/lib/R/site-library ## [3] /usr/local/lib/R/library ## diff --git a/docs/devel/pages/data-representation.html b/docs/devel/pages/data-representation.html index 6968f98..172cf09 100644 --- a/docs/devel/pages/data-representation.html +++ b/docs/devel/pages/data-representation.html @@ -1571,8 +1571,8 @@

This fetches files from the cloud, downloads them locally and returns the path of the local file.

coolf
-##                                           EH7702 
-##  "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752"
+## EH7702 +## "/root/.cache/R/ExperimentHub/16ffc32b3a_7752"

Similarly, example files are available for other file formats:

@@ -1642,7 +1642,7 @@

# ----- This creates a connection to a `.(m)cool` file (path stored in `coolf`) CoolFile(coolf) ## CoolFile object -## .mcool file: /root/.cache/R/ExperimentHub/1747aa0ffd6_7752 +## .mcool file: /root/.cache/R/ExperimentHub/16ffc32b3a_7752 ## resolution: 1000 ## pairs file: ## metadata(0): @@ -1650,7 +1650,7 @@

# ----- This creates a connection to a `.hic` file (path stored in `hicf`) HicFile(hicf) ## HicFile object -## .hic file: /root/.cache/R/ExperimentHub/1747a133ab3_7836 +## .hic file: /root/.cache/R/ExperimentHub/16f2cb31ab6_7836 ## resolution: 1000 ## pairs file: ## metadata(0): @@ -1659,8 +1659,8 @@

HicproFile(hicpromatrixf, hicproregionsf) ## HicproFile object ## HiC-Pro files: -## $ matrix: /root/.cache/R/ExperimentHub/1746681cf08_7837 -## $ regions: /root/.cache/R/ExperimentHub/174132e5e85_7838 +## $ matrix: /root/.cache/R/ExperimentHub/16f12c59723_7837 +## $ regions: /root/.cache/R/ExperimentHub/16f24a80f84_7838 ## resolution: 1000 ## pairs file: ## metadata(0): @@ -1668,7 +1668,7 @@

# ----- This creates a connection to a pairs file PairsFile(pairsf) ## PairsFile object -## resource: /root/.cache/R/ExperimentHub/174733eb553_7753

+## resource: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753

2.3.3 ContactFile slots

@@ -1684,7 +1684,7 @@

cf <- CoolFile(coolf)
 cf
 ##  CoolFile object
-##  .mcool file: /root/.cache/R/ExperimentHub/1747aa0ffd6_7752 
+##  .mcool file: /root/.cache/R/ExperimentHub/16ffc32b3a_7752 
 ##  resolution: 1000 
 ##  pairs file: 
 ##  metadata(0):
@@ -1782,7 +1782,7 @@ 

hic ## `HiCExperiment` object with 8,757,906 contacts over 12,079 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "whole genome" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 1000 @@ -1814,7 +1814,7 @@

These pieces of information are called slots. They can be directly accessed using getter functions bearing the same name as the slot.

fileName(hic)
-##  [1] "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752"
+##  [1] "/root/.cache/R/ExperimentHub/16ffc32b3a_7752"
 
 focus(hic)
 ##  NULL
@@ -1881,7 +1881,7 @@ 

hic ## `HiCExperiment` object with 13,681,280 contacts over 12,165 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747a133ab3_7836" +## fileName: "/root/.cache/R/ExperimentHub/16f2cb31ab6_7836" ## focus: "whole genome" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 1000 @@ -2321,14 +2321,14 @@

yeast_hic
 ##  `HiCExperiment` object with 8,757,906 contacts over 763 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "whole genome" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 16000 
 ##  interactions: 267709 
 ##  scores(2): count balanced 
 ##  topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) 
-##  pairsFile: /root/.cache/R/ExperimentHub/174733eb553_7753 
+##  pairsFile: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 
 ##  metadata(3): ID org date

@@ -2621,7 +2621,7 @@

pairsFile(yeast_hic) ## EH7703 -## "/root/.cache/R/ExperimentHub/174733eb553_7753" +## "/root/.cache/R/ExperimentHub/16f6ddc5c03_7753" readLines(pairsFile(yeast_hic), 25) ## [1] "## pairs format v1.0" diff --git a/docs/devel/pages/interactions-centric.html b/docs/devel/pages/interactions-centric.html index 9e88af8..4abfacb 100644 --- a/docs/devel/pages/interactions-centric.html +++ b/docs/devel/pages/interactions-centric.html @@ -386,7 +386,7 @@

hic
 ##  `HiCExperiment` object with 471,364 contacts over 407 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -414,7 +414,7 @@ 

pf
 ##  PairsFile object
-##  resource: /root/.cache/R/ExperimentHub/174733eb553_7753
+## resource: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753

If needed, PairsFile connections can be imported directly into a GInteractions object with import().

@@ -454,7 +454,7 @@

library(HiContacts)
 ps <- distanceLaw(pf, by_chr = TRUE) 
-##  Importing pairs file /root/.cache/R/ExperimentHub/174733eb553_7753 in memory. This may take a while...
+##  Importing pairs file /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 in memory. This may take a while...
 ps
 ##  # A tibble: 115 × 6
 ##    chr   binned_distance          p     norm_p norm_p_unity slope
@@ -496,7 +496,7 @@ 

eco1_ps <- distanceLaw(eco1_pf, by_chr = TRUE) 
-##  Importing pairs file /root/.cache/R/ExperimentHub/fae309a2f91_7755 in memory. This may take a while...
+##  Importing pairs file /root/.cache/R/ExperimentHub/f90656c7e8e_7755 in memory. This may take a while...
 eco1_ps
 ##  # A tibble: 115 × 6
 ##    chr   binned_distance          p     norm_p norm_p_unity slope
@@ -657,7 +657,7 @@ 

pairsFile(hic) <- pairsf
 scalo <- scalogram(hic) 
-##  Importing pairs file /root/.cache/R/ExperimentHub/174733eb553_7753 in memory. This may take a while...
+##  Importing pairs file /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 in memory. This may take a while...
 plotScalogram(scalo |> filter(chr == 'II'), ylim = c(1e3, 1e5))
@@ -681,7 +681,7 @@

## loading from cache pairsFile(eco1_hic) <- eco1_pairsf eco1_scalo <- scalogram(eco1_hic) -## Importing pairs file /root/.cache/R/ExperimentHub/fae309a2f91_7755 in memory. This may take a while... +## Importing pairs file /root/.cache/R/ExperimentHub/f90656c7e8e_7755 in memory. This may take a while... merged_scalo <- rbind( scalo |> mutate(sample = 'WT'), eco1_scalo |> mutate(sample = 'eco1') diff --git a/docs/devel/pages/interoperability.html b/docs/devel/pages/interoperability.html index 98068d3..67e28f2 100644 --- a/docs/devel/pages/interoperability.html +++ b/docs/devel/pages/interoperability.html @@ -697,7 +697,7 @@

res ## `HiCExperiment` object with 471,364 contacts over 802 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 1000 diff --git a/docs/devel/pages/interoperability_files/figure-html/unnamed-chunk-5-1.png b/docs/devel/pages/interoperability_files/figure-html/unnamed-chunk-5-1.png index 57caa4b..8468bb3 100644 Binary files a/docs/devel/pages/interoperability_files/figure-html/unnamed-chunk-5-1.png and b/docs/devel/pages/interoperability_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/devel/pages/matrix-centric.html b/docs/devel/pages/matrix-centric.html index e3a7531..11c4acd 100644 --- a/docs/devel/pages/matrix-centric.html +++ b/docs/devel/pages/matrix-centric.html @@ -395,7 +395,7 @@

hic
 ##  `HiCExperiment` object with 471,364 contacts over 407 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -419,7 +419,7 @@ 

normalized_hic ## `HiCExperiment` object with 471,364 contacts over 407 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -456,7 +456,7 @@

detrended_hic ## `HiCExperiment` object with 471,364 contacts over 407 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -511,7 +511,7 @@

autocorr_hic ## `HiCExperiment` object with 471,364 contacts over 407 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -583,7 +583,7 @@

hic2 ## `HiCExperiment` object with 168,785 contacts over 150 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:400,000-700,000" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 diff --git a/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-10-1.png b/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-10-1.png index 2757929..2d0a422 100644 Binary files a/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-10-1.png and b/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-13-1.png b/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-13-1.png index c978684..02a9a53 100644 Binary files a/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-13-1.png and b/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-16-1.png b/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-16-1.png index 5f1fe62..eed9648 100644 Binary files a/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-16-1.png and b/docs/devel/pages/matrix-centric_files/figure-html/unnamed-chunk-16-1.png differ diff --git a/docs/devel/pages/parsing.html b/docs/devel/pages/parsing.html index aceb379..a146928 100644 --- a/docs/devel/pages/parsing.html +++ b/docs/devel/pages/parsing.html @@ -364,7 +364,7 @@

hic ## `HiCExperiment` object with 10,801 contacts over 11 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:10,000-50,000" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 4000 @@ -439,7 +439,7 @@

cf <- CoolFile(coolf) cf ## CoolFile object -## .mcool file: /root/.cache/R/ExperimentHub/1747aa0ffd6_7752 +## .mcool file: /root/.cache/R/ExperimentHub/16ffc32b3a_7752 ## resolution: 1000 ## pairs file: ## metadata(0): @@ -449,7 +449,7 @@

hic ## `HiCExperiment` object with 306,212 contacts over 257 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:300,001-813,184" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -506,7 +506,7 @@

import(cf, focus = 'II:300001-800000', resolution = 2000)
 ##  `HiCExperiment` object with 301,018 contacts over 250 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:300,001-800,000" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -523,7 +523,7 @@ 

import(cf, focus = 'II:300001-400000|II:600001-700000', resolution = 2000)
 ##  `HiCExperiment` object with 402 contacts over 100 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:300001-400000|II:600001-700000" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -540,7 +540,7 @@ 

import(cf, focus = 'II', resolution = 2000)
 ##  `HiCExperiment` object with 471,364 contacts over 407 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -557,7 +557,7 @@ 

import(cf, focus = 'II|III', resolution = 2000)
 ##  `HiCExperiment` object with 9,092 contacts over 566 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II|III" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -574,7 +574,7 @@ 

import(cf, focus = 'II:300001-800000|V:1-500000', resolution = 2000)
 ##  `HiCExperiment` object with 7,147 contacts over 500 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:300001-800000|V:1-500000" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -606,7 +606,7 @@ 

hic ## `HiCExperiment` object with 306,212 contacts over 257 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:300,001-813,184" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -621,7 +621,7 @@

import(cf, focus = 'III', resolution = 2000) ## `HiCExperiment` object with 151,990 contacts over 159 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "III" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -636,7 +636,7 @@

refocus(hic, 'III') ## `HiCExperiment` object with 151,990 contacts over 159 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "III" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -759,7 +759,7 @@

hic["II:800001-813184"]
 ##  `HiCExperiment` object with 1,040 contacts over 6 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:800,001-813,184" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -776,7 +776,7 @@ 

hic["II:300001-320000|II:800001-813184"]
 ##  `HiCExperiment` object with 3 contacts over 6 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:300001-320000|II:800001-813184" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -793,7 +793,7 @@ 

hic["II"]
 ##  `HiCExperiment` object with 306,212 contacts over 257 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -810,7 +810,7 @@ 

hic["II|IV"]
 ##  `HiCExperiment` object with 0 contacts over 0 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:1-813184|IV:1-1531933" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -827,7 +827,7 @@ 

hic["II:300001-320000|IV:1-100000"]
 ##  `HiCExperiment` object with 0 contacts over 0 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:300001-320000|IV:1-100000" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -844,7 +844,7 @@ 

hic[c('II', 'III', 'IV')]
 ##  `HiCExperiment` object with 306,212 contacts over 257 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II, III, IV" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -871,7 +871,7 @@ 

hic
 ##  `HiCExperiment` object with 306,212 contacts over 257 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "II:300,001-813,184" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -884,7 +884,7 @@ 

zoom(hic, 4000) ## `HiCExperiment` object with 306,212 contacts over 129 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:300,001-813,184" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 4000 @@ -897,7 +897,7 @@

zoom(hic, 1000) ## `HiCExperiment` object with 306,212 contacts over 514 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:300,001-813,184" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 1000 @@ -1019,7 +1019,7 @@

## names(3): count balanced random head(scores(hic, "random")) -## [1] 0.4036088 0.1918757 0.6034038 0.9544792 0.5272511 0.2514426

+## [1] 0.85021122 0.60707706 0.07291116 0.18050687 0.70713349 0.68386247

3.2.2.2 topologicalFeatures @@ -1067,7 +1067,7 @@

hic ## `HiCExperiment` object with 306,212 contacts over 257 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:300,001-813,184" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 @@ -1099,14 +1099,14 @@

hic ## `HiCExperiment` object with 306,212 contacts over 257 regions ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: "II:300,001-813,184" ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 2000 ## interactions: 18513 ## scores(3): count balanced random ## topologicalFeatures: compartments(0) borders(0) loops(9) viewpoints(0) CTCF(4) -## pairsFile: /root/.cache/R/ExperimentHub/174733eb553_7753 +## pairsFile: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 ## metadata(0):

@@ -1123,7 +1123,7 @@

## [1] "HiCExperiment created from an example .mcool file from `HiContactsData`" ## ## $date -## [1] "Tue Nov 7 18:48:53 2023" +## [1] "Tue Nov 7 18:48:31 2023"

3.3 Coercing HiCExperiment objects

@@ -1190,13 +1190,13 @@

## 4 II 306001 308000 2000 * 269 0.01895202 307000 61 ## 5 II 308001 310000 2000 * 270 0.02898098 309000 38 ## 6 II 310001 312000 2000 * 271 0.01834118 311000 43 -## balanced random -## 1 0.009657438 0.4036088 -## 2 0.076622340 0.1918757 -## 3 0.054101992 0.6034038 -## 4 0.042940512 0.9544792 -## 5 0.040905212 0.5272511 -## 6 0.029293930 0.2514426 +## balanced random +## 1 0.009657438 0.85021122 +## 2 0.076622340 0.60707706 +## 3 0.054101992 0.07291116 +## 4 0.042940512 0.18050687 +## 5 0.040905212 0.70713349 +## 6 0.029293930 0.68386247
diff --git a/docs/devel/pages/topological-features.html b/docs/devel/pages/topological-features.html index 283f99e..1e09a34 100644 --- a/docs/devel/pages/topological-features.html +++ b/docs/devel/pages/topological-features.html @@ -359,7 +359,7 @@

microC ## `HiCExperiment` object with 10,086,710 contacts over 334 regions ## ------- -## fileName: "/tmp/RtmpVQDQnT/Rinst55b2b1d77/OHCA/extdata/chr17.mcool" +## fileName: "/tmp/RtmpyjIh7u/Rinst5327a02592/OHCA/extdata/chr17.mcool" ## focus: "whole genome" ## resolutions(3): 5000 100000 250000 ## active resolution: 250000 @@ -396,7 +396,7 @@

microC_compts ## `HiCExperiment` object with 10,086,710 contacts over 334 regions ## ------- -## fileName: "/tmp/RtmpVQDQnT/Rinst55b2b1d77/OHCA/extdata/chr17.mcool" +## fileName: "/tmp/RtmpyjIh7u/Rinst5327a02592/OHCA/extdata/chr17.mcool" ## focus: "whole genome" ## resolutions(3): 5000 100000 250000 ## active resolution: 250000 @@ -566,7 +566,7 @@

hic ## `HiCExperiment` object with 2,156,222 contacts over 4,652 regions ## ------- -## fileName: "/tmp/RtmpVQDQnT/Rinst55b2b1d77/OHCA/extdata/chr17.mcool" +## fileName: "/tmp/RtmpyjIh7u/Rinst5327a02592/OHCA/extdata/chr17.mcool" ## focus: "chr17:60,000,001-83,257,441" ## resolutions(3): 5000 100000 250000 ## active resolution: 5000 diff --git a/docs/devel/pages/visualization.html b/docs/devel/pages/visualization.html index d94c33f..0a757bf 100644 --- a/docs/devel/pages/visualization.html +++ b/docs/devel/pages/visualization.html @@ -370,7 +370,7 @@

hic
 ##  `HiCExperiment` object with 303,545 contacts over 289 regions 
 ##  -------
-##  fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" 
+##  fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" 
 ##  focus: "V" 
 ##  resolutions(5): 1000 2000 4000 8000 16000
 ##  active resolution: 2000 
@@ -611,7 +611,7 @@ 

aggr_loops ## `AggrHiCExperiment` object over 148 targets ## ------- -## fileName: "/root/.cache/R/ExperimentHub/1747aa0ffd6_7752" +## fileName: "/root/.cache/R/ExperimentHub/16ffc32b3a_7752" ## focus: 148 targets ## resolutions(5): 1000 2000 4000 8000 16000 ## active resolution: 1000 diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-10-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-10-1.png index dff38ba..b7618b4 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-10-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-11-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-11-1.png index d70cec0..0d55f87 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-11-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-14-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-14-1.png index 0103d5c..01434b5 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-14-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-17-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-17-1.png index 34c94f1..b8c2b40 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-17-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-4-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-4-1.png index 93b554b..a4c2874 100644 Binary files 
a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-4-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-5-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-5-1.png index e7d1619..4fe61d7 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-5-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-7-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-7-1.png index aece164..d86bd28 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-7-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-1.png index 4f4484c..346d9cd 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-2.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-2.png index 5e7ed67..1f25092 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-2.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-8-2.png differ diff --git a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-9-1.png b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-9-1.png index 06361c6..febb966 100644 Binary files a/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-9-1.png and b/docs/devel/pages/visualization_files/figure-html/unnamed-chunk-9-1.png differ diff --git a/docs/devel/search.json 
b/docs/devel/search.json index c19d917..d70dfdb 100644 --- a/docs/devel/search.json +++ b/docs/devel/search.json @@ -109,14 +109,14 @@ "href": "pages/data-representation.html#contactfile-class", "title": "\n2  Hi-C data structures in R\n", "section": "\n2.3 ContactFile class", - "text": "2.3 ContactFile class\nHi-C contacts can be stored in four different formats (see previous chapter):\n\nAs a .(m)cool matrix (multi-scores, multi-resolution, indexed)\nAs a .hic matrix (multi-scores, multi-resolution, indexed)\nAs a HiC-pro derived matrix (single-score, single-resolution, non-indexed)\nUnbinned, Hi-C contacts can be stored in .pairs files\n\n\n2.3.1 Accessing example Hi-C files\nExample contact files can be downloaded using HiContactsData function.\n\nlibrary(HiContactsData)\ncoolf <- HiContactsData('yeast_wt', 'mcool')\n\nThis fetches files from the cloud, download them locally and returns the path of the local file.\n\ncoolf\n## EH7702 \n## \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\"\n\nSimilarly, example files are available for other file formats:\n\nhicf <- HiContactsData('yeast_wt', 'hic')\nhicpromatrixf <- HiContactsData('yeast_wt', 'hicpro_matrix')\nhicproregionsf <- HiContactsData('yeast_wt', 'hicpro_bed')\npairsf <- HiContactsData('yeast_wt', 'pairs.gz')\n\nWe can even check the content of some of these files to make sure they are actually what they are:\n\n# ---- HiC-Pro generates a tab-separated `regions.bed` file\nreadLines(hicproregionsf, 25)\n## [1] \"I\\t0\\t1000\" \"I\\t1000\\t2000\" \"I\\t2000\\t3000\" \"I\\t3000\\t4000\" \n## [5] \"I\\t4000\\t5000\" \"I\\t5000\\t6000\" \"I\\t6000\\t7000\" \"I\\t7000\\t8000\" \n## [9] \"I\\t8000\\t9000\" \"I\\t9000\\t10000\" \"I\\t10000\\t11000\" \"I\\t11000\\t12000\"\n## [13] \"I\\t12000\\t13000\" \"I\\t13000\\t14000\" \"I\\t14000\\t15000\" \"I\\t15000\\t16000\"\n## [17] \"I\\t16000\\t17000\" \"I\\t17000\\t18000\" \"I\\t18000\\t19000\" \"I\\t19000\\t20000\"\n## [21] \"I\\t20000\\t21000\" 
\"I\\t21000\\t22000\" \"I\\t22000\\t23000\" \"I\\t23000\\t24000\"\n## [25] \"I\\t24000\\t25000\"\n\n# ---- Pairs are also tab-separated \nreadLines(pairsf, 25)\n## [1] \"## pairs format v1.0\" \n## [2] \"#sorted: chr1-pos1-chr2-pos2\" \n## [3] \"#columns: readID chr1 pos1 chr2 pos2 strand1 strand2 frag1 frag2\" \n## [4] \"#chromsize: I 230218\" \n## [5] \"#chromsize: II 813184\" \n## [6] \"#chromsize: III 316620\" \n## [7] \"#chromsize: IV 1531933\" \n## [8] \"#chromsize: V 576874\" \n## [9] \"#chromsize: VI 270161\" \n## [10] \"#chromsize: VII 1090940\" \n## [11] \"#chromsize: VIII 562643\" \n## [12] \"#chromsize: IX 439888\" \n## [13] \"#chromsize: X 745751\" \n## [14] \"#chromsize: XI 666816\" \n## [15] \"#chromsize: XII 1078177\" \n## [16] \"#chromsize: XIII 924431\" \n## [17] \"#chromsize: XIV 784333\" \n## [18] \"#chromsize: XV 1091291\" \n## [19] \"#chromsize: XVI 948066\" \n## [20] \"#chromsize: Mito 85779\" \n## [21] \"NS500150:527:HHGYNBGXF:3:21611:19085:3986\\tII\\t105\\tII\\t48548\\t+\\t-\\t1358\\t1681\" \n## [22] \"NS500150:527:HHGYNBGXF:4:13604:19734:2406\\tII\\t113\\tII\\t45003\\t-\\t+\\t1358\\t1658\" \n## [23] \"NS500150:527:HHGYNBGXF:2:11108:25178:11036\\tII\\t119\\tII\\t687251\\t-\\t+\\t1358\\t5550\"\n## [24] \"NS500150:527:HHGYNBGXF:1:22301:8468:1586\\tII\\t160\\tII\\t26124\\t+\\t-\\t1358\\t1510\" \n## [25] \"NS500150:527:HHGYNBGXF:4:23606:24037:2076\\tII\\t169\\tII\\t39052\\t+\\t+\\t1358\\t1613\"\n\n\n2.3.2 ContactFile fundamentals\nA ContactFile object establishes a connection with a disk-stored Hi-C file (e.g. a .cool file, or a .pairs file, …). 
ContactFile classes are defined in the HiCExperiment package.\nContactFiles come in four different flavors:\n\n\nCoolFile: connection to a .(m)cool file\n\nHicFile: connection to a .hic file\n\nHicproFile: connection to output files generated by HiC-Pro\n\nPairsFile: connection to a .pairs file\n\nTo create each flavor of ContactFile, one can use the corresponding function:\n\nlibrary(HiCExperiment)\n\n# ----- This creates a connection to a `.(m)cool` file (path stored in `coolf`)\nCoolFile(coolf)\n## CoolFile object\n## .mcool file: /root/.cache/R/ExperimentHub/1747aa0ffd6_7752 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\n# ----- This creates a connection to a `.hic` file (path stored in `hicf`)\nHicFile(hicf)\n## HicFile object\n## .hic file: /root/.cache/R/ExperimentHub/1747a133ab3_7836 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\n# ----- This creates a connection to output files from HiC-Pro\nHicproFile(hicpromatrixf, hicproregionsf)\n## HicproFile object\n## HiC-Pro files:\n## $ matrix: /root/.cache/R/ExperimentHub/1746681cf08_7837 \n## $ regions: /root/.cache/R/ExperimentHub/174132e5e85_7838 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\n# ----- This creates a connection to a pairs file\nPairsFile(pairsf)\n## PairsFile object\n## resource: /root/.cache/R/ExperimentHub/174733eb553_7753\n\n\n2.3.3 ContactFile slots\nSeveral “slots” (i.e. 
pieces of information) are attached to a ContactFile object:\n\nThe path to the disk-stored contact matrix;\nThe active resolution (by default, the finest resolution available in a multi-resolution contact matrix);\nOptionally, the path to a matching pairs file (see below);\nSome metadata.\n\nSlots of a CoolFile object can be accessed as follow:\n\ncf <- CoolFile(coolf)\ncf\n## CoolFile object\n## .mcool file: /root/.cache/R/ExperimentHub/1747aa0ffd6_7752 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\nresolution(cf)\n## [1] 1000\n\npairsFile(cf)\n## NULL\n\nmetadata(cf)\n## list()\n\n\n\n\n\n\n\nImportant!\n\n\n\nContactFile objects are only connections to a disk-stored HiC file. Although metadata is available, they do not contain actual data!\n\n\n\n2.3.4 ContactFile methods\nTwo useful methods are available for ContactFiles:\n\n\navailableResolutions checks which resolutions are available in a ContactFile.\n\n\navailableResolutions(cf)\n## resolutions(5): 1000 2000 4000 8000 16000\n## \n\n\n\navailableChromosomes checks which chromosomes are available in a ContactFile, along with their length.\n\n\navailableChromosomes(cf)\n## Seqinfo object with 16 sequences from an unspecified genome:\n## seqnames seqlengths isCircular genome\n## I 230218 <NA> <NA>\n## II 813184 <NA> <NA>\n## III 316620 <NA> <NA>\n## IV 1531933 <NA> <NA>\n## V 576874 <NA> <NA>\n## ... ... ... 
...\n## XII 1078177 <NA> <NA>\n## XIII 924431 <NA> <NA>\n## XIV 784333 <NA> <NA>\n## XV 1091291 <NA> <NA>\n## XVI 948066 <NA> <NA>" + "text": "2.3 ContactFile class\nHi-C contacts can be stored in four different formats (see previous chapter):\n\nAs a .(m)cool matrix (multi-scores, multi-resolution, indexed)\nAs a .hic matrix (multi-scores, multi-resolution, indexed)\nAs a HiC-pro derived matrix (single-score, single-resolution, non-indexed)\nUnbinned, Hi-C contacts can be stored in .pairs files\n\n\n2.3.1 Accessing example Hi-C files\nExample contact files can be downloaded using HiContactsData function.\n\nlibrary(HiContactsData)\ncoolf <- HiContactsData('yeast_wt', 'mcool')\n\nThis fetches files from the cloud, download them locally and returns the path of the local file.\n\ncoolf\n## EH7702 \n## \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\"\n\nSimilarly, example files are available for other file formats:\n\nhicf <- HiContactsData('yeast_wt', 'hic')\nhicpromatrixf <- HiContactsData('yeast_wt', 'hicpro_matrix')\nhicproregionsf <- HiContactsData('yeast_wt', 'hicpro_bed')\npairsf <- HiContactsData('yeast_wt', 'pairs.gz')\n\nWe can even check the content of some of these files to make sure they are actually what they are:\n\n# ---- HiC-Pro generates a tab-separated `regions.bed` file\nreadLines(hicproregionsf, 25)\n## [1] \"I\\t0\\t1000\" \"I\\t1000\\t2000\" \"I\\t2000\\t3000\" \"I\\t3000\\t4000\" \n## [5] \"I\\t4000\\t5000\" \"I\\t5000\\t6000\" \"I\\t6000\\t7000\" \"I\\t7000\\t8000\" \n## [9] \"I\\t8000\\t9000\" \"I\\t9000\\t10000\" \"I\\t10000\\t11000\" \"I\\t11000\\t12000\"\n## [13] \"I\\t12000\\t13000\" \"I\\t13000\\t14000\" \"I\\t14000\\t15000\" \"I\\t15000\\t16000\"\n## [17] \"I\\t16000\\t17000\" \"I\\t17000\\t18000\" \"I\\t18000\\t19000\" \"I\\t19000\\t20000\"\n## [21] \"I\\t20000\\t21000\" \"I\\t21000\\t22000\" \"I\\t22000\\t23000\" \"I\\t23000\\t24000\"\n## [25] \"I\\t24000\\t25000\"\n\n# ---- Pairs are also tab-separated \nreadLines(pairsf, 25)\n## [1] 
\"## pairs format v1.0\" \n## [2] \"#sorted: chr1-pos1-chr2-pos2\" \n## [3] \"#columns: readID chr1 pos1 chr2 pos2 strand1 strand2 frag1 frag2\" \n## [4] \"#chromsize: I 230218\" \n## [5] \"#chromsize: II 813184\" \n## [6] \"#chromsize: III 316620\" \n## [7] \"#chromsize: IV 1531933\" \n## [8] \"#chromsize: V 576874\" \n## [9] \"#chromsize: VI 270161\" \n## [10] \"#chromsize: VII 1090940\" \n## [11] \"#chromsize: VIII 562643\" \n## [12] \"#chromsize: IX 439888\" \n## [13] \"#chromsize: X 745751\" \n## [14] \"#chromsize: XI 666816\" \n## [15] \"#chromsize: XII 1078177\" \n## [16] \"#chromsize: XIII 924431\" \n## [17] \"#chromsize: XIV 784333\" \n## [18] \"#chromsize: XV 1091291\" \n## [19] \"#chromsize: XVI 948066\" \n## [20] \"#chromsize: Mito 85779\" \n## [21] \"NS500150:527:HHGYNBGXF:3:21611:19085:3986\\tII\\t105\\tII\\t48548\\t+\\t-\\t1358\\t1681\" \n## [22] \"NS500150:527:HHGYNBGXF:4:13604:19734:2406\\tII\\t113\\tII\\t45003\\t-\\t+\\t1358\\t1658\" \n## [23] \"NS500150:527:HHGYNBGXF:2:11108:25178:11036\\tII\\t119\\tII\\t687251\\t-\\t+\\t1358\\t5550\"\n## [24] \"NS500150:527:HHGYNBGXF:1:22301:8468:1586\\tII\\t160\\tII\\t26124\\t+\\t-\\t1358\\t1510\" \n## [25] \"NS500150:527:HHGYNBGXF:4:23606:24037:2076\\tII\\t169\\tII\\t39052\\t+\\t+\\t1358\\t1613\"\n\n\n2.3.2 ContactFile fundamentals\nA ContactFile object establishes a connection with a disk-stored Hi-C file (e.g. a .cool file, or a .pairs file, …). 
ContactFile classes are defined in the HiCExperiment package.\nContactFiles come in four different flavors:\n\n\nCoolFile: connection to a .(m)cool file\n\nHicFile: connection to a .hic file\n\nHicproFile: connection to output files generated by HiC-Pro\n\nPairsFile: connection to a .pairs file\n\nTo create each flavor of ContactFile, one can use the corresponding function:\n\nlibrary(HiCExperiment)\n\n# ----- This creates a connection to a `.(m)cool` file (path stored in `coolf`)\nCoolFile(coolf)\n## CoolFile object\n## .mcool file: /root/.cache/R/ExperimentHub/16ffc32b3a_7752 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\n# ----- This creates a connection to a `.hic` file (path stored in `hicf`)\nHicFile(hicf)\n## HicFile object\n## .hic file: /root/.cache/R/ExperimentHub/16f2cb31ab6_7836 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\n# ----- This creates a connection to output files from HiC-Pro\nHicproFile(hicpromatrixf, hicproregionsf)\n## HicproFile object\n## HiC-Pro files:\n## $ matrix: /root/.cache/R/ExperimentHub/16f12c59723_7837 \n## $ regions: /root/.cache/R/ExperimentHub/16f24a80f84_7838 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\n# ----- This creates a connection to a pairs file\nPairsFile(pairsf)\n## PairsFile object\n## resource: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753\n\n\n2.3.3 ContactFile slots\nSeveral “slots” (i.e. 
pieces of information) are attached to a ContactFile object:\n\nThe path to the disk-stored contact matrix;\nThe active resolution (by default, the finest resolution available in a multi-resolution contact matrix);\nOptionally, the path to a matching pairs file (see below);\nSome metadata.\n\nSlots of a CoolFile object can be accessed as follow:\n\ncf <- CoolFile(coolf)\ncf\n## CoolFile object\n## .mcool file: /root/.cache/R/ExperimentHub/16ffc32b3a_7752 \n## resolution: 1000 \n## pairs file: \n## metadata(0):\n\nresolution(cf)\n## [1] 1000\n\npairsFile(cf)\n## NULL\n\nmetadata(cf)\n## list()\n\n\n\n\n\n\n\nImportant!\n\n\n\nContactFile objects are only connections to a disk-stored HiC file. Although metadata is available, they do not contain actual data!\n\n\n\n2.3.4 ContactFile methods\nTwo useful methods are available for ContactFiles:\n\n\navailableResolutions checks which resolutions are available in a ContactFile.\n\n\navailableResolutions(cf)\n## resolutions(5): 1000 2000 4000 8000 16000\n## \n\n\n\navailableChromosomes checks which chromosomes are available in a ContactFile, along with their length.\n\n\navailableChromosomes(cf)\n## Seqinfo object with 16 sequences from an unspecified genome:\n## seqnames seqlengths isCircular genome\n## I 230218 <NA> <NA>\n## II 813184 <NA> <NA>\n## III 316620 <NA> <NA>\n## IV 1531933 <NA> <NA>\n## V 576874 <NA> <NA>\n## ... ... ... 
...\n## XII 1078177 <NA> <NA>\n## XIII 924431 <NA> <NA>\n## XIV 784333 <NA> <NA>\n## XV 1091291 <NA> <NA>\n## XVI 948066 <NA> <NA>" }, { "objectID": "pages/data-representation.html#hicexperiment-class", "href": "pages/data-representation.html#hicexperiment-class", "title": "\n2  Hi-C data structures in R\n", "section": "\n2.4 HiCExperiment class", - "text": "2.4 HiCExperiment class\nBased on the previous sections, we have different Bioconductor classes relevant for Hi-C:\n\n\nGInteractions which can be used to represent genomic interactions in R\n\nContactFiles which can be used to establish a connection with disk-stored Hi-C files\n\nHiCExperiment objects are created when parsing a ContactFile in R. The HiCExperiment class reads a ContactFile in memory and store genomic interactions as GInteractions. The HiCExperiment class is, quite obviously, defined in the HiCExperiment package.\n\n2.4.1 Creating a HiCExperiment object\n\n2.4.1.1 Importing a ContactFile\n\nIn practice, to create a HiCExperiment object from a ContactFile, one can use the import method.\n\n\n\n\n\n\nCaution\n\n\n\n\nCreating a HiCExperiment object means importing data from a Hi-C matrix (e.g.  from a ContactFile) in memory in R.\n\nCreating a HiCExperiment object from large disk-stored contact matrices can potentially take a long time.\n\n\n\n\ncf <- CoolFile(coolf)\nhic <- import(cf)\nhic\n## `HiCExperiment` object with 8,757,906 contacts over 12,079 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"whole genome\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 2945692 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nPrinting a HiCExperiment to the console will not reveal the actual data stored in the object (it would most likely crash your R session!). 
Instead, it gives a summary of the data stored in the object:\n\nThe fileName, i.e. the path to the disk-stored data file\nThe focus, i.e. the genomic location for which data has been imported (in the example above, \"whole genome\" implies that all the data has been imported in R)\n\nresolutions available in the disk-stored data file (this will be identical to availableResolutions(cf))\n\nactive resolution indicates at which resolution the data is currently imported\n\ninteractions refers to the actual GInteractions imported in R and “hidden” (for now!) in the HiCExperiment object\n\nscores refer to different interaction frequency estimates. These can be raw counts, balanced (if the contact matrix has been previously normalized), or whatever score the end-user want to attribute to each interaction (e.g. ratio of counts between two Hi-C maps, …)\n\ntopologicalFeatures is a list of GRanges or GInteractions objects to describe important topological features.\n\npairsFile is a pointer to an optional disk-stored .pairs file from which the contact matrix has been created. This is often useful to estimate some Hi-C metrics.\n\nmetadata is a list to further describe the experiment.\n\nThese pieces of information are called slots. They can be directly accessed using getter functions, bearing the same name than the slot.\n\nfileName(hic)\n## [1] \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\"\n\nfocus(hic)\n## NULL\n\nresolutions(hic)\n## [1] 1000 2000 4000 8000 16000\n\nresolution(hic)\n## [1] 1000\n\ninteractions(hic)\n## GInteractions object with 2945692 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] I 1-1000 --- I 1-1000 | 0\n## [2] I 1-1000 --- I 1001-2000 | 0\n## [3] I 1-1000 --- I 2001-3000 | 0\n## [4] I 1-1000 --- I 3001-4000 | 0\n## [5] I 1-1000 --- I 4001-5000 | 0\n## ... ... ... ... ... ... . 
...\n## [2945688] XVI 940001-941000 --- XVI 942001-943000 | 12070\n## [2945689] XVI 940001-941000 --- XVI 943001-944000 | 12070\n## [2945690] XVI 941001-942000 --- XVI 941001-942000 | 12071\n## [2945691] XVI 941001-942000 --- XVI 942001-943000 | 12071\n## [2945692] XVI 941001-942000 --- XVI 943001-944000 | 12071\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 0 15 0.0663491\n## [2] 1 21 0.1273505\n## [3] 2 21 0.0738691\n## [4] 3 38 0.0827051\n## [5] 4 17 0.0591984\n## ... ... ... ...\n## [2945688] 12072 11 0.0575550\n## [2945689] 12073 1 NaN\n## [2945690] 12071 74 0.0504615\n## [2945691] 12072 39 0.1624599\n## [2945692] 12073 1 NaN\n## -------\n## regions: 12079 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nscores(hic)\n## List of length 2\n## names(2): count balanced\n\ntopologicalFeatures(hic)\n## List of length 4\n## names(4): compartments borders loops viewpoints\n\npairsFile(hic)\n## NULL\n\nmetadata(hic)\n## list()\n\nimport also works for other types of ContactFile (HicFile, HicproFile, PairsFile), e.g. \n\nFor HicFile and HicproFile, import seamlessly returns a HiCExperiment as well:\n\n\nhf <- HicFile(hicf)\nhic <- import(hf)\nhic\n## `HiCExperiment` object with 13,681,280 contacts over 12,165 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747a133ab3_7836\" \n## focus: \"whole genome\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 2965693 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nFor PairsFile, the returned object is a representation of Hi-C “pairs” in R, i.e. 
GInteractions\n\n\n\npf <- PairsFile(pairsf)\npairs <- import(pf)\npairs\n## GInteractions object with 471364 interactions and 3 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | frag1 frag2\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric> <numeric>\n## [1] II 105 --- II 48548 | 1358 1681\n## [2] II 113 --- II 45003 | 1358 1658\n## [3] II 119 --- II 687251 | 1358 5550\n## [4] II 160 --- II 26124 | 1358 1510\n## [5] II 169 --- II 39052 | 1358 1613\n## ... ... ... ... ... ... . ... ...\n## [471360] II 808605 --- II 809683 | 6316 6320\n## [471361] II 808609 --- II 809917 | 6316 6324\n## [471362] II 808617 --- II 809506 | 6316 6319\n## [471363] II 809447 --- II 809685 | 6319 6321\n## [471364] II 809472 --- II 809675 | 6319 6320\n## distance\n## <integer>\n## [1] 48443\n## [2] 44890\n## [3] 687132\n## [4] 25964\n## [5] 38883\n## ... ...\n## [471360] 1078\n## [471361] 1308\n## [471362] 889\n## [471363] 238\n## [471364] 203\n## -------\n## regions: 549331 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\n\n2.4.1.2 Customizing the import\n\nTo reduce the import to only parse the data that is relevant to the study, two arguments can be passed to import, along with a ContactFile.\n\n\n\n\n\n\nKey import arguments:\n\n\n\n\n\nfocus: This can be used to only parse data for a specific genomic location.\n\nresolution: This can be used to choose which resolution to parse the contact matrix at (this is ignored if the ContactFile is not multi-resolution, e.g. 
.cool or HiC-Pro generated matrices)\n\n\n\n\nImport interactions within a single chromosome:\n\n\nhic <- import(cf, focus = 'II', resolution = 2000)\n\nregions(hic) # ---- `regions()` work on `HiCExperiment` the same way than on `GInteractions`\n## GRanges object with 407 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## II_1_2000 II 1-2000 * | 116 NaN II\n## II_2001_4000 II 2001-4000 * | 117 NaN II\n## II_4001_6000 II 4001-6000 * | 118 NaN II\n## II_6001_8000 II 6001-8000 * | 119 NaN II\n## II_8001_10000 II 8001-10000 * | 120 0.0461112 II\n## ... ... ... ... . ... ... ...\n## II_804001_806000 II 804001-806000 * | 518 0.0493107 II\n## II_806001_808000 II 806001-808000 * | 519 0.0611355 II\n## II_808001_810000 II 808001-810000 * | 520 NaN II\n## II_810001_812000 II 810001-812000 * | 521 NaN II\n## II_812001_813184 II 812001-813184 * | 522 NaN II\n## center\n## <integer>\n## II_1_2000 1000\n## II_2001_4000 3000\n## II_4001_6000 5000\n## II_6001_8000 7000\n## II_8001_10000 9000\n## ... ...\n## II_804001_806000 805000\n## II_806001_808000 807000\n## II_808001_810000 809000\n## II_810001_812000 811000\n## II_812001_813184 812592\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\ntable(seqnames(regions(hic)))\n## \n## I II III IV V VI VII VIII IX X XI XII XIII XIV XV \n## 0 407 0 0 0 0 0 0 0 0 0 0 0 0 0 \n## XVI \n## 0\n\nanchors(hic) # ---- `anchors()` work on `HiCExperiment` the same way than on `GInteractions`\n## $first\n## GRanges object with 34063 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] II 1-2000 * | 116 NaN II\n## [2] II 1-2000 * | 116 NaN II\n## [3] II 1-2000 * | 116 NaN II\n## [4] II 1-2000 * | 116 NaN II\n## [5] II 1-2000 * | 116 NaN II\n## ... ... ... ... . ... ... 
...\n## [34059] II 804001-806000 * | 518 0.0493107 II\n## [34060] II 806001-808000 * | 519 0.0611355 II\n## [34061] II 806001-808000 * | 519 0.0611355 II\n## [34062] II 806001-808000 * | 519 0.0611355 II\n## [34063] II 808001-810000 * | 520 NaN II\n## center\n## <integer>\n## [1] 1000\n## [2] 1000\n## [3] 1000\n## [4] 1000\n## [5] 1000\n## ... ...\n## [34059] 805000\n## [34060] 807000\n## [34061] 807000\n## [34062] 807000\n## [34063] 809000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 34063 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] II 1-2000 * | 116 NaN II\n## [2] II 4001-6000 * | 118 NaN II\n## [3] II 6001-8000 * | 119 NaN II\n## [4] II 8001-10000 * | 120 0.0461112 II\n## [5] II 10001-12000 * | 121 0.0334807 II\n## ... ... ... ... . ... ... ...\n## [34059] II 810001-812000 * | 521 NaN II\n## [34060] II 806001-808000 * | 519 0.0611355 II\n## [34061] II 808001-810000 * | 520 NaN II\n## [34062] II 810001-812000 * | 521 NaN II\n## [34063] II 808001-810000 * | 520 NaN II\n## center\n## <integer>\n## [1] 1000\n## [2] 5000\n## [3] 7000\n## [4] 9000\n## [5] 11000\n## ... ...\n## [34059] 811000\n## [34060] 807000\n## [34061] 809000\n## [34062] 811000\n## [34063] 809000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\nImport interactions within a segment of a chromosome:\n\n\nhic <- import(cf, focus = 'II:40000-60000', resolution = 1000)\n\nregions(hic) \n## GRanges object with 21 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## II_39001_40000 II 39001-40000 * | 270 0.0220798 II\n## II_40001_41000 II 40001-41000 * | 271 0.0246775 II\n## II_41001_42000 II 41001-42000 * | 272 0.0269232 II\n## II_42001_43000 II 42001-43000 * | 273 0.0341849 II\n## II_43001_44000 II 43001-44000 * | 274 0.0265386 II\n## ... ... ... 
... . ... ... ...\n## II_55001_56000 II 55001-56000 * | 286 0.0213532 II\n## II_56001_57000 II 56001-57000 * | 287 0.0569839 II\n## II_57001_58000 II 57001-58000 * | 288 0.0338612 II\n## II_58001_59000 II 58001-59000 * | 289 0.0294531 II\n## II_59001_60000 II 59001-60000 * | 290 0.0306662 II\n## center\n## <integer>\n## II_39001_40000 39500\n## II_40001_41000 40500\n## II_41001_42000 41500\n## II_42001_43000 42500\n## II_43001_44000 43500\n## ... ...\n## II_55001_56000 55500\n## II_56001_57000 56500\n## II_57001_58000 57500\n## II_58001_59000 58500\n## II_59001_60000 59500\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(hic)\n## $first\n## GRanges object with 210 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] II 40001-41000 * | 271 0.0246775 II 40500\n## [2] II 40001-41000 * | 271 0.0246775 II 40500\n## [3] II 40001-41000 * | 271 0.0246775 II 40500\n## [4] II 40001-41000 * | 271 0.0246775 II 40500\n## [5] II 40001-41000 * | 271 0.0246775 II 40500\n## ... ... ... ... . ... ... ... ...\n## [206] II 57001-58000 * | 288 0.0338612 II 57500\n## [207] II 57001-58000 * | 288 0.0338612 II 57500\n## [208] II 58001-59000 * | 289 0.0294531 II 58500\n## [209] II 58001-59000 * | 289 0.0294531 II 58500\n## [210] II 59001-60000 * | 290 0.0306662 II 59500\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 210 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] II 40001-41000 * | 271 0.0246775 II 40500\n## [2] II 41001-42000 * | 272 0.0269232 II 41500\n## [3] II 42001-43000 * | 273 0.0341849 II 42500\n## [4] II 43001-44000 * | 274 0.0265386 II 43500\n## [5] II 44001-45000 * | 275 0.0488968 II 44500\n## ... ... ... ... . ... ... ... 
...\n## [206] II 58001-59000 * | 289 0.0294531 II 58500\n## [207] II 59001-60000 * | 290 0.0306662 II 59500\n## [208] II 58001-59000 * | 289 0.0294531 II 58500\n## [209] II 59001-60000 * | 290 0.0306662 II 59500\n## [210] II 59001-60000 * | 290 0.0306662 II 59500\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\nImport interactions between two chromosomes:\n\n\nhic2 <- import(cf, focus = 'II|XV', resolution = 4000)\n\nregions(hic2)\n## GRanges object with 477 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight\n## <Rle> <IRanges> <Rle> | <numeric> <numeric>\n## II_1_4000 II 1-4000 * | 58 NaN\n## II_4001_8000 II 4001-8000 * | 59 NaN\n## II_8001_12000 II 8001-12000 * | 60 0.0274474\n## II_12001_16000 II 12001-16000 * | 61 0.0342116\n## II_16001_20000 II 16001-20000 * | 62 0.0195128\n## ... ... ... ... . ... ...\n## XV_1072001_1076000 XV 1072001-1076000 * | 2783 0.041763\n## XV_1076001_1080000 XV 1076001-1080000 * | 2784 NaN\n## XV_1080001_1084000 XV 1080001-1084000 * | 2785 NaN\n## XV_1084001_1088000 XV 1084001-1088000 * | 2786 NaN\n## XV_1088001_1091291 XV 1088001-1091291 * | 2787 NaN\n## chr center\n## <Rle> <integer>\n## II_1_4000 II 2000\n## II_4001_8000 II 6000\n## II_8001_12000 II 10000\n## II_12001_16000 II 14000\n## II_16001_20000 II 18000\n## ... ... ...\n## XV_1072001_1076000 XV 1074000\n## XV_1076001_1080000 XV 1078000\n## XV_1080001_1084000 XV 1082000\n## XV_1084001_1088000 XV 1086000\n## XV_1088001_1091291 XV 1089646\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(hic2)\n## $first\n## GRanges object with 18032 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] II 1-4000 * | 58 NaN II\n## [2] II 1-4000 * | 58 NaN II\n## [3] II 1-4000 * | 58 NaN II\n## [4] II 1-4000 * | 58 NaN II\n## [5] II 1-4000 * | 58 NaN II\n## ... ... ... ... . ... ... 
...\n## [18028] II 808001-812000 * | 260 NaN II\n## [18029] II 808001-812000 * | 260 NaN II\n## [18030] II 808001-812000 * | 260 NaN II\n## [18031] II 808001-812000 * | 260 NaN II\n## [18032] II 808001-812000 * | 260 NaN II\n## center\n## <integer>\n## [1] 2000\n## [2] 2000\n## [3] 2000\n## [4] 2000\n## [5] 2000\n## ... ...\n## [18028] 810000\n## [18029] 810000\n## [18030] 810000\n## [18031] 810000\n## [18032] 810000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 18032 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] XV 48001-52000 * | 2527 0.0185354 XV\n## [2] XV 348001-352000 * | 2602 0.0233750 XV\n## [3] XV 468001-472000 * | 2632 0.0153615 XV\n## [4] XV 472001-476000 * | 2633 0.0189624 XV\n## [5] XV 584001-588000 * | 2661 0.0167715 XV\n## ... ... ... ... . ... ... ...\n## [18028] XV 980001-984000 * | 2760 0.0187827 XV\n## [18029] XV 984001-988000 * | 2761 0.0250094 XV\n## [18030] XV 992001-996000 * | 2763 0.0185599 XV\n## [18031] XV 1004001-1008000 * | 2766 0.0196942 XV\n## [18032] XV 1064001-1068000 * | 2781 0.0208220 XV\n## center\n## <integer>\n## [1] 50000\n## [2] 350000\n## [3] 470000\n## [4] 474000\n## [5] 586000\n## ... 
...\n## [18028] 982000\n## [18029] 986000\n## [18030] 994000\n## [18031] 1006000\n## [18032] 1066000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\nImport interactions between segments of two chromosomes:\n\n\nhic3 <- import(cf, focus = 'III:10000-40000|XV:10000-40000', resolution = 2000)\n\nregions(hic3)\n## GRanges object with 32 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## III_8001_10000 III 8001-10000 * | 527 NaN III\n## III_10001_12000 III 10001-12000 * | 528 NaN III\n## III_12001_14000 III 12001-14000 * | 529 NaN III\n## III_14001_16000 III 14001-16000 * | 530 0.0356351 III\n## III_16001_18000 III 16001-18000 * | 531 0.0230693 III\n## ... ... ... ... . ... ... ...\n## XV_30001_32000 XV 30001-32000 * | 5039 0.0482465 XV\n## XV_32001_34000 XV 32001-34000 * | 5040 0.0241580 XV\n## XV_34001_36000 XV 34001-36000 * | 5041 0.0273166 XV\n## XV_36001_38000 XV 36001-38000 * | 5042 0.0542235 XV\n## XV_38001_40000 XV 38001-40000 * | 5043 0.0206849 XV\n## center\n## <integer>\n## III_8001_10000 9000\n## III_10001_12000 11000\n## III_12001_14000 13000\n## III_14001_16000 15000\n## III_16001_18000 17000\n## ... 
...\n## XV_30001_32000 31000\n## XV_32001_34000 33000\n## XV_34001_36000 35000\n## XV_36001_38000 37000\n## XV_38001_40000 39000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(hic3)\n## $first\n## GRanges object with 11 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] III 14001-16000 * | 530 0.0356351 III 15000\n## [2] III 16001-18000 * | 531 0.0230693 III 17000\n## [3] III 16001-18000 * | 531 0.0230693 III 17000\n## [4] III 20001-22000 * | 533 0.0343250 III 21000\n## [5] III 22001-24000 * | 534 0.0258604 III 23000\n## [6] III 24001-26000 * | 535 0.0290757 III 25000\n## [7] III 28001-30000 * | 537 0.0290713 III 29000\n## [8] III 30001-32000 * | 538 0.0266373 III 31000\n## [9] III 32001-34000 * | 539 0.0201137 III 33000\n## [10] III 32001-34000 * | 539 0.0201137 III 33000\n## [11] III 36001-38000 * | 541 0.0220603 III 37000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 11 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] XV 16001-18000 * | 5032 0.0187250 XV 17000\n## [2] XV 16001-18000 * | 5032 0.0187250 XV 17000\n## [3] XV 20001-22000 * | 5034 0.0247973 XV 21000\n## [4] XV 14001-16000 * | 5031 0.0379727 XV 15000\n## [5] XV 10001-12000 * | 5029 0.0296913 XV 11000\n## [6] XV 32001-34000 * | 5040 0.0241580 XV 33000\n## [7] XV 16001-18000 * | 5032 0.0187250 XV 17000\n## [8] XV 38001-40000 * | 5043 0.0206849 XV 39000\n## [9] XV 22001-24000 * | 5035 0.0613856 XV 23000\n## [10] XV 30001-32000 * | 5039 0.0482465 XV 31000\n## [11] XV 10001-12000 * | 5029 0.0296913 XV 11000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\n2.4.2 Interacting with HiCExperiment data\n\nAn HiCExperiment object allows parsing of a disk-stored contact matrix.\nAn HiCExperiment 
object operates by wrapping together (1) a ContactFile (i.e. a connection to a disk-stored data file) and (2) a GInteractions generated by parsing the data file.\n\nWe will use the yeast_hic HiCExperiment object to demonstrate how to parse information from a HiCExperiment object.\n\nyeast_hic <- contacts_yeast()\n\n\nyeast_hic\n## `HiCExperiment` object with 8,757,906 contacts over 763 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"whole genome\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 16000 \n## interactions: 267709 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: /root/.cache/R/ExperimentHub/174733eb553_7753 \n## metadata(3): ID org date\n\n\n2.4.2.1 Interactions\nThe imported genomic interactions can be directly exposed using the interactions function and are returned as a GInteractions object.\n\ninteractions(yeast_hic)\n## GInteractions object with 267709 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] I 1-16000 --- I 1-16000 | 0\n## [2] I 1-16000 --- I 16001-32000 | 0\n## [3] I 1-16000 --- I 32001-48000 | 0\n## [4] I 1-16000 --- I 48001-64000 | 0\n## [5] I 1-16000 --- I 64001-80000 | 0\n## ... ... ... ... ... ... . ...\n## [267705] XVI 896001-912000 --- XVI 912001-928000 | 759\n## [267706] XVI 896001-912000 --- XVI 928001-944000 | 759\n## [267707] XVI 912001-928000 --- XVI 912001-928000 | 760\n## [267708] XVI 912001-928000 --- XVI 928001-944000 | 760\n## [267709] XVI 928001-944000 --- XVI 928001-944000 | 761\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 0 2836 1.0943959\n## [2] 1 2212 0.9592069\n## [3] 2 1183 0.4385242\n## [4] 3 831 0.2231192\n## [5] 4 310 0.0821255\n## ... ... ... 
...\n## [267705] 760 3565 1.236371\n## [267706] 761 1359 0.385016\n## [267707] 760 3534 2.103988\n## [267708] 761 3055 1.485794\n## [267709] 761 4308 1.711565\n## -------\n## regions: 763 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nBecause genomic interactions are actually stored as GInteractions, regions and anchors work on HiCExperiment objects just as they work with GInteractions!\n\nregions(yeast_hic)\n## GRanges object with 763 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight\n## <Rle> <IRanges> <Rle> | <numeric> <numeric>\n## I_1_16000 I 1-16000 * | 0 0.0196442\n## I_16001_32000 I 16001-32000 * | 1 0.0220746\n## I_32001_48000 I 32001-48000 * | 2 0.0188701\n## I_48001_64000 I 48001-64000 * | 3 0.0136679\n## I_64001_80000 I 64001-80000 * | 4 0.0134860\n## ... ... ... ... . ... ...\n## XVI_880001_896000 XVI 880001-896000 * | 758 0.00910873\n## XVI_896001_912000 XVI 896001-912000 * | 759 0.01421350\n## XVI_912001_928000 XVI 912001-928000 * | 760 0.02439992\n## XVI_928001_944000 XVI 928001-944000 * | 761 0.01993237\n## XVI_944001_948066 XVI 944001-948066 * | 762 NaN\n## chr center\n## <Rle> <integer>\n## I_1_16000 I 8000\n## I_16001_32000 I 24000\n## I_32001_48000 I 40000\n## I_48001_64000 I 56000\n## I_64001_80000 I 72000\n## ... ... ...\n## XVI_880001_896000 XVI 888000\n## XVI_896001_912000 XVI 904000\n## XVI_912001_928000 XVI 920000\n## XVI_928001_944000 XVI 936000\n## XVI_944001_948066 XVI 946033\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(yeast_hic)\n## $first\n## GRanges object with 267709 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] I 1-16000 * | 0 0.0196442 I\n## [2] I 1-16000 * | 0 0.0196442 I\n## [3] I 1-16000 * | 0 0.0196442 I\n## [4] I 1-16000 * | 0 0.0196442 I\n## [5] I 1-16000 * | 0 0.0196442 I\n## ... ... ... ... . ... ... 
...\n## [267705] XVI 896001-912000 * | 759 0.0142135 XVI\n## [267706] XVI 896001-912000 * | 759 0.0142135 XVI\n## [267707] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267708] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267709] XVI 928001-944000 * | 761 0.0199324 XVI\n## center\n## <integer>\n## [1] 8000\n## [2] 8000\n## [3] 8000\n## [4] 8000\n## [5] 8000\n## ... ...\n## [267705] 904000\n## [267706] 904000\n## [267707] 920000\n## [267708] 920000\n## [267709] 936000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 267709 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] I 1-16000 * | 0 0.0196442 I\n## [2] I 16001-32000 * | 1 0.0220746 I\n## [3] I 32001-48000 * | 2 0.0188701 I\n## [4] I 48001-64000 * | 3 0.0136679 I\n## [5] I 64001-80000 * | 4 0.0134860 I\n## ... ... ... ... . ... ... ...\n## [267705] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267706] XVI 928001-944000 * | 761 0.0199324 XVI\n## [267707] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267708] XVI 928001-944000 * | 761 0.0199324 XVI\n## [267709] XVI 928001-944000 * | 761 0.0199324 XVI\n## center\n## <integer>\n## [1] 8000\n## [2] 24000\n## [3] 40000\n## [4] 56000\n## [5] 72000\n## ... ...\n## [267705] 920000\n## [267706] 936000\n## [267707] 920000\n## [267708] 936000\n## [267709] 936000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\n2.4.2.2 Bins and seqinfo\nAdditional useful information can be recovered from a HiCExperiment object. This includes:\n\nThe seqinfo of the HiCExperiment:\n\n\nseqinfo(yeast_hic)\n## Seqinfo object with 16 sequences from an unspecified genome:\n## seqnames seqlengths isCircular genome\n## I 230218 <NA> <NA>\n## II 813184 <NA> <NA>\n## III 316620 <NA> <NA>\n## IV 1531933 <NA> <NA>\n## V 576874 <NA> <NA>\n## ... ... ... 
...\n## XII 1078177 <NA> <NA>\n## XIII 924431 <NA> <NA>\n## XIV 784333 <NA> <NA>\n## XV 1091291 <NA> <NA>\n## XVI 948066 <NA> <NA>\n\nThis lists the different chromosomes available to parse along with their length.\n\nThe bins of the HiCExperiment:\n\n\nbins(yeast_hic)\n## GRanges object with 763 ranges and 2 metadata columns:\n## seqnames ranges strand | bin_id weight\n## <Rle> <IRanges> <Rle> | <numeric> <numeric>\n## I_1_16000 I 1-16000 * | 0 0.0196442\n## I_16001_32000 I 16001-32000 * | 1 0.0220746\n## I_32001_48000 I 32001-48000 * | 2 0.0188701\n## I_48001_64000 I 48001-64000 * | 3 0.0136679\n## I_64001_80000 I 64001-80000 * | 4 0.0134860\n## ... ... ... ... . ... ...\n## XVI_880001_896000 XVI 880001-896000 * | 758 0.00910873\n## XVI_896001_912000 XVI 896001-912000 * | 759 0.01421350\n## XVI_912001_928000 XVI 912001-928000 * | 760 0.02439992\n## XVI_928001_944000 XVI 928001-944000 * | 761 0.01993237\n## XVI_944001_948066 XVI 944001-948066 * | 762 NaN\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\n\n\n\n\n\nDifference between bins and regions\n\n\n\nbins are not equivalent to regions of an HiCExperiment.\n\n\nbins refer to all the possible regions of a HiCExperiment. For instance, for a HiCExperiment with a total genome size of 1,000,000 and a resolution of 2000, bins will always return a GRanges object with 500 ranges.\n\nregions, by contrast, refer to the union of anchors of all the interactions imported in a HiCExperiment object.\n\nThus, all the regions will necessarily be a subset of the HiCExperiment bins, or equal to bins if no focus has been specified when importing a ContactFile.\n\n\n\n2.4.2.3 Scores\nOf course, what the end-user would be looking for is the frequency for each genomic interaction. Such frequency scores are available using the scores function. 
scores returns a list with a number of different types of scores.\n\nhead(scores(yeast_hic))\n## List of length 2\n## names(2): count balanced\n\nhead(scores(yeast_hic, \"count\"))\n## [1] 2836 2212 1183 831 310 159\n\nhead(scores(yeast_hic, \"balanced\"))\n## [1] 1.09439586 0.95920688 0.43852417 0.22311917 0.08212549 0.03345221\n\nCalling interactions(hic) returns a GInteractions with scores already stored in extra columns. This short-hand allows one to dynamically check scores directly from the interactions output.\n\ninteractions(yeast_hic)\n## GInteractions object with 267709 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] I 1-16000 --- I 1-16000 | 0\n## [2] I 1-16000 --- I 16001-32000 | 0\n## [3] I 1-16000 --- I 32001-48000 | 0\n## [4] I 1-16000 --- I 48001-64000 | 0\n## [5] I 1-16000 --- I 64001-80000 | 0\n## ... ... ... ... ... ... . ...\n## [267705] XVI 896001-912000 --- XVI 912001-928000 | 759\n## [267706] XVI 896001-912000 --- XVI 928001-944000 | 759\n## [267707] XVI 912001-928000 --- XVI 912001-928000 | 760\n## [267708] XVI 912001-928000 --- XVI 928001-944000 | 760\n## [267709] XVI 928001-944000 --- XVI 928001-944000 | 761\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 0 2836 1.0943959\n## [2] 1 2212 0.9592069\n## [3] 2 1183 0.4385242\n## [4] 3 831 0.2231192\n## [5] 4 310 0.0821255\n## ... ... ... ...\n## [267705] 760 3565 1.236371\n## [267706] 761 1359 0.385016\n## [267707] 760 3534 2.103988\n## [267708] 761 3055 1.485794\n## [267709] 761 4308 1.711565\n## -------\n## regions: 763 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nhead(interactions(yeast_hic)$count)\n## [1] 2836 2212 1183 831 310 159\n\n\n2.4.2.4 topologicalFeatures\nIn Hi-C studies, “topological features” refer to genomic structures identified (usually from a Hi-C map, but not necessarily). 
For instance, one may want to study known structural loops anchored at CTCF sites, or interactions around or over centromeres, or simply specific genomic “viewpoints”.\nHiCExperiment objects can store topologicalFeatures to facilitate this analysis. By default, four empty topologicalFeatures are stored in a list:\n\ncompartments\nborders\nloops\nviewpoints\n\nAdditional topologicalFeatures can be added to this list (read next chapter for more detail).\n\ntopologicalFeatures(yeast_hic)\n## List of length 5\n## names(5): compartments borders loops viewpoints centromeres\n\ntopologicalFeatures(yeast_hic, 'centromeres')\n## GRanges object with 16 ranges and 0 metadata columns:\n## seqnames ranges strand\n## <Rle> <IRanges> <Rle>\n## [1] I 151583-151641 +\n## [2] II 238361-238419 +\n## [3] III 114322-114380 +\n## [4] IV 449879-449937 +\n## [5] V 152522-152580 +\n## ... ... ... ...\n## [12] XII 151366-151424 +\n## [13] XIII 268222-268280 +\n## [14] XIV 628588-628646 +\n## [15] XV 326897-326955 +\n## [16] XVI 556255-556313 +\n## -------\n## seqinfo: 17 sequences (1 circular) from R64-1-1 genome\n\n\n2.4.2.5 pairsFile\nAs a contact matrix is typically obtained from binning a .pairs file, it is often the case that the matching .pairs file is available to the end-user. A PairsFile can thus be created and associated with the corresponding HiCExperiment object. This allows more accurate estimation of contact distribution, e.g. 
when calculating distance-dependent genomic interaction frequency.\n\npairsFile(yeast_hic) <- pairsf\n\npairsFile(yeast_hic)\n## EH7703 \n## \"/root/.cache/R/ExperimentHub/174733eb553_7753\"\n\nreadLines(pairsFile(yeast_hic), 25)\n## [1] \"## pairs format v1.0\" \n## [2] \"#sorted: chr1-pos1-chr2-pos2\" \n## [3] \"#columns: readID chr1 pos1 chr2 pos2 strand1 strand2 frag1 frag2\" \n## [4] \"#chromsize: I 230218\" \n## [5] \"#chromsize: II 813184\" \n## [6] \"#chromsize: III 316620\" \n## [7] \"#chromsize: IV 1531933\" \n## [8] \"#chromsize: V 576874\" \n## [9] \"#chromsize: VI 270161\" \n## [10] \"#chromsize: VII 1090940\" \n## [11] \"#chromsize: VIII 562643\" \n## [12] \"#chromsize: IX 439888\" \n## [13] \"#chromsize: X 745751\" \n## [14] \"#chromsize: XI 666816\" \n## [15] \"#chromsize: XII 1078177\" \n## [16] \"#chromsize: XIII 924431\" \n## [17] \"#chromsize: XIV 784333\" \n## [18] \"#chromsize: XV 1091291\" \n## [19] \"#chromsize: XVI 948066\" \n## [20] \"#chromsize: Mito 85779\" \n## [21] \"NS500150:527:HHGYNBGXF:3:21611:19085:3986\\tII\\t105\\tII\\t48548\\t+\\t-\\t1358\\t1681\" \n## [22] \"NS500150:527:HHGYNBGXF:4:13604:19734:2406\\tII\\t113\\tII\\t45003\\t-\\t+\\t1358\\t1658\" \n## [23] \"NS500150:527:HHGYNBGXF:2:11108:25178:11036\\tII\\t119\\tII\\t687251\\t-\\t+\\t1358\\t5550\"\n## [24] \"NS500150:527:HHGYNBGXF:1:22301:8468:1586\\tII\\t160\\tII\\t26124\\t+\\t-\\t1358\\t1510\" \n## [25] \"NS500150:527:HHGYNBGXF:4:23606:24037:2076\\tII\\t169\\tII\\t39052\\t+\\t+\\t1358\\t1613\"\n\n\n2.4.2.6 Importing a PairsFile\n\nThe .pairs file linked to a HiCExperiment object can itself be imported in a GInteractions object:\n\nimport(pairsFile(yeast_hic), format = 'pairs')\n## GInteractions object with 471364 interactions and 3 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | frag1 frag2\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric> <numeric>\n## [1] II 105 --- II 48548 | 1358 1681\n## [2] II 113 --- II 45003 | 1358 1658\n## [3] II 119 --- II 687251 | 1358 
5550\n## [4] II 160 --- II 26124 | 1358 1510\n## [5] II 169 --- II 39052 | 1358 1613\n## ... ... ... ... ... ... . ... ...\n## [471360] II 808605 --- II 809683 | 6316 6320\n## [471361] II 808609 --- II 809917 | 6316 6324\n## [471362] II 808617 --- II 809506 | 6316 6319\n## [471363] II 809447 --- II 809685 | 6319 6321\n## [471364] II 809472 --- II 809675 | 6319 6320\n## distance\n## <integer>\n## [1] 48443\n## [2] 44890\n## [3] 687132\n## [4] 25964\n## [5] 38883\n## ... ...\n## [471360] 1078\n## [471361] 1308\n## [471362] 889\n## [471363] 238\n## [471364] 203\n## -------\n## regions: 549331 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\nNote that these GInteractions are not binned, contrary to interactions extracted from a HiCExperiment. Anchors of the interactions listed in the GInteractions imported from a disk-stored .pairs file are all of width 1." + "text": "2.4 HiCExperiment class\nBased on the previous sections, we have different Bioconductor classes relevant for Hi-C:\n\n\nGInteractions which can be used to represent genomic interactions in R\n\nContactFiles which can be used to establish a connection with disk-stored Hi-C files\n\nHiCExperiment objects are created when parsing a ContactFile in R. The HiCExperiment class reads a ContactFile into memory and stores genomic interactions as GInteractions. The HiCExperiment class is, quite obviously, defined in the HiCExperiment package.\n\n2.4.1 Creating a HiCExperiment object\n\n2.4.1.1 Importing a ContactFile\n\nIn practice, to create a HiCExperiment object from a ContactFile, one can use the import method.\n\n\n\n\n\n\nCaution\n\n\n\n\nCreating a HiCExperiment object means importing data from a Hi-C matrix (e.g.  
from a ContactFile) in memory in R.\n\nCreating a HiCExperiment object from large disk-stored contact matrices can potentially take a long time.\n\n\n\n\ncf <- CoolFile(coolf)\nhic <- import(cf)\nhic\n## `HiCExperiment` object with 8,757,906 contacts over 12,079 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"whole genome\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 2945692 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nPrinting a HiCExperiment to the console will not reveal the actual data stored in the object (it would most likely crash your R session!). Instead, it gives a summary of the data stored in the object:\n\nThe fileName, i.e. the path to the disk-stored data file\nThe focus, i.e. the genomic location for which data has been imported (in the example above, \"whole genome\" implies that all the data has been imported in R)\n\nresolutions available in the disk-stored data file (this will be identical to availableResolutions(cf))\n\nactive resolution indicates at which resolution the data is currently imported\n\ninteractions refers to the actual GInteractions imported in R and “hidden” (for now!) in the HiCExperiment object\n\nscores refer to different interaction frequency estimates. These can be raw counts, balanced (if the contact matrix has been previously normalized), or whatever score the end-user want to attribute to each interaction (e.g. ratio of counts between two Hi-C maps, …)\n\ntopologicalFeatures is a list of GRanges or GInteractions objects to describe important topological features.\n\npairsFile is a pointer to an optional disk-stored .pairs file from which the contact matrix has been created. 
This is often useful to estimate some Hi-C metrics.\n\nmetadata is a list to further describe the experiment.\n\nThese pieces of information are called slots. They can be directly accessed using getter functions, bearing the same name than the slot.\n\nfileName(hic)\n## [1] \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\"\n\nfocus(hic)\n## NULL\n\nresolutions(hic)\n## [1] 1000 2000 4000 8000 16000\n\nresolution(hic)\n## [1] 1000\n\ninteractions(hic)\n## GInteractions object with 2945692 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] I 1-1000 --- I 1-1000 | 0\n## [2] I 1-1000 --- I 1001-2000 | 0\n## [3] I 1-1000 --- I 2001-3000 | 0\n## [4] I 1-1000 --- I 3001-4000 | 0\n## [5] I 1-1000 --- I 4001-5000 | 0\n## ... ... ... ... ... ... . ...\n## [2945688] XVI 940001-941000 --- XVI 942001-943000 | 12070\n## [2945689] XVI 940001-941000 --- XVI 943001-944000 | 12070\n## [2945690] XVI 941001-942000 --- XVI 941001-942000 | 12071\n## [2945691] XVI 941001-942000 --- XVI 942001-943000 | 12071\n## [2945692] XVI 941001-942000 --- XVI 943001-944000 | 12071\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 0 15 0.0663491\n## [2] 1 21 0.1273505\n## [3] 2 21 0.0738691\n## [4] 3 38 0.0827051\n## [5] 4 17 0.0591984\n## ... ... ... ...\n## [2945688] 12072 11 0.0575550\n## [2945689] 12073 1 NaN\n## [2945690] 12071 74 0.0504615\n## [2945691] 12072 39 0.1624599\n## [2945692] 12073 1 NaN\n## -------\n## regions: 12079 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nscores(hic)\n## List of length 2\n## names(2): count balanced\n\ntopologicalFeatures(hic)\n## List of length 4\n## names(4): compartments borders loops viewpoints\n\npairsFile(hic)\n## NULL\n\nmetadata(hic)\n## list()\n\nimport also works for other types of ContactFile (HicFile, HicproFile, PairsFile), e.g. 
\n\nFor HicFile and HicproFile, import seamlessly returns a HiCExperiment as well:\n\n\nhf <- HicFile(hicf)\nhic <- import(hf)\nhic\n## `HiCExperiment` object with 13,681,280 contacts over 12,165 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16f2cb31ab6_7836\" \n## focus: \"whole genome\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 2965693 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nFor PairsFile, the returned object is a representation of Hi-C “pairs” in R, i.e. GInteractions\n\n\n\npf <- PairsFile(pairsf)\npairs <- import(pf)\npairs\n## GInteractions object with 471364 interactions and 3 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | frag1 frag2\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric> <numeric>\n## [1] II 105 --- II 48548 | 1358 1681\n## [2] II 113 --- II 45003 | 1358 1658\n## [3] II 119 --- II 687251 | 1358 5550\n## [4] II 160 --- II 26124 | 1358 1510\n## [5] II 169 --- II 39052 | 1358 1613\n## ... ... ... ... ... ... . ... ...\n## [471360] II 808605 --- II 809683 | 6316 6320\n## [471361] II 808609 --- II 809917 | 6316 6324\n## [471362] II 808617 --- II 809506 | 6316 6319\n## [471363] II 809447 --- II 809685 | 6319 6321\n## [471364] II 809472 --- II 809675 | 6319 6320\n## distance\n## <integer>\n## [1] 48443\n## [2] 44890\n## [3] 687132\n## [4] 25964\n## [5] 38883\n## ... 
...\n## [471360] 1078\n## [471361] 1308\n## [471362] 889\n## [471363] 238\n## [471364] 203\n## -------\n## regions: 549331 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\n\n2.4.1.2 Customizing the import\n\nTo reduce the import to only parse the data that is relevant to the study, two arguments can be passed to import, along with a ContactFile.\n\n\n\n\n\n\nKey import arguments:\n\n\n\n\n\nfocus: This can be used to only parse data for a specific genomic location.\n\nresolution: This can be used to choose which resolution to parse the contact matrix at (this is ignored if the ContactFile is not multi-resolution, e.g. .cool or HiC-Pro generated matrices)\n\n\n\n\nImport interactions within a single chromosome:\n\n\nhic <- import(cf, focus = 'II', resolution = 2000)\n\nregions(hic) # ---- `regions()` work on `HiCExperiment` the same way than on `GInteractions`\n## GRanges object with 407 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## II_1_2000 II 1-2000 * | 116 NaN II\n## II_2001_4000 II 2001-4000 * | 117 NaN II\n## II_4001_6000 II 4001-6000 * | 118 NaN II\n## II_6001_8000 II 6001-8000 * | 119 NaN II\n## II_8001_10000 II 8001-10000 * | 120 0.0461112 II\n## ... ... ... ... . ... ... ...\n## II_804001_806000 II 804001-806000 * | 518 0.0493107 II\n## II_806001_808000 II 806001-808000 * | 519 0.0611355 II\n## II_808001_810000 II 808001-810000 * | 520 NaN II\n## II_810001_812000 II 810001-812000 * | 521 NaN II\n## II_812001_813184 II 812001-813184 * | 522 NaN II\n## center\n## <integer>\n## II_1_2000 1000\n## II_2001_4000 3000\n## II_4001_6000 5000\n## II_6001_8000 7000\n## II_8001_10000 9000\n## ... 
...\n## II_804001_806000 805000\n## II_806001_808000 807000\n## II_808001_810000 809000\n## II_810001_812000 811000\n## II_812001_813184 812592\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\ntable(seqnames(regions(hic)))\n## \n## I II III IV V VI VII VIII IX X XI XII XIII XIV XV \n## 0 407 0 0 0 0 0 0 0 0 0 0 0 0 0 \n## XVI \n## 0\n\nanchors(hic) # ---- `anchors()` work on `HiCExperiment` the same way than on `GInteractions`\n## $first\n## GRanges object with 34063 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] II 1-2000 * | 116 NaN II\n## [2] II 1-2000 * | 116 NaN II\n## [3] II 1-2000 * | 116 NaN II\n## [4] II 1-2000 * | 116 NaN II\n## [5] II 1-2000 * | 116 NaN II\n## ... ... ... ... . ... ... ...\n## [34059] II 804001-806000 * | 518 0.0493107 II\n## [34060] II 806001-808000 * | 519 0.0611355 II\n## [34061] II 806001-808000 * | 519 0.0611355 II\n## [34062] II 806001-808000 * | 519 0.0611355 II\n## [34063] II 808001-810000 * | 520 NaN II\n## center\n## <integer>\n## [1] 1000\n## [2] 1000\n## [3] 1000\n## [4] 1000\n## [5] 1000\n## ... ...\n## [34059] 805000\n## [34060] 807000\n## [34061] 807000\n## [34062] 807000\n## [34063] 809000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 34063 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] II 1-2000 * | 116 NaN II\n## [2] II 4001-6000 * | 118 NaN II\n## [3] II 6001-8000 * | 119 NaN II\n## [4] II 8001-10000 * | 120 0.0461112 II\n## [5] II 10001-12000 * | 121 0.0334807 II\n## ... ... ... ... . ... ... 
...\n## [34059] II 810001-812000 * | 521 NaN II\n## [34060] II 806001-808000 * | 519 0.0611355 II\n## [34061] II 808001-810000 * | 520 NaN II\n## [34062] II 810001-812000 * | 521 NaN II\n## [34063] II 808001-810000 * | 520 NaN II\n## center\n## <integer>\n## [1] 1000\n## [2] 5000\n## [3] 7000\n## [4] 9000\n## [5] 11000\n## ... ...\n## [34059] 811000\n## [34060] 807000\n## [34061] 809000\n## [34062] 811000\n## [34063] 809000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\nImport interactions within a segment of a chromosome:\n\n\nhic <- import(cf, focus = 'II:40000-60000', resolution = 1000)\n\nregions(hic) \n## GRanges object with 21 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## II_39001_40000 II 39001-40000 * | 270 0.0220798 II\n## II_40001_41000 II 40001-41000 * | 271 0.0246775 II\n## II_41001_42000 II 41001-42000 * | 272 0.0269232 II\n## II_42001_43000 II 42001-43000 * | 273 0.0341849 II\n## II_43001_44000 II 43001-44000 * | 274 0.0265386 II\n## ... ... ... ... . ... ... ...\n## II_55001_56000 II 55001-56000 * | 286 0.0213532 II\n## II_56001_57000 II 56001-57000 * | 287 0.0569839 II\n## II_57001_58000 II 57001-58000 * | 288 0.0338612 II\n## II_58001_59000 II 58001-59000 * | 289 0.0294531 II\n## II_59001_60000 II 59001-60000 * | 290 0.0306662 II\n## center\n## <integer>\n## II_39001_40000 39500\n## II_40001_41000 40500\n## II_41001_42000 41500\n## II_42001_43000 42500\n## II_43001_44000 43500\n## ... 
...\n## II_55001_56000 55500\n## II_56001_57000 56500\n## II_57001_58000 57500\n## II_58001_59000 58500\n## II_59001_60000 59500\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(hic)\n## $first\n## GRanges object with 210 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] II 40001-41000 * | 271 0.0246775 II 40500\n## [2] II 40001-41000 * | 271 0.0246775 II 40500\n## [3] II 40001-41000 * | 271 0.0246775 II 40500\n## [4] II 40001-41000 * | 271 0.0246775 II 40500\n## [5] II 40001-41000 * | 271 0.0246775 II 40500\n## ... ... ... ... . ... ... ... ...\n## [206] II 57001-58000 * | 288 0.0338612 II 57500\n## [207] II 57001-58000 * | 288 0.0338612 II 57500\n## [208] II 58001-59000 * | 289 0.0294531 II 58500\n## [209] II 58001-59000 * | 289 0.0294531 II 58500\n## [210] II 59001-60000 * | 290 0.0306662 II 59500\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 210 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] II 40001-41000 * | 271 0.0246775 II 40500\n## [2] II 41001-42000 * | 272 0.0269232 II 41500\n## [3] II 42001-43000 * | 273 0.0341849 II 42500\n## [4] II 43001-44000 * | 274 0.0265386 II 43500\n## [5] II 44001-45000 * | 275 0.0488968 II 44500\n## ... ... ... ... . ... ... ... 
...\n## [206] II 58001-59000 * | 289 0.0294531 II 58500\n## [207] II 59001-60000 * | 290 0.0306662 II 59500\n## [208] II 58001-59000 * | 289 0.0294531 II 58500\n## [209] II 59001-60000 * | 290 0.0306662 II 59500\n## [210] II 59001-60000 * | 290 0.0306662 II 59500\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\nImport interactions between two chromosomes:\n\n\nhic2 <- import(cf, focus = 'II|XV', resolution = 4000)\n\nregions(hic2)\n## GRanges object with 477 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight\n## <Rle> <IRanges> <Rle> | <numeric> <numeric>\n## II_1_4000 II 1-4000 * | 58 NaN\n## II_4001_8000 II 4001-8000 * | 59 NaN\n## II_8001_12000 II 8001-12000 * | 60 0.0274474\n## II_12001_16000 II 12001-16000 * | 61 0.0342116\n## II_16001_20000 II 16001-20000 * | 62 0.0195128\n## ... ... ... ... . ... ...\n## XV_1072001_1076000 XV 1072001-1076000 * | 2783 0.041763\n## XV_1076001_1080000 XV 1076001-1080000 * | 2784 NaN\n## XV_1080001_1084000 XV 1080001-1084000 * | 2785 NaN\n## XV_1084001_1088000 XV 1084001-1088000 * | 2786 NaN\n## XV_1088001_1091291 XV 1088001-1091291 * | 2787 NaN\n## chr center\n## <Rle> <integer>\n## II_1_4000 II 2000\n## II_4001_8000 II 6000\n## II_8001_12000 II 10000\n## II_12001_16000 II 14000\n## II_16001_20000 II 18000\n## ... ... ...\n## XV_1072001_1076000 XV 1074000\n## XV_1076001_1080000 XV 1078000\n## XV_1080001_1084000 XV 1082000\n## XV_1084001_1088000 XV 1086000\n## XV_1088001_1091291 XV 1089646\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(hic2)\n## $first\n## GRanges object with 18032 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] II 1-4000 * | 58 NaN II\n## [2] II 1-4000 * | 58 NaN II\n## [3] II 1-4000 * | 58 NaN II\n## [4] II 1-4000 * | 58 NaN II\n## [5] II 1-4000 * | 58 NaN II\n## ... ... ... ... . ... ... 
...\n## [18028] II 808001-812000 * | 260 NaN II\n## [18029] II 808001-812000 * | 260 NaN II\n## [18030] II 808001-812000 * | 260 NaN II\n## [18031] II 808001-812000 * | 260 NaN II\n## [18032] II 808001-812000 * | 260 NaN II\n## center\n## <integer>\n## [1] 2000\n## [2] 2000\n## [3] 2000\n## [4] 2000\n## [5] 2000\n## ... ...\n## [18028] 810000\n## [18029] 810000\n## [18030] 810000\n## [18031] 810000\n## [18032] 810000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 18032 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] XV 48001-52000 * | 2527 0.0185354 XV\n## [2] XV 348001-352000 * | 2602 0.0233750 XV\n## [3] XV 468001-472000 * | 2632 0.0153615 XV\n## [4] XV 472001-476000 * | 2633 0.0189624 XV\n## [5] XV 584001-588000 * | 2661 0.0167715 XV\n## ... ... ... ... . ... ... ...\n## [18028] XV 980001-984000 * | 2760 0.0187827 XV\n## [18029] XV 984001-988000 * | 2761 0.0250094 XV\n## [18030] XV 992001-996000 * | 2763 0.0185599 XV\n## [18031] XV 1004001-1008000 * | 2766 0.0196942 XV\n## [18032] XV 1064001-1068000 * | 2781 0.0208220 XV\n## center\n## <integer>\n## [1] 50000\n## [2] 350000\n## [3] 470000\n## [4] 474000\n## [5] 586000\n## ... 
...\n## [18028] 982000\n## [18029] 986000\n## [18030] 994000\n## [18031] 1006000\n## [18032] 1066000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\nImport interactions between segments of two chromosomes:\n\n\nhic3 <- import(cf, focus = 'III:10000-40000|XV:10000-40000', resolution = 2000)\n\nregions(hic3)\n## GRanges object with 32 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## III_8001_10000 III 8001-10000 * | 527 NaN III\n## III_10001_12000 III 10001-12000 * | 528 NaN III\n## III_12001_14000 III 12001-14000 * | 529 NaN III\n## III_14001_16000 III 14001-16000 * | 530 0.0356351 III\n## III_16001_18000 III 16001-18000 * | 531 0.0230693 III\n## ... ... ... ... . ... ... ...\n## XV_30001_32000 XV 30001-32000 * | 5039 0.0482465 XV\n## XV_32001_34000 XV 32001-34000 * | 5040 0.0241580 XV\n## XV_34001_36000 XV 34001-36000 * | 5041 0.0273166 XV\n## XV_36001_38000 XV 36001-38000 * | 5042 0.0542235 XV\n## XV_38001_40000 XV 38001-40000 * | 5043 0.0206849 XV\n## center\n## <integer>\n## III_8001_10000 9000\n## III_10001_12000 11000\n## III_12001_14000 13000\n## III_14001_16000 15000\n## III_16001_18000 17000\n## ... 
...\n## XV_30001_32000 31000\n## XV_32001_34000 33000\n## XV_34001_36000 35000\n## XV_36001_38000 37000\n## XV_38001_40000 39000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(hic3)\n## $first\n## GRanges object with 11 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] III 14001-16000 * | 530 0.0356351 III 15000\n## [2] III 16001-18000 * | 531 0.0230693 III 17000\n## [3] III 16001-18000 * | 531 0.0230693 III 17000\n## [4] III 20001-22000 * | 533 0.0343250 III 21000\n## [5] III 22001-24000 * | 534 0.0258604 III 23000\n## [6] III 24001-26000 * | 535 0.0290757 III 25000\n## [7] III 28001-30000 * | 537 0.0290713 III 29000\n## [8] III 30001-32000 * | 538 0.0266373 III 31000\n## [9] III 32001-34000 * | 539 0.0201137 III 33000\n## [10] III 32001-34000 * | 539 0.0201137 III 33000\n## [11] III 36001-38000 * | 541 0.0220603 III 37000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 11 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr center\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>\n## [1] XV 16001-18000 * | 5032 0.0187250 XV 17000\n## [2] XV 16001-18000 * | 5032 0.0187250 XV 17000\n## [3] XV 20001-22000 * | 5034 0.0247973 XV 21000\n## [4] XV 14001-16000 * | 5031 0.0379727 XV 15000\n## [5] XV 10001-12000 * | 5029 0.0296913 XV 11000\n## [6] XV 32001-34000 * | 5040 0.0241580 XV 33000\n## [7] XV 16001-18000 * | 5032 0.0187250 XV 17000\n## [8] XV 38001-40000 * | 5043 0.0206849 XV 39000\n## [9] XV 22001-24000 * | 5035 0.0613856 XV 23000\n## [10] XV 30001-32000 * | 5039 0.0482465 XV 31000\n## [11] XV 10001-12000 * | 5029 0.0296913 XV 11000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\n2.4.2 Interacting with HiCExperiment data\n\nAn HiCExperiment object allows parsing of a disk-stored contact matrix.\nAn HiCExperiment 
object operates by wrapping together (1) a ContactFile (i.e. a connection to a disk-stored data file) and (2) a GInteractions generated by parsing the data file.\n\nWe will use the yeast_hic HiCExperiment object to demonstrate how to parse information from a HiCExperiment object.\n\nyeast_hic <- contacts_yeast()\n\n\nyeast_hic\n## `HiCExperiment` object with 8,757,906 contacts over 763 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"whole genome\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 16000 \n## interactions: 267709 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 \n## metadata(3): ID org date\n\n\n2.4.2.1 Interactions\nThe imported genomic interactions can be directly exposed using the interactions function and are returned as a GInteractions object.\n\ninteractions(yeast_hic)\n## GInteractions object with 267709 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] I 1-16000 --- I 1-16000 | 0\n## [2] I 1-16000 --- I 16001-32000 | 0\n## [3] I 1-16000 --- I 32001-48000 | 0\n## [4] I 1-16000 --- I 48001-64000 | 0\n## [5] I 1-16000 --- I 64001-80000 | 0\n## ... ... ... ... ... ... . ...\n## [267705] XVI 896001-912000 --- XVI 912001-928000 | 759\n## [267706] XVI 896001-912000 --- XVI 928001-944000 | 759\n## [267707] XVI 912001-928000 --- XVI 912001-928000 | 760\n## [267708] XVI 912001-928000 --- XVI 928001-944000 | 760\n## [267709] XVI 928001-944000 --- XVI 928001-944000 | 761\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 0 2836 1.0943959\n## [2] 1 2212 0.9592069\n## [3] 2 1183 0.4385242\n## [4] 3 831 0.2231192\n## [5] 4 310 0.0821255\n## ... ... ... 
...\n## [267705] 760 3565 1.236371\n## [267706] 761 1359 0.385016\n## [267707] 760 3534 2.103988\n## [267708] 761 3055 1.485794\n## [267709] 761 4308 1.711565\n## -------\n## regions: 763 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nBecause genomic interactions are actually stored as GInteractions, regions and anchors work on HiCExperiment objects just as they work with GInteractions!\n\nregions(yeast_hic)\n## GRanges object with 763 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight\n## <Rle> <IRanges> <Rle> | <numeric> <numeric>\n## I_1_16000 I 1-16000 * | 0 0.0196442\n## I_16001_32000 I 16001-32000 * | 1 0.0220746\n## I_32001_48000 I 32001-48000 * | 2 0.0188701\n## I_48001_64000 I 48001-64000 * | 3 0.0136679\n## I_64001_80000 I 64001-80000 * | 4 0.0134860\n## ... ... ... ... . ... ...\n## XVI_880001_896000 XVI 880001-896000 * | 758 0.00910873\n## XVI_896001_912000 XVI 896001-912000 * | 759 0.01421350\n## XVI_912001_928000 XVI 912001-928000 * | 760 0.02439992\n## XVI_928001_944000 XVI 928001-944000 * | 761 0.01993237\n## XVI_944001_948066 XVI 944001-948066 * | 762 NaN\n## chr center\n## <Rle> <integer>\n## I_1_16000 I 8000\n## I_16001_32000 I 24000\n## I_32001_48000 I 40000\n## I_48001_64000 I 56000\n## I_64001_80000 I 72000\n## ... ... ...\n## XVI_880001_896000 XVI 888000\n## XVI_896001_912000 XVI 904000\n## XVI_912001_928000 XVI 920000\n## XVI_928001_944000 XVI 936000\n## XVI_944001_948066 XVI 946033\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\nanchors(yeast_hic)\n## $first\n## GRanges object with 267709 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] I 1-16000 * | 0 0.0196442 I\n## [2] I 1-16000 * | 0 0.0196442 I\n## [3] I 1-16000 * | 0 0.0196442 I\n## [4] I 1-16000 * | 0 0.0196442 I\n## [5] I 1-16000 * | 0 0.0196442 I\n## ... ... ... ... . ... ... 
...\n## [267705] XVI 896001-912000 * | 759 0.0142135 XVI\n## [267706] XVI 896001-912000 * | 759 0.0142135 XVI\n## [267707] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267708] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267709] XVI 928001-944000 * | 761 0.0199324 XVI\n## center\n## <integer>\n## [1] 8000\n## [2] 8000\n## [3] 8000\n## [4] 8000\n## [5] 8000\n## ... ...\n## [267705] 904000\n## [267706] 904000\n## [267707] 920000\n## [267708] 920000\n## [267709] 936000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n## \n## $second\n## GRanges object with 267709 ranges and 4 metadata columns:\n## seqnames ranges strand | bin_id weight chr\n## <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>\n## [1] I 1-16000 * | 0 0.0196442 I\n## [2] I 16001-32000 * | 1 0.0220746 I\n## [3] I 32001-48000 * | 2 0.0188701 I\n## [4] I 48001-64000 * | 3 0.0136679 I\n## [5] I 64001-80000 * | 4 0.0134860 I\n## ... ... ... ... . ... ... ...\n## [267705] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267706] XVI 928001-944000 * | 761 0.0199324 XVI\n## [267707] XVI 912001-928000 * | 760 0.0243999 XVI\n## [267708] XVI 928001-944000 * | 761 0.0199324 XVI\n## [267709] XVI 928001-944000 * | 761 0.0199324 XVI\n## center\n## <integer>\n## [1] 8000\n## [2] 24000\n## [3] 40000\n## [4] 56000\n## [5] 72000\n## ... ...\n## [267705] 920000\n## [267706] 936000\n## [267707] 920000\n## [267708] 936000\n## [267709] 936000\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\n2.4.2.2 Bins and seqinfo\nAdditional useful information can be recovered from a HiCExperiment object. This includes:\n\nThe seqinfo of the HiCExperiment:\n\n\nseqinfo(yeast_hic)\n## Seqinfo object with 16 sequences from an unspecified genome:\n## seqnames seqlengths isCircular genome\n## I 230218 <NA> <NA>\n## II 813184 <NA> <NA>\n## III 316620 <NA> <NA>\n## IV 1531933 <NA> <NA>\n## V 576874 <NA> <NA>\n## ... ... ... 
...\n## XII 1078177 <NA> <NA>\n## XIII 924431 <NA> <NA>\n## XIV 784333 <NA> <NA>\n## XV 1091291 <NA> <NA>\n## XVI 948066 <NA> <NA>\n\nThis lists the different chromosomes available to parse along with their length.\n\nThe bins of the HiCExperiment:\n\n\nbins(yeast_hic)\n## GRanges object with 763 ranges and 2 metadata columns:\n## seqnames ranges strand | bin_id weight\n## <Rle> <IRanges> <Rle> | <numeric> <numeric>\n## I_1_16000 I 1-16000 * | 0 0.0196442\n## I_16001_32000 I 16001-32000 * | 1 0.0220746\n## I_32001_48000 I 32001-48000 * | 2 0.0188701\n## I_48001_64000 I 48001-64000 * | 3 0.0136679\n## I_64001_80000 I 64001-80000 * | 4 0.0134860\n## ... ... ... ... . ... ...\n## XVI_880001_896000 XVI 880001-896000 * | 758 0.00910873\n## XVI_896001_912000 XVI 896001-912000 * | 759 0.01421350\n## XVI_912001_928000 XVI 912001-928000 * | 760 0.02439992\n## XVI_928001_944000 XVI 928001-944000 * | 761 0.01993237\n## XVI_944001_948066 XVI 944001-948066 * | 762 NaN\n## -------\n## seqinfo: 16 sequences from an unspecified genome\n\n\n\n\n\n\n\nDifference between bins and regions\n\n\n\nbins are not equivalent to regions of an HiCExperiment.\n\n\nbins refer to all the possible regions of a HiCExperiment. For instance, for a HiCExperiment with a total genome size of 1,000,000 and a resolution of 2000, bins will always return a GRanges object with 500 ranges.\n\nregions, on the opposite, refer to the union of anchors of all the interactions imported in a HiCExperiment object.\n\nThus, all the regions will necessarily be a subset of the HiCExperiment bins, or equal to bins if no focus has been specified when importing a ContactFile.\n\n\n\n2.4.2.3 Scores\nOf course, what the end-user would be looking for is the frequency for each genomic interaction. Such frequency scores are available using the scores function. 
scores returns a list with a number of different types of scores.\n\nhead(scores(yeast_hic))\n## List of length 2\n## names(2): count balanced\n\nhead(scores(yeast_hic, \"count\"))\n## [1] 2836 2212 1183 831 310 159\n\nhead(scores(yeast_hic, \"balanced\"))\n## [1] 1.09439586 0.95920688 0.43852417 0.22311917 0.08212549 0.03345221\n\nCalling interactions(hic) returns a GInteractions with scores already stored in extra columns. This short-hand allows one to dynamically check scores directly from the interactions output.\n\ninteractions(yeast_hic)\n## GInteractions object with 267709 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] I 1-16000 --- I 1-16000 | 0\n## [2] I 1-16000 --- I 16001-32000 | 0\n## [3] I 1-16000 --- I 32001-48000 | 0\n## [4] I 1-16000 --- I 48001-64000 | 0\n## [5] I 1-16000 --- I 64001-80000 | 0\n## ... ... ... ... ... ... . ...\n## [267705] XVI 896001-912000 --- XVI 912001-928000 | 759\n## [267706] XVI 896001-912000 --- XVI 928001-944000 | 759\n## [267707] XVI 912001-928000 --- XVI 912001-928000 | 760\n## [267708] XVI 912001-928000 --- XVI 928001-944000 | 760\n## [267709] XVI 928001-944000 --- XVI 928001-944000 | 761\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 0 2836 1.0943959\n## [2] 1 2212 0.9592069\n## [3] 2 1183 0.4385242\n## [4] 3 831 0.2231192\n## [5] 4 310 0.0821255\n## ... ... ... ...\n## [267705] 760 3565 1.236371\n## [267706] 761 1359 0.385016\n## [267707] 760 3534 2.103988\n## [267708] 761 3055 1.485794\n## [267709] 761 4308 1.711565\n## -------\n## regions: 763 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nhead(interactions(yeast_hic)$count)\n## [1] 2836 2212 1183 831 310 159\n\n\n2.4.2.4 topologicalFeatures\nIn Hi-C studies, “topological features” refer to genomic structures identified (usually from a Hi-C map, but not necessarily). 
For instance, one may want to study known structural loops anchored at CTCF sites, or interactions around or over centromeres, or simply specific genomic “viewpoints”.\nHiCExperiment objects can store topologicalFeatures to facilitate this analysis. By default, four empty topologicalFeatures are stored in a list:\n\ncompartments\nborders\nloops\nviewpoints\n\nAdditional topologicalFeatures can be added to this list (read next chapter for more detail).\n\ntopologicalFeatures(yeast_hic)\n## List of length 5\n## names(5): compartments borders loops viewpoints centromeres\n\ntopologicalFeatures(yeast_hic, 'centromeres')\n## GRanges object with 16 ranges and 0 metadata columns:\n## seqnames ranges strand\n## <Rle> <IRanges> <Rle>\n## [1] I 151583-151641 +\n## [2] II 238361-238419 +\n## [3] III 114322-114380 +\n## [4] IV 449879-449937 +\n## [5] V 152522-152580 +\n## ... ... ... ...\n## [12] XII 151366-151424 +\n## [13] XIII 268222-268280 +\n## [14] XIV 628588-628646 +\n## [15] XV 326897-326955 +\n## [16] XVI 556255-556313 +\n## -------\n## seqinfo: 17 sequences (1 circular) from R64-1-1 genome\n\n\n2.4.2.5 pairsFile\nAs a contact matrix is typically obtained from binning a .pairs file, it is often the case that the matching .pairs file is available to then end-user. A PairsFile can thus be created and associated to the corresponding HiCExperiment object. This allows more accurate estimation of contact distribution, e.g. 
when calculating distance-dependent genomic interaction frequency.\n\npairsFile(yeast_hic) <- pairsf\n\npairsFile(yeast_hic)\n## EH7703 \n## \"/root/.cache/R/ExperimentHub/16f6ddc5c03_7753\"\n\nreadLines(pairsFile(yeast_hic), 25)\n## [1] \"## pairs format v1.0\" \n## [2] \"#sorted: chr1-pos1-chr2-pos2\" \n## [3] \"#columns: readID chr1 pos1 chr2 pos2 strand1 strand2 frag1 frag2\" \n## [4] \"#chromsize: I 230218\" \n## [5] \"#chromsize: II 813184\" \n## [6] \"#chromsize: III 316620\" \n## [7] \"#chromsize: IV 1531933\" \n## [8] \"#chromsize: V 576874\" \n## [9] \"#chromsize: VI 270161\" \n## [10] \"#chromsize: VII 1090940\" \n## [11] \"#chromsize: VIII 562643\" \n## [12] \"#chromsize: IX 439888\" \n## [13] \"#chromsize: X 745751\" \n## [14] \"#chromsize: XI 666816\" \n## [15] \"#chromsize: XII 1078177\" \n## [16] \"#chromsize: XIII 924431\" \n## [17] \"#chromsize: XIV 784333\" \n## [18] \"#chromsize: XV 1091291\" \n## [19] \"#chromsize: XVI 948066\" \n## [20] \"#chromsize: Mito 85779\" \n## [21] \"NS500150:527:HHGYNBGXF:3:21611:19085:3986\\tII\\t105\\tII\\t48548\\t+\\t-\\t1358\\t1681\" \n## [22] \"NS500150:527:HHGYNBGXF:4:13604:19734:2406\\tII\\t113\\tII\\t45003\\t-\\t+\\t1358\\t1658\" \n## [23] \"NS500150:527:HHGYNBGXF:2:11108:25178:11036\\tII\\t119\\tII\\t687251\\t-\\t+\\t1358\\t5550\"\n## [24] \"NS500150:527:HHGYNBGXF:1:22301:8468:1586\\tII\\t160\\tII\\t26124\\t+\\t-\\t1358\\t1510\" \n## [25] \"NS500150:527:HHGYNBGXF:4:23606:24037:2076\\tII\\t169\\tII\\t39052\\t+\\t+\\t1358\\t1613\"\n\n\n2.4.2.6 Importing a PairsFile\n\nThe .pairs file linked to a HiCExperiment object can itself be imported in a GInteractions object:\n\nimport(pairsFile(yeast_hic), format = 'pairs')\n## GInteractions object with 471364 interactions and 3 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | frag1 frag2\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric> <numeric>\n## [1] II 105 --- II 48548 | 1358 1681\n## [2] II 113 --- II 45003 | 1358 1658\n## [3] II 119 --- II 687251 | 1358 
5550\n## [4] II 160 --- II 26124 | 1358 1510\n## [5] II 169 --- II 39052 | 1358 1613\n## ... ... ... ... ... ... . ... ...\n## [471360] II 808605 --- II 809683 | 6316 6320\n## [471361] II 808609 --- II 809917 | 6316 6324\n## [471362] II 808617 --- II 809506 | 6316 6319\n## [471363] II 809447 --- II 809685 | 6319 6321\n## [471364] II 809472 --- II 809675 | 6319 6320\n## distance\n## <integer>\n## [1] 48443\n## [2] 44890\n## [3] 687132\n## [4] 25964\n## [5] 38883\n## ... ...\n## [471360] 1078\n## [471361] 1308\n## [471362] 889\n## [471363] 238\n## [471364] 203\n## -------\n## regions: 549331 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\nNote that these GInteractions are not binned, contrary to interactions extracted from a HiCExperiment. Anchors of the interactions listed in the GInteractions imported from a disk-stored .pairs file are all of width 1." }, { "objectID": "pages/data-representation.html#visual-summary-of-the-hicexperiment-data-structure", @@ -137,21 +137,21 @@ "href": "pages/parsing.html#subsetting-a-contact-matrix", "title": "\n3  Manipulating Hi-C data in R\n", "section": "\n3.1 Subsetting a contact matrix", - "text": "3.1 Subsetting a contact matrix\nTwo entirely different approaches are possible to subset of a Hi-C contact matrix:\n\nSubsetting before importing: leveraging random access to a disk-stored contact matrix to only import interactions overlapping with a genomic locus of interest.\nSubsetting after importing: parsing the entire contact matrix in memory, and subsequently subset interactions overlapping with a genomic locus of interest.\n\n\n\n3.1.1 Subsetting before import: with focus\n\nSpecifying a focus when importing a dataset in R (i.e. 
\"Subset first, then parse\") is generally the recommended approach to import Hi-C data in R.\nThe focus argument can be set when importing a ContactFile in R, as follows:\n\nimport(cf, focus = \"...\")\n\nThis ensures that only the needed data is parsed in R, reducing memory load and accelerating the import. Thus, this should be the preferred way of parsing HiCExperiment data, as disk-stored contact matrices allow efficient random access to indexed data.\nfocus can be any of the following string types:\n\n# \"II\" --> import contacts over an entire chromosome\n# \"II:300001-800000\" --> import on-diagonal contacts within a chromosome\n# \"II:300001-400000|II:600001-700000\" --> import off-diagonal contacts within a chromosome\n# \"II|III\" --> import contacts between two chromosomes\n# \"II:300001-800000|V:1-500000\" --> import contacts between segments of two chromosomes\n\n\n\n\n\n\n\nMore examples for import with focus argument 👇\n\n\n\n\n\n\nSubsetting to a specific on-diagonal genomic location using standard UCSC coordinates query:\n\n\nimport(cf, focus = 'II:300001-800000', resolution = 2000)\n## `HiCExperiment` object with 301,018 contacts over 250 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300,001-800,000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 17974 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting to a specific off-diagonal genomic location using pairs of coordinates query:\n\n\nimport(cf, focus = 'II:300001-400000|II:600001-700000', resolution = 2000)\n## `HiCExperiment` object with 402 contacts over 100 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300001-400000|II:600001-700000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 
357 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those constrained within a single chromosome:\n\n\nimport(cf, focus = 'II', resolution = 2000)\n## `HiCExperiment` object with 471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between two chromosomes:\n\n\nimport(cf, focus = 'II|III', resolution = 2000)\n## `HiCExperiment` object with 9,092 contacts over 566 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II|III\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 7438 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between parts of two chromosomes:\n\n\nimport(cf, focus = 'II:300001-800000|V:1-500000', resolution = 2000)\n## `HiCExperiment` object with 7,147 contacts over 500 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300001-800000|V:1-500000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 6523 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\n\n\n\n\n3.1.2 Subsetting after import\nIt may sometimes be desirable to import a full dataset from disk first, and only then perform in-memory subsetting of the HiCExperiment object (i.e. 
\"Parse first, then subset\"). This is for example necessary when the end user aims to investigate subsets of interactions across a large number of different areas of a contact matrix.\nSeveral strategies are possible to allow subsetting of imported data, either with subsetByOverlaps or [.\n\n3.1.2.1 subsetByOverlaps(<HiCExperiment>, <GRanges>)\n\nsubsetByOverlaps can take a HiCExperiment as a query and a GRanges as a query. In this case, the GRanges is used to extract a subset of a HiCExperiment constrained within a specific genomic location.\n\ntelomere <- GRanges(\"II:700001-813184\")\nsubsetByOverlaps(hic, telomere) |> interactions()\n## GInteractions object with 1540 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] II 700001-702000 --- II 700001-702000 | 466\n## [2] II 700001-702000 --- II 702001-704000 | 466\n## [3] II 700001-702000 --- II 704001-706000 | 466\n## [4] II 700001-702000 --- II 706001-708000 | 466\n## [5] II 700001-702000 --- II 708001-710000 | 466\n## ... ... ... ... ... ... . ...\n## [1536] II 804001-806000 --- II 810001-812000 | 518\n## [1537] II 806001-808000 --- II 806001-808000 | 519\n## [1538] II 806001-808000 --- II 808001-810000 | 519\n## [1539] II 806001-808000 --- II 810001-812000 | 519\n## [1540] II 808001-810000 --- II 808001-810000 | 520\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 466 30 0.0283618\n## [2] 467 145 0.0709380\n## [3] 468 124 0.0704979\n## [4] 469 59 0.0510221\n## [5] 470 59 0.0384004\n## ... ... ... ...\n## [1536] 521 1 NaN\n## [1537] 519 15 0.0560633\n## [1538] 520 25 NaN\n## [1539] 521 1 NaN\n## [1540] 520 10 NaN\n## -------\n## regions: 57 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nBy default, subsetByOverlaps(hic, telomere) will only recover interactions constrained within telomere, i.e. 
interactions for which both ends are in telomere.\nAlternatively, type = \"any\" can be specified to get all interactions with at least one of their anchors within telomere.\n\nsubsetByOverlaps(hic, telomere, type = \"any\") |> interactions()\n## GInteractions object with 6041 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] II 300001-302000 --- II 702001-704000 | 266\n## [2] II 300001-302000 --- II 704001-706000 | 266\n## [3] II 300001-302000 --- II 768001-770000 | 266\n## [4] II 300001-302000 --- II 784001-786000 | 266\n## [5] II 302001-304000 --- II 740001-742000 | 267\n## ... ... ... ... ... ... . ...\n## [6037] II 804001-806000 --- II 810001-812000 | 518\n## [6038] II 806001-808000 --- II 806001-808000 | 519\n## [6039] II 806001-808000 --- II 808001-810000 | 519\n## [6040] II 806001-808000 --- II 810001-812000 | 519\n## [6041] II 808001-810000 --- II 808001-810000 | 520\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 467 1 0.000590999\n## [2] 468 1 0.000686799\n## [3] 500 1 0.000728215\n## [4] 508 1 0.000923092\n## [5] 486 1 0.000382222\n## ... ... ... ...\n## [6037] 521 1 NaN\n## [6038] 519 15 0.0560633\n## [6039] 520 25 NaN\n## [6040] 521 1 NaN\n## [6041] 520 10 NaN\n## -------\n## regions: 257 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\n\n3.1.2.2 <HiCExperiment>[\"...\"]\n\nThe square bracket operator [ allows for more advanced textual queries, similarly to focus arguments that can be used when importing contact matrices in memory.\nThis ensures that only the needed data is parsed in R, reducing memory load and accelerating the import. 
Thus, this should be the preferred way of parsing HiCExperiment data, as disk-stored contact matrices allow efficient random access to indexed data.\nThe following string types can be used to subset a HiCExperiment object with the [ notation:\n\n# \"II\" --> import contacts over an entire chromosome\n# \"II:300001-800000\" --> import on-diagonal contacts within a chromosome\n# \"II:300001-400000|II:600001-700000\" --> import off-diagonal contacts within a chromosome\n# \"II|III\" --> import contacts between two chromosomes\n# \"II:300001-800000|V:1-500000\" --> import contacts between segments of two chromosomes\n# c(\"II\", \"III\", \"IV\") --> import contacts within and between several chromosomes\n\n\n\n\n\n\n\nMore examples for subsetting with [ 👇\n\n\n\n\n\n\nSubsetting to a specific on-diagonal genomic location using standard UCSC coordinates query:\n\n\nhic[\"II:800001-813184\"]\n## `HiCExperiment` object with 1,040 contacts over 6 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:800,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 19 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting to a specific off-diagonal genomic location using pairs of coordinates query:\n\n\nhic[\"II:300001-320000|II:800001-813184\"]\n## `HiCExperiment` object with 3 contacts over 6 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300001-320000|II:800001-813184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 3 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those constrained within a single chromosome:\n\n\nhic[\"II\"]\n## 
`HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between two chromosomes:\n\n\nhic[\"II|IV\"]\n## `HiCExperiment` object with 0 contacts over 0 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:1-813184|IV:1-1531933\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 0 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between segments of two chromosomes:\n\n\nhic[\"II:300001-320000|IV:1-100000\"]\n## `HiCExperiment` object with 0 contacts over 0 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300001-320000|IV:1-100000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 0 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those constrained within several chromosomes:\n\n\nhic[c('II', 'III', 'IV')]\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II, III, IV\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nSome 
notes:\n\nThis last example (subsetting for a vector of several chromosomes) is the only scenario for which [-based in-memory subsetting of pre-imported data is the only way to go, as such subsetting is not possible with focus from disk-stored data.\nAll the other [ subsetting scenarii illustrated above can be achieved more efficiently using the focus argument when importing data into a HiCExperiment object.\nHowever, keep in mind that subsetting preserves extra data, e.g. added scores, topologicalFeatures, metadata or pairsFile, whereas this information is lost using focus with import.\n\n\n\n\n\n3.1.3 Zooming on a HiCExperiment\n\n“Zooming” refers to dynamically changing the resolution of a HiCExperiment. By zooming a HiCExperiment, one can refine or coarsen the contact matrix. This operation takes aContactFile and focus from an existing HiCExperiment input and re-generates a new HiCExperiment with updated resolution, interactions and scores. Note that zoom will preserve existing metadata, topologicalFeatures and pairsFile information.\n\nhic\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nzoom(hic, 4000)\n## `HiCExperiment` object with 306,212 contacts over 129 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 4000 \n## interactions: 6800 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nzoom(hic, 1000)\n## `HiCExperiment` object with 306,212 contacts over 
514 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 44363 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\n\n\n\n\n\nNote\n\n\n\nThe sum of raw counts do not change after zooming, however the number of individual interactions and regions changes.\n\nlength(hic)\n## [1] 18513\nlength(zoom(hic, 1000))\n## [1] 44363\nlength(zoom(hic, 4000))\n## [1] 6800\nsum(scores(hic, \"count\"))\n## [1] 306212\nsum(scores(zoom(hic, 1000), \"count\"))\n## [1] 306212\nsum(scores(zoom(hic, 4000), \"count\"))\n## [1] 306212\n\n\n\n\n\n\n\n\n\nImportant\n\n\n\n\n\nzoom does not change the focus! It only affects the resolution (and consequently, the interactions).\n\nzoom will only work for multi-resolution contact matrices, e.g. .mcool or .hic." + "text": "3.1 Subsetting a contact matrix\nTwo entirely different approaches are possible to subset of a Hi-C contact matrix:\n\nSubsetting before importing: leveraging random access to a disk-stored contact matrix to only import interactions overlapping with a genomic locus of interest.\nSubsetting after importing: parsing the entire contact matrix in memory, and subsequently subset interactions overlapping with a genomic locus of interest.\n\n\n\n3.1.1 Subsetting before import: with focus\n\nSpecifying a focus when importing a dataset in R (i.e. \"Subset first, then parse\") is generally the recommended approach to import Hi-C data in R.\nThe focus argument can be set when importing a ContactFile in R, as follows:\n\nimport(cf, focus = \"...\")\n\nThis ensures that only the needed data is parsed in R, reducing memory load and accelerating the import. 
Thus, this should be the preferred way of parsing HiCExperiment data, as disk-stored contact matrices allow efficient random access to indexed data.\nfocus can be any of the following string types:\n\n# \"II\" --> import contacts over an entire chromosome\n# \"II:300001-800000\" --> import on-diagonal contacts within a chromosome\n# \"II:300001-400000|II:600001-700000\" --> import off-diagonal contacts within a chromosome\n# \"II|III\" --> import contacts between two chromosomes\n# \"II:300001-800000|V:1-500000\" --> import contacts between segments of two chromosomes\n\n\n\n\n\n\n\nMore examples for import with focus argument 👇\n\n\n\n\n\n\nSubsetting to a specific on-diagonal genomic location using standard UCSC coordinates query:\n\n\nimport(cf, focus = 'II:300001-800000', resolution = 2000)\n## `HiCExperiment` object with 301,018 contacts over 250 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300,001-800,000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 17974 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting to a specific off-diagonal genomic location using pairs of coordinates query:\n\n\nimport(cf, focus = 'II:300001-400000|II:600001-700000', resolution = 2000)\n## `HiCExperiment` object with 402 contacts over 100 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300001-400000|II:600001-700000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 357 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those constrained within a single chromosome:\n\n\nimport(cf, focus = 'II', resolution = 2000)\n## `HiCExperiment` object with 
471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between two chromosomes:\n\n\nimport(cf, focus = 'II|III', resolution = 2000)\n## `HiCExperiment` object with 9,092 contacts over 566 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II|III\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 7438 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between parts of two chromosomes:\n\n\nimport(cf, focus = 'II:300001-800000|V:1-500000', resolution = 2000)\n## `HiCExperiment` object with 7,147 contacts over 500 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300001-800000|V:1-500000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 6523 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\n\n\n\n\n3.1.2 Subsetting after import\nIt may sometimes be desirable to import a full dataset from disk first, and only then perform in-memory subsetting of the HiCExperiment object (i.e. \"Parse first, then subset\"). 
This is for example necessary when the end user aims to investigate subsets of interactions across a large number of different areas of a contact matrix.\nSeveral strategies are possible to allow subsetting of imported data, either with subsetByOverlaps or [.\n\n3.1.2.1 subsetByOverlaps(<HiCExperiment>, <GRanges>)\n\nsubsetByOverlaps can take a HiCExperiment as a query and a GRanges as a query. In this case, the GRanges is used to extract a subset of a HiCExperiment constrained within a specific genomic location.\n\ntelomere <- GRanges(\"II:700001-813184\")\nsubsetByOverlaps(hic, telomere) |> interactions()\n## GInteractions object with 1540 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] II 700001-702000 --- II 700001-702000 | 466\n## [2] II 700001-702000 --- II 702001-704000 | 466\n## [3] II 700001-702000 --- II 704001-706000 | 466\n## [4] II 700001-702000 --- II 706001-708000 | 466\n## [5] II 700001-702000 --- II 708001-710000 | 466\n## ... ... ... ... ... ... . ...\n## [1536] II 804001-806000 --- II 810001-812000 | 518\n## [1537] II 806001-808000 --- II 806001-808000 | 519\n## [1538] II 806001-808000 --- II 808001-810000 | 519\n## [1539] II 806001-808000 --- II 810001-812000 | 519\n## [1540] II 808001-810000 --- II 808001-810000 | 520\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 466 30 0.0283618\n## [2] 467 145 0.0709380\n## [3] 468 124 0.0704979\n## [4] 469 59 0.0510221\n## [5] 470 59 0.0384004\n## ... ... ... ...\n## [1536] 521 1 NaN\n## [1537] 519 15 0.0560633\n## [1538] 520 25 NaN\n## [1539] 521 1 NaN\n## [1540] 520 10 NaN\n## -------\n## regions: 57 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\nBy default, subsetByOverlaps(hic, telomere) will only recover interactions constrained within telomere, i.e. 
interactions for which both ends are in telomere.\nAlternatively, type = \"any\" can be specified to get all interactions with at least one of their anchors within telomere.\n\nsubsetByOverlaps(hic, telomere, type = \"any\") |> interactions()\n## GInteractions object with 6041 interactions and 4 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | bin_id1\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric>\n## [1] II 300001-302000 --- II 702001-704000 | 266\n## [2] II 300001-302000 --- II 704001-706000 | 266\n## [3] II 300001-302000 --- II 768001-770000 | 266\n## [4] II 300001-302000 --- II 784001-786000 | 266\n## [5] II 302001-304000 --- II 740001-742000 | 267\n## ... ... ... ... ... ... . ...\n## [6037] II 804001-806000 --- II 810001-812000 | 518\n## [6038] II 806001-808000 --- II 806001-808000 | 519\n## [6039] II 806001-808000 --- II 808001-810000 | 519\n## [6040] II 806001-808000 --- II 810001-812000 | 519\n## [6041] II 808001-810000 --- II 808001-810000 | 520\n## bin_id2 count balanced\n## <numeric> <numeric> <numeric>\n## [1] 467 1 0.000590999\n## [2] 468 1 0.000686799\n## [3] 500 1 0.000728215\n## [4] 508 1 0.000923092\n## [5] 486 1 0.000382222\n## ... ... ... ...\n## [6037] 521 1 NaN\n## [6038] 519 15 0.0560633\n## [6039] 520 25 NaN\n## [6040] 521 1 NaN\n## [6041] 520 10 NaN\n## -------\n## regions: 257 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome\n\n\n3.1.2.2 <HiCExperiment>[\"...\"]\n\nThe square bracket operator [ allows for more advanced textual queries, similarly to focus arguments that can be used when importing contact matrices in memory.\nThis ensures that only the needed data is parsed in R, reducing memory load and accelerating the import. 
Thus, this should be the preferred way of parsing HiCExperiment data, as disk-stored contact matrices allow efficient random access to indexed data.\nThe following string types can be used to subset a HiCExperiment object with the [ notation:\n\n# \"II\" --> import contacts over an entire chromosome\n# \"II:300001-800000\" --> import on-diagonal contacts within a chromosome\n# \"II:300001-400000|II:600001-700000\" --> import off-diagonal contacts within a chromosome\n# \"II|III\" --> import contacts between two chromosomes\n# \"II:300001-800000|V:1-500000\" --> import contacts between segments of two chromosomes\n# c(\"II\", \"III\", \"IV\") --> import contacts within and between several chromosomes\n\n\n\n\n\n\n\nMore examples for subsetting with [ 👇\n\n\n\n\n\n\nSubsetting to a specific on-diagonal genomic location using standard UCSC coordinates query:\n\n\nhic[\"II:800001-813184\"]\n## `HiCExperiment` object with 1,040 contacts over 6 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:800,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 19 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting to a specific off-diagonal genomic location using pairs of coordinates query:\n\n\nhic[\"II:300001-320000|II:800001-813184\"]\n## `HiCExperiment` object with 3 contacts over 6 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300001-320000|II:800001-813184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 3 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those constrained within a single chromosome:\n\n\nhic[\"II\"]\n## 
`HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between two chromosomes:\n\n\nhic[\"II|IV\"]\n## `HiCExperiment` object with 0 contacts over 0 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:1-813184|IV:1-1531933\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 0 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those between segments of two chromosomes:\n\n\nhic[\"II:300001-320000|IV:1-100000\"]\n## `HiCExperiment` object with 0 contacts over 0 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300001-320000|IV:1-100000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 0 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\nSubsetting interactions to retain those constrained within several chromosomes:\n\n\nhic[c('II', 'III', 'IV')]\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II, III, IV\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nSome 
notes:\n\nThis last example (subsetting for a vector of several chromosomes) is the only scenario for which [-based in-memory subsetting of pre-imported data is the only way to go, as such subsetting is not possible with focus from disk-stored data.\nAll the other [ subsetting scenarii illustrated above can be achieved more efficiently using the focus argument when importing data into a HiCExperiment object.\nHowever, keep in mind that subsetting preserves extra data, e.g. added scores, topologicalFeatures, metadata or pairsFile, whereas this information is lost using focus with import.\n\n\n\n\n\n3.1.3 Zooming on a HiCExperiment\n\n“Zooming” refers to dynamically changing the resolution of a HiCExperiment. By zooming a HiCExperiment, one can refine or coarsen the contact matrix. This operation takes aContactFile and focus from an existing HiCExperiment input and re-generates a new HiCExperiment with updated resolution, interactions and scores. Note that zoom will preserve existing metadata, topologicalFeatures and pairsFile information.\n\nhic\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nzoom(hic, 4000)\n## `HiCExperiment` object with 306,212 contacts over 129 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 4000 \n## interactions: 6800 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nzoom(hic, 1000)\n## `HiCExperiment` object with 306,212 contacts over 514 
regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 44363 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\n\n\n\n\n\n\nNote\n\n\n\nThe sum of raw counts do not change after zooming, however the number of individual interactions and regions changes.\n\nlength(hic)\n## [1] 18513\nlength(zoom(hic, 1000))\n## [1] 44363\nlength(zoom(hic, 4000))\n## [1] 6800\nsum(scores(hic, \"count\"))\n## [1] 306212\nsum(scores(zoom(hic, 1000), \"count\"))\n## [1] 306212\nsum(scores(zoom(hic, 4000), \"count\"))\n## [1] 306212\n\n\n\n\n\n\n\n\n\nImportant\n\n\n\n\n\nzoom does not change the focus! It only affects the resolution (and consequently, the interactions).\n\nzoom will only work for multi-resolution contact matrices, e.g. .mcool or .hic." }, { "objectID": "pages/parsing.html#updating-an-hicexperiment-object", "href": "pages/parsing.html#updating-an-hicexperiment-object", "title": "\n3  Manipulating Hi-C data in R\n", "section": "\n3.2 Updating an HiCExperiment object", - "text": "3.2 Updating an HiCExperiment object\n\n\n\n\n\n\nTL;DR: Which HiCExperiment slots are mutable (✅) / immutable (⛔️)?\n\n\n\n\n\nfileName(hic): ⛔️ (obtained from disk-stored file)\n\nfocus(hic): 🤔 (see subsetting section)\n\nresolutions(hic): ⛔️ (obtained from disk-stored file)\n\nresolution(hic): 🤔 (see zooming section)\n\ninteractions(hic): ⛔️ (obtained from disk-stored file)\n\nscores(hic): ✅\n\ntopologicalFeatures(hic): ✅\n\npairsFile(hic): ✅\n\nmetadata(hic): ✅\n\n\n\n\n3.2.1 Immutable slots\nAn HiCExperiment object acts as an interface exposing disk-stored data. This implies that the fileName slot itself is immutable (i.e. cannot be changed). 
This should be obvious, as a HiCExperiment has to be associated with a disk-stored contact matrix to properly function (except in some advanced cases developed in next chapters).\nFor this reason, methods to manually modify interactions and resolutions slots are also not exposed in the HiCExperiment package.\nA corollary of this is that the associated regions and anchors of an HiCExperiment should not be modified by hand either, since they are directly linked to interactions.\n\n3.2.2 Mutable slots\nThat being said, HiCExperiment objects are flexible and can be partially modified in memory without having to change/overwrite the original, disk-stored contact matrix.\nSeveral slots can be modified in memory: slots, topologicalFeatures, pairsFile and metadata.\n\n3.2.2.1 scores\n\nWe have seen in the previous chapter that scores are stored in a list and are available using the scores function.\n\nscores(hic)\n## List of length 2\n## names(2): count balanced\n\nhead(scores(hic, \"count\"))\n## [1] 7 92 75 61 38 43\n\nhead(scores(hic, \"balanced\"))\n## [1] 0.009657438 0.076622340 0.054101992 0.042940512 0.040905212 0.029293930\n\nExtra scores can be added to this list, e.g. to describe the “expected” interaction frequency for each interaction stored in the HiCExperiment object). 
This can be achieved using the scores()<- function.\n\nscores(hic, \"random\") <- runif(length(hic))\n\nscores(hic)\n## List of length 3\n## names(3): count balanced random\n\nhead(scores(hic, \"random\"))\n## [1] 0.4036088 0.1918757 0.6034038 0.9544792 0.5272511 0.2514426\n\n\n3.2.2.2 topologicalFeatures\n\nThe end-user can create additional topologicalFeatures or modify the existing ones using the topologicalFeatures()<- function.\n\ntopologicalFeatures(hic, 'CTCF') <- GRanges(c(\n \"II:340-352\", \n \"II:3520-3532\", \n \"II:7980-7992\", \n \"II:9240-9252\" \n))\ntopologicalFeatures(hic, 'CTCF')\n## GRanges object with 4 ranges and 0 metadata columns:\n## seqnames ranges strand\n## <Rle> <IRanges> <Rle>\n## [1] II 340-352 *\n## [2] II 3520-3532 *\n## [3] II 7980-7992 *\n## [4] II 9240-9252 *\n## -------\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\ntopologicalFeatures(hic, 'loops') <- GInteractions(\n topologicalFeatures(hic, 'CTCF')[rep(1:3, each = 3)],\n topologicalFeatures(hic, 'CTCF')[rep(1:3, 3)]\n)\ntopologicalFeatures(hic, 'loops')\n## GInteractions object with 9 interactions and 0 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2\n## <Rle> <IRanges> <Rle> <IRanges>\n## [1] II 340-352 --- II 340-352\n## [2] II 340-352 --- II 3520-3532\n## [3] II 340-352 --- II 7980-7992\n## [4] II 3520-3532 --- II 340-352\n## [5] II 3520-3532 --- II 3520-3532\n## [6] II 3520-3532 --- II 7980-7992\n## [7] II 7980-7992 --- II 340-352\n## [8] II 7980-7992 --- II 3520-3532\n## [9] II 7980-7992 --- II 7980-7992\n## -------\n## regions: 3 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\nhic\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(3): count balanced 
random \n## topologicalFeatures: compartments(0) borders(0) loops(9) viewpoints(0) CTCF(4) \n## pairsFile: N/A \n## metadata(0):\n\nAll these objects can be used in *Overlap methods, as they all extend the GRanges class of objects.\n\n# ---- This counts the number of times `CTCF` anchors are being used in the \n# `loops` `GInteractions` object\ncountOverlaps(\n query = topologicalFeatures(hic, 'CTCF'), \n subject = topologicalFeatures(hic, 'loops')\n)\n## [1] 5 5 5 0\n\n\n3.2.2.3 pairsFile\n\nIf pairsFile is not specified when importing the ContactFile into a HiCExperiment object, one can add it later.\n\npairsf <- HiContactsData('yeast_wt', 'pairs.gz')\n\n\npairsFile(hic) <- pairsf\nhic\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(3): count balanced random \n## topologicalFeatures: compartments(0) borders(0) loops(9) viewpoints(0) CTCF(4) \n## pairsFile: /root/.cache/R/ExperimentHub/174733eb553_7753 \n## metadata(0):\n\n\n3.2.2.4 metadata\n\nMetadata associated with a HiCExperiment can be updated at any point.\n\nmetadata(hic) <- list(\n info = \"HiCExperiment created from an example .mcool file from `HiContactsData`\", \n date = date()\n)\nmetadata(hic)\n## $info\n## [1] \"HiCExperiment created from an example .mcool file from `HiContactsData`\"\n## \n## $date\n## [1] \"Tue Nov 7 18:48:53 2023\"" + "text": "3.2 Updating an HiCExperiment object\n\n\n\n\n\n\nTL;DR: Which HiCExperiment slots are mutable (✅) / immutable (⛔️)?\n\n\n\n\n\nfileName(hic): ⛔️ (obtained from disk-stored file)\n\nfocus(hic): 🤔 (see subsetting section)\n\nresolutions(hic): ⛔️ (obtained from disk-stored file)\n\nresolution(hic): 🤔 (see zooming section)\n\ninteractions(hic): ⛔️ (obtained from disk-stored file)\n\nscores(hic): 
✅\n\ntopologicalFeatures(hic): ✅\n\npairsFile(hic): ✅\n\nmetadata(hic): ✅\n\n\n\n\n3.2.1 Immutable slots\nAn HiCExperiment object acts as an interface exposing disk-stored data. This implies that the fileName slot itself is immutable (i.e. cannot be changed). This should be obvious, as a HiCExperiment has to be associated with a disk-stored contact matrix to properly function (except in some advanced cases developed in next chapters).\nFor this reason, methods to manually modify interactions and resolutions slots are also not exposed in the HiCExperiment package.\nA corollary of this is that the associated regions and anchors of an HiCExperiment should not be modified by hand either, since they are directly linked to interactions.\n\n3.2.2 Mutable slots\nThat being said, HiCExperiment objects are flexible and can be partially modified in memory without having to change/overwrite the original, disk-stored contact matrix.\nSeveral slots can be modified in memory: slots, topologicalFeatures, pairsFile and metadata.\n\n3.2.2.1 scores\n\nWe have seen in the previous chapter that scores are stored in a list and are available using the scores function.\n\nscores(hic)\n## List of length 2\n## names(2): count balanced\n\nhead(scores(hic, \"count\"))\n## [1] 7 92 75 61 38 43\n\nhead(scores(hic, \"balanced\"))\n## [1] 0.009657438 0.076622340 0.054101992 0.042940512 0.040905212 0.029293930\n\nExtra scores can be added to this list, e.g. to describe the “expected” interaction frequency for each interaction stored in the HiCExperiment object). 
This can be achieved using the scores()<- function.\n\nscores(hic, \"random\") <- runif(length(hic))\n\nscores(hic)\n## List of length 3\n## names(3): count balanced random\n\nhead(scores(hic, \"random\"))\n## [1] 0.85021122 0.60707706 0.07291116 0.18050687 0.70713349 0.68386247\n\n\n3.2.2.2 topologicalFeatures\n\nThe end-user can create additional topologicalFeatures or modify the existing ones using the topologicalFeatures()<- function.\n\ntopologicalFeatures(hic, 'CTCF') <- GRanges(c(\n \"II:340-352\", \n \"II:3520-3532\", \n \"II:7980-7992\", \n \"II:9240-9252\" \n))\ntopologicalFeatures(hic, 'CTCF')\n## GRanges object with 4 ranges and 0 metadata columns:\n## seqnames ranges strand\n## <Rle> <IRanges> <Rle>\n## [1] II 340-352 *\n## [2] II 3520-3532 *\n## [3] II 7980-7992 *\n## [4] II 9240-9252 *\n## -------\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\ntopologicalFeatures(hic, 'loops') <- GInteractions(\n topologicalFeatures(hic, 'CTCF')[rep(1:3, each = 3)],\n topologicalFeatures(hic, 'CTCF')[rep(1:3, 3)]\n)\ntopologicalFeatures(hic, 'loops')\n## GInteractions object with 9 interactions and 0 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2\n## <Rle> <IRanges> <Rle> <IRanges>\n## [1] II 340-352 --- II 340-352\n## [2] II 340-352 --- II 3520-3532\n## [3] II 340-352 --- II 7980-7992\n## [4] II 3520-3532 --- II 340-352\n## [5] II 3520-3532 --- II 3520-3532\n## [6] II 3520-3532 --- II 7980-7992\n## [7] II 7980-7992 --- II 340-352\n## [8] II 7980-7992 --- II 3520-3532\n## [9] II 7980-7992 --- II 7980-7992\n## -------\n## regions: 3 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\nhic\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(3): count balanced 
random \n## topologicalFeatures: compartments(0) borders(0) loops(9) viewpoints(0) CTCF(4) \n## pairsFile: N/A \n## metadata(0):\n\nAll these objects can be used in *Overlap methods, as they all extend the GRanges class of objects.\n\n# ---- This counts the number of times `CTCF` anchors are being used in the \n# `loops` `GInteractions` object\ncountOverlaps(\n query = topologicalFeatures(hic, 'CTCF'), \n subject = topologicalFeatures(hic, 'loops')\n)\n## [1] 5 5 5 0\n\n\n3.2.2.3 pairsFile\n\nIf pairsFile is not specified when importing the ContactFile into a HiCExperiment object, one can add it later.\n\npairsf <- HiContactsData('yeast_wt', 'pairs.gz')\n\n\npairsFile(hic) <- pairsf\nhic\n## `HiCExperiment` object with 306,212 contacts over 257 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:300,001-813,184\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 18513 \n## scores(3): count balanced random \n## topologicalFeatures: compartments(0) borders(0) loops(9) viewpoints(0) CTCF(4) \n## pairsFile: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 \n## metadata(0):\n\n\n3.2.2.4 metadata\n\nMetadata associated with a HiCExperiment can be updated at any point.\n\nmetadata(hic) <- list(\n info = \"HiCExperiment created from an example .mcool file from `HiContactsData`\", \n date = date()\n)\nmetadata(hic)\n## $info\n## [1] \"HiCExperiment created from an example .mcool file from `HiContactsData`\"\n## \n## $date\n## [1] \"Tue Nov 7 18:48:31 2023\"" }, { "objectID": "pages/parsing.html#coercing-hicexperiment-objects", "href": "pages/parsing.html#coercing-hicexperiment-objects", "title": "\n3  Manipulating Hi-C data in R\n", "section": "\n3.3 Coercing HiCExperiment objects", - "text": "3.3 Coercing HiCExperiment objects\nConvenient coercing functions exist to transform data stored as a HiCExperiment into another class.\n\n\nas.matrix(): allows to coerce the HiCExperiment into a 
sparse or dense matrix (using the sparse logical argument, TRUE by default) and choosing specific scores of interest (using the use.scores argument, \"balanced\" by default).\n\n\n# ----- `as.matrix` coerces a `HiCExperiment` into a `sparseMatrix` by default \nas.matrix(hic) |> class()\n## [1] \"dgTMatrix\"\n## attr(,\"package\")\n## [1] \"Matrix\"\n\nas.matrix(hic) |> dim()\n## [1] 257 257\n\n# ----- One can specify which scores should be used when coercing into a matrix\nas.matrix(hic, use.scores = \"balanced\")[1:5, 1:5]\n## 5 x 5 sparse Matrix of class \"dgTMatrix\"\n## \n## [1,] 0.009657438 0.07662234 0.05410199 0.04294051 0.04090521\n## [2,] 0.076622340 0.05128277 0.09841564 0.06926737 0.05263611\n## [3,] 0.054101992 0.09841564 0.05657589 0.08723160 0.07316890\n## [4,] 0.042940512 0.06926737 0.08723160 0.03699543 0.08403496\n## [5,] 0.040905212 0.05263611 0.07316890 0.08403496 0.04787415\n\nas.matrix(hic, use.scores = \"count\")[1:5, 1:5]\n## 5 x 5 sparse Matrix of class \"dgTMatrix\"\n## \n## [1,] 7 92 75 61 38\n## [2,] 92 102 226 163 81\n## [3,] 75 226 150 237 130\n## [4,] 61 163 237 103 153\n## [5,] 38 81 130 153 57\n\n# ----- If **expressly required**, one can coerce a HiCExperiment into a dense matrix\nas.matrix(hic, use.scores = \"count\", sparse = FALSE)[1:5, 1:5]\n## [,1] [,2] [,3] [,4] [,5]\n## [1,] 7 92 75 61 38\n## [2,] 92 102 226 163 81\n## [3,] 75 226 150 237 130\n## [4,] 61 163 237 103 153\n## [5,] 38 81 130 153 57\n\n\n\nas.data.frame(): simply coercing interactions into a rectangular data frame\n\n\nas.data.frame(hic) |> head()\n## seqnames1 start1 end1 width1 strand1 bin_id1 weight1 center1\n## 1 II 300001 302000 2000 * 266 0.03714342 301000\n## 2 II 300001 302000 2000 * 266 0.03714342 301000\n## 3 II 300001 302000 2000 * 266 0.03714342 301000\n## 4 II 300001 302000 2000 * 266 0.03714342 301000\n## 5 II 300001 302000 2000 * 266 0.03714342 301000\n## 6 II 300001 302000 2000 * 266 0.03714342 301000\n## seqnames2 start2 end2 width2 strand2 
bin_id2 weight2 center2 count\n## 1 II 300001 302000 2000 * 266 0.03714342 301000 7\n## 2 II 302001 304000 2000 * 267 0.02242258 303000 92\n## 3 II 304001 306000 2000 * 268 0.01942093 305000 75\n## 4 II 306001 308000 2000 * 269 0.01895202 307000 61\n## 5 II 308001 310000 2000 * 270 0.02898098 309000 38\n## 6 II 310001 312000 2000 * 271 0.01834118 311000 43\n## balanced random\n## 1 0.009657438 0.4036088\n## 2 0.076622340 0.1918757\n## 3 0.054101992 0.6034038\n## 4 0.042940512 0.9544792\n## 5 0.040905212 0.5272511\n## 6 0.029293930 0.2514426\n\n\n\n\n\n\n\nWarning\n\n\n\nThese coercing methods only operate on interactions and scores, and discard all other information, e.g. regarding genomic regions, available resolutions, associated metadata, pairsFile or topologicalFeatures." + "text": "3.3 Coercing HiCExperiment objects\nConvenient coercing functions exist to transform data stored as a HiCExperiment into another class.\n\n\nas.matrix(): allows to coerce the HiCExperiment into a sparse or dense matrix (using the sparse logical argument, TRUE by default) and choosing specific scores of interest (using the use.scores argument, \"balanced\" by default).\n\n\n# ----- `as.matrix` coerces a `HiCExperiment` into a `sparseMatrix` by default \nas.matrix(hic) |> class()\n## [1] \"dgTMatrix\"\n## attr(,\"package\")\n## [1] \"Matrix\"\n\nas.matrix(hic) |> dim()\n## [1] 257 257\n\n# ----- One can specify which scores should be used when coercing into a matrix\nas.matrix(hic, use.scores = \"balanced\")[1:5, 1:5]\n## 5 x 5 sparse Matrix of class \"dgTMatrix\"\n## \n## [1,] 0.009657438 0.07662234 0.05410199 0.04294051 0.04090521\n## [2,] 0.076622340 0.05128277 0.09841564 0.06926737 0.05263611\n## [3,] 0.054101992 0.09841564 0.05657589 0.08723160 0.07316890\n## [4,] 0.042940512 0.06926737 0.08723160 0.03699543 0.08403496\n## [5,] 0.040905212 0.05263611 0.07316890 0.08403496 0.04787415\n\nas.matrix(hic, use.scores = \"count\")[1:5, 1:5]\n## 5 x 5 sparse Matrix of class 
\"dgTMatrix\"\n## \n## [1,] 7 92 75 61 38\n## [2,] 92 102 226 163 81\n## [3,] 75 226 150 237 130\n## [4,] 61 163 237 103 153\n## [5,] 38 81 130 153 57\n\n# ----- If **expressly required**, one can coerce a HiCExperiment into a dense matrix\nas.matrix(hic, use.scores = \"count\", sparse = FALSE)[1:5, 1:5]\n## [,1] [,2] [,3] [,4] [,5]\n## [1,] 7 92 75 61 38\n## [2,] 92 102 226 163 81\n## [3,] 75 226 150 237 130\n## [4,] 61 163 237 103 153\n## [5,] 38 81 130 153 57\n\n\n\nas.data.frame(): simply coercing interactions into a rectangular data frame\n\n\nas.data.frame(hic) |> head()\n## seqnames1 start1 end1 width1 strand1 bin_id1 weight1 center1\n## 1 II 300001 302000 2000 * 266 0.03714342 301000\n## 2 II 300001 302000 2000 * 266 0.03714342 301000\n## 3 II 300001 302000 2000 * 266 0.03714342 301000\n## 4 II 300001 302000 2000 * 266 0.03714342 301000\n## 5 II 300001 302000 2000 * 266 0.03714342 301000\n## 6 II 300001 302000 2000 * 266 0.03714342 301000\n## seqnames2 start2 end2 width2 strand2 bin_id2 weight2 center2 count\n## 1 II 300001 302000 2000 * 266 0.03714342 301000 7\n## 2 II 302001 304000 2000 * 267 0.02242258 303000 92\n## 3 II 304001 306000 2000 * 268 0.01942093 305000 75\n## 4 II 306001 308000 2000 * 269 0.01895202 307000 61\n## 5 II 308001 310000 2000 * 270 0.02898098 309000 38\n## 6 II 310001 312000 2000 * 271 0.01834118 311000 43\n## balanced random\n## 1 0.009657438 0.85021122\n## 2 0.076622340 0.60707706\n## 3 0.054101992 0.07291116\n## 4 0.042940512 0.18050687\n## 5 0.040905212 0.70713349\n## 6 0.029293930 0.68386247\n\n\n\n\n\n\n\nWarning\n\n\n\nThese coercing methods only operate on interactions and scores, and discard all other information, e.g. regarding genomic regions, available resolutions, associated metadata, pairsFile or topologicalFeatures." 
}, { "objectID": "pages/visualization.html", @@ -179,7 +179,7 @@ "href": "pages/visualization.html#advanced-visualization", "title": "\n4  Hi-C data visualization\n", "section": "\n4.3 Advanced visualization", - "text": "4.3 Advanced visualization\n\n4.3.1 Overlaying topological features\nTopological features (e.g. chromatin loops, domain borders, A/B compartments, e.g. …) are often displayed over a Hi-C heatmap.\nTo illustrate how to do this, let’s import pre-computed chromatin loops in R. These loops have been identified using chromosight (Matthey-Doret et al. (2020)) on the contact matrix which we imported interactions from.\n\nlibrary(rtracklayer)\nlibrary(InteractionSet)\nloops <- system.file('extdata', 'S288C-loops.bedpe', package = 'HiCExperiment') |> \n import() |> \n makeGInteractionsFromGRangesPairs()\nloops\n## GInteractions object with 162 interactions and 0 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2\n## <Rle> <IRanges> <Rle> <IRanges>\n## [1] I 3001-4000 --- I 29001-30000\n## [2] I 29001-30000 --- I 50001-51000\n## [3] I 95001-96000 --- I 128001-129000\n## [4] I 133001-134000 --- I 157001-158000\n## [5] II 8001-9000 --- II 46001-47000\n## ... ... ... ... ... ...\n## [158] XVI 773001-774000 --- XVI 803001-804000\n## [159] XVI 834001-835000 --- XVI 859001-860000\n## [160] XVI 860001-861000 --- XVI 884001-885000\n## [161] XVI 901001-902000 --- XVI 940001-941000\n## [162] XVI 917001-918000 --- XVI 939001-940000\n## -------\n## regions: 316 ranges and 0 metadata columns\n## seqinfo: 16 sequences from an unspecified genome; no seqlengths\n\nSimilarly, borders have also been mapped with chromosight. 
We can also import them in R.\n\nborders <- system.file('extdata', 'S288C-borders.bed', package = 'HiCExperiment') |> \n import()\nborders\n## GRanges object with 814 ranges and 0 metadata columns:\n## seqnames ranges strand\n## <Rle> <IRanges> <Rle>\n## [1] I 73001-74000 *\n## [2] I 108001-109000 *\n## [3] I 181001-182000 *\n## [4] II 90001-91000 *\n## [5] II 119001-120000 *\n## ... ... ... ...\n## [810] XVI 777001-778000 *\n## [811] XVI 796001-797000 *\n## [812] XVI 811001-812000 *\n## [813] XVI 890001-891000 *\n## [814] XVI 933001-934000 *\n## -------\n## seqinfo: 16 sequences from an unspecified genome; no seqlengths\n\nChromatin loops are stored in GInteractions while borders are GRanges. The former will be displayed as off-diagonal circles and the later as on-diagonal diamonds on the Hi-C heatmap.\n\nplotMatrix(hic, loops = loops, borders = borders)\n\n\n\n\n\n\n\n\n4.3.2 Aggregated Hi-C maps\nFinally, Hi-C map “snippets” (i.e. extracts) are often aggregated together to show an average signal. This analysis is sometimes referred to as APA (Aggregated Plot Analysis).\nAggregated Hi-C maps can be computed over a collection of targets using the aggregate function. These targets can be GRanges (to extract on-diagonal snippets) or GInteractions (to extract off-diagonal snippets). 
The flankingBins specifies how many matrix bins should be extracted on each side of the targets of interest.\nHere, we compute the aggregated Hi-C snippets of ± 15kb around each chromatin loop listed in loops.\n\nhic <- zoom(hic, 1000)\naggr_loops <- aggregate(hic, targets = loops, flankingBins = 15)\n## Going through preflight checklist...\n## Parsing the entire contact matrice as a sparse matrix...\n## Modeling distance decay...\n## Filtering for contacts within provided targets...\naggr_loops\n## `AggrHiCExperiment` object over 148 targets \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: 148 targets \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 961 \n## scores(4): count balanced expected detrended \n## slices(4): count balanced expected detrended \n## topologicalFeatures: targets(148) compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\naggregate generates a AggrHiCExperiment object, a flavor of HiCExperiment class of objects.\n\n\nAggrHiCExperiment objects have an extra slices slot. This stores a list of arrays, one per scores. Each array is of 3 dimensions, x and y representing the heatmap axes, and z representing the index of the target.\n\nAggrHiCExperiment objects also have a mandatory topologicalFeatures element named targets, storing the genomic loci provided in aggregate.\n\n\nslices(aggr_loops)\n## List of length 4\n## names(4): count balanced expected detrended\ndim(slices(aggr_loops, 'count'))\n## [1] 31 31 148\ntopologicalFeatures(aggr_loops, 'targets')\n## Pairs object with 148 pairs and 0 metadata columns:\n## first second\n## <GRanges> <GRanges>\n## [1] I:14501-44500 I:35501-65500\n## [2] I:80501-110500 I:113501-143500\n## [3] I:118501-148500 I:142501-172500\n## [4] II:33501-63500 II:63501-93500\n## [5] II:134501-164500 II:159501-189500\n## ... ... 
...\n## [144] XVI:586501-616500 XVI:606501-636500\n## [145] XVI:733501-763500 XVI:754501-784500\n## [146] XVI:758501-788500 XVI:788501-818500\n## [147] XVI:819501-849500 XVI:844501-874500\n## [148] XVI:845501-875500 XVI:869501-899500\n\nThe resulting AggrHiCExperiment can be plotted using the same plotMatrix function with the arguments described above.\n\nplotMatrix(\n aggr_loops, \n use.scores = 'detrended', \n scale = 'linear', \n limits = c(-1, 1), \n cmap = bgrColors()\n)" + "text": "4.3 Advanced visualization\n\n4.3.1 Overlaying topological features\nTopological features (e.g. chromatin loops, domain borders, A/B compartments, e.g. …) are often displayed over a Hi-C heatmap.\nTo illustrate how to do this, let’s import pre-computed chromatin loops in R. These loops have been identified using chromosight (Matthey-Doret et al. (2020)) on the contact matrix which we imported interactions from.\n\nlibrary(rtracklayer)\nlibrary(InteractionSet)\nloops <- system.file('extdata', 'S288C-loops.bedpe', package = 'HiCExperiment') |> \n import() |> \n makeGInteractionsFromGRangesPairs()\nloops\n## GInteractions object with 162 interactions and 0 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2\n## <Rle> <IRanges> <Rle> <IRanges>\n## [1] I 3001-4000 --- I 29001-30000\n## [2] I 29001-30000 --- I 50001-51000\n## [3] I 95001-96000 --- I 128001-129000\n## [4] I 133001-134000 --- I 157001-158000\n## [5] II 8001-9000 --- II 46001-47000\n## ... ... ... ... ... ...\n## [158] XVI 773001-774000 --- XVI 803001-804000\n## [159] XVI 834001-835000 --- XVI 859001-860000\n## [160] XVI 860001-861000 --- XVI 884001-885000\n## [161] XVI 901001-902000 --- XVI 940001-941000\n## [162] XVI 917001-918000 --- XVI 939001-940000\n## -------\n## regions: 316 ranges and 0 metadata columns\n## seqinfo: 16 sequences from an unspecified genome; no seqlengths\n\nSimilarly, borders have also been mapped with chromosight. 
We can also import them in R.\n\nborders <- system.file('extdata', 'S288C-borders.bed', package = 'HiCExperiment') |> \n import()\nborders\n## GRanges object with 814 ranges and 0 metadata columns:\n## seqnames ranges strand\n## <Rle> <IRanges> <Rle>\n## [1] I 73001-74000 *\n## [2] I 108001-109000 *\n## [3] I 181001-182000 *\n## [4] II 90001-91000 *\n## [5] II 119001-120000 *\n## ... ... ... ...\n## [810] XVI 777001-778000 *\n## [811] XVI 796001-797000 *\n## [812] XVI 811001-812000 *\n## [813] XVI 890001-891000 *\n## [814] XVI 933001-934000 *\n## -------\n## seqinfo: 16 sequences from an unspecified genome; no seqlengths\n\nChromatin loops are stored in GInteractions while borders are GRanges. The former will be displayed as off-diagonal circles and the later as on-diagonal diamonds on the Hi-C heatmap.\n\nplotMatrix(hic, loops = loops, borders = borders)\n\n\n\n\n\n\n\n\n4.3.2 Aggregated Hi-C maps\nFinally, Hi-C map “snippets” (i.e. extracts) are often aggregated together to show an average signal. This analysis is sometimes referred to as APA (Aggregated Plot Analysis).\nAggregated Hi-C maps can be computed over a collection of targets using the aggregate function. These targets can be GRanges (to extract on-diagonal snippets) or GInteractions (to extract off-diagonal snippets). 
The flankingBins specifies how many matrix bins should be extracted on each side of the targets of interest.\nHere, we compute the aggregated Hi-C snippets of ± 15kb around each chromatin loop listed in loops.\n\nhic <- zoom(hic, 1000)\naggr_loops <- aggregate(hic, targets = loops, flankingBins = 15)\n## Going through preflight checklist...\n## Parsing the entire contact matrice as a sparse matrix...\n## Modeling distance decay...\n## Filtering for contacts within provided targets...\naggr_loops\n## `AggrHiCExperiment` object over 148 targets \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: 148 targets \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 961 \n## scores(4): count balanced expected detrended \n## slices(4): count balanced expected detrended \n## topologicalFeatures: targets(148) compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\naggregate generates a AggrHiCExperiment object, a flavor of HiCExperiment class of objects.\n\n\nAggrHiCExperiment objects have an extra slices slot. This stores a list of arrays, one per scores. Each array is of 3 dimensions, x and y representing the heatmap axes, and z representing the index of the target.\n\nAggrHiCExperiment objects also have a mandatory topologicalFeatures element named targets, storing the genomic loci provided in aggregate.\n\n\nslices(aggr_loops)\n## List of length 4\n## names(4): count balanced expected detrended\ndim(slices(aggr_loops, 'count'))\n## [1] 31 31 148\ntopologicalFeatures(aggr_loops, 'targets')\n## Pairs object with 148 pairs and 0 metadata columns:\n## first second\n## <GRanges> <GRanges>\n## [1] I:14501-44500 I:35501-65500\n## [2] I:80501-110500 I:113501-143500\n## [3] I:118501-148500 I:142501-172500\n## [4] II:33501-63500 II:63501-93500\n## [5] II:134501-164500 II:159501-189500\n## ... ... 
...\n## [144] XVI:586501-616500 XVI:606501-636500\n## [145] XVI:733501-763500 XVI:754501-784500\n## [146] XVI:758501-788500 XVI:788501-818500\n## [147] XVI:819501-849500 XVI:844501-874500\n## [148] XVI:845501-875500 XVI:869501-899500\n\nThe resulting AggrHiCExperiment can be plotted using the same plotMatrix function with the arguments described above.\n\nplotMatrix(\n aggr_loops, \n use.scores = 'detrended', \n scale = 'linear', \n limits = c(-1, 1), \n cmap = bgrColors()\n)" }, { "objectID": "pages/matrix-centric.html", @@ -193,7 +193,7 @@ "href": "pages/matrix-centric.html#operations-in-an-individual-matrix", "title": "\n5  Matrix-centric analysis\n", "section": "\n5.1 Operations in an individual matrix", - "text": "5.1 Operations in an individual matrix\n\n5.1.1 Balancing a raw interaction count map\nHi-C sequencing coverage is systematically affected by multiple confounding factors, e.g.  density of restriction sites, GC%, genome mappability, etc.. Overall, it generally ends up not homogenous throughout the entire genome and this leads to artifacts in un-normalized count matrices.\nTo correct for sequencing coverage heterogeneity of raw count maps, Hi-C data can be normalized using matrix balancing approaches (Cournac et al. (2012), Imakaev et al. (2012)). This is generally done directly on the disk-stored matrices using out-of-memory strategies (e.g. with cooler balance <.cool>). However, if contact matrix files are imported into a HiCExperiment object but no balanced scores are available, in-memory balancing can be performed using the normalize function. 
This adds an extra ICE element in scores list (while the interactions themselves are unmodified).\n\nnormalized_hic <- normalize(hic)\nnormalized_hic\n## `HiCExperiment` object with 471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(3): count balanced ICE \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nIt is possible to plot the different scores of the resulting object to visualize the newly computed scores. In this example, ICE scores should be nearly identical to balanced scores, which were originally imported from the disk-stored contact matrix.\n\n\npatchwork::wrap_plots(\n plotMatrix(normalized_hic, use.scores = 'count', caption = FALSE),\n plotMatrix(normalized_hic, use.scores = 'balanced', caption = FALSE),\n plotMatrix(normalized_hic, use.scores = 'ICE', caption = FALSE), \n nrow = 1\n)\n\n\n\n\n\n\n\n\n\n5.1.2 Computing observed/expected (O/E) map\nThe most prominent feature of a balanced Hi-C matrix is the strong main diagonal. This main diagonal is observed because interactions between immediate adjacent genomic loci are more prone to happen than interactions spanning longer genomic distances. This “expected” behavior is due to the polymer nature of the chromosomes being studied, and can be locally estimated using the distance-dependent interaction frequency (a.k.a. the “distance law”, or P(s)). It can be used to compute an expected matrix on interactions.\nWhen it is desirable to “mask” this polymer behavior to emphasize topological structures formed by chromosomes, one can divide a given balanced matrix by its expected matrix, i.e. calculate the observed/expected (O/E) map. 
This is sometimes called “detrending”, as it effectively removes the average polymer behavior from the balanced matrix.\nThe detrend function performs this operation on a given HiCExperiment object. It adds two extra elements in scores list: expected and detrended metrics (while the interactions themselves are unmodified).\n\ndetrended_hic <- detrend(hic)\ndetrended_hic\n## `HiCExperiment` object with 471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(4): count balanced expected detrended \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nTopological features will be visually more prominent in the O/E detrended Hi-C map.\n\n\npatchwork::wrap_plots(\n plotMatrix(detrended_hic, use.scores = 'balanced', scale = 'log10', limits = c(-3.5, -1.2), caption = FALSE),\n plotMatrix(detrended_hic, use.scores = 'expected', scale = 'log10', limits = c(-3.5, -1.2), caption = FALSE),\n plotMatrix(detrended_hic, use.scores = 'detrended', scale = 'linear', limits = c(-1, 1), cmap = bwrColors(), caption = FALSE), \n nrow = 1\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nScale for detrended scores\n\n\n\n\n\nexpected scores are in linear scale and ± in the same amplitude than balanced scores;\n\ndetrended scores are in log2 scale, in general approximately centered around 0. When plotting detrended scores, scale = linear should be set to prevent the default log10 scaling.\n\n\n\n\n5.1.3 Computing autocorrelated map\nCorrelation matrices are often calculated from balanced Hi-C matrices. For instance, in genomes composed of eu- and heterochromatin, a correlation matrix can be used to reveal a checkerboard pattern emphasizing the segregation of chromatin into two A/B compartments (Lieberman-Aiden et al. 
(2009)).\nThe autocorrelate function is used to compute a correlation matrix of a HiCExperiment object. For each pair of interacting loci, the autocorrelated score represents the correlation between their respective interaction profiles with the rest of the genome.\n\nautocorr_hic <- autocorrelate(hic)\n## \nautocorr_hic\n## `HiCExperiment` object with 471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(5): count balanced expected detrended autocorrelated \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nSince these metrics represent correlation scores, they range between -1 and 1. Two loci with an autocorrelated score close to -1 have anti-correlated interaction profiles, while two loci with a autocorrelated score close to 1 are likely to interact with shared targets.\n\nsummary(scores(autocorr_hic, 'autocorrelated'))\n## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's \n## -0.4156 0.0025 0.0504 0.0645 0.1036 1.0000 564\n\nCorrelated and anti-correlated loci will be visually represented in the autocorrelated Hi-C map in red and blue pixels, respectively.\n\n\n\n\n\n\nNote\n\n\n\nHere we have illustrated how to compute an autocorrelation matrix from a HiCExperiment object using the example yeast Hi-C experiment. Bear in mind that this is unusual and not very useful, as yeast chromatin is not segregated in two compartments but rather follows a Rabl conformation (Duan et al. (2010)). 
An example of autocorrelation map from a vertebrate Hi-C experiment (for which chromatin is segregated in A/B compartments) is shown in Chapter 10.\n\n\n\nplotMatrix(\n autocorr_hic, \n use.scores = 'autocorrelated', \n scale = 'linear', \n limits = c(-0.4, 0.4), \n cmap = bgrColors()\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\nScale for autocorrelated scores\n\n\n\n\n\nautocorrelated scores are in linear scale, in general approximately centered around 0. When plotting autocorrelated scores, scale = linear should be set to prevent the default log10 scaling.\n\nlimits should be manually set to c(-x, x) (0 < x <= 1) to ensure that the color range is effectively centered on 0.\n\n\n\n\n5.1.4 Despeckling (smoothing out) a contact map\nShallow-sequenced Hi-C libraries or matrices binned with an overly small bin size sometimes produce “grainy” Hi-C maps with noisy backgrounds. A grainy map may also be obtained when dividing two matrices, e.g. when computing the O/E ratio with detrend. This is particularly true for sparser long-range interactions. To overcome such limitations, HiCExperiment objects can be “despeckled” to smooth out focal speckles.\n\nhic2 <- detrend(hic['II:400000-700000'])\nhic2 <- despeckle(hic2, use.scores = 'detrended', focal.size = 2)\nhic2\n## `HiCExperiment` object with 168,785 contacts over 150 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II:400,000-700,000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 11325 \n## scores(5): count balanced expected detrended detrended.despeckled \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nThe added <use.scores>.despeckled scores correspond to scores averaged using a window, whose width is provided with the focal.size argument. 
This results in a smoother Hi-C heatmap, effectively removing the “speckles” observed at longer range.\n\n\nlibrary(InteractionSet)\nloops <- system.file('extdata', 'S288C-loops.bedpe', package = 'HiCExperiment') |> \n import() |> \n makeGInteractionsFromGRangesPairs()\nborders <- system.file('extdata', 'S288C-borders.bed', package = 'HiCExperiment') |> \n import()\npatchwork::wrap_plots(\n plotMatrix(hic2, caption = FALSE),\n plotMatrix(hic2, use.scores = 'detrended', scale = 'linear', limits = c(-1, 1), caption = FALSE),\n plotMatrix(\n hic2, \n use.scores = 'detrended.despeckled', \n scale = 'linear', \n limits = c(-1, 1), \n caption = FALSE, \n loops = loops, \n borders = borders\n ),\n nrow = 1\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nScale for despeckled scores\n\n\n\ndespeckled scores are in the same scale than the scores they were computed from." + "text": "5.1 Operations in an individual matrix\n\n5.1.1 Balancing a raw interaction count map\nHi-C sequencing coverage is systematically affected by multiple confounding factors, e.g.  density of restriction sites, GC%, genome mappability, etc.. Overall, it generally ends up not homogenous throughout the entire genome and this leads to artifacts in un-normalized count matrices.\nTo correct for sequencing coverage heterogeneity of raw count maps, Hi-C data can be normalized using matrix balancing approaches (Cournac et al. (2012), Imakaev et al. (2012)). This is generally done directly on the disk-stored matrices using out-of-memory strategies (e.g. with cooler balance <.cool>). However, if contact matrix files are imported into a HiCExperiment object but no balanced scores are available, in-memory balancing can be performed using the normalize function. 
This adds an extra ICE element in scores list (while the interactions themselves are unmodified).\n\nnormalized_hic <- normalize(hic)\nnormalized_hic\n## `HiCExperiment` object with 471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(3): count balanced ICE \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nIt is possible to plot the different scores of the resulting object to visualize the newly computed scores. In this example, ICE scores should be nearly identical to balanced scores, which were originally imported from the disk-stored contact matrix.\n\n\npatchwork::wrap_plots(\n plotMatrix(normalized_hic, use.scores = 'count', caption = FALSE),\n plotMatrix(normalized_hic, use.scores = 'balanced', caption = FALSE),\n plotMatrix(normalized_hic, use.scores = 'ICE', caption = FALSE), \n nrow = 1\n)\n\n\n\n\n\n\n\n\n\n5.1.2 Computing observed/expected (O/E) map\nThe most prominent feature of a balanced Hi-C matrix is the strong main diagonal. This main diagonal is observed because interactions between immediate adjacent genomic loci are more prone to happen than interactions spanning longer genomic distances. This “expected” behavior is due to the polymer nature of the chromosomes being studied, and can be locally estimated using the distance-dependent interaction frequency (a.k.a. the “distance law”, or P(s)). It can be used to compute an expected matrix on interactions.\nWhen it is desirable to “mask” this polymer behavior to emphasize topological structures formed by chromosomes, one can divide a given balanced matrix by its expected matrix, i.e. calculate the observed/expected (O/E) map. 
This is sometimes called “detrending”, as it effectively removes the average polymer behavior from the balanced matrix.\nThe detrend function performs this operation on a given HiCExperiment object. It adds two extra elements in scores list: expected and detrended metrics (while the interactions themselves are unmodified).\n\ndetrended_hic <- detrend(hic)\ndetrended_hic\n## `HiCExperiment` object with 471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(4): count balanced expected detrended \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nTopological features will be visually more prominent in the O/E detrended Hi-C map.\n\n\npatchwork::wrap_plots(\n plotMatrix(detrended_hic, use.scores = 'balanced', scale = 'log10', limits = c(-3.5, -1.2), caption = FALSE),\n plotMatrix(detrended_hic, use.scores = 'expected', scale = 'log10', limits = c(-3.5, -1.2), caption = FALSE),\n plotMatrix(detrended_hic, use.scores = 'detrended', scale = 'linear', limits = c(-1, 1), cmap = bwrColors(), caption = FALSE), \n nrow = 1\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nScale for detrended scores\n\n\n\n\n\nexpected scores are in linear scale and ± in the same amplitude than balanced scores;\n\ndetrended scores are in log2 scale, in general approximately centered around 0. When plotting detrended scores, scale = linear should be set to prevent the default log10 scaling.\n\n\n\n\n5.1.3 Computing autocorrelated map\nCorrelation matrices are often calculated from balanced Hi-C matrices. For instance, in genomes composed of eu- and heterochromatin, a correlation matrix can be used to reveal a checkerboard pattern emphasizing the segregation of chromatin into two A/B compartments (Lieberman-Aiden et al. 
(2009)).\nThe autocorrelate function is used to compute a correlation matrix of a HiCExperiment object. For each pair of interacting loci, the autocorrelated score represents the correlation between their respective interaction profiles with the rest of the genome.\n\nautocorr_hic <- autocorrelate(hic)\n## \nautocorr_hic\n## `HiCExperiment` object with 471,364 contacts over 407 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 34063 \n## scores(5): count balanced expected detrended autocorrelated \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nSince these metrics represent correlation scores, they range between -1 and 1. Two loci with an autocorrelated score close to -1 have anti-correlated interaction profiles, while two loci with a autocorrelated score close to 1 are likely to interact with shared targets.\n\nsummary(scores(autocorr_hic, 'autocorrelated'))\n## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's \n## -0.4156 0.0025 0.0504 0.0645 0.1036 1.0000 564\n\nCorrelated and anti-correlated loci will be visually represented in the autocorrelated Hi-C map in red and blue pixels, respectively.\n\n\n\n\n\n\nNote\n\n\n\nHere we have illustrated how to compute an autocorrelation matrix from a HiCExperiment object using the example yeast Hi-C experiment. Bear in mind that this is unusual and not very useful, as yeast chromatin is not segregated in two compartments but rather follows a Rabl conformation (Duan et al. (2010)). 
An example of autocorrelation map from a vertebrate Hi-C experiment (for which chromatin is segregated in A/B compartments) is shown in Chapter 10.\n\n\n\nplotMatrix(\n autocorr_hic, \n use.scores = 'autocorrelated', \n scale = 'linear', \n limits = c(-0.4, 0.4), \n cmap = bgrColors()\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\nScale for autocorrelated scores\n\n\n\n\n\nautocorrelated scores are in linear scale, in general approximately centered around 0. When plotting autocorrelated scores, scale = linear should be set to prevent the default log10 scaling.\n\nlimits should be manually set to c(-x, x) (0 < x <= 1) to ensure that the color range is effectively centered on 0.\n\n\n\n\n5.1.4 Despeckling (smoothing out) a contact map\nShallow-sequenced Hi-C libraries or matrices binned with an overly small bin size sometimes produce “grainy” Hi-C maps with noisy backgrounds. A grainy map may also be obtained when dividing two matrices, e.g. when computing the O/E ratio with detrend. This is particularly true for sparser long-range interactions. To overcome such limitations, HiCExperiment objects can be “despeckled” to smooth out focal speckles.\n\nhic2 <- detrend(hic['II:400000-700000'])\nhic2 <- despeckle(hic2, use.scores = 'detrended', focal.size = 2)\nhic2\n## `HiCExperiment` object with 168,785 contacts over 150 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II:400,000-700,000\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 2000 \n## interactions: 11325 \n## scores(5): count balanced expected detrended detrended.despeckled \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) centromeres(16) \n## pairsFile: N/A \n## metadata(0):\n\nThe added <use.scores>.despeckled scores correspond to scores averaged using a window, whose width is provided with the focal.size argument. 
This results in a smoother Hi-C heatmap, effectively removing the “speckles” observed at longer range.\n\n\nlibrary(InteractionSet)\nloops <- system.file('extdata', 'S288C-loops.bedpe', package = 'HiCExperiment') |> \n import() |> \n makeGInteractionsFromGRangesPairs()\nborders <- system.file('extdata', 'S288C-borders.bed', package = 'HiCExperiment') |> \n import()\npatchwork::wrap_plots(\n plotMatrix(hic2, caption = FALSE),\n plotMatrix(hic2, use.scores = 'detrended', scale = 'linear', limits = c(-1, 1), caption = FALSE),\n plotMatrix(\n hic2, \n use.scores = 'detrended.despeckled', \n scale = 'linear', \n limits = c(-1, 1), \n caption = FALSE, \n loops = loops, \n borders = borders\n ),\n nrow = 1\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nScale for despeckled scores\n\n\n\ndespeckled scores are in the same scale than the scores they were computed from." }, { "objectID": "pages/matrix-centric.html#operations-between-multiple-matrices", @@ -214,7 +214,7 @@ "href": "pages/interactions-centric.html#distance-laws", "title": "\n6  Interactions-centric analysis\n", "section": "\n6.1 Distance law(s)", - "text": "6.1 Distance law(s)\n\n6.1.1 P(s) from a single .pairs file\nDistance laws are generally computed directly from .pairs files. 
This is because the .pairs files are at 1-bp resolution whereas the contact matrices (for example from .cool files) are binned at a minimum resolution.\nAn example .pairs file can be fetched from the ExperimentHub database using the HiContactsData package.\n\nlibrary(HiCExperiment)\nlibrary(HiContactsData)\npairsf <- HiContactsData('yeast_wt', 'pairs.gz')\npf <- PairsFile(pairsf)\n\n\npf\n## PairsFile object\n## resource: /root/.cache/R/ExperimentHub/174733eb553_7753\n\nIf needed, PairsFile connections can be imported directly into a GInteractions object with import().\n\nimport(pf)\n## GInteractions object with 471364 interactions and 3 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | frag1 frag2\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric> <numeric>\n## [1] II 105 --- II 48548 | 1358 1681\n## [2] II 113 --- II 45003 | 1358 1658\n## [3] II 119 --- II 687251 | 1358 5550\n## [4] II 160 --- II 26124 | 1358 1510\n## [5] II 169 --- II 39052 | 1358 1613\n## ... ... ... ... ... ... . ... ...\n## [471360] II 808605 --- II 809683 | 6316 6320\n## [471361] II 808609 --- II 809917 | 6316 6324\n## [471362] II 808617 --- II 809506 | 6316 6319\n## [471363] II 809447 --- II 809685 | 6319 6321\n## [471364] II 809472 --- II 809675 | 6319 6320\n## distance\n## <integer>\n## [1] 48443\n## [2] 44890\n## [3] 687132\n## [4] 25964\n## [5] 38883\n## ... ...\n## [471360] 1078\n## [471361] 1308\n## [471362] 889\n## [471363] 238\n## [471364] 203\n## -------\n## regions: 549331 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\nWe can compute a P(s) per chromosome from this .pairs file using the distanceLaw function.\n\nlibrary(HiContacts)\nps <- distanceLaw(pf, by_chr = TRUE) \n## Importing pairs file /root/.cache/R/ExperimentHub/174733eb553_7753 in memory. 
This may take a while...\nps\n## # A tibble: 115 × 6\n## chr binned_distance p norm_p norm_p_unity slope\n## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>\n## 1 II 14 0.00000212 0.00000106 2.27 0 \n## 2 II 16 0.0000170 0.0000170 36.4 1.56\n## 3 II 17 0.0000361 0.0000180 38.6 1.55\n## 4 II 19 0.0000424 0.0000212 45.5 1.55\n## 5 II 21 0.0000467 0.0000233 50.0 1.54\n## 6 II 23 0.0000870 0.0000290 62.1 1.53\n## # ℹ 109 more rows\n\nThe plotPs() and plotPsSlope() functions are convenient ggplot2-based functions with pre-configured settings optimized for P(s) visualization.\n\nlibrary(ggplot2)\nplotPs(ps, aes(x = binned_distance, y = norm_p, color = chr))\n## Warning: Removed 67 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\nplotPsSlope(ps, aes(x = binned_distance, y = slope, color = chr))\n## Warning: Removed 67 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\n\n\n6.1.2 P(s) for multiple .pairs files\nLet’s first import a second example dataset. We’ll import pairs identified in a eco1 yeast mutant.\n\neco1_pairsf <- HiContactsData('yeast_eco1', 'pairs.gz')\neco1_pf <- PairsFile(eco1_pairsf)\n\n\neco1_ps <- distanceLaw(eco1_pf, by_chr = TRUE) \n## Importing pairs file /root/.cache/R/ExperimentHub/fae309a2f91_7755 in memory. 
This may take a while...\neco1_ps\n## # A tibble: 115 × 6\n## chr binned_distance p norm_p norm_p_unity slope\n## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>\n## 1 II 14 0.00000201 0.00000100 0.660 0 \n## 2 II 16 0.0000221 0.0000221 14.5 1.46\n## 3 II 17 0.0000492 0.0000246 16.2 1.46\n## 4 II 19 0.0000412 0.0000206 13.5 1.45\n## 5 II 21 0.0000653 0.0000326 21.5 1.45\n## 6 II 23 0.0000803 0.0000268 17.6 1.44\n## # ℹ 109 more rows\n\nA little data wrangling can help plotting the distance laws for 2 different samples in the same plot.\n\nlibrary(dplyr)\nmerged_ps <- rbind(\n ps |> mutate(sample = 'WT'), \n eco1_ps |> mutate(sample = 'eco1')\n)\nplotPs(merged_ps, aes(x = binned_distance, y = norm_p, color = sample, linetype = chr)) + \n scale_color_manual(values = c('#c6c6c6', '#ca0000'))\n## Warning: Removed 134 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\nplotPsSlope(merged_ps, aes(x = binned_distance, y = slope, color = sample, linetype = chr)) + \n scale_color_manual(values = c('#c6c6c6', '#ca0000'))\n## Warning: Removed 135 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\n\n\n6.1.3 P(s) from HiCExperiment objects\nAlternatively, distance laws can be computed from binned matrices directly by providing HiCExperiment objects. For deeply sequenced datasets, this can be significantly faster than when using original .pairs files, but the smoothness of the resulting curves will be greatly impacted, notably at short distances.\n\nps_from_hic <- distanceLaw(hic, by_chr = TRUE) \n## pairsFile not specified. The P(s) curve will be an approximation.\nplotPs(ps_from_hic, aes(x = binned_distance, y = norm_p))\n## Warning: Removed 9 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\nplotPsSlope(ps_from_hic, aes(x = binned_distance, y = slope))\n## Warning: Removed 8 rows containing missing values (`geom_line()`)." + "text": "6.1 Distance law(s)\n\n6.1.1 P(s) from a single .pairs file\nDistance laws are generally computed directly from .pairs files. 
This is because the .pairs files are at 1-bp resolution whereas the contact matrices (for example from .cool files) are binned at a minimum resolution.\nAn example .pairs file can be fetched from the ExperimentHub database using the HiContactsData package.\n\nlibrary(HiCExperiment)\nlibrary(HiContactsData)\npairsf <- HiContactsData('yeast_wt', 'pairs.gz')\npf <- PairsFile(pairsf)\n\n\npf\n## PairsFile object\n## resource: /root/.cache/R/ExperimentHub/16f6ddc5c03_7753\n\nIf needed, PairsFile connections can be imported directly into a GInteractions object with import().\n\nimport(pf)\n## GInteractions object with 471364 interactions and 3 metadata columns:\n## seqnames1 ranges1 seqnames2 ranges2 | frag1 frag2\n## <Rle> <IRanges> <Rle> <IRanges> | <numeric> <numeric>\n## [1] II 105 --- II 48548 | 1358 1681\n## [2] II 113 --- II 45003 | 1358 1658\n## [3] II 119 --- II 687251 | 1358 5550\n## [4] II 160 --- II 26124 | 1358 1510\n## [5] II 169 --- II 39052 | 1358 1613\n## ... ... ... ... ... ... . ... ...\n## [471360] II 808605 --- II 809683 | 6316 6320\n## [471361] II 808609 --- II 809917 | 6316 6324\n## [471362] II 808617 --- II 809506 | 6316 6319\n## [471363] II 809447 --- II 809685 | 6319 6321\n## [471364] II 809472 --- II 809675 | 6319 6320\n## distance\n## <integer>\n## [1] 48443\n## [2] 44890\n## [3] 687132\n## [4] 25964\n## [5] 38883\n## ... ...\n## [471360] 1078\n## [471361] 1308\n## [471362] 889\n## [471363] 238\n## [471364] 203\n## -------\n## regions: 549331 ranges and 0 metadata columns\n## seqinfo: 1 sequence from an unspecified genome; no seqlengths\n\nWe can compute a P(s) per chromosome from this .pairs file using the distanceLaw function.\n\nlibrary(HiContacts)\nps <- distanceLaw(pf, by_chr = TRUE) \n## Importing pairs file /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 in memory. 
This may take a while...\nps\n## # A tibble: 115 × 6\n## chr binned_distance p norm_p norm_p_unity slope\n## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>\n## 1 II 14 0.00000212 0.00000106 2.27 0 \n## 2 II 16 0.0000170 0.0000170 36.4 1.56\n## 3 II 17 0.0000361 0.0000180 38.6 1.55\n## 4 II 19 0.0000424 0.0000212 45.5 1.55\n## 5 II 21 0.0000467 0.0000233 50.0 1.54\n## 6 II 23 0.0000870 0.0000290 62.1 1.53\n## # ℹ 109 more rows\n\nThe plotPs() and plotPsSlope() functions are convenient ggplot2-based functions with pre-configured settings optimized for P(s) visualization.\n\nlibrary(ggplot2)\nplotPs(ps, aes(x = binned_distance, y = norm_p, color = chr))\n## Warning: Removed 67 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\nplotPsSlope(ps, aes(x = binned_distance, y = slope, color = chr))\n## Warning: Removed 67 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\n\n\n6.1.2 P(s) for multiple .pairs files\nLet’s first import a second example dataset. We’ll import pairs identified in a eco1 yeast mutant.\n\neco1_pairsf <- HiContactsData('yeast_eco1', 'pairs.gz')\neco1_pf <- PairsFile(eco1_pairsf)\n\n\neco1_ps <- distanceLaw(eco1_pf, by_chr = TRUE) \n## Importing pairs file /root/.cache/R/ExperimentHub/f90656c7e8e_7755 in memory. 
This may take a while...\neco1_ps\n## # A tibble: 115 × 6\n## chr binned_distance p norm_p norm_p_unity slope\n## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>\n## 1 II 14 0.00000201 0.00000100 0.660 0 \n## 2 II 16 0.0000221 0.0000221 14.5 1.46\n## 3 II 17 0.0000492 0.0000246 16.2 1.46\n## 4 II 19 0.0000412 0.0000206 13.5 1.45\n## 5 II 21 0.0000653 0.0000326 21.5 1.45\n## 6 II 23 0.0000803 0.0000268 17.6 1.44\n## # ℹ 109 more rows\n\nA little data wrangling can help plotting the distance laws for 2 different samples in the same plot.\n\nlibrary(dplyr)\nmerged_ps <- rbind(\n ps |> mutate(sample = 'WT'), \n eco1_ps |> mutate(sample = 'eco1')\n)\nplotPs(merged_ps, aes(x = binned_distance, y = norm_p, color = sample, linetype = chr)) + \n scale_color_manual(values = c('#c6c6c6', '#ca0000'))\n## Warning: Removed 134 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\nplotPsSlope(merged_ps, aes(x = binned_distance, y = slope, color = sample, linetype = chr)) + \n scale_color_manual(values = c('#c6c6c6', '#ca0000'))\n## Warning: Removed 135 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\n\n\n6.1.3 P(s) from HiCExperiment objects\nAlternatively, distance laws can be computed from binned matrices directly by providing HiCExperiment objects. For deeply sequenced datasets, this can be significantly faster than when using original .pairs files, but the smoothness of the resulting curves will be greatly impacted, notably at short distances.\n\nps_from_hic <- distanceLaw(hic, by_chr = TRUE) \n## pairsFile not specified. The P(s) curve will be an approximation.\nplotPs(ps_from_hic, aes(x = binned_distance, y = norm_p))\n## Warning: Removed 9 rows containing missing values (`geom_line()`).\n\n\n\n\n\n\nplotPsSlope(ps_from_hic, aes(x = binned_distance, y = slope))\n## Warning: Removed 8 rows containing missing values (`geom_line()`)." 
}, { "objectID": "pages/interactions-centric.html#cistrans-ratios", @@ -235,7 +235,7 @@ "href": "pages/interactions-centric.html#scalograms", "title": "\n6  Interactions-centric analysis\n", "section": "\n6.4 Scalograms", - "text": "6.4 Scalograms\nScalograms were introduced in Lioy et al. (2018) to investigate distance-dependent contact frequencies for individual genomic bins along chromosomes.\nTo generate a scalogram, one needs to provide a HiCExperiment object with a valid associated pairsFile.\n\npairsFile(hic) <- pairsf\nscalo <- scalogram(hic) \n## Importing pairs file /root/.cache/R/ExperimentHub/174733eb553_7753 in memory. This may take a while...\nplotScalogram(scalo |> filter(chr == 'II'), ylim = c(1e3, 1e5))\n\n\n\n\n\n\n\nSeveral scalograms can be plotted together to compare distance-dependent contact frequencies along a given chromosome in different samples.\n\n\neco1_hic <- import(\n CoolFile(HiContactsData('yeast_eco1', 'mcool')), \n focus = 'II', \n resolution = 2000\n)\n## see ?HiContactsData and browseVignettes('HiContactsData') for documentation\n## loading from cache\neco1_pairsf <- HiContactsData('yeast_eco1', 'pairs.gz')\n## see ?HiContactsData and browseVignettes('HiContactsData') for documentation\n## loading from cache\npairsFile(eco1_hic) <- eco1_pairsf\neco1_scalo <- scalogram(eco1_hic) \n## Importing pairs file /root/.cache/R/ExperimentHub/fae309a2f91_7755 in memory. This may take a while...\nmerged_scalo <- rbind(\n scalo |> mutate(sample = 'WT'), \n eco1_scalo |> mutate(sample = 'eco1')\n)\nplotScalogram(merged_scalo |> filter(chr == 'II'), ylim = c(1e3, 1e5)) + \n facet_grid(~sample)\n\n\n\n\n\n\n\n\nThis example points out the overall longer interactions within the long arm of the chromosome II in an eco1 mutant." + "text": "6.4 Scalograms\nScalograms were introduced in Lioy et al. 
(2018) to investigate distance-dependent contact frequencies for individual genomic bins along chromosomes.\nTo generate a scalogram, one needs to provide a HiCExperiment object with a valid associated pairsFile.\n\npairsFile(hic) <- pairsf\nscalo <- scalogram(hic) \n## Importing pairs file /root/.cache/R/ExperimentHub/16f6ddc5c03_7753 in memory. This may take a while...\nplotScalogram(scalo |> filter(chr == 'II'), ylim = c(1e3, 1e5))\n\n\n\n\n\n\n\nSeveral scalograms can be plotted together to compare distance-dependent contact frequencies along a given chromosome in different samples.\n\n\neco1_hic <- import(\n CoolFile(HiContactsData('yeast_eco1', 'mcool')), \n focus = 'II', \n resolution = 2000\n)\n## see ?HiContactsData and browseVignettes('HiContactsData') for documentation\n## loading from cache\neco1_pairsf <- HiContactsData('yeast_eco1', 'pairs.gz')\n## see ?HiContactsData and browseVignettes('HiContactsData') for documentation\n## loading from cache\npairsFile(eco1_hic) <- eco1_pairsf\neco1_scalo <- scalogram(eco1_hic) \n## Importing pairs file /root/.cache/R/ExperimentHub/f90656c7e8e_7755 in memory. This may take a while...\nmerged_scalo <- rbind(\n scalo |> mutate(sample = 'WT'), \n eco1_scalo |> mutate(sample = 'eco1')\n)\nplotScalogram(merged_scalo |> filter(chr == 'II'), ylim = c(1e3, 1e5)) + \n facet_grid(~sample)\n\n\n\n\n\n\n\n\nThis example points out the overall longer interactions within the long arm of the chromosome II in an eco1 mutant." 
}, { "objectID": "pages/topological-features.html", @@ -249,14 +249,14 @@ "href": "pages/topological-features.html#chromosome-compartments", "title": "\n7  Finding topological features in Hi-C\n", "section": "\n7.1 Chromosome compartments", - "text": "7.1 Chromosome compartments\nChromosome compartments refer to the segregation of the chromatin into active euchromatin (A compartments) and regulated heterochromatin (B compartment).\n\n7.1.1 Importing Hi-C data\nTo investigate chromosome compartments, we will fetch a contact matrix generated from a micro-C experiment (from Krietenstein et al. (2020)). A subset of the genome-wide dataset is provided in the OHCA package. It contains intra-chromosomal interactions within chr17, binned at 5000, 100000 and 250000 bp.\n\nlibrary(HiCExperiment)\nlibrary(OHCA)\ncf <- fs::path_package('OHCA', 'extdata', 'chr17.mcool')\nmicroC <- import(cf, resolution = 250000)\nmicroC\n## `HiCExperiment` object with 10,086,710 contacts over 334 regions \n## -------\n## fileName: \"/tmp/RtmpVQDQnT/Rinst55b2b1d77/OHCA/extdata/chr17.mcool\" \n## focus: \"whole genome\" \n## resolutions(3): 5000 100000 250000\n## active resolution: 250000 \n## interactions: 52755 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nseqinfo(microC)\n## Seqinfo object with 1 sequence from an unspecified genome:\n## seqnames seqlengths isCircular genome\n## chr17 83257441 NA <NA>\n\n\n7.1.2 Annotating A/B compartments\nThe consensus approach to annotate A/B compartments is to compute the eigenvectors of a Hi-C contact matrix and identify the eigenvector representing the chromosome-wide bi-partite segmentation of the genome.\nThe getCompartments() function performs several internal operations to achieve this:\n\nObtains cis interactions per chromosome\nComputes O/E contact matrix scores\nComputes 3 first eigenvectors of this Hi-C contact matrix\nNormalizes 
eigenvectors\nPicks the eigenvector that has the greatest absolute correlation with a phasing track (e.g. a GC% track automatically computed from a genome reference sequence, or a gene density track)\nSigns this eigenvector so that positive values represent the A compartment\n\n\nphasing_track <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38\nmicroC_compts <- getCompartments(microC, genome = phasing_track)\n## Going through preflight checklist...\n## Parsing intra-chromosomal contacts for each chromosome...\n## Computing eigenvectors for each chromosome...\n\nmicroC_compts\n## `HiCExperiment` object with 10,086,710 contacts over 334 regions \n## -------\n## fileName: \"/tmp/RtmpVQDQnT/Rinst55b2b1d77/OHCA/extdata/chr17.mcool\" \n## focus: \"whole genome\" \n## resolutions(3): 5000 100000 250000\n## active resolution: 250000 \n## interactions: 52755 \n## scores(2): count balanced \n## topologicalFeatures: compartments(41) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(1): eigens\n\ngetCompartments() is an endomorphism: it returns the original object, enriched with two new pieces of information:\n\nA compartments topologicalFeatures:\n\n\ntopologicalFeatures(microC_compts, \"compartments\")\n## GRanges object with 41 ranges and 1 metadata column:\n## seqnames ranges strand | compartment\n## <Rle> <IRanges> <Rle> | <character>\n## [1] chr17 250001-3000000 * | A\n## [2] chr17 3000001-3500000 * | B\n## [3] chr17 3500001-5500000 * | A\n## [4] chr17 5500001-6500000 * | B\n## [5] chr17 6500001-8500000 * | A\n## ... ... ... ... . 
...\n## [37] chr17 72750001-73250000 * | A\n## [38] chr17 73250001-74750000 * | B\n## [39] chr17 74750001-79250000 * | A\n## [40] chr17 79250001-79750000 * | B\n## [41] chr17 79750001-83250000 * | A\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\nThe calculated eigenvectors stored in metadata:\n\n\nmetadata(microC_compts)$eigens\n## GRanges object with 334 ranges and 9 metadata columns:\n## seqnames ranges strand |\n## <Rle> <IRanges> <Rle> |\n## chr17.chr17_1_250000 chr17 1-250000 * |\n## chr17.chr17_250001_500000 chr17 250001-500000 * |\n## chr17.chr17_500001_750000 chr17 500001-750000 * |\n## chr17.chr17_750001_1000000 chr17 750001-1000000 * |\n## chr17.chr17_1000001_1250000 chr17 1000001-1250000 * |\n## ... ... ... ... .\n## chr17.chr17_82250001_82500000 chr17 82250001-82500000 * |\n## chr17.chr17_82500001_82750000 chr17 82500001-82750000 * |\n## chr17.chr17_82750001_83000000 chr17 82750001-83000000 * |\n## chr17.chr17_83000001_83250000 chr17 83000001-83250000 * |\n## chr17.chr17_83250001_83257441 chr17 83250001-83257441 * |\n## bin_id weight chr center\n## <numeric> <numeric> <Rle> <integer>\n## chr17.chr17_1_250000 0 NaN chr17 125000\n## chr17.chr17_250001_500000 1 0.00626903 chr17 375000\n## chr17.chr17_500001_750000 2 0.00567190 chr17 625000\n## chr17.chr17_750001_1000000 3 0.00528588 chr17 875000\n## chr17.chr17_1000001_1250000 4 0.00464628 chr17 1125000\n## ... ... ... ... 
...\n## chr17.chr17_82250001_82500000 329 0.00463044 chr17 82375000\n## chr17.chr17_82500001_82750000 330 0.00486910 chr17 82625000\n## chr17.chr17_82750001_83000000 331 0.00561269 chr17 82875000\n## chr17.chr17_83000001_83250000 332 0.00546433 chr17 83125000\n## chr17.chr17_83250001_83257441 333 NaN chr17 83253721\n## E1 E2 E3 phasing\n## <numeric> <numeric> <numeric> <numeric>\n## chr17.chr17_1_250000 0.000000 0.000000 0.000000 0.383084\n## chr17.chr17_250001_500000 0.450991 0.653287 0.615300 0.433972\n## chr17.chr17_500001_750000 0.716784 0.707461 0.845033 0.465556\n## chr17.chr17_750001_1000000 0.904423 0.414952 0.864288 0.503592\n## chr17.chr17_1000001_1250000 0.913023 0.266287 0.759016 0.547712\n## ... ... ... ... ...\n## chr17.chr17_82250001_82500000 1.147060 0.239112 1.133498 0.550872\n## chr17.chr17_82500001_82750000 1.106937 0.419647 1.169464 0.513212\n## chr17.chr17_82750001_83000000 0.818990 0.591955 0.850340 0.522432\n## chr17.chr17_83000001_83250000 0.874038 0.503175 0.847926 0.528448\n## chr17.chr17_83250001_83257441 0.000000 0.000000 0.000000 0.000000\n## eigen\n## <numeric>\n## chr17.chr17_1_250000 0.000000\n## chr17.chr17_250001_500000 0.450991\n## chr17.chr17_500001_750000 0.716784\n## chr17.chr17_750001_1000000 0.904423\n## chr17.chr17_1000001_1250000 0.913023\n## ... 
...\n## chr17.chr17_82250001_82500000 1.147060\n## chr17.chr17_82500001_82750000 1.106937\n## chr17.chr17_82750001_83000000 0.818990\n## chr17.chr17_83000001_83250000 0.874038\n## chr17.chr17_83250001_83257441 0.000000\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\n7.1.3 Exporting compartment tracks\nTo save the eigenvector (as a bigwig file) and the compartments(as a gff file), the export function can be used:\n\nlibrary(GenomicRanges)\nlibrary(rtracklayer)\ncoverage(metadata(microC_compts)$eigens, weight = 'eigen') |> export('microC_eigen.bw')\ntopologicalFeatures(microC_compts, \"compartments\") |> export('microC_compartments.gff3')\n\n\n7.1.4 Visualizing compartment tracks\nCompartment tracks should be visualized in a dedicated genome browser, with the phasing track loaded as well, to ensure they are phased accordingly.\nThat being said, it is possible to visualize a genome track in R besides the matching Hi-C contact matrix.\n\nlibrary(ggplot2)\nlibrary(patchwork)\nmicroC <- autocorrelate(microC)\n## \np1 <- plotMatrix(microC, use.scores = 'autocorrelated', scale = 'linear', limits = c(-1, 1), caption = FALSE)\neigen <- coverage(metadata(microC_compts)$eigens, weight = 'eigen')[[1]]\neigen_df <- tibble(pos = cumsum(runLength(eigen)), eigen = runValue(eigen))\np2 <- ggplot(eigen_df, aes(x = pos, y = eigen)) + \n geom_area() + \n theme_void() + \n coord_cartesian(expand = FALSE) + \n labs(x = \"Genomic position\", y = \"Eigenvector value\")\nwrap_plots(p1, p2, ncol = 1, heights = c(10, 1))\n\n\n\n\n\n\n\nHere, we clearly note the concordance between the Hi-C correlation matrix, highlighting correlated interactions between pairs of genomic segments, and the eigenvector representing chromosome segmentation into 2 compartments: A (for positive values) and B (for negative values).\n\n7.1.5 Saddle plots\nSaddle plots are typically used to measure the observed vs. 
expected interaction scores within or between genomic loci belonging to A and B compartments.\nNon-overlapping genomic windows are grouped in nbins quantiles (typically between 10 and 50 quantiles) according to their A/B compartment eigenvector value, from lowest eigenvector values (i.e. strongest B compartments) to highest eigenvector values (i.e. strongest A compartments). The average observed vs. expected interaction scores are then computed for pairwise eigenvector quantiles and plotted in a 2D heatmap.\n\nlibrary(BiocParallel)\nplotSaddle(microC_compts, nbins = 25, BPPARAM = SerialParam(progressbar = FALSE))\n\n\n\n\n\n\n\nHere, the top-left small corner represents average O/E scores between strong B compartments and the bottom-right larger corner represents average O/E scores between strong A compartments. Note that only chr17 interactions are contained in this dataset, explaining the grainy aspect of the saddle plot." + "text": "7.1 Chromosome compartments\nChromosome compartments refer to the segregation of the chromatin into active euchromatin (A compartments) and regulated heterochromatin (B compartment).\n\n7.1.1 Importing Hi-C data\nTo investigate chromosome compartments, we will fetch a contact matrix generated from a micro-C experiment (from Krietenstein et al. (2020)). A subset of the genome-wide dataset is provided in the OHCA package. 
It contains intra-chromosomal interactions within chr17, binned at 5000, 100000 and 250000 bp.\n\nlibrary(HiCExperiment)\nlibrary(OHCA)\ncf <- fs::path_package('OHCA', 'extdata', 'chr17.mcool')\nmicroC <- import(cf, resolution = 250000)\nmicroC\n## `HiCExperiment` object with 10,086,710 contacts over 334 regions \n## -------\n## fileName: \"/tmp/RtmpyjIh7u/Rinst5327a02592/OHCA/extdata/chr17.mcool\" \n## focus: \"whole genome\" \n## resolutions(3): 5000 100000 250000\n## active resolution: 250000 \n## interactions: 52755 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(0):\n\nseqinfo(microC)\n## Seqinfo object with 1 sequence from an unspecified genome:\n## seqnames seqlengths isCircular genome\n## chr17 83257441 NA <NA>\n\n\n7.1.2 Annotating A/B compartments\nThe consensus approach to annotate A/B compartments is to compute the eigenvectors of a Hi-C contact matrix and identify the eigenvector representing the chromosome-wide bi-partite segmentation of the genome.\nThe getCompartments() function performs several internal operations to achieve this:\n\nObtains cis interactions per chromosome\nComputes O/E contact matrix scores\nComputes 3 first eigenvectors of this Hi-C contact matrix\nNormalizes eigenvectors\nPicks the eigenvector that has the greatest absolute correlation with a phasing track (e.g. 
a GC% track automatically computed from a genome reference sequence, or a gene density track)\nSigns this eigenvector so that positive values represent the A compartment\n\n\nphasing_track <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38\nmicroC_compts <- getCompartments(microC, genome = phasing_track)\n## Going through preflight checklist...\n## Parsing intra-chromosomal contacts for each chromosome...\n## Computing eigenvectors for each chromosome...\n\nmicroC_compts\n## `HiCExperiment` object with 10,086,710 contacts over 334 regions \n## -------\n## fileName: \"/tmp/RtmpyjIh7u/Rinst5327a02592/OHCA/extdata/chr17.mcool\" \n## focus: \"whole genome\" \n## resolutions(3): 5000 100000 250000\n## active resolution: 250000 \n## interactions: 52755 \n## scores(2): count balanced \n## topologicalFeatures: compartments(41) borders(0) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(1): eigens\n\ngetCompartments() is an endomorphism: it returns the original object, enriched with two new pieces of information:\n\nA compartments topologicalFeatures:\n\n\ntopologicalFeatures(microC_compts, \"compartments\")\n## GRanges object with 41 ranges and 1 metadata column:\n## seqnames ranges strand | compartment\n## <Rle> <IRanges> <Rle> | <character>\n## [1] chr17 250001-3000000 * | A\n## [2] chr17 3000001-3500000 * | B\n## [3] chr17 3500001-5500000 * | A\n## [4] chr17 5500001-6500000 * | B\n## [5] chr17 6500001-8500000 * | A\n## ... ... ... ... . 
...\n## [37] chr17 72750001-73250000 * | A\n## [38] chr17 73250001-74750000 * | B\n## [39] chr17 74750001-79250000 * | A\n## [40] chr17 79250001-79750000 * | B\n## [41] chr17 79750001-83250000 * | A\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\nThe calculated eigenvectors stored in metadata:\n\n\nmetadata(microC_compts)$eigens\n## GRanges object with 334 ranges and 9 metadata columns:\n## seqnames ranges strand |\n## <Rle> <IRanges> <Rle> |\n## chr17.chr17_1_250000 chr17 1-250000 * |\n## chr17.chr17_250001_500000 chr17 250001-500000 * |\n## chr17.chr17_500001_750000 chr17 500001-750000 * |\n## chr17.chr17_750001_1000000 chr17 750001-1000000 * |\n## chr17.chr17_1000001_1250000 chr17 1000001-1250000 * |\n## ... ... ... ... .\n## chr17.chr17_82250001_82500000 chr17 82250001-82500000 * |\n## chr17.chr17_82500001_82750000 chr17 82500001-82750000 * |\n## chr17.chr17_82750001_83000000 chr17 82750001-83000000 * |\n## chr17.chr17_83000001_83250000 chr17 83000001-83250000 * |\n## chr17.chr17_83250001_83257441 chr17 83250001-83257441 * |\n## bin_id weight chr center\n## <numeric> <numeric> <Rle> <integer>\n## chr17.chr17_1_250000 0 NaN chr17 125000\n## chr17.chr17_250001_500000 1 0.00626903 chr17 375000\n## chr17.chr17_500001_750000 2 0.00567190 chr17 625000\n## chr17.chr17_750001_1000000 3 0.00528588 chr17 875000\n## chr17.chr17_1000001_1250000 4 0.00464628 chr17 1125000\n## ... ... ... ... 
...\n## chr17.chr17_82250001_82500000 329 0.00463044 chr17 82375000\n## chr17.chr17_82500001_82750000 330 0.00486910 chr17 82625000\n## chr17.chr17_82750001_83000000 331 0.00561269 chr17 82875000\n## chr17.chr17_83000001_83250000 332 0.00546433 chr17 83125000\n## chr17.chr17_83250001_83257441 333 NaN chr17 83253721\n## E1 E2 E3 phasing\n## <numeric> <numeric> <numeric> <numeric>\n## chr17.chr17_1_250000 0.000000 0.000000 0.000000 0.383084\n## chr17.chr17_250001_500000 0.450991 0.653287 0.615300 0.433972\n## chr17.chr17_500001_750000 0.716784 0.707461 0.845033 0.465556\n## chr17.chr17_750001_1000000 0.904423 0.414952 0.864288 0.503592\n## chr17.chr17_1000001_1250000 0.913023 0.266287 0.759016 0.547712\n## ... ... ... ... ...\n## chr17.chr17_82250001_82500000 1.147060 0.239112 1.133498 0.550872\n## chr17.chr17_82500001_82750000 1.106937 0.419647 1.169464 0.513212\n## chr17.chr17_82750001_83000000 0.818990 0.591955 0.850340 0.522432\n## chr17.chr17_83000001_83250000 0.874038 0.503175 0.847926 0.528448\n## chr17.chr17_83250001_83257441 0.000000 0.000000 0.000000 0.000000\n## eigen\n## <numeric>\n## chr17.chr17_1_250000 0.000000\n## chr17.chr17_250001_500000 0.450991\n## chr17.chr17_500001_750000 0.716784\n## chr17.chr17_750001_1000000 0.904423\n## chr17.chr17_1000001_1250000 0.913023\n## ... 
...\n## chr17.chr17_82250001_82500000 1.147060\n## chr17.chr17_82500001_82750000 1.106937\n## chr17.chr17_82750001_83000000 0.818990\n## chr17.chr17_83000001_83250000 0.874038\n## chr17.chr17_83250001_83257441 0.000000\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\n7.1.3 Exporting compartment tracks\nTo save the eigenvector (as a bigwig file) and the compartments(as a gff file), the export function can be used:\n\nlibrary(GenomicRanges)\nlibrary(rtracklayer)\ncoverage(metadata(microC_compts)$eigens, weight = 'eigen') |> export('microC_eigen.bw')\ntopologicalFeatures(microC_compts, \"compartments\") |> export('microC_compartments.gff3')\n\n\n7.1.4 Visualizing compartment tracks\nCompartment tracks should be visualized in a dedicated genome browser, with the phasing track loaded as well, to ensure they are phased accordingly.\nThat being said, it is possible to visualize a genome track in R besides the matching Hi-C contact matrix.\n\nlibrary(ggplot2)\nlibrary(patchwork)\nmicroC <- autocorrelate(microC)\n## \np1 <- plotMatrix(microC, use.scores = 'autocorrelated', scale = 'linear', limits = c(-1, 1), caption = FALSE)\neigen <- coverage(metadata(microC_compts)$eigens, weight = 'eigen')[[1]]\neigen_df <- tibble(pos = cumsum(runLength(eigen)), eigen = runValue(eigen))\np2 <- ggplot(eigen_df, aes(x = pos, y = eigen)) + \n geom_area() + \n theme_void() + \n coord_cartesian(expand = FALSE) + \n labs(x = \"Genomic position\", y = \"Eigenvector value\")\nwrap_plots(p1, p2, ncol = 1, heights = c(10, 1))\n\n\n\n\n\n\n\nHere, we clearly note the concordance between the Hi-C correlation matrix, highlighting correlated interactions between pairs of genomic segments, and the eigenvector representing chromosome segmentation into 2 compartments: A (for positive values) and B (for negative values).\n\n7.1.5 Saddle plots\nSaddle plots are typically used to measure the observed vs. 
expected interaction scores within or between genomic loci belonging to A and B compartments.\nNon-overlapping genomic windows are grouped in nbins quantiles (typically between 10 and 50 quantiles) according to their A/B compartment eigenvector value, from lowest eigenvector values (i.e. strongest B compartments) to highest eigenvector values (i.e. strongest A compartments). The average observed vs. expected interaction scores are then computed for pairwise eigenvector quantiles and plotted in a 2D heatmap.\n\nlibrary(BiocParallel)\nplotSaddle(microC_compts, nbins = 25, BPPARAM = SerialParam(progressbar = FALSE))\n\n\n\n\n\n\n\nHere, the top-left small corner represents average O/E scores between strong B compartments and the bottom-right larger corner represents average O/E scores between strong A compartments. Note that only chr17 interactions are contained in this dataset, explaining the grainy aspect of the saddle plot." }, { "objectID": "pages/topological-features.html#topological-domains", "href": "pages/topological-features.html#topological-domains", "title": "\n7  Finding topological features in Hi-C\n", "section": "\n7.2 Topological domains", - "text": "7.2 Topological domains\nTopological domains (a.k.a. Topologically Associating Domains, TADs, isolated neighborhoods, contact domains, …) refer to local chromosomal segments (e.b. roughly ≤ 1Mb in mammal genomes) which preferentially self-interact, in a constrained manner. They are demarcated by domain boundaries.\n\n\n\n\nThey are generally conserved across cell types and species (Schmitt et al. (2016)), typically correlate with units of DNA replication (Pope et al. (2014)), and could play a role during development (Stadhouders et al. (2019)).\n\n7.2.1 Computing diamond insulation score\nSeveral approaches exist to annotate topological domains (Sefer (2022)). Several packages in R implement some of these functionalities, e.g. 
spectralTAD or TADcompare.\nHiContacts offers a simple getDiamondInsulation function which computes the diamond insulation score (Crane et al. (2015)). This score quantifies average interaction frequency in an insulation window (of a certain window_size) sliding along contact matrices at a chosen resolution.\n\n# - Compute insulation score\nbpparam <- SerialParam(progressbar = FALSE)\nhic <- zoom(microC, 5000) |> \n refocus('chr17:60000001-83257441') |>\n getDiamondInsulation(window_size = 100000, BPPARAM = bpparam) |> \n getBorders()\n## Going through preflight checklist...\n## Scan each window and compute diamond insulation score...\n## Annotating diamond score prominence for each window...\n\nhic\n## `HiCExperiment` object with 2,156,222 contacts over 4,652 regions \n## -------\n## fileName: \"/tmp/RtmpVQDQnT/Rinst55b2b1d77/OHCA/extdata/chr17.mcool\" \n## focus: \"chr17:60,000,001-83,257,441\" \n## resolutions(3): 5000 100000 250000\n## active resolution: 5000 \n## interactions: 2156044 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(21) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(1): insulation\n\ngetDiamondInsulation() is an endomorphism: it returns the original object, enriched with two new pieces of information:\n\nA borders topologicalFeatures:\n\n\ntopologicalFeatures(hic, \"borders\")\n## GRanges object with 21 ranges and 1 metadata column:\n## seqnames ranges strand | score\n## <Rle> <IRanges> <Rle> | <numeric>\n## strong chr17 60105001-60110000 * | 0.574760\n## weak chr17 60210001-60215000 * | 0.414425\n## weak chr17 61415001-61420000 * | 0.346668\n## strong chr17 61500001-61505000 * | 0.544336\n## weak chr17 62930001-62935000 * | 0.399794\n## ... ... ... ... . 
...\n## weak chr17 78395001-78400000 * | 0.235613\n## weak chr17 79065001-79070000 * | 0.236535\n## weak chr17 80155001-80160000 * | 0.284855\n## weak chr17 81735001-81740000 * | 0.497478\n## strong chr17 81840001-81845000 * | 1.395949\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\nThe calculated insulation scores stored in metadata:\n\n\nmetadata(hic)$insulation\n## GRanges object with 4611 ranges and 8 metadata columns:\n## seqnames ranges strand | bin_id\n## <Rle> <IRanges> <Rle> | <numeric>\n## chr17_60100001_60105000 chr17 60100001-60105000 * | 12020\n## chr17_60105001_60110000 chr17 60105001-60110000 * | 12021\n## chr17_60110001_60115000 chr17 60110001-60115000 * | 12022\n## chr17_60115001_60120000 chr17 60115001-60120000 * | 12023\n## chr17_60120001_60125000 chr17 60120001-60125000 * | 12024\n## ... ... ... ... . ...\n## chr17_83130001_83135000 chr17 83130001-83135000 * | 16626\n## chr17_83135001_83140000 chr17 83135001-83140000 * | 16627\n## chr17_83140001_83145000 chr17 83140001-83145000 * | 16628\n## chr17_83145001_83150000 chr17 83145001-83150000 * | 16629\n## chr17_83150001_83155000 chr17 83150001-83155000 * | 16630\n## weight chr center score insulation\n## <numeric> <Rle> <integer> <numeric> <numeric>\n## chr17_60100001_60105000 0.0406489 chr17 60102500 0.188061 -0.750142\n## chr17_60105001_60110000 0.0255539 chr17 60107500 0.180860 -0.806466\n## chr17_60110001_60115000 NaN chr17 60112500 0.196579 -0.686232\n## chr17_60115001_60120000 NaN chr17 60117500 0.216039 -0.550046\n## chr17_60120001_60125000 NaN chr17 60122500 0.230035 -0.459489\n## ... ... ... ... ... 
...\n## chr17_83130001_83135000 0.0314684 chr17 83132500 0.262191 -0.270723\n## chr17_83135001_83140000 0.0307197 chr17 83137500 0.240779 -0.393632\n## chr17_83140001_83145000 0.0322810 chr17 83142500 0.219113 -0.529664\n## chr17_83145001_83150000 0.0280840 chr17 83147500 0.199645 -0.663900\n## chr17_83150001_83155000 0.0272775 chr17 83152500 0.180434 -0.809873\n## min prominence\n## <logical> <numeric>\n## chr17_60100001_60105000 FALSE NA\n## chr17_60105001_60110000 TRUE 0.57476\n## chr17_60110001_60115000 FALSE NA\n## chr17_60115001_60120000 FALSE NA\n## chr17_60120001_60125000 FALSE NA\n## ... ... ...\n## chr17_83130001_83135000 FALSE NA\n## chr17_83135001_83140000 FALSE NA\n## chr17_83140001_83145000 FALSE NA\n## chr17_83145001_83150000 FALSE NA\n## chr17_83150001_83155000 FALSE NA\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\n\n\n\n\n\nNote\n\n\n\nThe getDiamondInsulation function can be parallelized over multiple threads by specifying the Bioconductor generic BPPARAM argument.\n\n\n\n7.2.2 Exporting insulation scores tracks\nTo save the diamond insulation scores (as a bigwig file) and the borders (as a bed file), the export function can be used:\n\ncoverage(metadata(hic)$insulation, weight = 'insulation') |> export('microC_insulation.bw')\ntopologicalFeatures(hic, \"borders\") |> export('microC_borders.bed')\n\n\n7.2.3 Visualizing chromatin domains\nInsulation tracks should be visualized in a dedicated genome browser.\nThat being said, it is possible to visualize a genome track in R besides the matching Hi-C contact matrix.\n\nhic <- zoom(hic, 100000)\np1 <- plotMatrix(\n hic, \n use.scores = 'balanced', \n limits = c(-3.5, -1),\n borders = topologicalFeatures(hic, \"borders\"),\n caption = FALSE\n)\ninsulation <- coverage(metadata(hic)$insulation, weight = 'insulation')[[1]]\ninsulation_df <- tibble(pos = cumsum(runLength(insulation)), insulation = runValue(insulation))\np2 <- ggplot(insulation_df, aes(x = pos, y = insulation)) + \n 
geom_area() + \n theme_void() + \n coord_cartesian(expand = FALSE) + \n labs(x = \"Genomic position\", y = \"Diamond insulation score\")\nwrap_plots(p1, p2, ncol = 1, heights = c(10, 1))\n\n\n\n\n\n\n\nLocal minima in the diamond insulation score displayed below the Hi-C contact matrix are identified using the getBorders() function, which automatically estimates a minimum threshold. These local minima correspond to borders and are visually depicted on the Hi-C map by blue diamonds." + "text": "7.2 Topological domains\nTopological domains (a.k.a. Topologically Associating Domains, TADs, isolated neighborhoods, contact domains, …) refer to local chromosomal segments (e.b. roughly ≤ 1Mb in mammal genomes) which preferentially self-interact, in a constrained manner. They are demarcated by domain boundaries.\n\n\n\n\nThey are generally conserved across cell types and species (Schmitt et al. (2016)), typically correlate with units of DNA replication (Pope et al. (2014)), and could play a role during development (Stadhouders et al. (2019)).\n\n7.2.1 Computing diamond insulation score\nSeveral approaches exist to annotate topological domains (Sefer (2022)). Several packages in R implement some of these functionalities, e.g. spectralTAD or TADcompare.\nHiContacts offers a simple getDiamondInsulation function which computes the diamond insulation score (Crane et al. (2015)). 
This score quantifies average interaction frequency in an insulation window (of a certain window_size) sliding along contact matrices at a chosen resolution.\n\n# - Compute insulation score\nbpparam <- SerialParam(progressbar = FALSE)\nhic <- zoom(microC, 5000) |> \n refocus('chr17:60000001-83257441') |>\n getDiamondInsulation(window_size = 100000, BPPARAM = bpparam) |> \n getBorders()\n## Going through preflight checklist...\n## Scan each window and compute diamond insulation score...\n## Annotating diamond score prominence for each window...\n\nhic\n## `HiCExperiment` object with 2,156,222 contacts over 4,652 regions \n## -------\n## fileName: \"/tmp/RtmpyjIh7u/Rinst5327a02592/OHCA/extdata/chr17.mcool\" \n## focus: \"chr17:60,000,001-83,257,441\" \n## resolutions(3): 5000 100000 250000\n## active resolution: 5000 \n## interactions: 2156044 \n## scores(2): count balanced \n## topologicalFeatures: compartments(0) borders(21) loops(0) viewpoints(0) \n## pairsFile: N/A \n## metadata(1): insulation\n\ngetDiamondInsulation() is an endomorphism: it returns the original object, enriched with two new pieces of information:\n\nA borders topologicalFeatures:\n\n\ntopologicalFeatures(hic, \"borders\")\n## GRanges object with 21 ranges and 1 metadata column:\n## seqnames ranges strand | score\n## <Rle> <IRanges> <Rle> | <numeric>\n## strong chr17 60105001-60110000 * | 0.574760\n## weak chr17 60210001-60215000 * | 0.414425\n## weak chr17 61415001-61420000 * | 0.346668\n## strong chr17 61500001-61505000 * | 0.544336\n## weak chr17 62930001-62935000 * | 0.399794\n## ... ... ... ... . 
...\n## weak chr17 78395001-78400000 * | 0.235613\n## weak chr17 79065001-79070000 * | 0.236535\n## weak chr17 80155001-80160000 * | 0.284855\n## weak chr17 81735001-81740000 * | 0.497478\n## strong chr17 81840001-81845000 * | 1.395949\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\nThe calculated insulation scores stored in metadata:\n\n\nmetadata(hic)$insulation\n## GRanges object with 4611 ranges and 8 metadata columns:\n## seqnames ranges strand | bin_id\n## <Rle> <IRanges> <Rle> | <numeric>\n## chr17_60100001_60105000 chr17 60100001-60105000 * | 12020\n## chr17_60105001_60110000 chr17 60105001-60110000 * | 12021\n## chr17_60110001_60115000 chr17 60110001-60115000 * | 12022\n## chr17_60115001_60120000 chr17 60115001-60120000 * | 12023\n## chr17_60120001_60125000 chr17 60120001-60125000 * | 12024\n## ... ... ... ... . ...\n## chr17_83130001_83135000 chr17 83130001-83135000 * | 16626\n## chr17_83135001_83140000 chr17 83135001-83140000 * | 16627\n## chr17_83140001_83145000 chr17 83140001-83145000 * | 16628\n## chr17_83145001_83150000 chr17 83145001-83150000 * | 16629\n## chr17_83150001_83155000 chr17 83150001-83155000 * | 16630\n## weight chr center score insulation\n## <numeric> <Rle> <integer> <numeric> <numeric>\n## chr17_60100001_60105000 0.0406489 chr17 60102500 0.188061 -0.750142\n## chr17_60105001_60110000 0.0255539 chr17 60107500 0.180860 -0.806466\n## chr17_60110001_60115000 NaN chr17 60112500 0.196579 -0.686232\n## chr17_60115001_60120000 NaN chr17 60117500 0.216039 -0.550046\n## chr17_60120001_60125000 NaN chr17 60122500 0.230035 -0.459489\n## ... ... ... ... ... 
...\n## chr17_83130001_83135000 0.0314684 chr17 83132500 0.262191 -0.270723\n## chr17_83135001_83140000 0.0307197 chr17 83137500 0.240779 -0.393632\n## chr17_83140001_83145000 0.0322810 chr17 83142500 0.219113 -0.529664\n## chr17_83145001_83150000 0.0280840 chr17 83147500 0.199645 -0.663900\n## chr17_83150001_83155000 0.0272775 chr17 83152500 0.180434 -0.809873\n## min prominence\n## <logical> <numeric>\n## chr17_60100001_60105000 FALSE NA\n## chr17_60105001_60110000 TRUE 0.57476\n## chr17_60110001_60115000 FALSE NA\n## chr17_60115001_60120000 FALSE NA\n## chr17_60120001_60125000 FALSE NA\n## ... ... ...\n## chr17_83130001_83135000 FALSE NA\n## chr17_83135001_83140000 FALSE NA\n## chr17_83140001_83145000 FALSE NA\n## chr17_83145001_83150000 FALSE NA\n## chr17_83150001_83155000 FALSE NA\n## -------\n## seqinfo: 1 sequence from an unspecified genome\n\n\n\n\n\n\n\nNote\n\n\n\nThe getDiamondInsulation function can be parallelized over multiple threads by specifying the Bioconductor generic BPPARAM argument.\n\n\n\n7.2.2 Exporting insulation scores tracks\nTo save the diamond insulation scores (as a bigwig file) and the borders (as a bed file), the export function can be used:\n\ncoverage(metadata(hic)$insulation, weight = 'insulation') |> export('microC_insulation.bw')\ntopologicalFeatures(hic, \"borders\") |> export('microC_borders.bed')\n\n\n7.2.3 Visualizing chromatin domains\nInsulation tracks should be visualized in a dedicated genome browser.\nThat being said, it is possible to visualize a genome track in R besides the matching Hi-C contact matrix.\n\nhic <- zoom(hic, 100000)\np1 <- plotMatrix(\n hic, \n use.scores = 'balanced', \n limits = c(-3.5, -1),\n borders = topologicalFeatures(hic, \"borders\"),\n caption = FALSE\n)\ninsulation <- coverage(metadata(hic)$insulation, weight = 'insulation')[[1]]\ninsulation_df <- tibble(pos = cumsum(runLength(insulation)), insulation = runValue(insulation))\np2 <- ggplot(insulation_df, aes(x = pos, y = insulation)) + \n 
geom_area() + \n theme_void() + \n coord_cartesian(expand = FALSE) + \n labs(x = \"Genomic position\", y = \"Diamond insulation score\")\nwrap_plots(p1, p2, ncol = 1, heights = c(10, 1))\n\n\n\n\n\n\n\nLocal minima in the diamond insulation score displayed below the Hi-C contact matrix are identified using the getBorders() function, which automatically estimates a minimum threshold. These local minima correspond to borders and are visually depicted on the Hi-C map by blue diamonds." }, { "objectID": "pages/topological-features.html#chromatin-loops", @@ -319,7 +319,7 @@ "href": "pages/interoperability.html#gothic", "title": "\n9  Interoperability: using HiCExperiment with other R packages\n", "section": "\n9.4 GOTHiC", - "text": "9.4 GOTHiC\nGOTHiC relies on a cumulative binomial test to detect interactions between distal genomic loci that have significantly more reads than expected by chance in Hi-C experiments (Mifsud et al. (2017)).\n\n\n\n\n\n\nUsing the GOTHiC function\n\n\n\nUnfortunately, the main GOTHiC function require two .bam files as input. These files are often deleted due to their larger size, while the filtered pairs file itself is retained.\nMoreover, the internal nuts and bolts of the main GOTHiC function perform several operations that are not required in modern workflows:\n\n\nFiltering pairs from same restriction fragment; this step is now usually taken care of automatically, e.g. with HiCool Hi-C processing package.\n\nFiltering short-range pairs; the GOTHiC package hard-codes a 10kb lower threshold for minimum pair distance. More advanced optimized filtering approaches have been implemented since then, to circumvent the need for such hard-coded threshold.\n\nBinning pairs; this step is also already taken care of, when working with Hi-C matrices in modern formats, e.g. 
with .(m)cool files.\n\n\n\nBased on these facts, we can simplify the binomial test function provided by GOTHiC so that it can directly used binned interactions imported as a HiCExperiment object in R.\n\nShow the code for GOTHiC_binomial functionGOTHiC_binomial <- function(x) {\n\n if (length(trans(x)) != 0) stop(\"Only `cis` interactions can be used here.\")\n ints <- interactions(x) |>\n as.data.frame() |> \n select(seqnames1, start1, seqnames2, start2, count) |>\n dplyr::rename(chr1 = seqnames1, locus1 = start1, chr2 = seqnames2, locus2 = start2, frequencies = count) |>\n mutate(locus1 = locus1 - 1, locus2 = locus2 - 1) |>\n mutate(int1 = paste0(chr1, '_', locus1), int2 = paste0(chr2, '_', locus2))\n \n numberOfReadPairs <- sum(ints$frequencies)\n all_bins <- unique(c(unique(ints$int1), unique(ints$int2)))\n all_bins <- sort(all_bins)\n upperhalfBinNumber <- (length(all_bins)^2 - length(all_bins))/2\n\n cov <- ints |> \n group_by(int1) |> \n tally(frequencies) |> \n full_join(ints |> \n group_by(int2) |> \n tally(frequencies), \n by = c('int1' = 'int2')\n ) |> \n rowwise() |> \n mutate(coverage = sum(n.x, n.y, na.rm = TRUE)) |> \n ungroup() |>\n mutate(relative_coverage = coverage/sum(coverage))\n \n results <- mutate(ints,\n cov1 = left_join(ints, select(cov, int1, relative_coverage), by = c('int1' = 'int1'))$relative_coverage, \n cov2 = left_join(ints, select(cov, int1, relative_coverage), by = c('int2' = 'int1'))$relative_coverage,\n probability = cov1 * cov2 * 2 * 1/(1 - sum(cov$relative_coverage^2)),\n predicted = probability * numberOfReadPairs\n ) |> \n rowwise() |>\n mutate(\n pvalue = binom.test(\n frequencies, \n numberOfReadPairs, \n probability,\n alternative = \"greater\"\n )$p.value\n ) |> \n ungroup() |> \n mutate(\n logFoldChange = log2(frequencies / predicted), \n qvalue = stats::p.adjust(pvalue, method = \"BH\", n = upperhalfBinNumber)\n )\n\n scores(x, \"probability\") <- results$probability\n scores(x, \"predicted\") <- results$predicted\n 
scores(x, \"pvalue\") <- results$pvalue\n scores(x, \"qvalue\") <- results$qvalue\n scores(x, \"logFoldChange\") <- results$logFoldChange\n\n return(x)\n\n} \n\n\n\nres <- GOTHiC_binomial(hic[\"II\"])\nres\n## `HiCExperiment` object with 471,364 contacts over 802 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/1747aa0ffd6_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 74360 \n## scores(7): count balanced probability predicted pvalue qvalue logFoldChange \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) domain(52) \n## pairsFile: N/A \n## metadata(0):\n\ninteractions(res)\n## GInteractions object with 74360 interactions and 9 metadata columns:\n## seqnames1 ranges1 strand1 seqnames2 ranges2\n## <Rle> <IRanges> <Rle> <Rle> <IRanges>\n## [1] II 1-1000 * --- II 1001-2000\n## [2] II 1-1000 * --- II 5001-6000\n## [3] II 1-1000 * --- II 6001-7000\n## [4] II 1-1000 * --- II 8001-9000\n## [5] II 1-1000 * --- II 9001-10000\n## ... ... ... ... ... ... ...\n## [74356] II 807001-808000 * --- II 809001-810000\n## [74357] II 807001-808000 * --- II 810001-811000\n## [74358] II 808001-809000 * --- II 808001-809000\n## [74359] II 808001-809000 * --- II 809001-810000\n## [74360] II 809001-810000 * --- II 809001-810000\n## strand2 | bin_id1 bin_id2 count balanced probability\n## <Rle> | <numeric> <numeric> <numeric> <numeric> <numeric>\n## [1] * | 231 232 1 NaN 7.83580e-09\n## [2] * | 231 236 2 NaN 2.81318e-08\n## [3] * | 231 237 1 NaN 2.02960e-08\n## [4] * | 231 239 2 NaN 6.73108e-08\n## [5] * | 231 240 3 NaN 7.37336e-08\n## ... ... . ... ... ... ... 
...\n## [74356] * | 1038 1040 8 0.0472023 3.85638e-07\n## [74357] * | 1038 1041 1 NaN 5.03006e-08\n## [74358] * | 1039 1039 1 NaN 8.74604e-08\n## [74359] * | 1039 1040 7 NaN 1.02111e-07\n## [74360] * | 1040 1040 2 0.0411355 1.19216e-07\n## predicted pvalue qvalue logFoldChange\n## <numeric> <numeric> <numeric> <numeric>\n## [1] 0.00369352 3.68670e-03 0.063385760 8.08079\n## [2] 0.01326033 8.71446e-05 0.001926954 7.23674\n## [3] 0.00956681 9.52120e-03 0.150288341 6.70775\n## [4] 0.03172791 4.92808e-04 0.009806734 5.97810\n## [5] 0.03475538 6.81713e-06 0.000173165 6.43158\n## ... ... ... ... ...\n## [74356] 0.1817758 2.51560e-11 1.07966e-09 5.45977\n## [74357] 0.0237099 2.34310e-02 3.38098e-01 5.39837\n## [74358] 0.0412257 4.03875e-02 5.49519e-01 4.60031\n## [74359] 0.0481315 1.13834e-13 5.77259e-12 7.18423\n## [74360] 0.0561941 1.52097e-03 2.79707e-02 5.15344\n## -------\n## regions: 802 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome" + "text": "9.4 GOTHiC\nGOTHiC relies on a cumulative binomial test to detect interactions between distal genomic loci that have significantly more reads than expected by chance in Hi-C experiments (Mifsud et al. (2017)).\n\n\n\n\n\n\nUsing the GOTHiC function\n\n\n\nUnfortunately, the main GOTHiC function require two .bam files as input. These files are often deleted due to their larger size, while the filtered pairs file itself is retained.\nMoreover, the internal nuts and bolts of the main GOTHiC function perform several operations that are not required in modern workflows:\n\n\nFiltering pairs from same restriction fragment; this step is now usually taken care of automatically, e.g. with HiCool Hi-C processing package.\n\nFiltering short-range pairs; the GOTHiC package hard-codes a 10kb lower threshold for minimum pair distance. 
More advanced optimized filtering approaches have been implemented since then, to circumvent the need for such hard-coded threshold.\n\nBinning pairs; this step is also already taken care of, when working with Hi-C matrices in modern formats, e.g. with .(m)cool files.\n\n\n\nBased on these facts, we can simplify the binomial test function provided by GOTHiC so that it can directly used binned interactions imported as a HiCExperiment object in R.\n\nShow the code for GOTHiC_binomial functionGOTHiC_binomial <- function(x) {\n\n if (length(trans(x)) != 0) stop(\"Only `cis` interactions can be used here.\")\n ints <- interactions(x) |>\n as.data.frame() |> \n select(seqnames1, start1, seqnames2, start2, count) |>\n dplyr::rename(chr1 = seqnames1, locus1 = start1, chr2 = seqnames2, locus2 = start2, frequencies = count) |>\n mutate(locus1 = locus1 - 1, locus2 = locus2 - 1) |>\n mutate(int1 = paste0(chr1, '_', locus1), int2 = paste0(chr2, '_', locus2))\n \n numberOfReadPairs <- sum(ints$frequencies)\n all_bins <- unique(c(unique(ints$int1), unique(ints$int2)))\n all_bins <- sort(all_bins)\n upperhalfBinNumber <- (length(all_bins)^2 - length(all_bins))/2\n\n cov <- ints |> \n group_by(int1) |> \n tally(frequencies) |> \n full_join(ints |> \n group_by(int2) |> \n tally(frequencies), \n by = c('int1' = 'int2')\n ) |> \n rowwise() |> \n mutate(coverage = sum(n.x, n.y, na.rm = TRUE)) |> \n ungroup() |>\n mutate(relative_coverage = coverage/sum(coverage))\n \n results <- mutate(ints,\n cov1 = left_join(ints, select(cov, int1, relative_coverage), by = c('int1' = 'int1'))$relative_coverage, \n cov2 = left_join(ints, select(cov, int1, relative_coverage), by = c('int2' = 'int1'))$relative_coverage,\n probability = cov1 * cov2 * 2 * 1/(1 - sum(cov$relative_coverage^2)),\n predicted = probability * numberOfReadPairs\n ) |> \n rowwise() |>\n mutate(\n pvalue = binom.test(\n frequencies, \n numberOfReadPairs, \n probability,\n alternative = \"greater\"\n )$p.value\n ) |> \n ungroup() 
|> \n mutate(\n logFoldChange = log2(frequencies / predicted), \n qvalue = stats::p.adjust(pvalue, method = \"BH\", n = upperhalfBinNumber)\n )\n\n scores(x, \"probability\") <- results$probability\n scores(x, \"predicted\") <- results$predicted\n scores(x, \"pvalue\") <- results$pvalue\n scores(x, \"qvalue\") <- results$qvalue\n scores(x, \"logFoldChange\") <- results$logFoldChange\n\n return(x)\n\n} \n\n\n\nres <- GOTHiC_binomial(hic[\"II\"])\nres\n## `HiCExperiment` object with 471,364 contacts over 802 regions \n## -------\n## fileName: \"/root/.cache/R/ExperimentHub/16ffc32b3a_7752\" \n## focus: \"II\" \n## resolutions(5): 1000 2000 4000 8000 16000\n## active resolution: 1000 \n## interactions: 74360 \n## scores(7): count balanced probability predicted pvalue qvalue logFoldChange \n## topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) domain(52) \n## pairsFile: N/A \n## metadata(0):\n\ninteractions(res)\n## GInteractions object with 74360 interactions and 9 metadata columns:\n## seqnames1 ranges1 strand1 seqnames2 ranges2\n## <Rle> <IRanges> <Rle> <Rle> <IRanges>\n## [1] II 1-1000 * --- II 1001-2000\n## [2] II 1-1000 * --- II 5001-6000\n## [3] II 1-1000 * --- II 6001-7000\n## [4] II 1-1000 * --- II 8001-9000\n## [5] II 1-1000 * --- II 9001-10000\n## ... ... ... ... ... ... ...\n## [74356] II 807001-808000 * --- II 809001-810000\n## [74357] II 807001-808000 * --- II 810001-811000\n## [74358] II 808001-809000 * --- II 808001-809000\n## [74359] II 808001-809000 * --- II 809001-810000\n## [74360] II 809001-810000 * --- II 809001-810000\n## strand2 | bin_id1 bin_id2 count balanced probability\n## <Rle> | <numeric> <numeric> <numeric> <numeric> <numeric>\n## [1] * | 231 232 1 NaN 7.83580e-09\n## [2] * | 231 236 2 NaN 2.81318e-08\n## [3] * | 231 237 1 NaN 2.02960e-08\n## [4] * | 231 239 2 NaN 6.73108e-08\n## [5] * | 231 240 3 NaN 7.37336e-08\n## ... ... . ... ... ... ... 
...\n## [74356] * | 1038 1040 8 0.0472023 3.85638e-07\n## [74357] * | 1038 1041 1 NaN 5.03006e-08\n## [74358] * | 1039 1039 1 NaN 8.74604e-08\n## [74359] * | 1039 1040 7 NaN 1.02111e-07\n## [74360] * | 1040 1040 2 0.0411355 1.19216e-07\n## predicted pvalue qvalue logFoldChange\n## <numeric> <numeric> <numeric> <numeric>\n## [1] 0.00369352 3.68670e-03 0.063385760 8.08079\n## [2] 0.01326033 8.71446e-05 0.001926954 7.23674\n## [3] 0.00956681 9.52120e-03 0.150288341 6.70775\n## [4] 0.03172791 4.92808e-04 0.009806734 5.97810\n## [5] 0.03475538 6.81713e-06 0.000173165 6.43158\n## ... ... ... ... ...\n## [74356] 0.1817758 2.51560e-11 1.07966e-09 5.45977\n## [74357] 0.0237099 2.34310e-02 3.38098e-01 5.39837\n## [74358] 0.0412257 4.03875e-02 5.49519e-01 4.60031\n## [74359] 0.0481315 1.13834e-13 5.77259e-12 7.18423\n## [74360] 0.0561941 1.52097e-03 2.79707e-02 5.15344\n## -------\n## regions: 802 ranges and 4 metadata columns\n## seqinfo: 16 sequences from an unspecified genome" }, { "objectID": "pages/workflow-yeast.html",