Shared Types#
The types documented on this page are used throughout the hictk codebase to model various concepts such as genomic intervals, reference genomes, bins, and pixels.
Chromosome#
-
class Chromosome#
This class models chromosomes as triplets consisting of:
A numeric identifier
The chromosome name
The chromosome size
Chromosomes are compared by ID.
Constructors
-
Chromosome() = default;#
-
Chromosome(std::uint32_t id_, std::string name_, std::uint32_t size_) noexcept;#
Operators
-
[[nodiscard]] explicit constexpr operator bool() const noexcept;#
Accessors
-
[[nodiscard]] constexpr std::uint32_t id() const noexcept;#
-
[[nodiscard]] std::string_view name() const noexcept;#
-
[[nodiscard]] constexpr std::uint32_t size() const noexcept;#
-
[[nodiscard]] bool is_all() const noexcept;#
Comparison operators
-
[[nodiscard]] constexpr bool operator<(const Chromosome &other) const noexcept;#
-
[[nodiscard]] constexpr bool operator>(const Chromosome &other) const noexcept;#
-
[[nodiscard]] constexpr bool operator<=(const Chromosome &other) const noexcept;#
-
[[nodiscard]] constexpr bool operator>=(const Chromosome &other) const noexcept;#
-
[[nodiscard]] bool operator==(const Chromosome &other) const noexcept;#
-
[[nodiscard]] bool operator!=(const Chromosome &other) const noexcept;#
-
friend bool operator==(const Chromosome &a, std::string_view b_name) noexcept;#
-
friend bool operator!=(const Chromosome &a, std::string_view b_name) noexcept;#
-
friend bool operator==(std::string_view a_name, const Chromosome &b) noexcept;#
-
friend bool operator!=(std::string_view a_name, const Chromosome &b) noexcept;#
-
friend constexpr bool operator<(const Chromosome &a, std::uint32_t b_id) noexcept;#
-
friend constexpr bool operator>(const Chromosome &a, std::uint32_t b_id) noexcept;#
-
friend constexpr bool operator<=(const Chromosome &a, std::uint32_t b_id) noexcept;#
-
friend constexpr bool operator>=(const Chromosome &a, std::uint32_t b_id) noexcept;#
-
friend constexpr bool operator==(const Chromosome &a, std::uint32_t b_id) noexcept;#
-
friend constexpr bool operator!=(const Chromosome &a, std::uint32_t b_id) noexcept;#
-
friend constexpr bool operator<(std::uint32_t a_id, const Chromosome &b) noexcept;#
-
friend constexpr bool operator>(std::uint32_t a_id, const Chromosome &b) noexcept;#
-
friend constexpr bool operator<=(std::uint32_t a_id, const Chromosome &b) noexcept;#
-
friend constexpr bool operator>=(std::uint32_t a_id, const Chromosome &b) noexcept;#
-
friend constexpr bool operator==(std::uint32_t a_id, const Chromosome &b) noexcept;#
-
friend constexpr bool operator!=(std::uint32_t a_id, const Chromosome &b) noexcept;#
Genomic intervals#
-
class GenomicInterval#
Class to represent 1D genomic intervals.
This class has two main purposes:
Storing information regarding genomic intervals
Simplifying comparison of genomic intervals (e.g. is interval A upstream of interval B)
Constructors
-
constexpr GenomicInterval() = default;#
-
explicit GenomicInterval(const Chromosome &chrom_) noexcept;#
-
GenomicInterval(const Chromosome &chrom_, std::uint32_t start_, std::uint32_t end) noexcept;#
Factory methods
-
[[nodiscard]] static GenomicInterval parse(const Reference &chroms, std::string query, Type type = Type::UCSC);#
-
[[nodiscard]] static GenomicInterval parse_ucsc(const Reference &chroms, std::string query);#
-
[[nodiscard]] static GenomicInterval parse_bed(const Reference &chroms, std::string_view query, char sep = '\t');#
Operators
-
[[nodiscard]] explicit operator bool() const noexcept;#
-
[[nodiscard]] bool operator==(const GenomicInterval &other) const noexcept;#
-
[[nodiscard]] bool operator!=(const GenomicInterval &other) const noexcept;#
-
[[nodiscard]] bool operator<(const GenomicInterval &other) const noexcept;#
-
[[nodiscard]] bool operator<=(const GenomicInterval &other) const noexcept;#
-
[[nodiscard]] bool operator>(const GenomicInterval &other) const noexcept;#
-
[[nodiscard]] bool operator>=(const GenomicInterval &other) const noexcept;#
Accessors
-
[[nodiscard]] const Chromosome &chrom() const noexcept;#
-
[[nodiscard]] constexpr std::uint32_t start() const noexcept;#
-
[[nodiscard]] constexpr std::uint32_t end() const noexcept;#
-
[[nodiscard]] constexpr std::uint32_t size() const noexcept;#
Genomic bins#
-
class Bin#
Class modeling genomic bins.
The class is implemented as a thin wrapper around GenomicIntervals. The main difference between Bin and GenomicInterval objects is that, in addition to genomic coordinates, Bin objects also store two identifiers:
A unique identifier that can be used to refer to Bins in a Reference.
A relative identifier that can be used to refer to Bins in a Chromosome.
-
constexpr Bin() = default;#
-
Bin(const Chromosome &chrom_, std::uint32_t start_, std::uint32_t end) noexcept;#
-
Bin(std::uint64_t id_, std::uint32_t rel_id_, const Chromosome &chrom_, std::uint32_t start_, std::uint32_t end_) noexcept;#
-
explicit Bin(GenomicInterval interval) noexcept;#
-
Bin(std::uint64_t id_, std::uint32_t rel_id_, GenomicInterval interval) noexcept;#
-
[[nodiscard]] explicit operator bool() const noexcept;#
-
[[nodiscard]] constexpr std::uint64_t id() const noexcept;#
-
[[nodiscard]] constexpr std::uint32_t rel_id() const noexcept;#
-
[[nodiscard]] const GenomicInterval &interval() const noexcept;#
-
[[nodiscard]] const Chromosome &chrom() const noexcept;#
-
[[nodiscard]] constexpr std::uint32_t start() const noexcept;#
-
[[nodiscard]] constexpr std::uint32_t end() const noexcept;#
-
[[nodiscard]] constexpr bool has_null_id() const noexcept;#
Reference genome#
-
class Reference#
This class models the reference genome used as the coordinate system in Hi-C matrices.
Reference objects consist of collections of Chromosomes with unique IDs. Chromosomes can be queried by ID or by name.
As a general rule, queries by Chromosome ID are more efficient than queries by name.
Constructors
-
Reference() = default;#
-
template<typename ChromosomeNameIt, typename ChromosomeSizeIt>
Reference(ChromosomeNameIt first_chrom_name, ChromosomeNameIt last_chrom_name, ChromosomeSizeIt first_chrom_size);#
-
template<typename ChromosomeIt>
Reference(ChromosomeIt first_chrom, ChromosomeIt last_chrom);#
-
Reference(std::initializer_list<Chromosome> chromosomes);#
Factory methods
Operators
Iteration
-
[[nodiscard]] auto begin() const -> const_iterator;#
-
[[nodiscard]] auto end() const -> const_iterator;#
-
[[nodiscard]] auto cbegin() const -> const_iterator;#
-
[[nodiscard]] auto cend() const -> const_iterator;#
-
[[nodiscard]] auto rbegin() const -> const_reverse_iterator;#
-
[[nodiscard]] auto rend() const -> const_reverse_iterator;#
-
[[nodiscard]] auto rcbegin() const -> const_reverse_iterator;#
-
[[nodiscard]] auto rcend() const -> const_reverse_iterator;#
Accessors
-
[[nodiscard]] bool empty() const noexcept;#
-
[[nodiscard]] std::size_t size() const noexcept;#
Lookup
-
[[nodiscard]] auto find(std::uint32_t id) const -> const_iterator;#
-
[[nodiscard]] auto find(std::string_view chrom_name) const -> const_iterator;#
-
[[nodiscard]] auto find(const Chromosome &chrom) const -> const_iterator;#
-
[[nodiscard]] const Chromosome &at(std::uint32_t id) const;#
-
[[nodiscard]] const Chromosome &at(std::string_view chrom_name) const;#
-
[[nodiscard]] const Chromosome &operator[](std::uint32_t id) const noexcept;#
-
[[nodiscard]] const Chromosome &operator[](std::string_view chrom_name) const noexcept;#
-
[[nodiscard]] bool contains(std::uint32_t id) const;#
-
[[nodiscard]] bool contains(const Chromosome &chrom) const;#
-
[[nodiscard]] bool contains(std::string_view chrom_name) const;#
-
[[nodiscard]] std::uint32_t get_id(std::string_view chrom_name) const;#
-
[[nodiscard]] const Chromosome &longest_chromosome() const;#
-
[[nodiscard]] const Chromosome &chromosome_with_longest_name() const;#
Other
-
[[nodiscard]] Reference remove_ALL() const;#
-
[[nodiscard]] Reference add_ALL(std::uint32_t scaling_factor = 1) const;#
-
Reference() = default;#
Bin Table#
-
class BinTable#
This class models the bin table used as the coordinate system in Hi-C matrices.
The class API gives the illusion of operating over a collection of Bins. In reality, BinTables do not store any Bins. All queries are satisfied through simple arithmetic operations on the prefix sum of Chromosome sizes, and Bins are generated on the fly as needed.
This implementation has two main benefits:
Decoupling of BinTable resolution and memory requirements.
Lookups in constant or linear time complexity with performance independent of resolution.
Constructors
-
BinTable() = default;#
-
template<typename ChromIt>
BinTable(ChromIt first_chrom, ChromIt last_chrom, std::uint32_t bin_size, std::size_t bin_offset = 0);#
-
template<typename ChromNameIt, typename ChromSizeIt>
BinTable(ChromNameIt first_chrom_name, ChromNameIt last_chrom_name, ChromSizeIt first_chrom_size, std::uint32_t bin_size, std::size_t bin_offset = 0);#
Operators
Accessors
-
[[nodiscard]] std::size_t size() const noexcept;#
-
[[nodiscard]] bool empty() const noexcept;#
-
[[nodiscard]] std::size_t num_chromosomes() const;#
-
[[nodiscard]] constexpr std::uint32_t resolution() const noexcept;#
-
[[nodiscard]] constexpr const std::vector<std::uint64_t> &num_bin_prefix_sum() const noexcept;#
Iteration
-
[[nodiscard]] auto begin() const -> iterator;#
-
[[nodiscard]] auto end() const -> iterator;#
-
[[nodiscard]] auto cbegin() const -> iterator;#
-
[[nodiscard]] auto cend() const -> iterator;#
Slicing
-
[[nodiscard]] BinTable subset(const Chromosome &chrom) const;#
Lookup
-
[[nodiscard]] auto find_overlap(const GenomicInterval &query) const -> std::pair<BinTable::iterator, BinTable::iterator>;#
-
[[nodiscard]] auto find_overlap(const Chromosome &chrom, std::uint32_t start, std::uint32_t end) const -> std::pair<BinTable::iterator, BinTable::iterator>;#
-
[[nodiscard]] auto find_overlap(std::string_view chrom_name, std::uint32_t start, std::uint32_t end) const -> std::pair<BinTable::iterator, BinTable::iterator>;#
-
[[nodiscard]] auto find_overlap(std::uint32_t chrom_id, std::uint32_t start, std::uint32_t end) const -> std::pair<BinTable::iterator, BinTable::iterator>;#
-
[[nodiscard]] std::pair<Bin, Bin> at(const GenomicInterval &gi) const;#
-
[[nodiscard]] std::pair<std::uint64_t, std::uint64_t> map_to_bin_ids(const GenomicInterval &gi) const;#
Query bins by genomic interval.
-
[[nodiscard]] Bin at(const Chromosome &chrom, std::uint32_t pos = 0) const;#
-
[[nodiscard]] Bin at_hint(std::uint64_t bin_id, const Chromosome &chrom) const;#
Query by bin identifier.
-
[[nodiscard]] std::uint64_t map_to_bin_id(const Chromosome &chrom, std::uint32_t pos) const;#
-
[[nodiscard]] std::uint64_t map_to_bin_id(std::string_view chrom_name, std::uint32_t pos) const;#
-
[[nodiscard]] std::uint64_t map_to_bin_id(std::uint32_t chrom_id, std::uint32_t pos) const;#
Query by genomic coordinates
Others
-
[[nodiscard]] BinTableConcrete concretize() const;#
Pixels#
-
template<typename N>
class ThinPixel#
Struct to model a genomic pixel using as little memory as possible.
Member variables
-
static constexpr auto null_id = std::numeric_limits<std::uint64_t>::max();#
Factory methods
Operators
-
[[nodiscard]] explicit operator bool() const noexcept;#
-
static constexpr auto null_id = std::numeric_limits<std::uint64_t>::max();#
-
class PixelCoordinates;#
Struct to model 2D genomic coordinates using a pair of Bins.
Member variables
Constructors
-
PixelCoordinates() = default;#
Operators
-
[[nodiscard]] explicit operator bool() const noexcept;#
-
[[nodiscard]] bool operator==(const PixelCoordinates &other) const noexcept;#
-
[[nodiscard]] bool operator!=(const PixelCoordinates &other) const noexcept;#
-
[[nodiscard]] bool operator<(const PixelCoordinates &other) const noexcept;#
-
[[nodiscard]] bool operator<=(const PixelCoordinates &other) const noexcept;#
-
[[nodiscard]] bool operator>(const PixelCoordinates &other) const noexcept;#
-
[[nodiscard]] bool operator>=(const PixelCoordinates &other) const noexcept;#
Accessors
-
[[nodiscard]] bool is_intra() const noexcept;#
-
PixelCoordinates() = default;#
-
template<typename N>
class Pixel#
Struct to model genomic pixels as interaction counts associated with a pair of genomic Bins.
The main difference between ThinPixel and Pixel objects is that the latter possesses all the knowledge required to map interactions to genomic coordinates, not just bin IDs.
Member variables
-
PixelCoordinates coords{};#
Constructors
-
Pixel() = default;#
-
explicit Pixel(PixelCoordinates coords_, N count_ = 0) noexcept;#
-
Pixel(const Chromosome &chrom, std::uint32_t start, std::uint32_t end, N count_ = 0) noexcept;#
-
Pixel(const Chromosome &chrom1, std::uint32_t start1, std::uint32_t end1, const Chromosome &chrom2, std::uint32_t start2, std::uint32_t end2, N count_ = 0) noexcept;#
Factory methods
Operators
-
[[nodiscard]] explicit operator bool() const noexcept;#
Conversion
-
PixelCoordinates coords{};#