Reader of BAM file format. | (ns cljam.io.bam.reader (:require [cljam.io.protocols :as protocols] [cljam.io.sam.util.refs :as refs] [cljam.io.sam.util.header :as header] [cljam.io.bam-index.core :as bai] [cljam.io.bam.decoder :as decoder] [cljam.io.util.lsb.data-io :as lsb]) (:import [java.io Closeable FileNotFoundException] [cljam.io.bam.decoder BAMRawBlock] [bgzf4j BGZFInputStream])) |
(declare read-blocks-sequentially* read-blocks-randomly*) | |
BAMReader | |
;; BAMReader
;;
;; Reader state for one BAM source. Fields:
;;   url         - value returned by `reader-url`
;;   header      - value returned by `read-header`
;;   refs        - value returned by `read-refs`; also handed to the alignment decoder
;;   reader      - the underlying Closeable stream; closed by `close`
;;   data-reader - reader used to pull raw alignment blocks
;;   index-delay - derefable yielding the BAM index; dereferencing may throw
;;                 FileNotFoundException, which `indexed?` reports as false
;;   start-pos   - not used by any method in this type
(deftype BAMReader [url header refs reader data-reader index-delay start-pos]
  Closeable
  (close [_]
    (.close ^Closeable reader))

  protocols/IReader
  (reader-url [_]
    url)
  (read [this]
    (protocols/read this {}))
  (read [this region]
    (protocols/read-alignments this region))
  (indexed? [_]
    ;; Forcing the delay is the probe: only a missing index file maps to false,
    ;; any other exception propagates to the caller.
    (try
      (deref index-delay)
      true
      (catch FileNotFoundException _
        false)))

  protocols/IAlignmentReader
  (read-header [_]
    header)
  (read-refs [_]
    refs)
  (read-alignments [this]
    (protocols/read-alignments this {}))
  (read-alignments [this {:keys [chr start end]
                          :or {start 1 end Long/MAX_VALUE}}]
    ;; No `chr` means a sequential scan; otherwise an index-backed region read.
    (let [decode (partial decoder/decode-alignment refs)]
      (if (some? chr)
        (read-blocks-randomly* this chr start end 64 decode)
        (read-blocks-sequentially* this 64 decode))))
  (read-blocks [this]
    (protocols/read-blocks this {} {}))
  (read-blocks [this region]
    (protocols/read-blocks this region {}))
  (read-blocks [this
                {:keys [chr start end]
                 :or {start 1 end Long/MAX_VALUE}}
                {:keys [mode chunk-size]
                 :or {mode :normal chunk-size 64}}]
    ;; `mode` is either a custom decoding fn or one of the keywords below;
    ;; an unrecognised keyword makes `case` throw.
    (let [decode (if-not (fn? mode)
                   (case mode
                     :normal decoder/raw-block
                     :region decoder/decode-region-block
                     :coordinate decoder/decode-coordinate-block
                     :queryname decoder/decode-queryname-block
                     :pointer decoder/decode-pointer-block)
                   mode)]
      (if (some? chr)
        (read-blocks-randomly* this chr start end chunk-size decode)
        (read-blocks-sequentially* this chunk-size decode))))

  protocols/IRegionReader
  (read-in-region [this region]
    (protocols/read-in-region this region {}))
  (read-in-region [this region _]
    (protocols/read-alignments this region)))
(defn- read-a-block!
  "Reads a single alignment block from `rdr`: first an int giving the block
  size, then that many bytes, which are returned as a byte array."
  ^"[B" [rdr]
  (->> (lsb/read-int rdr)
       (lsb/read-bytes rdr)))
(defn- read-to-finish
  "Reads alignment blocks until the finish pointer or EOF is reached.

  Returns a lazy, chunked seq of `BAMRawBlock`s, each built from the block's
  bytes plus the file pointers taken before and after reading it. Chunks hold
  at most `chunk-size` blocks.

  With `[rdr chunk-size]`, reading starts at the stream's current file pointer
  and stops when the stream reports no available bytes.

  With `[rdr start finish chunk-size]`, the stream is first moved to `start`,
  and reading stops once the current pointer is no longer below `finish` or
  the stream reports no available bytes."
  ([^BAMReader rdr ^long chunk-size]
   (let [^BGZFInputStream stream (.-reader rdr)
         block-rdr (.-data-reader rdr)]
     (letfn [(next-chunk [^long from]
               (lazy-seq
                (let [cb (chunk-buffer chunk-size)]
                  (loop [remaining chunk-size
                         pos from]
                    (cond
                      ;; Chunk is full: emit it and continue lazily from `pos`.
                      (not (> remaining 0))
                      (chunk-cons (chunk cb) (next-chunk pos))

                      ;; Stream exhausted: emit what we have and terminate.
                      (zero? (.available stream))
                      (chunk-cons (chunk cb) nil)

                      :else
                      (let [bytes (read-a-block! block-rdr)
                            after (.getFilePointer stream)]
                        (chunk-append cb (BAMRawBlock. bytes pos after))
                        (recur (dec remaining) after)))))))]
       (next-chunk (.getFilePointer stream)))))
  ([^BAMReader rdr ^long start ^long finish ^long chunk-size]
   (let [^BGZFInputStream stream (.reader rdr)
         block-rdr (.data-reader rdr)]
     (letfn [(next-chunk [^long from]
               (lazy-seq
                (let [cb (chunk-buffer chunk-size)]
                  (loop [remaining chunk-size
                         pos from]
                    ;; Reposition on every iteration so the read always
                    ;; happens at `pos`.
                    (.seek stream pos)
                    (cond
                      ;; Chunk is full: emit it and continue lazily from `pos`.
                      (not (> remaining 0))
                      (chunk-cons (chunk cb) (next-chunk pos))

                      ;; Still inside the span and data is available: read one block.
                      (and (< pos finish) (> (.available stream) 0))
                      (let [bytes (read-a-block! block-rdr)
                            after (.getFilePointer stream)]
                        (chunk-append cb (BAMRawBlock. bytes pos after))
                        (recur (dec remaining) after))

                      ;; Past `finish` or out of data: emit and terminate.
                      :else
                      (chunk-cons (chunk cb) nil))))))]
       (.seek stream start)
       (next-chunk start)))))
(defn- read-blocks-sequentially*
  "Reads blocks sequentially from the reader's current position, in chunks of
  `chunk-size`. Returns an eduction of the non-nil results of applying
  `decoder` to each raw block."
  [^BAMReader rdr chunk-size decoder]
  (->> (read-to-finish rdr chunk-size)
       (eduction (keep decoder))))
(defn- read-blocks-randomly*
  "Reads blocks crossing the given range using the BAM index. Returns an
  eduction of decoded blocks.

  Dereferences the reader's index delay, so any exception raised while loading
  the index propagates to the caller.

  When `chr` is \"*\", the stream is moved to the beginning of the first
  unplaced span reported by the index and blocks are read sequentially from
  there, with `decoder` called on one argument. If the index reports no
  unplaced span, an empty eduction is returned.

  Otherwise, every span returned by the index for `chr`/`start`/`end` is read
  in turn and `decoder` is called as `(decoder block start end)`; nil results
  are dropped."
  [^BAMReader rdr chr start end chunk-size decoder]
  (let [bai @(.index-delay rdr)]
    (if (= chr "*")
      ;; Guard against an index with no unplaced spans: `ffirst` would yield
      ;; nil, and passing nil to `.seek` throws NullPointerException.
      (if-some [unplaced-begin (ffirst (bai/get-unplaced-spans bai))]
        (do
          (.seek ^BGZFInputStream (.reader rdr) (long unplaced-begin))
          (read-blocks-sequentially* rdr chunk-size decoder))
        (eduction (keep decoder) []))
      (let [refs (.refs rdr)]
        (->> (bai/get-spans bai (refs/ref-id refs chr) start end)
             (eduction
              (comp
               (mapcat (fn [[begin finish]]
                         (read-to-finish rdr begin finish chunk-size)))
               (keep #(decoder % start end)))))))))
(defn load-headers
  "Reads the header section of a BAM file from `rdr` and returns it as a map
  with two keys:

    :header - the result of parsing the length-prefixed header text
    :refs   - a vector of `{:name ..., :len ...}` maps, one per reference,
              in the order they appear in the stream

  Each reference name is read with its stored length and then has its last
  character dropped (presumably the trailing NUL terminator)."
  [rdr]
  (let [header-text (lsb/read-string rdr (lsb/read-int rdr))
        parsed (header/parse-header header-text)
        ref-count (int (lsb/read-int rdr))
        ;; Reads one reference entry: name length, name, then reference length.
        read-ref! (fn []
                    (let [name-len (int (lsb/read-int rdr))
                          raw-name (lsb/read-string rdr name-len)
                          ref-len (lsb/read-int rdr)]
                      {:name (subs raw-name 0 (dec name-len))
                       :len ref-len}))]
    (loop [remaining ref-count
           acc []]
      (if (zero? remaining)
        {:header parsed
         :refs acc}
        (recur (dec remaining) (conj acc (read-ref!)))))))