(ns cljam.io.sam.reader (:require [clojure.java.io :as cio] [clojure.tools.logging :as logging] [cljam.io.sam.util :as sam-util] [cljam.io.sam.util.refs :as refs] [cljam.io.sam.util.header :as header] [cljam.io.protocols :as protocols] [cljam.util :as util]) (:import [java.io BufferedReader Closeable] [cljam.io.protocols SAMCoordinateBlock SAMQuerynameBlock])) | |
;; Forward declarations: SAMReader's methods call these private functions,
;; which are defined further down in this namespace.
(declare read-alignments*
         read-blocks*
         read-alignments-in-region*)
;;; reader
;; Reader over SAM text.
;;   url    - value returned by `reader-url`
;;   header - value returned by `read-header`; also the source of `read-refs`
;;   reader - underlying Closeable line source; closed by `close`
(deftype SAMReader [url header reader]
  Closeable
  (close [_]
    (.close ^Closeable reader))

  protocols/IReader
  (reader-url [_]
    url)
  (read [this]
    (protocols/read this {}))
  (read [this region]
    (protocols/read-alignments this region))
  (indexed? [_]
    false)

  protocols/IRegionReader
  (read-in-region [this region]
    (protocols/read-in-region this region {}))
  ;; The third (option) argument is ignored.
  (read-in-region [this region _]
    (read-alignments-in-region* this region))

  protocols/IAlignmentReader
  (read-header [_]
    header)
  (read-refs [_]
    (vec (refs/make-refs header)))
  (read-alignments [this]
    (protocols/read-alignments this {}))
  ;; Filters by region only when at least one of :chr, :start, :end is set;
  ;; otherwise every alignment is returned.
  (read-alignments [this region]
    (let [{:keys [chr start end]} region]
      (if (or chr start end)
        (read-alignments-in-region* this region)
        (read-alignments* this))))
  (read-blocks [this]
    (protocols/read-blocks this {}))
  (read-blocks [this region]
    (protocols/read-blocks this region {}))
  ;; The region argument is ignored; only `option` is forwarded.
  (read-blocks [this _ option]
    (read-blocks* this option)))
(defn- read-alignments*
  "Returns an eduction over the lines of `sam-reader`'s underlying reader:
  the leading run of lines starting with `@` is dropped and every remaining
  line is passed through `sam-util/parse-alignment`."
  [^SAMReader sam-reader]
  (let [header-line? (fn [line] (= (first line) \@))
        xform (comp (drop-while header-line?)
                    (map sam-util/parse-alignment))]
    (eduction xform (line-seq (.reader sam-reader)))))
(defn- read-alignments-in-region*
  "Returns an eduction of the alignments of `sam-reader` that satisfy every
  constraint present in the region map:
    :chr   - alignment's :rname equals chr
    :start - start is <= the alignment's end (via `sam-util/get-end`)
    :end   - alignment's :pos is <= end
  A missing (nil) key imposes no constraint. All alignments are read and
  filtered one by one, so a warning is logged on every call."
  [^SAMReader sam-reader {:keys [chr start end]}]
  (logging/warn "May cause degradation of performance.")
  (let [in-region? (fn [aln]
                     (and (or (not chr)
                              (= (:rname aln) chr))
                          (or (not start)
                              (<= (long start) (sam-util/get-end aln)))
                          (or (not end)
                              (<= (long (:pos aln)) (long end)))))]
    (eduction (filter in-region?)
              (read-alignments* sam-reader))))
(defn- parse-coordinate
  "Builds a SAMCoordinateBlock from a raw tab-separated `line`.

  Only the first four tabs are located: the 2nd field is parsed as the
  integer flag, the 3rd is the reference name and the 4th is parsed as the
  integer position. The reference name is looked up in `rname->ref-id`,
  falling back to 0 when it is absent. The whole line is kept as the
  block's data."
  [rname->ref-id ^String line]
  (let [qname-end (.indexOf line (int \tab) 0)
        flag-end  (.indexOf line (int \tab) (unchecked-inc qname-end))
        rname-end (.indexOf line (int \tab) (unchecked-inc flag-end))
        pos-end   (.indexOf line (int \tab) (unchecked-inc rname-end))
        flag      (Integer/parseInt
                   (.substring line (unchecked-inc qname-end) flag-end))
        ref-name  (.substring line (unchecked-inc flag-end) rname-end)
        pos       (Integer/parseInt
                   (.substring line (unchecked-inc rname-end) pos-end))
        ref-id    (rname->ref-id ref-name 0)]
    (SAMCoordinateBlock. line ref-id pos flag)))
(defn- parse-qname
  "Builds a SAMQuerynameBlock from a raw tab-separated `line`.

  The text before the first tab is the query name and the text between the
  first and second tab is parsed as the integer flag. The whole line is
  kept as the block's data."
  [^String line]
  (let [first-tab  (.indexOf line (int \tab) 0)
        second-tab (.indexOf line (int \tab) (unchecked-inc first-tab))
        flag       (Integer/parseInt
                    (.substring line (unchecked-inc first-tab) second-tab))
        query-name (.substring line 0 first-tab)]
    (SAMQuerynameBlock. line query-name flag)))
(defn- read-blocks*
  "Returns an eduction of blocks built from the non-header lines of
  `sam-reader`. The leading run of lines starting with `@` is dropped.

  `mode` (default :normal) selects how each line is converted:
    a function  - applied to the raw line as-is
    :normal     - wraps the line as {:data line}
    :coordinate - `parse-coordinate`, using a reference-name -> index map
                  built from the header's :SQ entries (by :SN, in order),
                  with \"*\" mapped to -1
    :queryname  - `parse-qname`
  Any other keyword has no matching `case` clause and throws."
  [^SAMReader sam-reader {:keys [mode] :or {mode :normal}}]
  (let [header-line? (fn [line] (= (first line) \@))
        parse-fn (if (fn? mode)
                   mode
                   (case mode
                     :normal (fn [line] {:data line})
                     :coordinate (let [sq-entries (:SQ (.header sam-reader))
                                       rname->ref-id (into {"*" -1}
                                                           (map-indexed
                                                            (fn [idx {:keys [SN]}]
                                                              [SN idx]))
                                                           sq-entries)]
                                   (partial parse-coordinate rname->ref-id))
                     :queryname parse-qname))]
    (eduction (comp (drop-while header-line?)
                    (map parse-fn))
              (line-seq (.reader sam-reader)))))
(defn- read-header*
  "Reads the leading run of lines starting with `@` from `rdr`, parses each
  with `header/parse-header-line` and folds the results together with
  `header/into-header`. Reading stops at the first line that does not start
  with `@`."
  [^BufferedReader rdr]
  (let [header-line? (fn [line] (= \@ (first line)))
        xform (comp (take-while header-line?)
                    (map header/parse-header-line))]
    (transduce xform header/into-header (line-seq rdr))))
(defn reader
  "Returns an open instance of SAMReader for `f`.

  The header is read up front through a temporary reader that is closed
  before returning; the returned instance holds a second, freshly opened
  reader on `f`. SAMReader implements java.io.Closeable, so the caller is
  responsible for closing it (e.g. with `with-open`)."
  [f]
  (let [header (with-open [r (cio/reader f)]
                 (read-header* r))]
    (->SAMReader (util/as-url f) header (cio/reader f))))