(ns cljam.io.util.bgzf (:refer-clojure :exclude [compare]) (:require [clojure.java.io :as cio]) (:import [java.io File] [java.net MalformedURLException URI URL] [bgzf4j BGZFInputStream BGZFOutputStream])) | |
(defprotocol BGZFIOFactory
  "Factory protocol for creating BGZF streams from a source/destination
  value `x`."
  (make-bgzf-input-stream [x]
    "Creates a BGZF input stream that reads from `x`.")
  (make-bgzf-output-stream [x]
    "Creates a BGZF output stream that writes to `x`."))
;; BGZFIOFactory implementations for the supported source types.
;; Every type other than File is eventually reduced to either a File or
;; (for reading only) a non-file URL.
(extend-protocol BGZFIOFactory
  File
  (make-bgzf-input-stream [^File x]
    (BGZFInputStream. x))
  (make-bgzf-output-stream [^File x]
    (BGZFOutputStream. x))

  URL
  (make-bgzf-input-stream [^URL x]
    ;; file: URLs are handled through the File implementation.
    (if (= "file" (.getProtocol x))
      (make-bgzf-input-stream (cio/as-file x))
      (BGZFInputStream. x)))
  (make-bgzf-output-stream [^URL x]
    ;; Only file: URLs can be written to.
    (when-not (= "file" (.getProtocol x))
      (throw (IllegalArgumentException.
              (str "Can not write to non-file URL <" x ">"))))
    (make-bgzf-output-stream (cio/as-file x)))

  URI
  (make-bgzf-input-stream [^URI x]
    (make-bgzf-input-stream (.toURL x)))
  (make-bgzf-output-stream [^URI x]
    (make-bgzf-output-stream (.toURL x)))

  String
  ;; A string is first tried as a URL; if it does not parse as one it is
  ;; treated as a file path.
  (make-bgzf-input-stream [x]
    (try
      (make-bgzf-input-stream (URL. x))
      (catch MalformedURLException _
        (make-bgzf-input-stream (cio/as-file x)))))
  (make-bgzf-output-stream [x]
    (try
      (make-bgzf-output-stream (URL. x))
      (catch MalformedURLException _
        (make-bgzf-output-stream (cio/as-file x))))))
(defn bgzf-input-stream
  "Makes a BGZF input stream from `x` by delegating to
  `make-bgzf-input-stream`. `x` may be a String, File, URL, or any other
  type that `BGZFIOFactory` is extended to."
  ^BGZFInputStream
  [x]
  (make-bgzf-input-stream x))
(defn bgzf-output-stream
  "Makes a BGZF output stream from `x` by delegating to
  `make-bgzf-output-stream`. `x` may be a String, File, URL, or any other
  type that `BGZFIOFactory` is extended to."
  ^BGZFOutputStream
  [x]
  (make-bgzf-output-stream x))
;; Layout of a virtual file pointer as decoded by the accessors in this
;; namespace: the block address sits above the low 16 bits, and the low
;; 16 bits hold the offset inside the uncompressed block.

;; Number of bits the block address is shifted left within a file pointer.
(def ^{:private true :const true} shift-amount 16)

;; Mask selecting the 48-bit block address after shifting.
(def ^{:private true :const true} address-mask 0xFFFFFFFFFFFF)

;; Mask selecting the 16-bit within-block offset.
(def ^{:private true :const true} offset-mask 0xFFFF)
(defn compare
  "Compares two virtual file pointers as unsigned 64-bit values.
  Returns -1 if `fp1` is earlier in the file than `fp2`, 1 if it is
  later, and 0 if they are equal."
  ^long
  [^long fp1 ^long fp2]
  ;; Flipping the sign bit maps unsigned ordering onto signed ordering, so
  ;; a plain signed comparison of the flipped values gives the unsigned
  ;; result (a negative long counts as larger than any non-negative one).
  (let [u1 (bit-xor fp1 Long/MIN_VALUE)
        u2 (bit-xor fp2 Long/MIN_VALUE)]
    (cond
      (< u1 u2) -1
      (> u1 u2) 1
      :else 0)))
(defn get-block-address
  "Returns the file offset of the start of the BGZF block that the
  virtual file pointer `fp` refers to."
  ^long
  [^long fp]
  ;; Drop the low offset bits, then mask so that sign bits pulled in by
  ;; the arithmetic shift are cleared.
  (-> fp
      (bit-shift-right shift-amount)
      (bit-and address-mask)))
(defn get-block-offset
  "Returns the offset into the uncompressed block encoded in the virtual
  file pointer `fp` (its low bits selected by `offset-mask`)."
  ^long
  [^long fp]
  (bit-and offset-mask fp))
(defn same-or-adjacent-blocks?
  "Returns true if `fp2` points into the same BGZF block as `fp1`, or
  into the block whose address is exactly one greater than that of
  `fp1`'s block; false otherwise."
  [^long fp1 ^long fp2]
  ;; Block addresses are masked to 48 bits, so the subtraction cannot
  ;; overflow.
  (let [delta (- (get-block-address fp2) (get-block-address fp1))]
    (<= 0 delta 1)))
(defn- bgzip-header?
  "Returns true if the byte array `b` holds at least 16 bytes and those
  bytes carry the BGZF header markers checked below; false otherwise."
  [^bytes b]
  (if (< (alength b) 16)
    false
    (and
     ;; Magic bytes 0x1f 0x8b.
     (= (aget b 0) (unchecked-byte 0x1f))
     (= (aget b 1) (unchecked-byte 0x8b))
     ;; FEXTRA flag (bit 2 of the flags byte).
     (bit-test (aget b 3) 2)
     ;; Extra subfield identifier: SI1 = 'B', SI2 = 'C'.
     (= (aget b 12) (byte (int \B)))
     (= (aget b 13) (byte (int \C)))
     ;; Subfield length LEN = 2 (bytes 14 and 15).
     (= (aget b 14) 2)
     (zero? (aget b 15)))))
(defn bgzip?
  "Checks whether `f` is bgzipped by reading its first 16 bytes and
  testing them with `bgzip-header?`. `f` is passed to
  `clojure.java.io/input-stream`. Returns false when the input ends
  before 16 bytes have been read."
  [f]
  (let [header (byte-array 16)
        wanted (alength header)]
    (with-open [in (cio/input-stream f)]
      ;; A single read may return fewer bytes than requested, so keep
      ;; reading until the header buffer is full or the stream ends.
      (loop [filled 0]
        (if (= filled wanted)
          (bgzip-header? header)
          (let [n (.read in header filled (- wanted filled))]
            (if (neg? n)
              false
              (recur (+ filled n)))))))))