(ns cljam.io.util.bgzf
(:refer-clojure :exclude [compare])
(:require [clojure.java.io :as cio])
(:import [java.io File]
[java.net MalformedURLException URI URL]
[bgzf4j BGZFInputStream BGZFOutputStream])) | |
(defprotocol BGZFIOFactory
  "Factory functions that open BGZF streams on a source or destination value x."
  (make-bgzf-input-stream [x]
    "Opens a BGZF input stream on x.")
  (make-bgzf-output-stream [x]
    "Opens a BGZF output stream on x."))
;; BGZFIOFactory implementations for String, URI, URL and File.
;; String and URI convert and delegate to the URL implementation; "file" URLs
;; in turn delegate to the File implementation.
(extend-protocol BGZFIOFactory
  String
  ;; A string is first parsed as a URL; when that fails with
  ;; MalformedURLException it is treated as a file path instead.
  (make-bgzf-input-stream [s]
    (try
      (-> s URL. make-bgzf-input-stream)
      (catch MalformedURLException _
        (-> s cio/as-file make-bgzf-input-stream))))
  (make-bgzf-output-stream [s]
    (try
      (-> s URL. make-bgzf-output-stream)
      (catch MalformedURLException _
        (-> s cio/as-file make-bgzf-output-stream))))

  URI
  (make-bgzf-input-stream [^URI uri]
    (-> uri .toURL make-bgzf-input-stream))
  (make-bgzf-output-stream [^URI uri]
    (-> uri .toURL make-bgzf-output-stream))

  URL
  ;; "file" URLs are opened through the File implementation; any other
  ;; protocol is handed to BGZFInputStream directly.
  (make-bgzf-input-stream [^URL url]
    (if (= "file" (.getProtocol url))
      (-> url cio/as-file make-bgzf-input-stream)
      (BGZFInputStream. url)))
  ;; Writing is only supported for "file" URLs.
  (make-bgzf-output-stream [^URL url]
    (when-not (= "file" (.getProtocol url))
      (throw (IllegalArgumentException. (str "Can not write to non-file URL <" url ">"))))
    (-> url cio/as-file make-bgzf-output-stream))

  File
  (make-bgzf-input-stream [^File file]
    (BGZFInputStream. file))
  (make-bgzf-output-stream [^File file]
    (BGZFOutputStream. file)))
(defn bgzf-input-stream
  "Make a bgzf input stream from a String, File, URL, etc.
  Delegates to `make-bgzf-input-stream`."
  ^BGZFInputStream
  [x]
  (make-bgzf-input-stream x))
(defn bgzf-output-stream
  "Make a bgzf output stream from a String, File, URL, etc.
  Delegates to `make-bgzf-output-stream`."
  ^BGZFOutputStream
  [x]
  (make-bgzf-output-stream x))
(def ^:private ^:const shift-amount
  "Number of bits a file pointer is shifted right to obtain its block address."
  16)

(def ^:private ^:const address-mask
  "Mask of 48 one-bits applied to the shifted file pointer."
  0xFFFFFFFFFFFF)

(def ^:private ^:const offset-mask
  "Mask of 16 one-bits selecting the within-block offset of a file pointer."
  0xFFFF)
(defn compare
  "Compares two file pointers as unsigned 64-bit values.
  Returns -1 if fp1 is earlier in the file than fp2, 1 if it is later, and 0
  if they are equal."
  ^long
  [^long fp1 ^long fp2]
  ;; Flipping the sign bit maps unsigned order onto signed order, so a
  ;; negative long (large when read as unsigned) sorts after any non-negative.
  (let [a (bit-xor fp1 Long/MIN_VALUE)
        b (bit-xor fp2 Long/MIN_VALUE)]
    (cond
      (< a b) -1
      (> a b) 1
      :else 0)))
(defn get-block-address
  "File offset of start of BGZF block for this file pointer.
  Computed by shifting fp right by `shift-amount` bits and masking the result
  with `address-mask`."
  ^long
  [^long fp]
  (-> fp
      (bit-shift-right shift-amount)
      (bit-and address-mask)))
(defn get-block-offset
  "Offset into uncompressed block for this virtual file pointer.
  Computed by masking fp with `offset-mask`."
  ^long
  [^long fp]
  (-> fp
      (bit-and offset-mask)))
(defn same-or-adjacent-blocks?
  "Returns true if fp2 points to somewhere in the same BGZF block, or the one
  immediately following fp1's BGZF block.
  Concretely: true when the block address of fp2 equals the block address of
  fp1, or is exactly one greater than it."
  [^long fp1 ^long fp2]
  (let [addr1 (get-block-address fp1)
        addr2 (get-block-address fp2)
        ;; Block addresses are masked to 48 bits, so this cannot overflow.
        delta (- addr2 addr1)]
    (<= 0 delta 1)))
(defn- bgzip-header?
  "Returns true if the byte array b is at least 16 bytes long and its first 16
  bytes match the header pattern checked here: magic bytes 0x1f 0x8b, bit 2
  (FEXTRA) set in byte 3, bytes 12-13 equal to \"BC\", byte 14 equal to 2 and
  byte 15 equal to 0. Returns false otherwise."
  [^bytes b]
  (if (< (alength b) 16)
    false
    (let [id1     (bit-and (aget b 0) 0xff) ;; bytes are signed; compare unsigned
          id2     (bit-and (aget b 1) 0xff)
          flags   (aget b 3)
          si1     (aget b 12)
          si2     (aget b 13)
          slen-lo (aget b 14)
          slen-hi (aget b 15)]
      (and (= 0x1f id1)
           (= 0x8b id2)
           (bit-test flags 2)   ;; FEXTRA
           (= (int \B) si1)     ;; SI1
           (= (int \C) si2)     ;; SI2
           (= 2 slen-lo)        ;; LEN
           (zero? slen-hi)))))
(defn bgzip?
  "Checks if a given file is bgzipped or not.
  Reads the first 16 bytes of f (anything accepted by
  `clojure.java.io/input-stream`) and tests them with `bgzip-header?`.
  Returns false if the stream ends before 16 bytes could be read."
  [f]
  (let [header-size 16
        header (byte-array header-size)]
    (with-open [in (cio/input-stream f)]
      ;; A single read may return fewer bytes than requested, so keep reading
      ;; until the buffer is full or the stream is exhausted.
      (loop [filled 0]
        (if (= filled header-size)
          (bgzip-header? header)
          (let [n (.read in header filled (- header-size filled))]
            (if (neg? n)
              false
              (recur (+ filled n)))))))))