(ns cljam.io.cram.decode.data-series (:require [cljam.io.cram.itf8 :as itf8] [cljam.io.cram.bit-stream :as bs] [cljam.io.util.byte-buffer :as bb] [clojure.string :as str]) (:import [java.nio Buffer ByteBuffer])) | |
(defn- data-series-type [ds] (case ds (:BF :CF :RI :RL :AP :RG :MF :NS :NP :TS :NF :TL :FN :FP :DL :RS :PD :HC :MQ) :int (:FC :BS :BA :QS) :byte (:RN :BB :QQ :IN :SC) :bytes)) | |
(defn- build-codec-decoder [{:keys [codec] :as params} data-type bs-decoder content-id->block-data] (case codec :external (let [^ByteBuffer block (get content-id->block-data (:content-id params))] (case data-type :byte #(.get block) :int #(itf8/decode-itf8 block))) :huffman (let [{:keys [alphabet bit-len]} params] (assert (and (= (count alphabet) 1) (zero? (long (first bit-len)))) "Huffman coding for more than one word is not supported yet.") (constantly (first alphabet))) :byte-array-len (let [{:keys [len-encoding val-encoding]} params len-decoder (build-codec-decoder len-encoding :int bs-decoder content-id->block-data) val-decoder (build-codec-decoder val-encoding :byte bs-decoder content-id->block-data)] (fn [] (let [len (len-decoder) bb (bb/allocate-lsb-byte-buffer len)] (dotimes [_ len] (.put bb (byte (val-decoder)))) (.array bb)))) :byte-array-stop (let [{:keys [stop-byte external-id]} params ^ByteBuffer block (get content-id->block-data external-id)] (fn [] (.mark ^Buffer block) (let [start (.position block) end (long (loop [] (if (= (.get block) (byte stop-byte)) (.position block) (recur)))) len (dec (- end start)) _ (.reset ^Buffer block) ret (bb/read-bytes block len)] (.get block) ret))) :beta (let [{:keys [offset length]} params] (fn [] (+ (long (bs/read-bits bs-decoder (long length))) (long offset)))))) | |
Builds decoders for data series based on the encodings specified in the given compression header and block data.
| (defn build-data-series-decoders [{ds-encodings :data-series} bs-decoder blocks] (let [content-id->block-data (into {} (map (juxt :content-id :data)) blocks)] (reduce-kv (fn [decoders ds params] (let [dt (data-series-type ds) decoder (build-codec-decoder params dt bs-decoder content-id->block-data)] (assoc decoders ds decoder))) {} ds-encodings))) |
(defn- tag-value-coercer [tag-type] (case tag-type \A #(char (.get ^ByteBuffer %)) \c #(.get ^ByteBuffer %) \C bb/read-ubyte \s #(.getShort ^ByteBuffer %) \S bb/read-ushort \i #(.getInt ^ByteBuffer %) \I bb/read-uint \f #(.getFloat ^ByteBuffer %) \Z bb/read-null-terminated-string \H (fn [^ByteBuffer bb] (let [s (.getBytes ^String (bb/read-null-terminated-string bb)) n (quot (alength s) 2) arr (byte-array n)] (dotimes [i n] (let [b (bit-or (bit-shift-left (Character/digit (aget s (* 2 i)) 16) 4) (Character/digit (aget s (inc (* 2 i))) 16))] (aset arr i (byte b)))) arr)) \B (fn [^ByteBuffer bb] (let [tag-type' (char (.get bb)) len (.getInt bb) coercer (tag-value-coercer tag-type') vs (repeatedly len (partial coercer bb))] (str/join \, (cons tag-type' vs)))))) | |
(defn- build-tag-decoder [tag-encoding tag-type bs-decoder content-id->block-data] (let [decoder (build-codec-decoder tag-encoding :bytes bs-decoder content-id->block-data) coercer (tag-value-coercer tag-type)] (fn [] (let [bb (bb/make-lsb-byte-buffer (decoder))] (coercer bb))))) | |
Builds decoders for tags based on the encodings specified in the given compression header and block data.
| (defn build-tag-decoders [{:keys [tags]} bs-decoder blocks] (let [content-id->block-data (into {} (map (juxt :content-id :data)) blocks)] (reduce-kv (fn [decoders tag m] (reduce-kv (fn [decoders tag-type encoding] (let [decoder (build-tag-decoder encoding tag-type bs-decoder content-id->block-data) tag-type' (str (if (#{\c \C \s \S \i \I} tag-type) \i tag-type))] (assoc-in decoders [tag tag-type] (fn [] {:type tag-type' :value (decoder)})))) decoders m)) {} tags))) |