(ns cljam.io.cram.decode.structure
  (:require [cljam.io.cram.codecs.rans4x8 :as rans]
            [cljam.io.cram.itf8 :as itf8]
            [cljam.io.sam.util.header :as sam.header]
            [cljam.io.util.byte-buffer :as bb])
  (:import [java.io ByteArrayInputStream IOException]
           [java.nio Buffer ByteBuffer ByteOrder]
           [java.util Arrays]
           [org.apache.commons.compress.compressors CompressorStreamFactory]))
(def ^:private ^:const cram-magic "CRAM")
(defn- decode-itf8-array [bb]
  (let [n (itf8/decode-itf8 bb)]
    (loop [i n, acc (transient [])]
      (if (zero? i)
        (persistent! acc)
        (recur (dec i) (conj! acc (itf8/decode-itf8 bb)))))))
(defn- decode-encoding [^ByteBuffer bb]
  (let [codec-id (itf8/decode-itf8 bb)
        _n (itf8/decode-itf8 bb)]
    (case codec-id
      0 {:codec :null}
      1 (let [content-id (itf8/decode-itf8 bb)]
          {:codec :external
           :content-id content-id})
      3 (let [alphabet (decode-itf8-array bb)
              bit-len (decode-itf8-array bb)]
          {:codec :huffman, :alphabet alphabet, :bit-len bit-len})
      4 (let [len-encoding (decode-encoding bb)
              val-encoding (decode-encoding bb)]
          {:codec :byte-array-len, :len-encoding len-encoding, :val-encoding val-encoding})
      5 (let [stop-byte (.get bb)
              external-id (itf8/decode-itf8 bb)]
          {:codec :byte-array-stop, :stop-byte stop-byte, :external-id external-id})
      6 (let [offset (itf8/decode-itf8 bb)
              length (itf8/decode-itf8 bb)]
          {:codec :beta, :offset offset, :length length})
      7 (let [offset (itf8/decode-itf8 bb)
              k (itf8/decode-itf8 bb)]
          {:codec :subexp, :offset offset, :k k})
      9 (let [offset (itf8/decode-itf8 bb)]
          {:codec :gamma, :offset offset})
      (throw (ex-info (str "codec " codec-id " not supported") {})))))

Decodes the CRAM file definition from the given byte buffer.

(defn decode-file-definition
  [bb]
  (when-not (Arrays/equals ^bytes (bb/read-bytes bb 4) (.getBytes cram-magic))
    (throw (IOException. "Invalid CRAM file")))
  (let [major (bb/read-ubyte bb)
        minor (bb/read-ubyte bb)
        file-id (String. ^bytes (bb/read-bytes bb 20))]
    {:version {:major major :minor minor}, :id file-id}))

Decodes a container header from the given byte buffer.

(defn decode-container-header
  [^ByteBuffer bb]
  (let [len (.getInt bb)
        ref-seq-id (itf8/decode-itf8 bb)
        start-pos (itf8/decode-itf8 bb)
        span (itf8/decode-itf8 bb)
        n-records (itf8/decode-itf8 bb)
        counter (itf8/decode-ltf8 bb)
        n-bases (itf8/decode-ltf8 bb)
        n-blocks (itf8/decode-itf8 bb)
        landmarks (decode-itf8-array bb)
        crc (bb/read-bytes bb 4)]
    {:length len
     :ref-seq-id ref-seq-id
     :start start-pos
     :span span
     :records n-records
     :counter counter
     :bases n-bases
     :blocks n-blocks
     :landmarks landmarks
     :crc crc}))

Returns true iff the given container header represents an EOF container.

(defn eof-container?
  [container-header]
  (and (= (:length container-header) 15)
       (= (:ref-seq-id container-header) -1)
       (= (:start container-header) 4542278)
       (= (:span container-header) 0)
       (= (:records container-header) 0)
       (= (:counter container-header) 0)
       (= (:bases container-header) 0)
       (= (:blocks container-header) 1)
       (= (:landmarks container-header) [])))
(defn- split-buffer ^ByteBuffer [^ByteBuffer bb size]
  (let [^Buffer bb' (.order (.slice bb) ByteOrder/LITTLE_ENDIAN)]
    (bb/skip bb size)
    (.limit bb' size)))
(def ^:private decode-block-data
  (let [factory (CompressorStreamFactory.)]
    (fn [^ByteBuffer bb ^long method ^long size ^long raw-size]
      (if (zero? size)
        (bb/allocate-lsb-byte-buffer 0)
        (case method
          0 (split-buffer bb size)
          4 (->> (split-buffer bb size)
                 rans/decode
                 bb/make-lsb-byte-buffer)
          (let [compressed (bb/read-bytes bb size)
                bais (ByteArrayInputStream. compressed)
                uncompressed (byte-array raw-size)
                bb' (bb/make-lsb-byte-buffer uncompressed)
                compressor (case method
                             1 CompressorStreamFactory/GZIP
                             2 CompressorStreamFactory/BZIP2
                             3 CompressorStreamFactory/LZMA
                             (throw
                              (ex-info (str "compression method " method
                                            " not supported")
                                       {:method method})))]
            (with-open [is (.createCompressorInputStream factory compressor bais)]
              (.read is uncompressed)
              bb')))))))

Decodes a block from the given byte buffer.

(defn decode-block
  [bb]
  (let [method (bb/read-ubyte bb)
        content-type-id (bb/read-ubyte bb)
        content-id (itf8/decode-itf8 bb)
        size (itf8/decode-itf8 bb)
        raw-size (itf8/decode-itf8 bb)
        data (decode-block-data bb method size raw-size)
        crc (bb/read-bytes bb 4)]
    {:method method
     :content-type content-type-id
     :content-id content-id
     :size size
     :raw-size raw-size
     :data data
     :crc crc}))

Decodes a CRAM header block from the given byte buffer.

(defn decode-cram-header-block
  [bb]
  (let [{bb' :data} (decode-block bb)
        size (bb/read-uint bb')]
    (sam.header/parse-header (String. ^bytes (bb/read-bytes bb' size)))))
(defn- decode-substitution-matrix [bb]
  (let [bs (bb/read-bytes bb 5)
        all-bases [\A \C \G \T \N]]
    (into {}
          (map (fn [r ^long b]
                 [r (zipmap [(bit-and (bit-shift-right b 6) 0x3)
                             (bit-and (bit-shift-right b 4) 0x3)
                             (bit-and (bit-shift-right b 2) 0x3)
                             (bit-and b 0x3)]
                            (remove #{r} all-bases))])
               all-bases bs))))
(defn- decode-tag-dictionary [^ByteBuffer bb]
  (let [n (itf8/decode-itf8 bb)
        bb' (split-buffer bb n)
        decode-tags (fn [bb]
                      (loop [acc (transient [])]
                        (let [c1 (long (bb/read-ubyte bb))]
                          (if (zero? c1)
                            (persistent! acc)
                            (let [c2 (bb/read-ubyte bb)
                                  t (bb/read-ubyte bb)
                                  tag (keyword (str (char c1) (char c2)))]
                              (recur (conj! acc {:tag tag, :type (char t)})))))))]
    (loop [acc (transient [])]
      (if (.hasRemaining bb')
        (recur (conj! acc (decode-tags bb')))
        (persistent! acc)))))
(defn- decode-preservation-map [^ByteBuffer bb]
  (let [_size (itf8/decode-itf8 bb)
        n (itf8/decode-itf8 bb)]
    (loop [i n, acc (transient {:RN true, :AP true, :RR true})]
      (if (zero? i)
        (persistent! acc)
        (let [k (keyword (String. ^bytes (bb/read-bytes bb 2)))
              v (case k
                  (:RN :AP :RR) (pos? (.get bb))
                  :SM (decode-substitution-matrix bb)
                  :TD (decode-tag-dictionary bb))]
          (recur (dec i) (assoc! acc k v)))))))
(defn- decode-data-series-encodings [bb]
  (let [_size (itf8/decode-itf8 bb)
        n (itf8/decode-itf8 bb)]
    (loop [n n, acc (transient {})]
      (if (zero? n)
        (persistent! acc)
        (let [k (keyword (String. ^bytes (bb/read-bytes bb 2)))
              v (decode-encoding bb)]
          (recur (dec n) (assoc! acc k v)))))))
(defn- decode-tag-encoding-map [bb]
  (let [_size (itf8/decode-itf8 bb)
        n (itf8/decode-itf8 bb)]
    (loop [i n, acc {}]
      (if (zero? i)
        acc
        (let [k (itf8/decode-itf8 bb)
              c1 (char (bit-and (bit-shift-right k 16) 0xff))
              c2 (char (bit-and (bit-shift-right k 8) 0xff))
              t (char (bit-and k 0xff))
              v (decode-encoding bb)
              tag (keyword (str c1 c2))]
          (recur (dec i) (assoc-in acc [tag t] v)))))))

Decodes a compression header block from the given byte buffer.

(defn decode-compression-header-block
  [bb]
  (let [{bb' :data} (decode-block bb)
        preservation-map (decode-preservation-map bb')
        data-series-encodings (decode-data-series-encodings bb')
        tag-encoding-map (decode-tag-encoding-map bb')]
    {:preservation-map preservation-map
     :data-series data-series-encodings
     :tags tag-encoding-map}))

Decodes a slice header block from the given byte buffer.

(defn decode-slice-header-block
  [bb]
  (let [{bb' :data} (decode-block bb)
        ref-seq-id (itf8/decode-itf8 bb')
        start (itf8/decode-itf8 bb')
        span (itf8/decode-itf8 bb')
        n-records (itf8/decode-itf8 bb')
        counter (itf8/decode-ltf8 bb')
        n-blocks (itf8/decode-itf8 bb')
        content-ids (decode-itf8-array bb')
        embedded-reference (itf8/decode-itf8 bb')
        reference-md5 (bb/read-bytes bb' 16)
        tags []]
    {:ref-seq-id ref-seq-id
     :start start
     :span span
     :records n-records
     :counter counter
     :blocks n-blocks
     :content-ids content-ids
     :embedded-reference embedded-reference
     :reference-md5 reference-md5
     :tags tags}))