I/O utilities.

(ns cljam.io.util
  (:require [cljam.io.protocols :as protocols]
            cljam.io.sam.reader
            cljam.io.sam.writer
            cljam.io.bam.reader
            cljam.io.bam.writer
            cljam.io.vcf.reader
            cljam.io.vcf.writer
            cljam.io.bcf.reader
            cljam.io.bcf.writer
            cljam.io.fasta.reader
            cljam.io.fasta.writer
            cljam.io.twobit.reader
            cljam.io.twobit.writer
            cljam.io.fastq
            cljam.io.bed
            cljam.io.wig
            cljam.io.bigwig
            [cljam.util :as util])
  (:import [java.nio ByteBuffer ByteOrder]))

Checks if given object implements protocol IAlignmentReader.

(defn alignment-reader?
  "Returns true if `rdr` satisfies the `protocols/IAlignmentReader` protocol,
  false otherwise."
  [rdr]
  (satisfies? protocols/IAlignmentReader rdr))

Checks if given object implements protocol IAlignmentWriter.

(defn alignment-writer?
  "Returns true if `wtr` satisfies the `protocols/IAlignmentWriter` protocol,
  false otherwise."
  [wtr]
  (satisfies? protocols/IAlignmentWriter wtr))

Checks if given object is an instance of SAMReader.

(defn sam-reader?
  "Returns true if `rdr` is an instance of `cljam.io.sam.reader.SAMReader`,
  false otherwise."
  [rdr]
  (instance? cljam.io.sam.reader.SAMReader rdr))

Checks if given object is an instance of SAMWriter.

(defn sam-writer?
  "Returns true if `wtr` is an instance of `cljam.io.sam.writer.SAMWriter`,
  false otherwise."
  [wtr]
  (instance? cljam.io.sam.writer.SAMWriter wtr))

Checks if given object is an instance of BAMReader.

(defn bam-reader?
  "Returns true if `rdr` is an instance of `cljam.io.bam.reader.BAMReader`,
  false otherwise."
  [rdr]
  (instance? cljam.io.bam.reader.BAMReader rdr))

Checks if given object is an instance of BAMWriter.

(defn bam-writer?
  "Returns true if `wtr` is an instance of `cljam.io.bam.writer.BAMWriter`,
  false otherwise."
  [wtr]
  (instance? cljam.io.bam.writer.BAMWriter wtr))

Checks if given object implements protocol IVariantReader.

(defn variant-reader?
  "Returns true if `rdr` satisfies the `protocols/IVariantReader` protocol,
  false otherwise."
  [rdr]
  (satisfies? protocols/IVariantReader rdr))

Checks if given object implements protocol IVariantWriter.

(defn variant-writer?
  "Returns true if `wtr` satisfies the `protocols/IVariantWriter` protocol,
  false otherwise."
  [wtr]
  (satisfies? protocols/IVariantWriter wtr))

Checks if given object is an instance of VCFReader.

(defn vcf-reader?
  "Returns true if `rdr` is an instance of `cljam.io.vcf.reader.VCFReader`,
  false otherwise."
  [rdr]
  (instance? cljam.io.vcf.reader.VCFReader rdr))

Checks if given object is an instance of VCFWriter.

(defn vcf-writer?
  "Returns true if `wtr` is an instance of `cljam.io.vcf.writer.VCFWriter`,
  false otherwise."
  [wtr]
  (instance? cljam.io.vcf.writer.VCFWriter wtr))

Checks if given object is an instance of BCFReader.

(defn bcf-reader?
  "Returns true if `rdr` is an instance of `cljam.io.bcf.reader.BCFReader`,
  false otherwise."
  [rdr]
  (instance? cljam.io.bcf.reader.BCFReader rdr))

Checks if given object is an instance of BCFWriter.

(defn bcf-writer?
  "Returns true if `wtr` is an instance of `cljam.io.bcf.writer.BCFWriter`,
  false otherwise."
  [wtr]
  (instance? cljam.io.bcf.writer.BCFWriter wtr))

Checks if given object implements protocol ISequenceReader.

(defn sequence-reader?
  "Returns true if `rdr` satisfies the `protocols/ISequenceReader` protocol,
  false otherwise."
  [rdr]
  (satisfies? protocols/ISequenceReader rdr))

Checks if given object implements protocol ISequenceWriter.

(defn sequence-writer?
  "Returns true if `wtr` satisfies the `protocols/ISequenceWriter` protocol,
  false otherwise."
  [wtr]
  (satisfies? protocols/ISequenceWriter wtr))

Checks if given object is an instance of FASTAReader.

(defn fasta-reader?
  "Returns true if `rdr` is an instance of
  `cljam.io.fasta.reader.FASTAReader`, false otherwise."
  [rdr]
  (instance? cljam.io.fasta.reader.FASTAReader rdr))

Checks if given object is an instance of FASTAWriter.

(defn fasta-writer?
  "Returns true if `wtr` is an instance of
  `cljam.io.fasta.writer.FASTAWriter`, false otherwise."
  [wtr]
  (instance? cljam.io.fasta.writer.FASTAWriter wtr))

Checks if given object is an instance of TwoBitReader.

(defn twobit-reader?
  "Returns true if `rdr` is an instance of
  `cljam.io.twobit.reader.TwoBitReader`, false otherwise."
  [rdr]
  (instance? cljam.io.twobit.reader.TwoBitReader rdr))

Checks if given object is an instance of TwoBitWriter.

(defn twobit-writer?
  "Returns true if `wtr` is an instance of
  `cljam.io.twobit.writer.TwoBitWriter`, false otherwise."
  [wtr]
  (instance? cljam.io.twobit.writer.TwoBitWriter wtr))

Checks if given object is an instance of FASTQReader.

(defn fastq-reader?
  "Returns true if `rdr` is an instance of `cljam.io.fastq.FASTQReader`,
  false otherwise."
  [rdr]
  (instance? cljam.io.fastq.FASTQReader rdr))

Checks if given object is an instance of FASTQWriter.

(defn fastq-writer?
  "Returns true if `wtr` is an instance of `cljam.io.fastq.FASTQWriter`,
  false otherwise."
  [wtr]
  (instance? cljam.io.fastq.FASTQWriter wtr))

Checks if given object is an instance of BEDReader.

(defn bed-reader?
  "Returns true if `rdr` is an instance of `cljam.io.bed.BEDReader`, false
  otherwise."
  [rdr]
  (instance? cljam.io.bed.BEDReader rdr))

Checks if given object is an instance of BEDWriter.

(defn bed-writer?
  "Returns true if `wtr` is an instance of `cljam.io.bed.BEDWriter`, false
  otherwise."
  [wtr]
  (instance? cljam.io.bed.BEDWriter wtr))

Checks if given object is an instance of WIGReader.

(defn wig-reader?
  "Returns true if `rdr` is an instance of `cljam.io.wig.WIGReader`, false
  otherwise."
  [rdr]
  (instance? cljam.io.wig.WIGReader rdr))

Checks if given object is an instance of WIGWriter.

(defn wig-writer?
  "Returns true if `wtr` is an instance of `cljam.io.wig.WIGWriter`, false
  otherwise."
  [wtr]
  (instance? cljam.io.wig.WIGWriter wtr))

Checks if given object is an instance of BIGWIGReader.

(defn bigwig-reader?
  "Returns true if `rdr` is an instance of `cljam.io.bigwig.BIGWIGReader`,
  false otherwise."
  [rdr]
  (instance? cljam.io.bigwig.BIGWIGReader rdr))

Detects a file format from the path of f, returning a keyword representing the format. Throws an exception if an unsupported file is supplied.

(defn file-type
  "Detects a file format from the path of `f` and returns a keyword naming the
  format (one of :sam :bai :bam :fastq :fai :fasta :2bit :tbi :vcf :bcf :bed
  :gff :wig :bigwig).

  `f` is converted with `util/as-url` and only the path component of the
  result is examined. The patterns are tried in order, case-insensitively,
  and the first one found in the path wins. Patterns that do not end in `$`
  are unanchored: they match anywhere in the path, not only at its end.

  Throws `IllegalArgumentException` when no pattern matches."
  [f]
  (let [path (.getPath (util/as-url f))
        ;; Order matters: the `$`-anchored index formats (.bai, .fai, .tbi)
        ;; must be tried before the unanchored patterns, e.g. `\.(fa|...)`
        ;; would otherwise also match a path ending in `.fai`.
        patterns [[#"(?i)\.sam$" :sam]
                  [#"(?i)\.bai$" :bai]
                  [#"(?i)\.bam$" :bam]
                  [#"(?i)\.f(ast)?q" :fastq]
                  [#"(?i)\.fai$" :fai]
                  [#"(?i)\.(fa|fasta|fas|fsa|seq|fna|faa|ffn|frn|mpfa)" :fasta]
                  [#"(?i)\.2bit$" :2bit]
                  [#"(?i)\.tbi$" :tbi]
                  [#"(?i)\.vcf" :vcf]
                  [#"(?i)\.bcf$" :bcf]
                  [#"(?i)\.bed" :bed]
                  [#"(?i)\.gff3?" :gff]
                  [#"(?i)\.wig" :wig]
                  [#"(?i)\.(bigWig|bw)" :bigwig]]
        detect (fn [[re fmt]]
                 (when (re-find re path)
                   fmt))]
    (or (some detect patterns)
        (throw (IllegalArgumentException. "Invalid file type")))))

Tries to detect a file format based on the contents of the byte array ba. The input byte array should contain at least 4 bytes, since the first 4 bytes are inspected for binary magic numbers. Note that detection of some formats #{:fasta :fastq :wig :fai :bed} is based on naive heuristics and thus can fail.

(defn file-type-from-bytes
  "Tries to detect a file format from the leading bytes `ba` of a file.

  Returns a keyword naming the format (:bigwig :2bit :bam :bai :bcf :tbi :vcf
  :gff :sam :fasta :fastq :wig :fai :bed), or nil if nothing matches.

  Detection runs in two stages:
    1. The first 4 bytes are read as a little-endian unsigned integer and
       compared with the bigWig and 2bit magic numbers (2bit in both byte
       orders). This stage is skipped when `ba` has fewer than 4 bytes.
    2. Otherwise, up to the first 64 bytes are decoded as text and matched
       against format-specific patterns, in order.

  Note that detection of #{:fasta :fastq :wig :fai :bed} is based on naive
  heuristics and thus can fail."
  [^bytes ba]
  (let [len (alength ba)
        ;; Decode with a fixed single-byte charset so that the result does not
        ;; depend on the JVM's default charset; all patterns below are ASCII.
        s (String. ba 0 (Math/min (int 64) len)
                   java.nio.charset.StandardCharsets/ISO_8859_1)
        ;; Reading an int needs 4 bytes; shorter inputs would underflow the
        ;; buffer, so they skip the magic check and use the text heuristics.
        i (when (>= len 4)
            (-> (ByteBuffer/wrap ba)
                (.order ByteOrder/LITTLE_ENDIAN)
                (.getInt)
                (Integer/toUnsignedLong)))]
    (case i
      0x888ffc26 :bigwig
      (0x1A412743 0x4327411A) :2bit
      (condp re-find s
        #"^BAM\01" :bam
        #"^BAI\01" :bai
        #"^BCF\02" :bcf
        #"^TBI\01" :tbi
        #"^##fileformat=VCF" :vcf
        #"^##gff-version 3" :gff
        #"^@HD\t" :sam
        #"^@SQ\t" :sam
        #"^@RG\t" :sam
        #"^@PG\t" :sam
        #"^@CO\t" :sam
        #"^>\S" :fasta
        #"^@\S" :fastq
        #"(variable|fixed)Step" :wig
        #"^\S+(?:\t\d+){4}\n" :fai
        #"(?m)^\S+( |\t)\d+( |\t)\d+( |\t|$)" :bed
        nil))))

Detects a file format based on contents of the input file f. Causes a side effect of reading some header bytes.

(defn file-type-from-contents
  "Detects a file format based on the contents of the input file `f`.

  Opens `f` with `util/compressor-input-stream`, reads up to 64 header bytes
  from it, closes the stream and passes the bytes to `file-type-from-bytes`,
  whose result (a format keyword or nil) is returned.

  Causes a side effect of reading some header bytes."
  [f]
  (let [size 64
        ^bytes ba (byte-array size)
        ;; A single `read` call may return fewer bytes than requested even
        ;; when more data is available, so keep reading until the buffer is
        ;; full or the end of the stream is reached.
        n (with-open [is (util/compressor-input-stream f)]
            (loop [off 0]
              (if (< off size)
                (let [r (long (.read is ba (int off) (int (- size off))))]
                  (if (neg? r)
                    off
                    (recur (+ off r))))
                off)))]
    ;; Drop the unread, zero-filled tail so it cannot interfere with the
    ;; text heuristics, but keep at least 4 bytes so that the magic-number
    ;; check in `file-type-from-bytes` always has enough input.
    (file-type-from-bytes (java.util.Arrays/copyOf ba (int (max n 4))))))