Utilities for handling chromosome names. | (ns cljam.util.chromosome (:require [clojure.string :as cstr] [proton.core :as proton])) |
(defn normalize-name
  "Returns `s` with every comma or period replaced by an underscore and
  every single or double quote character removed."
  [s]
  (-> s
      (cstr/replace #"[,\.]" "_")
      ;; The replacement argument is required: without it the threaded call
      ;; has only two arguments and fails with an arity error.
      (cstr/replace #"[\"\']" "")))
(defn trim-chromosome-key
  "Removes chr prefix from chromosome name.
  The prefix is matched case-insensitively and only at the start of `s`;
  a name without the prefix is returned unchanged."
  [s]
  (let [prefix-pattern #"(?i)^chr"]
    ;; The pattern is anchored at the start, so it can match at most once.
    (cstr/replace-first s prefix-pattern "")))
(defn- split-version-suffix
  "Splits `s` into a two-element vector `[base suffix]`.
  `suffix` is a trailing `v<digit>` marker (case-insensitive), optionally
  followed by `_alt` or `_random`; it is nil when `s` has no such ending.
  Both elements are nil when the pattern does not match `s` at all."
  [s]
  (let [groups (re-matches #"(.+?)((?i)v[0-9](_alt|_random)?)?" s)]
    ;; `get` yields nil for a nil match, mirroring nil destructuring.
    [(get groups 1) (get groups 2)]))
(defn- normalize-chromosome-prefix
  "Canonicalizes a name that starts (case-insensitively) with `chr` followed
  by one or two digits, X, Y, M, MT or Un.
  The result is a lowercase `chr`, then the chromosome part (digits parsed
  as an integer so leading zeros disappear, `Un` in that exact case,
  anything else upper-cased), then the remainder upper-cased.
  A name that does not match is returned unchanged."
  [s]
  (let [[_ base leftover :as match]
        (re-matches #"(?i)chr([0-9]{1,2}|X|Y|M|MT|Un)(.*)" s)]
    (if-not match
      s
      (let [canonical (cond
                        (re-matches #"\d+" base) (str (Integer/parseInt base))
                        (re-matches #"(?i)Un" base) "Un"
                        :else (cstr/upper-case base))]
        (str "chr" canonical (cstr/upper-case leftover))))))
(defn- prepend-chromosome-prefix
  "Returns `s` with `chr` prepended when `s` begins (case-insensitively)
  with one or two digits, X, Y, M, MT or Un; otherwise returns `s`
  unchanged."
  [s]
  (cond->> s
    (re-matches #"(?i)([0-9]{1,2}|X|Y|M|MT|Un).*" s) (str "chr")))
(defn normalize-chromosome-key
  "Normalizes chromosome name.
  The name is split into a base and an optional version suffix. The base
  goes through `normalize-name`, `prepend-chromosome-prefix` and
  `normalize-chromosome-prefix` in that order; the suffix, when present,
  is lower-cased and appended to the result."
  [s]
  (let [[base version-suffix] (split-version-suffix s)
        normalized-base (normalize-chromosome-prefix
                         (prepend-chromosome-prefix
                          (normalize-name base)))
        ;; nil when there is no suffix; `str` renders nil as nothing.
        normalized-suffix (some-> version-suffix cstr/lower-case)]
    (str normalized-base normalized-suffix)))
(defn is-primary-chromosome?
  "Returns true when the normalized form of `s` (see
  `normalize-chromosome-key`) is exactly `chr` followed by one or two
  digits, X, Y, M or MT; false otherwise."
  [s]
  (let [normalized (normalize-chromosome-key s)
        match (re-matches #"^chr([0-9]{1,2}|X|Y|M|MT)" normalized)]
    (not (nil? match))))
(defn chromosome-order-key
  "Returns a two-element vector usable as a sort key for chromosome name `s`.

  * Numbered chromosomes (optional `chr` prefix, case-insensitive) yield
    `[number suffix]`.
  * X, Y, M and MT (any letter case) yield `[rank suffix]`, where rank is
    `Integer/MAX_VALUE` minus 4, 3, 2 and 1 respectively, so they come
    after the numbered ones in the order X, Y, M, MT.
  * Any other name yields `[Integer/MAX_VALUE s]`.

  `suffix` is the run of non-whitespace characters following the
  chromosome part."
  [s]
  (if-let [[_ _ chr suffix]
           (re-find #"(?i)^(chr)?([1-9][0-9]*|X|Y|MT|M)(\S*)" s)]
    (if-let [num (proton/as-int chr)]
      [num suffix]
      ;; The regex is case-insensitive, so `chr` may be lowercase or mixed
      ;; case here; upper-case it before the lookup so names such as \"chrx\"
      ;; no longer fail for lack of a matching clause.
      (if-let [rank ({"X" 4 "Y" 3 "M" 2 "MT" 1} (cstr/upper-case chr))]
        [(- Integer/MAX_VALUE rank) suffix]
        ;; Reached only when `chr` is all digits but `proton/as-int` gave no
        ;; number (presumably out of int range -- TODO confirm); treat it
        ;; like an unrecognized name instead of throwing.
        [Integer/MAX_VALUE s]))
    [Integer/MAX_VALUE s]))