;; A Very Gentle Introduction to Information Theory : Part II

;; Entropy and Huffman Coding

;; Here's the essential code from part I, which I'm not going to explain again:

(defn random-stream [P]
  (let [pseq (vec (mapcat (fn [[k v]] (repeat v k)) P))]
    (for [i (range)] (rand-nth pseq))))

(defn cost [encoder decoder message]
  (let [coded (encoder message)]
    (if (= (decoder coded) message)
      (count coded)
      :fail)))

(def unfair-pairs {:HH 9, :HT 3, :TH 3, :TT 1})

;; We're trying to transmit the output of the random process represented by:

(def stream (random-stream unfair-pairs))

(take 20 stream)
;(:HH :HH :HH :HH :HH :HH :HH :HH :HT :HH :HH :TH :HH :HH :HH :TT :HH :HH :HT :HT)

;; And we're using the code HH -> 1, HT -> 01, TH -> 001, TT -> 000

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Our code seems to have something of a tree structure:
;; 1 -> :HH
;; 0 -> 1 -> :HT
;;      0 -> 1 -> :TH
;;           0 -> :TT

;; Let's see if we can find some way of expressing that, so that we don't have to
;; hand-code a decoder for every different code.

(def code-tree [:HH [:HT [:TH :TT]]])

(defn decoder
  ([code-tree stream] (decoder code-tree code-tree stream))
  ([current-code-tree code-tree stream]
     (lazy-seq
      (if (keyword? current-code-tree)
        (cons current-code-tree (decoder code-tree code-tree stream))
        (if-let [stream (seq stream)]
          (if (= (first stream) 1)
            (decoder (first current-code-tree) code-tree (rest stream))
            (decoder (second current-code-tree) code-tree (rest stream))))))))

(decoder code-tree '(0 1 1 0 1 0 1 1 0 0 0)) ;(:HT :HH :HT :HT :HH :TT)

;; A general encoder, by comparison, is fairly straightforward:

(def code {:HH '(1), :HT '(0 1), :TH '(0 0 1), :TT '(0 0 0)})

(defn encoder [code stream]
  (mapcat code stream))

(take 20 (encoder code stream)) ;(1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 0 0)

;; Trying the two together:

(take 20 (decoder code-tree (encoder code stream)))
;(:HH :HH :HH :HH :HH :HH :HH :HH :HT :HH :HH :TH :HH :HH :HH :TT :HH :HH :HT :HT)

;; And finally:

(defn make-encoder [code]
  (fn [s] (encoder code s)))

(defn make-decoder [code-tree]
  (fn [s] (decoder code-tree s)))

(cost (make-encoder code) (make-decoder code-tree) (take 10000 stream)) ; £16992

;; It costs us £16992 to send 10000 symbols.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; We can use our code to send the output from the original biased coin very easily

(def unfair-coin {:H 3 :T 1})

(def unfair-stream (random-stream unfair-coin))

(take 20 unfair-stream) ; (:H :H :H :H :H :H :T :H :H :H :H :H :H :H :T :H :H :H :H :T)

(defn combine-keywords [& a]
  (keyword (apply str (mapcat #(drop 1 (str %)) a))))

(defn split-keyword [a]
  (map #(keyword (str %)) (drop 1 (str a))))

(defn make-combination-encoder [code n]
  (fn [s] (encoder code (map #(apply combine-keywords %) (partition n s)))))

(defn make-combination-decoder [code-tree]
  (fn [s] (mapcat split-keyword (decoder code-tree s))))

(cost (make-combination-encoder code 2) (make-combination-decoder code-tree) (take 10000 unfair-stream)) ; £8460

;; So our method of coding for {:HH 9, :HT 3, :TH 3, :TT 1} has given us a method
;; of coding for {:H 3, :T 1} which is 16% more efficient than the obvious one.

;; What if we try it on the output from the fair coin?

(def fair-stream (random-stream {:H 1 :T 1}))

(cost (make-combination-encoder code 2) (make-combination-decoder code-tree) (take 10000 fair-stream)) ; £11257

;; Using this code on the output from an unbiased coin actually makes it more expensive to transmit!
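
;; As a sanity check, we can estimate those last two costs analytically rather than
;; by sampling. The little sketch below (expected-bits-per-pair is just a throwaway
;; helper invented for this aside, not part of the code above) takes the
;; probability-weighted average of the code-word lengths; halving it gives the
;; expected cost per single coin toss.

(defn expected-bits-per-pair [P code]
  (let [total (reduce + (vals P))]
    (reduce + (map (fn [[k p]] (* (/ p total) (count (code k)))) P))))

(expected-bits-per-pair unfair-pairs code) ; 27/16, i.e. 27/32 ≈ £0.84 per symbol, which matches the £8460

(expected-bits-per-pair {:HH 1, :HT 1, :TH 1, :TT 1} code) ; 9/4, i.e. 9/8 = £1.125 per symbol, which matches the £11257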
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; To recap:

;; We can transmit the output from a series of coin tosses, or other random
;; processes, down a binary channel, if we choose a code.

;; The code can be trivial, like :H -> 0, :T -> 1,
;; or it can be complex, like :HH -> 1, :HT -> 01, :TH -> 001, :TT -> 000.

;; Different codes can result in different costs of transmission for the outputs
;; of different processes.

;; So far, we've seen costs of £1/symbol for the fair coin with the trivial code,
;; or £1.12/symbol with the more complex code.

;; And we've seen costs of £1/symbol and £0.84/symbol for the unfair coin with
;; the trivial and complex code respectively.

;; It seems that choosing the right code can make transmission cheaper, and
;; choosing the wrong code can make it more expensive.
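
;; To see all four of those numbers side by side, here's a sketch of one way to
;; re-run each (code, coin) combination and divide by the number of symbols sent.
;; trivial-code, trivial-tree and per-symbol-cost are just names made up for this
;; aside; the trivial code is :H -> 0, :T -> 1, with its one-level decoding tree.

(def trivial-code {:H '(0), :T '(1)})
(def trivial-tree [:T :H]) ; 1 -> :T, 0 -> :H

(defn per-symbol-cost [enc dec stream]
  (/ (cost enc dec (take 10000 stream)) 10000.0))

(per-symbol-cost (make-encoder trivial-code) (make-decoder trivial-tree) fair-stream)   ; £1.00/symbol
(per-symbol-cost (make-encoder trivial-code) (make-decoder trivial-tree) unfair-stream) ; £1.00/symbol
(per-symbol-cost (make-combination-encoder code 2) (make-combination-decoder code-tree) fair-stream)   ; ~£1.12/symbol
(per-symbol-cost (make-combination-encoder code 2) (make-combination-decoder code-tree) unfair-stream) ; ~£0.84/symbol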