(defn levenshtein-distance-1
"Calculates the edit-distance between two sequences"
[seq1 seq2]
(cond
(empty? seq1) (count seq2)
(empty? seq2) (count seq1)
:else (min
(+ (if (= (first seq1) (first seq2)) 0 1)
(levenshtein-distance-1 (rest seq1) (rest seq2)))
(inc (levenshtein-distance-1 (rest seq1) seq2))
(inc (levenshtein-distance-1 seq1 (rest seq2))))))
(def levenshtein-distance-1 (memoize levenshtein-distance-1))
(levenshtein-distance-1 "avada kedavra" "abracadabra")
(time (levenshtein-distance-1 (repeat 100 \a) (repeat 100 \b)))
(levenshtein-distance-1 (repeat 140 \a) (repeat 140 \b))
(def fact-list (atom {}))
(defn add-fact! [n fn] (swap! fact-list #(assoc % n fn)))
(def to-do-list (atom '()))
(defn add-task! [t] (swap! to-do-list #(cons t %)))
(defn add-tasks! [tlist] (doseq [t (reverse tlist)] (add-task! t)))
(defn pop-task! [] (let [h (first @to-do-list)] (swap! to-do-list rest) h))
(defn run-list! []
(let [a (pop-task!)]
(when (not (nil? a))
(a)
(recur))))
(defn peek-lists [] [fact-list to-do-list])
(defn init! [] (reset! fact-list {}) (reset! to-do-list '()))
(defn calculate-levenshtein-distance[m n]
(init!)
(let [a (levenshtein-distance m n)]
(if (= a :tasks-added-to-do-list)
(do
(run-list!)
(levenshtein-distance m n))
a)))
(defn levenshtein-distance
"Calculates the edit-distance between two sequences"
[seq1 seq2]
(let [return (fn [x] (add-fact! [seq1 seq2] x) x)]
(cond
(empty? seq1) (return (count seq2))
(empty? seq2) (return (count seq1))
:else (let [l1 (@fact-list [(rest seq1) (rest seq2)])
l2 (@fact-list [(rest seq1) seq2])
l3 (@fact-list [seq1 (rest seq2)])]
(if (and l1 l2 l3)
(return (min
(+ (if (= (first seq1) (first seq2)) 0 1)
l1)
(inc l2)
(inc l3)))
(do (add-task! #(levenshtein-distance seq1 seq2))
(when (nil? l1) (add-task! #(levenshtein-distance (rest seq1) (rest seq2))))
(when (nil? l2) (add-task! #(levenshtein-distance (rest seq1) seq2)))
(when (nil? l3) (add-task! #(levenshtein-distance seq1 (rest seq2))))
:tasks-added-to-do-list))))))
(calculate-levenshtein-distance "abracadabra" "avada kedavra")
(calculate-levenshtein-distance (repeat 140 \a) (repeat 140 \b))
(time (calculate-levenshtein-distance (repeat 100 \a) (repeat 100 \b)))
(time (calculate-levenshtein-distance (repeat 100 \a) (repeat 100 \b))) (time (calculate-levenshtein-distance (repeat 200 \a) (repeat 200 \b))) (time (calculate-levenshtein-distance (repeat 300 \a) (repeat 300 \b))) (time (calculate-levenshtein-distance (repeat 400 \a) (repeat 400 \b))) (time (calculate-levenshtein-distance (repeat 500 \a) (repeat 500 \b))) (time (calculate-levenshtein-distance (repeat 600 \a) (repeat 600 \b))) (time (calculate-levenshtein-distance (repeat 700 \a) (repeat 700 \b))) (time (calculate-levenshtein-distance (repeat 800 \a) (repeat 800 \b))) (time (calculate-levenshtein-distance (repeat 900 \a) (repeat 900 \b))) (time (calculate-levenshtein-distance (repeat 1000 \a) (repeat 1000 \b)))
(map /
(map #(/ % 341.) '(1624 3761 6652 10858 17312 25577 38125 53218 72846))
(map #(* % % (Math/log %)) '(2 3 4 5 6 7 8 9 10)))
custom made for twitter, apparently.
ReplyDeletethank you for an informative blog!
This comment has been removed by the author.
ReplyDelete