;; Successively more sophisticated approaches to the Smith-Waterman algorithm ;; for matching sequences of nucleotides (define alphabet '(a c t g)) (load "tables.scm") (define point-mutations (make-table2)) ;; Mutation penalty to 1st element from 2nd. (table2-set! point-mutations 'a 'a 0) (table2-set! point-mutations 'a 'c 0.3) (table2-set! point-mutations 'a 'g 0.4) (table2-set! point-mutations 'a 't 0.3) (table2-set! point-mutations 'c 'a 0.4) (table2-set! point-mutations 'c 'c 0) (table2-set! point-mutations 'c 'g 0.2) (table2-set! point-mutations 'c 't 0.3) (table2-set! point-mutations 'g 'a 0.1) (table2-set! point-mutations 'g 'c 0.3) (table2-set! point-mutations 'g 'g 0) (table2-set! point-mutations 'g 't 0.2) (table2-set! point-mutations 't 'a 0.3) (table2-set! point-mutations 't 'c 0.4) (table2-set! point-mutations 't 'g 0.1) (table2-set! point-mutations 't 't 0) (define (mutation to from) (table2-get point-mutations to from)) (define omit-penalty 0.5) (define insert-penalty 0.7) (define (match0 one two) (define (helper x y score) (cond ((and (null? x) (null? y)) score) ((null? x) (helper x (cdr y) (+ score omit-penalty))) ((null? y) (helper (cdr x) y (+ score insert-penalty))) ((eq? (car x) (car y)) (helper (cdr x) (cdr y) score)) (else (let ((mutated (helper (cdr x) (cdr y) (+ score (mutation (car x) (car y))))) (omitted (helper x (cdr y) (+ score omit-penalty))) (inserted (helper (cdr x) y (+ score insert-penalty)))) (min mutated omitted inserted))))) (helper one two 0.0)) (define (match1 one two) (let ((past (make-table2))) (define (helper x y score) (let ((old (table2-get past x y))) (or old (let ((new (cond ((and (null? x) (null? y)) score) ((null? x) (helper x (cdr y) (+ score omit-penalty))) ((null? y) (helper (cdr x) y (+ score insert-penalty))) ((eq? (car x) (car y)) (helper (cdr x) (cdr y) score)) (else (let ((mutated (helper (cdr x) (cdr y) (+ score (mutation (car x) (car y))))) (omitted (helper x (cdr y) (+ score omit-penalty))) (inserted (helper (cdr x) y (+ score insert-penalty)))) (min mutated omitted inserted)))))) (table2-set! past x y new) new)))) (let ((ans (helper one two 0.0))) (display (table2-size past)) ans))) (define (match1 one two) (let ((past (make-table2))) (define (helper x y score) (let ((old (table2-get past x y))) (if old (+ score old) (let ((new (cond ((and (null? x) (null? y)) score) ((null? x) (helper x (cdr y) (+ score omit-penalty))) ((null? y) (helper (cdr x) y (+ score insert-penalty))) ((eq? (car x) (car y)) (helper (cdr x) (cdr y) score)) (else (let ((mutated (helper (cdr x) (cdr y) (+ score (mutation (car x) (car y))))) (omitted (helper x (cdr y) (+ score omit-penalty))) (inserted (helper (cdr x) y (+ score insert-penalty)))) (min mutated omitted inserted)))))) (table2-set! past x y (- new score)) new)))) (let ((ans (helper one two 0.0))) (display (table2-size past)) ans))) (define infty 10000.0) (define (match2 one two limit) (let ((past (make-table2))) (define (helper x y score) (let ((old (table2-get past x y))) (or old (let ((new (cond ((> score limit) infty) ((and (null? x) (null? y)) score) ((null? x) (helper x (cdr y) (+ score omit-penalty))) ((null? y) (helper (cdr x) y (+ score insert-penalty))) ((eq? (car x) (car y)) (helper (cdr x) (cdr y) score)) (else (let ((mutated (helper (cdr x) (cdr y) (+ score (mutation (car x) (car y))))) (omitted (helper x (cdr y) (+ score omit-penalty))) (inserted (helper (cdr x) y (+ score insert-penalty)))) (min mutated omitted inserted)))))) (table2-set! past x y new) new)))) (let ((ans (helper one two 0.0))) (display (table2-size past)) ans))) ;;; Example sequences (define s1 '(a a t c t g c c t a t t g t c g a c g c)) (define s2 '(a a t c a g c a g c t c a t c g a c g g)) (define s3 '(a g a t c a g c a c t c a t c g a c g g)) (define s11 (append s1 s1)) (define s1111 (append s11 s11)) (define s22 (append s2 s2)) (define s2222 (append s22 s22)) (define s33 (append s3 s3)) (define s3333 (append s33 s33)) ;; (match1 s1 s2) ;; (match1 s2 s3) ;; (match1 s1 s3) ;; (match2 s1 s2 3) ;; (match2 s1 s2 2.5) ;; (match2 s1 s2 2) ;; (match2 s1 s2 1.5) ;; (match1 s2 s3) ;; (match1 s1 s3) ;(for-each (lambda (cutoff) ; (newline) ; (display (list 'cutoff cutoff)) ; (newline) ; (display (list "s1,s2" (match2 s1 s2 cutoff))) ; (newline) ; (display (list "s11,s22" (match2 s11 s22 cutoff))) ; (newline) ; (display (list "s1111,s2222" (match2 s1111 s2222 cutoff)))) ; '(10000 15 10 5 3 2.5 2 1.5)) ;;; Search-based matcher (define (make-search-state score x y history) (list score x y history)) (define ss-score car) (define ss-x cadr) (define ss-y caddr) (define ss-history cadddr) (define (successors ss) (let ((x (ss-x ss)) (y (ss-y ss)) (s (ss-score ss)) (h (ss-history ss))) (cond ((and (null? x) (null? y)) '()) ((null? x) (list (make-search-state (+ s omit-penalty) x (cdr y) (cons (list 'o (car y)) h)))) ((null? y) (list (make-search-state (+ s insert-penalty) (cdr x) y (cons (list 'i (car x)) h)))) ((eq? (car x) (car y)) (list (make-search-state s (cdr x) (cdr y) (cons (list '= (car x) (car y)) h)))) (else (list (make-search-state (+ s (mutation (car x) (car y))) (cdr x) (cdr y) (cons (list 'm (car x) (car y)) h)) (make-search-state (+ s omit-penalty) x (cdr y) (cons (list 'o (car y)) h)) (make-search-state (+ s insert-penalty) (cdr x) y (cons (list 'i (car x)) h))))))) (define mq 0) (define (search start-state done? succ-fn merge-fn) (define (inner queue) (set! mq (max mq (length queue))) (if (null? queue) #f (let ((current (car queue))) (if (done? current) current (inner (merge-fn (succ-fn current) (cdr queue))))))) (set! mq 0) (inner (list start-state))) (define (completed? ss) (and (null? (ss-x ss)) (null? (ss-y ss)))) (define (match-dfs x y) (search (make-search-state 0.0 x y '()) completed? successors append)) (define (match-bfs x y) (search (make-search-state 0.0 x y '()) completed? successors (lambda (new old) (append old new)))) (load "sort.scm") (define (better? ss1 ss2) (< (ss-score ss1) (ss-score ss2))) (define (same-state? ss1 ss2) (and (eq? (ss-x ss1) (ss-x ss2)) (eq? (ss-y ss1) (ss-y ss2)))) (define (priority-merge new old) (let ((new-sorted (sort new better?))) (define (merge x y ans) (cond ((and (null? x) (null? y)) ans) ((null? x) (append (reverse! y) ans)) ((null? y) (append (reverse! x) ans)) ((better? (car x) (car y)) (merge (cdr x) y (cons (car x) ans))) (else (merge x (cdr y) (cons (car y) ans))))) (reverse! (merge new-sorted old '())))) (define (match-best-fs-1st-try x y) (search (make-search-state 0.0 x y '()) completed? successors priority-merge)) (define (filter proc l) (cond ((null? l) '()) ((proc (car l)) (cons (car l) (filter proc (cdr l)))) (else (filter proc (cdr l))))) (define (match-best-fs x y) (let ((past (make-table2)) (start (make-search-state 0.0 x y '()))) (define (my-merge new old) (let* ((new-useful (filter (lambda (n) (let ((earlier (table2-get past (ss-x n) (ss-y n)))) (cond ((or (not earlier) (better? n earlier)) (table2-set! past (ss-x n) (ss-y n) n) #t) (else #f)))) new)) (old-useful (filter (lambda (o) (let ((reached (table2-get past (ss-x o) (ss-y o)))) (not (better? reached o)))) old))) (priority-merge new-useful old-useful))) (let ((res (search start completed? successors my-merge))) (list mq (table2-size past) res)))) (define (match-beam x y width) (let ((past (make-table2)) (start (make-search-state 0.0 x y '()))) (define (my-merge new old) (let* ((new-useful (filter (lambda (n) (let ((earlier (table2-get past (ss-x n) (ss-y n)))) (cond ((or (not earlier) (better? n earlier)) (table2-set! past (ss-x n) (ss-y n) n) #t) (else #f)))) new)) (old-useful (filter (lambda (o) (let ((reached (table2-get past (ss-x o) (ss-y o)))) (not (better? reached o)))) old))) (list-head! (priority-merge new-useful old-useful) width))) (let ((res (search start completed? successors my-merge))) (list mq (table2-size past) res)))) (define (test) (let ((cases (list ; (list s11 s33) (list s1111 s3333) ; (list s1 s2) ; (list s2 s3) ; (list s1 s3) ; (list s11 s22) ; (list s1111 s2222) ))) (for-each (lambda (case) (newline) (newline) (display (car case)) (newline) (display (cadr case)) (let* ((best (match-best-fs (car case) (cadr case))) (pen (caaddr best))) (define (iter n) (if (< n 1) (begin (newline) (display 'ok-to-0)) (let ((beam (match-beam (car case) (cadr case) n))) (cond ((< pen (caaddr beam)) (newline) (display (list 'failed n (car beam) (cadr beam) (caaddr beam)))) (else (newline) (display (list 'succeeded n (car beam) (cadr beam) (caaddr beam))) (iter (- n 1))))))) (newline) (display (list (car best) (cadr best) pen)) (iter 150))) cases)))