|
| 1 | +''' |
| 2 | +DNA Alignment |
| 3 | +Needleman-Wunsch Exercise |
| 4 | +
|
| 5 | +1. Two random sequences must be provided to the students. |
| 6 | +2. A standard scoring system needs to be shown to the students. Example: +2 for a match, -1 for a mismatch, -2 for a gap. |
| 7 | +''' |
| 8 | + |
def sequenceAlign(seqA, seqB):
    """Globally align two DNA sequences with the Needleman-Wunsch algorithm.

    Scoring: +2 for a match, -1 for a mismatch, -2 for every gap position
    (including gaps at the start or end of either sequence).

    Args:
        seqA: First sequence, made of the characters 'G', 'C', 'A', 'T'.
        seqB: Second sequence, same alphabet.

    Returns:
        A tuple ``(score, alignA, alignB)`` where ``score`` is the optimal
        global alignment score and ``alignA`` / ``alignB`` are the two
        aligned strings of equal length, with '-' marking gaps.

    Raises:
        KeyError: If a character outside 'G', 'C', 'A', 'T' is looked up
            in the substitution table while filling the matrix.
    """
    gapPenalty = -2  # cost of aligning one base against a gap

    # Substitution table: +2 for a match, -1 for a mismatch.
    DNA = {'G': {'G': 2, 'C': -1, 'A': -1, 'T': -1},
           'C': {'G': -1, 'C': 2, 'A': -1, 'T': -1},
           'A': {'G': -1, 'C': -1, 'A': 2, 'T': -1},
           'T': {'G': -1, 'C': -1, 'A': -1, 'T': 2}}

    # Route codes stored in routeMatrix; the numeric order is also the
    # tie-breaking priority (diagonal first, then vertical, then horizontal).
    DIAGONAL, VERTICAL, HORIZONTAL = 0, 1, 2

    numI = len(seqA) + 1
    numJ = len(seqB) + 1
    scoreMatrix = [[0] * numJ for _ in range(numI)]
    routeMatrix = [[DIAGONAL] * numJ for _ in range(numI)]

    # Border cells: a prefix of one sequence aligned against nothing but
    # gaps. Without this, leading gaps would be free and the traceback
    # would step off the matrix when it reaches row 0 or column 0.
    for i in range(1, numI):
        scoreMatrix[i][0] = i * gapPenalty
        routeMatrix[i][0] = VERTICAL
    for j in range(1, numJ):
        scoreMatrix[0][j] = j * gapPenalty
        routeMatrix[0][j] = HORIZONTAL

    # Fill the interior: each cell takes the best of its three predecessors.
    for i in range(1, numI):
        for j in range(1, numJ):
            similarity = DNA[seqA[i - 1]][seqB[j - 1]]
            paths = [scoreMatrix[i - 1][j - 1] + similarity,  # diagonal
                     scoreMatrix[i - 1][j] + gapPenalty,      # vertical
                     scoreMatrix[i][j - 1] + gapPenalty]      # horizontal
            best = max(paths)
            scoreMatrix[i][j] = best
            routeMatrix[i][j] = paths.index(best)

    # Traceback from the bottom-right corner to the origin.
    alignA = []
    alignB = []
    i = len(seqA)
    j = len(seqB)
    score = scoreMatrix[i][j]

    while i > 0 or j > 0:
        route = routeMatrix[i][j]
        if route == DIAGONAL:      # both bases consumed
            alignA.append(seqA[i - 1])
            alignB.append(seqB[j - 1])
            i -= 1
            j -= 1
        elif route == VERTICAL:    # gap in seqB
            alignA.append(seqA[i - 1])
            alignB.append('-')
            i -= 1
        else:                      # HORIZONTAL: gap in seqA
            alignA.append('-')
            alignB.append(seqB[j - 1])
            j -= 1

    # The alignment was collected back-to-front.
    alignA.reverse()
    alignB.reverse()
    alignA = ''.join(alignA)
    alignB = ''.join(alignB)

    return score, alignA, alignB
| 60 | + |
# Demo run: align the two example sequences and show the result,
# one item per line (score, then each aligned sequence).
score, alignA, alignB = sequenceAlign('GGCTCAATCA', 'ACCTAAGG')

print(score, alignA, alignB, sep='\n')
| 66 | + |
| 67 | + |
0 commit comments