Answer To: Assignment 4 • In motif finding, a weight matrix (also referred to as Position Weight Matrix or...
Kshitij answered on Apr 05 2021
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from bz2 import BZ2File\n",
"from math import log\n",
"from itertools import islice"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def read_count_matrix(file):\n",
"    \"\"\"Read a whitespace-delimited count matrix from a text file.\n",
"\n",
"    Each non-blank line is split on whitespace. The first field becomes the\n",
"    row key, the second field is skipped, and every remaining field is\n",
"    parsed as an integer count. Blank lines are ignored.\n",
"\n",
"    NOTE(review): the skipped second field is presumably a separator\n",
"    column in the input format -- confirm against the data file.\n",
"\n",
"    Args:\n",
"        file: Path of the text file to read.\n",
"\n",
"    Returns:\n",
"        dict mapping each row key to its list of integer counts.\n",
"\n",
"    Raises:\n",
"        ValueError: If a count field cannot be parsed by int().\n",
"    \"\"\"\n",
"    counts = {}\n",
"    with open(file) as handle:\n",
"        for line in handle:\n",
"            fields = line.split()\n",
"            # A blank or whitespace-only line (for example a trailing empty\n",
"            # line) has no key; skip it rather than fail with IndexError.\n",
"            if not fields:\n",
"                continue\n",
"            counts[fields[0]] = [int(x) for x in fields[2:]]\n",
"    return counts"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def generate_freq_weight_matrices(countm, pseudocount=1.0, denominator=31.0,\n",
"                                  background=0.25):\n",
"    \"\"\"Convert a count matrix into frequency and log-odds weight matrices.\n",
"\n",
"    For every count c the frequency is (c + pseudocount) / denominator, and\n",
"    the weight is the natural logarithm of frequency / background.\n",
"\n",
"    Args:\n",
"        countm: dict mapping a row key to a list of counts.\n",
"        pseudocount: Value added to every count before normalising.\n",
"            Defaults to 1.0, the value that used to be hard-coded.\n",
"        denominator: Normalising constant. Defaults to 31.0, the value\n",
"            that used to be hard-coded. NOTE(review): presumably the number\n",
"            of aligned sequences plus one pseudocount per row -- confirm\n",
"            against the data set and pass the right value for other inputs.\n",
"        background: Probability each frequency is compared against.\n",
"            Defaults to 0.25, the value that used to be hard-coded.\n",
"\n",
"    Returns:\n",
"        Tuple (freqm, weightm) of dicts with the same keys as countm.\n",
"\n",
"    Raises:\n",
"        ValueError: If a frequency / background ratio is not positive\n",
"            (math.log rejects it).\n",
"        ZeroDivisionError: If denominator or background is zero.\n",
"    \"\"\"\n",
"    freqm = {}\n",
"    weightm = {}\n",
"    for key, counts in countm.items():\n",
"        freqs = [(float(c) + pseudocount) / denominator for c in counts]\n",
"        freqm[key] = freqs\n",
"        weightm[key] = [log(f / background) for f in freqs]\n",
"    return freqm, weightm"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def sliding_window(seq, n=18):\n",
"    \"\"\"Yield every run of n consecutive items of seq as a tuple.\n",
"\n",
"    Windows overlap: each one drops the oldest item of the previous window\n",
"    and appends the next item from seq. Nothing is yielded when seq holds\n",
"    fewer than n items.\n",
"\n",
"    Args:\n",
"        seq: Any iterable.\n",
"        n: Window length (default 18).\n",
"\n",
"    Yields:\n",
"        Tuples of n consecutive items, in order of appearance.\n",
"    \"\"\"\n",
"    stream = iter(seq)\n",
"    window = tuple(islice(stream, n))\n",
"    if len(window) != n:\n",
"        # islice came up short, so the input ran out before a full window.\n",
"        return\n",
"    yield window\n",
"    for item in stream:\n",
"        # Slide by one: drop the oldest item, append the newest.\n",
"        window = (*window[1:], item)\n",
"        yield window"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def get_max_score(seq,weightm):\n",
" maxscore = None\n",
" for s in sliding_window(seq):\n",
" ...