Assignment_5.ipynb
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "tEdcbX_zFQ3y"
},
"source": [
"Importing the necessary libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "CjUaJqSfFQ34"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import collections\n",
"import itertools\n",
"from scipy.stats import mannwhitneyu as wilcox\n",
"import networkx as nx"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "94c-lvvYFQ4k"
},
"source": [
"# Part 1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 202
},
"colab_type": "code",
"id": "sv1tPvW-FQ4l",
"outputId": "1b8d79cc-c94f-4c03-c0fd-e305d8b806a2"
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"OFFICIAL_SYMBOL_A | \n",
"OFFICIAL_SYMBOL_B | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"1810055G02Rik | \n",
"MPRIP | \n",
"
\n",
"\n",
"1 | \n",
"1810055G02Rik | \n",
"RAI14 | \n",
"
\n",
"\n",
"2 | \n",
"1810055G02Rik | \n",
"TPM1 | \n",
"
\n",
"\n",
"3 | \n",
"2810046L04Rik | \n",
"ARF1 | \n",
"
\n",
"\n",
"4 | \n",
"2810046L04Rik | \n",
"ARF3 | \n",
"
\n",
"\n",
"
\n",
"
"
],
"text/plain": [
" OFFICIAL_SYMBOL_A OFFICIAL_SYMBOL_B\n",
"0 1810055G02Rik MPRIP\n",
"1 1810055G02Rik RAI14\n",
"2 1810055G02Rik TPM1\n",
"3 2810046L04Rik ARF1\n",
"4 2810046L04Rik ARF3"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ppi = pd.read_csv('human ppi.txt', sep=\"\\s+\")\n",
"display(ppi.head())"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "B5MfBy14FQ47"
},
"source": [
"Adding nodes to the graph"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "NcdtiBPiFQ5B"
},
"outputs": [],
"source": [
"G=nx.Graph()\n",
"nodes = ppi.OFFICIAL_SYMBOL_A.append(ppi.OFFICIAL_SYMBOL_B).unique()\n",
"G.add_nodes_from(nodes)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "WlMoK2M7FQ5J"
},
"source": [
"Adding edges to the graph"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "hTWR9eQNFQ5L"
},
"outputs": [],
"source": [
"G.add_edges_from(ppi.values)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "oDJZm-_hFQ5i"
},
"source": [
"## Average Clustering Coefficient"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "tZBhKlgPFQ5k",
"outputId": "2ae49a57-8947-467a-f021-0106a119c786"
},
"outputs": [
{
"data": {
"text/plain": [
"0.20981150204746712"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(nx.average_clustering(G))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "lcYV5qYhFQ5u"
},
"source": [
"## Scale Free Structure"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "yXM44PMVF-pV"
},
"source": [
"Plotting the degree distribution"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295
},
"colab_type": "code",
"id": "djDnRUtCFQ5w",
"outputId": "127a312e-b09e-42c3-d38d-892f43fc0b3b"
},
"outputs": [
{
"data": {
"image/png":...