As discussed in the chat all necessary data is provided. You need to use the skeleton jupyter...

Question

As discussed in the chat, all necessary data is provided. You need to use the skeleton Jupyter notebook and fill in the "gaps"; everything else is in the description of the assignment. They are very picky about plagiarism and libraries: none are allowed.

Ximi · Accepted Answer

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Assignment 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys
",
    "import nltk
",
    "import math
",
    "import itertools
",
    "import scipy.stats
",
    "import numpy as np
",
    "import pandas as pd
",
    "from sklearn.metrics import confusion_matrix
",
    "import matplotlib.pyplot as plt
",
    "import matplotlib as mpl 
",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Daten einlesen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "people_en = pd.read_csv('people_wiki_EN.csv', sep=',')
",
    "people_de = pd.read_csv('10k-people-raw.csv', sep=';')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hilfsfunktionen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "def box_plot(dist, title = None):
",
    "    chars = [chr(c) for c in range(ord('a'), ord('z') + 1)]
",
    "    
",
    "    fig1, ax1 = plt.subplots(figsize = (18, 8))
",
    "    ax1.set_xlabel('Character')
",
    "    ax1.set_ylabel('Frequency')
",
    "    ax1.boxplot(dist, labels = chars)
",
    "    #ax1.set_ylim([0, 0.3])
",
    "    if title is not None:
",
    "        plt.title(title)
",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "def frequency_plot(mean_en, mean_de):
",
    "    chars = [chr(c) for c in range(ord('a'), ord('z')+1)]
",
    "    ind = np.arange(len(chars))
",
    "    width = 1
",
    "    frequency = np.log((np.array(mean_en) / np.array(mean_de)))
",
    "    color = ['g' if f > 0 else 'b' for f in frequency]
",
    "    
",
    "    fig, ax = plt.subplots(figsize = (18, 8))
",
    "    rects1 = ax.bar(ind, frequency, width, color = color, edgecolor = 'black')
",
    "    ax.set_xticklabels(chars)
",
    "    ax.set_xticks(ind + (width * 0.5) - 0.5)
",
    "    ax.set_xlabel('Character')
",
    "    ax.set_ylim([-2, 2])
",
    "    ax.set_ylabel('F')
",
    "    ax.set_xlim([-0.5, len(chars) - 0.5])
",
    "    ax.text(1, 1.5, 'More frequent in English ', bbox={'facecolor':'green', 'alpha':0.5, 'pad':10})
",
    "    ax.text(1, 1.2, 'More frequent in German', bbox={'facecolor':'blue', 'alpha':0.5, 'pad':10})
",
    "    ax.text(5.5, 1.35, r'$F = \ln(\frac{mean_e}{mean_d})$', fontsize=25)
",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_confusion_matrix(cm, classes, title = 'Confusion matrix', cmap = plt.cm.Greens):
",
    "    plt.title(title)
",
    "    tick_marks = np.arange(len(classes))
",
    "    plt.xticks(tick_marks, classes, rotation = 45)
",
    "    plt.yticks(tick_marks, classes)
",
    "
",
    "    cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
",
    "    cm_norm[np.isnan(cm_norm)] = 0
",
    "    cm_norm = np.around(cm_norm, decimals = 3)
",
    "        
",
    "    plt.imshow(cm_norm, interpolation='nearest', cmap = cmap)
",
    "    
",
    "    thresh = cm_norm.max() / 2.
",
    "    for i, j in itertools.product(range(cm_norm.shape[0]), range(cm_norm.shape[1])):
",
    "        plt.text(j, i, cm[i, j], horizontalalignment="center", color="white" if cm_norm[i, j] > thresh else "black")
",
    "
",
    "    plt.tight_layout()
",
    "    plt.ylabel('True label')
",
    "    plt.xlabel('Predicted label')
",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Häufigkeitsverteilungen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "def character_distribution(people):
",
    "    # Implementieren Sie eine Funktion die die Buchstabenhäufigkeiten berechnet.
",
    "    # Die Funktion soll folgendes zurückgeben:
",
    "    #  mean: Durchschnittlicher relative Häufigkeit pro Buchstabe 
",
    "    #  std: Relative Standardabweichung pro Buchstabe
",
    "    #  dist: Relative Häufigkeit pro Text und pro Buchstabe 
",
    "    mean = np.zeros(shape = 26)
",
    "    std = np.zeros(shape = 26)
",
    "    dist = np.zeros(shape = (len(people), 26))
",
    "    
",
    "    def get_chars():
",
    "        return {chr(c):0 for c in range(ord('a'), ord('z')+1)}
",
    "    def get_freq(text):
",
    "        chars = get_chars()
",
    "        for i in text: 
",
    "            if i in chars: 
",
    "                chars[i] += 1
",
    "        return list(chars.values())
",
    "    
",
    "    freq = people.apply(get_freq)
",
    "    mean = np.mean(list(freq), axis=0)
",
    "    std = np.std(list(freq), axis=0)
",
    "    dist = freq.apply(lambda x: mean/x)
",
    "    dist = np.array(list(dist))
",
    "    
",
    "    return mean, std, dist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png":

As discussed in the chat, all necessary data is provided. You need to use the skeleton Jupyter notebook and fill in the "gaps"; everything else is in the description of the assignment. They are very picky...

Answer To: As discussed in the chat all necessary data is provided. You need to use the skeleton jupyter...

Answer To This Question Is Available To Download

Related Questions & Answers

Submit New Assignment