{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Rutte II voting data: download and Excel export\n",
    "\n",
    "Downloads the Rutte II voting records from watstemthetparlement.nl, loads the tab-separated file into pandas and writes an Excel copy with truncated motion texts and missing votes stored as NaN.\n",
    "\n",
    "Every cell is written to survive **Restart Kernel → Run All**: the download, unzip and export steps can be re-run without prompting or piling up duplicate files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Working directory that holds the downloaded and generated files.\n",
    "DATA_DIR = 'Rutte2'\n",
    "# Single definition of the export file name, shared by the export, `ls` and `zip` cells.\n",
    "EXCEL_FILE = 'StemmingenRutte2.xls'\n",
    "# Motion texts are cut to this many characters before the Excel export.\n",
    "MAX_TEXT_CHARS = 5000\n",
    "# Code used in the party vote columns for a missing vote.\n",
    "NO_VOTE_CODE = 9\n",
    "# Max's motions file: raw-content URL plus the places a local copy may live.\n",
    "MAX_CSV_URL = 'https://raw.githubusercontent.com/Questgame/incest_moties/master/Moties%20Rutte%202.csv'\n",
    "MAX_CSV_CANDIDATES = [\n",
    "    'Moties Rutte 2.csv',\n",
    "    os.path.expanduser('~/Downloads/incest_moties/Moties Rutte 2.csv'),\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download and unpack"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Enter the data directory only once, so re-running this cell does not nest Rutte2/Rutte2/...\n",
    "if os.path.basename(os.getcwd()) != DATA_DIR:\n",
    "    if not os.path.isdir(DATA_DIR):\n",
    "        os.mkdir(DATA_DIR)\n",
    "    os.chdir(DATA_DIR)\n",
    "\n",
    "# -N re-downloads only when the server copy is newer (no rutte2data.zip.1, .2, ... on re-runs).\n",
    "# NOTE: --no-check-certificate disables TLS verification; it is kept because the local CA store\n",
    "# could not verify this host. Drop the flag once certificate verification works.\n",
    "!wget -N --no-check-certificate https://watstemthetparlement.nl/download/rutte2data.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# -o overwrites an existing rutte2data.csv instead of stopping at an interactive prompt.\n",
    "!unzip -o rutte2data.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the raw file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r-- 1 admin staff 57M Jan 15 22:42 rutte2data.csv\n",
      " 3227638 rutte2data.csv\n"
     ]
    }
   ],
   "source": [
    "!ls -lh rutte2data.csv\n",
    "!wc -l rutte2data.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id\tfile\tdocument\ttitle\tdate\tproposaltype\tvotetype\tresult\tvoteURL\tproposalURL\tsource\t(Unknown)\t50PLUS\tAOV\tBontes\tBontes/Van Klaveren\tBrinkman\tCD\tCDA\tChristenUnie\tD66\tDe Jong\tEerdmans/Van Schijndel\tGPV\tGroenLinks\tHendriks\tHouwers\tKlein\tKortenoeven/Hernandez\tKuzu/Öztürk\tLazrak\tLN\tLPF\tNawijn\tNijpels\tPvdA\tPvdD\tPVV\tRPF\tSGP\tSP\tUnie 55+\tVan Klaveren\tVan Oudenallen\tVan Vliet\tVerdonk\tVerkerk\tVVD\tWijnschenk\tWilders\treign\tauthor\tauthorparty\tsupporter\tsupporterparties\tcategory\tsubcategory\tcategories\tpro\tcontra\ttext\r",
      "\r\n",
      "20121114-h-tk-20122013-22-10-100267125\t33410\t47\tKabinetsformatie 2012 ; Motie; Motie van het lid Slob c.s. over een goedkeuringswet\t2012-11-14\tMotion\tNormal\tadopted\thttps://zoek.officielebekendmakingen.nl/h-tk-20122013-22-10.html\thttps://zoek.officielebekendmakingen.nl/kst-33410-47.html\tOB\t9\t1\t9\t9\t9\t9\t9\t1\t1\t1\t9\t9\t9\t1\t9\t9\t9\t9\t9\t9\t9\t9\t9\t9\t1\t1\t1\t9\t1\t1\t9\t9\t9\t9\t9\t9\t1\t9\t9\tKabinet-Rutte II\t[Arie Slob]\t[ChristenUnie]\t[Sybrand van Haersma Buma, Alexander Pechtold, Kees van der Staaij]\t[CDA, D66, SGP]\t[Bestuur]\t[Organisatie en beleid]\tBestuur, Organisatie en beleid\t[50PLUS, CDA, ChristenUnie, D66, GroenLinks, PvdA, PvdD, PVV, SGP, SP, VVD]\t[]\t\"\r",
      "\r\n",
      "\r",
      "\r\n",
      "De Kamer,\r",
      "\r\n",
      "gehoord de beraadslaging,\r",
      "\r\n",
      "constaterende dat in het regeerakkoord is opgenomen dat Nederland het verdrag van de Verenigde Naties voor de rechten van gehandicapten zal gaan ratificeren;\r",
      "\r\n",
      "verzoekt de regering, hiertoe uiterlijk in 2013 een goedkeuringswet aan de Raad van State ter advisering voor te leggen,\r",
      "\r\n",
      "en gaat over tot de orde van de dag.\r",
      "\r\n",
      "\r",
      "\r\n",
      "Slob\r",
      "\r\n"
     ]
    }
   ],
   "source": [
    "!head rutte2data.csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load into pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "r2=pd.read_csv('rutte2data.csv', sep='\\t', encoding='utf-8', index_col='id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11307, 60)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r2.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export to Excel\n",
    "\n",
    "The export works on a copy (`r2_excel`), so `r2` keeps the data exactly as loaded and this cell can be re-run safely."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# The party vote columns sit between `source` and `reign` in the file header.\n",
    "# Only these use 9 as the missing-vote marker; replacing 9 in every column would\n",
    "# also blank genuine 9s elsewhere (e.g. a `document` number of 9).\n",
    "party_columns = r2.loc[:, '(Unknown)':'Wilders'].columns\n",
    "\n",
    "r2_excel = r2.copy()\n",
    "r2_excel[party_columns] = r2_excel[party_columns].replace(NO_VOTE_CODE, np.nan)\n",
    "# .str slicing leaves a missing text as NaN instead of raising on a float.\n",
    "r2_excel['text'] = r2_excel['text'].str[:MAX_TEXT_CHARS]\n",
    "\n",
    "r2_excel.to_excel(EXCEL_FILE)\n",
    "# List the file that was just written (same name as the export above).\n",
    "!ls -lh {EXCEL_FILE}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!zip {EXCEL_FILE}.zip {EXCEL_FILE}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Max's data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Max's data.\n",
    "# A github.com/.../blob/... address is the HTML viewer page (it returned 404 in the previous run);\n",
    "# MAX_CSV_URL points at the raw file content instead.\n",
    "!wget -N --no-check-certificate {MAX_CSV_URL}\n",
    "\n",
    "# Use the freshly downloaded file if there is one, otherwise a local clone under ~/Downloads.\n",
    "max_csv_found = [path for path in MAX_CSV_CANDIDATES if os.path.exists(path)]\n",
    "if not max_csv_found:\n",
    "    raise IOError('Moties Rutte 2.csv not found; looked in: %s' % ', '.join(MAX_CSV_CANDIDATES))\n",
    "\n",
    "# Loaded under its own name so it does not overwrite the Rutte II frame `r2`.\n",
    "moties_max = pd.read_csv(max_csv_found[0], encoding='utf-8')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}