Skip to content

Instantly share code, notes, and snippets.

@0xzkzhao
Created May 3, 2015 04:22
Show Gist options
  • Select an option

  • Save 0xzkzhao/a330a5eaa7e9621e56fb to your computer and use it in GitHub Desktop.

Select an option

Save 0xzkzhao/a330a5eaa7e9621e56fb to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
{
"metadata": {
"name": "",
"signature": "sha256:9685c5d0f663be78b3bccac808e2fbc1c80e6546f15a0451c07ba555efc96e8b"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sklearn\n",
"import sklearn.tree\n",
"import sklearn.datasets\n",
"import sklearn.preprocessing\n",
"import sklearn.decomposition\n",
"import sklearn.ensemble\n",
"\n",
"import urllib2\n",
"import bs4\n",
"import itertools\n",
"\n",
"import pandas as pd\n",
"import pandas.tools.plotting \n",
"\n",
"import numpy as np\n",
"\n",
"import matplotlib\n",
"from matplotlib.colors import ListedColormap\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import seaborn as sns\n",
"# Use seaborn's 'white' figure style.\n",
"sns.set_style(\"white\")\n",
"\n",
"# Four-entry discrete colormap: pure red, green, blue, then black.\n",
"_BOLD_COLORS = ['#FF0000', '#00FF00', '#0000FF', '#000000']\n",
"cmap_bold = ListedColormap(_BOLD_COLORS)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 142
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Remote location of the abalone data file, kept for provenance only;\n",
"# nothing in this cell reads from it.\n",
"DATA_SOURCE_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'\n",
"\n",
"# Path that is actually loaded: a CSV file in the working directory.\n",
"# NOTE(review): presumably a downloaded copy of DATA_SOURCE_URL - confirm.\n",
"url = 'abalone.csv'\n",
"\n",
"# Column names applied when the file is read.\n",
"columns = [\n",
"    'sex',\n",
"    'length',\n",
"    'diameter',\n",
"    'height',\n",
"    'wholeWeight',\n",
"    'shuckedWeight',\n",
"    'visceraWeight',\n",
"    'shellWeight',\n",
"    'rings',\n",
"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 95
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load the raw table through the public pandas entry point. header=None\n",
"# together with names=columns labels the columns explicitly instead of\n",
"# treating the first row of the file as a header.\n",
"data_orig = pd.read_csv(url, header=None, names=columns)\n",
"\n",
"# Preview the first three rows.\n",
"data_orig.head(3)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sex</th>\n",
" <th>length</th>\n",
" <th>diameter</th>\n",
" <th>height</th>\n",
" <th>wholeWeight</th>\n",
" <th>shuckedWeight</th>\n",
" <th>visceraWeight</th>\n",
" <th>shellWeight</th>\n",
" <th>rings</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> M</td>\n",
" <td> 0.455</td>\n",
" <td> 0.365</td>\n",
" <td> 0.095</td>\n",
" <td> 0.5140</td>\n",
" <td> 0.2245</td>\n",
" <td> 0.1010</td>\n",
" <td> 0.15</td>\n",
" <td> 15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> M</td>\n",
" <td> 0.350</td>\n",
" <td> 0.265</td>\n",
" <td> 0.090</td>\n",
" <td> 0.2255</td>\n",
" <td> 0.0995</td>\n",
" <td> 0.0485</td>\n",
" <td> 0.07</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> F</td>\n",
" <td> 0.530</td>\n",
" <td> 0.420</td>\n",
" <td> 0.135</td>\n",
" <td> 0.6770</td>\n",
" <td> 0.2565</td>\n",
" <td> 0.1415</td>\n",
" <td> 0.21</td>\n",
" <td> 9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 165,
"text": [
" sex length diameter height wholeWeight shuckedWeight visceraWeight \\\n",
"0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 \n",
"1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 \n",
"2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 \n",
"\n",
" shellWeight rings \n",
"0 0.15 15 \n",
"1 0.07 7 \n",
"2 0.21 9 "
]
}
],
"prompt_number": 165
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Encode the categorical sex label as an integer code.\n",
"# 0: infant, 1: female, 2: male\n",
"SEX_CODES = {'I': 0, 'F': 1, 'M': 2}\n",
"\n",
"\n",
"def parse_label(label):\n",
"    \"\"\"Return the integer code for a raw sex label.\n",
"\n",
"    label must be one of 'I', 'F' or 'M'; any other value raises KeyError.\n",
"    \"\"\"\n",
"    return SEX_CODES[label]\n",
"\n",
"\n",
"# Derive the working frame from the raw one so this cell runs on a fresh\n",
"# kernel and can be re-run safely; data_orig itself is left untouched.\n",
"# NOTE(review): the saved output of this cell shows 'rings' already recoded\n",
"# (3, 0, 1, ...), so data was originally created by a cell run out of order;\n",
"# confirm whether that recoding is meant to happen before this step.\n",
"data = data_orig.copy()\n",
"data['sex'] = data_orig['sex'].map(parse_label)\n",
"data.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sex</th>\n",
" <th>length</th>\n",
" <th>diameter</th>\n",
" <th>height</th>\n",
" <th>wholeWeight</th>\n",
" <th>shuckedWeight</th>\n",
" <th>visceraWeight</th>\n",
" <th>shellWeight</th>\n",
" <th>rings</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 2</td>\n",
" <td> 0.455</td>\n",
" <td> 0.365</td>\n",
" <td> 0.095</td>\n",
" <td> 0.5140</td>\n",
" <td> 0.2245</td>\n",
" <td> 0.1010</td>\n",
" <td> 0.150</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 2</td>\n",
" <td> 0.350</td>\n",
" <td> 0.265</td>\n",
" <td> 0.090</td>\n",
" <td> 0.2255</td>\n",
" <td> 0.0995</td>\n",
" <td> 0.0485</td>\n",
" <td> 0.070</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 1</td>\n",
" <td> 0.530</td>\n",
" <td> 0.420</td>\n",
" <td> 0.135</td>\n",
" <td> 0.6770</td>\n",
" <td> 0.2565</td>\n",
" <td> 0.1415</td>\n",
" <td> 0.210</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 2</td>\n",
" <td> 0.440</td>\n",
" <td> 0.365</td>\n",
" <td> 0.125</td>\n",
" <td> 0.5160</td>\n",
" <td> 0.2155</td>\n",
" <td> 0.1140</td>\n",
" <td> 0.155</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 0</td>\n",
" <td> 0.330</td>\n",
" <td> 0.255</td>\n",
" <td> 0.080</td>\n",
" <td> 0.2050</td>\n",
" <td> 0.0895</td>\n",
" <td> 0.0395</td>\n",
" <td> 0.055</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 166,
"text": [
" sex length diameter height wholeWeight shuckedWeight visceraWeight \\\n",
"0 2 0.455 0.365 0.095 0.5140 0.2245 0.1010 \n",
"1 2 0.350 0.265 0.090 0.2255 0.0995 0.0485 \n",
"2 1 0.530 0.420 0.135 0.6770 0.2565 0.1415 \n",
"3 2 0.440 0.365 0.125 0.5160 0.2155 0.1140 \n",
"4 0 0.330 0.255 0.080 0.2050 0.0895 0.0395 \n",
"\n",
" shellWeight rings \n",
"0 0.150 3 \n",
"1 0.070 0 \n",
"2 0.210 1 \n",
"3 0.155 2 \n",
"4 0.055 0 "
]
}
],
"prompt_number": 166
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Grid of pairwise plots over the columns of data, coloured by the sex code.\n",
"sns.pairplot(data=data, hue='sex')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 98,
"text": [
"<seaborn.axisgrid.PairGrid at 0x40a13898>"
]
},
{
"metadata": {},
"output_type": "display_data",
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment