{ "cells": [ { "cell_type": "markdown", "id": "0bb1b424-2357-4964-906c-b60789e73afa", "metadata": {}, "source": [ "#### Riding Mowers Dataset" ] }, { "cell_type": "code", "execution_count": 5, "id": "2b77d6af-b6de-415f-aab3-0b200cf8ed1e", "metadata": {}, "outputs": [], "source": [ "import os\n", "from pathlib import Path\n", "\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from sklearn import preprocessing\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier" ] }, { "cell_type": "code", "execution_count": 7, "id": "15b01946-757c-46aa-b442-15386bc20a53", "metadata": {}, "outputs": [], "source": [ "# Folder that holds RidingMowers.csv. Set the RIDING_MOWERS_DIR environment\n", "# variable to read your own copy; the fallback is the folder this notebook was\n", "# originally run against.\n", "DATA_DIR = Path(os.environ.get(\n", "    'RIDING_MOWERS_DIR',\n", "    r'/Users/patriciaxufre/Documents/SBE - Disciplinas/2957 | ABA/2024-25/Datasets Examples',\n", "))\n", "df = pd.read_csv(DATA_DIR / 'RidingMowers.csv')" ] }, { "cell_type": "code", "execution_count": 9, "id": "87b18232-4eaa-4da6-ac81-17e987073c97", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IncomeLot_SizeOwnershipNumber
060.018.4Owner1
185.516.8Owner2
264.821.6Owner3
361.520.8Owner4
487.023.6Owner5
\n", "
" ], "text/plain": [ " Income Lot_Size Ownership Number\n", "0 60.0 18.4 Owner 1\n", "1 85.5 16.8 Owner 2\n", "2 64.8 21.6 Owner 3\n", "3 61.5 20.8 Owner 4\n", "4 87.0 23.6 Owner 5" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 1-based household id; the scatter plot uses it to label each point.\n", "df['Number'] = df.index + 1\n", "# Hold out 40% of the rows; the fixed random_state keeps the split repeatable.\n", "train_df, test_df = train_test_split(df, test_size = 0.4, random_state = 123)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 11, "id": "321cb85e-1f74-4db4-a317-39776fd83f39", "metadata": {}, "outputs": [], "source": [ "# The household to classify; the plotting cell below reads new.Income and\n", "# new.Lot_Size.\n", "# NOTE(review): this cell was empty in the saved notebook. Income 60 / Lot_Size 20\n", "# is reconstructed from the neighbour distances printed by the last cell - confirm.\n", "new = pd.DataFrame([{'Income': 60, 'Lot_Size': 20}])" ] }, { "cell_type": "code", "execution_count": 21, "id": "fb511c60-138a-47c8-8469-88848fc952a8", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def plotdataset(ax, data, showLabel = True, **kwargs):\n", "    \"\"\"Scatter the households in data on ax, one marker style per Ownership class.\n", "\n", "    ax        -- matplotlib Axes to draw on.\n", "    data      -- DataFrame with Income, Lot_Size, Ownership and Number columns.\n", "    showLabel -- if True the Owner/Nonowner series get legend labels; pass False\n", "                 to leave this call's series out of the legend.\n", "    **kwargs  -- forwarded unchanged to both ax.scatter calls.\n", "    \"\"\"\n", "    # (Ownership value, marker, colour) per class, drawn in this order.\n", "    class_styles = [('Owner', 'o', 'C1'), ('Nonowner', 'D', 'C0')]\n", "    for ownership, marker, color in class_styles:\n", "        subset = data.loc[data['Ownership'] == ownership]\n", "        ax.scatter(subset.Income, subset.Lot_Size, marker = marker,\n", "                   label = ownership if showLabel else None, color = color, **kwargs)\n", "    # Label every point with its household Number, shifted right of the marker.\n", "    # itertuples keeps each column's own dtype (iterrows upcasts mixed rows to object).\n", "    for row in data.itertuples(index = False):\n", "        ax.annotate(str(row.Number), (row.Income + 2, row.Lot_Size))\n", "\n", "fig, ax = plt.subplots()\n", "plotdataset(ax, train_df)\n", "# Test rows are drawn hollow and unlabelled so they share the training legend entries.\n", "plotdataset(ax, test_df, showLabel = False, facecolors = 'none')\n", "ax.scatter(new.Income, new.Lot_Size, marker = '*', label = 'New Household', color = 'black', s = 150)\n", "\n", "ax.set_xlabel('Income')\n", "ax.set_ylabel('Lot Size')\n", "ax.set_xlim(40, 115)\n", "ax.legend(loc = 4)  # loc 4 is 'lower right'\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 29, "id": "86ae9acf-d3d1-403c-bbdd-00bedaf7d954", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
zIncomezLot_SizeOwnershipNumber
0-0.5350910.012395Owner1
10.956146-0.681719Owner2
2-0.2543871.400623Owner3
3-0.4473711.053566Owner4
41.0438652.268266Owner5
\n", "
" ], "text/plain": [ " zIncome zLot_Size Ownership Number\n", "0 -0.535091 0.012395 Owner 1\n", "1 0.956146 -0.681719 Owner 2\n", "2 -0.254387 1.400623 Owner 3\n", "3 -0.447371 1.053566 Owner 4\n", "4 1.043865 2.268266 Owner 5" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Standardise both predictors using the mean/std of the training rows only, then\n", "# apply that same transform to every household.\n", "# NOTE(review): this cell's source was empty in the saved notebook; the code is\n", "# reconstructed from the stored output and the names later cells use - confirm.\n", "scaler = preprocessing.StandardScaler().fit(train_df[['Income', 'Lot_Size']])\n", "z_scores = pd.DataFrame(scaler.transform(df[['Income', 'Lot_Size']]),\n", "                        columns = ['zIncome', 'zLot_Size'], index = df.index)\n", "df_norm = pd.concat([z_scores, df[['Ownership', 'Number']]], axis = 1)\n", "df_norm.head()" ] }, { "cell_type": "code", "execution_count": 33, "id": "1a79df5a-eff4-4955-b412-c89ff2f254d7", "metadata": {}, "outputs": [], "source": [ "# Same partition as train_df / test_df, but on the standardised columns.\n", "# NOTE(review): reconstructed cell (source was empty) - confirm.\n", "trainNorm = df_norm.loc[train_df.index]\n", "testNorm = df_norm.loc[test_df.index]\n", "# The new household goes through the scaler fitted on the training rows.\n", "newNorm = pd.DataFrame(scaler.transform(new[['Income', 'Lot_Size']]),\n", "                       columns = ['zIncome', 'zLot_Size'])" ] }, { "cell_type": "code", "execution_count": 43, "id": "f0b91d88-330b-4c3d-b419-5103ea6fa797", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
zIncomezLot_SizeOwnershipNumber
13-0.9561461.053566Nonowner14
0-0.5350910.012395Owner1
2-0.2543871.400623Owner3
\n", "
" ], "text/plain": [ " zIncome zLot_Size Ownership Number\n", "13 -0.956146 1.053566 Nonowner 14\n", "0 -0.535091 0.012395 Owner 1\n", "2 -0.254387 1.400623 Owner 3" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Three nearest training households to the new one.\n", "# NOTE(review): this cell's source was empty in the saved notebook; reconstructed\n", "# to match its stored output - confirm.\n", "knn = NearestNeighbors(n_neighbors = 3).fit(trainNorm[['zIncome', 'zLot_Size']])\n", "distances, indices = knn.kneighbors(newNorm)\n", "# kneighbors returns row positions within the fitted data, hence iloc.\n", "trainNorm.iloc[indices[0], :]" ] }, { "cell_type": "code", "execution_count": 53, "id": "aa8a3b35-f532-4eb6-a081-1396322cea0b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " k accuracy\n", "0 1 0.8\n", "1 2 0.7\n", "2 3 0.8\n", "3 4 0.7\n", "4 5 0.7\n", "5 6 0.4\n", "6 7 0.5\n", "7 8 0.4\n", "8 9 0.7\n", "9 10 0.5\n", "10 11 0.7\n", "11 12 0.4\n", "12 13 0.4\n", "13 14 0.4\n" ] } ], "source": [ "train_X = trainNorm[['zIncome', 'zLot_Size']]\n", "train_y = trainNorm['Ownership']\n", "test_X = testNorm[['zIncome', 'zLot_Size']]\n", "test_y = testNorm['Ownership']\n", "\n", "# Test-set accuracy for every usable k: a k-NN model cannot ask for more\n", "# neighbours than there are training rows.\n", "# NOTE(review): the saved cell stopped at 'results = []'; the loop is restored\n", "# to match the stored k/accuracy table - confirm.\n", "results = []\n", "for k in range(1, len(train_X) + 1):\n", "    knn = KNeighborsClassifier(n_neighbors = k).fit(train_X, train_y)\n", "    results.append({'k': k, 'accuracy': accuracy_score(test_y, knn.predict(test_X))})\n", "accuracy_by_k = pd.DataFrame(results)\n", "print(accuracy_by_k)" ] }, { "cell_type": "code", "execution_count": 63, "id": "f5ac0059-029c-458d-a9c1-74003e4e21ac", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Owner']\n", "Distances [[0.35797119 0.52631868 0.54565179]]\n", "Indices [[ 3 8 13]]\n", " zIncome zLot_Size Ownership Number\n", "3 -0.447371 1.053566 Owner 4\n", "8 -0.008772 0.706509 Owner 9\n", "13 -0.956146 1.053566 Nonowner 14\n" ] } ], "source": [ "# Fit a 3-NN classifier on df_norm (not only the training rows) and classify\n", "# the new household.\n", "X = df_norm[['zIncome', 'zLot_Size']]\n", "y = df_norm['Ownership']\n", "knn = KNeighborsClassifier(n_neighbors = 3).fit(X, y)\n", "distances, indices = knn.kneighbors(newNorm)\n", "print(knn.predict(newNorm))\n", "print('Distances', distances)\n", "print('Indices', indices)\n", "# kneighbors returns row positions within the fitted data, hence iloc.\n", "print(df_norm.iloc[indices[0], :])" ] }, { "cell_type": "code", "execution_count": null, "id": "954ed46f-b6ad-4018-84e4-a6a4f469002b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:base] *", "language": "python", "name": "conda-base-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": 
"text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }