{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5d835106-2c35-4478-a134-0b3ad99e5bcd",
   "metadata": {},
   "source": [
    "a.\tLoad the dataset into a pandas DataFrame, display the first five rows, and inspect the column names and their data types."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9a1eb9db-7d22-4c9a-bdf0-13c242885d65",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First five rows:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>chest pain</th>\n",
       "      <th>rest bp</th>\n",
       "      <th>cholesterol</th>\n",
       "      <th>fbs</th>\n",
       "      <th>rest ecg</th>\n",
       "      <th>max heart rate</th>\n",
       "      <th>ex angina</th>\n",
       "      <th>oldpeak</th>\n",
       "      <th>slope ST</th>\n",
       "      <th>vessels</th>\n",
       "      <th>thal</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>52</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>125</td>\n",
       "      <td>212</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>168</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>53</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>140</td>\n",
       "      <td>203</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>155</td>\n",
       "      <td>1</td>\n",
       "      <td>3.1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>70</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>145</td>\n",
       "      <td>174</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>125</td>\n",
       "      <td>1</td>\n",
       "      <td>2.6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>148</td>\n",
       "      <td>203</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>161</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>62</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>138</td>\n",
       "      <td>294</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>106</td>\n",
       "      <td>0</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  sex  chest pain  rest bp  cholesterol  fbs  rest ecg  max heart rate  \\\n",
       "0   52    1           0      125          212    0         1             168   \n",
       "1   53    1           0      140          203    1         0             155   \n",
       "2   70    1           0      145          174    0         1             125   \n",
       "3   61    1           0      148          203    0         1             161   \n",
       "4   62    0           0      138          294    1         1             106   \n",
       "\n",
       "   ex angina  oldpeak  slope ST  vessels  thal  target  \n",
       "0          0      1.0         2        2     3       0  \n",
       "1          1      3.1         0        0     3       0  \n",
       "2          1      2.6         0        0     3       0  \n",
       "3          0      0.0         2        1     3       0  \n",
       "4          0      1.9         1        3     2       0  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Column names and data types:\n",
      "age                 int64\n",
      "sex                 int64\n",
      "chest pain          int64\n",
      "rest bp             int64\n",
      "cholesterol         int64\n",
      "fbs                 int64\n",
      "rest ecg            int64\n",
      "max heart rate      int64\n",
      "ex angina           int64\n",
      "oldpeak           float64\n",
      "slope ST            int64\n",
      "vessels             int64\n",
      "thal                int64\n",
      "target              int64\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Load the dataset.\n",
    "# The default below is the author's local copy. On any other machine, set the\n",
    "# HEART_CSV environment variable to the location of heart.csv instead of editing\n",
    "# this cell.\n",
    "file_path = os.environ.get(\n",
    "    \"HEART_CSV\",\n",
    "    \"/Users/patriciaxufre/Documents/SBE - Disciplinas/2957 | ABA/2024-25/Datasets Examples/heart.csv\",\n",
    ")\n",
    "df = pd.read_csv(file_path)\n",
    "\n",
    "# Display first five rows (display() renders the rich HTML table)\n",
    "print(\"First five rows:\")\n",
    "display(df.head())\n",
    "\n",
    "# Inspect column names and data types\n",
    "print(\"Column names and data types:\")\n",
    "print(df.dtypes)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "475f0086-b2e3-458f-8eb8-9c7d99d3cfaa",
   "metadata": {},
   "source": [
    "b.\tCheck the shape of the dataframe and identify missing values, if any."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7a9e0821-b85d-4fcb-aea8-31b602ec2033",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of dataframe: (1025, 14)\n",
      "Missing values:\n",
      "age               0\n",
      "sex               0\n",
      "chest pain        0\n",
      "rest bp           0\n",
      "cholesterol       0\n",
      "fbs               0\n",
      "rest ecg          0\n",
      "max heart rate    0\n",
      "ex angina         0\n",
      "oldpeak           0\n",
      "slope ST          0\n",
      "vessels           0\n",
      "thal              0\n",
      "target            0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Report the (rows, columns) shape, then the number of missing values per column\n",
    "n_rows_cols = df.shape\n",
    "print(\"Shape of dataframe:\", n_rows_cols)\n",
    "\n",
    "missing_per_column = df.isna().sum()  # isna() is an alias of isnull()\n",
    "print(\"Missing values:\")\n",
    "print(missing_per_column)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1fa12942-12ce-4f77-925e-e4aef36a9406",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The dataset has 1025 observations, 13 input variables, and 1 output variable (target). There are no missing values in the dataset."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a8a7b2dd-2609-45b1-be1a-fb344c1d1085",
   "metadata": {},
   "source": [
    "c.\tRename the column rest bp to Resting_Blood_Pressure."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "bd630d77-e836-4852-8fc0-c15361addfba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename 'rest bp' to 'Resting_Blood_Pressure'.\n",
    "# rename() ignores mapping keys that are not existing column names, so this\n",
    "# does not fail if the column is absent from the dataset.\n",
    "column_renames = {'rest bp': 'Resting_Blood_Pressure'}\n",
    "# inplace=True modifies the DataFrame directly without returning a new object.\n",
    "df.rename(columns=column_renames, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59906a77-e35f-42ea-89cc-9989c5f346fd",
   "metadata": {},
   "source": [
    "d.\tReplace spaces in column names with underscores."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "78ce10d2-29be-427d-8b3f-7e44c967f32b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace every space in the column names with an underscore\n",
    "# (regex=False: the pattern is a plain literal space, not a regular expression)\n",
    "underscored_names = df.columns.str.replace(\" \", \"_\", regex=False)\n",
    "df.columns = underscored_names"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1515cacd-0128-4a85-935a-2c3e796febd9",
   "metadata": {},
   "source": [
    "e.\tClassify all the variables in the dataset. Using the `.dtypes` attribute, check the data type of all columns in the dataframe. Convert target to a categorical variable, and ensure cholesterol is a continuous variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d216b92c-3ed6-4066-8960-c3253d05ab80",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Categorical - nominal variables: sex, ex angina, thal\n",
    "# Categorical - ordinal variables: chest pain, fbs, rest ecg, slope ST\n",
    "# Numerical - discrete variables: age, rest bp, vessels\n",
    "# Numerical - continuous variables: cholesterol, max heart rate, oldpeak"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0c8bc2b7-afe3-4dbe-b7c4-72992070ac18",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Classify variables and convert data types\n",
    "# The target and the nominal variables are stored with pandas' 'category' dtype.\n",
    "categorical_columns = ['target', 'sex', 'ex_angina', 'thal']\n",
    "for column in categorical_columns:\n",
    "    df[column] = df[column].astype('category')\n",
    "\n",
    "# Ensure cholesterol and max_heart_rate are numeric. errors='coerce' turns any\n",
    "# value that cannot be parsed as a number into NaN instead of raising.\n",
    "for column in ['cholesterol', 'max_heart_rate']:\n",
    "    df[column] = pd.to_numeric(df[column], errors='coerce')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7e60db58-b9fd-4bbf-8799-fcae29dfa280",
   "metadata": {},
   "source": [
    "f.\tSelect and display the cholesterol levels of the first four patients."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "216921ee-9617-47e8-b25b-65ded69adbd5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cholesterol levels of first four patients:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cholesterol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   cholesterol\n",
       "0          212\n",
       "1          203\n",
       "2          174\n",
       "3          203"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cholesterol of the first four patients; the double brackets keep the result\n",
    "# a one-column DataFrame (rendered as a table) rather than a Series.\n",
    "print(\"Cholesterol levels of first four patients:\")\n",
    "first_four_cholesterol = df[['cholesterol']].iloc[:4]\n",
    "first_four_cholesterol"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4aa137c2-4b38-4cb7-af55-2b88cc66748c",
   "metadata": {},
   "source": [
    "g.\tSelect the first four rows of columns age, cholesterol, and max heart rate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "1c31f9c3-071b-41ee-8917-02b9856edf30",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First four rows of age, cholesterol, and max_heart_rate:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>cholesterol</th>\n",
       "      <th>max_heart_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>52</td>\n",
       "      <td>212</td>\n",
       "      <td>168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>53</td>\n",
       "      <td>203</td>\n",
       "      <td>155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>70</td>\n",
       "      <td>174</td>\n",
       "      <td>125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>203</td>\n",
       "      <td>161</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  cholesterol  max_heart_rate\n",
       "0   52          212             168\n",
       "1   53          203             155\n",
       "2   70          174             125\n",
       "3   61          203             161"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Select the first four rows of specific columns.\n",
    "# head(4) selects by position, so this stays correct even if the index is not\n",
    "# the default 0..n-1 (a label slice such as .loc[:3] only works for that index).\n",
    "selected_columns = ['age', 'cholesterol', 'max_heart_rate']\n",
    "print(\"First four rows of age, cholesterol, and max_heart_rate:\")\n",
    "display(df[selected_columns].head(4))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ff7244e4-3055-4357-b0cb-d2c66a707997",
   "metadata": {},
   "source": [
    "h.\tCombine non-consecutive columns age, sex, and max heart rate into a new dataframe."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "30a2673c-3731-4554-9d7c-6aed86b26bc1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Combined dataframe:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>max_heart_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>52</td>\n",
       "      <td>1</td>\n",
       "      <td>168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>53</td>\n",
       "      <td>1</td>\n",
       "      <td>155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>70</td>\n",
       "      <td>1</td>\n",
       "      <td>125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>61</td>\n",
       "      <td>1</td>\n",
       "      <td>161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>62</td>\n",
       "      <td>0</td>\n",
       "      <td>106</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age sex  max_heart_rate\n",
       "0   52   1             168\n",
       "1   53   1             155\n",
       "2   70   1             125\n",
       "3   61   1             161\n",
       "4   62   0             106"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Combine non-consecutive columns into a new dataframe.\n",
    "# .copy() makes combined_df an independent DataFrame, so modifying it later\n",
    "# neither affects df nor triggers a SettingWithCopyWarning.\n",
    "combined_df = df[['age', 'sex', 'max_heart_rate']].copy()\n",
    "print(\"Combined dataframe:\")\n",
    "combined_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7a5cbd8-273c-4d7a-bbcb-1fc141567573",
   "metadata": {},
   "source": [
    "i.\tGenerate a random sample of 10 patients from the dataset. Oversample patients older than 60 years for a new sample."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "21938ebe-a2b7-4999-8e32-eeff0a0b3d4d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random sample of 10 patients:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>chest_pain</th>\n",
       "      <th>Resting_Blood_Pressure</th>\n",
       "      <th>cholesterol</th>\n",
       "      <th>fbs</th>\n",
       "      <th>rest_ecg</th>\n",
       "      <th>max_heart_rate</th>\n",
       "      <th>ex_angina</th>\n",
       "      <th>oldpeak</th>\n",
       "      <th>slope_ST</th>\n",
       "      <th>vessels</th>\n",
       "      <th>thal</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>527</th>\n",
       "      <td>62</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>124</td>\n",
       "      <td>209</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>163</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>359</th>\n",
       "      <td>53</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>128</td>\n",
       "      <td>216</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>115</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>447</th>\n",
       "      <td>55</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>160</td>\n",
       "      <td>289</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>145</td>\n",
       "      <td>1</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>120</td>\n",
       "      <td>244</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>162</td>\n",
       "      <td>0</td>\n",
       "      <td>1.1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>621</th>\n",
       "      <td>48</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>130</td>\n",
       "      <td>256</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>150</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>590</th>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>120</td>\n",
       "      <td>269</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>121</td>\n",
       "      <td>1</td>\n",
       "      <td>0.2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>905</th>\n",
       "      <td>64</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>120</td>\n",
       "      <td>246</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>96</td>\n",
       "      <td>1</td>\n",
       "      <td>2.2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>737</th>\n",
       "      <td>67</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>120</td>\n",
       "      <td>229</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>129</td>\n",
       "      <td>1</td>\n",
       "      <td>2.6</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>48</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>124</td>\n",
       "      <td>255</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>175</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>948</th>\n",
       "      <td>70</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>145</td>\n",
       "      <td>174</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>125</td>\n",
       "      <td>1</td>\n",
       "      <td>2.6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     age sex  chest_pain  Resting_Blood_Pressure  cholesterol  fbs  rest_ecg  \\\n",
       "527   62   0           0                     124          209    0         1   \n",
       "359   53   0           2                     128          216    0         0   \n",
       "447   55   1           0                     160          289    0         0   \n",
       "31    50   0           1                     120          244    0         1   \n",
       "621   48   1           0                     130          256    1         0   \n",
       "590   74   0           1                     120          269    0         0   \n",
       "905   64   1           0                     120          246    0         0   \n",
       "737   67   1           0                     120          229    0         0   \n",
       "76    48   1           2                     124          255    1         1   \n",
       "948   70   1           0                     145          174    0         1   \n",
       "\n",
       "     max_heart_rate ex_angina  oldpeak  slope_ST  vessels thal target  \n",
       "527             163         0      0.0         2        0    2      1  \n",
       "359             115         0      0.0         2        0    0      1  \n",
       "447             145         1      0.8         1        1    3      0  \n",
       "31              162         0      1.1         2        0    2      1  \n",
       "621             150         1      0.0         2        2    3      0  \n",
       "590             121         1      0.2         2        1    2      1  \n",
       "905              96         1      2.2         0        1    2      0  \n",
       "737             129         1      2.6         1        2    3      0  \n",
       "76              175         0      0.0         2        2    2      1  \n",
       "948             125         1      2.6         0        0    3      0  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw 10 patients at random (without replacement).\n",
    "# random_state=42 fixes the seed so the same sample is drawn on every run.\n",
    "print(\"Random sample of 10 patients:\")\n",
    "sample_df = df.sample(n=10, random_state=42)\n",
    "sample_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "09146aac-32c8-4af4-a327-a1da154a48bc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Oversampled patients older than 60 years:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>chest_pain</th>\n",
       "      <th>Resting_Blood_Pressure</th>\n",
       "      <th>cholesterol</th>\n",
       "      <th>fbs</th>\n",
       "      <th>rest_ecg</th>\n",
       "      <th>max_heart_rate</th>\n",
       "      <th>ex_angina</th>\n",
       "      <th>oldpeak</th>\n",
       "      <th>slope_ST</th>\n",
       "      <th>vessels</th>\n",
       "      <th>thal</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>413</th>\n",
       "      <td>70</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>130</td>\n",
       "      <td>322</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>109</td>\n",
       "      <td>0</td>\n",
       "      <td>2.4</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1002</th>\n",
       "      <td>66</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>112</td>\n",
       "      <td>212</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>132</td>\n",
       "      <td>1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>431</th>\n",
       "      <td>65</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>150</td>\n",
       "      <td>225</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>114</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>296</th>\n",
       "      <td>67</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>120</td>\n",
       "      <td>237</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>71</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>698</th>\n",
       "      <td>66</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>112</td>\n",
       "      <td>212</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>132</td>\n",
       "      <td>1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>68</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>144</td>\n",
       "      <td>193</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>141</td>\n",
       "      <td>0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>413</th>\n",
       "      <td>70</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>130</td>\n",
       "      <td>322</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>109</td>\n",
       "      <td>0</td>\n",
       "      <td>2.4</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>487</th>\n",
       "      <td>65</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>135</td>\n",
       "      <td>254</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>127</td>\n",
       "      <td>0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>792</th>\n",
       "      <td>68</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>144</td>\n",
       "      <td>193</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>141</td>\n",
       "      <td>0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>375</th>\n",
       "      <td>66</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>160</td>\n",
       "      <td>228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>138</td>\n",
       "      <td>0</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      age sex  chest_pain  Resting_Blood_Pressure  cholesterol  fbs  rest_ecg  \\\n",
       "413    70   1           0                     130          322    0         0   \n",
       "1002   66   1           0                     112          212    0         0   \n",
       "431    65   0           0                     150          225    0         0   \n",
       "296    67   1           0                     120          237    0         1   \n",
       "698    66   1           0                     112          212    0         0   \n",
       "89     68   1           0                     144          193    1         1   \n",
       "413    70   1           0                     130          322    0         0   \n",
       "487    65   1           0                     135          254    0         0   \n",
       "792    68   1           0                     144          193    1         1   \n",
       "375    66   1           0                     160          228    0         0   \n",
       "\n",
       "      max_heart_rate ex_angina  oldpeak  slope_ST  vessels thal target  \n",
       "413              109         0      2.4         1        3    2      0  \n",
       "1002             132         1      0.1         2        1    2      0  \n",
       "431              114         0      1.0         1        3    3      0  \n",
       "296               71         0      1.0         1        0    2      0  \n",
       "698              132         1      0.1         2        1    2      0  \n",
       "89               141         0      3.4         1        2    3      0  \n",
       "413              109         0      2.4         1        3    2      0  \n",
       "487              127         0      2.8         1        1    3      0  \n",
       "792              141         0      3.4         1        2    3      0  \n",
       "375              138         0      2.3         2        0    1      1  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Oversample patients older than 60 years\n",
    "older_patients = df[df['age'] > 60].sample(n=10, replace=True, random_state=42)\n",
    "print(\"Oversampled patients older than 60 years:\")\n",
    "older_patients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af7951eb-c375-4484-8848-593609ac2495",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:base] *",
   "language": "python",
   "name": "conda-base-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}