From 08ab9f126cf4a8085e185939bbbaa9f703e27bca Mon Sep 17 00:00:00 2001 From: scuti Date: Wed, 30 Apr 2025 03:29:07 -0700 Subject: [PATCH] Expanded probe_excluded_rows(). Shows blue bars to represent total professional developers, and red bars to represent those included in the analysis. The x-axis is years of professional experience (changed from age). --- stackoverflow-survey.ipynb | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb index 7b994f5..d253e34 100644 --- a/stackoverflow-survey.ipynb +++ b/stackoverflow-survey.ipynb @@ -254,8 +254,12 @@ "\n", " # https://stackoverflow.com/questions/47443134/update-column-in-pandas-dataframe-without-warning\n", " pd.options.mode.chained_assignment = None # default='warn'\n", + " \n", " new_column = devs[key_x].replace(replacement_dict)\n", - " devs[key_x] = pd.to_numeric(new_column, errors='coerce')\n", + " devs[key_x] = pd.to_numeric(new_column, errors='raise')\n", + "\n", + " new_column = self.df_no_y[key_x].replace(replacement_dict)\n", + " self.df_no_y[key_x] = pd.to_numeric(new_column, errors='raise')\n", " pd.options.mode.chained_assignment = 'warn' # default='warn'\n", " # print( devs[key_x].unique() )\n", " \n", @@ -427,10 +431,25 @@ " print(nan_y_count, 'did not specify', self.key_y)\n", " print('total developers:', self.devs.shape[0] \n", " + nan_x_count + nan_y_count)\n", - " title1 = 'Age of %s Programmers excluded from analysis'\n", - " visualize_devs(self.df_no_y, title1 % self.language)\n", - " title2 = 'Age of %s programmers included in analysis'\n", - " visualize_devs(self.devs, title2 % self.language)\n", + " title = '%s Developers Total vs Included' % self.language\n", + " total_devs = pd.concat([self.devs, self.df_no_y])\n", + " \n", + " plt.figure()\n", + " plt.title(title)\n", + " plt.xticks(rotation=45)\n", + " key = self.key_x\n", + "\n", + " bins = [0, 10, 20, 30, 40, 50]\n", + " labels = ['0-10', 
'11-20', '21-30', '31-40', '41-50']\n", + " total_binned = pd.cut(total_devs[key], bins=bins, labels=labels).to_frame()\n", + " devs_binned = pd.cut(self.devs[key], bins=bins, labels=labels).to_frame()\n", + "\n", + " sb.countplot(x=key, data=total_binned, label='total')\n", + " sb.countplot(x=key, data=devs_binned,\n", + " color='red', label='included in analysis')\n", + " plt.legend()\n", + " plt.savefig('images/%s-total-vs-included.png' % self.language)\n", + " \n", " \n", "def show_model_stats(coef, intercept, y_test, y_pred, label):\n", " '''\n",