def run_log_regression(self, color='pink', nodraw=True):
    """Fit ``y = m * ln(x) + b`` to the survey data and print a fit report.

    The predictor is column ``self.key_x`` of ``self.devs`` and the target is
    column ``self.key_y``. The natural log of the predictor is fed to an
    ordinary least-squares ``LinearRegression``. The coefficient, intercept,
    RMSE, R^2 score and a few sample predictions are printed.

    Args:
        color: Matplotlib color used for the plotted curve; it is also
            echoed in the printed report header.
        nodraw: When true (the default) only the report is printed and
            nothing is plotted.

    Returns:
        None.

    Raises:
        ValueError: If any predictor value is zero or negative, because
            its logarithm is not a finite number.
    """
    # Sort whole rows by the predictor so every x keeps its own y.
    # Sorting the two columns separately would pair the i-th smallest x with
    # the i-th smallest y (the estimator matches rows by position), which
    # makes the fit look much better than the data supports.
    ordered = self.devs[[self.key_x, self.key_y]].sort_values(by=self.key_x)
    X = ordered[[self.key_x]]
    y = ordered[[self.key_y]]

    # Fail early with a readable message instead of passing -inf/NaN on to
    # the estimator.
    if (X[self.key_x] <= 0).any():
        raise ValueError(
            f'log regression needs strictly positive {self.key_x!r} values'
        )

    X_log = np.log(X)

    model = LinearRegression()
    model.fit(X_log, y)
    y_pred = model.predict(X_log)

    # y is passed as a one-column frame, so the fitted parameters come back
    # wrapped in arrays and are unwrapped by index here.
    m = model.coef_[0][0]
    b = model.intercept_[0]
    rmse = root_mean_squared_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    print('+----------------------+')
    print(f'{color} log regression line for {self.language}')
    print(f'coefficient = {m:0.2f}')
    print(f'intercept = {b:0.2f}')
    print(f'rmse = {rmse:0.2f}')
    print(f'r2 score = {r2:0.2f}')
    print('sample predictions:')
    print(y_pred[3:6])
    print('+----------------------+')

    if nodraw:
        return

    # X is ordered by the predictor, so the predictions trace a single
    # left-to-right curve rather than a zig-zag.
    plt.plot(X, y_pred, color=color, label="Log regression")
    plt.legend(loc='lower center', bbox_to_anchor=(1.5, 0))
"python.run_regression(x_transform=log_base_a, change_base=1.20, risky=2, random=555, \n", - " color='pink', name='Risky regression line')\n", + "#python.run_regression(x_transform=log_base_a, change_base=1.20, risky=2, random=555, \n", + "# color='pink', name='Risky regression line')\n", + "python.run_log_regression(nodraw=False)\n", "python.export_image()" ] }, @@ -370,14 +400,16 @@ "c.run_regression()\n", "c.run_regression(x_transform=log_base_a, change_base=1.3, \n", " x_shift=2, y_shift=-5000, color='magenta', random=555)\n", - "c.run_regression(x_transform=log_base_a, change_base=1.3, risky=2, color='pink')\n", + "c.run_log_regression(nodraw=False)\n", "c.export_image()" ] }, { "cell_type": "markdown", "id": "b7026c56-3049-4e60-bbc6-ee548ff58297", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## Evaluation for C\n", "\n",