diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb index 103029e..5f514c4 100644 --- a/stackoverflow-survey.ipynb +++ b/stackoverflow-survey.ipynb @@ -149,22 +149,12 @@ " self.devs = devs.drop(indices)\n", " del devs, new_column\n", " \n", - " def visualize(self, n_lowest=0, \n", - " hue=\"Country\", palette=sb.color_palette() ): \n", + " def visualize(self, hue=\"Country\", \n", + " palette=sb.color_palette() ): \n", " self.canvas = plt.figure()\n", " key_x = self.key_x\n", " key_y = self.key_y\n", "\n", - " if n_lowest > 0:\n", - " # chatgpt draws my line\n", - " # Calculate the lowest nth point (for example, the 5th lowest value)\n", - " # iloc[-1] gets the last element from the n smallest\n", - " lowest_nth = self.devs[key_y].nsmallest(n_lowest).iloc[-1] \n", - " # Draw a horizontal line at the lowest nth point\n", - " # label=f'Lowest {n_poorest}th Point: {lowest_nth_value:.2f}'\n", - " plt.axhline(y=lowest_nth, color='purple', linestyle='--', \n", - " label=\"y=%0.2f\" % lowest_nth, zorder=-1 )\n", - "\n", " sb.scatterplot(data=self.devs, x=key_x, y=key_y, hue=hue, palette=palette)\n", " plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n", " title = \"Annual Salary of %s Developers Over Years of Experience\" % self.language\\\n", @@ -200,10 +190,12 @@ " print(\"sample predictions:\")\n", " print(y_pred[3:6])\n", " print(\"+----------------------+\")\n", - " \n", - " plt.figure(self.canvas)\n", + " b = model.intercept_[0]\n", "\n", + " plt.figure(self.canvas)\n", " plt.plot(X_test, y_pred, color=line_color, label='Regression Line')\n", + " plt.axhline(y=b, color=\"purple\", linestyle='--', \n", + " label=\"b=%0.2f\" % b, zorder=-1 )\n", " plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n", " del y_pred, model\n", "\n", @@ -280,7 +272,7 @@ " # \"Product manager\"\n", "]\n", "c = Foo(so_df, \"C\", jobs=cjobs, n_rich_outliers=30, n_poor_outliers=2)\n", - "c.visualize(n_lowest=7, hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n", + "c.visualize(hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n", "c.run_regression()\n", "c.run_regression(x_transform=log_base_a, change_base=1.3, \n", " x_shift=2, y_shift=-5000, line_color=\"magenta\", random=555)\n",