First draft of README. Let's see how it looks.

Repurposed horizontal line to show the y-intercept value.
2025-04-23 07:28:15 -07:00 · 2025-04-23 07:16:48 -07:00
4 changed files with 39 additions and 15 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,32 @@
+
+<!--Your Github repository must have the following contents:
+
+    A README.md file that communicates the libraries used, the motivation for the project, the files in the repository with a small description of each, a summary of the results of the analysis, and necessary acknowledgments.
+
+    Your code in a Jupyter notebook, with appropriate comments, analysis, and documentation.
+
+    You may also provide any other necessary documentation you find necessary.-->
+
+# stacksurvey
+
+**stacksurvey** is an exploration and analysis of data from StackOverflow's developer survey of 2024.
+
+[https://survey.stackoverflow.co/2024/](https://survey.stackoverflow.co/2024/)
+
+The motivation for this project is satisfying a class assignment. Eventually, an interesting (enough) topic was discovered in the data set: 
+
+What is the annual compensation (y) over years of experience (x) of developers in a specific country who use a specific programming language?
+
+## Requirements
+
+    numpy pandas sklearn matplotlib seaborn
+
+## Summary of Analysis
+
+*For the purpose of this analysis, (data) scientists and researchers are also considered developers.*
+
+The models generated by the notebook become less reliable for incomes greater than $200,000 per year and for more than 10 years of experience.
+
+![graph of c programmers](images/programmers-C-United-States-of-America.png)
+
+![graph of python programmers](images/programmers-Python-United-States-of-America.png)
--- a/images/programmers-C-United-States-of-America.png
+++ b/images/programmers-C-United-States-of-America.png
--- a/images/programmers-Python-United-States-of-America.png
+++ b/images/programmers-Python-United-States-of-America.png
--- a/stackoverflow-survey.ipynb
+++ b/stackoverflow-survey.ipynb
@@ -149,22 +149,12 @@
    "        self.devs = devs.drop(indices)\n",
    "        del devs, new_column\n",
    "    \n",
-    "    def visualize(self, n_lowest=0, \n",
-    "                  hue=\"Country\", palette=sb.color_palette() ):    \n",
+    "    def visualize(self,  hue=\"Country\", \n",
+    "                  palette=sb.color_palette() ):    \n",
    "        self.canvas = plt.figure()\n",
    "        key_x = self.key_x\n",
    "        key_y = self.key_y\n",
    "\n",
-    "        if n_lowest > 0:\n",
-    "            # chatgpt draws my line\n",
-    "            # Calculate the lowest nth point (for example, the 5th lowest value)\n",
-    "            # iloc[-1] gets the last element from the n smallest\n",
-    "            lowest_nth = self.devs[key_y].nsmallest(n_lowest).iloc[-1]  \n",
-    "            # Draw a horizontal line at the lowest nth point\n",
-    "            # label=f'Lowest {n_poorest}th Point: {lowest_nth_value:.2f}'\n",
-    "            plt.axhline(y=lowest_nth, color='purple', linestyle='--', \n",
-    "                        label=\"y=%0.2f\" % lowest_nth, zorder=-1 )\n",
-    "\n",
    "        sb.scatterplot(data=self.devs, x=key_x, y=key_y, hue=hue, palette=palette)\n",
    "        plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
    "        title = \"Annual Salary of %s Developers Over Years of Experience\" % self.language\\\n",
@@ -200,10 +190,12 @@
    "        print(\"sample predictions:\")\n",
    "        print(y_pred[3:6])\n",
    "        print(\"+----------------------+\")\n",
-    "    \n",
-    "        plt.figure(self.canvas)\n",
+    "        b = model.intercept_[0]\n",
    "\n",
+    "        plt.figure(self.canvas)\n",
    "        plt.plot(X_test, y_pred, color=line_color, label='Regression Line')\n",
+    "        plt.axhline(y=b, color=\"purple\", linestyle='--', \n",
+    "                    label=\"b=%0.2f\" % b, zorder=-1 )\n",
    "        plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
    "        del y_pred, model\n",
    "\n",
@@ -280,7 +272,7 @@
    " #        \"Product manager\"\n",
    "]\n",
    "c = Foo(so_df, \"C\", jobs=cjobs, n_rich_outliers=30, n_poor_outliers=2)\n",
-    "c.visualize(n_lowest=7, hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n",
+    "c.visualize(hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n",
    "c.run_regression()\n",
    "c.run_regression(x_transform=log_base_a, change_base=1.3, \n",
    "                 x_shift=2, y_shift=-5000, line_color=\"magenta\", random=555)\n",
Author	SHA1	Message	Date
scuti	800712c63c	First draft of README. Let's see how it looks.	2025-04-23 07:28:15 -07:00
scuti	721a38435b	Repurposed horizontal line to show the y-intercept value.	2025-04-23 07:16:48 -07:00