Compare commits
2 Commits
bdcba003fe
...
800712c63c
Author | SHA1 | Date | |
---|---|---|---|
800712c63c | |||
721a38435b |
32
README.md
Normal file
32
README.md
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
|
||||||
|
<!--Your Github repository must have the following contents:
|
||||||
|
|
||||||
|
A README.md file that communicates the libraries used, the motivation for the project, the files in the repository with a small description of each, a summary of the results of the analysis, and necessary acknowledgments.
|
||||||
|
|
||||||
|
Your code in a Jupyter notebook, with appropriate comments, analysis, and documentation.
|
||||||
|
|
||||||
|
You may also provide any other necessary documentation you find necessary.-->
|
||||||
|
|
||||||
|
# stacksurvey
|
||||||
|
|
||||||
|
**stacksurvey** is an exploration and analysis of data from StackOverflow's developer survey of 2024.
|
||||||
|
|
||||||
|
[https://survey.stackoverflow.co/2024/](https://survey.stackoverflow.co/2024/)
|
||||||
|
|
||||||
|
The motivation for this project is satisfying a class assignment. Eventually, an interesting (enough) topic was discovered in the data set:
|
||||||
|
|
||||||
|
What is the annual compensation (y) over years of experience (x) of developers from a specific country who use a given programming language?
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
numpy pandas sklearn matplotlib seaborn
|
||||||
|
|
||||||
|
## Summary of Analysis
|
||||||
|
|
||||||
|
*For the purpose of this analysis, (data) scientists and researchers are also considered developers.*
|
||||||
|
|
||||||
|
The models generated by the notebook become less reliable for incomes greater than $200,000 per year and for more than 10 years of experience.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|

|
BIN
images/programmers-C-United-States-of-America.png
Normal file
BIN
images/programmers-C-United-States-of-America.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 81 KiB |
BIN
images/programmers-Python-United-States-of-America.png
Normal file
BIN
images/programmers-Python-United-States-of-America.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 91 KiB |
@@ -149,22 +149,12 @@
|
|||||||
" self.devs = devs.drop(indices)\n",
|
" self.devs = devs.drop(indices)\n",
|
||||||
" del devs, new_column\n",
|
" del devs, new_column\n",
|
||||||
" \n",
|
" \n",
|
||||||
" def visualize(self, n_lowest=0, \n",
|
" def visualize(self, hue=\"Country\", \n",
|
||||||
" hue=\"Country\", palette=sb.color_palette() ): \n",
|
" palette=sb.color_palette() ): \n",
|
||||||
" self.canvas = plt.figure()\n",
|
" self.canvas = plt.figure()\n",
|
||||||
" key_x = self.key_x\n",
|
" key_x = self.key_x\n",
|
||||||
" key_y = self.key_y\n",
|
" key_y = self.key_y\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if n_lowest > 0:\n",
|
|
||||||
" # chatgpt draws my line\n",
|
|
||||||
" # Calculate the lowest nth point (for example, the 5th lowest value)\n",
|
|
||||||
" # iloc[-1] gets the last element from the n smallest\n",
|
|
||||||
" lowest_nth = self.devs[key_y].nsmallest(n_lowest).iloc[-1] \n",
|
|
||||||
" # Draw a horizontal line at the lowest nth point\n",
|
|
||||||
" # label=f'Lowest {n_poorest}th Point: {lowest_nth_value:.2f}'\n",
|
|
||||||
" plt.axhline(y=lowest_nth, color='purple', linestyle='--', \n",
|
|
||||||
" label=\"y=%0.2f\" % lowest_nth, zorder=-1 )\n",
|
|
||||||
"\n",
|
|
||||||
" sb.scatterplot(data=self.devs, x=key_x, y=key_y, hue=hue, palette=palette)\n",
|
" sb.scatterplot(data=self.devs, x=key_x, y=key_y, hue=hue, palette=palette)\n",
|
||||||
" plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
" plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
||||||
" title = \"Annual Salary of %s Developers Over Years of Experience\" % self.language\\\n",
|
" title = \"Annual Salary of %s Developers Over Years of Experience\" % self.language\\\n",
|
||||||
@@ -200,10 +190,12 @@
|
|||||||
" print(\"sample predictions:\")\n",
|
" print(\"sample predictions:\")\n",
|
||||||
" print(y_pred[3:6])\n",
|
" print(y_pred[3:6])\n",
|
||||||
" print(\"+----------------------+\")\n",
|
" print(\"+----------------------+\")\n",
|
||||||
|
" b = model.intercept_[0]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" plt.figure(self.canvas)\n",
|
" plt.figure(self.canvas)\n",
|
||||||
"\n",
|
|
||||||
" plt.plot(X_test, y_pred, color=line_color, label='Regression Line')\n",
|
" plt.plot(X_test, y_pred, color=line_color, label='Regression Line')\n",
|
||||||
|
" plt.axhline(y=b, color=\"purple\", linestyle='--', \n",
|
||||||
|
" label=\"b=%0.2f\" % b, zorder=-1 )\n",
|
||||||
" plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
" plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
||||||
" del y_pred, model\n",
|
" del y_pred, model\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -280,7 +272,7 @@
|
|||||||
" # \"Product manager\"\n",
|
" # \"Product manager\"\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"c = Foo(so_df, \"C\", jobs=cjobs, n_rich_outliers=30, n_poor_outliers=2)\n",
|
"c = Foo(so_df, \"C\", jobs=cjobs, n_rich_outliers=30, n_poor_outliers=2)\n",
|
||||||
"c.visualize(n_lowest=7, hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n",
|
"c.visualize(hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n",
|
||||||
"c.run_regression()\n",
|
"c.run_regression()\n",
|
||||||
"c.run_regression(x_transform=log_base_a, change_base=1.3, \n",
|
"c.run_regression(x_transform=log_base_a, change_base=1.3, \n",
|
||||||
" x_shift=2, y_shift=-5000, line_color=\"magenta\", random=555)\n",
|
" x_shift=2, y_shift=-5000, line_color=\"magenta\", random=555)\n",
|
||||||
|
Reference in New Issue
Block a user