Compare commits
2 Commits
bdcba003fe
...
800712c63c
Author | SHA1 | Date | |
---|---|---|---|
800712c63c | |||
721a38435b |
32
README.md
Normal file
32
README.md
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
<!--Your Github repository must have the following contents:
|
||||
|
||||
A README.md file that communicates the libraries used, the motivation for the project, the files in the repository with a small description of each, a summary of the results of the analysis, and necessary acknowledgments.
|
||||
|
||||
Your code in a Jupyter notebook, with appropriate comments, analysis, and documentation.
|
||||
|
||||
You may also provide any other necessary documentation you find necessary.-->
|
||||
|
||||
# stacksurvey
|
||||
|
||||
**stacksurvey** is an exploration and analysis of data from StackOverflow's developer survey of 2024.
|
||||
|
||||
[https://survey.stackoverflow.co/2024/](https://survey.stackoverflow.co/2024/)
|
||||
|
||||
The motivation for this project is satisfying a class assignment. Eventually, an interesting (enough) topic was discovered in the data set:
|
||||
|
||||
What is the annual compensation (y) over years of experience (x) of developers from a specific country who use a specific programming language?
|
||||
|
||||
## Requirements
|
||||
|
||||
numpy, pandas, scikit-learn (imported as `sklearn`), matplotlib, seaborn
|
||||
|
||||
## Summary of Analysis
|
||||
|
||||
*For the purpose of this analysis, (data) scientists and researchers are also considered developers.*
|
||||
|
||||
The models generated by the notebook become less reliable for incomes greater than $200,000 per year and for more than 10 years of experience.
|
||||
|
||||

|
||||
|
||||

|
BIN
images/programmers-C-United-States-of-America.png
Normal file
BIN
images/programmers-C-United-States-of-America.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 81 KiB |
BIN
images/programmers-Python-United-States-of-America.png
Normal file
BIN
images/programmers-Python-United-States-of-America.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 91 KiB |
@@ -149,22 +149,12 @@
|
||||
" self.devs = devs.drop(indices)\n",
|
||||
" del devs, new_column\n",
|
||||
" \n",
|
||||
" def visualize(self, n_lowest=0, \n",
|
||||
" hue=\"Country\", palette=sb.color_palette() ): \n",
|
||||
" def visualize(self, hue=\"Country\", \n",
|
||||
" palette=sb.color_palette() ): \n",
|
||||
" self.canvas = plt.figure()\n",
|
||||
" key_x = self.key_x\n",
|
||||
" key_y = self.key_y\n",
|
||||
"\n",
|
||||
" if n_lowest > 0:\n",
|
||||
" # chatgpt draws my line\n",
|
||||
" # Calculate the lowest nth point (for example, the 5th lowest value)\n",
|
||||
" # iloc[-1] gets the last element from the n smallest\n",
|
||||
" lowest_nth = self.devs[key_y].nsmallest(n_lowest).iloc[-1] \n",
|
||||
" # Draw a horizontal line at the lowest nth point\n",
|
||||
" # label=f'Lowest {n_poorest}th Point: {lowest_nth_value:.2f}'\n",
|
||||
" plt.axhline(y=lowest_nth, color='purple', linestyle='--', \n",
|
||||
" label=\"y=%0.2f\" % lowest_nth, zorder=-1 )\n",
|
||||
"\n",
|
||||
" sb.scatterplot(data=self.devs, x=key_x, y=key_y, hue=hue, palette=palette)\n",
|
||||
" plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
||||
" title = \"Annual Salary of %s Developers Over Years of Experience\" % self.language\\\n",
|
||||
@@ -200,10 +190,12 @@
|
||||
" print(\"sample predictions:\")\n",
|
||||
" print(y_pred[3:6])\n",
|
||||
" print(\"+----------------------+\")\n",
|
||||
" \n",
|
||||
" plt.figure(self.canvas)\n",
|
||||
" b = model.intercept_[0]\n",
|
||||
"\n",
|
||||
" plt.figure(self.canvas)\n",
|
||||
" plt.plot(X_test, y_pred, color=line_color, label='Regression Line')\n",
|
||||
" plt.axhline(y=b, color=\"purple\", linestyle='--', \n",
|
||||
" label=\"b=%0.2f\" % b, zorder=-1 )\n",
|
||||
" plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
||||
" del y_pred, model\n",
|
||||
"\n",
|
||||
@@ -280,7 +272,7 @@
|
||||
" # \"Product manager\"\n",
|
||||
"]\n",
|
||||
"c = Foo(so_df, \"C\", jobs=cjobs, n_rich_outliers=30, n_poor_outliers=2)\n",
|
||||
"c.visualize(n_lowest=7, hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n",
|
||||
"c.visualize(hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n",
|
||||
"c.run_regression()\n",
|
||||
"c.run_regression(x_transform=log_base_a, change_base=1.3, \n",
|
||||
" x_shift=2, y_shift=-5000, line_color=\"magenta\", random=555)\n",
|
||||
|
Reference in New Issue
Block a user