Squashed commit of the following:

commit e1691bb85b611c84ae9e4315523de1b79837ef2b
Author: scuti <scuti@tutamail.com>
Date:   Sat Apr 19 14:00:28 2025 -0700

    Created graph for job title and compensation

commit 50e00a42686f7135508ca08d1354a36012e839d7
Author: scuti <scuti@tutamail.com>
Date:   Sat Apr 19 06:38:16 2025 -0700

    Got visualization idea for annual compensation
This commit is contained in:
2025-04-19 14:10:44 -07:00
parent e4ba004fec
commit cbd575697f

View File

@@ -168,14 +168,55 @@
"cell_type": "code",
"execution_count": null,
"id": "e90cf119-c50d-468a-bc87-72dac41176ce",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# print survey ans\n",
"employment_status = Counter(so_df[\"MainBranch\"])\n",
"print(employment_status)\n",
"# see how much money are people making\n",
"\n",
"print(so_df[\"ConvertedCompYearly\"][])"
"def get_mean_by_category(df, category, key=\"ConvertedCompYearly\"):\n",
" unique = df[category].unique()\n",
" result = dict()\n",
" for u in unique:\n",
" mean = df[df[category] == u][key].mean()\n",
" result[u] = mean\n",
" return result\n",
"\n",
"def show_me_the_money(df, saveto=None):\n",
" key_x = \"ConvertedCompYearly\"\n",
" key_y = \"DevType\"\n",
" \n",
" means = get_mean_by_category(df, key_y) \n",
" mean_df = pd.DataFrame(means.items(), columns=[key_y, key_x])\n",
"\n",
" plt.figure(figsize=(14,18)) \n",
" plt.axvline(x=1e5, color='red', linestyle='--', label=\"x = $100,000\")\n",
" plt.axvline(x=1e6, color='lightgreen', linestyle='--', label=\"x = millionaire\")\n",
" sb.barplot(x=key_x, y=key_y, data=mean_df.sort_values(by=key_x), \\\n",
" color='lavender', alpha=0.7, label=\"average compensation\")\n",
" sb.stripplot(x=key_x, y=key_y, data=df, \\\n",
" size=3, jitter=True)\n",
" if saveto is not None:\n",
" plt.savefig(saveto, bbox_inches='tight')\n",
" \n",
"# print survey ans\n",
"#employment_status = Counter(so_df[\"MainBranch\"])\n",
"#print(employment_status)\n",
"\n",
"#employment_type = Counter(so_df[\"DevType\"])\n",
"#print(employment_type)\n",
"\n",
"key = \"ConvertedCompYearly\"\n",
"# answers = so_df[:-1][key].count()\n",
"# print(answers, \"people answered re: \", key)\n",
"df_no_na = so_df.dropna(subset=[key])\n",
"indices = df_no_na[key].nlargest(15).index\n",
"\n",
"show_me_the_money( df_no_na.drop(indices), saveto=\"images/compensation-by-profession.png\" )\n",
"# could also ask myself what portion of developers \n",
"# earn less than the mean compensation\n",
"# (what titles have high standard deviations in earnings)"
]
},
{