Squashed commit of the following:
commit e1691bb85b611c84ae9e4315523de1b79837ef2b
Author: scuti <scuti@tutamail.com>
Date: Sat Apr 19 14:00:28 2025 -0700

    Created graph for job title and compensation

commit 50e00a42686f7135508ca08d1354a36012e839d7
Author: scuti <scuti@tutamail.com>
Date: Sat Apr 19 06:38:16 2025 -0700

    Got visualization idea for annual compensation
This commit is contained in:
@@ -168,14 +168,55 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e90cf119-c50d-468a-bc87-72dac41176ce",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print survey ans\n",
|
||||
"employment_status = Counter(so_df[\"MainBranch\"])\n",
|
||||
"print(employment_status)\n",
|
||||
"# see how much money are people making\n",
|
||||
"\n",
|
||||
"print(so_df[\"ConvertedCompYearly\"][])"
|
||||
"def get_mean_by_category(df, category, key=\"ConvertedCompYearly\"):\n",
|
||||
" unique = df[category].unique()\n",
|
||||
" result = dict()\n",
|
||||
" for u in unique:\n",
|
||||
" mean = df[df[category] == u][key].mean()\n",
|
||||
" result[u] = mean\n",
|
||||
" return result\n",
|
||||
"\n",
|
||||
"def show_me_the_money(df, saveto=None):\n",
|
||||
" key_x = \"ConvertedCompYearly\"\n",
|
||||
" key_y = \"DevType\"\n",
|
||||
" \n",
|
||||
" means = get_mean_by_category(df, key_y) \n",
|
||||
" mean_df = pd.DataFrame(means.items(), columns=[key_y, key_x])\n",
|
||||
"\n",
|
||||
" plt.figure(figsize=(14,18)) \n",
|
||||
" plt.axvline(x=1e5, color='red', linestyle='--', label=\"x = $100,000\")\n",
|
||||
" plt.axvline(x=1e6, color='lightgreen', linestyle='--', label=\"x = millionaire\")\n",
|
||||
" sb.barplot(x=key_x, y=key_y, data=mean_df.sort_values(by=key_x), \\\n",
|
||||
" color='lavender', alpha=0.7, label=\"average compensation\")\n",
|
||||
" sb.stripplot(x=key_x, y=key_y, data=df, \\\n",
|
||||
" size=3, jitter=True)\n",
|
||||
" if saveto is not None:\n",
|
||||
" plt.savefig(saveto, bbox_inches='tight')\n",
|
||||
" \n",
|
||||
"# print survey ans\n",
|
||||
"#employment_status = Counter(so_df[\"MainBranch\"])\n",
|
||||
"#print(employment_status)\n",
|
||||
"\n",
|
||||
"#employment_type = Counter(so_df[\"DevType\"])\n",
|
||||
"#print(employment_type)\n",
|
||||
"\n",
|
||||
"key = \"ConvertedCompYearly\"\n",
|
||||
"# answers = so_df[:-1][key].count()\n",
|
||||
"# print(answers, \"people answered re: \", key)\n",
|
||||
"df_no_na = so_df.dropna(subset=[key])\n",
|
||||
"indices = df_no_na[key].nlargest(15).index\n",
|
||||
"\n",
|
||||
"show_me_the_money( df_no_na.drop(indices), saveto=\"images/compensation-by-profession.png\" )\n",
|
||||
"# could also ask myself what portion of developers \n",
|
||||
"# earn less than the mean compensation\n",
|
||||
"# (what titles have high standard deviations in earnings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
Reference in New Issue
Block a user