Showed difference in admired and desired languages.

Visualizations for "have worked with" and "want to work with".
This commit is contained in:
2025-04-18 19:07:54 -07:00
parent be0ee359d4
commit 59857f8d36

View File

@@ -36,7 +36,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e5070e38-8b93-4dc2-9ddb-9a06283ef8d9",
"id": "35b9727a-176c-4193-a1f9-a508aecd2d1c",
"metadata": {},
"outputs": [],
"source": [
@@ -45,9 +45,6 @@
"#keys re: languages are:\n",
"#LanguageHaveWorkedWith,LanguageWantToWorkWith,LanguageAdmired,LanguageDesired\n",
"\n",
"# draw horizontal bar plot\n",
"# https://seaborn.pydata.org/examples/part_whole_bars.html\n",
"\n",
"# draw as strip chart\n",
"# https://seaborn.pydata.org/generated/seaborn.stripplot.html#seaborn.stripplot\n",
"\n",
@@ -100,21 +97,64 @@
"l3 = get_langs( so_df, \"LanguageAdmired\")\n",
"l4 = get_langs( so_df, \"LanguageWantToWorkWith\")\n",
"visualize_langs(l3, l4, label1=\"admired\", label2=\"want to work with\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0bfdb92-378a-4452-91cc-4d21afd2d6cc",
"metadata": {},
"outputs": [],
"source": [
"# draw horizontal bar plot\n",
"# https://seaborn.pydata.org/examples/part_whole_bars.html\n",
"\n",
"# determine extrinsic vs intrinsic motivation\n",
"# investigate extrinsic vs intrinsic motivation\n",
"def get_difference(dict1, dict2):\n",
" keys = dict1.keys()\n",
" result = dict()\n",
" for key in keys:\n",
" result[key] = dict1[key] - dict2[key]\n",
" return result\n",
"\n",
"def visualize_diff(diff_dict, color=\"lightblue\"):\n",
" diff_sorted = dict(\n",
" sorted(diff_dict.items(), key=lambda item: item[1], reverse=True)\n",
" )\n",
" KEY = \"Value\"\n",
" df = pd.DataFrame(diff_sorted.items(), columns=['Languages', 'Value'])\n",
" plt.figure(figsize=(15,20)) \n",
" sb.barplot(x=KEY, y='Languages', data=df, color=color)\n",
" \n",
" DELTA = '\\u0394'\n",
" for index, value in enumerate(df[KEY]):\n",
" # chatgpt annotates my chart\n",
" # Position the text at the base of the bar\n",
" if value >= 0:\n",
" # Adjust the x position for positive values\n",
" plt.text(value, index, DELTA+str(value), va='center', ha=\"left\") \n",
" else:\n",
" # Adjust the x position for negative values\n",
" plt.text(value, index, DELTA+str(value), va='center', ha='right') \n",
" lowest = 0\n",
" offset = 0.5\n",
" positive_values = df[df[KEY] > 0][KEY]\n",
" if not positive_values.empty:\n",
" lowest = positive_values.min()\n",
" if len(positive_values) < len(df):\n",
" # don't draw the line if every value is greater than 0\n",
" plt.axhline(y=df[KEY].tolist().index(lowest) + offset, color='red', linestyle='--')\n",
" pass\n",
" \n",
"motiv_diff = get_difference(l2, l1)\n",
"print(motiv_diff)\n",
"# print(motiv_diff)\n",
"visualize_diff(motiv_diff)\n",
"\n",
"# determine level of hype\n",
"hype = get_difference(l3, l4)\n",
"print(hype)\n"
"hype = get_difference(l4, l3)\n",
"# print(hype)\n",
"visualize_diff(hype, color=\"red\")"
]
},
{
@@ -128,7 +168,7 @@
"employment_status = Counter(so_df[\"MainBranch\"])\n",
"print(employment_status)\n",
"\n",
"print(so_df[\"ConvertedCompYearly\"][:3])"
"print(so_df[\"ConvertedCompYearly\"][])"
]
},
{