Created function to generate a chart of salary over years of experience.
This commit is contained in:
@@ -380,72 +380,89 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
"\n",
|
||||||
"# focus on people who have given ...\n",
|
"# come up with name later\n",
|
||||||
"key = \"ConvertedCompYearly\"\n",
|
"# do_actual_project\n",
|
||||||
"key2 = \"YearsCodePro\"\n",
|
"# earnings over years of exp\n",
|
||||||
"df = so_df.dropna(subset=[key, key2])\n",
|
"def foo(dataset, language, jobs=None, n_rich_outliers=0, n_lowest=0, hue=\"Country\", country=\"United States of America\"):\n",
|
||||||
|
" # focus on people who have given ...\n",
|
||||||
|
" key = \"ConvertedCompYearly\"\n",
|
||||||
|
" key2 = \"YearsCodePro\"\n",
|
||||||
|
" df = dataset.dropna(subset=[key, key2])\n",
|
||||||
|
" \n",
|
||||||
|
" criteria = {\"MainBranch\":\"I am a developer by profession\"}\n",
|
||||||
|
" \n",
|
||||||
|
" #print(df[\"Country\"].unique)\n",
|
||||||
|
" if country:\n",
|
||||||
|
" criteria[\"Country\"] = country\n",
|
||||||
|
" for k in criteria:\n",
|
||||||
|
" df = df[df[k] == criteria[k] ] \n",
|
||||||
|
" \n",
|
||||||
|
"    # keep rows where any column equals one of the given job titles (approach suggested by ChatGPT)\n",
|
||||||
|
" if jobs:\n",
|
||||||
|
" df = df[df.isin(jobs).any(axis=1)]\n",
|
||||||
|
" \n",
|
||||||
|
" devs = None\n",
|
||||||
|
" if len(language) > 1:\n",
|
||||||
|
" devs = get_lang_devs(df, language)\n",
|
||||||
|
" else:\n",
|
||||||
|
" devs = get_c_devs(df, lang=language)\n",
|
||||||
|
" replacement_dict = {\n",
|
||||||
|
" 'Less than 1 year': '0.5',\n",
|
||||||
|
" 'More than 50 years': '51',\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" # https://stackoverflow.com/questions/47443134/update-column-in-pandas-dataframe-without-warning\n",
|
||||||
|
" pd.options.mode.chained_assignment = None # default='warn'\n",
|
||||||
|
" new_column = devs[key2].replace(replacement_dict)\n",
|
||||||
|
" devs[key2] = pd.to_numeric(new_column, errors='coerce')\n",
|
||||||
|
" # print( devs[key2].unique() )\n",
|
||||||
|
" \n",
|
||||||
|
" indices = devs[key].nlargest(n_rich_outliers).index\n",
|
||||||
|
" devs = devs.drop(indices)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"criteria = {\"MainBranch\":\"I am a developer by profession\"}\n",
|
" plt.figure()\n",
|
||||||
|
" \n",
|
||||||
|
" if n_lowest > 0:\n",
|
||||||
|
" # chatgpt draws my line\n",
|
||||||
|
" # Calculate the lowest nth point (for example, the 5th lowest value)\n",
|
||||||
|
" # iloc[-1] gets the last element from the n smallest\n",
|
||||||
|
" lowest_nth = df[key].nsmallest(n_lowest).iloc[-1] \n",
|
||||||
|
" # Draw a horizontal line at the lowest nth point\n",
|
||||||
|
"        # label=f'Lowest {n_lowest}th Point: {lowest_nth:.2f}'\n",
|
||||||
|
" plt.axhline(y=lowest_nth, color='r', linestyle='--', label=\"y=%0.2f\" % lowest_nth )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#print(df[\"Country\"].unique)\n",
|
"# plt.xticks(rotation=90)\n",
|
||||||
|
" sb.scatterplot(data=devs, x=key2, y=key, hue=hue)\n",
|
||||||
|
" plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
||||||
|
" title = \"Annual Salary of %s Developers Over Years of Experience\" %language\\\n",
|
||||||
|
" + \"\\nsample size=%i\" % len (devs)\\\n",
|
||||||
|
" + \"\\ncountry=%s\" %country\n",
|
||||||
|
" plt.title(title)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# criteria[\"Country\"] = \"United States of America\"\n",
|
|
||||||
"for k in criteria:\n",
|
|
||||||
" df = df[df[k] == criteria[k] ] \n",
|
|
||||||
"\n",
|
|
||||||
"jobs = None\n",
|
|
||||||
"# expected C jobs\n",
|
"# expected C jobs\n",
|
||||||
"#jobs = [\"Developer, embedded applications or devices\", \n",
|
"cjobs = [\"Developer, embedded applications or devices\", \n",
|
||||||
"# \"Developer, game or graphics\",\n",
|
" \"Developer, game or graphics\",\n",
|
||||||
"# \"Engineering manager\" , \n",
|
" \"Hardware Engineer\" ,\n",
|
||||||
"# \"Project manager\", \n",
|
" # \"Project manager\", \n",
|
||||||
"# \"Product manager\"\n",
|
" # \"Product manager\"\n",
|
||||||
"#]\n",
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# expected python jobs\n",
|
"# expected python jobs\n",
|
||||||
"#jobs = [\"Data scientist or machine learning specialist\",\n",
|
"pyjobs = [\"Data scientist or machine learning specialist\",\n",
|
||||||
"# \"Data or business analyst\",\n",
|
" \"Data or business analyst\",\n",
|
||||||
"# \"Data engineer\",\n",
|
" \"Data engineer\",\n",
|
||||||
"# \"DevOps specialist\",\n",
|
"# \"DevOps specialist\",\n",
|
||||||
"# \"Developer, QA or test\"\n",
|
"# \"Developer, QA or test\"\n",
|
||||||
"#]\n",
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# chatgpt tells me about filtering with multiple strings\n",
|
"jsjobs = [\"Developer, full-stack\",\n",
|
||||||
"if jobs:\n",
|
" \"Developer, front-end\",\n",
|
||||||
" df = df[df.isin(jobs).any(axis=1)]\n",
|
" \"Developer, mobile\"\n",
|
||||||
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# play with these\n",
|
"foo(so_df, \"Python\", jobs=pyjobs, hue=\"DevType\", n_rich_outliers=7, n_lowest=12)\n",
|
||||||
"language = \"Cobol\"\n",
|
"foo(so_df, \"C\", jobs=cjobs, hue=\"DevType\", n_rich_outliers=3, n_lowest=5)\n",
|
||||||
"legend = True\n",
|
"foo(so_df, \"JavaScript\", jobs=jsjobs, hue=\"DevType\", n_rich_outliers=6, country=\"Ukraine\")"
|
||||||
"NUM_OF_TOO_RICH = 3\n",
|
|
||||||
"# \"Employment\"\n",
|
|
||||||
"hue = \"Country\"\n",
|
|
||||||
"\n",
|
|
||||||
"devs = None\n",
|
|
||||||
"if len(language) > 1:\n",
|
|
||||||
" devs = get_lang_devs(df, language)\n",
|
|
||||||
"else:\n",
|
|
||||||
" devs = get_c_devs(df, lang=language)\n",
|
|
||||||
"replacement_dict = {\n",
|
|
||||||
" 'Less than 1 year': '0.5',\n",
|
|
||||||
" 'More than 50 years': '51',\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"# https://stackoverflow.com/questions/47443134/update-column-in-pandas-dataframe-without-warning\n",
|
|
||||||
"pd.options.mode.chained_assignment = None # default='warn'\n",
|
|
||||||
"new_column = devs[key2].replace(replacement_dict)\n",
|
|
||||||
"devs[key2] = pd.to_numeric(new_column, errors='coerce')\n",
|
|
||||||
"# print( devs[key2].unique() )\n",
|
|
||||||
"\n",
|
|
||||||
"indices = devs[key].nlargest(NUM_OF_TOO_RICH).index\n",
|
|
||||||
"devs = devs.drop(indices)\n",
|
|
||||||
"print( len (devs) )\n",
|
|
||||||
"\n",
|
|
||||||
"plt.figure()\n",
|
|
||||||
"plt.xticks(rotation=90)\n",
|
|
||||||
"sb.scatterplot(data=devs, x=key2, y=key, hue=hue, legend=legend)\n",
|
|
||||||
"plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n",
|
|
||||||
"plt.title(\"Annual Salary of %s Developers Over Years of Experience\" %language)\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user