From 7b34548a2d78cc07c9726079ac0fdf7387fe0ac7 Mon Sep 17 00:00:00 2001 From: scuti Date: Fri, 25 Apr 2025 01:31:45 -0700 Subject: [PATCH] Comply with PEP 8. For consistent quotes and number of line breaks. At least for the code that is being used. --- stackoverflow-survey.ipynb | 89 ++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb index 551d541..6dc0a46 100644 --- a/stackoverflow-survey.ipynb +++ b/stackoverflow-survey.ipynb @@ -31,7 +31,7 @@ "import matplotlib.pyplot as plt\n", "\n", "# avoid burning my eyes @ night\n", - "plt.style.use(\"dark_background\")" + "plt.style.use('dark_background')" ] }, { @@ -51,14 +51,14 @@ "metadata": {}, "outputs": [], "source": [ - "FILE = \"data/survey_results_public.csv\"\n", + "FILE = 'data/survey_results_public.csv'\n", "so_df = pd.read_csv(FILE)\n", "\n", "print(so_df.keys())\n", "so_df.describe()\n", "\n", "# check for people who aren't paying attention\n", - "count_not_apple = (so_df[\"Check\"] != \"Apples\").sum()\n", + "count_not_apple = (so_df['Check'] != 'Apples').sum()\n", "print(count_not_apple)\n", "print(so_df.shape)\n", "assert(count_not_apple == 0)\n", @@ -74,7 +74,7 @@ "source": [ "# draw count plot of developers based on age\n", "\n", - "def visualize_devs(df, lang, key=\"Age\",):\n", + "def visualize_devs(df, lang, key='Age'):\n", " plt.figure()\n", " plt.xticks(rotation=45)\n", " # from:\n", @@ -84,19 +84,21 @@ " '45-54 years old', '55-64 years old', \\\n", " '65 years or older', 'Prefer not to say']\n", " sb.countplot(x=key, data=df, order=order)\n", - " title=\"Ages of %s Programmers\" % lang\n", + " title='Ages of %s Programmers' % lang\n", " plt.title(title)\n", - " filename= \"images/%s-of-%s-programmers.png\" % (key, lang)\n", - " plt.savefig(filename, bbox_inches=\"tight\")\n", + " filename= 'images/%s-of-%s-programmers.png' % (key, lang)\n", + " plt.savefig(filename, bbox_inches='tight')\n", + "\n", "\n", "def get_lang_devs(df, lang):\n", - " col = \"LanguageHaveWorkedWith\"\n", + " col = 'LanguageHaveWorkedWith'\n", " # will not work for single character languages (C, R)\n", " # will mangle Java and JavaScript, Python and MicroPython\n", " return df[ df[col].str.contains(lang, na=False) ] \n", "\n", - "def get_c_devs(df, lang=\"C\"):\n", - " key = \"LanguageHaveWorkedWith\"\n", + "\n", + "def get_c_devs(df, lang='C'):\n", + " key = 'LanguageHaveWorkedWith'\n", " cdevs = []\n", " for index, dev in df.iterrows():\n", " try:\n", @@ -117,9 +119,9 @@ "metadata": {}, "outputs": [], "source": [ - "visualize_devs( get_c_devs(so_df) , \"C\")\n", + "visualize_devs( get_c_devs(so_df) , 'C')\n", "\n", - "for lang in [\"Cobol\", \"Prolog\", \"Ada\", \"Python\"]:\n", + "for lang in ['Cobol', 'Prolog', 'Ada', 'Python']:\n", " foo = get_lang_devs(so_df, lang)\n", " visualize_devs(foo, lang)" ] @@ -152,23 +154,23 @@ "class Foo:\n", " def __init__(self, dataset, language, jobs=None, \n", " n_rich_outliers=0, n_poor_outliers=0, \n", - " country=\"United States of America\"):\n", + " country='United States of America'):\n", " self.devs = None\n", " self.canvas = None\n", " self.language = language\n", " self.country = country\n", " # focus on people who have given ...\n", - " key_x = \"YearsCodePro\"\n", - " key_y = \"ConvertedCompYearly\"\n", + " key_x = 'YearsCodePro'\n", + " key_y = 'ConvertedCompYearly'\n", " df = dataset.dropna(subset=[key_x, key_y])\n", " self.key_x = key_x\n", " self.key_y = key_y\n", " \n", " qualifiers = {\n", - " \"MainBranch\":\"I am a developer by profession\",\n", + " 'MainBranch': 'I am a developer by profession',\n", " }\n", " if country:\n", - " qualifiers[\"Country\"] = country\n", + " qualifiers['Country'] = country\n", " for k in qualifiers:\n", " df = df[df[k] == qualifiers[k] ] \n", "\n", @@ -177,7 +179,7 @@ " df = df[df.isin(jobs).any(axis=1)]\n", "\n", " devs = None\n", - " if len(language) == 1 or language in [\"Python\", \"Java\"]:\n", + " if len(language) == 1 or language in ['Python', 'Java']:\n", " devs = get_c_devs(df, lang=language)\n", " else:\n", " devs = get_lang_devs(df, language)\n", @@ -200,7 +202,7 @@ " self.devs = devs.drop(indices)\n", " del devs, new_column\n", " \n", - " def visualize(self, hue=\"Country\", \n", + " def visualize(self, hue='Country', \n", " palette=sb.color_palette() ): \n", " self.canvas = plt.figure()\n", " key_x = self.key_x\n", @@ -208,9 +210,9 @@ "\n", " sb.scatterplot(data=self.devs, x=key_x, y=key_y, hue=hue, palette=palette)\n", " plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n", - " title = \"Annual Compensation of %s Programmers Over Years of Experience\" % self.language\\\n", - " + \"\\nsample size=%i\" % len (self.devs)\\\n", - " + \"\\ncountry=%s\" % self.country\n", + " title = 'Annual Compensation of %s Programmers Over Years of Experience' % self.language\\\n", + " + '\\nsample size=%i' % len (self.devs)\\\n", + " + '\\ncountry=%s' % self.country\n", " plt.title(title)\n", "\n", " def run_regression(self, model=LinearRegression(), split=train_test_split, \n", @@ -232,31 +234,32 @@ " \n", " m = model.coef_[0][0]\n", " b = model.intercept_[0]\n", - " print(\"+----------------------+\")\n", - " print(\"%s regression line for %s\" % (line_color, self.language))\n", - " print(\"coefficient = %0.2f\" % m)\n", + " print('+----------------------+')\n", + " print('%s regression line for %s' % (line_color, self.language))\n", + " print('coefficient = %0.2f' % m)\n", " print('intercept = %0.2f' % b)\n", " rmse = root_mean_squared_error(y_test, y_pred)\n", - " print(\"rmse = %0.2f\" % rmse)\n", + " print('rmse = %0.2f' % rmse)\n", " r2 = r2_score(y_test, y_pred)\n", - " print(\"r2 score = %0.2f\" % r2)\n", - " print(\"sample predictions:\")\n", + " print('r2 score = %0.2f' % r2)\n", + " print('sample predictions:')\n", " print(y_pred[3:6])\n", - " print(\"+----------------------+\")\n", + " print('+----------------------+')\n", "\n", " plt.figure(self.canvas)\n", " plt.plot(X_test, y_pred, color=line_color, label='Regression Line')\n", - " plt.axhline(y=b, color=\"purple\", linestyle='--', \n", - " label=\"b=%0.2f\" % b, zorder=-1 )\n", + " plt.axhline(y=b, color='purple', linestyle='--', \n", + " label='b=%0.2f' % b, zorder=-1 )\n", " plt.legend(loc='lower center', bbox_to_anchor=(1.5,0)) \n", " del y_pred, model\n", "\n", "\n", - " def export_image(self, base_filename = \"images/programmers-%s-%s.png\"):\n", + " def export_image(self, base_filename = 'images/programmers-%s-%s.png'):\n", " plt.figure(self.canvas)\n", " filename = base_filename % (self.language, self.country)\n", " plt.savefig(filename.replace(' ', '-'), bbox_inches='tight')\n", "\n", + "\n", "# the higher a is, the steeper the line gets\n", "def log_base_a(x, a=1.07):\n", " return np.log10(x)/np.log(a)" @@ -281,15 +284,15 @@ "source": [ "\n", "# expected python jobs\n", - "pyjobs = [\"Data scientist or machine learning specialist\",\n", - " \"Data or business analyst\",\n", - " \"Data engineer\",\n", + "pyjobs = ['Data scientist or machine learning specialist',\n", + " 'Data or business analyst',\n", + " 'Data engineer',\n", "# \"DevOps specialist\",\n", "# \"Developer, QA or test\"\n", "]\n", "\n", - "python = Foo(so_df, \"Python\", jobs=pyjobs, n_rich_outliers=12, n_poor_outliers=2)\n", - "python.visualize(hue=\"DevType\", palette=[\"#dbdb32\", \"#34bf65\", \"#ac70e0\"])\n", + "python = Foo(so_df, 'Python', jobs=pyjobs, n_rich_outliers=12, n_poor_outliers=2)\n", + "python.visualize(hue='DevType', palette=['#dbdb32', '#34bf65', '#ac70e0'])\n", "python.run_regression()\n", "python.run_regression( x_transform=log_base_a, change_base=1.20, \n", " x_shift=0, y_shift=-1.5e4, line_color='cyan', random=888)\n", @@ -337,17 +340,17 @@ "source": [ "# expected C jobs\n", "cjobs = [\n", - " \"Developer, embedded applications or devices\", \n", - " \"Developer, game or graphics\",\n", - " \"Hardware Engineer\" ,\n", + " 'Developer, embedded applications or devices', \n", + " 'Developer, game or graphics',\n", + " 'Hardware Engineer',\n", " # \"Project manager\", \n", " # \"Product manager\"\n", "]\n", - "c = Foo(so_df, \"C\", jobs=cjobs, n_rich_outliers=30, n_poor_outliers=2)\n", - "c.visualize(hue=\"DevType\", palette=[\"#57e6da\",\"#d9e352\",\"#cc622d\"] ) \n", + "c = Foo(so_df, 'C', jobs=cjobs, n_rich_outliers=30, n_poor_outliers=2)\n", + "c.visualize(hue='DevType', palette=['#57e6da','#d9e352','#cc622d'] ) \n", "c.run_regression()\n", "c.run_regression(x_transform=log_base_a, change_base=1.3, \n", - " x_shift=2, y_shift=-5000, line_color=\"magenta\", random=555)\n", + " x_shift=2, y_shift=-5000, line_color='magenta', random=555)\n", "c.export_image()" ] },