From 990b33556a34cc66983680346532b1b80c649874 Mon Sep 17 00:00:00 2001 From: Leon Plaza Date: Tue, 24 Oct 2023 01:39:19 +0200 Subject: [PATCH] Leon --- code/pandas_1_concat-merge-join.ipynb | 673 +++++++++++++++++++++++--- 1 file changed, 616 insertions(+), 57 deletions(-) diff --git a/code/pandas_1_concat-merge-join.ipynb b/code/pandas_1_concat-merge-join.ipynb index c66e580..d667823 100644 --- a/code/pandas_1_concat-merge-join.ipynb +++ b/code/pandas_1_concat-merge-join.ipynb @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -102,7 +102,7 @@ " 'F': ['f'+str(x) for x in range(3, 6)]},\n", " index=[3, 4, 5]) \n", "\n", - "print(df1, '\\n---\\n', df2, '\\n---\\n', df3, '\\n---\\n',df4)" + "print(df1, '\\n---\\n', df2, '\\n---\\n', df3, '\\n---\\n',df4)\n" ] }, { @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -194,7 +194,7 @@ "5 a5 b5 c5" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -205,10 +205,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DEF
0d0e0f0
1d1e1f1
2d2e2f2
3d3e3f3
4d4e4f4
5d5e5f5
\n", + "
" + ], + "text/plain": [ + " D E F\n", + "0 d0 e0 f0\n", + "1 d1 e1 f1\n", + "2 d2 e2 f2\n", + "3 d3 e3 f3\n", + "4 d4 e4 f4\n", + "5 d5 e5 f5" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df3,df4])" + ] }, { "cell_type": "markdown", @@ -223,10 +307,175 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDEF
0a0b0c0NaNNaNNaN
1a1b1c1NaNNaNNaN
2a2b2c2NaNNaNNaN
3a3b3c3NaNNaNNaN
4a4b4c4NaNNaNNaN
5a5b5c5NaNNaNNaN
0NaNNaNNaNd0e0f0
1NaNNaNNaNd1e1f1
2NaNNaNNaNd2e2f2
3NaNNaNNaNd3e3f3
4NaNNaNNaNd4e4f4
5NaNNaNNaNd5e5f5
\n", + "
" + ], + "text/plain": [ + " A B C D E F\n", + "0 a0 b0 c0 NaN NaN NaN\n", + "1 a1 b1 c1 NaN NaN NaN\n", + "2 a2 b2 c2 NaN NaN NaN\n", + "3 a3 b3 c3 NaN NaN NaN\n", + "4 a4 b4 c4 NaN NaN NaN\n", + "5 a5 b5 c5 NaN NaN NaN\n", + "0 NaN NaN NaN d0 e0 f0\n", + "1 NaN NaN NaN d1 e1 f1\n", + "2 NaN NaN NaN d2 e2 f2\n", + "3 NaN NaN NaN d3 e3 f3\n", + "4 NaN NaN NaN d4 e4 f4\n", + "5 NaN NaN NaN d5 e5 f5" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2,df3,df4], sort=False)" + ] }, { "cell_type": "markdown", @@ -244,10 +493,176 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDEF
0a0b0c0NaNNaNNaN
1a1b1c1NaNNaNNaN
2a2b2c2NaNNaNNaN
3a3b3c3NaNNaNNaN
4a4b4c4NaNNaNNaN
5a5b5c5NaNNaNNaN
6NaNNaNNaNd0e0f0
7NaNNaNNaNd1e1f1
8NaNNaNNaNd2e2f2
9NaNNaNNaNd3e3f3
10NaNNaNNaNd4e4f4
11NaNNaNNaNd5e5f5
\n", + "
" + ], + "text/plain": [ + " A B C D E F\n", + "0 a0 b0 c0 NaN NaN NaN\n", + "1 a1 b1 c1 NaN NaN NaN\n", + "2 a2 b2 c2 NaN NaN NaN\n", + "3 a3 b3 c3 NaN NaN NaN\n", + "4 a4 b4 c4 NaN NaN NaN\n", + "5 a5 b5 c5 NaN NaN NaN\n", + "6 NaN NaN NaN d0 e0 f0\n", + "7 NaN NaN NaN d1 e1 f1\n", + "8 NaN NaN NaN d2 e2 f2\n", + "9 NaN NaN NaN d3 e3 f3\n", + "10 NaN NaN NaN d4 e4 f4\n", + "11 NaN NaN NaN d5 e5 f5" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2,df3,df4], sort=False, ignore_index=True)\n", + "# The output is the same" + ] }, { "cell_type": "code", @@ -277,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -293,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -352,7 +767,7 @@ "2 i2 a2 b2" ] }, - "execution_count": 8, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -363,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -422,7 +837,7 @@ "2 i3 c3 d3" ] }, - "execution_count": 9, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -447,10 +862,85 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
idx
i0a0b0NaNNaN
i1a1b1c1d1
i2a2b2c2d2
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "idx \n", + "i0 a0 b0 NaN NaN\n", + "i1 a1 b1 c1 d1\n", + "i2 a2 b2 c2 d2" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left.set_index(\"idx\").join(right.set_index(\"idx\"))" + ] }, { "cell_type": "markdown", @@ -474,29 +964,9 @@ "[pandas.DataFrame.join](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.join.html)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bonus Question\n", - "\n", - "Now if you look back on `merge` and `join`, you realize that in order to perform these functions on a set of dataframes, these dataframes must share a common column as the index. Only rows that have the same index values will be joined. This is similar to the [`join` function in MySQL](https://www.w3schools.com/sql/sql_join.asp), isn't it?\n", - "\n", - "The bonus question for you is to figure out how to join and concatenate `df1`, `df2`, `df3`, and `df4` we created at the beginning of this challenge. Your end product should look like this:\n", - "\n", - "![df1-2-3-4.png](../images/df1-2-3-4.png)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -520,53 +990,74 @@ " \n", " \n", " \n", + " idx\n", " A\n", " B\n", " C\n", + " D\n", " \n", " \n", " \n", " \n", " 0\n", + " i0\n", " a0\n", " b0\n", - " c0\n", + " NaN\n", + " NaN\n", " \n", " \n", " 1\n", + " i1\n", " a1\n", " b1\n", " c1\n", + " d1\n", " \n", " \n", " 2\n", + " i2\n", " a2\n", " b2\n", " c2\n", + " d2\n", " \n", " \n", "\n", "" ], "text/plain": [ - " A B C\n", - "0 a0 b0 c0\n", - "1 a1 b1 c1\n", - "2 a2 b2 c2" + " idx A B C D\n", + "0 i0 a0 b0 NaN NaN\n", + "1 i1 a1 b1 c1 d1\n", + "2 i2 a2 b2 c2 d2" ] }, - "execution_count": 11, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df1" + "left.merge(right, how=\"left\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bonus Question\n", + "\n", + "Now if you look back on `merge` and `join`, you realize that in order to perform these functions on a set of dataframes, these dataframes must share a common column as the index. Only rows that have the same index values will be joined. This is similar to the [`join` function in MySQL](https://www.w3schools.com/sql/sql_join.asp), isn't it?\n", + "\n", + "The bonus question for you is to figure out how to join and concatenate `df1`, `df2`, `df3`, and `df4` we created at the beginning of this challenge. Your end product should look like this:\n", + "\n", + "![df1-2-3-4.png](../images/df1-2-3-4.png)" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -593,45 +1084,113 @@ " A\n", " B\n", " C\n", + " D\n", + " E\n", + " F\n", " \n", " \n", " \n", " \n", + " 0\n", + " a0\n", + " b0\n", + " c0\n", + " d0\n", + " e0\n", + " f0\n", + " \n", + " \n", + " 1\n", + " a1\n", + " b1\n", + " c1\n", + " d1\n", + " e1\n", + " f1\n", + " \n", + " \n", + " 2\n", + " a2\n", + " b2\n", + " c2\n", + " d2\n", + " e2\n", + " f2\n", + " \n", + " \n", " 3\n", " a3\n", " b3\n", " c3\n", + " d3\n", + " e3\n", + " f3\n", " \n", " \n", " 4\n", " a4\n", " b4\n", " c4\n", + " d4\n", + " e4\n", + " f4\n", " \n", " \n", " 5\n", " a5\n", " b5\n", " c5\n", + " d5\n", + " e5\n", + " f5\n", " \n", " \n", "\n", "" ], "text/plain": [ - " A B C\n", - "3 a3 b3 c3\n", - "4 a4 b4 c4\n", - "5 a5 b5 c5" + " A B C D E F\n", + "0 a0 b0 c0 d0 e0 f0\n", + "1 a1 b1 c1 d1 e1 f1\n", + "2 a2 b2 c2 d2 e2 f2\n", + "3 a3 b3 c3 d3 e3 f3\n", + "4 a4 b4 c4 d4 e4 f4\n", + "5 a5 b5 c5 d5 e5 f5" ] }, - "execution_count": 12, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df2" + "pd.concat([pd.concat([df1,df3],axis=1),pd.concat([df2,df4],axis=1)], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "columns overlap but no suffix specified: Index(['A', 'B', 'C'], dtype='object')", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[64], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df1\u001b[38;5;241m.\u001b[39mjoin(df2,how\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/ironhack/lib/python3.11/site-packages/pandas/core/frame.py:10415\u001b[0m, in \u001b[0;36mDataFrame.join\u001b[0;34m(self, other, on, how, lsuffix, rsuffix, sort, validate)\u001b[0m\n\u001b[1;32m 10405\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m how \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcross\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 10406\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 10407\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 10408\u001b[0m other,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10413\u001b[0m validate\u001b[38;5;241m=\u001b[39mvalidate,\n\u001b[1;32m 10414\u001b[0m )\n\u001b[0;32m> 10415\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 10416\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 10417\u001b[0m other,\n\u001b[1;32m 10418\u001b[0m left_on\u001b[38;5;241m=\u001b[39mon,\n\u001b[1;32m 10419\u001b[0m how\u001b[38;5;241m=\u001b[39mhow,\n\u001b[1;32m 10420\u001b[0m left_index\u001b[38;5;241m=\u001b[39mon \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 10421\u001b[0m right_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 10422\u001b[0m suffixes\u001b[38;5;241m=\u001b[39m(lsuffix, rsuffix),\n\u001b[1;32m 10423\u001b[0m sort\u001b[38;5;241m=\u001b[39msort,\n\u001b[1;32m 10424\u001b[0m validate\u001b[38;5;241m=\u001b[39mvalidate,\n\u001b[1;32m 10425\u001b[0m )\n\u001b[1;32m 10426\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 10427\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m on \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/envs/ironhack/lib/python3.11/site-packages/pandas/core/reshape/merge.py:183\u001b[0m, in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 169\u001b[0m op \u001b[38;5;241m=\u001b[39m _MergeOperation(\n\u001b[1;32m 170\u001b[0m left_df,\n\u001b[1;32m 171\u001b[0m right_df,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 181\u001b[0m validate\u001b[38;5;241m=\u001b[39mvalidate,\n\u001b[1;32m 182\u001b[0m )\n\u001b[0;32m--> 183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result(copy\u001b[38;5;241m=\u001b[39mcopy)\n", + "File \u001b[0;32m~/miniconda3/envs/ironhack/lib/python3.11/site-packages/pandas/core/reshape/merge.py:885\u001b[0m, in \u001b[0;36m_MergeOperation.get_result\u001b[0;34m(self, copy)\u001b[0m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mleft, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mright \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_indicator_pre_merge(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mleft, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mright)\n\u001b[1;32m 883\u001b[0m join_index, left_indexer, right_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_join_info()\n\u001b[0;32m--> 885\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_and_concat(\n\u001b[1;32m 886\u001b[0m join_index, left_indexer, right_indexer, copy\u001b[38;5;241m=\u001b[39mcopy\n\u001b[1;32m 887\u001b[0m )\n\u001b[1;32m 888\u001b[0m result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_merge_type)\n\u001b[1;32m 890\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindicator:\n", + "File \u001b[0;32m~/miniconda3/envs/ironhack/lib/python3.11/site-packages/pandas/core/reshape/merge.py:837\u001b[0m, in \u001b[0;36m_MergeOperation._reindex_and_concat\u001b[0;34m(self, join_index, left_indexer, right_indexer, copy)\u001b[0m\n\u001b[1;32m 834\u001b[0m left \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mleft[:]\n\u001b[1;32m 835\u001b[0m right \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mright[:]\n\u001b[0;32m--> 837\u001b[0m llabels, rlabels \u001b[38;5;241m=\u001b[39m _items_overlap_with_suffix(\n\u001b[1;32m 838\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mleft\u001b[38;5;241m.\u001b[39m_info_axis, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mright\u001b[38;5;241m.\u001b[39m_info_axis, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msuffixes\n\u001b[1;32m 839\u001b[0m )\n\u001b[1;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m left_indexer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_range_indexer(left_indexer, \u001b[38;5;28mlen\u001b[39m(left)):\n\u001b[1;32m 842\u001b[0m \u001b[38;5;66;03m# Pinning the index here (and in the right code just below) is not\u001b[39;00m\n\u001b[1;32m 843\u001b[0m \u001b[38;5;66;03m# necessary, but makes the `.take` more performant if we have e.g.\u001b[39;00m\n\u001b[1;32m 844\u001b[0m \u001b[38;5;66;03m# a MultiIndex for left.index.\u001b[39;00m\n\u001b[1;32m 845\u001b[0m lmgr \u001b[38;5;241m=\u001b[39m left\u001b[38;5;241m.\u001b[39m_mgr\u001b[38;5;241m.\u001b[39mreindex_indexer(\n\u001b[1;32m 846\u001b[0m join_index,\n\u001b[1;32m 847\u001b[0m left_indexer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 852\u001b[0m use_na_proxy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 853\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/ironhack/lib/python3.11/site-packages/pandas/core/reshape/merge.py:2655\u001b[0m, in \u001b[0;36m_items_overlap_with_suffix\u001b[0;34m(left, right, suffixes)\u001b[0m\n\u001b[1;32m 2652\u001b[0m lsuffix, rsuffix \u001b[38;5;241m=\u001b[39m suffixes\n\u001b[1;32m 2654\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m lsuffix \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m rsuffix:\n\u001b[0;32m-> 2655\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns overlap but no suffix specified: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mto_rename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 2657\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrenamer\u001b[39m(x, suffix: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 2658\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2659\u001b[0m \u001b[38;5;124;03m Rename the left and right indices.\u001b[39;00m\n\u001b[1;32m 2660\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2671\u001b[0m \u001b[38;5;124;03m x : renamed column name\u001b[39;00m\n\u001b[1;32m 2672\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n", + "\u001b[0;31mValueError\u001b[0m: columns overlap but no suffix specified: Index(['A', 'B', 'C'], dtype='object')" + ] + } + ], + "source": [ + "df1.join(df2,how=\"left\")" ] }, { @@ -658,7 +1217,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.11.5" }, "toc": { "base_numbering": 1,