From eade20d8285c411a463b830646e3b6949e6fcd65 Mon Sep 17 00:00:00 2001 From: linharesjunior Date: Thu, 19 Oct 2023 15:36:30 +0200 Subject: [PATCH] pandas merge done --- code/pandas_1_concat-merge-join.ipynb | 793 ++++++++++++++++++++++++-- 1 file changed, 761 insertions(+), 32 deletions(-) diff --git a/code/pandas_1_concat-merge-join.ipynb b/code/pandas_1_concat-merge-join.ipynb index c66e580..7eb56a4 100644 --- a/code/pandas_1_concat-merge-join.ipynb +++ b/code/pandas_1_concat-merge-join.ipynb @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 18, "metadata": { "scrolled": true }, @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -194,7 +194,7 @@ "5 a5 b5 c5" ] }, - "execution_count": 5, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -205,10 +205,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DEF
0d0e0f0
1d1e1f1
2d2e2f2
3d3e3f3
4d4e4f4
5d5e5f5
\n", + "
" + ], + "text/plain": [ + " D E F\n", + "0 d0 e0 f0\n", + "1 d1 e1 f1\n", + "2 d2 e2 f2\n", + "3 d3 e3 f3\n", + "4 d4 e4 f4\n", + "5 d5 e5 f5" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df3,df4])" + ] }, { "cell_type": "markdown", @@ -223,10 +307,175 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDEF
0a0b0c0NaNNaNNaN
1a1b1c1NaNNaNNaN
2a2b2c2NaNNaNNaN
3a3b3c3NaNNaNNaN
4a4b4c4NaNNaNNaN
5a5b5c5NaNNaNNaN
0NaNNaNNaNd0e0f0
1NaNNaNNaNd1e1f1
2NaNNaNNaNd2e2f2
3NaNNaNNaNd3e3f3
4NaNNaNNaNd4e4f4
5NaNNaNNaNd5e5f5
\n", + "
" + ], + "text/plain": [ + " A B C D E F\n", + "0 a0 b0 c0 NaN NaN NaN\n", + "1 a1 b1 c1 NaN NaN NaN\n", + "2 a2 b2 c2 NaN NaN NaN\n", + "3 a3 b3 c3 NaN NaN NaN\n", + "4 a4 b4 c4 NaN NaN NaN\n", + "5 a5 b5 c5 NaN NaN NaN\n", + "0 NaN NaN NaN d0 e0 f0\n", + "1 NaN NaN NaN d1 e1 f1\n", + "2 NaN NaN NaN d2 e2 f2\n", + "3 NaN NaN NaN d3 e3 f3\n", + "4 NaN NaN NaN d4 e4 f4\n", + "5 NaN NaN NaN d5 e5 f5" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2, df3,df4])" + ] }, { "cell_type": "markdown", @@ -244,10 +493,175 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDEF
0a0b0c0NaNNaNNaN
1a1b1c1NaNNaNNaN
2a2b2c2NaNNaNNaN
3a3b3c3NaNNaNNaN
4a4b4c4NaNNaNNaN
5a5b5c5NaNNaNNaN
6NaNNaNNaNd0e0f0
7NaNNaNNaNd1e1f1
8NaNNaNNaNd2e2f2
9NaNNaNNaNd3e3f3
10NaNNaNNaNd4e4f4
11NaNNaNNaNd5e5f5
\n", + "
" + ], + "text/plain": [ + " A B C D E F\n", + "0 a0 b0 c0 NaN NaN NaN\n", + "1 a1 b1 c1 NaN NaN NaN\n", + "2 a2 b2 c2 NaN NaN NaN\n", + "3 a3 b3 c3 NaN NaN NaN\n", + "4 a4 b4 c4 NaN NaN NaN\n", + "5 a5 b5 c5 NaN NaN NaN\n", + "6 NaN NaN NaN d0 e0 f0\n", + "7 NaN NaN NaN d1 e1 f1\n", + "8 NaN NaN NaN d2 e2 f2\n", + "9 NaN NaN NaN d3 e3 f3\n", + "10 NaN NaN NaN d4 e4 f4\n", + "11 NaN NaN NaN d5 e5 f5" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1, df2, df3, df4], ignore_index=True)" + ] }, { "cell_type": "code", @@ -277,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -293,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -352,7 +766,7 @@ "2 i2 a2 b2" ] }, - "execution_count": 8, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -363,7 +777,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -422,7 +836,7 @@ "2 i3 c3 d3" ] }, - "execution_count": 9, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -447,10 +861,85 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
idx
i0a0b0NaNNaN
i1a1b1c1d1
i2a2b2c2d2
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "idx \n", + "i0 a0 b0 NaN NaN\n", + "i1 a1 b1 c1 d1\n", + "i2 a2 b2 c2 d2" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left.set_index(\"idx\").join(right.set_index(\"idx\"))" + ] }, { "cell_type": "markdown", @@ -489,14 +978,121 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDEF
0a0b0c0NaNNaNNaN
1a1b1c1NaNNaNNaN
2a2b2c2NaNNaNNaN
3NaNNaNNaNd3e3f3
4NaNNaNNaNd4e4f4
5NaNNaNNaNd5e5f5
\n", + "
" + ], + "text/plain": [ + " A B C D E F\n", + "0 a0 b0 c0 NaN NaN NaN\n", + "1 a1 b1 c1 NaN NaN NaN\n", + "2 a2 b2 c2 NaN NaN NaN\n", + "3 NaN NaN NaN d3 e3 f3\n", + "4 NaN NaN NaN d4 e4 f4\n", + "5 NaN NaN NaN d5 e5 f5" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left.set_index(\"idx\").join(right.set_index(\"idx\"))\n", + "result = pd.concat([df1, df4], ignore_index=True, sort=False)\n", + "result" + ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -555,7 +1151,7 @@ "2 a2 b2 c2" ] }, - "execution_count": 11, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -566,7 +1162,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -625,7 +1221,7 @@ "5 a5 b5 c5" ] }, - "execution_count": 12, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -636,10 +1232,143 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DEF
0d0e0f0
1d1e1f1
2d2e2f2
\n", + "
" + ], + "text/plain": [ + " D E F\n", + "0 d0 e0 f0\n", + "1 d1 e1 f1\n", + "2 d2 e2 f2" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DEF
3d3e3f3
4d4e4f4
5d5e5f5
\n", + "
" + ], + "text/plain": [ + " D E F\n", + "3 d3 e3 f3\n", + "4 d4 e4 f4\n", + "5 d5 e5 f5" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4" + ] } ], "metadata": { @@ -658,7 +1387,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.11.5" }, "toc": { "base_numbering": 1,