Skip to content

Commit

Permalink
Removed requirements.txt
Browse files Browse the repository at this point in the history
Added Poetry
  • Loading branch information
rahulnyk committed Nov 13, 2023
1 parent 697020f commit 5d21abd
Show file tree
Hide file tree
Showing 6 changed files with 4,061 additions and 309 deletions.
1 change: 0 additions & 1 deletion .python-version

This file was deleted.

2 changes: 1 addition & 1 deletion docs/index.html

Large diffs are not rendered by default.

127 changes: 52 additions & 75 deletions extract_graph.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -39,14 +39,14 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 17.20it/s]"
"100%|██████████| 1/1 [00:01<00:00, 1.82s/it]"
]
},
{
Expand Down Expand Up @@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -135,31 +135,31 @@
" <th>0</th>\n",
" <td>Abstract India’s health indicators have improv...</td>\n",
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
" <td>d6c50af2f56d482db830d502516192d6</td>\n",
" <td>0f56d8fbefa04f1e877f573938f78ff1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Categories: Public Health, Epidemiology/Public...</td>\n",
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
" <td>278e3d8ae6714f889a984c217a3d166d</td>\n",
" <td>92789b719a254c8385327b9d243935b6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Introduction And Background India’s health ind...</td>\n",
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
" <td>813107203afb4531ae915f7bf459b501</td>\n",
" <td>9eefb3bf352a459c8895f272b632724e</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>An extensive literature search was performed, ...</td>\n",
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
" <td>67d0704481e243539db4effb9664c48f</td>\n",
" <td>7c21bdb708d14855b7b3de9d8564b175</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Review Overview of the public and private heal...</td>\n",
" <td>data_input/cureus/cureus-0015-00000040274.txt</td>\n",
" <td>01029501dd3249738f761a6fe1d19ece</td>\n",
" <td>bfc37e1213e7428d963fdac63eb80079</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
Expand All @@ -181,14 +181,14 @@
"4 data_input/cureus/cureus-0015-00000040274.txt \n",
"\n",
" chunk_id \n",
"0 d6c50af2f56d482db830d502516192d6 \n",
"1 278e3d8ae6714f889a984c217a3d166d \n",
"2 813107203afb4531ae915f7bf459b501 \n",
"3 67d0704481e243539db4effb9664c48f \n",
"4 01029501dd3249738f761a6fe1d19ece "
"0 0f56d8fbefa04f1e877f573938f78ff1 \n",
"1 92789b719a254c8385327b9d243935b6 \n",
"2 9eefb3bf352a459c8895f272b632724e \n",
"3 7c21bdb708d14855b7b3de9d8564b175 \n",
"4 bfc37e1213e7428d963fdac63eb80079 "
]
},
"execution_count": 21,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -209,7 +209,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -234,7 +234,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -340,7 +340,7 @@
"7 ae0fd26675d645e787964255667e90f4 4 "
]
},
"execution_count": 23,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -378,7 +378,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -477,7 +477,7 @@
"2831 contextual proximity "
]
},
"execution_count": 24,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -522,7 +522,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -703,7 +703,7 @@
"[758 rows x 5 columns]"
]
},
"execution_count": 25,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -727,7 +727,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand All @@ -736,7 +736,7 @@
"(215,)"
]
},
"execution_count": 26,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -748,7 +748,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -780,7 +780,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -810,7 +810,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -843,31 +843,31 @@
" <tr>\n",
" <th>0</th>\n",
" <td>56 articles</td>\n",
" <td>#ad57db</td>\n",
" <td>#db57db</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>analysis</td>\n",
" <td>#ad57db</td>\n",
" <td>#db57db</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>corresponding authors' experiential knowledge</td>\n",
" <td>#ad57db</td>\n",
" <td>#db57db</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>extensive literature search</td>\n",
" <td>#ad57db</td>\n",
" <td>#db57db</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>peer-reviewed journals</td>\n",
" <td>#ad57db</td>\n",
" <td>#db57db</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
Expand All @@ -879,31 +879,31 @@
" <tr>\n",
" <th>210</th>\n",
" <td>rural medical assistants (rmas)</td>\n",
" <td>#57db6e</td>\n",
" <td>#57bcdb</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211</th>\n",
" <td>limited uptake</td>\n",
" <td>#57bcdb</td>\n",
" <td>#db57ac</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>212</th>\n",
" <td>national health protection mission</td>\n",
" <td>#57bcdb</td>\n",
" <td>#db57ac</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>213</th>\n",
" <td>private health sector systems</td>\n",
" <td>#578ddb</td>\n",
" <td>#57dbcc</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>214</th>\n",
" <td>public</td>\n",
" <td>#578ddb</td>\n",
" <td>#57dbcc</td>\n",
" <td>17</td>\n",
" </tr>\n",
" </tbody>\n",
Expand All @@ -913,22 +913,22 @@
],
"text/plain": [
" node color group\n",
"0 56 articles #ad57db 1\n",
"1 analysis #ad57db 1\n",
"2 corresponding authors' experiential knowledge #ad57db 1\n",
"3 extensive literature search #ad57db 1\n",
"4 peer-reviewed journals #ad57db 1\n",
"0 56 articles #db57db 1\n",
"1 analysis #db57db 1\n",
"2 corresponding authors' experiential knowledge #db57db 1\n",
"3 extensive literature search #db57db 1\n",
"4 peer-reviewed journals #db57db 1\n",
".. ... ... ...\n",
"210 rural medical assistants (rmas) #57db6e 15\n",
"211 limited uptake #57bcdb 16\n",
"212 national health protection mission #57bcdb 16\n",
"213 private health sector systems #578ddb 17\n",
"214 public #578ddb 17\n",
"210 rural medical assistants (rmas) #57bcdb 15\n",
"211 limited uptake #db57ac 16\n",
"212 national health protection mission #db57ac 16\n",
"213 private health sector systems #57dbcc 17\n",
"214 public #57dbcc 17\n",
"\n",
"[215 rows x 3 columns]"
]
},
"execution_count": 29,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -966,7 +966,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -978,7 +978,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 13,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1019,30 +1019,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"SYS_PROMPT = (\n",
" \"You are a network graph maker who extracts terms and their relations from a given context. \"\n",
" \"You are provided with a context chunk (delimited by ```) Your task is to extract the ontology \"\n",
" \"of terms mentioned in the given context. These terms should represent the key concepts as per the context. \\n\"\n",
" \"Thought 1: While traversing through each sentence, Think about the key terms mentioned in it.\\n\"\n",
" \"\\tTerms may include object, entity, location, organization, person, \\n\"\n",
" \"\\tcondition, acronym, documents, service, concept, etc.\\n\"\n",
" \"\\tTerms should be as atomistic as possible\\n\\n\"\n",
" \"Thought 2: Think about how these terms can have one on one relation with other terms.\\n\"\n",
" \"\\tTerms that are mentioned in the same sentence or the same paragraph are typically related to each other.\\n\"\n",
" \"\\tTerms can be related to many other terms\\n\\n\"\n",
" \"Thought 3: Find out the relation between each such related pair of terms. \\n\\n\"\n",
" \"Format your output as a list of json. Each element of the list contains a pair of terms\"\n",
" \"and the relation between them, like the follwing: \\n\"\n",
" \"[\\n\"\n",
" \" {\\n\"\n",
" ' \"node_1\": \"A concept from extracted ontology\",\\n'\n",
" ' \"node_2\": \"A related concept from extracted ontology\",\\n'\n",
" ' \"edge\": \"relationship between the two concepts, node_1 and node_2 in one or two sentences\"\\n'\n",
" \" }, {...}\\n\"\n",
" \"]\"\n",
")"
]
"source": []
}
],
"metadata": {
Expand All @@ -1061,7 +1038,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.11.4"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 5d21abd

Please sign in to comment.