From 53a092c7d5a03dfb3c7a2456079a4a81723e1b03 Mon Sep 17 00:00:00 2001 From: Reinder Vos de Wael Date: Mon, 15 Jul 2024 14:40:45 -0400 Subject: [PATCH] feat: improve summarization prompting --- src/routes/summarization/+page.svelte | 12 ++--- src/routes/summarization/prompt.ts | 65 ++++++++++++++++++++------- 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/src/routes/summarization/+page.svelte b/src/routes/summarization/+page.svelte index 9cfe2cc..2fa18f8 100644 --- a/src/routes/summarization/+page.svelte +++ b/src/routes/summarization/+page.svelte @@ -29,7 +29,7 @@ return } const text = await docxToText(file[0]) - const userPrompt = getTextBetween(text, "clinical summary and impressions", "Recommendations") + const userPrompt = getTextBetween(text, /\n\s+clinical summary and impressions/i, /\n\s+recommendations/i) if (!userPrompt) { const toast = { message: "Could not find the 'clinical summary and impressions' and 'recommendations' in the document.", @@ -42,6 +42,7 @@ const form = new FormData() form.append("userPrompt", userPrompt) form.append("systemPrompt", systemPrompt) + form.append("model", model) loading = true const response = await fetch("/api/llm", { @@ -107,10 +108,11 @@ }) } - function getTextBetween(text: string, start: string, end: string) { - const startIndex = text.toLowerCase().indexOf(start.toLowerCase()) - const endIndex = text.toLowerCase().indexOf(end.toLowerCase()) - console.log(startIndex, endIndex) + function getTextBetween(text: string, start: RegExp, end: RegExp) { + const regexStart = new RegExp(start) + const regexEnd = new RegExp(end) + const startIndex = text.search(regexStart) + const endIndex = text.search(regexEnd) if (startIndex === -1 || endIndex === -1) return null return text.slice(startIndex, endIndex) } diff --git a/src/routes/summarization/prompt.ts b/src/routes/summarization/prompt.ts index 8f63a5b..0ebae02 100644 --- a/src/routes/summarization/prompt.ts +++ b/src/routes/summarization/prompt.ts @@ -1,5 +1,5 
@@ export const systemPrompt = ` -The following is a part of a report summarising developmental and mental +The following is a part of a Healthy Brain Network (HBN) report summarising developmental and mental health assessment of a child. It has been written for psychologists and other mental health professionals. Shorten it and rephrase it to make it easier to understand for parents that are not mental health professionals @@ -20,21 +20,45 @@ assessments. The results are based on the information you provided, the information [FIRST_NAME] provided, and the results of the assessments. For more detailed information, please refer to the clinical report.” -This paragraph should start with “Your child's assessment results led to the -following diagnoses:”, followed by a numbered list of diagnoses without -their DSM-5 number, followed by “To explain these diagnoses in simple +The list of diagnoses can take one of two forms + +1) If there are no diagnoses to report, the paragraph should start with “Your +child's assessment results did not lead to any diagnoses.” and end there. +2) If there are diagnoses to report, the paragraph should start with “Your +child's assessment results led to the +following diagnoses:”, followed by an empty line and then a numbered list of diagnoses without +their DSM-5 number. Include all classifiers in this list i.e. do not separate +ruled-out diagnoses from confirmed or similar. This list is followed by a new paragraph starting with “To explain +these diagnoses in simple language” followed by explanations of these diagnoses in simple terms. -Always start each explanation with “[FIRST_NAME] was diagnosed with”. The -names of the diagnoses should never be altered or simplified. +The names of the diagnoses should never be altered or simplified. +Only provide an explanation of the diagnoses without specific examples e.g. +do not say something like "These challenges can affect their daily life, +including school and social interactions." 
Examples of explanations are as +follows: + +"Your child’s assessment results led to the following diagnoses: + +1. Attention-Deficit/Hyperactivity Disorder, Predominantly inattentive presentation +2. Language Disorder (rule-out) + +To explain these diagnoses in simple language: + +[FIRST_NAME] was diagnosed with Attention-Deficit/Hyperactivity Disorder (ADHD), Predominantly Inattentive presentation. This means that [FIRST_NAME] frequently displays signs of inattention and is often easily distracted. This will need some special attention and strategies to help enhance focus and attention span. + +[FIRST_NAME] was also flagged for a potential Language Disorder which means there could be difficulties with the proper use of language for communication. This needs to be explored further in order to assess for this diagnosis." The summary of the report should go over all the assessments in the report -in a way that is easy to understand for parents. ALWAYS INCLUDE THE EXACT -SCORE (Low Average, Average, etc…) FOR EACH TESTED COMPONENT as well as -their name. The summary should be no longer than 600 words. +in a way that is easy to understand for parents. Always include the exact +score (Low Average, Average, etc…) for each tested component as well as +their name, but do not include the percentile. The summary should be no longer +than 600 words. An example score paragraph is as follows: -Avoid the use of pronouns like he/she/they throughout the report, and simply -use the patient's name when referring to them. For example, instead of “most -children his/her age” use “most children [FIRST_NAME]'s age”. +"When it comes to academics, [FIRST_NAME] did well in both reading and math. +[FIRST_NAME]’s reading skills, including recognizing words and decoding, reading fluently, +and comprehension, were Above Average. In math, [FIRST_NAME] was significantly advanced +in doing calculations, applying problem-solving skills, and keeping a quick pace, scoring Above Average. 
+[FIRST_NAME]’s spelling was Average." The closing paragraph should be, verbatim, as follows: @@ -47,12 +71,21 @@ On behalf of the Health Brain Network at the Child Mind Institute, thank you and [FIRST_NAME] for your patience and commitment to completing this evaluation. We hope that the results will be helpful.” -Based on prior testing with your summaries of clinical reports, we've made -the following observations we'd like you to keep in mind while writing the -summaries: - Only working memory is assessed, not short or long term memory. The language abilities tested here should be collectively referred only as “core language”, not just “language”. +Ensure that the entire text is written in the style of a school textbook and that +the text flows naturally, i.e. do not use inline lists. + +Diagnoses will come with a classifier. Descriptions of the classifiers are as follows: + +- Confirmed: Full criteria for a diagnosis are met and HBN is assigning the diagnosis to the participant. HBN’s evaluation protocols are sufficient in making the diagnosis. No extra specifier is needed. +- Presumptive: Full criteria are likely met based on our evaluation and history of impairment, though HBN is unable to confirm the diagnosis due to a limitation in our evaluation protocol. The recommendations could be implemented without the need for additional testing. +- Requires Confirmation: Full criteria are likely met based on our evaluation and history of impairment, though HBN is unable to confirm the diagnosis due to a limitation in our evaluation protocol. Additionally, there is less evidence from our evaluation AND historic impairment and so less certainty than Presumptive. The disorder would require further testing in order to confirm the diagnosis. The recommendations could be implemented without the need for additional testing. 
+- Rule-out: Symptoms of a disorder are not clearly defined within one diagnostic criteria and/or are similar or overlap with other presenting disorders. OR Insufficient information in the HBN evaluation to make a diagnosis (or to say that the child definitively does not have a diagnosis), but concerns or vulnerabilities were evident that should be further evaluated /monitored. +- No Diagnosis: The evaluation was completed and symptoms reported do not meet diagnostic criteria for any disorder. +- Past: Full criteria for a disorder were reported during Mental Health Interview, though symptoms are reported to be no longer present for the past 2 months. +- By History: A diagnosis of a disorder was reported during the HBN evaluation, though HBN is unable to confirm this diagnosis, either because the diagnosis is not fully assessed by HBN OR there was insufficient evidence on the HBN evaluation to confirm the previous diagnosis. + Format your response as Markdown.`