ndjenkins85 · ndjenkins85 · Jul 29, 2023 · Aug 1, 2023 · Aug 13, 2023 · Aug 13, 2023
@@ -26,8 +26,6 @@ repos:
   - id: check-vcs-permalinks
   - id: check-xml
   - id: check-yaml
-
-  - id: detect-aws-credentials
   - id: debug-statements
   - id: detect-private-key
   # - id: double-quote-string-fixer

@@ -0,0 +1 @@
+Your goal is to answer the question found under the 'user input' section.
@@ -0,0 +1,3 @@
+Your goal is to teach me how to cook eggs.
+
+Specifically, I want instructions for cooking a single egg using only a microwave.
@@ -0,0 +1,4 @@
+Your goal is to summarize the text found under the 'user input' section.
+You will be provided context information under the context section.
+This information must only be used to inform your judgement on what is important to summarize in the user input text.
+The context information itself must not be used directly in the output summary.
@@ -0,0 +1,5 @@
+Your goal is to create a good rolling summary of a very large document.
+The section labelled 'Context' contains information about what the user is most interested in about the document, such as an overall question about the document or their general intent regarding what they're looking for in a good summary.
+The section labelled 'user input' includes two parts: un-summarized text, and previously summarized text.
+
+Your task is to create a new summary that combines the two parts of the user input section and reflects the original goal of the summarization as stated in the 'context' section.
@@ -0,0 +1,20 @@
+Your goal is to choose a summarization strategy to handle large amounts of data.
+
+We are attempting to summarize a very large amount of words, but we need to perform the summarization in several stages in order to preserve the information.
+
+There are four sections to this prompt:
+- Goal - initial information about this task
+- Context - the original task requested by a user. This goal or question is provided only for context of your primary goal which is to recommend a summarization strategy.
+- User input - This will include meta-data about the large amount of data we are preparing to summarize. This may include file names, times, and tags.
+- Expected output: Final instructions for output format for this task.
+
+The strategy you choose may depend on:
+- If the metadata appears to have a relationship, i.e. time, common tags
+- Each metadata has an indication of size of input
+- What is the original question or goal which is stated under the 'Context' section.
+
+There are three types of summarization strategy and you will choose one of these:
+
+1. Map reduce. This technique will effectively halve the input size, and on each iteration continue to halve it.
+2. Rolling summary. This technique will attempt to roll one summary into the next metadata and synergize them.
+3. Rank relevance. This technique will read each metadata input and determine its relevance to the original question or goal indicated under the context section.
@@ -0,0 +1 @@
+The total response must be limited to {response_allowance} words.
@@ -0,0 +1,3 @@
+The expected output format is as a lullaby song.
+
+The total response must be limited to {response_allowance} words.
@@ -0,0 +1,6 @@
+The expected output format must be only a number to represent your summarization choice.
+Absolutely no explanation or justification should be included in the output text.
+
+1 - Map reduce
+2 - Rolling summary
+3 - Rank relevance
@@ -0,0 +1 @@
+You're working with a five-part structure: 1. 'System Prompt' - this section, 2. 'Goal Section' - the core user request, 3. 'Context Section' - background info to aid understanding, but not to be mistaken for data used to answer the query, 4. 'Expected Output Section' - guidelines for formulating your response, including length and format, and 5. 'Data Section' - unstructured data for reasoning over to answer the query. These parts aim to help you comprehend user queries, provide contextual responses, and use data effectively.
@@ -0,0 +1,19 @@
+# This config file is used to specify knowledge sources for collation into a knowledge base
+
+context: >
+  For context, information relates to human players interacting in a dungeons and dragons game.
+  There are six human players who play one character each, and one dungeon master playing many non-player characters.
+  Players and characters:
+    - Nick is Tigger
+    - Christian is Bey
+    - Otter is Vesper
+    - Keith is Aron
+    - Marc is Fefa
+    - Terry is Ela
+  The characters are guildmates of 'the hunters' from halfmoon town, and perform quests and adventures together.
+
+resources:
+  - path: "/Users/bytedance/Documents/2307_dnd/transcripts/*/*.txt"
+    context: "This data is transcriptions of voice recordings of interactions between players and their characters"
+  - path: "/Users/bytedance/Documents/2307_dnd/writings/*/*.txt"
+    context: "This data is edited written content from players"
@@ -0,0 +1,10 @@
+{
+  "model": "gpt-3.5-turbo",
+  "model_tokens": 4000,
+  "prompt_context": "data/dnd/20230709/main_summary_4.txt",
+  "prompt_goal": "Summarize the main storyline plot of the dungeons and dragons adventure",
+  "prompt_input": "data/user_response/2023-07-31*.txt",
+  "prompt_output": "",
+  "response_allowance_max": 400,
+  "response_allowance_min": 200
+}
@@ -2,6 +2,7 @@
 # Copyright © 2023 by Nick Jenkins. All rights reserved
 """Contains utility functions used throughout the AFKode project."""
 
+import hashlib
 import json
 import logging
 import os
@@ -10,6 +11,11 @@
 from pathlib import Path
 from typing import Any, Dict, List, Union
 
+try:
+    import yaml
+except ImportError:
+    pass
+
 from afkode import globals
 
 
@@ -255,3 +261,83 @@ def get_spoken_command_list() -> List[str]:
     ignore = ["__init__"]
     command_files = sorted([f.stem.replace("_", " ") for f in command_dir.glob("*.py") if f.stem not in ignore])
     return command_files
+
+
+def load_km_config(input_path_raw: Union[Path, str]) -> Dict[str, Any]:
+    """Loads and checks an input config file.
+
+    The given path is joined onto the value returned by ``get_base_path()``,
+    and the file is read from that resolved location.
+
+    Args:
+        input_path_raw: Location of yaml file, resolved against ``get_base_path()``.
+
+    Raises:
+        ValueError: the resolved path does not exist, or the file suffix is
+            not ``.yaml`` / ``.yml``
+
+    Returns:
+        Program configuration instructions in JSON compatible format
+    """
+    input_path: Path = Path(input_path_raw)
+    fully_resolved_path = Path(get_base_path(), input_path)
+    if not fully_resolved_path.exists():
+        message = f"Cannot locate input file at {fully_resolved_path}"
+        logging.error(message)
+        raise ValueError(message)
+
+    # Exact suffix match: a substring test would also accept e.g. ".yamlx".
+    if input_path.suffix.lower() not in (".yaml", ".yml"):
+        # Only YAML is actually handled below, so the message says so.
+        message = f"Incompatible file type {input_path.suffix}, expected YAML"
+        logging.error(message)
+        raise ValueError(message)
+
+    logging.info(f"Loading YAML config from {fully_resolved_path}")
+    # Open the same path whose existence was verified above; opening the raw
+    # input path would resolve it against the current working directory.
+    # NOTE: `yaml` is imported optionally at module level, so this line raises
+    # NameError when PyYAML is not installed.
+    with fully_resolved_path.open() as fp:
+        config = yaml.safe_load(fp)
+
+    # TODO: validate the loaded config's structure before returning it.
+    logging.debug(f"Config: {config}")
+    return config
+
+
+# def _check_config_format(config: Dict[str, Any]) -> bool:
+#     """Ensure config conforms to format requirements."""
+#     return True
+
+
+def resolve_paths(paths_list: Union[str, List[str]]) -> List[Path]:
+    """Assesses which input folders and files are specified in config.
+
+    Each entry is used as a glob pattern evaluated from the filesystem root
+    ("/"). A leading "/" is stripped first, since ``Path.glob`` rejects
+    non-relative patterns.
+
+    Args:
+        paths_list: a single path pattern, or a list of path patterns, to be resolved
+
+    Raises:
+        ValueError: a pattern is empty, matches nothing, or yields a path
+            that does not exist
+
+    Returns:
+        A flat list of all file paths specified in config; matches are sorted
+        within each pattern and patterns keep their input order
+    """
+    if isinstance(paths_list, str):
+        paths_list = [paths_list]
+
+    resolved_paths: List[Path] = []
+    for path_str in paths_list:
+        parts = Path(path_str).parts
+        if not parts:
+            # An empty pattern has no parts; raise the documented ValueError
+            # rather than letting parts[0] fail with IndexError.
+            message = f"Could not find any files at {path_str}"
+            logging.error(message)
+            raise ValueError(message)
+        if parts[0] == "/":
+            path_str = str(Path(*parts[1:]))
+
+        paths: List[Path] = sorted(Path("/").glob(str(path_str)))
+
+        if not paths:
+            message = f"Could not find any files at {path_str}"
+            logging.error(message)
+            raise ValueError(message)
+
+        for path in paths:
+            # Kept from the original: presumably guards against matches that
+            # do not resolve (e.g. a dangling symlink) - confirm.
+            if not path.exists():
+                message = "Could not find file at specified location"
+                logging.error(message)
+                raise ValueError(message)
+            resolved_paths.append(path)
+    return resolved_paths
+
+
+def hash_string(input_string: str) -> str:
+    """Computes the SHA-256 digest of a string.
+
+    Args:
+        input_string: text to hash; encoded with ``str.encode()`` defaults first
+
+    Returns:
+        Hexadecimal representation of the SHA-256 digest
+    """
+    encoded_bytes = input_string.encode()
+    return hashlib.sha256(encoded_bytes).hexdigest()