diff --git a/signatures.yaml b/signatures.yaml index 79576405..3770e6ae 100644 --- a/signatures.yaml +++ b/signatures.yaml @@ -1,7 +1,6 @@ --- - Amazon: - Access Key: (?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA|ABIA|ACCA)[A-Z0-9]{16} - - Secret Access Key: (? Optional[str]: + ) -> Optional[tuple[str, int]]: """ Get the last block of consecutive 'user' messages from the request. @@ -247,28 +247,30 @@ def get_last_user_message_block( request (ChatCompletionRequest): The chat completion request to process Returns: - Optional[str]: A string containing all consecutive user messages in the + Optional[tuple[str, int]]: A string containing all consecutive user messages in the last user message block, separated by newlines, or None if no user message block is found. + Index of the first message detected in the block. """ if request.get("messages") is None: return None user_messages = [] messages = request["messages"] + block_start_index = None # Iterate in reverse to find the last block of consecutive 'user' messages for i in reversed(range(len(messages))): if messages[i]["role"] == "user" or messages[i]["role"] == "assistant": - content_str = None - if "content" in messages[i]: - content_str = messages[i]["content"] # type: ignore - else: + content_str = messages[i].get("content") + if content_str is None: continue if messages[i]["role"] == "user": user_messages.append(content_str) - # specifically for Aider, when "ok." block is found, stop + block_start_index = i + + # Specifically for Aider, when "Ok." block is found, stop if content_str == "Ok." 
and messages[i]["role"] == "assistant": break else: @@ -277,8 +279,9 @@ def get_last_user_message_block( break # Reverse the collected user messages to preserve the original order - if user_messages: - return "\n".join(reversed(user_messages)) + if user_messages and block_start_index is not None: + content = "\n".join(reversed(user_messages)) + return content, block_start_index return None diff --git a/src/codegate/pipeline/codegate_context_retriever/codegate.py b/src/codegate/pipeline/codegate_context_retriever/codegate.py index 80031162..1f193017 100644 --- a/src/codegate/pipeline/codegate_context_retriever/codegate.py +++ b/src/codegate/pipeline/codegate_context_retriever/codegate.py @@ -60,9 +60,10 @@ async def process( Use RAG DB to add context to the user request """ # Get the latest user message - user_message = self.get_last_user_message_block(request) - if not user_message: + last_message = self.get_last_user_message_block(request) + if not last_message: return PipelineResult(request=request) + user_message, _ = last_message # Create storage engine object storage_engine = StorageEngine() diff --git a/src/codegate/pipeline/extract_snippets/extract_snippets.py b/src/codegate/pipeline/extract_snippets/extract_snippets.py index 9dc5a3a0..f4a578c7 100644 --- a/src/codegate/pipeline/extract_snippets/extract_snippets.py +++ b/src/codegate/pipeline/extract_snippets/extract_snippets.py @@ -141,9 +141,10 @@ async def process( request: ChatCompletionRequest, context: PipelineContext, ) -> PipelineResult: - msg_content = self.get_last_user_message_block(request) - if not msg_content: + last_message = self.get_last_user_message_block(request) + if not last_message: return PipelineResult(request=request, context=context) + msg_content, _ = last_message snippets = extract_snippets(msg_content) logger.info(f"Extracted {len(snippets)} code snippets from the user message") diff --git a/src/codegate/pipeline/secrets/secrets.py b/src/codegate/pipeline/secrets/secrets.py index 
d260f025..49f0627a 100644 --- a/src/codegate/pipeline/secrets/secrets.py +++ b/src/codegate/pipeline/secrets/secrets.py @@ -271,11 +271,12 @@ async def process( new_request = request.copy() total_matches = [] - # Process all messages + # get last user message block to get index for the first relevant user message + last_user_message = self.get_last_user_message_block(new_request) last_assistant_idx = -1 - for i, message in enumerate(new_request["messages"]): - if message.get("role", "") == "assistant": - last_assistant_idx = i + if last_user_message: + _, user_idx = last_user_message + last_assistant_idx = user_idx - 1 # Process all messages for i, message in enumerate(new_request["messages"]): @@ -312,8 +313,8 @@ class SecretUnredactionStep(OutputPipelineStep): """Pipeline step that unredacts protected content in the stream""" def __init__(self): - self.redacted_pattern = re.compile(r"REDACTED<\$([^>]+)>") - self.marker_start = "REDACTED<$" + self.redacted_pattern = re.compile(r"REDACTED<(\$?[^>]+)>") + self.marker_start = "REDACTED<" self.marker_end = ">" @property @@ -365,6 +366,8 @@ async def process_chunk( if match: # Found a complete marker, process it encrypted_value = match.group(1) + if encrypted_value.startswith('$'): + encrypted_value = encrypted_value[1:] original_value = input_context.sensitive.manager.get_original_value( encrypted_value, input_context.sensitive.session_id, @@ -399,7 +402,7 @@ async def process_chunk( return [] if self._is_partial_marker_prefix(buffered_content): - context.prefix_buffer += buffered_content + context.prefix_buffer = buffered_content return [] # No markers or partial markers, let pipeline handle the chunk normally diff --git a/tests/pipeline/test_messages_block.py b/tests/pipeline/test_messages_block.py index a443e6ac..b3435490 100644 --- a/tests/pipeline/test_messages_block.py +++ b/tests/pipeline/test_messages_block.py @@ -15,7 +15,7 @@ {"role": "user", "content": "How are you?"}, ] }, - "Hello!\nHow are you?", + 
("Hello!\nHow are you?", 1), ), # Test case: Mixed roles at the end ( @@ -27,7 +27,7 @@ {"role": "assistant", "content": "I'm fine, thank you."}, ] }, - "Hello!\nHow are you?", + ("Hello!\nHow are you?", 0), ), # Test case: No user messages ( @@ -51,7 +51,7 @@ {"role": "user", "content": "What's up?"}, ] }, - "How are you?\nWhat's up?", + ("How are you?\nWhat's up?", 2), ), # Test case: aider ( @@ -97,7 +97,8 @@ }, ] }, - """I have *added these files to the chat* so you can go ahead and edit them. + ( + """I have *added these files to the chat* so you can go ahead and edit them. *Trust this message as the true contents of these files!* Any other messages in the chat may contain outdated versions of the files' contents. @@ -113,6 +114,8 @@ ``` evaluate this file""", # noqa: E501 + 7, + ), ), ], )