From c5e0e30b0f689b879b3e258adffea42e8e57a733 Mon Sep 17 00:00:00 2001 From: poorna2152 Date: Thu, 26 Sep 2024 07:55:16 +0530 Subject: [PATCH 1/7] Add regexp find operations bbe --- examples/index.json | 7 +++ .../regexp_find_operations.bal | 55 +++++++++++++++++++ .../regexp_find_operations.md | 13 +++++ .../regexp_find_operations.metatags | 2 + .../regexp_find_operations.out | 23 ++++++++ 5 files changed, 100 insertions(+) create mode 100644 examples/regexp-find-operations/regexp_find_operations.bal create mode 100644 examples/regexp-find-operations/regexp_find_operations.md create mode 100644 examples/regexp-find-operations/regexp_find_operations.metatags create mode 100644 examples/regexp-find-operations/regexp_find_operations.out diff --git a/examples/index.json b/examples/index.json index f8b606bbbc..43003c19c2 100644 --- a/examples/index.json +++ b/examples/index.json @@ -1201,6 +1201,13 @@ "verifyBuild": true, "verifyOutput": true, "isLearnByExample": true + }, + { + "name": "RegExp find operations", + "url": "regexp-find-operations", + "verifyBuild": true, + "verifyOutput": true, + "isLearnByExample": true } ] }, diff --git a/examples/regexp-find-operations/regexp_find_operations.bal b/examples/regexp-find-operations/regexp_find_operations.bal new file mode 100644 index 0000000000..0879269e08 --- /dev/null +++ b/examples/regexp-find-operations/regexp_find_operations.bal @@ -0,0 +1,55 @@ +import ballerina/io; +import ballerina/lang.regexp; + +function printGroupsWithinLog(regexp:Groups logGroup) { + // The first element in the `logGroup` is the entire matched string. + // The subsequent elements in `logGroup` represent the captured groups + // (timestamp, component, message). 
+ string timestamp = (logGroup[1]).substring(); + string component = (logGroup[2]).substring(); + string logMessage = (logGroup[3]).substring(); + + io:println(string `Timestamp: ${timestamp}`); + io:println(string `Component: ${component}`); + io:println(string `Message: ${logMessage}`); +} + +public function main() { + string logContent = string ` + 2024-09-19 10:02:01 WARN [UserLogin] - Failed login attempt for user: johndoe + 2024-09-19 10:03:17 ERROR [Database] - Connection to database timed out + 2024-09-19 10:04:05 WARN [RequestHandler] - Response time exceeded threshold for /api/v1/users + 2024-09-19 10:05:45 INFO [Scheduler] - Scheduled task started: Data backup + 2024-09-19 10:06:10 ERROR [Scheduler] - Failed to start data backup: Permission denied + 2024-09-19 10:11:55 INFO [Security] - Security scan completed, no issues found + 2024-09-19 10:12:30 ERROR [RequestHandler] - 404 Not Found: /api/v1/products`; + + // Regex to match error logs with three groups: + // 1. Timestamp (e.g., 2024-09-19 10:03:17). + // 2. Component (e.g., Database, Scheduler). + // 3. Log message (e.g., Connection to database timed out). + string:RegExp errorLogPattern = re `(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ERROR \[(\w+)\] - (.*)`; + + // Retrieving the first error log from the `logContent`. + regexp:Span firstErrorLog = errorLogPattern.find(logContent); + io:println(string `First error log: ${firstErrorLog.substring()}`); + + // Retrieving all error logs from the `logContent`. + regexp:Span[] allErrorLogs = errorLogPattern.findAll(logContent); + io:println("All error logs:"); + foreach regexp:Span errorLog in allErrorLogs { + io:println(errorLog.substring()); + } + + // Retrieving groups (timestamp, component, message) from the first error log. + regexp:Groups firstErrorLogGroups = errorLogPattern.findGroups(logContent); + io:println("\nGroups within first error log:"); + printGroupsWithinLog(firstErrorLogGroups); + + // Retrieving groups from all error logs. 
+ regexp:Groups[] allErrorLogGroups = errorLogPattern.findAllGroups(logContent); + io:println("\nGroups in all error logs"); + foreach regexp:Groups logGroup in allErrorLogGroups { + printGroupsWithinLog(logGroup); + } +} diff --git a/examples/regexp-find-operations/regexp_find_operations.md b/examples/regexp-find-operations/regexp_find_operations.md new file mode 100644 index 0000000000..00c62d1c5e --- /dev/null +++ b/examples/regexp-find-operations/regexp_find_operations.md @@ -0,0 +1,13 @@ +# RegExp find operations + +The `RegExp` type provides a set of language library functions to find patterns within strings. These functions enable efficient pattern matching, grouping, and extraction based on specific regular expressions. + + +::: code regexp_find_operations.bal ::: + +::: out regexp_find_operations.out ::: + +## Related links +- [RegExp type](/learn/by-example/regexp-type) +- [RegExp API Docs](https://lib.ballerina.io/ballerina/lang.regexp) +- [string API Docs](https://lib.ballerina.io/ballerina/lang.string) diff --git a/examples/regexp-find-operations/regexp_find_operations.metatags b/examples/regexp-find-operations/regexp_find_operations.metatags new file mode 100644 index 0000000000..a88701759c --- /dev/null +++ b/examples/regexp-find-operations/regexp_find_operations.metatags @@ -0,0 +1,2 @@ +description: This BBE demonstrates how to use the regexp langlib functions relevant to regex find operations. 
+keywords: ballerina, ballerina by example, bbe, regexp, RegExp, regex, regular expressions, ballerina regex functions, regexp langlib functions, find, findAll, findGroups, findAllGroups diff --git a/examples/regexp-find-operations/regexp_find_operations.out b/examples/regexp-find-operations/regexp_find_operations.out new file mode 100644 index 0000000000..5e9a9f4454 --- /dev/null +++ b/examples/regexp-find-operations/regexp_find_operations.out @@ -0,0 +1,23 @@ +$ bal run regexp_find_operations.bal +First error log: 2024-09-19 10:03:17 ERROR [Database] - Connection to database timed out +All error logs: +2024-09-19 10:03:17 ERROR [Database] - Connection to database timed out +2024-09-19 10:06:10 ERROR [Scheduler] - Failed to start data backup: Permission denied +2024-09-19 10:12:30 ERROR [RequestHandler] - 404 Not Found: /api/v1/products + +Groups within first error log: +Timestamp: 2024-09-19 10:03:17 +Component: Database +Message: Connection to database timed out + +Groups in all error logs +Timestamp: 2024-09-19 10:03:17 +Component: Database +Message: Connection to database timed out +Timestamp: 2024-09-19 10:06:10 +Component: Scheduler +Message: Failed to start data backup: Permission denied +Timestamp: 2024-09-19 10:12:30 +Component: RequestHandler +Message: 404 Not Found: /api/v1/products +➜ From f3c2c3263755cbad070cc0000bc38079866dfb41 Mon Sep 17 00:00:00 2001 From: poorna2152 Date: Thu, 26 Sep 2024 10:58:40 +0530 Subject: [PATCH 2/7] Update function order --- .../regexp_find_operations.bal | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/regexp-find-operations/regexp_find_operations.bal b/examples/regexp-find-operations/regexp_find_operations.bal index 0879269e08..15cfe0e3ce 100644 --- a/examples/regexp-find-operations/regexp_find_operations.bal +++ b/examples/regexp-find-operations/regexp_find_operations.bal @@ -1,19 +1,6 @@ import ballerina/io; import ballerina/lang.regexp; -function 
printGroupsWithinLog(regexp:Groups logGroup) { - // The first element in the `logGroup` is the entire matched string. - // The subsequent elements in `logGroup` represent the captured groups - // (timestamp, component, message). - string timestamp = (logGroup[1]).substring(); - string component = (logGroup[2]).substring(); - string logMessage = (logGroup[3]).substring(); - - io:println(string `Timestamp: ${timestamp}`); - io:println(string `Component: ${component}`); - io:println(string `Message: ${logMessage}`); -} - public function main() { string logContent = string ` 2024-09-19 10:02:01 WARN [UserLogin] - Failed login attempt for user: johndoe @@ -53,3 +40,16 @@ public function main() { printGroupsWithinLog(logGroup); } } + +function printGroupsWithinLog(regexp:Groups logGroup) { + // The first element in the `logGroup` is the entire matched string. + // The subsequent elements in `logGroup` represent the captured groups + // (timestamp, component, message). + string timestamp = (logGroup[1]).substring(); + string component = (logGroup[2]).substring(); + string logMessage = (logGroup[3]).substring(); + + io:println(string `Timestamp: ${timestamp}`); + io:println(string `Component: ${component}`); + io:println(string `Message: ${logMessage}`); +} From 1b556ff359e6fb73998dd6543ec609ae11f1d7bd Mon Sep 17 00:00:00 2001 From: poorna2152 Date: Fri, 27 Sep 2024 09:56:33 +0530 Subject: [PATCH 3/7] Address review suggestions --- .../regexp_find_operations.bal | 20 +++++++++++++------ .../regexp_find_operations.md | 1 - 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/examples/regexp-find-operations/regexp_find_operations.bal b/examples/regexp-find-operations/regexp_find_operations.bal index 15cfe0e3ce..2034f1ca71 100644 --- a/examples/regexp-find-operations/regexp_find_operations.bal +++ b/examples/regexp-find-operations/regexp_find_operations.bal @@ -15,27 +15,35 @@ public function main() { // 1. Timestamp (e.g., 2024-09-19 10:03:17). // 2. 
Component (e.g., Database, Scheduler). // 3. Log message (e.g., Connection to database timed out). - string:RegExp errorLogPattern = re `(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ERROR \[(\w+)\] - (.*)`; + string:RegExp errorLogPattern = re `(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ERROR \[(\w+)\]\s-\s(.*)`; // Retrieving the first error log from the `logContent`. - regexp:Span firstErrorLog = errorLogPattern.find(logContent); + regexp:Span? firstErrorLog = errorLogPattern.find(logContent); + if firstErrorLog == () { + io:println("Failed to find an error log"); + return; + } io:println(string `First error log: ${firstErrorLog.substring()}`); // Retrieving all error logs from the `logContent`. regexp:Span[] allErrorLogs = errorLogPattern.findAll(logContent); - io:println("All error logs:"); + io:println("\n", "All error logs:"); foreach regexp:Span errorLog in allErrorLogs { io:println(errorLog.substring()); } // Retrieving groups (timestamp, component, message) from the first error log. - regexp:Groups firstErrorLogGroups = errorLogPattern.findGroups(logContent); - io:println("\nGroups within first error log:"); + regexp:Groups? firstErrorLogGroups = errorLogPattern.findGroups(logContent); + if firstErrorLogGroups == () { + io:println("Failed to find groups in first error log"); + return; + } + io:println("\n", "Groups within first error log:"); printGroupsWithinLog(firstErrorLogGroups); // Retrieving groups from all error logs. 
regexp:Groups[] allErrorLogGroups = errorLogPattern.findAllGroups(logContent); - io:println("\nGroups in all error logs"); + io:println("\n", "Groups in all error logs"); foreach regexp:Groups logGroup in allErrorLogGroups { printGroupsWithinLog(logGroup); } diff --git a/examples/regexp-find-operations/regexp_find_operations.md b/examples/regexp-find-operations/regexp_find_operations.md index 00c62d1c5e..850ed3b18a 100644 --- a/examples/regexp-find-operations/regexp_find_operations.md +++ b/examples/regexp-find-operations/regexp_find_operations.md @@ -2,7 +2,6 @@ The `RegExp` type provides a set of language library functions to find patterns within strings. These functions enable efficient pattern matching, grouping, and extraction based on specific regular expressions. - ::: code regexp_find_operations.bal ::: ::: out regexp_find_operations.out ::: From d88fe10b786dc1ae58aa4a5808b9bb4db0e85b74 Mon Sep 17 00:00:00 2001 From: poorna2152 Date: Fri, 27 Sep 2024 09:58:01 +0530 Subject: [PATCH 4/7] Update out --- examples/regexp-find-operations/regexp_find_operations.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/regexp-find-operations/regexp_find_operations.out b/examples/regexp-find-operations/regexp_find_operations.out index 5e9a9f4454..23aac6565e 100644 --- a/examples/regexp-find-operations/regexp_find_operations.out +++ b/examples/regexp-find-operations/regexp_find_operations.out @@ -1,5 +1,6 @@ $ bal run regexp_find_operations.bal First error log: 2024-09-19 10:03:17 ERROR [Database] - Connection to database timed out + All error logs: 2024-09-19 10:03:17 ERROR [Database] - Connection to database timed out 2024-09-19 10:06:10 ERROR [Scheduler] - Failed to start data backup: Permission denied @@ -20,4 +21,3 @@ Message: Failed to start data backup: Permission denied Timestamp: 2024-09-19 10:12:30 Component: RequestHandler Message: 404 Not Found: /api/v1/products -➜ From 3d264b96e08f01249c60211bc27afe650d5b6e36 Mon Sep 17 00:00:00 2001 
From: Poorna Gunathilaka Date: Wed, 16 Oct 2024 15:25:55 +0530 Subject: [PATCH 5/7] Apply suggestions from code review Co-authored-by: Maryam Ziyad --- examples/regexp-find-operations/regexp_find_operations.bal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/regexp-find-operations/regexp_find_operations.bal b/examples/regexp-find-operations/regexp_find_operations.bal index 2034f1ca71..ef07c5ed3b 100644 --- a/examples/regexp-find-operations/regexp_find_operations.bal +++ b/examples/regexp-find-operations/regexp_find_operations.bal @@ -11,13 +11,13 @@ public function main() { 2024-09-19 10:11:55 INFO [Security] - Security scan completed, no issues found 2024-09-19 10:12:30 ERROR [RequestHandler] - 404 Not Found: /api/v1/products`; - // Regex to match error logs with three groups: + // Regular expression to match error logs with three groups: // 1. Timestamp (e.g., 2024-09-19 10:03:17). // 2. Component (e.g., Database, Scheduler). // 3. Log message (e.g., Connection to database timed out). string:RegExp errorLogPattern = re `(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ERROR \[(\w+)\]\s-\s(.*)`; - // Retrieving the first error log from the `logContent`. + // Retrieve the first error log from `logContent`. regexp:Span? 
firstErrorLog = errorLogPattern.find(logContent); if firstErrorLog == () { io:println("Failed to find an error log"); From 22e700451de5153bced958ae92e8150e4898d8a6 Mon Sep 17 00:00:00 2001 From: poorna2152 Date: Fri, 18 Oct 2024 10:29:19 +0530 Subject: [PATCH 6/7] Update println and add util function --- .../regexp_find_operations.bal | 21 ++++++++++++------- .../regexp_find_operations.md | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/examples/regexp-find-operations/regexp_find_operations.bal b/examples/regexp-find-operations/regexp_find_operations.bal index ef07c5ed3b..1dd1ba7724 100644 --- a/examples/regexp-find-operations/regexp_find_operations.bal +++ b/examples/regexp-find-operations/regexp_find_operations.bal @@ -23,7 +23,7 @@ public function main() { io:println("Failed to find an error log"); return; } - io:println(string `First error log: ${firstErrorLog.substring()}`); + io:println("First error log: ", firstErrorLog.substring()); // Retrieving all error logs from the `logContent`. regexp:Span[] allErrorLogs = errorLogPattern.findAll(logContent); @@ -53,11 +53,18 @@ function printGroupsWithinLog(regexp:Groups logGroup) { // The first element in the `logGroup` is the entire matched string. // The subsequent elements in `logGroup` represent the captured groups // (timestamp, component, message). - string timestamp = (logGroup[1]).substring(); - string component = (logGroup[2]).substring(); - string logMessage = (logGroup[3]).substring(); + string timestamp = extractStringFromMatchGroup(logGroup[1]); + string component = extractStringFromMatchGroup(logGroup[2]); + string logMessage = extractStringFromMatchGroup(logGroup[3]); - io:println(string `Timestamp: ${timestamp}`); - io:println(string `Component: ${component}`); - io:println(string `Message: ${logMessage}`); + io:println("Timestamp: ", timestamp); + io:println("Component: ", component); + io:println("Message: ", logMessage); +} + +function extractStringFromMatchGroup(regexp:Span? 
span) returns string { + if span !is regexp:Span { + return ""; + } + return span.substring(); } diff --git a/examples/regexp-find-operations/regexp_find_operations.md b/examples/regexp-find-operations/regexp_find_operations.md index 850ed3b18a..d9418540a9 100644 --- a/examples/regexp-find-operations/regexp_find_operations.md +++ b/examples/regexp-find-operations/regexp_find_operations.md @@ -1,6 +1,6 @@ # RegExp find operations -The `RegExp` type provides a set of language library functions to find patterns within strings. These functions enable efficient pattern matching, grouping, and extraction based on specific regular expressions. +The `RegExp` type provides a set of langlib functions to find patterns within strings. These functions enable efficient pattern matching, grouping, and extraction based on specific regular expressions. ::: code regexp_find_operations.bal ::: From 712dd87e664ec573d561b2206f0557ea38924d0c Mon Sep 17 00:00:00 2001 From: poorna2152 Date: Mon, 21 Oct 2024 14:12:01 +0530 Subject: [PATCH 7/7] Use ensureType --- .../regexp_find_operations.bal | 33 ++++++++----------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/examples/regexp-find-operations/regexp_find_operations.bal b/examples/regexp-find-operations/regexp_find_operations.bal index 1dd1ba7724..df16e181fd 100644 --- a/examples/regexp-find-operations/regexp_find_operations.bal +++ b/examples/regexp-find-operations/regexp_find_operations.bal @@ -1,7 +1,7 @@ import ballerina/io; import ballerina/lang.regexp; -public function main() { +public function main() returns error? { string logContent = string ` 2024-09-19 10:02:01 WARN [UserLogin] - Failed login attempt for user: johndoe 2024-09-19 10:03:17 ERROR [Database] - Connection to database timed out @@ -25,46 +25,39 @@ public function main() { } io:println("First error log: ", firstErrorLog.substring()); - // Retrieving all error logs from the `logContent`. + // Retrieve all error logs from `logContent`. 
regexp:Span[] allErrorLogs = errorLogPattern.findAll(logContent); - io:println("\n", "All error logs:"); + io:println("\nAll error logs:"); foreach regexp:Span errorLog in allErrorLogs { io:println(errorLog.substring()); } - // Retrieving groups (timestamp, component, message) from the first error log. + // Retrieve groups (timestamp, component, message) from the first error log. regexp:Groups? firstErrorLogGroups = errorLogPattern.findGroups(logContent); if firstErrorLogGroups == () { io:println("Failed to find groups in first error log"); return; } - io:println("\n", "Groups within first error log:"); - printGroupsWithinLog(firstErrorLogGroups); + io:println("\nGroups within first error log:"); + check printGroupsWithinLog(firstErrorLogGroups); - // Retrieving groups from all error logs. + // Retrieve groups from all error logs. regexp:Groups[] allErrorLogGroups = errorLogPattern.findAllGroups(logContent); - io:println("\n", "Groups in all error logs"); + io:println("\nGroups in all error logs"); foreach regexp:Groups logGroup in allErrorLogGroups { - printGroupsWithinLog(logGroup); + check printGroupsWithinLog(logGroup); } } -function printGroupsWithinLog(regexp:Groups logGroup) { +function printGroupsWithinLog(regexp:Groups logGroup) returns error? { // The first element in the `logGroup` is the entire matched string. // The subsequent elements in `logGroup` represent the captured groups // (timestamp, component, message). 
- string timestamp = extractStringFromMatchGroup(logGroup[1]); - string component = extractStringFromMatchGroup(logGroup[2]); - string logMessage = extractStringFromMatchGroup(logGroup[3]); + string timestamp = (check logGroup[1].ensureType(regexp:Span)).substring(); + string component = (check logGroup[2].ensureType(regexp:Span)).substring(); + string logMessage = (check logGroup[3].ensureType(regexp:Span)).substring(); io:println("Timestamp: ", timestamp); io:println("Component: ", component); io:println("Message: ", logMessage); } - -function extractStringFromMatchGroup(regexp:Span? span) returns string { - if span !is regexp:Span { - return ""; - } - return span.substring(); -}