Skip to content

Commit c42d8bf

Browse files
with_ascii_lowercased zig builtin
1 parent 5d0b700 commit c42d8bf

File tree

49 files changed

+1891
-1751
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1891
-1751
lines changed

crates/compiler/builtins/bitcode/src/main.zig

+1
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ comptime {
211211
exportStrFn(str.withCapacityC, "with_capacity");
212212
exportStrFn(str.strAllocationPtr, "allocation_ptr");
213213
exportStrFn(str.strReleaseExcessCapacity, "release_excess_capacity");
214+
exportStrFn(str.strWithAsciiLowercased, "with_ascii_lowercased");
214215

215216
for (INTEGERS) |T| {
216217
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");

crates/compiler/builtins/bitcode/src/str.zig

+67-11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ const utils = @import("utils.zig");
22
const RocList = @import("list.zig").RocList;
33
const UpdateMode = utils.UpdateMode;
44
const std = @import("std");
5+
const ascii = std.ascii;
56
const mem = std.mem;
67
const unicode = std.unicode;
78
const testing = std.testing;
@@ -374,7 +375,12 @@ pub const RocStr = extern struct {
374375
return utils.REFCOUNT_ONE;
375376
}
376377

377-
const ptr: [*]usize = @as([*]usize, @ptrCast(@alignCast(self.bytes)));
378+
const data_ptr = if (self.isSeamlessSlice())
379+
self.getAllocationPtr()
380+
else
381+
self.bytes;
382+
383+
const ptr: [*]usize = @as([*]usize, @ptrCast(@alignCast(data_ptr)));
378384
return (ptr - 1)[0];
379385
}
380386

@@ -615,16 +621,6 @@ fn initFromSmallStr(slice_bytes: [*]u8, len: usize, _: usize) RocStr {
615621
return RocStr.init(slice_bytes, len);
616622
}
617623

618-
// The alloc_ptr must already be shifted to be ready for storing in a seamless slice.
619-
fn initFromBigStr(slice_bytes: [*]u8, len: usize, alloc_ptr: usize) RocStr {
620-
// Here we can make seamless slices instead of copying to a new small str.
621-
return RocStr{
622-
.bytes = slice_bytes,
623-
.length = len | SEAMLESS_SLICE_BIT,
624-
.capacity_or_alloc_ptr = alloc_ptr,
625-
};
626-
}
627-
628624
fn strSplitOnHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
629625
if (delimiter.len() == 0) {
630626
string.incref(1);
@@ -1972,6 +1968,66 @@ fn countTrailingWhitespaceBytes(string: RocStr) usize {
19721968
return byte_count;
19731969
}
19741970

1971+
// Str.with_ascii_lowercased
1972+
pub fn strWithAsciiLowercased(string: RocStr) callconv(.C) RocStr {
1973+
var new_str = if (string.isUnique())
1974+
string
1975+
else blk: {
1976+
string.decref();
1977+
break :blk RocStr.fromSlice(string.asSlice());
1978+
};
1979+
1980+
const new_str_bytes = new_str.asU8ptrMut()[0..string.len()];
1981+
for (new_str_bytes) |*c| {
1982+
c.* = ascii.toLower(c.*);
1983+
}
1984+
return new_str;
1985+
}
1986+
1987+
test "withAsciiLowercased: small str" {
1988+
const original = RocStr.fromSlice("cOFFÉ");
1989+
try expect(original.isSmallStr());
1990+
1991+
const expected = RocStr.fromSlice("coffÉ");
1992+
defer expected.decref();
1993+
1994+
const str_result = strWithAsciiLowercased(original);
1995+
defer str_result.decref();
1996+
1997+
try expect(str_result.isSmallStr());
1998+
try expect(str_result.eq(expected));
1999+
}
2000+
2001+
test "withAsciiLowercased: non small str" {
2002+
const original = RocStr.fromSlice("cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ");
2003+
defer original.decref();
2004+
try expect(!original.isSmallStr());
2005+
2006+
const expected = RocStr.fromSlice("coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ");
2007+
defer expected.decref();
2008+
2009+
const str_result = strWithAsciiLowercased(original);
2010+
2011+
try expect(!str_result.isSmallStr());
2012+
try expect(str_result.eq(expected));
2013+
}
2014+
2015+
test "withAsciiLowercased: seamless slice" {
2016+
const l = RocStr.fromSlice("cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ");
2017+
const original = substringUnsafeC(l, 1, l.len() - 1);
2018+
defer original.decref();
2019+
2020+
try expect(original.isSeamlessSlice());
2021+
2022+
const expected = RocStr.fromSlice("offÉ coffÉ coffÉ coffÉ coffÉ coffÉ");
2023+
defer expected.decref();
2024+
2025+
const str_result = strWithAsciiLowercased(original);
2026+
2027+
try expect(!str_result.isSmallStr());
2028+
try expect(str_result.eq(expected));
2029+
}
2030+
19752031
fn rcNone(_: ?[*]u8) callconv(.C) void {}
19762032

19772033
fn decStr(ptr: ?[*]u8) callconv(.C) void {

crates/compiler/builtins/roc/Str.roc

+16
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ module [
369369
contains,
370370
drop_prefix,
371371
drop_suffix,
372+
with_ascii_lowercased,
372373
]
373374

374375
import Bool exposing [Bool]
@@ -1092,3 +1093,18 @@ drop_suffix = \haystack, suffix ->
10921093
substring_unsafe(haystack, start, len)
10931094
else
10941095
haystack
1096+
1097+
## Returns a version of the string with all [ASCII characters](https://en.wikipedia.org/wiki/ASCII) lowercased. Non-ASCII characters are left unmodified. For example:
1098+
##
1099+
## ```roc
1100+
## expect "CAFÉ".with_ascii_lowercased() == "cafÉ"
1101+
## ```
1102+
##
1103+
## This function is useful for things like [command-line options](https://en.wikipedia.org/wiki/Command-line_interface#Command-line_option) and [environment variables](https://en.wikipedia.org/wiki/Environment_variable), where you know in advance that you're dealing with a hardcoded string containing only ASCII characters. It has better performance than lowercasing operations which take Unicode into account.
1104+
##
1105+
## That said, strings received from user input can always contain non-ASCII Unicode characters, and lowercasing [Unicode](https://unicode.org) works differently in different languages. For example, the string `"I"` lowercases to `"i"` in English and to `"ı"` (a [dotless i](https://en.wikipedia.org/wiki/Dotless_I)) in Turkish. These rules can also change in each [Unicode release](https://www.unicode.org/releases/), so we have a separate [`unicode` package](https://github.com/roc-lang/unicode) for Unicode capitalization that can be upgraded independently from the language's builtins.
1106+
##
1107+
## To do a case-insensitive comparison of the ASCII characters in a string, use [`caseless_ascii_equals`](#caseless_ascii_equals).
1108+
with_ascii_lowercased: Str -> Str
1109+
1110+
# ASCII letters are lowercased; the non-ASCII `É` must be left untouched.
expect Str.with_ascii_lowercased "cOFFÉ" == "coffÉ"

crates/compiler/builtins/src/bitcode.rs

+1
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
358358
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
359359
pub const STR_ALLOCATION_PTR: &str = "roc_builtins.str.allocation_ptr";
360360
pub const STR_RELEASE_EXCESS_CAPACITY: &str = "roc_builtins.str.release_excess_capacity";
361+
pub const STR_WITH_ASCII_LOWERCASED: &str = "roc_builtins.str.with_ascii_lowercased";
361362

362363
pub const LIST_MAP: &str = "roc_builtins.list.map";
363364
pub const LIST_MAP2: &str = "roc_builtins.list.map2";

crates/compiler/can/src/builtins.rs

+1
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ map_symbol_to_lowlevel_and_arity! {
130130
StrToNum; STR_TO_NUM; 1,
131131
StrWithCapacity; STR_WITH_CAPACITY; 1,
132132
StrReleaseExcessCapacity; STR_RELEASE_EXCESS_CAPACITY; 1,
133+
StrWithAsciiLowercased; STR_WITH_ASCII_LOWERCASED; 1,
133134

134135
ListLenUsize; LIST_LEN_USIZE; 1,
135136
ListLenU64; LIST_LEN_U64; 1,

crates/compiler/gen_dev/src/lib.rs

+7
Original file line numberDiff line numberDiff line change
@@ -1726,6 +1726,13 @@ trait Backend<'a> {
17261726
arg_layouts,
17271727
ret_layout,
17281728
),
1729+
LowLevel::StrWithAsciiLowercased => self.build_fn_call(
1730+
sym,
1731+
bitcode::STR_WITH_ASCII_LOWERCASED.to_string(),
1732+
args,
1733+
arg_layouts,
1734+
ret_layout,
1735+
),
17291736
LowLevel::StrToNum => {
17301737
let number_layout = match self.interner().get_repr(*ret_layout) {
17311738
LayoutRepr::Struct(field_layouts) => field_layouts[0], // TODO: why is it sometimes a struct?

crates/compiler/gen_llvm/src/llvm/lowlevel.rs

+12
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,7 @@ pub(crate) fn run_low_level<'a, 'ctx>(
593593
bitcode::STR_WITH_CAPACITY,
594594
)
595595
}
596+
596597
ListLenU64 => {
597598
// List.len : List * -> U64
598599
arguments!(list);
@@ -635,6 +636,17 @@ pub(crate) fn run_low_level<'a, 'ctx>(
635636
list_element_layout!(layout_interner, result_layout),
636637
)
637638
}
639+
StrWithAsciiLowercased => {
640+
arguments!(string);
641+
642+
call_str_bitcode_fn(
643+
env,
644+
&[string],
645+
&[],
646+
BitcodeReturns::Str,
647+
bitcode::STR_WITH_ASCII_LOWERCASED,
648+
)
649+
}
638650
ListConcat => {
639651
debug_assert_eq!(args.len(), 2);
640652

crates/compiler/gen_wasm/src/low_level.rs

+3
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ impl<'a> LowLevelCall<'a> {
258258
self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE)
259259
}
260260
StrWithCapacity => self.load_args_and_call_zig(backend, bitcode::STR_WITH_CAPACITY),
261+
StrWithAsciiLowercased => {
262+
self.load_args_and_call_zig(backend, bitcode::STR_WITH_ASCII_LOWERCASED)
263+
}
261264

262265
// List
263266
ListLenU64 => {

crates/compiler/module/src/low_level.rs

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub enum LowLevel {
2626
StrReserve,
2727
StrWithCapacity,
2828
StrReleaseExcessCapacity,
29+
StrWithAsciiLowercased,
2930
ListLenUsize,
3031
ListLenU64,
3132
ListWithCapacity,
@@ -267,6 +268,7 @@ map_symbol_to_lowlevel! {
267268
StrToNum <= STR_TO_NUM;
268269
StrWithCapacity <= STR_WITH_CAPACITY;
269270
StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY;
271+
StrWithAsciiLowercased <= STR_WITH_ASCII_LOWERCASED;
270272
ListLenU64 <= LIST_LEN_U64;
271273
ListLenUsize <= LIST_LEN_USIZE;
272274
ListGetCapacity <= LIST_CAPACITY;

crates/compiler/module/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1418,6 +1418,7 @@ define_builtins! {
14181418
48 STR_RELEASE_EXCESS_CAPACITY: "release_excess_capacity"
14191419
49 STR_DROP_PREFIX: "drop_prefix"
14201420
50 STR_DROP_SUFFIX: "drop_suffix"
1421+
51 STR_WITH_ASCII_LOWERCASED: "with_ascii_lowercased"
14211422
}
14221423
6 LIST: "List" => {
14231424
0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias

crates/compiler/mono/src/drop_specialization.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1549,6 +1549,7 @@ fn low_level_no_rc(lowlevel: &LowLevel) -> RC {
15491549
ListPrepend => RC::Rc,
15501550
StrJoinWith => RC::NoRc,
15511551
ListSortWith => RC::Rc,
1552+
StrWithAsciiLowercased => RC::Rc,
15521553

15531554
ListAppendUnsafe
15541555
| ListReserve

crates/compiler/mono/src/inc_dec.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1258,6 +1258,7 @@ pub(crate) fn lowlevel_borrow_signature(op: LowLevel) -> &'static [Ownership] {
12581258
StrReleaseExcessCapacity => &[OWNED],
12591259
ListIncref => &[OWNED],
12601260
ListDecref => &[OWNED],
1261+
StrWithAsciiLowercased => &[OWNED],
12611262

12621263
Eq | NotEq => &[BORROWED, BORROWED],
12631264

crates/compiler/solve/tests/solve_expr.rs

+12
Original file line numberDiff line numberDiff line change
@@ -3824,6 +3824,18 @@ mod solve_expr {
38243824
);
38253825
}
38263826

3827+
#[test]
3828+
fn str_with_ascii_lowercased() {
3829+
infer_eq_without_problem(
3830+
indoc!(
3831+
r"
3832+
Str.with_ascii_lowercased
3833+
"
3834+
),
3835+
"Str -> Str",
3836+
);
3837+
}
3838+
38273839
#[test]
38283840
fn list_take_first() {
38293841
infer_eq_without_problem(

crates/compiler/test_gen/src/gen_str.rs

+26
Original file line numberDiff line numberDiff line change
@@ -2060,3 +2060,29 @@ fn str_drop_suffix() {
20602060
RocStr
20612061
);
20622062
}
2063+
2064+
#[test]
2065+
#[cfg(any(feature = "gen-llvm", feature = "gen-dev", feature = "gen-wasm"))]
2066+
fn with_ascii_lowercased() {
2067+
assert_evals_to!(
2068+
r#"
2069+
Str.with_ascii_lowercased "cOFFÉ"
2070+
"#,
2071+
RocStr::from("coffÉ"),
2072+
RocStr
2073+
);
2074+
}
2075+
2076+
#[test]
2077+
#[cfg(any(feature = "gen-llvm", feature = "gen-dev", feature = "gen-wasm"))]
2078+
fn with_ascii_lowercased_non_zero_refcount() {
2079+
assert_evals_to!(
2080+
r#"
2081+
original = "cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ"
2082+
res = Str.with_ascii_lowercased original
2083+
Str.dropPrefix res original
2084+
"#,
2085+
RocStr::from("coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ"),
2086+
RocStr
2087+
);
2088+
}

crates/compiler/test_mono/generated/compose_recursive_lambda_set_productive_nullable_wrapped.txt

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/compiler/test_mono/generated/dbg_expr.txt

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)