Skip to content

Added initial support for Bytes type #2831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions integration_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,7 @@ RUN(NAME test_set_discard LABELS cpython llvm llvm_jit)
RUN(NAME test_set_from_list LABELS cpython llvm llvm_jit)
RUN(NAME test_set_clear LABELS cpython llvm)
RUN(NAME test_set_pop LABELS cpython llvm)
RUN(NAME test_bytes_01 LABELS cpython llvm llvm_jit)
RUN(NAME test_global_set LABELS cpython llvm llvm_jit)
RUN(NAME test_for_loop LABELS cpython llvm llvm_jit c)
RUN(NAME modules_01 LABELS cpython llvm llvm_jit c wasm wasm_x86 wasm_x64)
Expand Down
20 changes: 20 additions & 0 deletions integration_tests/test_bytes_01.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
def f():
    # Exercises declaration of local `bytes` variables initialised from
    # single-line and triple-quoted bytes literals. Returns nothing.
    a: bytes = b"This is a test string"
    b: bytes = b"This is another test string"
    # The continuation lines below are part of the literal's value, so they
    # are deliberately left unindented to keep the bytes unchanged.
    c: bytes = b"""Bigger test string with docstrings
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
eiusmod tempor incididunt ut labore et dolore magna aliqua. """


def g(a: bytes) -> bytes:
    # Identity function: checks that a `bytes` value can be passed in as an
    # argument and handed back unchanged as the return value.
    return a


def h() -> bytes:
    # Covers three things: a `bytes` declaration without an initialiser,
    # assignment from a call that returns `bytes`, and returning a literal.
    bar: bytes
    # `bar` is intentionally not read afterwards; the call and the store are
    # what is being exercised here.
    bar = g(b"fiwabcd")
    return b"12jw19\\xq0"


# Invoke the test functions so their bodies are executed when the file runs.
f()
h()
2 changes: 2 additions & 0 deletions src/libasr/ASR.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ expr
| StringOrd(expr arg, ttype type, expr? value)
| StringChr(expr arg, ttype type, expr? value)
| StringFormat(expr fmt, expr* args, string_format_kind kind, ttype type, expr? value)
| BytesConstant(string s, ttype type)
| CPtrCompare(expr left, cmpop op, expr right, ttype type, expr? value)
| SymbolicCompare(expr left, cmpop op, expr right, ttype type, expr? value)
| DictConstant(expr* keys, expr* values, ttype type)
Expand Down Expand Up @@ -198,6 +199,7 @@ ttype
| Real(int kind)
| Complex(int kind)
| Character(int kind, int len, expr? len_expr)
| Byte(int kind, int len, expr? len_expr)
| Logical(int kind)
| Set(ttype type)
| List(ttype type)
Expand Down
3 changes: 2 additions & 1 deletion src/libasr/asdl_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
Generate C++ AST node definitions from an ASDL description.
"""

import sys
import os
import sys

import asdl


Expand Down
42 changes: 41 additions & 1 deletion src/libasr/asr_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ static inline int extract_kind_from_ttype_t(const ASR::ttype_t* type) {
case ASR::ttypeType::Character: {
return ASR::down_cast<ASR::Character_t>(type)->m_kind;
}
case ASR::ttypeType::Byte: {
return ASR::down_cast<ASR::Byte_t>(type)->m_kind;
}
case ASR::ttypeType::Logical: {
return ASR::down_cast<ASR::Logical_t>(type)->m_kind;
}
Expand Down Expand Up @@ -251,6 +254,10 @@ static inline void set_kind_to_ttype_t(ASR::ttype_t* type, int kind) {
ASR::down_cast<ASR::Character_t>(type)->m_kind = kind;
break;
}
case ASR::ttypeType::Byte: {
ASR::down_cast<ASR::Byte_t>(type)->m_kind = kind;
break;
}
case ASR::ttypeType::Logical: {
ASR::down_cast<ASR::Logical_t>(type)->m_kind = kind;
break;
Expand Down Expand Up @@ -542,6 +549,9 @@ static inline std::string type_to_str(const ASR::ttype_t *t)
case ASR::ttypeType::Character: {
return "character";
}
case ASR::ttypeType::Byte: {
return "byte";
}
case ASR::ttypeType::Tuple: {
return "tuple";
}
Expand Down Expand Up @@ -990,7 +1000,8 @@ static inline bool is_value_constant(ASR::expr_t *a_value) {
case ASR::exprType::ImpliedDoLoop:
case ASR::exprType::PointerNullConstant:
case ASR::exprType::ArrayConstant:
case ASR::exprType::StringConstant: {
case ASR::exprType::StringConstant:
case ASR::exprType::BytesConstant: {
return true;
}
case ASR::exprType::RealBinOp:
Expand Down Expand Up @@ -1421,6 +1432,9 @@ static inline std::string get_type_code(const ASR::ttype_t *t, bool use_undersco
case ASR::ttypeType::Character: {
return "str";
}
case ASR::ttypeType::Byte: {
return "bytes";
}
case ASR::ttypeType::Tuple: {
ASR::Tuple_t *tup = ASR::down_cast<ASR::Tuple_t>(t);
std::string result = "tuple";
Expand Down Expand Up @@ -1608,6 +1622,9 @@ static inline std::string type_to_str_python(const ASR::ttype_t *t,
case ASR::ttypeType::Character: {
return "str";
}
case ASR::ttypeType::Byte: {
return "bytes";
}
case ASR::ttypeType::Tuple: {
ASR::Tuple_t *tup = ASR::down_cast<ASR::Tuple_t>(t);
std::string result = "tuple[";
Expand Down Expand Up @@ -2148,6 +2165,7 @@ inline size_t extract_dimensions_from_ttype(ASR::ttype_t *x,
case ASR::ttypeType::Real:
case ASR::ttypeType::Complex:
case ASR::ttypeType::Character:
case ASR::ttypeType::Byte:
case ASR::ttypeType::Logical:
case ASR::ttypeType::StructType:
case ASR::ttypeType::Enum:
Expand Down Expand Up @@ -2419,6 +2437,7 @@ inline bool ttype_set_dimensions(ASR::ttype_t** x,
case ASR::ttypeType::Real:
case ASR::ttypeType::Complex:
case ASR::ttypeType::Character:
case ASR::ttypeType::Byte:
case ASR::ttypeType::Logical:
case ASR::ttypeType::StructType:
case ASR::ttypeType::Enum:
Expand Down Expand Up @@ -2540,6 +2559,12 @@ static inline ASR::ttype_t* duplicate_type(Allocator& al, const ASR::ttype_t* t,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
break;
}
case ASR::ttypeType::Byte: {
ASR::Byte_t* tnew = ASR::down_cast<ASR::Byte_t>(t);
t_ = ASRUtils::TYPE(ASR::make_Byte_t(al, t->base.loc,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
break;
}
case ASR::ttypeType::StructType: {
ASR::StructType_t* tnew = ASR::down_cast<ASR::StructType_t>(t);
t_ = ASRUtils::TYPE(ASR::make_StructType_t(al, t->base.loc,
Expand Down Expand Up @@ -2696,6 +2721,11 @@ static inline ASR::ttype_t* duplicate_type_without_dims(Allocator& al, const ASR
return ASRUtils::TYPE(ASR::make_Character_t(al, loc,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
}
case ASR::ttypeType::Byte: {
ASR::Byte_t* tnew = ASR::down_cast<ASR::Byte_t>(t);
return ASRUtils::TYPE(ASR::make_Byte_t(al, loc,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
}
case ASR::ttypeType::StructType: {
ASR::StructType_t* tstruct = ASR::down_cast<ASR::StructType_t>(t);
return ASRUtils::TYPE(ASR::make_StructType_t(al, t->base.loc,
Expand Down Expand Up @@ -3123,6 +3153,11 @@ inline bool types_equal(ASR::ttype_t *a, ASR::ttype_t *b,
ASR::Character_t *b2 = ASR::down_cast<ASR::Character_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::Byte) : {
ASR::Byte_t *a2 = ASR::down_cast<ASR::Byte_t>(a);
ASR::Byte_t *b2 = ASR::down_cast<ASR::Byte_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::List) : {
ASR::List_t *a2 = ASR::down_cast<ASR::List_t>(a);
ASR::List_t *b2 = ASR::down_cast<ASR::List_t>(b);
Expand Down Expand Up @@ -3306,6 +3341,11 @@ inline bool types_equal_with_substitution(ASR::ttype_t *a, ASR::ttype_t *b,
ASR::Character_t *b2 = ASR::down_cast<ASR::Character_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::Byte) : {
ASR::Byte_t *a2 = ASR::down_cast<ASR::Byte_t>(a);
ASR::Byte_t *b2 = ASR::down_cast<ASR::Byte_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::List) : {
ASR::List_t *a2 = ASR::down_cast<ASR::List_t>(a);
ASR::List_t *b2 = ASR::down_cast<ASR::List_t>(b);
Expand Down
91 changes: 89 additions & 2 deletions src/libasr/codegen/asr_to_llvm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,23 @@ void string_init(llvm::LLVMContext &context, llvm::Module &module,
builder.CreateCall(fn, args);
}

// Emits a call to the runtime routine `_lfortran_bytes_init`.
//
// If `module` has no function of that name yet, an external declaration is
// created first: void return, parameters (i32, i8*), marked variadic.
// `arg_size` and `arg_bytes` are forwarded as the two call arguments, and the
// call is inserted at `builder`'s current insertion point.
void bytes_init(llvm::LLVMContext &context, llvm::Module &module,
        llvm::IRBuilder<> &builder, llvm::Value* arg_size, llvm::Value* arg_bytes) {
    const std::string runtime_name = "_lfortran_bytes_init";
    llvm::Function *callee = module.getFunction(runtime_name);
    if (callee == nullptr) {
        // Not declared in this module yet: build the prototype on demand.
        std::vector<llvm::Type*> param_types = {
            llvm::Type::getInt32Ty(context),
            llvm::Type::getInt8PtrTy(context)
        };
        llvm::FunctionType *signature = llvm::FunctionType::get(
            llvm::Type::getVoidTy(context), param_types, /*isVarArg=*/true);
        callee = llvm::Function::Create(signature,
            llvm::Function::ExternalLinkage, runtime_name, module);
    }
    std::vector<llvm::Value*> call_args = {arg_size, arg_bytes};
    builder.CreateCall(callee, call_args);
}

class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
{
private:
Expand Down Expand Up @@ -143,7 +160,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
bool prototype_only;
llvm::StructType *complex_type_4, *complex_type_8;
llvm::StructType *complex_type_4_ptr, *complex_type_8_ptr;
llvm::PointerType *character_type;
llvm::PointerType *character_type, *byte_type;
llvm::PointerType *list_type;
std::vector<std::string> struct_type_stack;

Expand Down Expand Up @@ -910,6 +927,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
complex_type_4_ptr = llvm_utils->complex_type_4_ptr;
complex_type_8_ptr = llvm_utils->complex_type_8_ptr;
character_type = llvm_utils->character_type;
byte_type = llvm_utils->character_type;
list_type = llvm::Type::getInt8PtrTy(context);

llvm::Type* bound_arg = static_cast<llvm::Type*>(arr_descr->get_dimension_descriptor_type(true));
Expand Down Expand Up @@ -948,7 +966,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
prototype_only = false;

// TODO: handle dependencies across modules and main program

;
// Then do all the modules in the right order
std::vector<std::string> build_order
= determine_module_dependencies(x);
Expand Down Expand Up @@ -2879,6 +2897,25 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
}
}
llvm_symtab[h] = ptr;
} else if (x.m_type->type == ASR::ttypeType::Byte) {
llvm::Constant *ptr = module->getOrInsertGlobal(x.m_name,
character_type);
if (!external) {
if (init_value) {
module->getNamedGlobal(x.m_name)->setInitializer(
init_value);
} else {
module->getNamedGlobal(x.m_name)->setInitializer(
llvm::Constant::getNullValue(character_type)
);
ASR::Byte_t *t = down_cast<ASR::Byte_t>(x.m_type);
if( t->m_len >= 0 ) {
strings_to_be_allocated.insert(std::pair(ptr, llvm::ConstantInt::get(
context, llvm::APInt(32, t->m_len+1))));
}
}
}
llvm_symtab[h] = ptr;
} else if( x.m_type->type == ASR::ttypeType::CPtr ) {
llvm::Type* void_ptr = llvm::Type::getVoidTy(context)->getPointerTo();
llvm::Constant *ptr = module->getOrInsertGlobal(x.m_name,
Expand Down Expand Up @@ -3889,6 +3926,36 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
} else {
throw CodeGenError("Unsupported len value in ASR " + std::to_string(strlen));
}
} else if (is_a<ASR::Byte_t>(*v->m_type) && !is_array_type && !is_list) {
ASR::Byte_t *t = down_cast<ASR::Byte_t>(v->m_type);
target_var = ptr;
int byte_len = t->m_len;
if (byte_len >= 0 || byte_len == -3) {
llvm::Value *arg_size;
if (byte_len == -3) {
LCOMPILERS_ASSERT(t->m_len_expr)
this->visit_expr(*t->m_len_expr);
arg_size = builder->CreateAdd(builder->CreateSExtOrTrunc(tmp,
llvm::Type::getInt32Ty(context)),
llvm::ConstantInt::get(context, llvm::APInt(32, 1)) );
} else {
// Compile time length
arg_size = llvm::ConstantInt::get(context,
llvm::APInt(32, byte_len+1));
}
llvm::Value *init_value = LLVM::lfortran_malloc(context, *module, *builder, arg_size);
string_init(context, *module, *builder, arg_size, init_value);
builder->CreateStore(init_value, target_var);
if (v->m_intent == intent_local) {
strings_to_be_deallocated.push_back(al, CreateLoad(target_var));
}
} else if (byte_len == -2) {
// Allocatable bytes. Initialize to `nullptr` (unallocated)
llvm::Value *init_value = llvm::Constant::getNullValue(type);
builder->CreateStore(init_value, target_var);
} else {
throw CodeGenError("Unsupported bytes len value in ASR " + std::to_string(byte_len));
}
} else if (is_list) {
ASR::List_t* asr_list = ASR::down_cast<ASR::List_t>(v->m_type);
std::string type_code = ASRUtils::get_type_code(asr_list->m_type);
Expand Down Expand Up @@ -7072,6 +7139,10 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
tmp = builder->CreateGlobalStringPtr(x.m_s);
}

// Lowers a bytes literal: emits the payload `x.m_s` through
// CreateGlobalStringPtr and leaves the resulting pointer in `tmp`.
// NOTE(review): presumably `m_s` is a NUL-terminated C string, in which case
// a literal containing an embedded zero byte would be cut short. Confirm.
void visit_BytesConstant(const ASR::BytesConstant_t &x) {
auto *global_bytes_ptr = builder->CreateGlobalStringPtr(x.m_s);
tmp = global_bytes_ptr;
}

inline void fetch_ptr(ASR::Variable_t* x) {
uint32_t x_h = get_hash((ASR::asr_t*)x);
LCOMPILERS_ASSERT(llvm_symtab.find(x_h) != llvm_symtab.end());
Expand Down Expand Up @@ -7128,6 +7199,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
case ASR::ttypeType::Complex:
case ASR::ttypeType::StructType:
case ASR::ttypeType::Character:
case ASR::ttypeType::Byte:
case ASR::ttypeType::Logical:
case ASR::ttypeType::Class: {
if( t2->type == ASR::ttypeType::StructType ) {
Expand Down Expand Up @@ -8848,6 +8920,21 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
target_type = character_type;
break;
}
case (ASR::ttypeType::Byte) : {
ASR::Variable_t *orig_arg = nullptr;
if( func_subrout->type == ASR::symbolType::Function ) {
ASR::Function_t* func = down_cast<ASR::Function_t>(func_subrout);
orig_arg = ASRUtils::EXPR2VAR(func->m_args[i]);
} else {
throw CodeGenError("ICE: expected func_subrout->type == ASR::symbolType::Function.");
}
if (orig_arg->m_abi == ASR::abiType::BindC) {
character_bindc = true;
}

target_type = character_type;
break;
}
case (ASR::ttypeType::Logical) :
target_type = llvm::Type::getInt1Ty(context);
break;
Expand Down
Loading
Loading