diff --git a/.gitignore b/.gitignore index 7a5601f3..bf59c699 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,8 @@ modules.order # Ignore build directory build/* + +# Ignore cscope files +cscope.out +cscope.in.out +cscope.po.out diff --git a/CMakeLists.txt b/CMakeLists.txt index 00bc81b2..800ba70a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,7 +33,12 @@ foreach (arch ${nvidia_archs}) string (CONCAT GPU_ARCHS ${GPU_ARCHS} " -gencode arch=compute_${arch},code=sm_${arch}") endforeach () -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra" CACHE INTERNAL "" FORCE) +set (convert_warnings implicit-function-declaration incompatible-pointer-types int-conversion return-type unused-function unused-variable) +set (flags "") +foreach (flag ${convert_warnings}) + string (APPEND flags " -Werror=${flag}") +endforeach() +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra ${flags}" CACHE INTERNAL "" FORCE) set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_DEBUG} -g -DDEBUG" CACHE INTERNAL "" FORCE) set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2" CACHE INTERNAL "" FORCE) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra" CACHE INTERNAL "" FORCE) @@ -75,13 +80,14 @@ endif () find_path (sisci_include NAMES sisci_types.h sisci_api.h sisci_error.h PATHS "${DIS}/include" "${DIS}") find_library (sisci_lib NAMES sisci PATHS "${DIS}/lib64" "${DIS}/lib" NO_DEFAULT_PATH) if (sisci_lib) - message(STATUS "Using DIS library ${sisci_lib}") + message (STATUS "Using SISCI library ${sisci_lib}") endif () # Library source files set (libnvm_root "${PROJECT_SOURCE_DIR}/src") file (GLOB libnvm_source "${libnvm_root}/*.c") file (GLOB libnvm_dis_source "${libnvm_root}/dis/*.c") +file (GLOB libnvm_linux_source "${libnvm_root}/linux/*.c") file (GLOB libnvm_include "${PROJECT_BINARY_DIR}/include/*.h" "${PROJECT_SOURCE_DIR}/include/*.h") # Module source files @@ -111,17 +117,19 @@ endif () # Create NVME library target -add_library (libnvm ${libnvm_source}) -target_include_directories (libnvm PRIVATE "${libnvm_root}" "${libnvm_root}/dis") +add_library (libnvm ${libnvm_source} ${libnvm_linux_source}) # TODO: add check for OS +target_include_directories (libnvm PRIVATE "${libnvm_root}") if (sisci_include AND sisci_lib AND NOT no_smartio) target_sources (libnvm PRIVATE ${libnvm_dis_source}) target_compile_definitions (libnvm PRIVATE _REENTRANT _SISCI __DIS_CLUSTER__) target_link_libraries (libnvm ${sisci_lib} Threads::Threads) + message (STATUS "Configuring libnvm with SmartIO") else () target_link_libraries (libnvm Threads::Threads) + message (STATUS "Configuring libnvm without SmartIO") endif () @@ -142,11 +150,11 @@ install (TARGETS libnvm DESTINATION "lib") # Create kernel module makefile if (NOT no_module) if (CUDA_FOUND AND NOT no_cuda AND EXISTS "${driver_dir}/Module.symvers") - set (module_ccflags "-D_CUDA -I${driver_include} -I${libnvm_root}") + set (module_ccflags "-D_CUDA -I${driver_include} -I${libnvm_root} -I${libnvm_root}/linux") set (module_symbols "${driver_dir}/Module.symvers") message(STATUS "Configuring kernel module with CUDA") else () - set (module_ccflags "-I${libnvm_root}") + set (module_ccflags "-I${libnvm_root} -I${libnvm_root}/linux") message(STATUS "Configuring kernel module without CUDA") endif () diff --git a/examples/identify/common.c b/examples/identify/common.c index 955c0965..f84c62d5 100644 --- a/examples/identify/common.c +++ b/examples/identify/common.c @@ -71,7 +71,7 @@ nvm_aq_ref reset_ctrl(const nvm_ctrl_t* ctrl, const nvm_dma_t* dma_window) { return NULL; } 
-    memset(dma_window->vaddr, 0, dma_window->page_size * 2);
+    memset((void*) dma_window->vaddr, 0, dma_window->page_size * 2);
 
     fprintf(stderr, "Resetting controller and setting up admin queues...\n");
     status = nvm_aq_create(&admin, ctrl, dma_window);
diff --git a/examples/identify/userspace.c b/examples/identify/userspace.c
index 5909d697..130dfb08 100644
--- a/examples/identify/userspace.c
+++ b/examples/identify/userspace.c
@@ -114,7 +114,7 @@ static int identify(const nvm_ctrl_t* ctrl, uint32_t nvm_ns_id)
         goto out;
     }
 
-    status = nvm_dma_map(&window, ctrl, memory, page_size, 3, ioaddrs);
+    status = nvm_raw_dma_map(&window, ctrl, memory, page_size, 3, ioaddrs);
     if (status != 0)
     {
         fprintf(stderr, "Failed to create DMA window: %s\n", strerror(status));
diff --git a/include/nvm_admin.h b/include/nvm_admin.h
index 5f93782c..257fc266 100644
--- a/include/nvm_admin.h
+++ b/include/nvm_admin.h
@@ -53,13 +53,14 @@ int nvm_admin_request_num_queues(nvm_aq_ref ref, uint16_t* n_cqs, uint16_t* n_sq
 /*
  * Create IO completion queue (CQ)
  * Caller must set queue memory to zero manually.
+ * If n_pages > 1, DMA memory must be contiguous.
  */
-int nvm_admin_cq_create(nvm_aq_ref ref,         // AQ pair reference
-                        nvm_queue_t* cq,        // CQ descriptor
-                        uint16_t id,            // Queue identifier
-                        void* qmem,             // Queue memory (virtual memory)
-                        uint64_t ioaddr);       // Bus address to queue memory as seen by controller
-
+int nvm_admin_cq_create(nvm_aq_ref ref,         // AQ pair reference
+                        nvm_queue_t* cq,        // CQ descriptor
+                        uint16_t id,            // Queue identifier
+                        const nvm_dma_t* dma,   // Queue memory handle
+                        size_t page_offset,     // Number of pages to offset into the handle
+                        size_t n_pages);        // Number of pages to use
 
 /*
  * Delete IO completion queue (CQ)
@@ -73,13 +74,15 @@ int nvm_admin_cq_delete(nvm_aq_ref ref, nvm_queue_t* cq);
 /*
  * Create IO submission queue (SQ)
  * Caller must set queue memory to zero manually.
+ * If n_pages > 1, DMA memory must be contiguous.
  */
-int nvm_admin_sq_create(nvm_aq_ref ref,         // AQ pair reference
-                        nvm_queue_t* sq,        // SQ descriptor
-                        const nvm_queue_t* cq,  // Descriptor to paired CQ
-                        uint16_t id,            // Queue identifier
-                        void* qmem,             // Queue memory (virtual)
-                        uint64_t ioaddr);       // Bus address to queue as seen by controller
+int nvm_admin_sq_create(nvm_aq_ref ref,         // AQ pair reference
+                        nvm_queue_t* sq,        // SQ descriptor
+                        const nvm_queue_t* cq,  // Descriptor to paired CQ
+                        uint16_t id,            // Queue identifier
+                        const nvm_dma_t* dma,   // Queue memory handle
+                        size_t page_offset,     // Number of pages to offset into the handle
+                        size_t n_pages);        // Number of pages to use
 
 
 
@@ -92,6 +95,16 @@ int nvm_admin_sq_delete(nvm_aq_ref ref,
                         const nvm_queue_t* cq);
 
 
+/*
+ * Get log page.
+ */
+int nvm_admin_get_log_page(nvm_aq_ref ref,
+                           uint32_t ns_id,
+                           void* ptr,
+                           uint64_t ioaddr,
+                           uint8_t log_id,
+                           uint64_t log_offset);
+
 #ifdef __cplusplus
 }
 #endif
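Editor's sketch: the queue-creation API above now takes a DMA mapping handle plus a page range instead of a raw vaddr/ioaddr pair. A minimal usage sketch (not part of the patch; assumes `ref` is a valid nvm_aq_ref, `dma` a mapped window with at least two pages, and elides cleanup):

    /*
     * Usage sketch only: create a CQ/SQ pair from a two-page DMA window.
     * The caller is still responsible for zeroing the queue memory.
     */
    nvm_queue_t cq;
    nvm_queue_t sq;

    memset((void*) dma->vaddr, 0, dma->page_size * 2);          // zero both queue pages

    int status = nvm_admin_cq_create(ref, &cq, 1, dma, 0, 1);   // CQ in page 0
    if (nvm_ok(status))
    {
        status = nvm_admin_sq_create(ref, &sq, &cq, 1, dma, 1, 1);  // SQ in page 1
    }
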
diff --git a/include/nvm_aq.h b/include/nvm_aq.h
index bc2ff0ef..6d4e4521 100644
--- a/include/nvm_aq.h
+++ b/include/nvm_aq.h
@@ -19,8 +19,9 @@ extern "C" {
  *
  * Returns a reference handle that can be used for admin RPC calls.
  */
-int nvm_aq_create(nvm_aq_ref* ref, const nvm_ctrl_t* ctrl, const nvm_dma_t* aq_mem);
-
+int nvm_aq_create(nvm_aq_ref* ref,
+                  const nvm_ctrl_t* ctrl,
+                  const nvm_dma_t* dma_window);
 
 
 /*
@@ -44,6 +45,7 @@ void nvm_aq_destroy(nvm_aq_ref ref);
 
 #ifdef __DIS_CLUSTER__
 
+
 /*
  * Maximum number of adapters.
  */
diff --git a/include/nvm_cmd.h b/include/nvm_cmd.h
index dceb4d0e..80ff7121 100644
--- a/include/nvm_cmd.h
+++ b/include/nvm_cmd.h
@@ -10,20 +10,22 @@
 #include
 #include
 #include
+#include
 
 
 /* All namespaces identifier */
 #define NVM_CMD_NS_ALL                  0xffffffff
 
+#define NVM_CMD_OPCODE(gen, fun, data)  (_WB((gen), 7, 7) | _WB((fun), 6, 2) | _WB((data), 1, 0))
 
 /* List of NVM IO command opcodes */
 enum nvm_io_command_set
 {
-    NVM_IO_FLUSH                = (0x00 << 7) | (0x00 << 2) | 0x00,     // 00h
-    NVM_IO_WRITE                = (0x00 << 7) | (0x00 << 2) | 0x01,     // 01h
-    NVM_IO_READ                 = (0x00 << 7) | (0x00 << 2) | 0x02,     // 02h
-    NVM_IO_WRITE_ZEROES         = (0x00 << 7) | (0x02 << 2) | 0x00      // 08h
+    NVM_IO_FLUSH                = NVM_CMD_OPCODE(0, 0, 0),              // 00h
+    NVM_IO_WRITE                = NVM_CMD_OPCODE(0, 0, 1),              // 01h
+    NVM_IO_READ                 = NVM_CMD_OPCODE(0, 0, 2),              // 02h
+    NVM_IO_WRITE_ZEROES         = NVM_CMD_OPCODE(0, 2, 0)               // 08h
 };
 
 
@@ -31,21 +33,19 @@ enum nvm_io_command_set
 /* List of NVM admin command opcodes */
 enum nvm_admin_command_set
 {
-    NVM_ADMIN_DELETE_SUBMISSION_QUEUE   = (0x00 << 7) | (0x00 << 2) | 0x00,
-    NVM_ADMIN_CREATE_SUBMISSION_QUEUE   = (0x00 << 7) | (0x00 << 2) | 0x01,
-    NVM_ADMIN_DELETE_COMPLETION_QUEUE   = (0x00 << 7) | (0x01 << 2) | 0x00,
-    NVM_ADMIN_CREATE_COMPLETION_QUEUE   = (0x00 << 7) | (0x01 << 2) | 0x01,
-    NVM_ADMIN_IDENTIFY                  = (0x00 << 7) | (0x01 << 2) | 0x02,
-    NVM_ADMIN_GET_LOG_PAGE              = (0x00 << 7) | (0x00 << 2) | 0x02,
-    NVM_ADMIN_ABORT                     = (0x00 << 7) | (0x02 << 2) | 0x00,
-    NVM_ADMIN_SET_FEATURES              = (0x00 << 7) | (0x02 << 2) | 0x01,
-    NVM_ADMIN_GET_FEATURES              = (0x00 << 7) | (0x02 << 2) | 0x02
+    NVM_ADMIN_DELETE_SQ         = NVM_CMD_OPCODE(0, 0, 0),              // 00h
+    NVM_ADMIN_CREATE_SQ         = NVM_CMD_OPCODE(0, 0, 1),              // 01h
+    NVM_ADMIN_GET_LOG_PAGE      = NVM_CMD_OPCODE(0, 0, 2),              // 02h
+    NVM_ADMIN_DELETE_CQ         = NVM_CMD_OPCODE(0, 1, 0),              // 04h
+    NVM_ADMIN_CREATE_CQ         = NVM_CMD_OPCODE(0, 1, 1),              // 05h
+    NVM_ADMIN_IDENTIFY          = NVM_CMD_OPCODE(0, 1, 2),              // 06h
+    NVM_ADMIN_ABORT             = NVM_CMD_OPCODE(0, 2, 0),              // 08h
+    NVM_ADMIN_SET_FEATURES      = NVM_CMD_OPCODE(0, 2, 1),              // 09h
+    NVM_ADMIN_GET_FEATURES      = NVM_CMD_OPCODE(0, 2, 2)               // 0Ah
 };
 
-
-
 /*
  * Set command's DWORD0 and DWORD1
  */
@@ -100,50 +100,90 @@ void nvm_cmd_rw_blks(nvm_cmd_t* cmd, uint64_t start_lba, uint16_t n_blks)
 
 
 /*
- * Build a PRP list consisting of PRP entries.
+ * Build PRP list consisting of PRP entries.
  *
  * Populate a memory page with PRP entries required for a transfer.
- * Returns the number of PRP entries used. Number of pages should
- * always be max_data_size (MDTS) for IO commands.
+ * If the number of pages exceeds the available number of entries, the last
+ * entry will not be used (so that it can be used to point to the next list).
+ * (See Chapter 4.4, Figure 14 in the NVMe specification).
  *
- * Note: currently, PRP lists can only be a single page
+ * Returns the number of PRP entries used.
  */
 __host__ __device__ static inline
-size_t nvm_prp_list(size_t page_size, size_t n_pages, void* list_ptr, const uint64_t* data_ioaddrs)
+size_t nvm_prp_list(const nvm_prp_list_t* list, size_t n_pages, const uint64_t* ioaddrs)
 {
-    // TODO #ifdef __NO_COHERENCE__, make a nvm_prp_list_far variant that does not call nvm_cache_flush()
-    size_t prps_per_page = page_size / sizeof(uint64_t);
+    size_t n_prps = list->page_size / sizeof(uint64_t);
     size_t i_prp;
-    uint64_t* list;
-
-    if (prps_per_page < n_pages)
+    volatile uint64_t* entries = (volatile uint64_t*) list->vaddr;
+
+    // Do we need to reserve the last entry for the next list?
+    if (n_pages > n_prps)
     {
-        n_pages = prps_per_page;
+        --n_prps;
     }
-
-    list = (uint64_t*) list_ptr;
-    for (i_prp = 0; i_prp < n_pages; ++i_prp)
+    else
+    {
+        n_prps = n_pages;
+    }
+
+    // Populate list
+    for (i_prp = 0; i_prp < n_prps; ++i_prp)
     {
-        list[i_prp] = data_ioaddrs[i_prp];
+        entries[i_prp] = ioaddrs[i_prp];
     }
 
-    nvm_cache_flush(list_ptr, sizeof(uint64_t) * i_prp);
+    // Flush list cache
+    if (list->local)
+    {
+        nvm_cache_flush((void*) list->vaddr, sizeof(uint64_t) * i_prp);
+    }
+    else
+    {
+        nvm_wcb_flush();
+    }
 
     return i_prp;
 }
 
 
+/*
+ * Build chain of PRP lists.
+ * Returns the total number of PRP entries.
+ */
+__host__ __device__ static inline
+size_t nvm_prp_list_chain(size_t n_lists, const nvm_prp_list_t* lists, size_t n_pages, const uint64_t* ioaddrs)
+{
+    size_t i_list;
+    size_t list_prps;
+    size_t n_prps;
+
+    if (n_lists == 0 || lists == NULL)
+    {
+        return 0;
+    }
+
+    list_prps = nvm_prp_list(&lists[0], n_pages, ioaddrs);
+
+    for (i_list = 1, n_prps = list_prps; i_list < n_lists && n_prps < n_pages; ++i_list, n_prps += list_prps)
+    {
+        volatile uint64_t* next_list_ptr = ((volatile uint64_t*) lists[i_list - 1].vaddr) + list_prps;
+        *next_list_ptr = lists[i_list].ioaddr;
+
+        list_prps = nvm_prp_list(&lists[i_list], n_pages - n_prps, &ioaddrs[n_prps]);
+    }
+
+    return n_prps;
+}
+
+
 
 /*
  * Helper function to build a PRP list and set a command's data pointer fields.
+ * Number of pages should always be max_data_size (MDTS) for IO commands.
  */
 __host__ __device__ static inline
-size_t nvm_cmd_data(nvm_cmd_t* cmd,
-                    size_t page_size,
-                    size_t n_pages,
-                    void* list_ptr,
-                    uint64_t list_ioaddr,
-                    const uint64_t* data_ioaddrs)
+size_t nvm_cmd_data(nvm_cmd_t* cmd, size_t n_lists, const nvm_prp_list_t* lists, size_t n_pages, const uint64_t* ioaddrs)
 {
     size_t prp = 0;
     uint64_t dptr0 = 0;
@@ -155,17 +195,21 @@ size_t nvm_cmd_data(nvm_cmd_t* cmd,
         return 0;
     }
 #endif
+    if (lists == NULL)
+    {
+        n_lists = 0;
+    }
 
-    dptr0 = data_ioaddrs[prp++];
+    dptr0 = ioaddrs[prp++];
 
-    if (n_pages > 2 && list_ptr != NULL)
+    if (n_pages > 2 && n_lists != 0)
     {
-        dptr1 = list_ioaddr;
-        prp += nvm_prp_list(page_size, n_pages - 1, list_ptr, &data_ioaddrs[prp]);
+        prp += nvm_prp_list_chain(n_lists, lists, n_pages - 1, &ioaddrs[prp]);
+        dptr1 = lists[0].ioaddr;
     }
     else if (n_pages >= 2)
     {
-        dptr1 = data_ioaddrs[prp++];
+        dptr1 = ioaddrs[prp++];
     }
 
     nvm_cmd_data_ptr(cmd, dptr0, dptr1);
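Editor's sketch: to make the new PRP plumbing concrete, here is a hedged sketch (not from the patch) of wiring a multi-page transfer into a command using a single-page PRP list. `sq`, `ns_id`, `start_lba` and `n_blks` are assumed to exist, `dma` maps the data buffer, `prp_dma` maps one page reserved for the list, and NVM_PRP_LIST is the helper macro added to nvm_util.h later in this diff:

    /*
     * Sketch only: build the data pointer for a command whose payload
     * spans more pages than the two in-command PRP entries can describe.
     */
    nvm_cmd_t cmd;
    nvm_prp_list_t list = NVM_PRP_LIST(prp_dma->vaddr, prp_dma->page_size,
                                       prp_dma->local, prp_dma->ioaddrs[0]);

    nvm_cmd_header(&cmd, NVM_DEFAULT_CID(&sq), NVM_IO_READ, ns_id);  // second arg assumed to be the CID
    nvm_cmd_data(&cmd, 1, &list, dma->n_ioaddrs, dma->ioaddrs);      // fills DPTR1 with the list ioaddr
    nvm_cmd_rw_blks(&cmd, start_lba, n_blks);
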
diff --git a/include/nvm_ctrl.h b/include/nvm_ctrl.h
index 8bbdb4de..acf6c0c4 100644
--- a/include/nvm_ctrl.h
+++ b/include/nvm_ctrl.h
@@ -7,6 +7,7 @@ extern "C" {
 #include
 #include
 #include
+#include
 
 #ifdef __DIS_CLUSTER__
 #include
@@ -82,19 +83,19 @@ int nvm_raw_ctrl_reset(const nvm_ctrl_t* ctrl, uint64_t acq_ioaddr, uint64_t asq
  * Read from device registers and initialize controller handle.
  * This function should be used when SmartIO is being used.
*/ -int nvm_dis_ctrl_init(nvm_ctrl_t** ctrl, uint64_t smartio_dev_id, uint32_t dis_adapter); +int nvm_dis_ctrl_init(nvm_ctrl_t** ctrl, uint32_t smartio_fdid); #endif #ifdef __DIS_CLUSTER__ -int nvm_dis_ctrl_map_p2p_device(const nvm_ctrl_t* ctrl, sci_device_t dev, uint64_t* ioaddr); +int nvm_dis_ctrl_map_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev, uint64_t* ioaddr); #endif #ifdef __DIS_CLUSTER__ -void nvm_dis_ctrl_unmap_p2p_device(const nvm_ctrl_t* ctrl, sci_device_t dev); +void nvm_dis_ctrl_unmap_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev); #endif diff --git a/include/nvm_dma.h b/include/nvm_dma.h index e872ddc8..9c823dc5 100644 --- a/include/nvm_dma.h +++ b/include/nvm_dma.h @@ -31,12 +31,20 @@ extern "C" { * * Note: vaddr can be NULL. */ -int nvm_dma_map(nvm_dma_t** map, // Mapping descriptor reference - const nvm_ctrl_t* ctrl, // NVM controller reference - void* vaddr, // Pointer to userspace memory (can be NULL if not required) - size_t page_size, // Physical page size - size_t n_pages, // Number of pages to map - const uint64_t* page_addrs); // List of physical/bus addresses to the pages +int nvm_raw_dma_map(nvm_dma_t** map, // Mapping descriptor reference + const nvm_ctrl_t* ctrl, // NVM controller reference + void* vaddr, // Pointer to userspace memory (can be NULL if not required) + size_t page_size, // Physical page size + size_t n_pages, // Number of pages to map + const uint64_t* page_addrs); // List of physical/bus addresses to the pages + + + +/* + * Create DMA mapping descriptor using offsets from a previously + * created DMA descriptor. + */ +int nvm_dma_remap(nvm_dma_t** new_map, const nvm_dma_t* other_map); @@ -93,13 +101,11 @@ int nvm_dma_map_device(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* devptr, si * * The controller handle must have been created using SmartIO, and * the segment must already be prepared on the local adapter. - * - * Note: vaddr can be NULL. */ int nvm_dis_dma_map_local(nvm_dma_t** map, // Mapping descriptor reference const nvm_ctrl_t* ctrl, // NVM controller handle - uint32_t dis_adapter, // Local DIS adapter sci_local_segment_t segment, // Local segment descriptor + uint32_t dis_adapter, // Local DIS adapter segment is prepared on bool map_vaddr); // Should function also map segment into local space #endif /* __DIS_CLUSTER__ */ @@ -117,7 +123,8 @@ int nvm_dis_dma_map_local(nvm_dma_t** map, // Mapping descriptor re * * The remote segment must already be connected. * - * Note: vaddr can be NULL. + * Note: You should generally prefer write combining, except + * for mapped device registers that require fine-grained writes. */ int nvm_dis_dma_map_remote(nvm_dma_t** map, // Mapping descriptor reference const nvm_ctrl_t* ctrl, // NVM controller handle @@ -144,32 +151,31 @@ int nvm_dma_create(nvm_dma_t** map, #if defined( __DIS_CLUSTER__ ) - /* - * Create segment and map it for the controller. - * Short-hand function for creating a local segment. + * Create device memory segment and map it for the controller. + * Short-hand function for creating a device memory segment. + * If mem_hints is 0, the API will assume that the local CPU + * will read from the segment (and thus create it locally). */ int nvm_dis_dma_create(nvm_dma_t** map, const nvm_ctrl_t* ctrl, - uint32_t dis_adapter, uint32_t id, - size_t size); + size_t size, + bool shared, + unsigned int mem_hints); #endif /* __DIS_CLUSTER__ */ #if defined( __DIS_CLUSTER__ ) - /* * Connect to device memory. * Short-hand function for connecting to device memory. 
*/ int nvm_dis_dma_connect(nvm_dma_t** map, const nvm_ctrl_t* ctrl, - uint32_t dis_adapter, - uint32_t segment_no, - size_t size, + uint32_t id, bool shared); #endif /* __DIS_CLUSTER__ */ @@ -180,11 +186,11 @@ int nvm_dis_dma_connect(nvm_dma_t** map, /* * Note: This function requires the IOMMU to be enabled. + * Currently not implemented. */ int nvm_dis_dma_map_host(nvm_dma_t** map, const nvm_ctrl_t* ctrl, uint32_t dis_adapter, - uint32_t id, void* vaddr, size_t size); @@ -196,7 +202,6 @@ int nvm_dis_dma_map_host(nvm_dma_t** map, int nvm_dis_dma_map_device(nvm_dma_t** map, const nvm_ctrl_t* ctrl, uint32_t dis_adapter, - uint32_t id, void* devptr, size_t size); diff --git a/include/nvm_queue.h b/include/nvm_queue.h index 57e61689..b388c936 100644 --- a/include/nvm_queue.h +++ b/include/nvm_queue.h @@ -10,6 +10,7 @@ #include #include #include +#include /* @@ -28,13 +29,32 @@ void nvm_queue_clear(nvm_queue_t* q, // NVM queue descriptor const nvm_ctrl_t* ctrl, // NVM controller handle bool cq, // Is this a completion queue or submission queue? uint16_t no, // Queue number - void* vaddr, // Virtual address to queue memory + uint16_t qs, // Queue size (number of entries) + bool local, // Is this local or remote memory + volatile void* vaddr, // Virtual address to queue memory uint64_t ioaddr); // Bus address to queue memory (as seen from the controller) #ifdef __cplusplus } #endif +/* + * Reset queue descriptor and set all members to initial state. + * + * Note: this function should not be used if the queue has been created but + * not yet deleted, as it will lead to inconsistent state for the + * controller. + */ +#ifdef __cplusplus +extern "C" { +#endif +__host__ +void nvm_queue_reset(nvm_queue_t* q); +#ifdef __cplusplus +} +#endif + + /* * Enqueue a submission command. 
@@ -47,23 +67,22 @@ void nvm_queue_clear(nvm_queue_t* q,        // NVM queue descriptor
 __host__ __device__ static inline
 nvm_cmd_t* nvm_sq_enqueue(nvm_queue_t* sq)
 {
-    // Check if queue is full
-    if ((uint16_t) ((sq->tail - sq->head) % sq->max_entries) == sq->max_entries - 1)
+    // Check if queue has available slots
+    if (((uint16_t) (sq->tail - sq->head)) < sq->qs)
     {
-        return NULL;
-    }
+        // Take slot at end of queue
+        nvm_cmd_t* cmd = (nvm_cmd_t*) (((unsigned char*) sq->vaddr) + sq->es * (sq->tail % sq->qs));
 
-    // Take slot and end of queue
-    nvm_cmd_t* cmd = (nvm_cmd_t*) (((unsigned char*) sq->vaddr) + sq->entry_size * sq->tail);
+        // Increase tail pointer and invert phase tag if necessary
+        if (++sq->tail % sq->qs == 0)
+        {
+            sq->phase = !sq->phase;
+        }
 
-    // Increase tail pointer and wrap around if necessary
-    if (++sq->tail == sq->max_entries)
-    {
-        sq->phase = !sq->phase;
-        sq->tail = 0;
+        return cmd;
     }
 
-    return cmd;
+    return NULL;
 }
 
 
@@ -80,16 +99,18 @@ __device__ static inline
 nvm_cmd_t* nvm_sq_enqueue_n(nvm_queue_t* sq, nvm_cmd_t* last, uint16_t n, uint16_t i)
 {
     unsigned char* start = (unsigned char*) sq->vaddr;
-    unsigned char* end = start + (sq->max_entries * sq->entry_size);
+    unsigned char* end = start + (sq->qs * sq->es);
     nvm_cmd_t* cmd = NULL;
 
+
     if (last == NULL)
     {
-        cmd = (nvm_cmd_t*) (start + sq->entry_size * i);
+        cmd = (nvm_cmd_t*) (start + sq->es * i);
     }
     else
     {
-        cmd = (nvm_cmd_t*) (((unsigned char*) last) + n * sq->entry_size);
+        cmd = (nvm_cmd_t*) (((unsigned char*) last) + n * sq->es);
 
         if (((nvm_cmd_t*) end) <= cmd)
         {
@@ -99,7 +120,7 @@ nvm_cmd_t* nvm_sq_enqueue_n(nvm_queue_t* sq, nvm_cmd_t* last, uint16_t n, uint16
 
     if (i == 0)
     {
-        sq->tail = (((uint32_t) sq->tail) + ((uint32_t) n)) % sq->max_entries;
+        sq->tail = (((uint32_t) sq->tail) + ((uint32_t) n)) % sq->qs;
     }
 
 //#ifdef __CUDA_ARCH__
@@ -124,9 +145,12 @@
 __host__ __device__ static inline
 nvm_cpl_t* nvm_cq_poll(const nvm_queue_t* cq)
 {
-    nvm_cpl_t* cpl = (nvm_cpl_t*) (((unsigned char*) cq->vaddr) + cq->entry_size * cq->head);
+    nvm_cpl_t* cpl = (nvm_cpl_t*) (((unsigned char*) cq->vaddr) + cq->es * (cq->head % cq->qs));
 
-    nvm_cache_invalidate((void*) cpl, sizeof(nvm_cpl_t));
+    if (cq->local)
+    {
+        nvm_cache_invalidate((void*) cpl, sizeof(nvm_cpl_t));
+    }
 
     // Check if new completion is ready by checking the phase tag
     if (!!_RB(*NVM_CPL_STATUS(cpl), 0, 0) != cq->phase)
@@ -156,10 +180,9 @@ nvm_cpl_t* nvm_cq_dequeue(nvm_queue_t* cq)
 
     if (cpl != NULL)
     {
-        // Increase head pointer and wrap around if necessary
-        if (++cq->head == cq->max_entries)
+        // Increase head pointer and invert phase tag
+        if (++cq->head % cq->qs == 0)
         {
-            cq->head = 0;
             cq->phase = !cq->phase;
         }
     }
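Editor's sketch: the queue indices above are now free-running counters rather than wrapped positions, with the slot derived by a modulo. A standalone illustration (not part of the patch) of why the occupancy arithmetic stays correct across uint16_t wrap-around, assuming a power-of-two queue size so that the derived slot index is continuous at the wrap:

    /* Illustration only: free-running uint16_t head/tail counters. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint16_t qs = 64;        // queue size; 64 divides 65536, so slots stay continuous
        uint16_t head = 65500;         // counters close to the uint16_t wrap point
        uint16_t tail = 65530;

        uint16_t in_flight = (uint16_t) (tail - head);   // unsigned subtraction mod 2^16
        assert(in_flight == 30 && in_flight < qs);       // queue has free slots

        tail += 10;                                      // wraps past 65535 to 4
        printf("slot = %u, in flight = %u\n", tail % qs, (uint16_t) (tail - head));
        return 0;                                        // prints: slot = 4, in flight = 40
    }
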
@@ -200,13 +223,29 @@ nvm_cpl_t* nvm_cq_dequeue_block(nvm_queue_t* cq, uint64_t timeout);
 __host__ __device__ static inline
 void nvm_sq_submit(nvm_queue_t* sq)
 {
-    if (sq->last != sq->tail && sq->db != NULL)
+    const uint16_t t = sq->tail % sq->qs;
+
+    if (sq->last != t && sq->db != NULL)
     {
-        nvm_cache_flush((void*) sq->vaddr, sizeof(nvm_cmd_t) * sq->max_entries);
-        nvm_wcb_flush();
+        if (sq->local)
+        {
+            if (t < sq->last)
+            {
+                nvm_cache_flush((void*) (sq->vaddr + sq->es * sq->last), sq->es * (sq->qs - sq->last));
+                nvm_cache_flush((void*) sq->vaddr, sq->es * t);
+            }
+            else
+            {
+                nvm_cache_flush((void*) (sq->vaddr + sq->es * sq->last), sq->es * (t - sq->last));
+            }
+        }
+        else
+        {
+            nvm_wcb_flush();
+        }
 
-        *((volatile uint32_t*) sq->db) = sq->tail;
-        sq->last = sq->tail;
+        *((volatile uint32_t*) sq->db) = t;
+        sq->last = t;
     }
 }
@@ -219,9 +258,9 @@
 __host__ __device__ static inline
 void nvm_sq_update(nvm_queue_t* sq)
 {
     // Update head pointer of submission queue
-    if (sq->db != NULL && ++sq->head == sq->max_entries)
+    if (sq->db != NULL)
     {
-        sq->head = 0;
+        ++sq->head;
     }
 }
@@ -237,11 +276,11 @@ void nvm_sq_update(nvm_queue_t* sq)
 __host__ __device__ static inline
 void nvm_cq_update(nvm_queue_t* cq)
 {
-    if (cq->last != cq->head && cq->db != NULL)
+    if (cq->last != cq->head % cq->qs && cq->db != NULL)
     {
-        *((volatile uint32_t*) cq->db) = cq->head;
-        cq->last = cq->head;
-        nvm_wcb_flush();
+        cq->last = cq->head % cq->qs;
+        *((volatile uint32_t*) cq->db) = cq->last;
+        cq->tail = cq->head;
     }
 }
diff --git a/include/nvm_types.h b/include/nvm_types.h
index ded69f85..af03892c 100644
--- a/include/nvm_types.h
+++ b/include/nvm_types.h
@@ -15,6 +15,9 @@ extern "C" {
 
 /*
  * NVM controller handle.
+ *
+ * Note: This structure will be allocated by the API and needs to be
+ *       released by the API.
  */
 typedef struct
 {
@@ -36,6 +39,9 @@ typedef struct
  * setting administration queues, this structure represents a remote handle to
  * that process. It is used as a descriptor for executing RPC calls to the
  * remote process owning the admin queues.
+ *
+ * Note: This structure will be allocated by the API and needs to be released
+ *       by the API.
  */
 struct nvm_admin_reference;
 typedef struct nvm_admin_reference* nvm_aq_ref;
@@ -55,10 +61,15 @@ typedef struct nvm_admin_reference* nvm_aq_ref;
  * instantiate members.
  *
  * Note: Only page-aligned addresses are supported in NVM Express
+ *
+ * Note: This structure will be allocated by the API and needs to be released
+ *       by the API.
  */
 typedef struct __align__(32)
 {
-    void*       vaddr;      // Virtual address to start of region (NB! can be NULL)
+    volatile void* vaddr;   // Virtual address to start of region (NB! can be NULL)
+    int8_t      local;      // Is this local memory
+    int8_t      contiguous; // Is memory contiguous
     size_t      page_size;  // Controller's page size (MPS)
     size_t      n_ioaddrs;  // Number of MPS-sized pages
     uint64_t    ioaddrs[];  // Physical/IO addresses of the memory pages
@@ -79,22 +90,33 @@ typedef struct __align__(32)
 typedef struct __align__(64)
 {
     uint16_t no;            // Queue number (must be unique per SQ/CQ pair)
-    uint16_t max_entries;   // Maximum number of queue entries supported
-    uint16_t entry_size;    // Queue entry size
-    uint32_t head;          // Queue's head pointer
-    uint32_t tail;          // Queue's tail pointer
-    // TODO: Create bitfield for phase, add a remote field indicating
-    //       if queue is far memory nor not, in which case we whould NOT do
-    //       cache operations
-    int16_t phase;          // Current phase bit
+    uint16_t es;            // Queue entry size
+    uint16_t qs;            // Queue size (number of entries)
+    uint16_t head;          // Queue's head pointer
+    uint16_t tail;          // Queue's tail pointer
+    int8_t   phase;         // Current phase tag
+    int8_t   local;         // Is the queue allocated in local memory
     uint32_t last;          // Used internally to check db writes
     volatile uint32_t* db;  // Pointer to doorbell register (NB! write only)
     volatile void* vaddr;   // Virtual address to start of queue memory
-    uint64_t ioaddr;        // Physical/IO address of the memory page
+    uint64_t ioaddr;        // Physical/IO address to start of queue memory
 } __attribute__((aligned (64))) nvm_queue_t;
 
 
+/*
+ * Convenience type for representing a single-page PRP list.
+ */
+typedef struct __align__(32)
+{
+    volatile void* vaddr;   // Virtual address to memory page
+    int16_t local;          // Indicates if the page is local memory
+    size_t page_size;       // Page size
+    uint64_t ioaddr;        // Physical/IO address of memory page
+} __attribute__((aligned (32))) nvm_prp_list_t;
+
+
+
 /*
  * NVM completion queue entry type (16 bytes)
  */
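Editor's sketch: a short hedged example (not from the patch) of filling the new nvm_prp_list_t descriptor by hand from a one-page DMA window; the NVM_PRP_LIST and NVM_PRP_LIST_FROM_DMA macros added to nvm_util.h below expand to the same initializer:

    /* Sketch only: describe one DMA page as a PRP list page. */
    nvm_prp_list_t list;
    list.vaddr     = dma->vaddr;        // page that will hold the PRP entries
    list.local     = dma->local;        // selects cache flush vs. write-combining flush
    list.page_size = dma->page_size;
    list.ioaddr    = dma->ioaddrs[0];   // address the controller dereferences
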
diff --git a/include/nvm_util.h b/include/nvm_util.h
index 9dd6d707..555f1f2b 100644
--- a/include/nvm_util.h
+++ b/include/nvm_util.h
@@ -12,22 +12,12 @@
 
 
 
-/* Convenience function for creating a bit mask */
-#ifdef __CUDACC__
-__host__ __device__
-#endif
-static inline
-uint64_t _nvm_bitmask(int hi, int lo)
-{
-    uint64_t mask = 0;
-
-    for (int i = lo; i <= hi; ++i)
-    {
-        mask |= 1UL << i;
-    }
+/* Convenience macro for creating a bit mask */
+#define _NVM_MASK(num_bits) \
+    ((1ULL << (num_bits)) - 1)
 
-    return mask;
-}
+#define _NVM_MASK_PART(hi, lo) \
+    (_NVM_MASK((hi) + 1) - _NVM_MASK(lo))
 
 
 #if defined( __NO_COHERENCE__ ) && defined( __DIS_CLUSTER__ )
@@ -91,12 +81,12 @@ void _nvm_wcb_flush()
 
 /* Extract specific bits */
 #define _RB(v, hi, lo)      \
-    ( ( (v) & _nvm_bitmask((hi), (lo)) ) >> (lo) )
+    ( ( (v) & _NVM_MASK_PART((hi), (lo)) ) >> (lo) )
 
 
 /* Set specifics bits */
 #define _WB(v, hi, lo)      \
-    ( ( (v) << (lo) ) & _nvm_bitmask((hi), (lo)) )
+    ( ( (v) << (lo) ) & _NVM_MASK_PART((hi), (lo)) )
 
 
 /* Offset to a register */
@@ -104,6 +94,7 @@ void _nvm_wcb_flush()
     ((volatile uint##bits##_t *) (((volatile unsigned char*) ((volatile void*) (p))) + (offs)))
 
 
+
 /*
  * Calculate block number from page number.
  */
@@ -183,6 +174,15 @@ void _nvm_wcb_flush()
     NVM_PTR_OFFSET((dma_ptr)->vaddr, (dma_ptr)->page_size, (pageno))
 
 
+/* Make PRP list descriptor from values */
+#define NVM_PRP_LIST(vaddr, page_size, local, ioaddr) \
+    {(vaddr), !!(local), (page_size), (ioaddr)}
+
+
+/* Make PRP list descriptor from DMA descriptor */
+#define NVM_PRP_LIST_FROM_DMA(dma_ptr, offset) \
+    NVM_PRP_LIST(NVM_DMA_OFFSET(dma_ptr, offset), (dma_ptr)->page_size, (dma_ptr)->local, (dma_ptr)->ioaddrs[(offset)])
+
 
 
 /* Standard fields in a command */
@@ -198,7 +198,7 @@ void _nvm_wcb_flush()
 
 
 /* Convenience macro for creating a default CID based on submission queue */
-#define NVM_DEFAULT_CID(sq) ((sq)->tail + (!(sq)->phase) * (sq)->max_entries)
+#define NVM_DEFAULT_CID(sq) ((sq)->tail + (!(sq)->phase) * (sq)->qs)
 
 
 #ifdef __cplusplus
@@ -213,4 +213,16 @@ const nvm_ctrl_t* nvm_ctrl_from_aq_ref(nvm_aq_ref ref);
 #endif
 
 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Get controller associated with DMA window
+ */
+const nvm_ctrl_t* nvm_ctrl_from_dma(const nvm_dma_t* dma);
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* __NVM_UTIL_H__ */
diff --git a/module/pci.c b/module/pci.c
index 8920e4ce..1566c16c 100644
--- a/module/pci.c
+++ b/module/pci.c
@@ -21,7 +21,7 @@
 #define PCI_CLASS_NVME_MASK     0xffffff
 
 
-MODULE_AUTHOR("Jonas Markussen ");
+MODULE_AUTHOR("Jonas Markussen ");
 MODULE_DESCRIPTION("Set up DMA mappings for userspace buffers");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION("0.3");
diff --git a/src/admin.c b/src/admin.c
index e9469d0e..e204fd9c 100644
--- a/src/admin.c
+++ b/src/admin.c
@@ -10,7 +10,6 @@
 #include
 #include
 #include
-#include "admin.h"
 #include "rpc.h"
 #include "regs.h"
 #include "util.h"
@@ -18,45 +17,45 @@
 
 
 
-void _nvm_admin_cq_create(nvm_cmd_t* cmd, const nvm_queue_t* cq)
+static void admin_cq_create(nvm_cmd_t* cmd, const nvm_queue_t* cq, uint64_t ioaddr)
 {
-    nvm_cmd_header(cmd, 0, NVM_ADMIN_CREATE_COMPLETION_QUEUE, 0);
-    nvm_cmd_data_ptr(cmd, cq->ioaddr, 0);
+    nvm_cmd_header(cmd,
0, NVM_ADMIN_CREATE_CQ, 0); + nvm_cmd_data_ptr(cmd, ioaddr, 0); - cmd->dword[10] = (((uint32_t) cq->max_entries - 1) << 16) | cq->no; + cmd->dword[10] = (((uint32_t) cq->qs - 1) << 16) | cq->no; cmd->dword[11] = (0x0000 << 16) | (0x00 << 1) | 0x01; } -void _nvm_admin_cq_delete(nvm_cmd_t* cmd, const nvm_queue_t* cq) +static void admin_cq_delete(nvm_cmd_t* cmd, const nvm_queue_t* cq) { - nvm_cmd_header(cmd, 0, NVM_ADMIN_DELETE_COMPLETION_QUEUE, 0); + nvm_cmd_header(cmd, 0, NVM_ADMIN_DELETE_CQ, 0); cmd->dword[10] = cq->no & 0xffff; } -void _nvm_admin_sq_create(nvm_cmd_t* cmd, const nvm_queue_t* sq, const nvm_queue_t* cq) +static void admin_sq_create(nvm_cmd_t* cmd, const nvm_queue_t* sq, const nvm_queue_t* cq, uint64_t ioaddr) { - nvm_cmd_header(cmd, 0, NVM_ADMIN_CREATE_SUBMISSION_QUEUE, 0); - nvm_cmd_data_ptr(cmd, sq->ioaddr, 0); + nvm_cmd_header(cmd, 0, NVM_ADMIN_CREATE_SQ, 0); + nvm_cmd_data_ptr(cmd, ioaddr, 0); - cmd->dword[10] = (((uint32_t) sq->max_entries - 1) << 16) | sq->no; + cmd->dword[10] = (((uint32_t) sq->qs - 1) << 16) | sq->no; cmd->dword[11] = (((uint32_t) cq->no) << 16) | (0x00 << 1) | 0x01; } -void _nvm_admin_sq_delete(nvm_cmd_t* cmd, const nvm_queue_t* sq) +static void admin_sq_delete(nvm_cmd_t* cmd, const nvm_queue_t* sq) { - nvm_cmd_header(cmd, 0, NVM_ADMIN_DELETE_SUBMISSION_QUEUE, 0); + nvm_cmd_header(cmd, 0, NVM_ADMIN_DELETE_SQ, 0); cmd->dword[10] = sq->no & 0xffff; } -void _nvm_admin_current_num_queues(nvm_cmd_t* cmd, bool set, uint16_t n_cqs, uint16_t n_sqs) +static void admin_current_num_queues(nvm_cmd_t* cmd, bool set, uint16_t n_cqs, uint16_t n_sqs) { nvm_cmd_header(cmd, 0, set ? NVM_ADMIN_SET_FEATURES : NVM_ADMIN_GET_FEATURES, 0); nvm_cmd_data_ptr(cmd, 0, 0); @@ -67,7 +66,7 @@ void _nvm_admin_current_num_queues(nvm_cmd_t* cmd, bool set, uint16_t n_cqs, uin -void _nvm_admin_identify_ctrl(nvm_cmd_t* cmd, uint64_t ioaddr) +static void admin_identify_ctrl(nvm_cmd_t* cmd, uint64_t ioaddr) { nvm_cmd_header(cmd, 0, NVM_ADMIN_IDENTIFY, 0); nvm_cmd_data_ptr(cmd, ioaddr, 0); @@ -78,7 +77,7 @@ void _nvm_admin_identify_ctrl(nvm_cmd_t* cmd, uint64_t ioaddr) -void _nvm_admin_identify_ns(nvm_cmd_t* cmd, uint32_t ns_id, uint64_t ioaddr) +static void admin_identify_ns(nvm_cmd_t* cmd, uint32_t ns_id, uint64_t ioaddr) { nvm_cmd_header(cmd, 0, NVM_ADMIN_IDENTIFY, ns_id); nvm_cmd_data_ptr(cmd, ioaddr, 0); @@ -89,7 +88,7 @@ void _nvm_admin_identify_ns(nvm_cmd_t* cmd, uint32_t ns_id, uint64_t ioaddr) -void _nvm_admin_get_log_page(nvm_cmd_t* cmd, uint32_t ns_id, uint64_t ioaddr, uint8_t log_id, uint64_t log_offset) +static void admin_get_log_page(nvm_cmd_t* cmd, uint32_t ns_id, uint64_t ioaddr, uint8_t log_id, uint64_t log_offset) { nvm_cmd_header(cmd, 0, NVM_ADMIN_GET_LOG_PAGE, ns_id); nvm_cmd_data_ptr(cmd, ioaddr, 0); @@ -128,7 +127,7 @@ int nvm_admin_ctrl_info(nvm_aq_ref ref, struct nvm_ctrl_info* info, void* ptr, u info->contiguous = !!CAP$CQR(ctrl->mm_ptr); info->max_entries = ctrl->max_entries; - _nvm_admin_identify_ctrl(&command, ioaddr); + admin_identify_ctrl(&command, ioaddr); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -176,7 +175,7 @@ int nvm_admin_ns_info(nvm_aq_ref ref, struct nvm_ns_info* info, uint32_t ns_id, info->ns_id = ns_id; - _nvm_admin_identify_ns(&command, ns_id, ioaddr); + admin_identify_ns(&command, ns_id, ioaddr); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -201,6 +200,8 @@ int nvm_admin_ns_info(nvm_aq_ref ref, struct nvm_ns_info* info, uint32_t ns_id, return NVM_ERR_PACK(NULL, 0); } + + int 
nvm_admin_get_log_page(nvm_aq_ref ref, uint32_t ns_id, void* ptr, uint64_t ioaddr, uint8_t log_id, uint64_t log_offset) { nvm_cmd_t command; @@ -212,7 +213,7 @@ int nvm_admin_get_log_page(nvm_aq_ref ref, uint32_t ns_id, void* ptr, uint64_t i nvm_cache_invalidate(ptr, 0x1000); - _nvm_admin_get_log_page(&command, ns_id, ioaddr, log_id, log_offset); + admin_get_log_page(&command, ns_id, ioaddr, log_id, log_offset); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -228,24 +229,53 @@ int nvm_admin_get_log_page(nvm_aq_ref ref, uint32_t ns_id, void* ptr, uint64_t i -int nvm_admin_cq_create(nvm_aq_ref ref, nvm_queue_t* cq, uint16_t id, void* vaddr, uint64_t ioaddr) +int nvm_admin_cq_create(nvm_aq_ref ref, nvm_queue_t* cq, uint16_t id, const nvm_dma_t* dma, size_t offset, size_t n_pages) { nvm_cmd_t command; nvm_cpl_t completion; nvm_queue_t queue; + const size_t cpls_per_page = dma->page_size / sizeof(nvm_cpl_t); - const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); - + // Queue number 0 is reserved for admin queues if (id == 0) { return NVM_ERR_PACK(NULL, EINVAL); } - nvm_queue_clear(&queue, ctrl, true, id, vaddr, ioaddr); + // Do some sanity checking + if (dma->vaddr == NULL) + { + return NVM_ERR_PACK(NULL, EINVAL); + } + else if (n_pages == 0 + || n_pages > dma->n_ioaddrs + || offset > dma->n_ioaddrs + || offset + n_pages > dma->n_ioaddrs) + { + return NVM_ERR_PACK(NULL, ERANGE); + } + + // We currently only support contiguous memory + if (n_pages > 1 && !dma->contiguous) + { + dprintf("Non-contiguous queues are not supported\n"); + return NVM_ERR_PACK(NULL, ENOTSUP); + } + + // Get controller reference + const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); + if (ctrl == NULL) + { + return NVM_ERR_PACK(NULL, ENOTTY); + } + + nvm_queue_clear(&queue, ctrl, true, id, cpls_per_page * n_pages, + dma->local, NVM_DMA_OFFSET(dma, offset), dma->ioaddrs[offset]); memset(&command, 0, sizeof(command)); memset(&completion, 0, sizeof(completion)); - _nvm_admin_cq_create(&command, &queue); + + admin_cq_create(&command, &queue, dma->ioaddrs[offset]); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -272,7 +302,7 @@ int nvm_admin_cq_delete(nvm_aq_ref ref, nvm_queue_t* cq) memset(&command, 0, sizeof(command)); memset(&completion, 0, sizeof(completion)); - _nvm_admin_cq_delete(&command, cq); + admin_cq_delete(&command, cq); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -288,24 +318,52 @@ int nvm_admin_cq_delete(nvm_aq_ref ref, nvm_queue_t* cq) -int nvm_admin_sq_create(nvm_aq_ref ref, nvm_queue_t* sq, const nvm_queue_t* cq, uint16_t id, void* vaddr, uint64_t ioaddr) +int nvm_admin_sq_create(nvm_aq_ref ref, nvm_queue_t* sq, const nvm_queue_t* cq, uint16_t id, const nvm_dma_t* dma, size_t offset, size_t n_pages) { nvm_cmd_t command; nvm_cpl_t completion; nvm_queue_t queue; + const size_t cmds_per_page = dma->page_size / sizeof(nvm_cmd_t); - const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); - + // Queue number 0 is reserved for admin queues if (id == 0) { return NVM_ERR_PACK(NULL, EINVAL); } - nvm_queue_clear(&queue, ctrl, false, id, vaddr, ioaddr); + // Do some sanity checking + if (dma->vaddr == NULL) + { + return NVM_ERR_PACK(NULL, EINVAL); + } + else if (n_pages == 0 + || n_pages > dma->n_ioaddrs + || offset > dma->n_ioaddrs + || offset + n_pages > dma->n_ioaddrs) + { + return NVM_ERR_PACK(NULL, ERANGE); + } + + // We currently only support contiguous memory + if (n_pages > 1 && !dma->contiguous) + { + dprintf("Non-contiguous queues are not 
supported\n"); + return NVM_ERR_PACK(NULL, ENOTSUP); + } + + // Get controller reference + const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); + if (ctrl == NULL) + { + return NVM_ERR_PACK(NULL, ENOTTY); + } + + nvm_queue_clear(&queue, ctrl, false, id, cmds_per_page * n_pages, + dma->local, NVM_DMA_OFFSET(dma, offset), dma->ioaddrs[offset]); memset(&command, 0, sizeof(command)); memset(&completion, 0, sizeof(completion)); - _nvm_admin_sq_create(&command, &queue, cq); + admin_sq_create(&command, &queue, cq, dma->ioaddrs[offset]); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -337,7 +395,7 @@ int nvm_admin_sq_delete(nvm_aq_ref ref, nvm_queue_t* sq, const nvm_queue_t* cq) memset(&command, 0, sizeof(command)); memset(&completion, 0, sizeof(completion)); - _nvm_admin_sq_delete(&command, sq); + admin_sq_delete(&command, sq); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -361,7 +419,7 @@ int nvm_admin_get_num_queues(nvm_aq_ref ref, uint16_t* n_cqs, uint16_t* n_sqs) memset(&command, 0, sizeof(command)); memset(&completion, 0, sizeof(completion)); - _nvm_admin_current_num_queues(&command, false, 0, 0); + admin_current_num_queues(&command, false, 0, 0); int err = nvm_raw_rpc(ref, &command, &completion); if (!nvm_ok(err)) @@ -398,7 +456,7 @@ int nvm_admin_request_num_queues(nvm_aq_ref ref, uint16_t* n_cqs, uint16_t* n_sq memset(&command, 0, sizeof(command)); memset(&completion, 0, sizeof(completion)); - _nvm_admin_current_num_queues(&command, true, *n_cqs, *n_sqs); + admin_current_num_queues(&command, true, *n_cqs, *n_sqs); int err = nvm_raw_rpc(ref, &command, &completion); if (err != 0) diff --git a/src/admin.h b/src/admin.h deleted file mode 100644 index e74b04ac..00000000 --- a/src/admin.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef __NVM_INTERNAL_ADMIN_H__ -#define __NVM_INTERNAL_ADMIN_H__ - - - -/* - * Create IO completion queue (CQ). - * - * Build an NVM admin command for creating a CQ. - */ -void _nvm_admin_cq_create(nvm_cmd_t* cmd, const nvm_queue_t* cq); - - - -/* - * Create IO submission queue (SQ). - * - * Build an NVM admin command for creating an SQ. Note that the associated - * CQ must have been created first. - */ -void _nvm_admin_sq_create(nvm_cmd_t* cmd, const nvm_queue_t* cq, const nvm_queue_t* sq); - - - -/* - * Delete IO submission queue (SQ). - * - * Build an NVM admin command for deleting an SQ. - */ -void _nvm_admin_sq_delete(nvm_cmd_t* cmd, const nvm_queue_t* sq); - - - -/* - * Delete IO completion queue (CQ). - * - * Build an NVM admin command for deleting a CQ. Note that the associated - * SQ must have been deleted first. - */ -void _nvm_admin_cq_delete(nvm_cmd_t* cmd, const nvm_queue_t* cq); - - - -/* - * Identify controller. - * - * Build an NVM admin command for identifying the controller. - */ -void _nvm_admin_identify_ctrl(nvm_cmd_t* cmd, uint64_t ioaddr); - - - -/* - * Identify namespace. - */ -void _nvm_admin_identify_ns(nvm_cmd_t* cmd, uint32_t ns_id, uint64_t ioaddr); - - - -/* - * Set/get current number of queues. - */ -void _nvm_admin_current_num_queues(nvm_cmd_t* cmd, bool set, uint16_t n_cqs, uint16_t n_sqs); - -/* - * Get log page. 
- */ -int nvm_admin_get_log_page(nvm_aq_ref ref, uint32_t ns_id, void* ptr, uint64_t ioaddr, uint8_t log_id, uint64_t log_offset); - -#endif /* __NVM_INTERNAL_ADMIN_H__ */ diff --git a/src/ctrl.c b/src/ctrl.c index c6589cdc..11d16daa 100644 --- a/src/ctrl.c +++ b/src/ctrl.c @@ -1,12 +1,3 @@ -#ifdef _SISCI -#include -#include -#include -#ifndef __DIS_CLUSTER__ -#define __DIS_CLUSTER__ -#endif -#endif - #include #include #include @@ -15,227 +6,196 @@ #include #include #include -#include -#include -#include -#include -#include "dis/device.h" -#include "dis/map.h" +#include "mutex.h" #include "ctrl.h" #include "util.h" #include "regs.h" #include "dprintf.h" - -/* Forward declaration */ -struct memory_reference; - - -/* - * Controller device type. - * Indicates how we access the controller. - */ -enum device_type -{ - _DEVICE_TYPE_UNKNOWN = 0x00, // Device is mapped manually by the user - _DEVICE_TYPE_SYSFS = 0x01, // Device is mapped through file descriptor - _DEVICE_TYPE_SMARTIO = 0x04 // Device is mapped by SISCI SmartIO API -}; - - - -/* - * Internal handle container. - */ -struct controller -{ - enum device_type type; // Device type - struct memory_reference* ref; // Reference to mapped BAR0 - int fd; // File descriptor to memory mapping - nvm_ctrl_t handle; // User handle -}; - - -#ifdef _SISCI -struct memory_reference -{ - struct device_memory bar; // Reference to BAR0 - struct va_map map; // Mapping descriptor -}; -#endif - - - - /* Convenience defines */ #define encode_page_size(ps) _nvm_b2log((ps) >> 12) #define encode_entry_size(es) _nvm_b2log(es) -#define container(ctrl) \ - ((struct controller*) (((unsigned char*) (ctrl)) - offsetof(struct controller, handle))) - -#define const_container(ctrl) \ - ((const struct controller*) (((const unsigned char*) (ctrl)) - offsetof(struct controller, handle))) - - - /* - * Look up file descriptor from controller handle. + * Helper function to allocate a handle container. */ -int _nvm_fd_from_ctrl(const nvm_ctrl_t* ctrl) +static struct controller* create_handle(struct device* dev, const struct device_ops* ops, enum device_type type) { - const struct controller* container = const_container(ctrl); + int err; + struct controller* handle; - switch (container->type) + handle = (struct controller*) malloc(sizeof(struct controller)); + if (handle == NULL) { - case _DEVICE_TYPE_SYSFS: - return container->fd; + dprintf("Failed to allocate controller handle: %s\n", strerror(errno)); + return NULL; + } - default: - return -EBADF; + memset(&handle->handle, 0, sizeof(nvm_ctrl_t)); + + err = _nvm_mutex_init(&handle->lock); + if (err != 0) + { + free(handle); + return NULL; + } + + handle->count = 1; + handle->device = dev; + handle->type = type; + if (ops != NULL) + { + handle->ops = *ops; + } + else + { + memset(&handle->ops, 0, sizeof(struct device_ops)); } + + return handle; } -#ifdef _SISCI -/* - * Look up device from controller handle. - */ -const struct device* _nvm_device_from_ctrl(const nvm_ctrl_t* ctrl) +static void remove_handle(struct controller* handle) { - const struct controller* container = const_container(ctrl); + int status; - if (container->type == _DEVICE_TYPE_SMARTIO && container->ref != NULL) + do { - return &container->ref->bar.device; + status = _nvm_mutex_free(&handle->lock); } + while (status == EBUSY); - return NULL; + free(handle); } -#endif /* - * Helper function to allocate a handle container. + * Take device reference. 
*/ -static struct controller* create_container() +struct controller* _nvm_ctrl_get(const nvm_ctrl_t* ctrl) { - struct controller* container = (struct controller*) malloc(sizeof(struct controller)); - - if (container == NULL) + if (ctrl != NULL) { - dprintf("Failed to allocate controller handle: %s\n", strerror(errno)); - return NULL; - } + // This is technically undefined behaviour (casting away const), + // but we are not modifying the handle itself, only the container. + struct controller* controller = _nvm_container_of(ctrl, struct controller, handle); + + int err = _nvm_mutex_lock(&controller->lock); + if (err != 0) + { + dprintf("Failed to take device reference lock: %s\n", strerror(err)); + return NULL; + } - container->type = _DEVICE_TYPE_UNKNOWN; - container->fd = -1; - container->ref = NULL; + // Increase reference count + ++controller->count; - return container; + _nvm_mutex_unlock(&controller->lock); + + return controller; + } + + return NULL; } -#ifdef _SISCI /* - * Helper function to increase a device reference and connect - * to a PCI BAR0 on the controller's device. + * Release device reference. */ -static int connect_register_memory(struct memory_reference** ref, const struct device* dev, uint32_t adapter) +void _nvm_ctrl_put(struct controller* controller) { - *ref = NULL; - - struct memory_reference* mem = (struct memory_reference*) malloc(sizeof(struct memory_reference)); - if (mem == NULL) + if (controller != NULL) { - dprintf("Failed to allocate controller memory reference: %s\n", strerror(errno)); - return ENOMEM; - } + _nvm_mutex_lock(&controller->lock); + if (--controller->count == 0) + { + if (controller->ops.release_device != NULL) + { + controller->ops.release_device(controller->device, controller->handle.mm_ptr, controller->handle.mm_size); + } - int err = _nvm_device_memory_get(&mem->bar, dev, adapter, 0, SCI_FLAG_BAR); - if (err != 0) - { - free(mem); - dprintf("Failed to get controller memory reference: %s\n", strerror(err)); - return err; - } + controller->device = NULL; + } + _nvm_mutex_unlock(&controller->lock); - err = _nvm_va_map_remote(&mem->map, NVM_CTRL_MEM_MINSIZE, mem->bar.segment, true, false); - if (err != 0) - { - _nvm_device_memory_put(&mem->bar); - free(mem); - dprintf("Failed to map controller memory: %s\n", strerror(err)); - return err; + if (controller->device == NULL) + { + remove_handle(controller); + } } - - *ref = mem; - return 0; } -#endif -#ifdef _SISCI -static void disconnect_register_memory(struct memory_reference* ref) +int _nvm_ctrl_init(nvm_ctrl_t** handle, struct device* dev, const struct device_ops* ops, enum device_type type, + volatile void* mm_ptr, size_t mm_size) { - _nvm_va_unmap(&ref->map); - _nvm_device_memory_put(&ref->bar); - free(ref); -} -#endif - + struct controller* container; + nvm_ctrl_t* ctrl; + *handle = NULL; -/* - * Helper function to initialize the controller handle by reading - * the appropriate registers from the controller BAR. 
- */ -static int initialize_handle(nvm_ctrl_t* ctrl, volatile void* mm_ptr, size_t mm_size) -{ - if (mm_size < NVM_CTRL_MEM_MINSIZE) + container = create_handle(dev, ops, type); + if (container == NULL) { - return EINVAL; + return ENOMEM; } - ctrl->mm_size = mm_size; + ctrl = &container->handle; ctrl->mm_ptr = mm_ptr; + ctrl->mm_size = mm_size; + + if (ctrl->mm_size < NVM_CTRL_MEM_MINSIZE) + { + remove_handle(container); + return EINVAL; + } // Get the system page size size_t page_size = _nvm_host_page_size(); if (page_size == 0) { + remove_handle(container); return ENOMEM; } // Get the controller page size uint8_t host_page_size = encode_page_size(page_size); - uint8_t max_page_size = CAP$MPSMAX(mm_ptr); - uint8_t min_page_size = CAP$MPSMIN(mm_ptr); + uint8_t max_page_size = CAP$MPSMAX(ctrl->mm_ptr); + uint8_t min_page_size = CAP$MPSMIN(ctrl->mm_ptr); if ( ! (min_page_size <= host_page_size && host_page_size <= max_page_size) ) { + remove_handle(container); dprintf("System page size is incompatible with controller page size\n"); return ERANGE; } // Set controller properties ctrl->page_size = page_size; - ctrl->dstrd = CAP$DSTRD(mm_ptr); - ctrl->timeout = CAP$TO(mm_ptr) * 500UL; - ctrl->max_entries = CAP$MQES(mm_ptr) + 1; // CAP.MQES is 0's based + ctrl->dstrd = CAP$DSTRD(ctrl->mm_ptr); + ctrl->timeout = CAP$TO(ctrl->mm_ptr) * 500UL; + ctrl->max_entries = CAP$MQES(ctrl->mm_ptr) + 1; // CAP.MQES is 0's based + + *handle = ctrl; return 0; } +/* + * Reset the controller manually. + * We deliberately use the pointers from the user-handle, in case the user + * overrides them somehow. + */ int nvm_raw_ctrl_reset(const nvm_ctrl_t* ctrl, uint64_t acq_addr, uint64_t asq_addr) { volatile uint32_t* cc = CC(ctrl->mm_ptr); @@ -297,224 +257,19 @@ int nvm_raw_ctrl_reset(const nvm_ctrl_t* ctrl, uint64_t acq_addr, uint64_t asq_a -int nvm_raw_ctrl_init(nvm_ctrl_t** ctrl, volatile void* mm_ptr, size_t mm_size) -{ - int err; - *ctrl = NULL; - - struct controller* container = create_container(); - if (container == NULL) - { - return ENOMEM; - } - - container->type = _DEVICE_TYPE_UNKNOWN; - - err = initialize_handle(&container->handle, mm_ptr, mm_size); - if (err != 0) - { - free(container); - return err; - } - - *ctrl = &container->handle; - return 0; -} - - - -#ifdef _SISCI -int nvm_dis_ctrl_init(nvm_ctrl_t** ctrl, uint64_t dev_id, uint32_t adapter) -{ - int err; - *ctrl = NULL; - - struct controller* container = create_container(); - if (container == NULL) - { - return ENOMEM; - } - - container->type = _DEVICE_TYPE_SMARTIO; - - struct device dev; - err = _nvm_device_get(&dev, dev_id); - if (err != 0) - { - free(container); - return err; - } - - err = connect_register_memory(&container->ref, &dev, adapter); - if (err != 0) - { - _nvm_device_put(&dev); - free(container); - return err; - } - - _nvm_device_put(&dev); - - size_t size = SCIGetRemoteSegmentSize(container->ref->bar.segment); - - err = initialize_handle(&container->handle, container->ref->map.vaddr, size); - if (err != 0) - { - disconnect_register_memory(container->ref); - free(container); - return err; - } - - *ctrl = &container->handle; - return 0; -} -#endif - - - -#if defined ( __unix__ ) -int nvm_ctrl_init(nvm_ctrl_t** ctrl, int filedes) -{ - int err; - - int fd = dup(filedes); - if (fd < 0) - { - dprintf("Could not duplicate file descriptor: %s\n", strerror(errno)); - return errno; - } - - err = fcntl(fd, F_SETFD, O_RDWR|O_NONBLOCK); - if (err == -1) - { - close(fd); - dprintf("Failed to set file descriptor flags: %s\n", strerror(errno)); - 
return errno; - } - - volatile void* ptr = mmap(NULL, NVM_CTRL_MEM_MINSIZE, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, fd, 0); - if (ptr == NULL) - { - close(fd); - dprintf("Failed to map device memory: %s\n", strerror(errno)); - return errno; - } - - struct controller* container = create_container(); - if (container == NULL) - { - munmap((void*) ptr, NVM_CTRL_MEM_MINSIZE); - close(fd); - return ENOMEM; - } - - container->type = _DEVICE_TYPE_SYSFS; - - err = initialize_handle(&container->handle, ptr, NVM_CTRL_MEM_MINSIZE); - if (err != 0) - { - munmap((void*) ptr, NVM_CTRL_MEM_MINSIZE); - free(container); - close(fd); - return err; - } - - container->fd = fd; - - *ctrl = &container->handle; - return 0; -} -#endif - - - void nvm_ctrl_free(nvm_ctrl_t* ctrl) { if (ctrl != NULL) { - struct controller* container = container(ctrl); - - switch (container->type) - { - case _DEVICE_TYPE_UNKNOWN: - // Do nothing - break; - -#if defined ( __unix__ ) - case _DEVICE_TYPE_SYSFS: - munmap((void*) ctrl->mm_ptr, ctrl->mm_size); - close(container->fd); - break; -#endif - -#if _SISCI - case _DEVICE_TYPE_SMARTIO: - disconnect_register_memory(container->ref); - break; -#endif - - default: - dprintf("Unknown controller type\n"); - break; - } - - free(container); - } -} - - - -#ifdef _SISCI -void nvm_dis_ctrl_unmap_p2p_device(const nvm_ctrl_t* ctrl, sci_device_t dev) -{ - if (ctrl != NULL) - { - const struct controller* container = const_container(ctrl); - - if (container->type == _DEVICE_TYPE_SMARTIO && container->ref != NULL) - { - sci_error_t err; - do - { - SCIUnmapRemoteSegmentForDevice(container->ref->bar.segment, dev, 0, &err); - } - while (err == SCI_ERR_BUSY); - } + struct controller* container = _nvm_container_of(ctrl, struct controller, handle); + _nvm_ctrl_put(container); } } -#endif -#ifdef _SISCI -int nvm_dis_ctrl_map_p2p_device(const nvm_ctrl_t* ctrl, sci_device_t dev, uint64_t* ioaddr) +int nvm_raw_ctrl_init(nvm_ctrl_t** ctrl, volatile void* mm_ptr, size_t mm_size) { - if (ctrl == NULL) - { - return EINVAL; - } - - const struct controller* container = const_container(ctrl); - - if (container->type != _DEVICE_TYPE_SMARTIO || container->ref == NULL) - { - return EINVAL; - } - - sci_error_t err; - sci_ioaddr_t addr; - SCIMapRemoteSegmentForDevice(container->ref->bar.segment, dev, &addr, 0, 0, &err); - if (err != SCI_ERR_OK) - { - dprintf("Failed to map controller BAR for device: %s\n", _SCIGetErrorString(err)); - return EIO; - } - - if (ioaddr != NULL) - { - *ioaddr = addr; - } - return 0; + return _nvm_ctrl_init(ctrl, NULL, NULL, DEVICE_TYPE_UNKNOWN, mm_ptr, mm_size); } -#endif diff --git a/src/ctrl.h b/src/ctrl.h index d89cdc7a..32490533 100644 --- a/src/ctrl.h +++ b/src/ctrl.h @@ -2,30 +2,109 @@ #define __NVM_INTERNAL_CTRL_H__ #include +#include "mutex.h" +#include "util.h" -/* Forward declaration */ +/* + * Device handle. + */ struct device; /* - * Look up file descriptor from controller handle. - * Returns a valid file descriptor (positive integer) or -EBADF if the - * controller memory is not mapped through this library. + * Forward declaration of a virtual memory address range. + */ +struct va_range; + + + +/* + * Device reference operations. + */ +struct device_ops +{ + /* + * Release device reference (called when refcount is 0) + * This should also unmap MLBAR/BAR0 of the device. + */ + void (*release_device)(struct device* dev, volatile void* mm_ptr, size_t mm_size); + + + /* + * Map an address range for the device. 
+ */ + int (*map_range)(const struct device* dev, const struct va_range* va, uint64_t* ioaddrs); + + + /* + * Unmap an address range for the device. + */ + void (*unmap_range)(const struct device* dev, const struct va_range* va); +}; + + + +/* + * Controller device type. + * Indicates how the controller handle was initialized. + */ +enum device_type +{ + DEVICE_TYPE_UNKNOWN = 0x00, /* Device is mapped manually by the user */ + DEVICE_TYPE_IOCTL = 0x01, /* Device is mapped through UNIX file descriptor */ + DEVICE_TYPE_SMARTIO = 0x02, /* Device is mapped by SISCI SmartIO API */ +}; + + + +/* + * Internal controller handle. + * Used to reference count the device handle. + * Reference counting is handled by the get and put functions. */ -int _nvm_fd_from_ctrl(const nvm_ctrl_t* ctrl); +struct controller +{ + struct mutex lock; /* Ensure exclusive access */ + uint32_t count; /* Reference count */ + enum device_type type; /* Controller device type */ + struct device* device; /* Device handle */ + struct device_ops ops; /* Device operations */ + nvm_ctrl_t handle; /* User's handle */ +}; + + +/* + * Helper function to initialize the controller handle by reading + * the appropriate registers from the controller BAR. + */ +int _nvm_ctrl_init(nvm_ctrl_t** handle, /* User's handle */ + struct device* dev, /* Device handle */ + const struct device_ops* ops, /* Device handle operations */ + enum device_type type, /* Device type */ + volatile void* mm_ptr, /* Memory-mapped pointer */ + size_t mm_size); /* Size of memory-map */ -#ifdef _SISCI /* - * Look up device reference from controller handle. - * Returns a device reference if used, or NULL if not used. + * Increase controller reference count. */ -const struct device* _nvm_device_from_ctrl(const nvm_ctrl_t* ctrl); -#endif +struct controller* _nvm_ctrl_get(const nvm_ctrl_t* handle); + +/* + * Decrease controller reference count. + */ +void _nvm_ctrl_put(struct controller* ctrl); + + + +/* + * Convenience macro to get the controller type. + */ +#define _nvm_ctrl_type(ctrl) _nvm_container_of(ctrl, struct controller, handle)->type #endif /* __NVM_INTERNAL_CTRL_H__ */ diff --git a/src/dis/device.c b/src/dis/device.c index 6723ae2c..945ce583 100644 --- a/src/dis/device.c +++ b/src/dis/device.c @@ -6,11 +6,18 @@ #define __DIS_CLUSTER__ #endif -#include +#include +#include +#include #include #include +#include +#include #include #include "dis/device.h" +#include "dis/map.h" +#include "ctrl.h" +#include "dma.h" #include "dprintf.h" #include #include @@ -19,117 +26,415 @@ /* - * Increase device reference. + * Helper function to connect to a device memory segment. 
*/ -int _nvm_device_get(struct device* dev, uint64_t dev_id) +int _nvm_connect_device_memory(sci_remote_segment_t* segment, const struct device* dev, uint32_t id, unsigned int memtype) { sci_error_t err; + + SCIConnectDeviceSegment(dev->device, segment, id, memtype, NULL, NULL, 0, &err); + if (err == SCI_ERR_OK) + { + return 0; + } - if (dev == NULL) + *segment = NULL; + switch (err) { - return EINVAL; + case SCI_ERR_API_NOSPC: + dprintf("Could not connect to device memory segment, out of memory\n"); + return ENOMEM; + + case SCI_ERR_NOSPC: + dprintf("Could not connect to device memory segment, out of resources\n"); + return ENOSPC; + + case SCI_ERR_NO_SUCH_SEGMENT: + dprintf("Failed to connect to device memory segment, segment not found\n"); + return ENOTTY; + + case SCI_ERR_CONNECTION_REFUSED: + dprintf("Failed to connect to device memory segment, connection was refused\n"); + return EPERM; + + default: + dprintf("Failed to get device memory reference: %s\n", _SCIGetErrorString(err)); + return ENOMEM; } +} - dev->device_id = dev_id; - SCIOpen(&dev->sd, 0, &err); - if (err != SCI_ERR_OK) + +void _nvm_disconnect_device_memory(sci_remote_segment_t* segment) +{ + if (segment != NULL && *segment != NULL) { - dprintf("Failed to create virtual device: %s\n", _SCIGetErrorString(err)); - return EIO; + sci_error_t err = SCI_ERR_OK; + + do + { + SCIDisconnectSegment(*segment, 0, &err); + } + while (err == SCI_ERR_BUSY); + +#ifndef NDEBUG + if (err != SCI_ERR_OK) + { + dprintf("Failed to disconnect segment: %s\n", _SCIGetErrorString(err)); + } +#endif + + *segment = NULL; + } +} + + + +/* + * Map local segment for device. + */ +static int io_map_local(const struct device* dev, const struct map* map, uint64_t* ioaddr) +{ + sci_error_t err; + size_t size = SCIGetLocalSegmentSize(map->lseg); + sci_ioaddr_t addr; + + *ioaddr = 0; + + SCISetSegmentAvailable(map->lseg, map->adapter, 0, &err); + switch (err) + { + case SCI_ERR_OK: + break; + + case SCI_ERR_ILLEGAL_OPERATION: + case SCI_ERR_SEGMENT_NOT_PREPARED: + return EINVAL; + + default: + dprintf("Failed to set segment available: %s\n", _SCIGetErrorString(err)); + return EIO; } - SCIBorrowDevice(dev->sd, &dev->device, dev_id, 0, &err); + SCIMapLocalSegmentForDevice(map->lseg, map->adapter, dev->device, &addr, 0, size, 0, &err); if (err != SCI_ERR_OK) { - dprintf("Failed to increase device reference: %s\n", _SCIGetErrorString(err)); - SCIClose(dev->sd, 0, &err); - return ENODEV; + dprintf("Failed to map segment for device: %s\n", _SCIGetErrorString(err)); + return EIO; } + *ioaddr = (uint64_t) addr; return 0; } /* - * Decrease device reference. + * Map remote segment for device */ -void _nvm_device_put(struct device* dev) +static int io_map_remote(const struct device* dev, const struct map* map, uint64_t* ioaddr) { sci_error_t err; + size_t size; + sci_ioaddr_t addr; + + *ioaddr = 0; + + size = SCIGetRemoteSegmentSize(map->rseg); - if (dev != NULL) + SCIMapRemoteSegmentForDevice(map->rseg, dev->device, &addr, 0, size, 0, &err); + if (err != SCI_ERR_OK) { - SCIReturnDevice(dev->device, 0, &err); - SCIClose(dev->sd, 0, &err); + dprintf("Failed to map segment for device: %s\n", _SCIGetErrorString(err)); + return EIO; } + + *ioaddr = (uint64_t) addr; + return 0; } /* - * Helper function to connect to a segment. 
+ * Take device reference ("borrow" device) and map MLBAR/BAR0.
  */
-int _nvm_device_memory_get(struct device_memory* mem,
-                           const struct device* dev,
-                           uint32_t adapter,
-                           uint32_t segment_no,
-                           uint32_t flags)
+static int borrow_device(struct device** handle, uint32_t fdid)
 {
+    int status;
     sci_error_t err;
-    int status = 0;
+    struct device* dev;
 
-    status = _nvm_device_get(&mem->device, dev->device_id);
-    if (status != 0)
+    *handle = NULL;
+    dev = (struct device*) malloc(sizeof(struct device));
+    if (dev == NULL)
     {
-        goto leave;
+        dprintf("Failed to allocate device handle: %s\n", strerror(errno));
+        return ENOMEM;
     }
 
-    const struct device* d = &mem->device;
-    mem->adapter = adapter;
-    mem->segment_no = segment_no;
-    mem->flags = flags;
+    dev->fdid = fdid;
 
-    SCIConnectDeviceMemory(d->sd, &mem->segment, mem->adapter, d->device, segment_no, 0, flags, &err);
+    SCIOpen(&dev->sd, 0, &err);
     if (err != SCI_ERR_OK)
     {
-        dprintf("Failed to connect to device memory: %s\n", _SCIGetErrorString(err));
+        dprintf("Failed to create SISCI virtual device: %s\n", _SCIGetErrorString(err));
+        free(dev);
+        return ENOSYS;
+    }
+
+    SCIBorrowDevice(dev->sd, &dev->device, dev->fdid, 0, &err);
+    if (err != SCI_ERR_OK)
+    {
-        status = ENODEV;
-        goto release;
+        dprintf("Failed to increase device reference: %s\n", _SCIGetErrorString(err));
+        switch (err)
+        {
+            case SCI_ERR_NO_SUCH_FDID:
+                status = ENODEV;
+                break;
+
+            case SCI_ERR_NOT_AVAILABLE:
+                status = EBUSY;
+                break;
+
+            default:
+                status = EIO;
+                break;
+        }
+
+        SCIClose(dev->sd, 0, &err);
+        free(dev);
+        return status;
     }
 
-    return 0;
+    status = _nvm_connect_device_memory(&dev->segment, dev, 0, SCI_MEMTYPE_BAR);
+    if (status != 0)
+    {
+        SCIReturnDevice(dev->device, 0, &err);
+        SCIClose(dev->sd, 0, &err);
+        free(dev);
+        return status;
+    }
 
-release:
-    _nvm_device_put(&mem->device);
+    dev->size = SCIGetRemoteSegmentSize(dev->segment);
 
-leave:
-    return status;
+    dev->ptr = SCIMapRemoteSegment(dev->segment, &dev->md, 0, dev->size, NULL, 0, &err);
+    if (err != SCI_ERR_OK)
+    {
+        dprintf("Failed to map device memory into local address space: %s\n", _SCIGetErrorString(err));
+        _nvm_disconnect_device_memory(&dev->segment);
+        SCIReturnDevice(dev->device, 0, &err);
+        SCIClose(dev->sd, 0, &err);
+        free(dev);
+        return ENOSPC;
+    }
+
+    *handle = dev;
+    return 0;
 }
 
 
 
 /*
- * Disconnect from device memory.
+ * Release device reference ("return" device).
  */
-void _nvm_device_memory_put(struct device_memory* mem)
+static void return_device(struct device* dev, volatile void* mm_ptr, size_t mm_size)
 {
-    sci_error_t err = SCI_ERR_OK;
+    sci_error_t err;
+
+    // Sanity check that the caller passed back the pointer and size from
+    // this handle (also keeps the parameters used in release builds)
+    if (mm_ptr != dev->ptr || mm_size != dev->size)
+    {
+        dprintf("Mapped pointer or size does not match device handle\n");
+    }
 
     do
     {
-        SCIDisconnectSegment(mem->segment, 0, &err);
+        SCIUnmapSegment(dev->md, 0, &err);
     }
     while (err == SCI_ERR_BUSY);
 
+    _nvm_disconnect_device_memory(&dev->segment);
+
+    SCIReturnDevice(dev->device, 0, &err);
+    SCIClose(dev->sd, 0, &err);
+    free(dev);
+}
+
+
+
+/*
+ * Dispatch the proper mapping function based on the type.
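+ *
+ * Local and physical segments are mapped with io_map_local(), while remote
+ * and device segments are mapped with io_map_remote(); see the switch below.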
+ */ +static int io_map(const struct device* dev, const struct va_range* va, uint64_t* ioaddr) +{ + const struct map* map = _nvm_container_of(va, struct map, range); + +#ifndef NDEBUG + // SISCI segments, being contiguous memory, should + // be considered one large page rather than many pages + if (va->n_pages != 1) + { + dprintf("Virtual address range must be a SISCI segment\n"); + return EINVAL; + } +#endif + + switch (map->type) + { + case SEGMENT_TYPE_LOCAL: + case SEGMENT_TYPE_PHYSICAL: + return io_map_local(dev, map, ioaddr); + + case SEGMENT_TYPE_REMOTE: + case SEGMENT_TYPE_DEVICE: + return io_map_remote(dev, map, ioaddr); + + default: + dprintf("Unknown segment type\n"); + return EINVAL; + } +} + + + +static void io_unmap(const struct device* dev, const struct va_range* va) +{ + sci_error_t err = SCI_ERR_OK; + const struct map* map = _nvm_container_of(va, struct map, range); + + switch (map->type) + { + case SEGMENT_TYPE_LOCAL: + case SEGMENT_TYPE_PHYSICAL: + do + { + SCIUnmapLocalSegmentForDevice(map->lseg, map->adapter, dev->device, 0, &err); + } + while (err == SCI_ERR_BUSY); + break; + + case SEGMENT_TYPE_REMOTE: + case SEGMENT_TYPE_DEVICE: + do + { + SCIUnmapRemoteSegmentForDevice(map->rseg, dev->device, 0, &err); + } + while (err == SCI_ERR_BUSY); + break; + + default: + dprintf("Unknown segment type\n"); + return; + } + #ifndef NDEBUG if (err != SCI_ERR_OK) { - dprintf("Failed to disconnect from device memory: %s\n", _SCIGetErrorString(err)); + dprintf("Unmapping segment for device failed: %s\n", _SCIGetErrorString(err)); } #endif - _nvm_device_put(&mem->device); + if (map->type == SEGMENT_TYPE_PHYSICAL) + { + do + { + SCISetSegmentUnavailable(map->lseg, map->adapter, 0, &err); + } + while (err == SCI_ERR_BUSY); + } +} + + + +/* + * Device operations + */ +static const struct device_ops smartio_device_ops = +{ + .release_device = &return_device, + .map_range = &io_map, + .unmap_range = &io_unmap, +}; + + + +int nvm_dis_ctrl_init(nvm_ctrl_t** ctrl, uint32_t fdid) +{ + int err; + struct device* dev; + + *ctrl = NULL; + + err = borrow_device(&dev, fdid); + if (err != 0) + { + return err; + } + + err = _nvm_ctrl_init(ctrl, dev, &smartio_device_ops, DEVICE_TYPE_SMARTIO, dev->ptr, dev->size); + if (err != 0) + { + return_device(dev, dev->ptr, dev->size); + return err; + } + + return 0; +} + + + +void nvm_dis_ctrl_unmap_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev) +{ + if (ctrl != NULL) + { + const struct controller* container = _nvm_container_of(ctrl, struct controller, handle); + + if (container->device != NULL && container->type == DEVICE_TYPE_SMARTIO) + { + sci_error_t err; + + do + { + SCIUnmapRemoteSegmentForDevice(container->device->segment, dev, 0, &err); + } + while (err == SCI_ERR_BUSY); + } + } +} + + + +int nvm_dis_ctrl_map_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev, uint64_t* ioaddr) +{ + sci_error_t err; + sci_ioaddr_t addr; + const struct controller* container; + + if (ctrl == NULL) + { + return EINVAL; + } + + container = _nvm_container_of(ctrl, struct controller, handle); + if (container->device == NULL || container->type != DEVICE_TYPE_SMARTIO) + { + return EINVAL; + } + + SCIMapRemoteSegmentForDevice(container->device->segment, dev, &addr, 0, container->device->size, 0, &err); + if (err != SCI_ERR_OK) + { + dprintf("Failed to map controller BAR for device: %s\n", _SCIGetErrorString(err)); + return EIO; + } + + if (ioaddr != NULL) + { + *ioaddr = (uint64_t) addr; + } + + return 0; } diff --git a/src/dis/device.h b/src/dis/device.h 
index cdfdc71a..9ad8c6ed 100644 --- a/src/dis/device.h +++ b/src/dis/device.h @@ -1,11 +1,5 @@ #ifndef __NVM_INTERNAL_DIS_DEVICE_H__ #define __NVM_INTERNAL_DIS_DEVICE_H__ - -/* Forward declarations */ -struct device; -struct device_memory; - - #ifdef _SISCI /* Make sure everything is defined as needed */ @@ -16,75 +10,42 @@ struct device_memory; /* Necessary includes */ #include #include +#include #include -#include "dis/map.h" - /* * Device descriptor. * - * Holds a "borrowed" reference to a SISCI SmartIO device. - * Handles that require a device or controller reference should take this. + * Holds a reference to a "borrowed" SISCI SmartIO device. */ struct device { + uint32_t fdid; // SISCI SmartIO device identifier (fabric device identifier) sci_desc_t sd; // SISCI virtual device descriptor - uint64_t device_id; // SISCI SmartIO device identifier - sci_device_t device; // SmartIO device handle -}; - - - -/* - * Device memory segment descriptor. - * - * Describes mapping to device-local memory, e.g. PCI BARs or memory residing - * on the same host as the device. - */ -struct device_memory -{ - struct device device; // Device reference - uint32_t adapter; // DIS adapter number - uint32_t segment_no; // Device segment number - sci_remote_segment_t segment; // SISCI remote segment to device memory - uint32_t flags; // SISCI flags used when connecting segment + sci_smartio_device_t device; // SmartIO device handle + sci_remote_segment_t segment; // Reference to PCI bar 0 + size_t size; // Size of BAR0 + volatile void* ptr; // Mapped pointer + sci_map_t md; // SISCI mapping descriptor }; /* - * Acquire device reference (increase). + * Connect to SmartIO device memory data segment. */ -int _nvm_device_get(struct device* dev, uint64_t dev_id); +int _nvm_connect_device_memory(sci_remote_segment_t* segment, + const struct device* dev, + uint32_t id, + unsigned int memtype); /* - * Release device reference (decrease). + * Disconnect from SmartIO device memory segment. */ -void _nvm_device_put(struct device* dev); - - - -/* - * Connect to device memory. - * This function will increase the device reference. - */ -int _nvm_device_memory_get(struct device_memory* mem, - const struct device* dev, - uint32_t adapter, - uint32_t segment_no, - uint32_t flags); - - - -/* - * Disconnect from device memory. - * This function will decrease the device reference. - */ -void _nvm_device_memory_put(struct device_memory* mem); - +void _nvm_disconnect_device_memory(sci_remote_segment_t* segment); #endif /* _SISCI */ diff --git a/src/dis/dma.c b/src/dis/dma.c index b168becb..362ae83b 100644 --- a/src/dis/dma.c +++ b/src/dis/dma.c @@ -17,7 +17,6 @@ #include #include "dma.h" #include "ctrl.h" -#include "dis/local.h" #include "dis/device.h" #include "dis/map.h" #include "dprintf.h" @@ -28,354 +27,392 @@ /* - * SmartIO mapping descriptor. - * - * Holds a device reference and mapping descriptors. + * Device memory segments are a special case of mappings, + * where a controller reference must be taken in order to + * get the segment and map it. */ -struct map_descriptor +struct segment { - struct dma_map dma_mapping; // DMA mapping head - struct device device; // Device reference - struct va_map va_mapping; // Virtual address mapping - struct io_map io_mapping; // Mapping for device - void* segment; // Reference to segment descriptor (if used) + struct controller* ctrl; + struct map map; }; -/* - * Allocate a map descriptor container and take device reference. +/* + * Map segment into virtual address space. 
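+ *
+ * write=false requests a read-only mapping (SCI_FLAG_READONLY_MAP) and
+ * wc=false requests an uncached IO-space mapping (SCI_FLAG_IO_MAP_IOSPACE);
+ * see the flag computation at the top of va_map() below.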
  */
-static int create_map_descriptor(struct map_descriptor** container, const nvm_ctrl_t* ctrl, size_t segment_size)
+static int va_map(struct map* m, bool write, bool wc)
 {
-    // Get device reference from controller
-    const struct device* dev = _nvm_device_from_ctrl(ctrl);
-    if (dev == NULL)
-    {
-        dprintf("Controller is not a cluster device\n");
-        return EINVAL;
-    }
+    sci_error_t err;
+    unsigned int flags = 0;
 
-    // Allocate map descriptor container
-    struct map_descriptor* map = (struct map_descriptor*) malloc(sizeof(struct map_descriptor));
-    if (map == NULL)
+    flags |= !write ? SCI_FLAG_READONLY_MAP : 0;
+    flags |= !wc ? SCI_FLAG_IO_MAP_IOSPACE : 0;
+
+    m->range.n_pages = 1;
+    switch (m->type)
     {
-        dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno));
-        return ENOMEM;
+        case SEGMENT_TYPE_LOCAL:
+            m->range.page_size = SCIGetLocalSegmentSize(m->lseg);
+            m->range.vaddr = SCIMapLocalSegment(m->lseg, &m->md, 0, m->range.page_size, NULL, 0, &err);
+            break;
+
+        case SEGMENT_TYPE_REMOTE:
+        case SEGMENT_TYPE_DEVICE:
+            m->range.page_size = SCIGetRemoteSegmentSize(m->rseg);
+            m->range.vaddr = SCIMapRemoteSegment(m->rseg, &m->md, 0, m->range.page_size, NULL, flags, &err);
+            break;
+
+        default:
+            dprintf("Unknown segment type\n");
+            return EINVAL;
     }
 
-    // Increase device reference
-    int err = _nvm_device_get(&map->device, dev->device_id);
-    if (err != 0)
+    switch (err)
     {
-        free(map);
-        dprintf("Failed to take device reference: %s\n", strerror(err));
-        return err;
-    }
+        case SCI_ERR_OK:
+            m->mapped = true;
+            return 0;
 
-    // Clear all members
-    VA_MAP_CLEAR(&map->va_mapping);
-    IO_MAP_CLEAR(&map->io_mapping);
-    map->dma_mapping.vaddr = NULL;
-    map->dma_mapping.page_size = segment_size;
-    map->dma_mapping.n_pages = 1;
-    map->segment = NULL;
+        case SCI_ERR_FLAG_NOT_IMPLEMENTED:
+        case SCI_ERR_ILLEGAL_FLAG:
+        case SCI_ERR_OUT_OF_RANGE:
+        case SCI_ERR_SIZE_ALIGNMENT:
+        case SCI_ERR_OFFSET_ALIGNMENT:
+            return EINVAL;
 
-    *container = map;
-    return 0;
+        case SCI_ERR_NOT_CONNECTED:
+            return ENOTCONN;
+
+        default:
+            dprintf("Mapping segment into virtual address space failed: %s\n", _SCIGetErrorString(err));
+            return EIO;
+    }
 }
 
 
 
 /*
- * Unmap mappings, release device reference and free map descriptor.
+ * Unmap segment.
  */
-static void remove_map_descriptor(struct map_descriptor* map)
+static void va_unmap(struct map* m)
 {
-    if (map != NULL)
+    if (m->mapped)
     {
-        _nvm_va_unmap(&map->va_mapping);
-        _nvm_io_unmap(&map->io_mapping);
-        _nvm_device_put(&map->device);
-        free(map);
+        sci_error_t err;
+
+        do
+        {
+            SCIUnmapSegment(m->md, 0, &err);
+        }
+        while (err == SCI_ERR_BUSY);
+
+        m->mapped = false;
+
+#ifndef NDEBUG
+        if (err != SCI_ERR_OK)
+        {
+            dprintf("Failed to unmap segment: %s\n", _SCIGetErrorString(err));
+        }
+#endif
     }
 }
 
 
 
 /*
- * Create DMA mapping for a local segment.
+ * Release SISCI resources and remove mapping descriptor.
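+ *
+ * Device and physical segments are embedded in a struct segment that also
+ * carries a controller reference; both the SISCI resources and that
+ * reference are released here.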
  */
-int nvm_dis_dma_map_local(nvm_dma_t** handle,
-                          const nvm_ctrl_t* ctrl,
-                          uint32_t adapter,
-                          sci_local_segment_t segment,
-                          bool map_va)
+static void remove_map(struct map* m)
 {
-    struct map_descriptor* md;
-    size_t size;
-
-    *handle = NULL;
+    va_unmap(m);
 
-    size = NVM_CTRL_ALIGN(ctrl, SCIGetLocalSegmentSize(segment));
-    if (size == 0)
+    if (m->type == SEGMENT_TYPE_DEVICE)
     {
-        dprintf("Local segment has no size");
-        return -ENOSPC;
+        struct segment* seg = _nvm_container_of(m, struct segment, map);
+        _nvm_disconnect_device_memory(&seg->map.rseg);
+        _nvm_ctrl_put(seg->ctrl);
+        free(seg);
     }
-
-    // Create mapping descriptor
-    int err = create_map_descriptor(&md, ctrl, size);
-    if (err != 0)
+    else if (m->type == SEGMENT_TYPE_PHYSICAL)
     {
-        return err;
-    }
+        sci_error_t err;
+        struct segment* seg = _nvm_container_of(m, struct segment, map);
 
-    // Set reverse mapping (device-local mapping)
-    err = _nvm_io_map_local(&md->io_mapping, md->device.device, segment, adapter);
-    if (err != 0)
-    {
-        remove_map_descriptor(md);
-        return err;
-    }
+        do
+        {
+            SCIRemoveSegment(seg->map.lseg, 0, &err);
+        }
+        while (err == SCI_ERR_BUSY);
 
-    // Set up virtual address space mapping
-    if (map_va)
-    {
-        err = _nvm_va_map_local(&md->va_mapping, size, segment);
-        if (err != 0)
+#ifndef NDEBUG
+        if (err != SCI_ERR_OK)
         {
-            remove_map_descriptor(md);
-            return err;
+            dprintf("Failed to remove physical segment: %s\n", _SCIGetErrorString(err));
         }
+#endif
 
-        md->dma_mapping.vaddr = (void*) md->va_mapping.vaddr;
+        _nvm_ctrl_put(seg->ctrl);
+        free(seg);
     }
-
-    // Create handle container
-    err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) remove_map_descriptor);
-    if (err != 0)
+    else
     {
-        remove_map_descriptor(md);
-        return err;
+        free(m);
     }
+}
 
-    // Initialize DMA handle
-    _nvm_dma_handle_populate(*handle, ctrl, (uint64_t*) &md->io_mapping.ioaddr);
 
-    return 0;
+
+static void release_map(struct va_range* va)
+{
+    struct map* m = _nvm_container_of(va, struct map, range);
+    remove_map(m);
 }
 
 
 
 /*
- * Create DMA mapping for a remote segment.
+ * Create mapping descriptor.
  */
-int nvm_dis_dma_map_remote(nvm_dma_t** handle,
-                           const nvm_ctrl_t* ctrl,
-                           sci_remote_segment_t segment,
-                           bool map_va,
-                           bool map_wc)
+static int create_map(struct map** md, const nvm_ctrl_t* ctrl, enum segment_type type, size_t size)
 {
-    struct map_descriptor* md;
-    *handle = NULL;
-
-    // Find segment size
-    size_t size = SCIGetRemoteSegmentSize(segment) & NVM_PAGE_MASK(ctrl->page_size);
-
-    // Create mapping descriptor
-    int err = create_map_descriptor(&md, ctrl, size);
-    if (err != 0)
-    {
-        return err;
-    }
+    struct map* map;
 
-    // Set up device-local mapping
-    err = _nvm_io_map_remote(&md->io_mapping, md->device.device, segment);
-    if (err != 0)
-    {
-        remove_map_descriptor(md);
-        return err;
-    }
+    *md = NULL;
 
-    // Map into local address space
-    if (map_va)
+    size = NVM_CTRL_ALIGN(ctrl, size);
+    if (size == 0)
     {
-        err = _nvm_va_map_remote(&md->va_mapping, size, segment, true, map_wc);
-        if (err != 0)
-        {
-            remove_map_descriptor(md);
-            return err;
-        }
-
-        md->dma_mapping.vaddr = (void*) md->va_mapping.vaddr;
+        return EINVAL;
     }
 
-    // Create handle container
-    err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) remove_map_descriptor);
-    if (err != 0)
+    map = (struct map*) malloc(sizeof(struct map));
+    if (map == NULL)
     {
-        remove_map_descriptor(md);
-        return err;
+        dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno));
+        return errno;
     }
 
-    // Initialize DMA handle
-    _nvm_dma_handle_populate(*handle, ctrl, (uint64_t*) &md->io_mapping.ioaddr);
+    map->type = type;
+    map->lseg = NULL;
+    map->rseg = NULL;
+    map->adapter = -1;
+    map->size = size;
+    map->mapped = false;
+    map->md = NULL;
+    map->range = VA_RANGE_INIT(NULL, map->size, 1);
 
+    *md = map;
     return 0;
 }
 
 
 
 /*
- * Helper function to connect to a remote segment.
+ * Create mapping descriptor from device memory segment.
  */
-static int connect_remote_segment(struct device_memory** seg, const struct device* dev, uint32_t adapter, uint32_t segno, bool shared)
+static int connect_device_segment(struct map** md, const nvm_ctrl_t* ctrl, uint32_t id, unsigned int memtype)
 {
-    int status = 0;
-    *seg = NULL;
+    int err;
+    struct segment* s;
+    struct map* map;
+    sci_remote_segment_t segment;
+    struct controller* ref;
 
-    struct device_memory* s = (struct device_memory*) malloc(sizeof(struct device_memory));
-    if (s == NULL)
+    *md = NULL;
+
+    ref = _nvm_ctrl_get(ctrl);
+    if (ref == NULL)
     {
-        dprintf("Failed to allocate segment descriptor: %s\n", strerror(errno));
-        return ENOMEM;
+        return ENOSPC;
     }
 
-    status = _nvm_device_memory_get(s, dev, adapter, segno, shared ? SCI_FLAG_SHARED : SCI_FLAG_PRIVATE);
-    if (status != 0)
+    err = _nvm_connect_device_memory(&segment, ref->device, id, memtype);
+    if (err != 0)
     {
-        free(s);
-        return status;
+        _nvm_ctrl_put(ref);
+        return err;
+    }
+
+    s = (struct segment*) malloc(sizeof(struct segment));
+    if (s == NULL)
+    {
+        _nvm_disconnect_device_memory(&segment);
+        _nvm_ctrl_put(ref);
+        dprintf("Failed to allocate segment handle: %s\n", strerror(errno));
+        return errno;
     }
 
-    *seg = s;
+    s->ctrl = ref;
+    map = &s->map;
+    map->type = SEGMENT_TYPE_DEVICE;
+    map->lseg = NULL;
+    map->rseg = segment;
+    map->adapter = -1;
+    map->size = NVM_CTRL_ALIGN(ctrl, SCIGetRemoteSegmentSize(segment));
+    map->mapped = false;
+    map->md = NULL;
+    map->range = VA_RANGE_INIT(NULL, map->size, 1);
+
+    *md = map;
     return 0;
 }
 
 
 
-/*
- * Helper function to allocate a local segment descriptor.
- */
-static int create_local_segment(struct local_memory** seg, uint32_t id, size_t size, void* dptr)
+#ifdef _CUDA
+static int create_physical_segment(struct map** md, const nvm_ctrl_t* ctrl, uint32_t adapter, void* ptr, size_t size)
 {
-    int status = 0;
-    *seg = NULL;
+    sci_error_t err;
+    struct segment* s;
+    struct map* map;
+    sci_local_segment_t segment;
+    struct controller* ref;
 
-    struct local_memory* s = (struct local_memory*) malloc(sizeof(struct local_memory));
-    if (s == NULL)
+    *md = NULL;
+
+    ref = _nvm_ctrl_get(ctrl);
+    if (ref == NULL)
     {
-        dprintf("Failed to allocate segment descriptor: %s\n", strerror(errno));
-        return ENOMEM;
+        return ENOSPC;
     }
 
-    if (dptr == NULL)
+    SCICreateSegment(ref->device->sd, &segment, 0, size, NULL, NULL, SCI_FLAG_EMPTY | SCI_FLAG_AUTO_ID, &err);
+    if (err != SCI_ERR_OK)
     {
-        status = _nvm_local_memory_get(s, id, size);
+        _nvm_ctrl_put(ref);
+        dprintf("Failed to create physical segment: %s\n", _SCIGetErrorString(err));
+        return EIO;
     }
-    else
+
+    SCIAttachPhysicalMemory(0, ptr, 0, size, segment, SCI_FLAG_CUDA_BUFFER, &err);
+    if (err != SCI_ERR_OK)
     {
-        status = _nvm_local_memory_get_attached(s, id, dptr, size);
+        dprintf("Failed to attach physical memory: %s\n", _SCIGetErrorString(err));
+        SCIRemoveSegment(segment, 0, &err);
+        _nvm_ctrl_put(ref);
+        return EIO;
     }
 
-    if (status != 0)
+    SCIPrepareSegment(segment, adapter, 0, &err);
+    if (err != SCI_ERR_OK)
     {
-        free(s);
-        return status;
+        dprintf("Failed to prepare physical segment: %s\n", _SCIGetErrorString(err));
+        SCIRemoveSegment(segment, 0, &err);
+        _nvm_ctrl_put(ref);
+        return EIO;
+    }
+
+    s = (struct segment*) malloc(sizeof(struct segment));
+    if (s == NULL)
+    {
+        dprintf("Failed to allocate segment handle: %s\n", strerror(errno));
+        SCIRemoveSegment(segment, 0, &err);
+        _nvm_ctrl_put(ref);
+        return errno;
     }
 
-    *seg = s;
+    s->ctrl = ref;
+    map = &s->map;
+    map->type = SEGMENT_TYPE_PHYSICAL;
+    map->lseg = segment;
+    map->rseg = NULL;
+    map->adapter = adapter;
+    map->size = size;
+    map->mapped = false;
+    map->md = NULL;
+    map->range = VA_RANGE_INIT(NULL, map->size, 1);
+
+    *md = map;
     return 0;
 }
+#endif
 
 
 
 /*
- * Helper function to unmap and remove local segment.
+ * Map local segment for device.
  */
-static void remove_local_segment(struct map_descriptor* md)
+int nvm_dis_dma_map_local(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, sci_local_segment_t lseg, uint32_t adapter, bool map_va)
 {
-    if (md != NULL)
+    int status;
+    struct map* m;
+
+    *handle = NULL;
+
+    status = create_map(&m, ctrl, SEGMENT_TYPE_LOCAL, SCIGetLocalSegmentSize(lseg));
+    if (status != 0)
     {
-        if (md->segment != NULL)
-        {
-            struct local_memory* segment = (struct local_memory*) md->segment;
+        return status;
+    }
+
+    m->adapter = adapter;
+    m->lseg = lseg;
 
-            _nvm_va_unmap(&md->va_mapping);
-            _nvm_io_unmap(&md->io_mapping);
-            _nvm_local_memory_put(segment);
-            free(segment);
+    if (map_va)
+    {
+        status = va_map(m, true, true);
+        if (status != 0)
+        {
+            remove_map(m);
+            return status;
         }
+    }
 
-        _nvm_device_put(&md->device);
-        free(md);
+    status = _nvm_dma_init(handle, ctrl, &m->range, false, &release_map);
+    if (status != 0)
+    {
+        remove_map(m);
+        return status;
     }
+
+    return 0;
 }
 
 
 
 /*
- * Create local segment and map it.
+ * Map remote segment for device.
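+ *
+ * Usage sketch (assumes a controller handle `ctrl` and a connected remote
+ * segment `rseg`, mapped both for the device and into virtual memory):
+ *
+ *     nvm_dma_t* dma = NULL;
+ *     int err = nvm_dis_dma_map_remote(&dma, ctrl, rseg, true, true);
+ *
+ * Requesting write combining (map_wc) without a virtual address mapping
+ * (map_va) is rejected with EINVAL.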
*/ -int nvm_dis_dma_create(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t adapter, uint32_t id, size_t size) +int nvm_dis_dma_map_remote(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, sci_remote_segment_t rseg, bool map_va, bool map_wc) { - struct map_descriptor* md; + int status; + struct map* m; + *handle = NULL; - size = NVM_CTRL_ALIGN(ctrl, size); - // Create mapping descriptor - int err = create_map_descriptor(&md, ctrl, size); - if (err != 0) + if (map_wc && !map_va) { - return err; + return EINVAL; } - // Create local segment - // FIXME: When support is added in SISCI for SCI_FLAG_PRIVATE, remove id argument - err = create_local_segment((struct local_memory**) &md->segment, id, size, NULL); - if (err != 0) + status = create_map(&m, ctrl, SEGMENT_TYPE_REMOTE, SCIGetRemoteSegmentSize(rseg)); + if (status != 0) { - remove_map_descriptor(md); - return err; + return status; } - // Map segment for device - struct local_memory* sd = (struct local_memory*) md->segment; - err = _nvm_io_map_local_memory(&md->io_mapping, &md->device, sd, adapter); - if (err != 0) - { - remove_local_segment(md); - return err; - } + m->rseg = rseg; - // Map segment into virtual memory - err = _nvm_va_map_local(&md->va_mapping, size, sd->segment); - if (err != 0) + if (map_va) { - remove_local_segment(md); - return err; + status = va_map(m, true, map_wc); + if (status != 0) + { + remove_map(m); + return status; + } } - md->dma_mapping.vaddr = (void*) md->va_mapping.vaddr; - // Create handle container - err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) remove_local_segment); - if (err != 0) + status = _nvm_dma_init(handle, ctrl, &m->range, true, &release_map); + if (status != 0) { - remove_local_segment(md); - return err; + remove_map(m); + return status; } - // Initialize DMA handle - _nvm_dma_handle_populate(*handle, ctrl, (uint64_t*) &md->io_mapping.ioaddr); - return 0; } -int nvm_dis_dma_map_host(nvm_dma_t** map, const nvm_ctrl_t* ctrl, uint32_t adapter, uint32_t id, void* vaddr, size_t size) +int nvm_dis_dma_map_host(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t adapter, void* vaddr, size_t size) { - // TODO: add support for _nvm_local_memory_get_registered similar to _nvm_local_memory_get_attached in create_segment() dprintf("Function not implemented\n"); return ENOTSUP; } @@ -386,48 +423,29 @@ int nvm_dis_dma_map_host(nvm_dma_t** map, const nvm_ctrl_t* ctrl, uint32_t adapt /* * Map CUDA device memory for device. 
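+ *
+ * The size is aligned up to 64 KiB, the GPU page size assumed by the
+ * NVM_PAGE_ALIGN(size, 1ULL << 16) call below.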
*/ -int nvm_dis_dma_map_device(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t adapter, uint32_t id, void* dptr, size_t size) +int nvm_dis_dma_map_device(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t adapter, void* dptr, size_t size) { - struct map_descriptor* md; - *handle = NULL; - //size = NVM_CTRL_ALIGN(ctrl, size); + int status; + struct map* m; + size = NVM_PAGE_ALIGN(size, (1ULL << 16)); + *handle = NULL; - // Create mapping descriptor - int err = create_map_descriptor(&md, ctrl, size); - if (err != 0) - { - return err; - } - - // Create local segment - err = create_local_segment((struct local_memory**) &md->segment, id, size, dptr); - if (err != 0) - { - remove_map_descriptor(md); - return err; - } - - // Map segment for device - struct local_memory* sd = (struct local_memory*) md->segment; - err = _nvm_io_map_local_memory(&md->io_mapping, &md->device, sd, adapter); - if (err != 0) + status = create_physical_segment(&m, ctrl, adapter, dptr, size); + if (status != 0) { - remove_local_segment(md); - return err; + return status; } - // Create handle container - err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) remove_local_segment); - if (err != 0) + // TODO: va_map() when supported by SISCI + + status = _nvm_dma_init(handle, ctrl, &m->range, false, &release_map); + if (status != 0) { - remove_local_segment(md); - return err; + remove_map(m); + return status; } - // Initialize DMA handle - _nvm_dma_handle_populate(*handle, ctrl, (uint64_t*) &md->io_mapping.ioaddr); - return 0; } #endif @@ -435,84 +453,76 @@ int nvm_dis_dma_map_device(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t /* - * Helper function to unmap and remove local segment. + * Connect to device memory. */ -static void disconnect_remote_segment(struct map_descriptor* md) +int nvm_dis_dma_connect(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t id, bool shared) { - if (md != NULL) + int status; + struct map* m; + unsigned int memtype = shared ? SCI_MEMTYPE_SHARED : SCI_MEMTYPE_PRIVATE; + + *handle = NULL; + + status = connect_device_segment(&m, ctrl, id, memtype); + if (status != 0) { - if (md->segment != NULL) - { - struct device_memory* segment = (struct device_memory*) md->segment; + return status; + } - _nvm_va_unmap(&md->va_mapping); - _nvm_io_unmap(&md->io_mapping); - _nvm_device_memory_put(segment); - free(segment); - } + status = va_map(m, true, true); + if (status != 0) + { + remove_map(m); + return status; + } - _nvm_device_put(&md->device); - free(md); + status = _nvm_dma_init(handle, ctrl, &m->range, true, &release_map); + if (status != 0) + { + remove_map(m); + return status; } + + return 0; } /* - * Connect to device memory. + * Create device memory. */ -int nvm_dis_dma_connect(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t adapter, uint32_t segno, size_t size, bool shared) +int nvm_dis_dma_create(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, uint32_t id, size_t size, bool shared, unsigned int hints) { - struct map_descriptor* md; + sci_error_t err; + unsigned int flags = 0; + struct controller* ref; + unsigned int memtype = shared ? 
SCI_MEMTYPE_SHARED : SCI_MEMTYPE_PRIVATE; + *handle = NULL; size = NVM_CTRL_ALIGN(ctrl, size); - // Create mapping descriptor - int err = create_map_descriptor(&md, ctrl, size); - if (err != 0) + if (hints == 0) { - return err; + hints = SCI_MEMACCESS_HOST_READ | SCI_MEMACCESS_HOST_WRITE | SCI_MEMACCESS_DEVICE_READ | SCI_MEMACCESS_DEVICE_WRITE; + flags = SCI_FLAG_LOCAL_ONLY; } - const struct device* dev = _nvm_device_from_ctrl(ctrl); - - // Create local segment - err = connect_remote_segment((struct device_memory**) &md->segment, dev, adapter, segno, shared); - if (err != 0) + ref = _nvm_ctrl_get(ctrl); + if (ref == NULL) { - remove_map_descriptor(md); - return err; + return ENOTTY; } - // Map segment for device - struct device_memory* sd = (struct device_memory*) md->segment; - err = _nvm_io_map_remote(&md->io_mapping, md->device.device, sd->segment); - if (err != 0) - { - disconnect_remote_segment(md); - return err; - } + SCICreateDeviceSegment(ref->device->device, id, size, memtype, hints, flags, &err); - // Map segment into virtual memory - err = _nvm_va_map_remote(&md->va_mapping, size, sd->segment, true, true); - if (err != 0) - { - disconnect_remote_segment(md); - return err; - } - md->dma_mapping.vaddr = (void*) md->va_mapping.vaddr; + _nvm_ctrl_put(ref); - // Create handle container - err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) disconnect_remote_segment); - if (err != 0) + if (err != SCI_ERR_OK) { - disconnect_remote_segment(md); - return err; + dprintf("Failed to create device segment: %s\n", _SCIGetErrorString(err)); + return EIO; } - // Initialize DMA handle - _nvm_dma_handle_populate(*handle, ctrl, (uint64_t*) &md->io_mapping.ioaddr); - - return 0; + return nvm_dis_dma_connect(handle, ctrl, id, shared); } diff --git a/src/dis/interrupt.h b/src/dis/interrupt.h index 24b0ca7f..8c357a1c 100644 --- a/src/dis/interrupt.h +++ b/src/dis/interrupt.h @@ -1,11 +1,5 @@ #ifndef __NVM_INTERNAL_DIS_INTERRUPT_H__ #define __NVM_INTERNAL_DIS_INTERRUPT_H__ - -/* Forward declarations */ -struct local_intr; -struct remote_intr; - - #ifdef _SISCI /* Make sure everything is defined as needed */ @@ -20,6 +14,13 @@ struct remote_intr; #include + +/* Forward declarations */ +struct local_intr; +struct remote_intr; + + + /* * Interrupt callback. */ diff --git a/src/dis/local.c b/src/dis/local.c deleted file mode 100644 index e28a65a0..00000000 --- a/src/dis/local.c +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef _SISCI -#error "Must compile with SISCI support" -#endif - -#ifndef __DIS_CLUSTER__ -#define __DIS_CLUSTER__ -#endif - -#include -#include -#include -#include -#include "dis/local.h" -#include "dprintf.h" -#include -#include -#include - - - -/* - * Helper function to create a local segment. 
- */ -static int create_segment(struct local_memory* m, uint32_t id, size_t size, uint32_t flags) -{ - sci_error_t err; - sci_error_t status = SCI_ERR_OK; - - m->id = id; - m->size = size; - m->flags = flags; - - SCIOpen(&m->sd, 0, &err); - if (err != SCI_ERR_OK) - { - dprintf("Failed to open SISCI descriptor: %s\n", _SCIGetErrorString(err)); - return EIO; - } - - SCICreateSegment(m->sd, &m->segment, id, size, NULL, NULL, flags, &err); - if (err != SCI_ERR_OK) - { - status = err; - SCIClose(m->sd, 0, &err); - } - - switch (status) - { - case SCI_ERR_OK: - return 0; - - case SCI_ERR_SEGMENTID_USED: - return EEXIST; - - default: - dprintf("Unknown error while creating local segment: %s\n", _SCIGetErrorString(status)); - return EIO; - } -} - - - -void _nvm_local_memory_put(struct local_memory* mem) -{ - sci_error_t err = SCI_ERR_OK; - - do - { - SCIRemoveSegment(mem->segment, 0, &err); - } - while (err == SCI_ERR_BUSY); - -#ifndef NDEBUG - if (err != SCI_ERR_OK) - { - dprintf("Unknown error while removing local segment: %s\n", _SCIGetErrorString(err)); - } -#endif -} - - - -int _nvm_local_memory_get(struct local_memory* mem, uint32_t id, size_t size) -{ - int err; - - // FIXME: When support is added, remove id argument and pass SCI_FLAG_PRIVATE - err = create_segment(mem, id, size, 0); - //err = create_segment(mem, id, size, SCI_FLAG_PRIVATE); - if (err != 0) - { - return err; - } - - return 0; -} - - - -int _nvm_local_memory_get_attached(struct local_memory* mem, uint32_t id, void* ptr, size_t size) -{ - sci_error_t err; - int status; - - // FIXME: When support is added, use SCI_FLAG_PRIVATE - //status = create_segment(mem, id, size, SCI_FLAG_EMPTY | SCI_FLAG_PRIVATE); - status = create_segment(mem, id, size, SCI_FLAG_EMPTY); - if (status != 0) - { - return status; - } - - SCIAttachPhysicalMemory(0, ptr, 0, size, mem->segment, SCI_FLAG_CUDA_BUFFER, &err); - if (err != SCI_ERR_OK) - { - _nvm_local_memory_put(mem); - dprintf("Failed to attach memory to local segment: %s\n", _SCIGetErrorString(err)); - return EIO; - } - - return 0; -} - - - -int _nvm_local_memory_get_registered(struct local_memory* mem, uint32_t id, void* ptr, size_t size) -{ - sci_error_t err; - int status; - - // FIXME: Add support for SCI_FLAG_PRIVATE - status = create_segment(mem, id, size, SCI_FLAG_EMPTY /* | SCI_FLAG_PRIVATE */); - if (status != 0) - { - return status; - } - - SCIRegisterSegmentMemory(ptr, size, mem->segment, SCI_FLAG_LOCK_USER_MEM, &err); - - switch (err) - { - case SCI_ERR_OK: - // Do nothing - break; - - case SCI_ERR_SIZE_ALIGNMENT: - _nvm_local_memory_put(mem); - return EINVAL; - - case SCI_ERR_OUT_OF_RANGE: - _nvm_local_memory_put(mem); - return ERANGE; - - case SCI_ERR_ILLEGAL_ADDRESS: - _nvm_local_memory_put(mem); - return EPERM; - - case SCI_ERR_NOT_SUPPORTED: - _nvm_local_memory_put(mem); - dprintf("Function is not supported on platform\n"); - return ENOTSUP; - - default: - _nvm_local_memory_put(mem); - dprintf("Unknown error when registering host memory: %s\n", _SCIGetErrorString(err)); - return EIO; - } - - return 0; -} - diff --git a/src/dis/local.h b/src/dis/local.h deleted file mode 100644 index 38b9815d..00000000 --- a/src/dis/local.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef __NVM_INTERNAL_DIS_LOCAL_MEMORY_H__ -#define __NVM_INTERNAL_DIS_LOCAL_MEMORY_H__ - -/* Forward declarations */ -struct local_memory; - - -#ifdef _SISCI - -/* Make sure everything is defined as needed */ -#ifndef __DIS_CLUSTER__ -#define __DIS_CLUSTER__ -#endif - -/* Necessary includes */ -#include -#include -#include - - 
- -/* - * Local memory descriptor. - * - * Describes a local memory segment and its mapping into address space. - */ -struct local_memory -{ - sci_desc_t sd; // SISCI virtual device descriptor - sci_local_segment_t segment; // SISCI local segment descriptor - uint32_t id; // Local segment identifier - size_t size; // Size of local memory - uint32_t flags; // SISCI flags used when creating segment -}; - - - -/* - * Create local segment and map it into local address space. - */ -int _nvm_local_memory_get(struct local_memory* mem, uint32_t id, size_t size); - - - -/* - * Create local segment with attached memory. - * This function will NOT map segment into address space. - */ -int _nvm_local_memory_get_attached(struct local_memory* mem, uint32_t id, void* ptr, size_t size); - - - -/* - * Create local segment with registered host memory. - * This function will not map segment into address space (as it is already mapped). - */ -int _nvm_local_memory_get_registered(struct local_memory* mem, uint32_t id, void* ptr, size_t size); - - -/* - * Remove local segment. - */ -void _nvm_local_memory_put(struct local_memory* mem); - - - -#endif /* _SISCI */ -#endif /* __NVM_INTERNAL_DIS_LOCAL_MEMORY_H__ */ diff --git a/src/dis/map.c b/src/dis/map.c deleted file mode 100644 index 988c7313..00000000 --- a/src/dis/map.c +++ /dev/null @@ -1,247 +0,0 @@ -#ifndef _SISCI -#error "Must compile with SISCI support" -#endif - -#ifndef __DIS_CLUSTER__ -#define __DIS_CLUSTER__ -#endif - -#include -#include -#include -#include -#include "dis/map.h" -#include "dis/local.h" -#include "dis/device.h" -#include "dprintf.h" -#include -#include -#include - - - -int _nvm_va_map_local(struct va_map* m, size_t size, sci_local_segment_t segment) -{ - sci_error_t err = SCI_ERR_OK; - - m->type = _VA_MAP_NOT_MAPPED; - - m->vaddr = SCIMapLocalSegment(segment, &m->md, 0, size, NULL, 0, &err); - - switch (err) - { - case SCI_ERR_OK: - m->type = _VA_MAP_MAPPED_LOCAL; - return 0; - - case SCI_ERR_OUT_OF_RANGE: - case SCI_ERR_SIZE_ALIGNMENT: - case SCI_ERR_OFFSET_ALIGNMENT: - return EINVAL; - - default: - dprintf("Mapping segment into virtual address space failed: %s\n", _SCIGetErrorString(err)); - return EIO; - } -} - - -int _nvm_va_map_remote(struct va_map* m, size_t size, sci_remote_segment_t segment, bool write, bool wc) -{ - sci_error_t err = SCI_ERR_OK; - uint32_t flags = 0; - - flags |= !write ? SCI_FLAG_READONLY_MAP : 0; - flags |= !wc ? 
SCI_FLAG_IO_MAP_IOSPACE : 0; - - m->type = _VA_MAP_NOT_MAPPED; - - m->vaddr = SCIMapRemoteSegment(segment, &m->md, 0, size, NULL, flags, &err); - - switch (err) - { - case SCI_ERR_OK: - m->type = _VA_MAP_MAPPED_REMOTE; - return 0; - - case SCI_ERR_OUT_OF_RANGE: - case SCI_ERR_SIZE_ALIGNMENT: - case SCI_ERR_OFFSET_ALIGNMENT: - return EINVAL; - - case SCI_ERR_NOT_CONNECTED: - return ENOTCONN; - - default: - dprintf("Mapping segment into virtual address space failed: %s\n", _SCIGetErrorString(err)); - return EIO; - } -} - - - -void _nvm_va_unmap(struct va_map* m) -{ - sci_error_t err = SCI_ERR_OK; - - switch (m->type) - { - case _VA_MAP_MAPPED_LOCAL: - case _VA_MAP_MAPPED_REMOTE: - do - { - SCIUnmapSegment(m->md, 0, &err); - } - while (err == SCI_ERR_BUSY); - break; - - case _VA_MAP_NOT_MAPPED: - // Do nothing - break; - - default: - dprintf("Unknown mapping type\n"); - break; - } - - m->type = _VA_MAP_NOT_MAPPED; - -#ifndef NDEBUG - if (err != SCI_ERR_OK) - { - dprintf("Unmapping segment from virtual address space failed: %s\n", _SCIGetErrorString(err)); - } -#endif -} - - - -int _nvm_io_map_local(struct io_map* m, sci_device_t device, sci_local_segment_t segment, uint32_t adapter) -{ - sci_error_t err = SCI_ERR_OK; - - m->type = _IO_MAP_NOT_MAPPED; - m->lsegment = segment; - m->rsegment = NULL; - m->device = device; - m->adapter = adapter; - m->ioaddr = 0; - - // FIXME: Remove this call when Lars adds support for segments created with SCI_FLAG_PRIVATE - SCISetSegmentAvailable(m->lsegment, m->adapter, 0, &err); - switch (err) - { - case SCI_ERR_OK: - break; - - case SCI_ERR_ILLEGAL_OPERATION: - case SCI_ERR_SEGMENT_NOT_PREPARED: - return EINVAL; - - default: - dprintf("Failed to set segment available: %s\n", _SCIGetErrorString(err)); - return EIO; - } - - SCIMapLocalSegmentForDevice(m->lsegment, m->adapter, m->device, &m->ioaddr, 0, 0, &err); - if (err != SCI_ERR_OK) - { - dprintf("Failed to map segment for device: %s\n", _SCIGetErrorString(err)); - return EIO; - } - - m->type = _IO_MAP_MAPPED_LOCAL; - return 0; -} - - - -int _nvm_io_map_remote(struct io_map* m, sci_device_t device, sci_remote_segment_t segment) -{ - sci_error_t err = SCI_ERR_OK; - - m->type = _IO_MAP_NOT_MAPPED; - m->lsegment = NULL; - m->rsegment = segment; - m->device = device; - m->adapter = 0; - m->ioaddr = 0; - - SCIMapRemoteSegmentForDevice(m->rsegment, m->device, &m->ioaddr, 0, 0, &err); - - if (err != SCI_ERR_OK) - { - dprintf("Failed to map segment for device: %s\n", _SCIGetErrorString(err)); - return EIO; - } - - m->type = _IO_MAP_MAPPED_REMOTE; - return 0; -} - - - -void _nvm_io_unmap(struct io_map* m) -{ - sci_error_t err = SCI_ERR_OK; - - switch (m->type) - { - case _IO_MAP_NOT_MAPPED: - // Do nothing - break; - - case _IO_MAP_MAPPED_LOCAL: - do - { - SCIUnmapLocalSegmentForDevice(m->lsegment, m->adapter, m->device, 0, &err); - } - while (err == SCI_ERR_BUSY); - break; - - case _IO_MAP_MAPPED_REMOTE: - do - { - SCIUnmapRemoteSegmentForDevice(m->rsegment, m->device, 0, &err); - } - while (err == SCI_ERR_BUSY); - break; - - default: - dprintf("Unknown mapping type\n"); - break; - } - -#ifndef NDEBUG - if (err != SCI_ERR_OK) - { - dprintf("Unmapping segment for device failed: %s\n", _SCIGetErrorString(err)); - } -#endif - - m->type = _IO_MAP_NOT_MAPPED; -} - - - -int _nvm_io_map_device_memory(struct io_map* m, const struct device_memory* mem) -{ - return _nvm_io_map_remote(m, mem->device.device, mem->segment); -} - - - -int _nvm_io_map_local_memory(struct io_map* m, const struct device* dev, const struct 
local_memory* mem, uint32_t adapter) -{ - sci_error_t err = SCI_ERR_OK; - - SCIPrepareSegment(mem->segment, adapter, 0, &err); - if (err != SCI_ERR_OK) - { - dprintf("Failed to prepare local segment: %s\n", _SCIGetErrorString(err)); - return ENOSPC; - } - - return _nvm_io_map_local(m, dev->device, mem->segment, adapter); -} - diff --git a/src/dis/map.h b/src/dis/map.h index 6e70e059..1e65097c 100644 --- a/src/dis/map.h +++ b/src/dis/map.h @@ -1,14 +1,5 @@ -#ifndef __NVM_INTERNAL_DIS_MAPPING_H__ -#define __NVM_INTERNAL_DIS_MAPPING_H__ - -/* Forward declarations */ -struct va_map; -struct io_map; -struct device; -struct device_memory; -struct local_memory; - - +#ifndef __NVM_INTERNAL_DIS_MAP_H__ +#define __NVM_INTERNAL_DIS_MAP_H__ #ifdef _SISCI /* Make sure everything is defined as needed */ @@ -17,143 +8,39 @@ struct local_memory; #endif /* Necessary includes */ -#include -#include -#include #include - +#include "dma.h" +#include /* - * VA mapping kind. - * Indicates whether or not a mapping is in use. + * What kind of memory are we mapping. */ -enum va_map_type +enum segment_type { - _VA_MAP_NOT_MAPPED = 0x00, - _VA_MAP_MAPPED_LOCAL = 0x01, - _VA_MAP_MAPPED_REMOTE = 0x02 + SEGMENT_TYPE_LOCAL = 0x1, // Local segment + SEGMENT_TYPE_REMOTE = 0x2, // Remote segment + SEGMENT_TYPE_DEVICE = 0x3, // Device segment (requires connect/disconnect) + SEGMENT_TYPE_PHYSICAL = 0x4 // Physical memory segment }; -/* - * Virtual address mapping descriptor. - * - * Describes a mapping of a segment in local virtual address space. - */ -struct va_map -{ - enum va_map_type type; // Type of mapping - sci_map_t md; // SISCI mapping descriptor - volatile void* vaddr; // Pointer to mapped memory -}; - -#define VA_MAP_CLEAR(p) \ - do { (p)->type = _VA_MAP_NOT_MAPPED; (p)->vaddr = NULL; } while (0) - -#define VA_MAP_INIT { .type = _VA_MAP_NOT_MAPPED, .md = NULL, .vaddr = NULL } - -/* - * IO mapping kind. - * Indicates whether or not a mapping is in use. - */ -enum io_map_type -{ - _IO_MAP_NOT_MAPPED = 0x00, - _IO_MAP_MAPPED_REMOTE = 0x01, - _IO_MAP_MAPPED_LOCAL = 0x02, -}; - /* - * IO address space mapping descriptor. - * - * Describes a mapping of a segment as seen from the device. + * Describe mapping for device. */ -struct io_map +struct map { - enum io_map_type type; // Type of mapping - sci_local_segment_t lsegment; // Local segment descriptor - sci_remote_segment_t rsegment; // Remote segment descriptor - sci_device_t device; // SmartIO device the segment is mapped for - uint32_t adapter; // Local adapter - sci_ioaddr_t ioaddr; // Device-local address to segment + enum segment_type type; // Kind of segment + sci_local_segment_t lseg; // Local segment descriptor + sci_remote_segment_t rseg; // Remote segment descriptor + uint32_t adapter; // DIS adapter number + size_t size; // Segment size + bool mapped; // Is segment mapped into virtual address space + sci_map_t md; // SISCI mapping descriptor + struct va_range range; // Memory range descriptor }; -#define IO_MAP_CLEAR(p) \ - do { (p)->type = _IO_MAP_NOT_MAPPED; (p)->ioaddr = 0; } while (0) - -#define IO_MAP_INIT \ - { .type = _IO_MAP_NOT_MAPPED, .lsegment = NULL, .rsegment = NULL, .device = NULL, .adapter = 0, .ioaddr = 0 } - - - - - -/* - * Map local segment into virtual address space. - */ -int _nvm_va_map_local(struct va_map* map, size_t size, sci_local_segment_t segment); - - -/* - * Map remote segment into virtual address space. 
- */ -int _nvm_va_map_remote(struct va_map* map, - size_t size, - sci_remote_segment_t segment, - bool write, - bool wc); - - -/* - * Unmap segment from virtual address space. - */ -void _nvm_va_unmap(struct va_map* map); - - - -/* - * Map local segment for device. - */ -int _nvm_io_map_local(struct io_map* map, - sci_device_t device, - sci_local_segment_t segment, - uint32_t adapter); - - -/* - * Map remote segment for device. - */ -int _nvm_io_map_remote(struct io_map* map, - sci_device_t device, - sci_remote_segment_t segment); - - -/* - * Unmap segment for device. - */ -void _nvm_io_unmap(struct io_map* map); - - - -/* - * Convenience function to map device memory for device. - */ -int _nvm_io_map_device_memory(struct io_map* map, const struct device_memory* mem); - - - -/* - * Convenience function to map local segment for device. - * This will implicitly prepare segment on the specified adapter. - */ -int _nvm_io_map_local_memory(struct io_map* map, - const struct device* dev, - const struct local_memory* mem, - uint32_t adapter); - - #endif /* _SISCI */ -#endif /* __NVM_INTERNAL_DIS_MAPPING_H__ */ +#endif /* __NVM_INTERNAL_DIS_MAP_H__ */ diff --git a/src/dis/rpc.c b/src/dis/rpc.c index e806eca8..10dde845 100644 --- a/src/dis/rpc.c +++ b/src/dis/rpc.c @@ -74,7 +74,8 @@ struct __attribute__((packed)) handle_info */ struct binding_handle { - struct device_memory dmem; // Device memory reference + struct controller* ctrl; // Controller reference + sci_remote_segment_t segment; // Device memory segment nvm_aq_ref rpc_ref; // RPC reference nvm_dis_rpc_cb_t rpc_cb; // RPC callback struct local_intr intr; // Interrupt handle @@ -87,7 +88,7 @@ struct binding_handle */ struct binding { - struct device_memory dmem; // Device memory reference + sci_remote_segment_t segment; // Device memory segment struct local_intr lintr; // Local interrupt handle struct remote_intr rintr; // Remote interrupt handle }; @@ -180,23 +181,33 @@ static int remote_command(struct binding* binding, nvm_cmd_t* cmd, nvm_cpl_t* cp */ static int write_handle_info(const struct binding_handle* handle, uint32_t adapter, bool clear) { - int status = 0; - struct va_map mapping = VA_MAP_INIT; + sci_error_t err; + sci_map_t map; + const size_t size = sizeof(struct handle_info) * NVM_DIS_RPC_MAX_ADAPTER; + volatile struct handle_info* infos; - status = _nvm_va_map_remote(&mapping, sizeof(struct handle_info) * NVM_DIS_RPC_MAX_ADAPTER, - handle->dmem.segment, true, false); - if (status != 0) + infos = (volatile struct handle_info*) SCIMapRemoteSegment(handle->segment, &map, 0, size, NULL, 0, &err); + if (err != SCI_ERR_OK) { - dprintf("Failed to map shared segment: %s\n", strerror(status)); - return status; + dprintf("Failed to map shared device segment: %s\n", _SCIGetErrorString(err)); + return EIO; } - volatile struct handle_info* info = (volatile struct handle_info*) mapping.vaddr; - info[adapter].magic = clear ? 0 : 0xDEADBEEF; - info[adapter].node_id = clear ? 0 : handle->intr.node_id; - info[adapter].intr_no = clear ? 0 : handle->intr.intr_no; + struct handle_info info; + info.magic = clear ? 0 : RPC_MAGIC_SIGNATURE; + info.node_id = clear ? 0 : handle->intr.node_id; + info.intr_no = clear ? 
0 : handle->intr.intr_no; + + infos[adapter] = info; + + nvm_wcb_flush(); + + do + { + SCIUnmapSegment(map, 0, &err); + } + while (err == SCI_ERR_BUSY); - _nvm_va_unmap(&mapping); return 0; } @@ -215,10 +226,10 @@ static int create_binding_handle(struct binding_handle** handle, nvm_aq_ref ref, return EINVAL; } - const struct device* dev = _nvm_device_from_ctrl(ctrl); - if (dev == NULL) + struct controller* cref = _nvm_ctrl_get(ctrl); + if (cref == NULL) { - return EINVAL; + return ENOTTY; } struct binding_handle* bh = (struct binding_handle*) malloc(sizeof(struct binding_handle)); @@ -228,9 +239,11 @@ static int create_binding_handle(struct binding_handle** handle, nvm_aq_ref ref, return ENOMEM; } - int status = _nvm_device_memory_get(&bh->dmem, dev, adapter, 0, SCI_FLAG_SHARED); + bh->ctrl = cref; + int status = _nvm_connect_device_memory(&bh->segment, bh->ctrl->device, 0, SCI_MEMTYPE_SHARED); if (status != 0) { + _nvm_ctrl_put(bh->ctrl); free(bh); return status; } @@ -241,7 +254,8 @@ static int create_binding_handle(struct binding_handle** handle, nvm_aq_ref ref, status = _nvm_local_intr_get(&bh->intr, adapter, bh, (intr_callback_t) handle_remote_command); if (status != 0) { - _nvm_device_memory_put(&bh->dmem); + _nvm_disconnect_device_memory(&bh->segment); + _nvm_ctrl_put(bh->ctrl); free(bh); return status; } @@ -249,7 +263,8 @@ static int create_binding_handle(struct binding_handle** handle, nvm_aq_ref ref, status = write_handle_info(bh, adapter, false); if (status != 0) { - _nvm_device_memory_put(&bh->dmem); + _nvm_disconnect_device_memory(&bh->segment); + _nvm_ctrl_put(bh->ctrl); free(bh); return status; } @@ -268,7 +283,9 @@ static void remove_binding_handle(struct binding_handle* handle, uint32_t adapte write_handle_info(handle, adapter, true); _nvm_local_intr_put(&handle->intr); - _nvm_device_memory_put(&handle->dmem); + _nvm_disconnect_device_memory(&handle->segment); + _nvm_ctrl_put(handle->ctrl); + free(handle); } @@ -277,19 +294,20 @@ static void remove_binding_handle(struct binding_handle* handle, uint32_t adapte /* * Helper function to try to connect to remote interrupt. 
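+ *
+ * Scans the handle info table exported through the shared device segment
+ * and connects to the first exported interrupt that accepts the connection.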
*/ -static int try_bind(struct binding* binding, size_t max) +static int try_bind(struct binding* binding, uint32_t adapter, size_t max) { + sci_error_t err; + sci_map_t map; + volatile const struct handle_info* info; + // Create mapping to remote shared segment - struct va_map mapping; - int err = _nvm_va_map_remote(&mapping, sizeof(struct handle_info) * NVM_DIS_RPC_MAX_ADAPTER, - binding->dmem.segment, false, false); - if (err != 0) + info = SCIMapRemoteSegment(binding->segment, &map, 0, sizeof(struct handle_info) * max, NULL, SCI_FLAG_READONLY_MAP, &err); + if (err != SCI_ERR_OK) { - return err; + dprintf("Failed to map remote segment: %s\n", _SCIGetErrorString(err)); + return EIO; } - volatile const struct handle_info* info = (volatile const struct handle_info*) mapping.vaddr; - // Iterate over exported interrupts for (size_t i = 0; i < max; ++i) { @@ -304,15 +322,14 @@ static int try_bind(struct binding* binding, size_t max) } // Attempt to connect - uint32_t adapter = binding->dmem.adapter; if (_nvm_remote_intr_get(&binding->rintr, adapter, node_id, intr_no) == 0) { - _nvm_va_unmap(&mapping); + SCIUnmapSegment(map, 0, &err); return 0; } } - _nvm_va_unmap(&mapping); + SCIUnmapSegment(map, 0, &err); //dprintf("Failed to connect to remote interrupt\n"); return ECONNREFUSED; } @@ -334,7 +351,7 @@ static int create_binding(struct binding** handle, const struct device* dev, uin return ENOMEM; } - status = _nvm_device_memory_get(&binding->dmem, dev, adapter, 0, SCI_FLAG_SHARED); + status = _nvm_connect_device_memory(&binding->segment, dev, 0, SCI_MEMTYPE_SHARED); if (status != 0) { free(binding); @@ -345,16 +362,16 @@ static int create_binding(struct binding** handle, const struct device* dev, uin status = _nvm_local_intr_get(&binding->lintr, adapter, NULL, NULL); if (status != 0) { - _nvm_device_memory_put(&binding->dmem); + _nvm_disconnect_device_memory(&binding->segment); free(binding); return status; } - status = try_bind(binding, NVM_DIS_RPC_MAX_ADAPTER); + status = try_bind(binding, adapter, NVM_DIS_RPC_MAX_ADAPTER); if (status != 0) { _nvm_local_intr_put(&binding->lintr); - _nvm_device_memory_put(&binding->dmem); + _nvm_disconnect_device_memory(&binding->segment); free(binding); return status; } @@ -373,7 +390,7 @@ static void remove_binding(struct binding* binding) { _nvm_remote_intr_put(&binding->rintr); _nvm_local_intr_put(&binding->lintr); - _nvm_device_memory_put(&binding->dmem); + _nvm_disconnect_device_memory(&binding->segment); free(binding); } @@ -419,13 +436,6 @@ int nvm_dis_rpc_bind(nvm_aq_ref* handle, const nvm_ctrl_t* ctrl, uint32_t adapte nvm_aq_ref ref; *handle = NULL; - const struct device* dev = _nvm_device_from_ctrl(ctrl); - if (dev == NULL) - { - dprintf("Could not look up device from controller\n"); - return EINVAL; - } - int err = _nvm_ref_get(&ref, ctrl); if (err != 0) { @@ -433,7 +443,7 @@ int nvm_dis_rpc_bind(nvm_aq_ref* handle, const nvm_ctrl_t* ctrl, uint32_t adapte } struct binding* binding; - err = create_binding(&binding, dev, adapter); + err = create_binding(&binding, _nvm_container_of(ctrl, struct controller, handle)->device, adapter); if (err != 0) { _nvm_ref_put(ref); diff --git a/src/dma.c b/src/dma.c index 9b3072b2..b74e9d8b 100644 --- a/src/dma.c +++ b/src/dma.c @@ -5,510 +5,454 @@ #endif #include -#include #include -#include +#include #include -#include +#include #include +#include #include #include -#include -#include -#include +#include "util.h" #include "ctrl.h" #include "dma.h" -#include "ioctl.h" -#include "util.h" -#include "regs.h" 
+#include "mutex.h" #include "dprintf.h" /* - * DMA handle container. - * - * Note that this structure is of variable size due to the list of pages - * at the end of the DMA handle. - */ -struct __attribute__((aligned (64))) dma -{ - struct dma_map* map; // DMA mapping descriptor - dma_map_free_t release; // Free mapping descriptor - nvm_dma_t handle; // DMA mapping handle -}; - - - - - -/* - * Memory mapping using kernel module. - * Indicates type of memory we have mapped. + * Reference counted mapping descriptor */ -enum map_type +struct map { - _MAP_TYPE_HOST = 0x01, - _MAP_TYPE_CUDA = 0x02, - _MAP_TYPE_INTERNAL = 0x03 + struct mutex lock; // Ensure exclusive access + uint32_t count; // Reference count + struct controller* ctrl; // Device reference + struct va_range* va; // Virtual address range + va_range_free_t release;// Callback for releasing address range }; /* - * Memory mapping descriptor. - * Describes memory mapped using the kernel module. + * Internal DMA handle container. + * + * Note that this structure is of variable size due to the list of pages + * at the end of the DMA handle. */ -struct ioctl_mapping +struct __attribute__((aligned (32))) container { - struct dma_map mapping; // DMA mapping descriptor - enum map_type type; // Type of memory - void* buffer; // Buffer pointer - int ioctl_fd; // File descriptor to kernel module - bool mapped; // Indicates if memory is mapped + struct map* map; // DMA mapping descriptor + nvm_dma_t handle; // User handle }; -/* Get handle container */ -#define container(m) \ - ((struct dma*) (((unsigned char*) (m)) - offsetof(struct dma, handle))) - - - /* Calculate number of controller pages */ #define n_ctrl_pages(ctrl, page_size, n_pages) \ (((page_size) * (n_pages)) / (ctrl)->page_size) -/* - * Initialize the mapping handle. - * Sets handle members and populates page address list. +/* + * Create reference counted mapping descriptor. */ -void _nvm_dma_handle_populate(nvm_dma_t* handle, const nvm_ctrl_t* ctrl, const uint64_t* ioaddrs) +static int create_map(struct map** md, const nvm_ctrl_t* ctrl, struct va_range* va, va_range_free_t release) { - const struct dma_map* md = container(handle)->map; + *md = NULL; - size_t i_page; - size_t page_size = md->page_size; - size_t ctrl_page_size = ctrl->page_size; - - // Set handle members - handle->vaddr = md->vaddr; - handle->page_size = ctrl->page_size; - handle->n_ioaddrs = n_ctrl_pages(ctrl, page_size, md->n_pages); - - // Calculate logical page addresses - for (i_page = 0; i_page < handle->n_ioaddrs; ++i_page) +#ifndef NDEBUG + if (release == NULL) { - size_t current_page = (i_page * ctrl_page_size) / page_size; - size_t offset_within_page = (i_page * ctrl_page_size) % page_size; - - handle->ioaddrs[i_page] = ioaddrs[current_page] + offset_within_page; + return EINVAL; } -} - - - -/* - * Create a DMA handle container. - * - * Need to use the controller reference to calculate from local page size - * to controller page size. 
- */
-int _nvm_dma_create(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, struct dma_map* md, dma_map_free_t release)
-{
-    *handle = NULL;
-
-    size_t page_size = md->page_size;
-    size_t n_pages = md->n_pages;
+#endif
 
-    // Do some sanity checking
-    if (page_size == 0 || n_pages == 0 || (page_size * n_pages) % ctrl->page_size != 0)
+    // Take device reference
+    struct controller* ref = _nvm_ctrl_get(ctrl);
+    if (ref == NULL)
     {
-        dprintf("Addresses do not align with controller pages");
-        return EINVAL;
+        return ENOSPC;
     }
 
-    // Size of the handle container
-    size_t container_size = sizeof(struct dma) + (n_ctrl_pages(ctrl, page_size, n_pages)) * sizeof(uint64_t);
+    struct map* m = (struct map*) malloc(sizeof(struct map));
+    if (m == NULL)
+    {
+        _nvm_ctrl_put(ref);
+        dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno));
+        return errno;
+    }
 
-    // Allocate the container and set mapping
-    struct dma* container = (struct dma*) malloc(container_size);
-    if (container == NULL)
+    int err = _nvm_mutex_init(&m->lock);
+    if (err != 0)
     {
-        dprintf("Failed to allocate DMA descriptor: %s\n", strerror(errno));
-        return ENOMEM;
+        free(m);
+        _nvm_ctrl_put(ref);
+        return err;
     }
 
-    container->map = md;
-    container->release = release;
-    container->handle.vaddr = NULL;
-    container->handle.page_size = 0;
-    container->handle.n_ioaddrs = 0;
+    m->count = 1;
+    m->ctrl = ref;
+    m->va = va;
+    m->release = release;
 
-    *handle = &container->handle;
+    *md = m;
     return 0;
 }
 
 
 
 /*
- * Free DMA handle.
+ * Release mapping descriptor.
  */
-void _nvm_dma_remove(nvm_dma_t* handle)
+static void remove_map(struct map* md)
 {
-    struct dma* dma = container(handle);
-
-    if (dma->release != NULL)
-    {
-        dma->release(dma->map);
-    }
-    free(dma);
+    _nvm_ctrl_put(md->ctrl);
+    free(md);
 }
 
 
 
 /*
- * Create DMA mapping descriptor from physical/bus addresses.
+ * Helper function to initialize DMA handle members and
+ * populate bus address list.
  */
-int nvm_dma_map(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* vaddr, size_t page_size, size_t n_pages, const uint64_t* ioaddrs)
+static void populate_handle(nvm_dma_t* handle, const struct va_range* va, const nvm_ctrl_t* ctrl, const uint64_t* ioaddrs)
 {
-    int status;
-    *handle = NULL;
+    size_t i_page;
+    size_t page_size = va->page_size;
+
+    // Set handle members
+    handle->vaddr = va->vaddr;
+    handle->page_size = ctrl->page_size;
+    handle->n_ioaddrs = n_ctrl_pages(ctrl, page_size, va->n_pages);
 
-    struct dma_map* map = malloc(sizeof(struct dma_map));
-    if (map == NULL)
+    // Calculate logical page addresses
+    handle->contiguous = true;
+    for (i_page = 0; i_page < handle->n_ioaddrs; ++i_page)
     {
-        dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno));
-        return ENOMEM;
-    }
+        size_t current_page = (i_page * handle->page_size) / page_size;
+        size_t offset_within_page = (i_page * handle->page_size) % page_size;
 
-    map->vaddr = vaddr;
-    map->page_size = page_size;
-    map->n_pages = n_pages;
+        handle->ioaddrs[i_page] = ioaddrs[current_page] + offset_within_page;
 
-    status = _nvm_dma_create(handle, ctrl, map, (dma_map_free_t) free);
-    if (status != 0)
-    {
-        free(map);
-        return status;
+        if (i_page > 0 && handle->ioaddrs[i_page - 1] + handle->page_size != handle->ioaddrs[i_page])
+        {
+            handle->contiguous = false;
+        }
     }
-
-    _nvm_dma_handle_populate(*handle, ctrl, ioaddrs);
-    return 0;
 }
 
 
 
 /*
- * Remove DMA mapping descriptor.
+ * Helper function to map an address range for device and populate DMA handle.
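+ *
+ * The map_range callback fills in one bus address per page in the range;
+ * populate_handle() then expands these to controller page granularity.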
*/ -void nvm_dma_unmap(nvm_dma_t* handle) +static int dma_map(struct container* container) { - if (handle != NULL) + struct map* md = container->map; + + // Check if mapping is supported + if (md->ctrl->ops.map_range == NULL) { - _nvm_dma_remove(handle); + return EINVAL; } -} + // Allocate list of bus addresses + uint64_t* ioaddrs = (uint64_t*) calloc(md->va->n_pages, sizeof(uint64_t)); + if (ioaddrs == NULL) + { + return ENOMEM; + } + int err = md->ctrl->ops.map_range(md->ctrl->device, md->va, ioaddrs); + if (err != 0) + { + free(ioaddrs); + return err; + } + + populate_handle(&container->handle, md->va, &md->ctrl->handle, ioaddrs); + free(ioaddrs); + + return 0; +} /* - * Helper function to lock pages and retrieve IO addresses for a - * virtual memory range. + * Decrease mapping descriptor reference count. */ -static int map_memory(struct ioctl_mapping* md, uint64_t* ioaddrs) +static void put_map(struct map* md) { - enum nvm_ioctl_type type; - - switch (md->type) + if (md == NULL) { - case _MAP_TYPE_INTERNAL: - case _MAP_TYPE_HOST: - type = NVM_MAP_HOST_MEMORY; - break; - -#ifdef _CUDA - case _MAP_TYPE_CUDA: - type = NVM_MAP_DEVICE_MEMORY; - break; -#endif - - default: - dprintf("Unknown memory type\n"); - return EINVAL; + return; } - struct nvm_ioctl_map request = { - .vaddr_start = (uintptr_t) md->mapping.vaddr, - .n_pages = md->mapping.n_pages, - .ioaddrs = ioaddrs - }; + _nvm_mutex_lock(&md->lock); + if (--md->count == 0) + { + if (md->ctrl->ops.unmap_range != NULL) + { + md->ctrl->ops.unmap_range(md->ctrl->device, md->va); + } + md->release(md->va); + md->va = NULL; + } + _nvm_mutex_unlock(&md->lock); - int err = ioctl(md->ioctl_fd, type, &request); - if (err < 0) + if (md->va == NULL) { - dprintf("Page mapping kernel request failed: %s\n", strerror(errno)); - return errno; + remove_map(md); } - - md->mapped = true; - return 0; } /* - * Release locked pages. + * Increase mapping descriptor reference count. */ -static int unmap_memory(const struct ioctl_mapping* md) +static int get_map(struct map* md) { - uint64_t addr = (uintptr_t) md->mapping.vaddr; + int err; - int err = ioctl(md->ioctl_fd, NVM_UNMAP_MEMORY, &addr); + if (md == NULL) + { + return EINVAL; + } - if (err < 0) + err = _nvm_mutex_lock(&md->lock); + if (err != 0) { - dprintf("Page unmapping kernel request failed: %s\n", strerror(errno)); - return errno; + dprintf("Failed to take map reference lock: %s\n", strerror(err)); + return err; } + ++md->count; + _nvm_mutex_unlock(&md->lock); + return 0; } -static int create_mapping(struct ioctl_mapping** handle, enum map_type type, int fd, void* vaddr, size_t size) +/* + * Create a DMA handle container. + * This function assumes that the device reference has already been increased. 
 
 
 
-static int create_mapping(struct ioctl_mapping** handle, enum map_type type, int fd, void* vaddr, size_t size)
+/*
+ * Create a DMA handle container.
+ * This function assumes that the device reference has already been increased.
+ */
+static int create_container(struct container** container, struct map* md, bool remote)
 {
-    size_t page_size = 0;
+    *container = NULL;
+    const struct controller* ctrl = md->ctrl;
+
+    size_t page_size = md->va->page_size;
+    size_t n_pages = md->va->n_pages;
 
-    switch (type)
+    // Do some sanity checking
+    if (page_size == 0 || n_pages == 0 || (page_size * n_pages) % ctrl->handle.page_size != 0)
     {
-        case _MAP_TYPE_HOST:
-        case _MAP_TYPE_INTERNAL:
-            page_size = _nvm_host_page_size();
-            break;
-
-#ifdef _CUDA
-        case _MAP_TYPE_CUDA:
-            page_size = (1ULL << 16);
-            break;
-#endif
-
-        default:
-            dprintf("Unknown memory type\n");
-            return EINVAL;
+        dprintf("Addresses do not align with controller pages\n");
+        return EINVAL;
     }
 
-    size_t n_pages = NVM_PAGE_ALIGN(size, page_size) / page_size;
-
-    struct ioctl_mapping* md = malloc(sizeof(struct ioctl_mapping));
+    // Size of the handle container
+    size_t container_size = sizeof(struct container) + (n_ctrl_pages(&ctrl->handle, page_size, n_pages)) * sizeof(uint64_t);
 
-    if (md == NULL)
+    // Allocate the container and set mapping
+    *container = (struct container*) malloc(container_size);
+    if (*container == NULL)
     {
-        dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno));
+        dprintf("Failed to allocate DMA descriptor: %s\n", strerror(errno));
         return ENOMEM;
     }
 
-    md->mapping.vaddr = vaddr;
-    md->mapping.page_size = page_size;
-    md->mapping.n_pages = n_pages;
-    md->type = type;
-    md->mapped = false;
-    md->buffer = NULL;
-
-    if (type == _MAP_TYPE_INTERNAL)
-    {
-        md->buffer = vaddr;
-    }
+    (*container)->map = md;
+    (*container)->handle.contiguous = true;
+    (*container)->handle.local = !remote;
 
-    md->ioctl_fd = dup(fd);
-    if (md->ioctl_fd < 0)
-    {
-        dprintf("Failed to duplicate file descriptor: %s\n", strerror(errno));
-        free(md);
-        return EBADF;
-    }
-
-    *handle = md;
     return 0;
 }
 
 
 
 /*
- * Remove mapping descriptor.
+ * Call release callback and free container.
+ * NB! unmap (if map was successful) must be called
+ * before calling this function.
  */
-static void remove_mapping(struct ioctl_mapping* md)
+static void remove_container(struct container* container)
 {
-    if (md->mapped)
-    {
-        unmap_memory(md);
-    }
-    close(md->ioctl_fd);
-    free(md->buffer);
-    free(md);
+    put_map(container->map);
+    container->map = NULL;
+    free(container);
 }
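create_container() sizes its allocation so that the bus-address list kept at the tail of nvm_dma_t lives in the same block as the container itself. The same pattern in isolation (toy types, not the library's):

    #include <stdint.h>
    #include <stdlib.h>

    /* One malloc() covers the struct and its trailing address list. */
    struct toy_handle
    {
        size_t   n_ioaddrs;
        uint64_t ioaddrs[];   /* flexible array member */
    };

    static struct toy_handle* toy_alloc(size_t n_pages)
    {
        struct toy_handle* h = malloc(sizeof(*h) + n_pages * sizeof(uint64_t));
        if (h != NULL)
        {
            h->n_ioaddrs = n_pages;
        }
        return h;
    }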
 
 
 
 /*
- * Helper function to map an address range and initialize DMA handle.
+ * Create and initialize a DMA handle, and map a virtual address range
+ * for the controller.
  */
-static int populate_handle(struct dma* container, const nvm_ctrl_t* ctrl)
+int _nvm_dma_init(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, struct va_range* va, bool remote, va_range_free_t release)
 {
-    uint64_t* ioaddrs = calloc(container->map->n_pages, sizeof(uint64_t));
-    if (ioaddrs == NULL)
-    {
-        return ENOMEM;
-    }
-
-    int err = map_memory((struct ioctl_mapping*) container->map, ioaddrs);
-    if (err != 0)
-    {
-        free(ioaddrs);
-        return err;
-    }
-
-    _nvm_dma_handle_populate(&container->handle, ctrl, ioaddrs);
-    free(ioaddrs);
-
-    return 0;
-}
-
-
-
-
-#if defined ( __unix__ )
-int nvm_dma_create(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, size_t size)
-{
-    void* buffer;
-    struct ioctl_mapping* md;
-    size = NVM_CTRL_ALIGN(ctrl, size);
-    *handle = NULL;
-    int fd = _nvm_fd_from_ctrl(ctrl);
-    if (fd < 0)
-    {
-        return EBADF;
-    }
+    struct map* map;
+    struct container* container;
 
-    int err = posix_memalign(&buffer, ctrl->page_size, size);
-    if (err != 0)
+    if (release == NULL)
     {
-        dprintf("Faile to allocate page-aligned buffer: %s\n", strerror(err));
-        return err;
+        return EINVAL;
     }
 
-    err = create_mapping(&md, _MAP_TYPE_INTERNAL, fd, buffer, size);
+    // Create mapping descriptor
+    int err = create_map(&map, ctrl, va, release);
     if (err != 0)
     {
-        free(buffer);
         return err;
     }
-
-    err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) remove_mapping);
+
+    // Create DMA handle container
+    err = create_container(&container, map, remote);
     if (err != 0)
     {
-        remove_mapping(md);
+        remove_map(map);
         return err;
     }
 
-    err = populate_handle(container(*handle), ctrl);
+    // Map controller for device and populate handle
+    err = dma_map(container);
     if (err != 0)
     {
-        remove_mapping(md);
-        *handle = NULL;
+        remove_map(map);
+        free(container);
         return err;
     }
 
+    *handle = &container->handle;
     return 0;
 }
-#endif
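Note the error contract of _nvm_dma_init(): on failure the release callback has not been invoked, so the caller still owns its descriptor; on success, ownership passes to the reference-counted map. A sketch under that assumption (my_range and my_release are hypothetical names, and dma.h/util.h from this patch are assumed):

    struct my_range
    {
        struct va_range range;
        /* backend-private bookkeeping would go here */
    };

    static void my_release(struct va_range* va)
    {
        free(_nvm_container_of(va, struct my_range, range));
    }

    static int my_map(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, struct my_range* r)
    {
        int err = _nvm_dma_init(handle, ctrl, &r->range, false, &my_release);
        if (err != 0)
        {
            free(r);   /* release callback was not called on the error path */
        }
        return err;
    }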
 
 
 /*
- * Create DMA mapping descriptor from virtual address using kernel module.
+ * Create DMA mapping descriptor from user supplied physical/bus addresses.
  */
-int nvm_dma_map_host(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* vaddr, size_t size)
+int nvm_raw_dma_map(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* vaddr, size_t page_size, size_t n_pages, const uint64_t* ioaddrs)
 {
-    struct ioctl_mapping* md;
+    int status;
+    struct map* map;
+    struct container* container;
+    struct va_range* va;
     *handle = NULL;
 
-    int fd = _nvm_fd_from_ctrl(ctrl);
-    if (fd < 0)
+    if (ioaddrs == NULL)
     {
-        return EBADF;
+        return EINVAL;
     }
 
-    int err = create_mapping(&md, _MAP_TYPE_HOST, fd, vaddr, size);
-    if (err != 0)
+    // Create virtual address range descriptor
+    va = (struct va_range*) malloc(sizeof(struct va_range));
+    if (va == NULL)
     {
-        return err;
+        dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno));
+        return ENOMEM;
     }
-
-    err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) remove_mapping);
-    if (err != 0)
+
+    va->vaddr = (volatile void*) vaddr;
+    va->page_size = page_size;
+    va->n_pages = n_pages;
+
+    // Create empty mapping descriptor
+    status = create_map(&map, ctrl, va, (va_range_free_t) &free);
+    if (status != 0)
     {
-        remove_mapping(md);
-        return err;
+        free(va);
+        return status;
     }
-
-    err = populate_handle(container(*handle), ctrl);
-    if (err != 0)
+
+    // Create DMA handle container
+    status = create_container(&container, map, false);
+    if (status != 0)
     {
-        remove_mapping(md);
-        *handle = NULL;
-        return err;
+        remove_map(map);
+        free(va);
+        return status;
     }
 
+    populate_handle(&container->handle, va, ctrl, ioaddrs);
+    *handle = &container->handle;
     return 0;
 }
 
 
-
-#ifdef _CUDA
-int nvm_dma_map_device(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* devptr, size_t size)
+/*
+ * Create DMA mapping descriptor from other descriptor.
+ */
+int nvm_dma_remap(nvm_dma_t** handle, const nvm_dma_t* other)
 {
-    struct ioctl_mapping* md;
-
+    int status;
+    struct container* container;
+    struct map* map = _nvm_container_of(other, struct container, handle)->map;
+    struct va_range va;
+
     *handle = NULL;
 
-    int fd = _nvm_fd_from_ctrl(ctrl);
-    if (fd < 0)
+    // Increase mapping descriptor reference count
+    status = get_map(map);
+    if (status != 0)
     {
-        return EBADF;
+        return status;
     }
 
-    int err = create_mapping(&md, _MAP_TYPE_CUDA, fd, devptr, size);
-    if (err != 0)
+    // Create DMA handle container
+    status = create_container(&container, map, !other->local);
+    if (status != 0)
     {
-        return err;
+        put_map(map); // Will implicitly release device reference
+        return status;
     }
 
-    err = _nvm_dma_create(handle, ctrl, (struct dma_map*) md, (dma_map_free_t) remove_mapping);
-    if (err != 0)
+    // Hack to get the list of bus addresses: we must not map the range
+    // again, so reuse the other handle's addresses after taking a reference.
+    va.vaddr = map->va->vaddr;
+    va.page_size = other->page_size;
+    va.n_pages = other->n_ioaddrs;
+
+    populate_handle(&container->handle, &va, &map->ctrl->handle, other->ioaddrs);
+    *handle = &container->handle;
+
+    return 0;
+}
+
+
+
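Because the underlying map is reference counted, a handle produced by nvm_dma_remap() keeps the device mapping alive independently of the original. A usage sketch (error handling abbreviated):

    static int share_mapping(const nvm_ctrl_t* ctrl)
    {
        nvm_dma_t* original = NULL;
        nvm_dma_t* copy = NULL;

        int err = nvm_dma_create(&original, ctrl, 0x1000);  /* refcount 1 */
        if (err != 0)
        {
            return err;
        }

        err = nvm_dma_remap(&copy, original);               /* refcount 2 */
        if (err != 0)
        {
            nvm_dma_unmap(original);
            return err;
        }

        nvm_dma_unmap(original);  /* refcount drops to 1: still mapped */
        nvm_dma_unmap(copy);      /* refcount 0: unmapped and released */
        return 0;
    }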
+/*
+ * Remove DMA mapping descriptor.
+ */
+void nvm_dma_unmap(nvm_dma_t* handle)
+{
+    if (handle != NULL)
     {
-        remove_mapping(md);
-        return err;
+        struct container* dma = _nvm_container_of(handle, struct container, handle);
+        remove_container(dma);
     }
+}
 
-    err = populate_handle(container(*handle), ctrl);
-    if (err != 0)
+
+
+const nvm_ctrl_t* nvm_ctrl_from_dma(const nvm_dma_t* handle)
+{
+    if (handle != NULL)
     {
-        remove_mapping(md);
-        *handle = NULL;
-        return err;
+        const struct container* dma;
+        dma = _nvm_container_of(handle, struct container, handle);
+        return &dma->map->ctrl->handle;
     }
 
-    return 0;
+    return NULL;
 }
-#endif
diff --git a/src/dma.h b/src/dma.h
index 19dc5fd1..30aaecf3 100644
--- a/src/dma.h
+++ b/src/dma.h
@@ -4,54 +4,47 @@
 #include <nvm_types.h>
 #include <stddef.h>
 #include <stdint.h>
-
-/* Forward declaration */
-struct dma_map;
+#include <stdbool.h>
 
-/* Callback type for unmapping and deleting custom mapping */
-typedef void (*dma_map_free_t)(struct dma_map* map);
+/* Forward declaration */
+struct va_range;
 
 
 /*
- * DMA mapping descriptor.
- *
- * This structure describes a custom address range mapping.
- * Custom types should include this structure in head.
+ * Callback type for freeing an address range descriptor.
+ * Called after the range is unmapped from the device, when the virtual
+ * address mapping can be released.
  */
-struct __attribute__((aligned (64))) dma_map
-{
-    void*   vaddr;      // Virtual address of mapped address range
-    size_t  page_size;  // Page size of address range
-    size_t  n_pages;    // Number of pages for address range
-};
+typedef void (*va_range_free_t)(struct va_range* va);
 
 
 /*
- * Create a DMA handle container.
+ * Virtual address range descriptor.
+ * This structure describes a custom address range mapped in userspace.
  */
-int _nvm_dma_create(nvm_dma_t** handle,
-                    const nvm_ctrl_t* ctrl,
-                    struct dma_map* map,
-                    dma_map_free_t release);
-
+struct va_range
+{
+    volatile void*  vaddr;      // Virtual address of mapped address range
+    size_t          page_size;  // Alignment of mapping (page size)
+    size_t          n_pages;    // Number of pages for address range
+};
 
-/*
- * Initialize DMA handle.
- */
-void _nvm_dma_handle_populate(nvm_dma_t* handle,
-                              const nvm_ctrl_t* ctrl,
-                              const uint64_t* ioaddrs);
+#define VA_RANGE_INIT(vaddr, page_size, n_pages) \
+    (struct va_range) {(vaddr), (page_size), (n_pages)}
 
 
 /*
- * Invoke release callback and remove a DMA handle container.
+ * Map address range for a controller and create and initialize a DMA handle.
  */
-void _nvm_dma_remove(nvm_dma_t* handle);
-
+int _nvm_dma_init(nvm_dma_t** handle,
+                  const nvm_ctrl_t* ctrl,
+                  struct va_range* va,
+                  bool remote,
+                  va_range_free_t release);
 
 #endif /* __NVM_INTERNAL_DMA_H__ */
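The VA_RANGE_INIT compound literal makes it easy to describe an existing buffer, e.g. one the caller has already pinned itself. Sketch (the 8-page buffer is assumed to be suitably aligned and pinned):

    #include "dma.h"

    void describe(void* buf)
    {
        struct va_range va = VA_RANGE_INIT((volatile void*) buf, 0x1000, 8);

        /* ... hand &va to _nvm_dma_init() together with a release callback
         * that frees whatever owns the buffer ... */
        (void) va;
    }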
diff --git a/src/linux/device.c b/src/linux/device.c
new file mode 100644
index 00000000..6d5c217a
--- /dev/null
+++ b/src/linux/device.c
@@ -0,0 +1,167 @@
+#ifndef __linux__
+#error "Must compile for Linux"
+#endif
+
+#include <nvm_types.h>
+#include <nvm_ctrl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "linux/map.h"
+#include "linux/ioctl.h"
+#include "ctrl.h"
+#include "dprintf.h"
+
+
+
+/*
+ * Device descriptor
+ */
+struct device
+{
+    int fd; /* ioctl file descriptor */
+};
+
+
+
+/*
+ * Unmap controller memory and close file descriptor.
+ */
+static void release_device(struct device* dev, volatile void* mm_ptr, size_t mm_size)
+{
+    dprintf("releasing device\n");
+    munmap((void*) mm_ptr, mm_size);
+    close(dev->fd);
+    free(dev);
+}
+
+
+
+/*
+ * Call kernel module ioctl and map memory for DMA.
+ */
+static int ioctl_map(const struct device* dev, const struct va_range* va, uint64_t* ioaddrs)
+{
+    const struct ioctl_mapping* m = _nvm_container_of(va, struct ioctl_mapping, range);
+    enum nvm_ioctl_type type;
+
+    switch (m->type)
+    {
+        case MAP_TYPE_API:
+        case MAP_TYPE_HOST:
+            type = NVM_MAP_HOST_MEMORY;
+            break;
+
+#ifdef _CUDA
+        case MAP_TYPE_CUDA:
+            type = NVM_MAP_DEVICE_MEMORY;
+            break;
+#endif
+        default:
+            dprintf("Unknown memory type in map for device\n");
+            return EINVAL;
+    }
+
+    struct nvm_ioctl_map request = {
+        .vaddr_start = (uintptr_t) m->buffer,
+        .n_pages = va->n_pages,
+        .ioaddrs = ioaddrs
+    };
+
+    int err = ioctl(dev->fd, type, &request);
+    if (err < 0)
+    {
+        dprintf("Page mapping kernel request failed: %s\n", strerror(errno));
+        return errno;
+    }
+
+    return 0;
+}
+
+
+
+/*
+ * Call kernel module ioctl and unmap memory.
+ */
+static void ioctl_unmap(const struct device* dev, const struct va_range* va)
+{
+    const struct ioctl_mapping* m = _nvm_container_of(va, struct ioctl_mapping, range);
+    uint64_t addr = (uintptr_t) m->buffer;
+
+    int err = ioctl(dev->fd, NVM_UNMAP_MEMORY, &addr);
+
+#ifndef NDEBUG
+    if (err < 0)
+    {
+        dprintf("Page unmapping kernel request failed: %s\n", strerror(errno));
+    }
+#endif
+}
+
+
+
+int nvm_ctrl_init(nvm_ctrl_t** ctrl, int filedes)
+{
+    int err;
+    struct device* dev;
+    const struct device_ops ops = {
+        .release_device = &release_device,
+        .map_range = &ioctl_map,
+        .unmap_range = &ioctl_unmap,
+    };
+
+    *ctrl = NULL;
+
+    dev = (struct device*) malloc(sizeof(struct device));
+    if (dev == NULL)
+    {
+        dprintf("Failed to allocate device handle: %s\n", strerror(errno));
+        return ENOMEM;
+    }
+
+    dev->fd = dup(filedes);
+    if (dev->fd < 0)
+    {
+        free(dev);
+        dprintf("Could not duplicate file descriptor: %s\n", strerror(errno));
+        return errno;
+    }
+
+    err = fcntl(dev->fd, F_SETFL, O_NONBLOCK);
+    if (err == -1)
+    {
+        close(dev->fd);
+        free(dev);
+        dprintf("Failed to set file descriptor control: %s\n", strerror(errno));
+        return errno;
+    }
+
+    const size_t mm_size = NVM_CTRL_MEM_MINSIZE;
+    void* mm_ptr = mmap(NULL, mm_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, dev->fd, 0);
+    if (mm_ptr == MAP_FAILED)
+    {
+        close(dev->fd);
+        free(dev);
+        dprintf("Failed to map device memory: %s\n", strerror(errno));
+        return errno;
+    }
+
+    err = _nvm_ctrl_init(ctrl, dev, &ops, DEVICE_TYPE_IOCTL, mm_ptr, mm_size);
+    if (err != 0)
+    {
+        release_device(dev, mm_ptr, mm_size);
+        return err;
+    }
+
+    return 0;
+}
+
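Since nvm_ctrl_init() dup()s the descriptor it is given, the caller can close its own descriptor immediately. Usage sketch (the device node path is an assumption; it depends on how the helper kernel module registers its character device):

    #include <fcntl.h>
    #include <unistd.h>

    static nvm_ctrl_t* open_ctrl(void)
    {
        int fd = open("/dev/libnvm0", O_RDWR);   /* path is illustrative */
        if (fd < 0)
        {
            return NULL;
        }

        nvm_ctrl_t* ctrl = NULL;
        int err = nvm_ctrl_init(&ctrl, fd);
        close(fd);   /* safe: the library keeps its own dup()ed descriptor */

        return err == 0 ? ctrl : NULL;
    }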
diff --git a/src/linux/dma.c b/src/linux/dma.c
new file mode 100644
index 00000000..f3236b34
--- /dev/null
+++ b/src/linux/dma.c
@@ -0,0 +1,182 @@
+#ifndef __linux__
+#error "Must compile for Linux"
+#endif
+
+#ifdef _CUDA
+#ifndef __CUDA__
+#define __CUDA__
+#endif
+#endif
+
+#include <nvm_types.h>
+#include <nvm_dma.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include "util.h"
+#include "ctrl.h"
+#include "dma.h"
+#include "linux/map.h"
+#include "dprintf.h"
+
+
+
+static void remove_mapping_descriptor(struct ioctl_mapping* md)
+{
+    if (md->type == MAP_TYPE_API)
+    {
+        free((void*) md->buffer);
+    }
+
+    free(md);
+}
+
+
+
+static void release_mapping_descriptor(struct va_range* va)
+{
+    remove_mapping_descriptor(_nvm_container_of(va, struct ioctl_mapping, range));
+}
+
+
+
+static int create_mapping_descriptor(struct ioctl_mapping** handle, enum mapping_type type, void* buffer, size_t size)
+{
+    size_t page_size = 0;
+
+    switch (type)
+    {
+        case MAP_TYPE_API:
+        case MAP_TYPE_HOST:
+            page_size = _nvm_host_page_size();
+            break;
+
+#ifdef _CUDA
+        case MAP_TYPE_CUDA:
+            page_size = (1ULL << 16);
+            break;
+#endif
+
+        default:
+            dprintf("Unknown memory type\n");
+            return EINVAL;
+    }
+
+    size_t n_pages = NVM_PAGE_ALIGN(size, page_size) / page_size;
+
+    struct ioctl_mapping* md = malloc(sizeof(struct ioctl_mapping));
+    if (md == NULL)
+    {
+        dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno));
+        return errno;
+    }
+
+    md->type = type;
+    md->buffer = buffer;
+    md->range.vaddr = (volatile void*) buffer;
+    md->range.page_size = page_size;
+    md->range.n_pages = n_pages;
+
+    *handle = md;
+    return 0;
+}
+
+
+
+int nvm_dma_create(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, size_t size)
+{
+    void* buffer;
+    struct ioctl_mapping* md;
+    size = NVM_CTRL_ALIGN(ctrl, size);
+
+    *handle = NULL;
+    if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL)
+    {
+        return EBADF;
+    }
+
+    int err = posix_memalign(&buffer, ctrl->page_size, size);
+    if (err != 0)
+    {
+        dprintf("Failed to allocate page-aligned memory buffer: %s\n", strerror(err));
+        return err;
+    }
+
+    err = create_mapping_descriptor(&md, MAP_TYPE_API, buffer, size);
+    if (err != 0)
+    {
+        free(buffer);
+        return err;
+    }
+
+    err = _nvm_dma_init(handle, ctrl, &md->range, false, &release_mapping_descriptor);
+    if (err != 0)
+    {
+        remove_mapping_descriptor(md);
+        return err;
+    }
+
+    return 0;
+}
+
+
+
+int nvm_dma_map_host(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* vaddr, size_t size)
+{
+    struct ioctl_mapping* md;
+    *handle = NULL;
+
+    if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL)
+    {
+        return EBADF;
+    }
+
+    int err = create_mapping_descriptor(&md, MAP_TYPE_HOST, vaddr, size);
+    if (err != 0)
+    {
+        return err;
+    }
+
+    err = _nvm_dma_init(handle, ctrl, &md->range, false, &release_mapping_descriptor);
+    if (err != 0)
+    {
+        remove_mapping_descriptor(md);
+        return err;
+    }
+
+    return 0;
+}
+
+
+
+#ifdef _CUDA
+int nvm_dma_map_device(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* devptr, size_t size)
+{
+    struct ioctl_mapping* md;
+    *handle = NULL;
+
+    if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL)
+    {
+        return EBADF;
+    }
+
+    int err = create_mapping_descriptor(&md, MAP_TYPE_CUDA, devptr, size);
+    if (err != 0)
+    {
+        return err;
+    }
+
+    err = _nvm_dma_init(handle, ctrl, &md->range, false, &release_mapping_descriptor);
+    if (err != 0)
+    {
+        remove_mapping_descriptor(md);
+        return err;
+    }
+
+    return 0;
+}
+#endif
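nvm_dma_create() above both allocates page-aligned memory and maps it, so a caller only deals with the finished handle. Sketch (assumes a controller opened through the ioctl backend):

    #include <string.h>

    static int make_queue_memory(const nvm_ctrl_t* ctrl)
    {
        nvm_dma_t* qmem = NULL;

        int err = nvm_dma_create(&qmem, ctrl, ctrl->page_size);
        if (err != 0)
        {
            return err;
        }

        memset((void*) qmem->vaddr, 0, qmem->page_size);
        /* qmem->ioaddrs[0] is the bus address to give to the controller */

        nvm_dma_unmap(qmem);
        return 0;
    }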
diff --git a/src/ioctl.h b/src/linux/ioctl.h
similarity index 76%
rename from src/ioctl.h
rename to src/linux/ioctl.h
index d8644571..5c8b48b9 100644
--- a/src/ioctl.h
+++ b/src/linux/ioctl.h
@@ -1,5 +1,6 @@
-#ifndef __NVM_INTERNAL_IOCTL_H__
-#define __NVM_INTERNAL_IOCTL_H__
+#ifndef __NVM_INTERNAL_LINUX_IOCTL_H__
+#define __NVM_INTERNAL_LINUX_IOCTL_H__
+#ifdef __linux__
 
 #include <stddef.h>
 #include <stdint.h>
@@ -29,5 +30,5 @@ enum nvm_ioctl_type
 };
 
 
-
-#endif /* __NVM_INTERNAL_IOCTL_H__ */
+#endif /* __linux__ */
+#endif /* __NVM_INTERNAL_LINUX_IOCTL_H__ */
diff --git a/src/linux/map.h b/src/linux/map.h
new file mode 100644
index 00000000..3d1abb75
--- /dev/null
+++ b/src/linux/map.h
@@ -0,0 +1,33 @@
+#ifndef __NVM_INTERNAL_LINUX_MAP_H__
+#define __NVM_INTERNAL_LINUX_MAP_H__
+#ifdef __linux__
+
+#include "linux/ioctl.h"
+#include "dma.h"
+
+
+/*
+ * What kind of memory are we mapping.
+ */
+enum mapping_type
+{
+    MAP_TYPE_CUDA   = 0x1,  // CUDA device memory
+    MAP_TYPE_HOST   = 0x2,  // Host memory (RAM)
+    MAP_TYPE_API    = 0x4   // Allocated by the API (RAM)
+};
+
+
+
+/*
+ * Mapping container
+ */
+struct ioctl_mapping
+{
+    enum mapping_type   type;   // What kind of memory
+    void*               buffer;
+    struct va_range     range;  // Memory range descriptor
+};
+
+
+#endif /* __linux__ */
+#endif /* __NVM_INTERNAL_LINUX_MAP_H__ */
diff --git a/src/mutex.c b/src/mutex.c
new file mode 100644
index 00000000..9da22baf
--- /dev/null
+++ b/src/mutex.c
@@ -0,0 +1,54 @@
+#ifdef __unix__
+#include <pthread.h>
+#include <string.h>
+#endif
+
+#include "mutex.h"
+#include "dprintf.h"
+
+
+
+#ifdef __unix__
+int _nvm_mutex_init(struct mutex* mtx)
+{
+    int err;
+
+    err = pthread_mutex_init(&mtx->mutex, NULL);
+    if (err != 0)
+    {
+        dprintf("Failed to initialize mutex: %s\n", strerror(err));
+        return err;
+    }
+
+    return 0;
+}
+#endif
+
+
+
+#ifdef __unix__
+int _nvm_mutex_free(struct mutex* mtx)
+{
+    return pthread_mutex_destroy(&mtx->mutex);
+}
+#endif
+
+
+
+#ifdef __unix__
+int _nvm_mutex_lock(struct mutex* mtx)
+{
+    return pthread_mutex_lock(&mtx->mutex);
+}
+#endif
+
+
+
+#ifdef __unix__
+void _nvm_mutex_unlock(struct mutex* mtx)
+{
+    pthread_mutex_unlock(&mtx->mutex);
+}
+#endif
+
diff --git a/src/mutex.h b/src/mutex.h
new file mode 100644
index 00000000..ef005caf
--- /dev/null
+++ b/src/mutex.h
@@ -0,0 +1,61 @@
+#ifndef __NVM_INTERNAL_MUTEX_H__
+#define __NVM_INTERNAL_MUTEX_H__
+
+/* Forward declaration */
+struct mutex;
+
+
+/*
+ * We currently only support OSes that have pthreads
+ */
+#if defined( __unix__ )
+#include <pthread.h>
+#else
+#error "OS is not supported"
+#endif
+
+
+
+/*
+ * We don't want another level of indirection by
+ * hiding implementation and using pointers, so
+ * we expose the struct definition here.
+ */
+#if defined( __unix__ )
+struct mutex
+{
+    pthread_mutex_t mutex;
+};
+#endif
+
+
+
+/*
+ * Initialize mutex handle.
+ */
+int _nvm_mutex_init(struct mutex* mtx);
+
+
+
+/*
+ * Destroy mutex handle.
+ */
+int _nvm_mutex_free(struct mutex* mtx);
+
+
+
+/*
+ * Enter critical section.
+ */
+int _nvm_mutex_lock(struct mutex* mtx);
+
+
+
+/*
+ * Leave critical section.
+ */
+void _nvm_mutex_unlock(struct mutex* mtx);
+
+
+
+#endif /* __NVM_INTERNAL_MUTEX_H__ */
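The wrapper keeps pthreads out of the rest of the code; callers simply pair _nvm_mutex_lock() with _nvm_mutex_unlock() and check the lock's return value, exactly as get_map() does in src/dma.c above. A minimal sketch:

    #include "mutex.h"

    struct counter
    {
        struct mutex lock;
        unsigned     value;
    };

    static int counter_inc(struct counter* c)
    {
        int err = _nvm_mutex_lock(&c->lock);
        if (err != 0)
        {
            return err;   /* lock failure is propagated, not ignored */
        }

        ++c->value;
        _nvm_mutex_unlock(&c->lock);
        return 0;
    }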
diff --git a/src/queue.c b/src/queue.c
index de4853d2..39331228 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -9,19 +9,30 @@
 
 
 
-void nvm_queue_clear(nvm_queue_t* queue, const nvm_ctrl_t* ctrl, bool cq, uint16_t no, void* vaddr, uint64_t ioaddr)
+void nvm_queue_clear(nvm_queue_t* queue, const nvm_ctrl_t* ctrl, bool cq, uint16_t no, uint16_t qs,
+                     bool local, volatile void* vaddr, uint64_t ioaddr)
 {
     queue->no = no;
-    queue->max_entries = 0;
-    queue->entry_size = cq ? sizeof(nvm_cpl_t) : sizeof(nvm_cmd_t);
+    queue->qs = _MIN(qs, ctrl->max_entries);
+    queue->es = cq ? sizeof(nvm_cpl_t) : sizeof(nvm_cmd_t);
     queue->head = 0;
     queue->tail = 0;
-    queue->phase = 1;
     queue->last = 0;
+    queue->phase = 1;
+    queue->local = !!local;
+    queue->db = cq ? CQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd) : SQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd);
     queue->vaddr = vaddr;
     queue->ioaddr = ioaddr;
-    queue->db = cq ? CQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd) : SQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd);
-    queue->max_entries = _MIN(ctrl->max_entries, ctrl->page_size / queue->entry_size);
+}
+
+
+
+void nvm_queue_reset(nvm_queue_t* queue)
+{
+    queue->head = 0;
+    queue->tail = 0;
+    queue->last = 0;
+    queue->phase = 1;
 }
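With the queue size now a caller-supplied argument, setting up a queue looks like the sketch below (queue number and DMA window are assumptions; qs is capped against the controller's max_entries inside the call):

    static void setup_cq(nvm_queue_t* cq, const nvm_ctrl_t* ctrl, const nvm_dma_t* qmem)
    {
        /* An I/O completion queue (number 1) in the first page of qmem. */
        nvm_queue_clear(cq, ctrl, true, 1,
                        qmem->page_size / sizeof(nvm_cpl_t),  /* entries per page */
                        qmem->local, qmem->vaddr, qmem->ioaddrs[0]);
    }

After a controller reset, nvm_queue_reset() rewinds head, tail, last and phase without touching the memory mapping.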
diff --git a/src/rpc.c b/src/rpc.c
index 3f07fed6..807bc87c 100644
--- a/src/rpc.c
+++ b/src/rpc.c
@@ -16,14 +16,14 @@
 #include <nvm_types.h>
 #include <nvm_ctrl.h>
 #include <nvm_rpc.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
-#include <pthread.h>
 #include <errno.h>
 #include <stdio.h>
-#include "dis/device.h"
+#include "mutex.h"
 #include "rpc.h"
 #include "ctrl.h"
 #include "util.h"
@@ -36,6 +36,7 @@
  */
 struct local_admin
 {
+    nvm_dma_t*          qmem;       // Queue memory
     nvm_queue_t         acq;        // Admin completion queue (ACQ)
     nvm_queue_t         asq;        // Admin submission queue (ASQ)
     uint64_t            timeout;    // Controller timeout
@@ -65,8 +66,8 @@ struct rpc_handle
  */
 struct nvm_admin_reference
 {
-    const nvm_ctrl_t*   ctrl;       // Controller reference
-    pthread_mutex_t     lock;       // Ensure exclusive access to the reference
+    struct controller*  ctrl;       // Controller reference
+    struct mutex        lock;       // Ensure exclusive access to the reference
     int                 n_handles;  // Number of handles
     struct rpc_handle*  handles;    // Linked list of binding handles (if server)
     void*               data;       // Custom instance data
@@ -95,7 +96,7 @@ int _nvm_rpc_handle_insert(nvm_aq_ref ref, uint32_t key, void* data, rpc_deleter
     handle->data = data;
     handle->release = release;
 
-    int err = pthread_mutex_lock(&ref->lock);
+    int err = _nvm_mutex_lock(&ref->lock);
     if (err != 0)
     {
        free(handle);
@@ -110,7 +111,7 @@ int _nvm_rpc_handle_insert(nvm_aq_ref ref, uint32_t key, void* data, rpc_deleter
    {
        if (curr->key == handle->key)
        {
-            pthread_mutex_unlock(&ref->lock);
+            _nvm_mutex_unlock(&ref->lock);
            free(handle);
            dprintf("Handle already inserted\n");
            return EINVAL;
@@ -131,7 +132,7 @@ int _nvm_rpc_handle_insert(nvm_aq_ref ref, uint32_t key, void* data, rpc_deleter
 
     ++ref->n_handles;
 
-    pthread_mutex_unlock(&ref->lock);
+    _nvm_mutex_unlock(&ref->lock);
     return 0;
 }
@@ -139,7 +140,7 @@
 void _nvm_rpc_handle_remove(nvm_aq_ref ref, uint32_t key)
 {
-    pthread_mutex_lock(&ref->lock);
+    _nvm_mutex_lock(&ref->lock);
 
     struct rpc_handle* prev = NULL;
     struct rpc_handle* curr = ref->handles;
@@ -165,7 +166,7 @@ void _nvm_rpc_handle_remove(nvm_aq_ref ref, uint32_t key)
         free(curr);
     }
 
-    pthread_mutex_unlock(&ref->lock);
+    _nvm_mutex_unlock(&ref->lock);
 }
@@ -203,23 +204,32 @@ static void release_handles(nvm_aq_ref ref)
 int _nvm_ref_get(nvm_aq_ref* handle, const nvm_ctrl_t* ctrl)
 {
     *handle = NULL;
+    struct controller* cref;
+
+    cref = _nvm_ctrl_get(ctrl);
+    if (cref == NULL)
+    {
+        return ENOTTY;
+    }
 
     nvm_aq_ref ref = (nvm_aq_ref) malloc(sizeof(struct nvm_admin_reference));
     if (ref == NULL)
     {
+        _nvm_ctrl_put(cref);
         dprintf("Failed to allocate reference: %s\n", strerror(errno));
         return ENOMEM;
     }
 
-    int err = pthread_mutex_init(&ref->lock, NULL);
+    int err = _nvm_mutex_init(&ref->lock);
     if (err != 0)
     {
+        _nvm_ctrl_put(cref);
         free(ref);
         dprintf("Failed to initialize reference lock: %s\n", strerror(err));
         return err;
     }
 
-    ref->ctrl = ctrl;
+    ref->ctrl = cref;
     ref->handles = NULL;
     ref->data = NULL;
     ref->release = NULL;
@@ -238,7 +248,7 @@ void _nvm_ref_put(nvm_aq_ref ref)
 {
     if (ref != NULL)
     {
-        pthread_mutex_lock(&ref->lock);
+        _nvm_mutex_lock(&ref->lock);
 
         release_handles(ref);
 
@@ -246,10 +256,11 @@ void _nvm_ref_put(nvm_aq_ref ref)
         {
             ref->release(ref->data, 0, 0);
         }
+
+        _nvm_ctrl_put(ref->ctrl);
+        _nvm_mutex_unlock(&ref->lock);
 
-        pthread_mutex_unlock(&ref->lock);
-
-        pthread_mutex_destroy(&ref->lock);
+        _nvm_mutex_free(&ref->lock);
         free(ref);
     }
 }
@@ -303,36 +314,68 @@ static int execute_command(struct local_admin* admin, const nvm_cmd_t* cmd, nvm_
 /*
  * Helper function to create a local admin descriptor.
  */
-static struct local_admin* create_admin(const nvm_ctrl_t* ctrl, const nvm_dma_t* window)
+static int create_admin(struct local_admin** handle, const struct controller* ctrl, const nvm_dma_t* window)
 {
-    struct local_admin* admin = (struct local_admin*) malloc(sizeof(struct local_admin));
-
+    int status;
+    nvm_dma_t* copy;
+    struct local_admin* admin;
+
+    *handle = NULL;
+
+    if (ctrl->handle.page_size != window->page_size)
+    {
+        dprintf("Controller page size differs from DMA window page size\n");
+        return EINVAL;
+    }
+    else if (window->n_ioaddrs < 2)
+    {
+        dprintf("DMA window is not large enough\n");
+        return ERANGE;
+    }
+    else if (window->vaddr == NULL)
+    {
+        dprintf("DMA window is not mapped into virtual address space\n");
+        return EINVAL;
+    }
+
+    status = nvm_dma_remap(&copy, window);
+    if (status != 0)
+    {
+        return status;
+    }
+
+    admin = (struct local_admin*) malloc(sizeof(struct local_admin));
     if (admin == NULL)
     {
+        nvm_dma_unmap(copy);
         dprintf("Failed to create admin queue-pair descriptors: %s\n", strerror(errno));
-        return NULL;
+        return errno;
     }
 
-    nvm_queue_clear(&admin->acq, ctrl, true, 0, window->vaddr, window->ioaddrs[0]);
-
-    void* asq_vaddr = (void*) (((unsigned char*) window->vaddr) + window->page_size);
-    nvm_queue_clear(&admin->asq, ctrl, false, 0, asq_vaddr, window->ioaddrs[1]);
+    admin->qmem = copy;
+    memset((void*) admin->qmem->vaddr, 0, 2 * window->page_size);
 
-    memset(window->vaddr, 0, 2 * window->page_size);
+    nvm_queue_clear(&admin->acq, &ctrl->handle, true, 0, admin->qmem->page_size / sizeof(nvm_cpl_t),
+                    admin->qmem->local, admin->qmem->vaddr, admin->qmem->ioaddrs[0]);
+    nvm_queue_clear(&admin->asq, &ctrl->handle, false, 0, admin->qmem->page_size / sizeof(nvm_cmd_t),
+                    admin->qmem->local, NVM_DMA_OFFSET(admin->qmem, 1), admin->qmem->ioaddrs[1]);
 
-    admin->timeout = ctrl->timeout;
+    admin->timeout = ctrl->handle.timeout;
 
-    return admin;
+    *handle = admin;
+    return 0;
 }
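For the admin pair this means the ACQ occupies page 0 and the ASQ page 1 of the remapped window. With the standard NVMe entry sizes (16-byte completions, 64-byte commands) the per-page queue depths work out as follows:

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        /* 4 KiB pages assumed; nvm_queue_clear() additionally caps the
         * result at the controller's max_entries. */
        printf("ACQ entries per page: %zu\n", (size_t) (4096 / 16));  /* 256 */
        printf("ASQ entries per page: %zu\n", (size_t) (4096 / 64));  /*  64 */
        return 0;
    }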
 
 
 /*
  * Helper function to remove an admin descriptor.
+ * TODO: This logic needs cleaning up
  */
-static void remove_admin(const struct nvm_admin_reference* ref, struct local_admin* admin)
+static void remove_admin(struct local_admin* admin, uint32_t key, int remaining_handles)
 {
-    if (ref != NULL)
+    if (admin != NULL)
     {
+        nvm_dma_unmap(admin->qmem);
         free(admin);
     }
 }
@@ -346,7 +389,7 @@ int nvm_raw_rpc(nvm_aq_ref ref, nvm_cmd_t* cmd, nvm_cpl_t* cpl)
 {
     int err;
 
-    err = pthread_mutex_lock(&ref->lock);
+    err = _nvm_mutex_lock(&ref->lock);
     if (err != 0)
     {
         dprintf("Failed to take reference lock\n");
@@ -355,14 +398,14 @@ int nvm_raw_rpc(nvm_aq_ref ref, nvm_cmd_t* cmd, nvm_cpl_t* cpl)
 
     if (ref->stub == NULL)
     {
-        pthread_mutex_unlock(&ref->lock);
+        _nvm_mutex_unlock(&ref->lock);
         dprintf("Reference is not bound!\n");
         return NVM_ERR_PACK(NULL, EINVAL);
     }
 
     err = ref->stub(ref->data, cmd, cpl);
 
-    pthread_mutex_unlock(&ref->lock);
+    _nvm_mutex_unlock(&ref->lock);
     return NVM_ERR_PACK(cpl, err);
 }
@@ -376,7 +419,7 @@ int _nvm_rpc_bind(nvm_aq_ref ref, void* data, rpc_deleter_t release, rpc_stub_t
 {
     int err;
 
-    err = pthread_mutex_lock(&ref->lock);
+    err = _nvm_mutex_lock(&ref->lock);
     if (err != 0)
     {
         dprintf("Failed to take reference lock\n");
@@ -385,7 +428,7 @@ int _nvm_rpc_bind(nvm_aq_ref ref, void* data, rpc_deleter_t release, rpc_stub_t
 
     if (ref->data != NULL || ref->stub != NULL)
     {
-        pthread_mutex_unlock(&ref->lock);
+        _nvm_mutex_unlock(&ref->lock);
         dprintf("Reference is already bound!\n");
         return EINVAL;
     }
@@ -394,7 +437,7 @@ int _nvm_rpc_bind(nvm_aq_ref ref, void* data, rpc_deleter_t release, rpc_stub_t
     ref->release = release;
     ref->stub = stub;
 
-    pthread_mutex_unlock(&ref->lock);
+    _nvm_mutex_unlock(&ref->lock);
     return 0;
 }
@@ -410,22 +453,6 @@ int nvm_aq_create(nvm_aq_ref* handle, const nvm_ctrl_t* ctrl, const nvm_dma_t* w
 
     *handle = NULL;
 
-    if (ctrl->page_size != window->page_size)
-    {
-        dprintf("Controller page size differs from DMA window page size\n");
-        return EINVAL;
-    }
-    else if (window->n_ioaddrs < 2)
-    {
-        dprintf("DMA window is not large enough\n");
-        return EINVAL;
-    }
-    else if (window->vaddr == NULL)
-    {
-        dprintf("DMA window is not mapped into virtual address space\n");
-        return EINVAL;
-    }
-
     // Allocate reference
     err = _nvm_ref_get(&ref, ctrl);
     if (err != 0)
@@ -434,19 +461,19 @@ int nvm_aq_create(nvm_aq_ref* handle, const nvm_ctrl_t* ctrl, const nvm_dma_t* w
     }
 
     // Allocate admin descriptor
-    ref->data = create_admin(ref->ctrl, window);
-    if (ref->data == NULL)
+    err = create_admin((struct local_admin**) &ref->data, ref->ctrl, window);
+    if (err != 0)
     {
         _nvm_ref_put(ref);
-        return ENOMEM;
+        return err;
     }
 
     ref->stub = (rpc_stub_t) execute_command;
-    ref->release = (rpc_deleter_t) remove_admin;
+    ref->release = (rpc_deleter_t) remove_admin; // TODO: This logic needs cleaning up
 
     // Reset controller
     const struct local_admin* admin = (const struct local_admin*) ref->data;
-    nvm_raw_ctrl_reset(ctrl, admin->acq.ioaddr, admin->asq.ioaddr);
+    nvm_raw_ctrl_reset(ctrl, admin->qmem->ioaddrs[0], admin->qmem->ioaddrs[1]);
 
     *handle = ref;
     return 0;
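Because create_admin() now takes its own reference to the window via nvm_dma_remap(), the caller may drop its handle right after nvm_aq_create() without pulling the queue memory away. Usage sketch:

    static int make_admin_ref(nvm_aq_ref* aq, const nvm_ctrl_t* ctrl)
    {
        nvm_dma_t* window = NULL;

        int err = nvm_dma_create(&window, ctrl, 2 * ctrl->page_size);
        if (err != 0)
        {
            return err;
        }

        err = nvm_aq_create(aq, ctrl, window);
        nvm_dma_unmap(window);   /* admin keeps its own remapped reference */
        return err;
    }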
@@ -468,7 +495,7 @@ const nvm_ctrl_t* nvm_ctrl_from_aq_ref(nvm_aq_ref ref)
 {
     if (ref != NULL)
     {
-        return ref->ctrl;
+        return &ref->ctrl->handle;
     }
 
     return NULL;
@@ -478,7 +505,7 @@ int _nvm_local_admin(nvm_aq_ref ref, const nvm_cmd_t* cmd, nvm_cpl_t* cpl)
 {
-    int err = pthread_mutex_lock(&ref->lock);
+    int err = _nvm_mutex_lock(&ref->lock);
     if (err != 0)
     {
         dprintf("Failed to take reference lock: %s\n", strerror(err));
@@ -487,14 +514,14 @@ int _nvm_local_admin(nvm_aq_ref ref, const nvm_cmd_t* cmd, nvm_cpl_t* cpl)
 
     if (ref->stub != (rpc_stub_t) execute_command)
     {
-        pthread_mutex_unlock(&ref->lock);
+        _nvm_mutex_unlock(&ref->lock);
         dprintf("Reference is not local descriptor\n");
         return NVM_ERR_PACK(NULL, EINVAL);
     }
 
     err = execute_command((struct local_admin*) ref->data, cmd, cpl);
 
-    pthread_mutex_unlock(&ref->lock);
+    _nvm_mutex_unlock(&ref->lock);
     return NVM_ERR_PACK(NULL, err);
 }
diff --git a/src/util.h b/src/util.h
index 45854b85..dd80dd33 100644
--- a/src/util.h
+++ b/src/util.h
@@ -4,9 +4,11 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
+
+#if defined( __unix__ )
 #include <unistd.h>
 #include <time.h>
-//#include <pthread.h>
+#endif
 
 #ifndef NDEBUG
 #include <stdio.h>
@@ -15,6 +17,17 @@
 #endif
 
 
+/* Get the containing struct */
+#if defined( __clang__ ) || defined( __GNUC__ )
+#define _nvm_container_of(ptr, type, member) ({ \
+    const typeof( ((type *) 0)->member )* __mptr = (ptr); \
+    (type *) (((unsigned char*) __mptr) - offsetof(type, member)); })
+#else
+#define _nvm_container_of(ptr, type, member) \
+    ((type *) (((unsigned char*) (ptr)) - ((unsigned char*) (&((type *) 0)->member))))
+#endif
+
+
 /* Get minimum of two values */
 #define _MIN(a, b) ( (a) <= (b) ? (a) : (b) )
@@ -39,6 +52,7 @@ static inline uint32_t _nvm_b2log(uint32_t n)
 }
 
 
+#if defined( __unix__ )
 /* Delay the minimum of one millisecond and a time remainder */
 static inline uint64_t _nvm_delay_remain(uint64_t remaining_nanoseconds)
 {
@@ -52,14 +66,15 @@ static inline uint64_t _nvm_delay_remain(uint64_t remaining_nanoseconds)
     ts.tv_sec = 0;
     ts.tv_nsec = _MIN(1000000UL, remaining_nanoseconds);
 
-    //pthread_yield();
     clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL);
 
     remaining_nanoseconds -= _MIN(1000000UL, remaining_nanoseconds);
     return remaining_nanoseconds;
 }
+#endif
 
 
+#if defined( __unix__ )
 /* Get the system page size */
 static inline size_t _nvm_host_page_size()
 {
@@ -75,6 +90,9 @@ static inline size_t _nvm_host_page_size()
     return page_size;
 }
+#else
+#define _nvm_host_page_size() 0x1000
+#endif
 
 
 #endif /* __NVM_INTERNAL_UTIL_H__ */
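The _nvm_container_of() macro added above is the standard containing-struct trick; it is what lets nvm_dma_unmap() recover the private container from a public nvm_dma_t pointer. A tiny demonstration (assumes util.h from this patch):

    #include <stddef.h>

    struct wrapper
    {
        int    tag;
        double member;
    };

    /* Given a pointer to .member, recover the enclosing struct wrapper. */
    static struct wrapper* from_member(double* p)
    {
        return _nvm_container_of(p, struct wrapper, member);
    }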