diff --git a/.gitmodules b/.gitmodules index 6d9dd010..7133941d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,15 +1,3 @@ -[submodule "SDAccel/examples/xilinx_2017.4"] - path = SDAccel/examples/xilinx_2017.4 - url = https://github.com/Xilinx/SDAccel_Examples.git - branch = aws_2017.4 -[submodule "SDAccel/examples/xilinx_2018.2"] - path = SDAccel/examples/xilinx_2018.2 - url = https://github.com/Xilinx/SDAccel_Examples.git - branch = 2018.2_xdf -[submodule "SDAccel/examples/xilinx_2018.3"] - path = SDAccel/examples/xilinx_2018.3 - url = https://github.com/Xilinx/SDAccel_Examples.git - branch = master [submodule "SDAccel/examples/xilinx_2019.1"] path = SDAccel/examples/xilinx_2019.1 url = https://github.com/Xilinx/SDAccel_Examples.git @@ -17,3 +5,6 @@ path = Vitis/examples/xilinx_2019.2 branch = master url = https://github.com/Xilinx/Vitis_Accel_Examples +[submodule "Vitis/examples/xilinx_2020.1"] + path = Vitis/examples/xilinx_2020.1 + url = https://github.com/Xilinx/Vitis_Accel_Examples diff --git a/ERRATA.md b/ERRATA.md index 7f99a344..5fbf6d82 100644 --- a/ERRATA.md +++ b/ERRATA.md @@ -20,6 +20,8 @@ If the check fails, the design is susceptible to the issue and will need to be r For designs under development, we recommend applying the patch to your on-premises tools or update to developer kit v1.4.15. For additional details, please refer to the [Xilinx Answer Record #73068](https://www.xilinx.com/support/answers/73068.html) +We recommend using [Developer Kit Release v1.4.15a](https://github.com/aws/aws-fpga/releases/tag/v1.4.15a) or newer to allow for patching and fixing the DDR4 IP timing exception by re-generating the IP. + ### 2019.1 * Vivado `compile_simlib` command fails to generate the following verilog IP libraries for the following simulators. * Please refer to the Xilinx Answer record for details. 
diff --git a/FAQs.md b/FAQs.md index a4e2a3cd..18707026 100644 --- a/FAQs.md +++ b/FAQs.md @@ -29,7 +29,7 @@ AWS designed its FPGA instances to provide a developer experience with ease of u - AWS provides cloud based debug tools: [Virtual JTAG](./hdk/docs/Virtual_JTAG_XVC.md) which is equivalent to debug using JTAG with on-premises development, and Virtual LED together with Virtual DIP Switch emulation the LED and DIP switches in typical development board. -- For developers who want to develop on-premises, Xilinx provides an [on-premises license](./hdk/docs/on_premise_licensing_help.md ) that matches all the needed components needed to be licensed for F1 development on premises. +- For developers who want to develop on-premises, Xilinx provides an [on-premises license](docs/on_premise_licensing_help.md ) that matches all the needed components needed to be licensed for F1 development on premises. - The developers' output is a Design Checkpoint (DCP) and not an FPGA bitstream: The FPGA bitstream is actually generated by AWS after the developer submits the DCP. @@ -185,7 +185,7 @@ AWS prefers not to limit developers to a specific template in terms of how we ad If you decide to use the [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ), Xilinx licenses for simulation, encryption, SDAccel and Design Checkpoint generation are included at no additional cost. -If you want to run using other methods or on a local machine, you will need to obtain any necessary licenses, specifically you will need to have setup the appropriate Xilinx Vivado license. For more details, please refer to [On-premises licensing help](./hdk/docs/on_premise_licensing_help.md) +If you want to run using other methods or on a local machine, you will need to obtain any necessary licenses, specifically you will need to have setup the appropriate Xilinx Vivado license. 
For more details, please refer to [On-premises licensing help](docs/on_premise_licensing_help.md) **Q: Does AWS provide physical FPGA boards for on-premises development?** @@ -492,7 +492,7 @@ Parent process (pid 8160) has died. This helper process will now exit *For On Premise runs:* -You would need a valid [on premise license](./hdk/docs/on_premise_licensing_help.md) provided by Xilinx. +You would need a valid [on premise license](docs/on_premise_licensing_help.md) provided by Xilinx. *For runs using the FPGA Developer AMI:* Please contact us through [AWS FPGA Developers forum](https://forums.aws.amazon.com/forum.jspa?forumID=243) @@ -504,4 +504,25 @@ Please modify RDP options to choose any color depth less than 32 bit and try re- **Q: Why did my AFI creation fail with `***ERROR***: DCP has DNA_PORT instantiation, ingestion failed, exiting`?** -AWS does not support creating AFI's with the Device DNA instantiated within your design. Please create your design without instantiating the DNA_PORT primitive to be able to create your AFI. \ No newline at end of file +AWS does not support creating AFI's with the Device DNA instantiated within your design. Please create your design without instantiating the DNA_PORT primitive to be able to create your AFI. + +**Q: How do I know which HDK version I have on my instance/machine? ** + +Look for the ./hdk/hdk_version.txt file. + +**Q: How do I know what my Shell version is? ** + +The Shell version of an FPGA slot is available through the FPGA Image Management tools after an AFI has been loaded. +See the description of `fpga-describe-local-image` for more details on retrieving the shell version from a slot. +Prior to loading an AFI, the state of the FPGA (including shell version) is undefined and non-deterministic. + +**Q: How do I know what version of FPGA Image management tools are running on my instance? ** + +The FPGA Image management tools version is reported with any command executed from these tools. 
+See the description of `fpga-describe-local-image` for more details. + +**Q: How do I update my existing design with a new release?** + +1. Start by pulling changes from a new [aws-fpga github release](https://github.com/aws/aws-fpga) +1. If the [AWS Shell Interface Specification](./hdk/docs/AWS_Shell_Interface_Specification.md) has changed, update your CL design to conform to the new specification. +3. Follow the process for AFI generation diff --git a/Jenkinsfile b/Jenkinsfile index e6006785..b1a1fd95 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -126,45 +126,23 @@ task_label = [ ] // Put the latest version last -def xilinx_versions = [ '2019.1', '2019.2' ] +def xilinx_versions = [ '2019.1', '2019.2', '2020.1' ] -def vitis_versions = ['2019.2'] +def vitis_versions = ['2019.2', '2020.1'] // We want the default to be the latest. def default_xilinx_version = xilinx_versions.last() def dsa_map = [ - '2017.4' : [ 'DYNAMIC_5_0' : 'dyn'], - '2018.2' : [ 'DYNAMIC_5_0' : 'dyn'], - '2018.3' : [ 'DYNAMIC_5_0' : 'dyn'], '2019.1' : [ 'DYNAMIC_5_0' : 'dyn'], ] def xsa_map = [ - '2019.2' : [ 'DYNAMIC':'dyn'] + '2019.2' : [ 'DYNAMIC':'dyn'], + '2020.1' : [ 'DYNAMIC':'dyn'] ] def sdaccel_example_default_map = [ - '2017.4' : [ - 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl', - 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl', - 'kernel_3ddr_bandwidth_4ddr': 'SDAccel/examples/aws/kernel_3ddr_bandwidth', - 'Kernel_Global_Bw_4ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/kernel_global_bandwidth', - 'RTL_Vadd_Debug': 'SDAccel/examples/xilinx/getting_started/rtl_kernel/rtl_vadd_hw_debug' - ], - '2018.2' : [ - 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl', - 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl', - 'kernel_3ddr_bandwidth_4ddr': 'SDAccel/examples/aws/kernel_3ddr_bandwidth', - 'Kernel_Global_Bw_4ddr': 
'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/kernel_global_bandwidth', - 'RTL_Vadd_Debug': 'SDAccel/examples/xilinx/getting_started/rtl_kernel/rtl_vadd_hw_debug' - ], - '2018.3' : [ - 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl', - 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl', - 'Kernel_Global_Bw_4ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/kernel_global_bandwidth', - 'RTL_Vadd_Debug': 'SDAccel/examples/xilinx/getting_started/rtl_kernel/rtl_vadd_hw_debug' - ], '2019.1' : [ 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/hello_world/helloworld_ocl', 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl_5.0_shell', @@ -179,28 +157,18 @@ def vitis_example_default_map = [ 'Gmem_2Banks_2ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_gmem_2banks', 'Kernel_Global_Bw_4ddr': 'Vitis/examples/xilinx/cpp_kernels/kernel_global_bandwidth', 'RTL_Vadd_Debug': 'Vitis/examples/xilinx/rtl_kernels/rtl_vadd_hw_debug' + ], + '2020.1' : [ + 'Hello_World_1ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_helloworld', + 'Gmem_2Banks_2ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_gmem_2banks', + 'Kernel_Global_Bw_4ddr': 'Vitis/examples/xilinx/cpp_kernels/kernel_global_bandwidth', + 'RTL_Vadd_Debug': 'Vitis/examples/xilinx/rtl_kernels/rtl_vadd_hw_debug', + 'gemm_blas': 'Vitis/examples/xilinx/library_examples/gemm', + 'gzip_app': 'Vitis/examples/xilinx/library_examples/gzip_app' ] ] def simulator_tool_default_map = [ - '2017.4' : [ - 'vivado': 'xilinx/SDx/2017.4_04112018', - 'vcs': 'synopsys/vcs-mx/M-2017.03-SP2-11', - 'questa': 'questa/10.6b', - 'ies': 'incisive/15.20.063' - ], - '2018.2' : [ - 'vivado': 'xilinx/SDx/2018.2_06142018', - 'vcs': 'synopsys/vcs-mx/N-2017.12-SP2', - 'questa': 'questa/10.6c_1', - 'ies': 'incisive/15.20.063' - ], - '2018.3' : [ - 'vivado': 'xilinx/SDx/2018.3_1207', - 'vcs': 'synopsys/vcs-mx/N-2017.12-SP2', - 
'questa': 'questa/10.6c_1', - 'ies': 'incisive/15.20.063' - ], '2019.1' : [ 'vivado': 'xilinx/SDx/2019.1.op2552052', 'vcs': 'synopsys/vcs-mx/N-2017.12-SP2', @@ -212,6 +180,12 @@ def simulator_tool_default_map = [ 'vcs': 'synopsys/vcs-mx/O-2018.09-SP2-1', 'questa': 'questa/2019.2', 'ies': 'incisive/15.20.063' + ], + '2020.1' : [ + 'vivado': 'xilinx/Vivado/2020.1', + 'vcs': 'synopsys/vcs-mx/P-2019.06-SP1-1', + 'questa': 'questa/2019.4', + 'ies': 'incisive/15.20.079' ] ] diff --git a/Jenkinsfile_int_sims b/Jenkinsfile_int_sims index 927e0e32..7c8349c2 100644 --- a/Jenkinsfile_int_sims +++ b/Jenkinsfile_int_sims @@ -47,6 +47,12 @@ def simulator_tool_default_map = [ 'vcs': 'synopsys/vcs-mx/O-2018.09-SP2-1', 'questa': 'questa/2019.2', 'ies': 'incisive/15.20.063' + ], + '2020.1' : [ + 'vivado': 'xilinx/Vivado/2020.1', + 'vcs': 'synopsys/vcs-mx/P-2019.06-SP1-1', + 'questa': 'questa/2019.4', + 'ies': 'incisive/15.20.079' ] ] diff --git a/README.md b/README.md index 10d63b23..8dbd1764 100644 --- a/README.md +++ b/README.md @@ -1,195 +1,197 @@ - - # Table of Contents -1. [Overview of AWS EC2 FPGA Development Kit](#overviewdevkit) - - [Development environments](#overviewdevenv) - - [Runtime environments](#overviewrunenv) - - [Development tools](#overviewdevtools) - - [Example applications](#overviewexapps) -2. [Getting Started](#gettingstarted) -3. [FPGA Developer AMI available on AWS Marketplace](#devAmi) -4. [FPGA Hardware Development Kit (HDK)](#fpgahdk) -5. [FPGA Software Development Kit (SDK)](#fpgasdk) -6. [OpenCL Development Environment with Amazon EC2 F1 FPGA Instances to accelerate your C/C++ applications](#sdaccel) -7. [Developer Support](#devSupport) -8. [Recommended Documentation](#doccontents) - - +1. 
[Overview of AWS EC2 FPGA Development Kit](#overview-of-aws-ec2-fpga-development-kit) + - [Development Flow](#development-flow) + - [Development environments](#development-environments) + - [FPGA Developer AMI](#fpga-developer-ami) + - [FPGA Hardware Development Kit (HDK)](#hardware-development-kit-hdk) + - [FPGA Software Development Kit (SDK)](#runtime-tools-sdk) + - [Software Defined Development Environment](#software-defined-development-environment) +1. [Amazon EC2 F1 platform features](#amazon-ec2-f1-platform-features) +1. [Getting Started](#getting-started) + - [Getting Familiar with AWS](#getting-familiar-with-aws) + - [First time setup](#setting-up-development-environment-for-the-first-time) + - [Quickstarts](#quickstarts) + - [How To's](#how-tos) +1. [Documentation Overview](#documentation-overview) +1. [Developer Support](#developer-support) + # Overview of AWS EC2 FPGA Development Kit -AWS EC2 FPGA Development Kit is a set of free development and runtime tools that provide everything needed to develop, simulate, debug, compile and run hardware accelerated applications on [Amazon EC2 F1 instances](https://aws.amazon.com/ec2/instance-types/f1/), EC2 F1 instances are high-performance compute instances with field programmable gate arrays (FPGAs) that enable the development and deployment of custom hardware accelerators on AWS cloud. +AWS EC2 FPGA Development Kit is a set of development and runtime tools to develop, simulate, debug, compile and run hardware accelerated applications on [Amazon EC2 F1 instances](https://aws.amazon.com/ec2/instance-types/f1/). +It is distributed between this github repository and [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) provided by AWS with no cost of development tools. + +⚠️ NOTE: The developer kit is supported for Linux operating systems only. 
+ +## Development Flow +After creating an FPGA design (also called CL - Custom logic), developers can create an Amazon FPGA Image (AFI) and easily deploy it to an F1 instance. AFIs are reusable, shareable and can be deployed in a scalable and secure way. -AWS EC2 FPGA Development Kit content is distributed between this github repository and [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) provided by AWS, developers are able to develop, simulate and debug an FPGA design on compute [EC2 instance](https://aws.amazon.com/ec2/) with no cost of development or runtime tools. Once the FPGA design (also called CL - Custom logic) is complete, developers create the Amazon FPGA Image (AFI), and easily deploy it to the F1 instance. AFIs are reusable, shareable and can be deployed in a scalable and secure way. ![Alt text](hdk/docs/images/f1-Instance-How-it-Works-flowchart.jpg) - -## Overview of Development Environments - -| Development Environment | Description | Accelerator Language | Development Tool | Debug Options| Typical Developer / FPGA Experience | -| --------|---------|---------|-------|-------|-------| -| Software Defined Accelerator Development - [Vitis](Vitis/README.md)/[SDAccel](SDAccel/README.md) | Development experience leverages an optimized compiler to allow easy new accelerator development or migration of existing C/C++/openCL, Verilog/VHDL to AWS FPGA instances | C/C++/OpenCL, Verilog/VHDL (RTL) | Vitis/SDx/Vivado (GUI or script) | SW/HW Emulation, Simulation, GDB, Virtual JTAG (Chipscope) | SW or HW Developer with zero FPGA experience | -| [Hardware Accelerator Development - HDK](hdk/README.md) | Fully custom hardware development experience provides hardware developers with the tools required for developing AFIs for AWS FPGA instances | Verilog/VHDL | Vivado | Simulation, Virtual JTAG | HW Developer with advanced FPGA experience | -| [IP Integrator or High Level Synthesis (HLx)](hdk/docs/IPI_GUI_Vivado_Setup.md) | Graphical interface 
development experience for integrating IP and high level synthesis development | Verilog/VHDL/C | Vivado (GUI) | Simulation, Virtual JTAG | HW Developer with intermediate FPGA experience | - - -## Overview of Runtime Environments - -| Runtime Environment | Hardware Interface | Host Code Language | FPGA Tools | -| --------|---------|---------|-------| -| C/C++ Software Defined Accelerator Development - [Vitis](Vitis/README.md) / [SDAccel](SDAccel/README.md) | OpenCL APIs and XRT | C/C++ | [SDK](./sdk), Vitis / SDAccel| -| [Hardware Accelerator Development](hdk/README.md) | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | C/C++ | [SDK](./sdk), Vivado | -| [IP Integrator or High Level Synthesis (HLx)](hdk/docs/IPI_GUI_Vivado_Setup.md) | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | C/C++ | [SDK](./sdk), Vivado | - - -## Overview of Development Tools - -| Tool | Development/Runtime | Tool location | Description | -| --------|---------|---------|---------| -| Vitis 2019.2 | Development | [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | Used for [Software Defined Accelerator Development using the new Vitis toolset](Vitis/README.md) | -| Vivado 2017.4, 2018.2, 2018.3, 2019.1 & 2019.2 | Development | [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | Used for [Hardware Accelerator Development](hdk/README.md) | -| SDx 2017.4, 2018.2, 2018.3 & 2019.1| Development | [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | Used for [Software Defined Accelerator Development](SDAccel/README.md) | -| FPGA AFI Management Tools | Runtime | [SDK - fpga\_mgmt\_tools](sdk/userspace/fpga_mgmt_tools) | Command-line tools used for FPGA management while running on the F1 instance | -| Virtual JTAG | Development (Debug) | [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | Runtime debug waveform | -| wait\_for\_afi 
| Development | [wait\_for\_afi.py](shared/bin/scripts/wait_for_afi.py) | Helper script that notifies via email on AFI generation completion | -| notify\_via\_sns | Development | [notify\_via\_sns.py](shared/bin/scripts/notify_via_sns.py) | Notifies developer when design build process completes | -| AFI Administration | Development | [Copy](hdk/docs/copy_fpga_image.md), [Delete](hdk/docs/delete_fpga_image.md), [Describe](hdk/docs/describe_fpga_images.md), [Attributes](hdk/docs/fpga_image_attributes.md) | AWS CLI EC2 commands for managing your AFIs | - - -> For on-premises development, SDx/Vitis/Vivado must have the correct license and use one of the [supported versions of SDx/Vitis/Vivado](./supported_vivado_versions.txt). -> The following links have more information on on-premises development: [Vivado requirements](hdk/docs/on_premise_licensing_help.md), [Vitis requirements](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug1393-vitis-application-acceleration.pdf) and [SDx requirements](SDAccel/docs/On_Premises_Development_Steps.md) - - -## Overview of Example Applications -| Accelerator Application | Example | Development Environment | Description | -| --------|---------|---------|-------| -| Custom hardware | [cl\_hello\_world](hdk/cl/examples/cl_hello_world) | HDK - RTL (Verilog) | Simple [getting started example](hdk/README.md) with minimal hardware | -| Custom hardware | [cl\_dram\_dma](hdk/cl/examples/cl_dram_dma) | HDK - RTL (Verilog) | Demonstrates CL connectivity to the F1 shell and connectivity to/from all DDRs | -| Custom hardware IP integration example using a GUI | [cl\_dram\_dma\_hlx](hdk/cl/examples/cl_dram_dma_hlx) | HLx - Verilog | Demonstrates CL connectivity to the F1 shell and connectivity to/from DRAM using the Vivado IP Integrator GUI | -| Virtual Ethernet Application | [Example Application](sdk/apps/virtual-ethernet) | [HDK SDE Example](hdk/cl/examples/cl_sde) | The Virtual Ethernet framework facilitates streaming 
Ethernet frames from a network interface (or any source) into the FPGA for processing and back out to some destination. Possible use cases for this include deep packet inspection, software defined networking, stream encryption or compression, and more. | -| Pipelined Workload Applications | [cl\_dram\_dma\_data\_retention](hdk/docs/data_retention.md)| [HDK](hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_retention.c) [SDAccel](SDAccel/examples/aws/data_retention) | Demonstrates how to preserve data in DRAMs while swapping out accelerators. Applications that use a temporal accelerator pipeline can take advantage of this feature to reduce latency between FPGA image swaps | -| Digital Up-Converter using High Level Synthesis | [cl\_hls\_dds\_hlx](hdk/cl/examples/cl_hls_dds_hlx) | HLx - C-to-RTL | Demonstrates an example application written in C that is synthesized to RTL (Verilog) | -| Security | [AES, RSA, SHA1](https://github.com/Xilinx/SDAccel_Examples/tree/2018.2/security) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration to speed up security software algorithms | -| Computer Vision | [Affine, Convolve, Huffman, IDCT](https://github.com/Xilinx/SDAccel_Examples/tree/master/vision) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration to speed up image detection algorithms | -| Misc Algorithms | [Kmeans, SmithWaterman, MatrixMult](https://github.com/Xilinx/SDAccel_Examples/tree/master/acceleration) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of applying hardware acceleration to a variety of sorting and search algorithms | -| Financial | [Blacksholes, Heston](https://github.com/KitAway/FinancialModels_AmazonF1) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of 
using hardware acceleration on Monte Carlo financial models | -| Custom Hardware with Software Defined Acceleration | [RTL Kernels](https://github.com/Xilinx/SDAccel_Examples/tree/master/getting_started/rtl_kernel) | SDAccel - RTL (Verilog) + C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates a quick method for developing new or migrating existing hardware designs (RTL) | -| File Compression | [GZip](https://github.com/Xilinx/Applications/tree/master/GZip) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration to speed up GZIP compression on an FPGA | -| WebP Image Compression | [WebP](https://github.com/Xilinx/Applications/tree/master/webp) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration to speed up WebP encoder application on an FPGA | - - -# Getting Started +## Development Environments -### Getting familiar with AWS -If you have never used AWS before, we recommend you start with [AWS getting started training](https://aws.amazon.com/getting-started/), and focus on the basics of the [AWS EC2](https://aws.amazon.com/ec2/) and [AWS S3](https://aws.amazon.com/s3/) services. Understanding the fundamentals of these services will make it easier to work with AWS FPGAs. 
+| Development Environment | Description | Accelerator Language | Hardware Interface | Debug Options| Typical Developer | +| --------|---------|-------|---------|-------|-------| +| Software Defined Accelerator Development using [Vitis](Vitis/README.md)/[SDAccel](SDAccel/README.md)| Development experience leverages an optimized compiler to allow easy new accelerator development or migration of existing C/C++/openCL, Verilog/VHDL to AWS FPGA instances | C/C++/OpenCL, Verilog/VHDL (RTL) | OpenCL APIs and XRT | SW/HW Emulation, Simulation, GDB, Virtual JTAG (Chipscope) | SW or HW Developer with zero FPGA experience | +| [Hardware Accelerator Development using Vivado](hdk/README.md) | Fully custom hardware development experience provides hardware developers with the tools required for developing AFIs for AWS FPGA instances | Verilog/VHDL | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | Simulation, Virtual JTAG | HW Developer with advanced FPGA experience | +| [IP Integrator/High Level Design(HLx) using Vivado](hdk/docs/IPI_GUI_Vivado_Setup.md) | Graphical interface development experience for integrating IP and high level synthesis development | Verilog/VHDL/C | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | Simulation, Virtual JTAG | HW Developer with intermediate FPGA experience | -AWS FPGA generation and EC2 F1 instances are supported in the us-east-1 (N. Virginia), us-west-2 (Oregon), eu-west-1 (Ireland) and us-gov-west-1 ([GovCloud US](https://aws.amazon.com/govcloud-us/)) [regions](https://aws.amazon.com/about-aws/global-infrastructure/). +> For on-premise development, SDAccel/Vitis/Vivado must have the [correct license and use one of the supported tool versions](./docs/on_premise_licensing_help.md). -### Setting up development environment for the first time -The developer kit is supported for Linux operating systems only. 
-You have the choice to develop on AWS EC2 using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) or on-premises. Within a linux environment, you can execute `git clone https://github.com/aws/aws-fpga.git` to download the latest release to your EC2 Instance or local server. Help on cloning from github is available [here](https://help.github.com/articles/which-remote-url-should-i-use/). When using a SSH connection, execute `git clone git@github.com:aws/aws-fpga.git`. [To get help with connecting to Github via SSH](https://help.github.com/articles/connecting-to-github-with-ssh/). - -To setup your instance for development, checkout our [Developer Resources](./developer_resources/README.md) where we provide Step-By-Step guides to setting up a GUI Desktop or a compute cluster. - -Before you start your first AWS FPGA design, we recommend that you go through one of the step-by-step guides. The guides will walk through development steps for hello world examples. Based on the tables above, pick the development environment that best fits your needs and use the guide to get started: - * For fastest way to get started on FPGA accelerator development, start with the software-defined development environment. The guide starts with the [Hello World example](Vitis/README.md). - * Next use the same guide to develop using the C/C++/openCL/RTL based [60+ examples on github](https://github.com/Xilinx/Vitis_Accel_Examples/tree/bb80c8ec699c3131e8874735bd99475ac6fe2ec7). - * For custom hardware development (HDK) environment, start with the [HDK Hello World example](hdk/README.md). - * Next use the same guide to develop using the [cl\_dram\_dma](hdk/cl/examples/cl_dram_dma). - -### In-depth training and resources -Once you have completed your hello world examples, we recommend diving deeper into a training workshop or application notes - * Software-defined [re:Invent 2019 Workshop](https://github.com/Xilinx/SDAccel-AWS-F1-Developer-Labs). 
- * Lab modules will take you through accelerating compute intensive functions like Inverse Discrete Cosine Transform, Bloom Filter, 2D video convolution, etc. - * You will learn how to identify functions to accelerate and use profiling on example applications use that information to optimize your data movement between the HOST and FPGA. - * Software-defined [re:Invent 2018 Workshop](https://github.com/awslabs/aws-fpga-app-notes/blob/master/reInvent18_Developer_Workshop/README.md) demonstrates a 2D Filter acceleration and how to debug and optimize your accelerator. - * Custom hardware developers need to learn about how the hardware accelerator interfaces to the F1 Shell - * [Shell Interface](hdk/docs/AWS_Shell_Interface_Specification.md) - * [Shell Address Map](hdk/docs/AWS_Fpga_Pcie_Memory_Map.md) - * [Programmer view of the FPGA](./hdk/docs/Programmer_View.md) - * [Virtual JTAG](hdk/docs/Virtual_JTAG_XVC.md) - * [Application for methods of interfacing the host application to the Hardware accelerator](https://github.com/awslabs/aws-fpga-app-notes) - - -# FPGA Developer AMI - -The [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) is available on the AWS marketplace without a software charge and includes free tools and drivers needed for FPGA development on EC2 instances. FPGA development runs on several [EC2 instance types](https://aws.amazon.com/ec2/instance-types/). Given the large size of the FPGA used inside the AWS FPGA instances, the implementation tools require 32GiB Memory (ex: z1d.xlarge, z1d.2xlarge, c5.4xlarge, m5.2xlarge, r5.xlarge, t2.2xlarge). z1d.xlarge/c5.4xlarge and z1d.2xlarge/c5.8xlarge would provide the fastest execution time with 30GiB+ and 60GiB+ of memory respectively. Developers who want to save on cost, could start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. 
- -Currently, AWS marketplace includes multiple versions of the FPGA Developer AMI, supporting Xilinx SDx 2017.4, 2018.2, 2018.3 and 2019.1 toolchain versions. The following compatibility table describes the mapping of currently supported developer kit versions to AMI versions: - -| Developer Kit Version | Tool Version Supported | Compatible FPGA Developer AMI Version | +## FPGA Developer AMI + +The [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) is available on the AWS marketplace without a software charge and includes tools needed for developing FPGA Designs to run on AWS F1. + +Given the large size of the FPGA used inside AWS F1 Instances, Xilinx tools work best with 32GiB Memory. +z1d.xlarge/c5.4xlarge and z1d.2xlarge/c5.8xlarge instance types would provide the fastest execution time with 30GiB+ and 60GiB+ of memory respectively. +Developers who want to save on cost, could start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. + +AWS marketplace offers multiple versions of the FPGA Developer AMI. 
The following compatibility table describes the mapping of currently supported developer kit versions to AMI versions: + +| Developer Kit Version | Tool Version Supported | Compatible FPGA Developer AMI Version | |-----------|-----------|------| -| 1.3.7-1.3.X | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado/SDx 2017.4) | -| 1.4.X | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado/SDx 2017.4) | -| 1.4.3+ | 2018.2 | v1.5.0-v1.5.X (Xilinx Vivado/SDx 2018.2) | -| 1.4.8+ | 2018.3 | v1.6.0-v1.6.X (Xilinx Vivado/SDx 2018.3) | -| 1.4.11+ | 2019.1 | v1.7.0-v1.7.X (Xilinx Vivado/SDx 2019.1) | +| 1.4.16+ | 2020.1 | v1.9.0-v1.9.X (Xilinx Vivado/Vitis 2020.1) | | 1.4.13+ | 2019.2 | v1.8.0-v1.8.X (Xilinx Vivado/Vitis 2019.2) | +| 1.4.11+ | 2019.1 | v1.7.0-v1.7.X (Xilinx Vivado/SDx 2019.1) | +| 1.4.8 - 1.4.15a | 2018.3 | v1.6.0-v1.6.X (Xilinx Vivado/SDx 2018.3) | +| 1.4.3 - 1.4.15a | 2018.2 | v1.5.0-v1.5.X (Xilinx Vivado/SDx 2018.2) | +| 1.3.7 - 1.4.15a | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado/SDx 2017.4) | + +⚠️ Developer kit release v1.4.16 will remove support for Xilinx 2017.4, 2018.2, 2018.3 toolsets. +While developer kit release v1.4.16 onwards will not support older Xilinx tools, you can still use them using HDK releases v1.4.15a or earlier. +Please checkout [the latest v1.4.15a release tag from Github](https://github.com/aws/aws-fpga/releases/tag/v1.4.15a) to use Xilinx 2017.4, 2018.2, 2018.3 toolsets. -Developer kit versions prior to v1.3.7 and Developer AMI prior to v1.4 (2017.1) reached end-of-life. See [AWS forum announcement](https://forums.aws.amazon.com/ann.jspa?annID=6068) for additional details. +⚠️ Developer kit versions prior to v1.3.7 and Developer AMI prior to v1.4 (2017.1) reached end-of-life. See [AWS forum announcement](https://forums.aws.amazon.com/ann.jspa?annID=6068) for additional details. 
For software-defined development please look at the runtime compatibility table based on the Xilinx toolset in use: [SDAccel](SDAccel/docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table) or [Vitis](Vitis/docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table) - -# Hardware Development Kit (HDK) +## Hardware Development Kit (HDK) -The [HDK directory](./hdk/README.md) contains useful information, examples, and scripts for developers wanting to start building Amazon FPGA Images (AFI). It includes the development environment, simulation, build and AFI creation scripts. The HDK can be installed on any on-premises server or an EC2 instance. The developer kit is not required if you plan to use a pre-built AFI shared from another developer. +The [HDK directory](./hdk/README.md) contains documentation, examples, simulation, build and AFI creation scripts to start building Amazon FPGA Images (AFI). +The HDK can be installed on any on-premises server or an EC2 instance. +The developer kit is not required if you plan to use a pre-built AFI shared from another developer. - -# Software-defined Development Environment +## Software-defined Development Environment -The software-defined development environment allows customers to compile their C/C++/OpenCL code into the FPGA as kernels, and use OpenCL APIs to pass data to the FPGA. Software developers with no FPGA experience will find a familiar development experience that supercharges cloud applications. +The software-defined development environment allows customers to compile their C/C++/OpenCL code into the FPGA as kernels, and use OpenCL APIs to pass data to the FPGA. +Software developers with no FPGA experience will find a familiar development experience that supercharges cloud applications. -In addition, this development environment (also called SDAccel) allows the mix of C/C++ and RTL accelerator designs into a C/C++ software based development environment. 
This method enables faster prototyping using C/C++ while supporting manual optimization of critical blocks within RTL. This approach is similar to optimizing time critical functions using software compiler optimization methods. - -This developer kit has 80+ examples to help you get started on FPGA acceleration. +In addition, this development environment allows for a mix of C/C++ and RTL accelerator designs into a C/C++ software based development environment. This method enables faster prototyping using C/C++ while supporting manual optimization of critical blocks within RTL. This approach is similar to optimizing time critical functions using software compiler optimization methods. To get started with Xilinx SDAccel, review the [Software-defined development environment readme](SDAccel/README.md). To get started with Xilinx Vitis, review the [Vitis unified development environment readme](Vitis/README.md). - - -# Runtime Tools (SDK) +## Runtime Tools (SDK) The [SDK directory](./sdk/README.md) includes the runtime environment required to run on EC2 FPGA instances. It includes the drivers and tools to manage the AFIs that are loaded on the FPGA instance. The SDK isn't required during the AFI development process; it is only required once an AFI is loaded onto an EC2 FPGA instance. The following sdk resources are provided: * Linux Kernel Drivers - The developer kit includes three drivers: * [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md) - DMA interface to/from HDK accelerators. - * [XOCL Driver](sdk/linux_kernel_drivers/xocl) - DMA interface with software defined accelerators (also called hardware kernels). * [FPGA Libraries](sdk/userspace/fpga_libs) - APIs used by C/C++ host applications. * [FPGA Management Tools](sdk/userspace/fpga_mgmt_tools/README.md) - AFI management APIs for runtime loading/clearing FPGA image, gathering metrics and debug interface on the F1 instance. 
- -# Developer Support +# Amazon EC2 F1 Platform Features +* 1-8 Xilinx UltraScale+ VU9P based FPGA slots +* Per FPGA Slot, Interfaces available for Custom Logic(CL): + * One x16 PCIe Gen 3 Interface + * Four DDR4 RDIMM interfaces (with ECC) + * AXI4 protocol support on all interfaces +* User-defined clock frequency driving all CL to Shell interfaces +* Multiple free running auxiliary clocks +* PCI-E endpoint presentation to Custom Logic(CL) + * Management PF (physical function) + * Application PF +* Virtual JTAG, Virtual LED, Virtual DIP Switches +* PCI-E interface between Shell(SH) and Custom Logic(CL). + * SH to CL inbound 512-bit AXI4 interface + * CL to SH outbound 512-bit AXI4 interface + * Multiple 32-bit AXI-Lite buses for register access, mapped to different PCIe BARs + * Maximum payload size set by the Shell + * Maximum read request size set by the Shell + * AXI4 error handling +* DDR interface between SH and CL + * CL to SH 512-bit AXI4 interface + * 1 DDR controller implemented in the SH (always available) + * 3 DDR controllers implemented in the CL (configurable number of implemented controllers allowed) + +# Getting Started + +### Getting familiar with AWS +If you have never used AWS before, we recommend you start with [AWS getting started training](https://aws.amazon.com/getting-started/), and focus on the basics of the [AWS EC2](https://aws.amazon.com/ec2/) and [AWS S3](https://aws.amazon.com/s3/) services. +Understanding the fundamentals of these services will make it easier to work with AWS F1 and the FPGA Developer Kit. + +FPGA Image generation and EC2 F1 instances are supported in the us-east-1 (N. Virginia), us-west-2 (Oregon), eu-west-1 (Ireland) and us-gov-west-1 ([GovCloud US](https://aws.amazon.com/govcloud-us/)) [regions](https://aws.amazon.com/about-aws/global-infrastructure/). + +> ⚠️ NOTE: By default, your AWS Account will have an EC2 F1 Instance launch limit of 0. 
+> Before using F1 instances, you will have to open a [Support Case](https://console.aws.amazon.com/support/home#/case/create) to increase the EC2 Instance limits to allow launching F1 instances. + +### Setting up development environment for the first time + +You have the choice to develop on AWS EC2 using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) or on-premise. + +> ℹ️ INFO: We suggest starting with the FPGA Developer AMI with [build instances](#fpga-developer-ami) on EC2 as it has Xilinx tools and licenses set up for you to be able to quickly get into development. -The [**Amazon FPGA Development User Forum**](https://forums.aws.amazon.com/forum.jspa?forumID=243&start=0) is the first place to go to post questions, learn from other users and read announcements from the EC2 FPGA team. +> ℹ️ INFO: For on-premise development, you will need to have [Xilinx tools and licenses available for you to use](./docs/on_premise_licensing_help.md) -* Click the "Watch" button in GitHub upper right corner to get regular updates. -* We recommend you will join the [AWS forum](https://forums.aws.amazon.com/forum.jspa?forumID=243) to engage with the FPGA developer community and get help when needed (both AWS and Xilinx engineers monitor this forum). -* In case you can't see "Your Stuff" details, you will need to logout using the logout button on the forums page and log back in again. +1. Start a Build Instance first to start your development. + > 💡 TIP: This instance does not have to be an F1 instance. You only require an F1 instance to run your AFIs (Amazon FPGA Images) once you have gone through your design build and AFI creation steps. + + > ℹ️ INFO: If you need to follow GUI Development flows, please check out our [Developer Resources](./developer_resources/README.md) where we provide Step-By-Step guides to setting up a GUI Desktop. +1. Clone the [FPGA Developer Kit](https://github.com/aws/aws-fpga) on your instance. 
+ ```git clone https://github.com/aws/aws-fpga.git``` +1. Follow the quickstarts from the next section. + +### Quickstarts +Before you create your own AWS FPGA design, we recommend that you go through one of the step-by-step Quickstart guides: + +| Description | Quickstart | Next Steps | +|----|----|----| +| Software Defined Accelerator Development using Xilinx Vitis | [Vitis hello_world Quickstart](Vitis/README.md) | [60+ Vitis examples](./Vitis/examples/), [Vitis Library Examples](./docs/examples/example_list.md) | +| Software Defined Accelerator Development using Xilinx SDAccel | [SDAccel hello_world Quickstart](SDAccel/README.md) | [60+ SDAccel examples](./SDAccel/examples/) | +| Custom Hardware Development (HDK) | [HDK hello_world Quickstart](hdk/README.md) | [CL to Shell and DRAM connectivity example](./hdk/cl/examples/cl_dram_dma), [Virtual Ethernet Application](./sdk/apps/virtual-ethernet) using the [Streaming Data Engine](./hdk/cl/examples/cl_sde) | +| IP Integrator/High Level Design (HLx) | [IPI hello_world Quickstart](hdk/cl/examples/cl_hello_world_hlx/README.md) | [IPI GUI Examples](hdk/docs/IPI_GUI_Examples.md) | + +ℹ️ INFO: For more in-depth applications and examples of using high-level synthesis, Vitis Libraries, App Notes and Workshops, please refer to our [Example List](./docs/examples/example_list.md) + +### How Tos +| How To | Description | +|----|----| +| [Migrate Alveo U200 designs to F1](./Vitis/docs/Alveo_to_AWS_F1_Migration.md) | This application note shows the ease of migrating an Alveo U200 design to F1. 
| - # Documentation Overview -The documentation is located throughout this developer kit, therefore, to help developers find information quicker the table below consolidates a list of key documents: +Documentation is located throughout this developer kit and the table below consolidates a list of key documents to help developers find information: | Topic | Document Name | Description | |-----------|-----------|------| -| Developer Kit Features | [RELEASE\_NOTES](./RELEASE_NOTES.md), [Errata](./ERRATA.md) | Release notes and Errata for all developer kit features, excluding the shell | -| Frequently asked questions | [FAQ](./FAQs.md), [Errata](./ERRATA.md) | Q/A are added based on developer feedback and common AWS forum questions | -| F1 Shell (HDK) | [AWS\_Shell\_RELEASE\_NOTES](./hdk/docs/AWS_Shell_RELEASE_NOTES.md), [AWS\_Shell\_ERRATA](./hdk/docs/AWS_Shell_ERRATA.md) | Release notes and Errata for F1 shell | -| F1 Shell (HDK) | [AWS\_Shell\_Interface\_Specification](hdk/docs/AWS_Shell_Interface_Specification.md) | Shell-CL interface specification for HDK developers building AFI | -| AWS setup | [Setup\_AWS\_CLI\_and\_S3\_Bucket](SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md) | Setup instructions for preparing for AFI creation | -| SDx graphical interface (SDAccel) | [README\_GUI](SDAccel/docs/README_GUI.md) | Instructions using the SDx GUI for software defined acceleration development and debug | -| Software defined acceleration using RTL (SDAccel) | [Debug\_RTL\_Kernel](SDAccel/docs/Debug_RTL_Kernel.md) | Instructions on debugging RTL Kernel | -| Software defined acceleration Run time (SDAccel) | [Create\_Runtime\_AMI](SDAccel/docs/Create_Runtime_AMI.md) | Instructions on creating a runtime AMI | -| Host Application (HDK) | [Programmer\_View](hdk/docs/Programmer_View.md) | Host application to CL interface specification | -| CL Debug (HDK) | [Virtual\_JTAG\_XVC](hdk/docs/Virtual_JTAG_XVC.md) | Debugging CL using Virtual JTAG (Chipscope) | -| CL/Shell Simulation 
(HDK) | [RTL\_Simulating\_CL\_Designs](hdk/docs/RTL_Simulating_CL_Designs.md) | Shell-CL simulation specification | -| Driver (HDK) | [README](sdk/linux_kernel_drivers/xdma/README.md) | Describes the DMA driver (XDMA) used by HDK examples and includes a link to an installation guide | -| Shell Timeout and AXI Protocol Protection | [HOWTO\_detect\_shell\_timeout](hdk/docs/HOWTO_detect_shell_timeout.md) | The shell will terminate transactions after a time period or on an illegal transaction. This describes how to detect and gather data to help debug CL issues caused by timeouts. | -| AFI Power | [afi\_power](hdk/docs/afi_power.md) | Helps developers with understanding AFI power and preventing power violations on the F1 instance | -| AFI Management | [README](sdk/userspace/fpga_mgmt_tools/README.md) | CLI documentation for managing AFI on the F1 instance | -| AFI Administration | [copy\_fpga\_image](hdk/docs/copy_fpga_image.md), [delete\_fpga\_image](hdk/docs/delete_fpga_image.md), [describe\_fpga\_images](hdk/docs/describe_fpga_images.md), [fpga\_image\_attributes](hdk/docs/fpga_image_attributes.md) | CLI documentation for administering AFIs | -| AFI Creation Error Codes | [create\_fpga\_image\_error\_codes](hdk/docs/create_fpga_image_error_codes.md) | CLI documentation for managing AFIs | -| Developing on-premises | [HDK: on\_premise\_licensing\_help](hdk/docs/on_premise_licensing_help.md), [SDAccel: On\_Premises\_Development\_Steps](SDAccel/docs/On_Premises_Development_Steps.md) | Guidance for developer wanting to develop AFIs from on-premises instead of using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) running on AWS EC2 | +| AWS setup | [Setup AWS CLI and S3 Bucket](./SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md) | Setup instructions for preparing for AFI creation | +| Developer Kit | [RELEASE NOTES](./RELEASE_NOTES.md), [Errata](./ERRATA.md) | Release notes and Errata for all developer kit features, excluding the shell | +| 
Developer Kit | [Errata](./ERRATA.md) | Errata for all developer kit features, excluding the shell | +| F1 Shell | [AWS Shell RELEASE NOTES](./hdk/docs/AWS_Shell_RELEASE_NOTES.md) | Release notes for F1 shell | +| F1 Shell | [AWS Shell ERRATA](./hdk/docs/AWS_Shell_ERRATA.md) | Errata for F1 shell | +| F1 Shell | [AWS Shell Interface Specification](./hdk/docs/AWS_Shell_Interface_Specification.md) | Shell-CL interface specification for HDK developers building AFI | +| F1 Shell - Timeout and AXI Protocol Protection | [How to detect a shell timeout](hdk/docs/HOWTO_detect_shell_timeout.md) | The shell will terminate transactions after a time period or on an illegal transaction. This describes how to detect and gather data to help debug CL issues caused by timeouts. | +| Vitis | [Debug Vitis Kernel](./Vitis/docs/Debug_Vitis_Kernel.md) | Instructions on debugging Vitis Kernel | +| Vitis | [Create Runtime AMI](./Vitis/docs/Create_Runtime_AMI.md) | Instructions on creating a runtime AMI when using Xilinx Vitis| +| Vitis | [XRT Instructions](./Vitis/docs/XRT_installation_instructions.md) | Instructions on building, installing XRT with MPD daemon considerations for F1 | +| SDAccel | [Debug RTL Kernel](./SDAccel/docs/Debug_RTL_Kernel.md) | Instructions on debugging RTL Kernel with SDAccel | +| SDAccel | [Create Runtime AMI](./SDAccel/docs/Create_Runtime_AMI.md) | Instructions on creating a runtime AMI when using Xilinx SDAccel| +| HDK - Host Application | [Programmer View](./hdk/docs/Programmer_View.md) | Host application to CL interface specification | +| HDK - CL Debug | [Debug using Virtual JTAG](./hdk/docs/Virtual_JTAG_XVC.md) | Debugging CL using Virtual JTAG (Chipscope) | +| HDK - Simulation | [Simulating CL Designs](./hdk/docs/RTL_Simulating_CL_Designs.md) | Shell-CL simulation specification | +| HDK - Driver | [README](./sdk/linux_kernel_drivers/xdma/README.md) | Describes the DMA driver (XDMA) used by HDK examples and includes a link to an installation guide | +| AFI 
| [AFI Management SDK](./sdk/userspace/fpga_mgmt_tools/README.md) | CLI documentation for managing AFI on the F1 instance | +| AFI - EC2 CLI | [copy\_fpga\_image](./hdk/docs/copy_fpga_image.md), [delete\_fpga\_image](./hdk/docs/delete_fpga_image.md), [describe\_fpga\_images](./hdk/docs/describe_fpga_images.md), [fpga\_image\_attributes](./hdk/docs/fpga_image_attributes.md) | CLI documentation for administering AFIs | +| AFI - Creation Error Codes | [create\_fpga\_image\_error\_codes](hdk/docs/create_fpga_image_error_codes.md) | CLI documentation for managing AFIs | +| AFI - Power | [FPGA Power, recovering from clock gating](./hdk/docs/afi_power.md) | Helps developers with understanding FPGA power usage, preventing power violations on the F1 instance and recovering from a clock-gated slot. | +| On-premise Development | [Tools, Licenses required for on-premise development](./docs/on_premise_licensing_help.md) | Guidance for developers wanting to develop AFIs from on-premises instead of using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | +| Frequently asked questions | [FAQ](./FAQs.md) | Q/A are added based on developer feedback and common AWS forum questions | + + +# Developer Support + +* The [**Amazon FPGA Development User Forum**](https://forums.aws.amazon.com/forum.jspa?forumID=243&start=0) is the first place to go to post questions, learn from other users and read announcements. + * We recommend joining the [AWS forums](https://forums.aws.amazon.com/forum.jspa?forumID=243) to engage with the FPGA developer community, AWS and Xilinx engineers to get help. + +* You could also file a [GitHub Issue](https://github.com/aws/aws-fpga/issues) for support. We prefer the forums as this helps the entire community learn from issues, feedback and answers. + * Click the "Watch" button in the GitHub upper right corner to get regular updates. 
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 443d2bea..363a00eb 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,28 +1,15 @@ # AWS EC2 FPGA HDK+SDK Release Notes -## AWS EC2 F1 Platform Features: - * 1-8 Xilinx UltraScale+ VU9P based FPGA slots - * Per FPGA Slot, Interfaces available for Custom Logic(CL): - * One x16 PCIe Gen 3 Interface - * Four DDR4 RDIMM interfaces (with ECC) - * AXI4 protocol support on all interfaces - * User-defined clock frequency driving all CL to Shell interfaces - * Multiple free running auxiliary clocks - * PCI-E endpoint presentation to Custom Logic(CL) - * Management PF (physical function) - * Application PF - * Virtual JTAG, Virtual LED, Virtual DIP Switches - * PCI-E interface between Shell(SH) and Custom Logic(CL). - * SH to CL inbound 512-bit AXI4 interface - * CL to SH outbound 512-bit AXI4 interface - * Multiple 32-bit AXI-Lite buses for register access, mapped to different PCIe BARs - * Maximum payload size set by the Shell - * Maximum read request size set by the Shell - * AXI4 error handling - * DDR interface between SH and CL - * CL to SH 512-bit AXI4 interface - * 1 DDR controller implemented in the SH (always available) - * 3 DDR controllers implemented in the CL (configurable number of implemented controllers allowed) +## Release 1.4.16 (See [ERRATA](./ERRATA.md) for unsupported features) +* FPGA developer kit now supports Xilinx Vivado/Vitis 2020.1 + * To upgrade, use [Developer AMI v1.9.0](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on the AWS Marketplace. +* Updated Vitis examples to include usage of Vitis Libraries. +* Added documentation and examples to show Xilinx Alveo design migration to F1. + +## Release 1.4.15a (See [ERRATA](./ERRATA.md) for unsupported features) +* Fixed Xilinx AR#73068 patching + * DDR4 IP needs to be regenerated for the patch to take effect. +* Updated cl_dram_dma public AFI. 
## Release 1.4.15 (See [ERRATA](./ERRATA.md) for unsupported features) * Added Xilinx AR#73068 patching @@ -235,55 +222,6 @@ * Release 1.4.0 greatly improves the performance of the DMA (for interrupt driven DMA on the cl\_dram\_dma example design). This is accomplished through a combination of shell changes to relax DMA timeouts and a new XDMA software driver option. We have ported the relevant HDK examples to the XDMA driver in this release. EDMA is still supported, and developers can freely choose which DMA driver to use as part of their host application. -## Supported Tools and Environment - -* The HDK and SDK are designed for **Linux** environment and has not been tested on other platforms -* The First installation of AWS FPGA SDK requires having gcc installed on the instance. If it's not available, try `sudo yum update && sudo yum group install "Development Tools"` -* The HDK build step requires having Xilinx's Vivado tool and Vivado License Management running. These are provided with AWS FPGA Developer AMI at no additional cost -* This release is tested and validated with Xilinx 2017.4 SDx/Vivado -* Developers that choose to develop on-premises need to have Xilinx license 'EF-VIVADO-SDX-VU9P-OP' installed. For more help, please refer to the [on-premises licensing help](./hdk/docs/on_premise_licensing_help.md) -* The following simulators are supported with this HDK: -**Vivado XSIM RTL simulator -** Mentor Graphics' Questa RTL simulator (with a separate license from MentorGraphics) -** Synopsys' VCS RTL simulator (with a separate license from Synopsys) - -## License Requirements - -The HDK and SDK in the FPGA development kit have different licenses. For more details please refer to the [HDK License](./hdk/LICENSE.txt) and the [SDK License](./sdk/LICENSE.txt). - -## FAQs - -**Q: How do I know which HDK version I have on my instance/machine? ** - -Look for the ./hdk/hdk_version.txt file. - -**Q: How do I know what my Shell version is? 
** - -The Shell version of an FPGA slot is available through the FPGA Image Management tools after an AFI has been loaded. See the description of `fpga-describe-local-image` for more details on retrieving the shell version from a slot. Prior to loading an AFI, the state of the FPGA (including shell version) is undefined and non-deterministic. - -**Q: How do I know what version of FPGA Image management tools are running on my instance? ** - -The FPGA Image management tools version is reported with any command executed from these tools. See the description of `fpga-describe-local-image` for more details. - -**Q: How do I update my existing design with this release?** - -1. Start by either cloning the entire GitHub structure for the HDK release or downloading new directories that have changed. AWS recommends an entire GitHub clone to ensure no files are missed -2. Update the CL design to conform to the new AWS_Shell_Interface_Specification TODO: add link. TODO: need a doc to outline what changes are a MUST in this upgrade, and which ones are optional? -3. Follow the process for AFI generation outlined in aws-fpga/hdk/cl/examples/readme.md -4. Update FPGA Image Management Tools to the version included in aws-fpga/sdk/management -TODO: SDaccel design have different steps? - -**Q: How do I get support?** - -The FPGA Development forum provides an easy access to Developer support. It's the first place to go to post questions, suggestions and receive important announcements from the AWS FPGA team. To gain access to the user forum, please go to https://forums.aws.amazon.com/index.jspa and login. To be notified of important messages you will need to click the “Watch Forum” button on the right side of the screen. - -**Q: How do I know which HDK GitHub release I am working with? ** - -See the release notes at the top of the GitHub directory to identify the version of your GitHub clone. 
- -TODO: The following major features are included in this HDK release: - - ## Previous release notes ## Release 1.3.X Details (See [ERRATA](./ERRATA.md) for unsupported features) diff --git a/SDAccel/README.md b/SDAccel/README.md index a6fd741a..da59da6e 100644 --- a/SDAccel/README.md +++ b/SDAccel/README.md @@ -43,7 +43,7 @@ It is highly recommended you read the documentation and utilize software and har * Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with SDAccel and required licenses. * You may use this F1 instance to [build your host application and Xilinx FPGA binary](#createapp), however, it is more cost efficient to either: * Launch the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on a compute EC2 instance, with a minimum of 30GiB RAM), **OR** - * Follow the [On-Premises Instructions](../hdk/docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. + * Follow the [On-Premises Instructions](../docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. * Setup AWS IAM permissions for creating FPGA Images (CreateFpgaImage and DescribeFpgaImages). [EC2 API Permissions are described in more detail](http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ec2-api-permissions.html). It is highly recommended that you validate your AWS IAM permissions prior to proceeding with this quick start. By calling the [DescribeFpgaImages API](../hdk/docs/describe_fpga_images.md) you can check that your IAM permissions are correct. * [Setup AWS CLI and S3 Bucket](docs/Setup_AWS_CLI_and_S3_Bucket.md) to enable AFI creation. * Install optional [packages](packages.txt) required to run all examples. If you do not install these packages, some examples may not work properly. The setup scripts will warn you of any missing packages. @@ -191,7 +191,7 @@ For help with AFI creation issues, see [create-fpga-image error codes](../hdk/do # 3. 
Run the FPGA accelerated application on Amazon FPGA instances -* Start an FPGA instance using [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) and check the AMI [compatibility table](../README.md#devAmi) and [runtime compatibility table](docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table). Alternatively, you can [create your own Runtime AMI](docs/Create_Runtime_AMI.md) for running your SDAccel applications on Amazon FPGA instances. +* Start an FPGA instance using [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) and check the AMI [compatibility table](../README.md#fpga-developer-ami) and [runtime compatibility table](docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table). Alternatively, you can [create your own Runtime AMI](docs/Create_Runtime_AMI.md) for running your SDAccel applications on Amazon FPGA instances. * *Assuming the developer flow (compilation) was done on a separate instance you will need to:* * Copy the compiled host executable (exe) to the new instance * Copy the \*.awsxclbin AWS FPGA binary file to the new instance diff --git a/SDAccel/docs/README_GUI.md b/SDAccel/docs/README_GUI.md index 9c84be84..4d3f5485 100644 --- a/SDAccel/docs/README_GUI.md +++ b/SDAccel/docs/README_GUI.md @@ -31,11 +31,7 @@ First change directory to **helloworld_ocl** example. ``` $ cd /SDAccel/examples/xilinx/getting_started/hello_world/helloworld_ocl ``` -The github examples use common header files and those needs to be copied in the local project source folder to make it easier to use. -Type the command **make local-files** to copy all necessary files in the local directory. -``` - $ make local-files -``` + The SDAccel GUI is invoked with the **sdx** command. 
``` diff --git a/SDAccel/examples/xilinx_2017.4 b/SDAccel/examples/xilinx_2017.4 deleted file mode 160000 index cd196250..00000000 --- a/SDAccel/examples/xilinx_2017.4 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cd196250dfdd63491080e8c6f3e79fe6d1718997 diff --git a/SDAccel/examples/xilinx_2018.2 b/SDAccel/examples/xilinx_2018.2 deleted file mode 160000 index a41b5892..00000000 --- a/SDAccel/examples/xilinx_2018.2 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a41b58921188ad90ace2d34a22a2513d8f74b549 diff --git a/SDAccel/examples/xilinx_2018.3 b/SDAccel/examples/xilinx_2018.3 deleted file mode 160000 index b2884db9..00000000 --- a/SDAccel/examples/xilinx_2018.3 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b2884db9768d6589ae094cd06d9b491b3bd39816 diff --git a/Vitis/README.md b/Vitis/README.md index a3248372..5e34083e 100644 --- a/Vitis/README.md +++ b/Vitis/README.md @@ -2,10 +2,10 @@ There are three steps for accelerating your application on an Amazon EC2 FPGA instance using the software-defined development flow: 1. Build the host application, and the Xilinx FPGA binary -2. Create an AFI +2. Create an AFI 3. Run the FPGA accelerated application on AWS FPGA instances -This quick start guide will utilize a simple "Hello World" Vitis example to get you started. +This quick start guide will utilize a simple "Hello World" Vitis example to get you started. It is highly recommended you read the documentation and utilize software and hardware emulation prior to running on F1. The F1 HW Target compile time is ~50 minutes, therefore, software and hardware emulation should be used during development. 
@@ -31,7 +31,7 @@ The F1 HW Target compile time is ~50 minutes, therefore, software and hardware e # Overview * Vitis is a complete development environment for applications accelerated using Xilinx FPGAs * It leverages the OpenCL heterogeneous computing framework to offload compute intensive workloads to the FPGA -* The accelerated application is written in C/C++, OpenCL or RTL with OpenCL APIs +* The accelerated application is written in C/C++, OpenCL or RTL with OpenCL APIs # Prerequisites @@ -41,26 +41,28 @@ The F1 HW Target compile time is ~50 minutes, therefore, software and hardware e * Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with Vitis and required licenses. * You may use this F1 instance to [build your host application and Xilinx FPGA binary](#createapp), however, it is more cost efficient to either: * Launch the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on a compute EC2 instance, with a minimum of 30GiB RAM), **OR** - * Follow the [On-Premises Instructions](../hdk/docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. + * Follow the [On-Premises Instructions](../docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. * Setup AWS IAM permissions for creating FPGA Images (CreateFpgaImage and DescribeFpgaImages). [EC2 API Permissions are described in more detail](http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ec2-api-permissions.html). It is highly recommended that you validate your AWS IAM permissions prior to proceeding with this quick start. By calling the [DescribeFpgaImages API](../hdk/docs/describe_fpga_images.md) you can check that your IAM permissions are correct. * [Setup AWS CLI and S3 Bucket](docs/Setup_AWS_CLI_and_S3_Bucket.md) to enable AFI creation. -* Install optional [packages](packages.txt) required to run all examples. 
If you do not install these packages, some examples may not work properly. The setup scripts will warn you of any missing packages. +* Install optional [packages](packages.txt) required to run all examples. If you do not install these packages, some examples may not work properly. The setup scripts will warn you of any missing packages. * Additional dependencies may get flagged during the AWS Vitis scripts as warnings or errors. ## Github and Environment Setup -* Clone this github repository and source the *vitis_setup.sh* script. This will take care of: - * Downloading the required files: - * [AWS Platform](./aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2) that allows Xilinx FPGA Binary files to target AWS F1 instances - * [AFI Creation script](./tools/create_vitis_afi.sh) that generates an AFI and AWS FPGA Binary from a Xilinx FPGA Binary - * Installing the required XRT, libraries and drivers - - ``` - $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR - $ cd $AWS_FPGA_REPO_DIR - $ source vitis_setup.sh - ``` +* Clone this github repository and source the *vitis_setup.sh* script: +``` + $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR + $ cd $AWS_FPGA_REPO_DIR + $ source vitis_setup.sh +``` + +* Sourcing the *vitis_setup.sh* script: + * Downloads and sets the correct AWS Platform: + * [AWS Vitis Platform](./aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2) that contains the dynamic hardware that enables Vitis kernels to run on AWS F1 instances. * Valid platforms for shell_v04261818: `AWS_PLATFORM_201920_2` (Default) AWS F1 Vitis platform. + * Sets up the Xilinx Vitis example submodules. + * Installs the required libraries and package dependencies. + * Run environment checks to verify supported tool/lib versions. # 1. 
Build the host application, Xilinx FPGA binary and verify you are ready for FPGA acceleration @@ -70,13 +72,15 @@ This section will walk you through creating, emulating and compiling your host a # Emulate your Code - The main goal of emulation is to ensure functional correctness and to determine how to partition the application between the host CPU and the FPGA. +HW/SW Emulation does not require use of actual FPGA's and can be run on any compute instances. Using non-F1 EC2 compute instances for initial development will help reduce costs. ## Software (SW) Emulation -For CPU-based (SW) emulation, both the host code and the FPGA binary code are compiled to run on an x86 processor. The SW Emulation enables developers to iterate and refine the algorithms through fast compilation. The iteration time is similar to software compile and run cycles on a CPU. +For CPU-based (SW) emulation, both the host code and the FPGA binary code are compiled to run on an x86 processor. +SW Emulation enables developers to iterate and refine the algorithms through fast compilation. +The iteration time is similar to software compile and run cycles on a CPU. The instructions below describe how to run the Vitis SW Emulation flow using the Makefile provided with a simple "hello world" example @@ -183,7 +187,7 @@ For help with AFI creation issues, see [create-fpga-image error codes](../hdk/do # 3. Run the FPGA accelerated application on Amazon FPGA instances -* Start an FPGA instance using [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) and check the AMI [compatibility table](../README.md#devAmi) and [runtime compatibility table](./docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table). Alternatively, you can [create your own Runtime AMI](docs/Create_Runtime_AMI.md) for running your Vitis applications on Amazon FPGA instances. 
+* Start an FPGA instance using [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) and check the AMI [compatibility table](../README.md#fpga-developer-ami) and [runtime compatibility table](./docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table). Alternatively, you can [create your own Runtime AMI](docs/Create_Runtime_AMI.md) for running your Vitis applications on Amazon FPGA instances. * *Assuming the developer flow (compilation) was done on a separate build instance you will need to:* * Copy the compiled host executable (exe) to the new F1 instance * Copy the \*.awsxclbin AWS FPGA binary file to the new instance @@ -203,8 +207,6 @@ For help with AFI creation issues, see [create-fpga-image error codes](../hdk/do # Additional Vitis Information -* [Vitis Environment tutorial](https://www.github.com/Xilinx/Vitis-Tutorials) - * [Vitis User Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug1393-vitis-application-acceleration.pdf) * [Vitis Product Info](https://www.xilinx.com/products/design-tools/vitis.html) diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration.md b/Vitis/docs/Alveo_to_AWS_F1_Migration.md new file mode 100644 index 00000000..cdcb5fc3 --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration.md @@ -0,0 +1,242 @@ +# Application Migration between Alveo U200 platform & Amazon EC2 F1 instances + +The Vitis development environment provides a unified environment to develop FPGA accelerated applications across Alveo™ products and Amazon EC2 F1 instances. +The Vitis® flow is based on standard programming languages for both software and hardware components, along with an open-source runtime library and optimizing compilation technology. +This approach enables seamless application migration across acceleration platforms. 
+ +Using the Vitis tool flow, Xilinx was able to seamlessly port over 40+ designs from the Alveo U200 platform to F1 instances without touching the kernel source code and making only minor cosmetic changes to application source code. +One example is Xilinx Real-Time Anti Money Laundering Watch List Management Compute Solution that was developed with Vitis and can be deployed to Alveo U200 and F1 instances. + +## Introduction to Vitis + +FPGA-applications built with the Vitis flow rely on a stack of standardized software and hardware components that insulate the application from platform-specific details, as seen in the figure below. + + +![img](./Alveo_to_AWS_F1_Migration/img/image01.png) + +In the Vitis flow, user applications are developed in C or C++ and use standard user-space APIs to interact with accelerated functions (also known as kernels) implemented in the FPGA device. +These APIs are implemented by the Xilinx Runtime library (XRT) and are built on top of drivers that manage communication to and from the FPGA device. +On the hardware side, a platform-specific shell is responsible for essential services such as managing the PCIe link, DMA transfers (to and from the host), and interfacing with off-chip DDR memory. +The shell also exposes standard AXI interfaces to which the user kernels can be connected. + +With this architecture, the user’s source code (host application and acceleration kernel) remains mostly agnostic of platform-specific details. +The application sees the standardized XRT APIs and AXI interfaces which are common to all Vitis acceleration platforms. +This aspect is key to enabling application portability across similar FPGA platforms.
For most designs, porting from an Alveo U200 platform to F1 instances **can be as simple as changing the --platform option when building the design with Vitis.** + +More details about the Vitis programming and execution model can be found in the [Introduction](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/chunkbreaker1.html#ctb1559866511109) chapter of the Vitis documentation. + + +## Comparison of the Alveo U200 platform and AWS EC2 F1 instances + +FPGA accelerated applications developed with Vitis are highly portable across similar acceleration platforms. +While Vitis greatly facilitates the migration process, it is important to recognize that the features and characteristics of the acceleration platform or instance targeted will have an impact on functionality and achievable performance. + +The following table contrasts the key characteristics of the Alveo U200 and AWS EC2 F1 platforms. + +| | **Feature** | **AWS f1.2xlarge instance** | **Alveo U200 platform** | +| ----------------------- | -------------------- | ------------------------------------------------- | --------------------------- | +| **Available resources** | SLRs | 3 | 3 | +| | LUTs | 895k | 983k | +| | Registers | 1790k | 1966k | +| | DSP Slices | 5640 | 5856 | +| | URAM | 800*288kb = 225Mb | 800*288kb = 225Mb | +| | BRAM | 1680*36kb = 59Mb | 1848*36kb = 64.9Mb | +| **Off-chip memory** | DDR total capacity | 64GB (4x16GB) | 64GB (4x16GB) | +| | DDR Total BW | 68GB/s | 77GB/s | +| **Interfaces** | PCI Express | Gen3x16 | Gen3x16 | +| **Floorplan** | Shell Occupancy | SLR0 and SLR1 | SLR1 | +| | SLR0 | DDR3 | DDR0 | +| | SLR1 | DDR0 (in shell)
DDR2 | DDR1 (in shell)
DDR2 | +| | SLR2 | DDR1 | DDR3 | +| **Tool support** | Vitis | Yes | Yes | +| | ERT | Disabled | Available | +| | XRT | Full Access | Full Access | + + + +### FPGA Resources + +FPGA resources are the key building blocks for any FPGA design. +Resources are physically distributed across 3 different logic regions (SLRs) on both platforms, due to the nature of the FPGA architecture. +The FPGA devices on the Alveo U200 platform and AWS F1 instance have very similar numbers of available resources and performance is expected to be comparable across both platforms. + +The slight variance in resources is due to inherent differences between the shells for each platform. As shown in the figure below, the size and layout of the shell vary between the Alveo U200 platform and the AWS F1 instance. +Physical shell differences may impact the layout of available resources across the devices. +Developers should keep this in mind when migrating large and complex designs between the Alveo U200 platform and the F1 instances. +Advanced design considerations such as timing closure techniques may need to be considered. + +![img](./Alveo_to_AWS_F1_Migration/img/image02.png) + +For FPGA designs that utilize more than 70% of the FPGA resources, portability between platforms may require additional optimizations. +Please refer to the [UltraFast Design Methodology Timing Closure Quick Reference Guide (UG1292)](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug1292-ultrafast-timing-closure-quick-reference.pdf) for recommendations on timing closure. + +### Clock Frequency + +Both the Alveo U200 platform and F1 instances will support data clock rates up to 250MHz. +Applications running at 250MHz and below will seamlessly port between both platforms. +In addition, the Alveo U200 platform can support data clock rates between 250Mhz and 300MHz. +The data clock is used to transfer data between kernels and DDR, and deltas in clock rate may impact performance. 
+ + +### Off-Chip DDR Memory + +Both the Alveo U200 platform and F1 instances provide identical off-chip DDR memory: 4 banks of 16GBytes each for a total of 64GBytes. +It is important to note that the placement and identification of DDR banks vary across platforms. +On the Alveo U200 platform, the DDR interface placed in the shell is DDR1. +On F1 instances the equivalent DDR interface placed in the shell is DDR0. +If the application only needs a single DDR interface, it is recommended to use the dedicated controller located in the shell. + +The following table details the naming and location of DDR interfaces on the F1 instances and Alveo U200 platforms: + +| **AWS name** | **Vitis tag** | **Location** | **U200 Equivalent** | +| --------------- | ---------------| ----------------------------- | --------------------- | +| DDR A | DDR[1] | SLR2 (top SLR) | DDR[3] | +| DDR B | DDR[2] | SLR1 (mid SLR) | DDR[2] | +| DDR C | DDR[0] | SLR1 (mid SLR, shell region) | DDR[1] | +| DDR D | DDR[3] | SLR0 (bottom SLR) | DDR[0] | + +The same information can be extracted from the platform file using the `platforminfo` Vitis utility: + +```bash +# Run this after you have sourced vitis_setup.sh +platforminfo $AWS_PLATFORM +``` + +When building the FPGA design, the Vitis linker takes care of connecting kernel ports to DDR interfaces. +For both the Alveo U200 and F1 instances, Vitis will default to use the DDR interface placed in the shell. +This default behavior can be modified by using command line options to specify which DDR interfaces should be used for each connection. +This is especially useful when the FPGA design needs to access multiple DDR banks. +When migrating applications between the Alveo U200 platform and F1 instances updates may be required to the Vitis compilation script to achieve the desired DDR mapping. +The example provided at the end of this document explains how to update the Vitis scripts for DDR mapping.
+ + +## Migration Results using Vitis 2020.1 + +To demonstrate the seamless migration path offered by Vitis as well as the potential impact on the performance of the differences between the Alveo U200 platform and F1 instances, we ran over 40+ full system level applications across both platforms. + +For the vast majority of these designs, migrating between the Alveo U200 platform and the F1 instances required zero code changes (to either the host application or the kernel code). Porting the design was as simple as changing a few command line options in the Vitis compilation scripts such as the --platform and --sp switches. See [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#qcm1528577331870__section_N10049_N10019_N10001) and [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#clt1568640709907__section_tfc_zxm_1jb) for more details about these options. + +In a few cases, the host application relied on the XCL_MEM_TOPOLOGY flag, and this flag had to be modified to port between the Alveo U200 platform and F1 instances. This optional flag can be used to explicitly specify in which DDR bank a given buffer needs to be allocated. See [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/optimizingperformance.html#utc1504034308941) for more details about this flag. + +The following table shows results for a subset of these applications and compares the performance of each kernel (CU), looking at both the duration and primary kernel clock frequency. All these applications could be easily ported between the Alveo U200 platform and F1 instances while maintaining application performance.
+ +| **TEST CASE** | **KERNEL NAMEs** | **CU Time AWS F1 (ms)** | **CU Time Alveo U200 (ms)** | **CU Clock AWS F1 (MHz)** | **CU Clock Alveo U200 (MHz)** | +| ----------------------------------------------- | ---------------------- | ----------------------- | --------------------------- | ------------------------- | ----------------------------- | +| Data Analytics (Bayes classification training) | naiveBayesTrain_kernel | 0.5491 | 0.5114 | 250 | 279 | +| Compression (gzip) | xilDecompressFull | 0.0335 | 0.0334 | 250 | 231 | +| | xilHuffmanKernel | 0.0435 | 0.0549 | 250 | 231 | +| | xilLz77Compress | 0.0203 | 0.0324 | 250 | 231 | +| Compression (zlib) | xilDecompressFull | 0.0254 | 0.0440 | 245 | 243 | +| | xilHuffmanKernel | 0.0440 | 0.0535 | 245 | 243 | +| | xilLz77Compress | 0.0211 | 0.0358 | 245 | 243 | +| Database (Compound Sort) | SortKernel | 1.1088 | 1.2326 | 250 | 234 | +| Quantitative Finance (BlackScholes) | bs_kernel | 0.0566 | 0.0541 | 250 | 300 | +| Quantitative Finance (BlackScholesMerton) | bsm_kernel | 0.2469 | 0.1984 | 250 | 280 | +| Quantitative Finance (HestonFD) | fd_kernel | 744.6795 | 704.1600 | 156 | 169 | +| Quantitative Finance (MonteCarlo) | mc_euro_k | 0.1351 | 0.1198 | 250 | 300 | +| Quantitative Finance (MonteCarloDJE) | kernel_mc_0 | 0.5365 | 0.4587 | 250 | 300 | +| Quantitative Finance (PortfolioOptimisation) | po_kernel | 0.1678 | 0.2154 | 138 | 115 | +| Quantitative Finance (b76) | b76_kernel | 0.5407 | 0.4251 | 250 | 300 | +| Quantitative Finance (cds) | CDS_kernel | 0.0489 | 0.0459 | 250 | 300 | +| Quantitative Finance (fdbslv) | fd_bs_lv_kernel | 2.3244 | 1.8575 | 250 | 300 | +| Quantitative Finance (hcf) | hcf_kernel | 0.2393 | 0.2144 | 250 | 300 | +| Matrix Solver (gesvdj) | kernel_gesvdj_0 | 0.2579 | 0.2595 | 250 | 273 | +| Matrix Solver (gesvj) | kernel_gesvj_0 | 0.0201 | 0.0377 | 250 | 300 | +| Computer Vision (Color detection) | color_detect | 0.0785 | 0.0755 | 250 | 300 | +| Computer Vision (Pixel pipeline) | 
pp_pipeline_accel | 0.1603 | 0.1459 | 250 | 300 | +| Computer Vision (Gaussian difference) | gaussiandiference | 33.5225 | 28.0049 | 250 | 300 | +| Computer Vision (Letterbox) | letterbox_accel | 0.0344 | 0.0394 | 250 | 300 | +| Computer Vision (Stereo vision pipeline) | stereopipeline_accel | 11.6349 | 9.7058 | 250 | 300 | +| Computer Vision (Corner Tracker) | cornerTracker | 0.2409 | 0.2160 | 250 | 300 | + +It should also be noted that these results only look at kernel performance, some of which are able to run the full clock rate available, for the F1 instances this is up to 250MHz and for the Alveo U200 platform, this is 300MHz. +Algorithms such as Compression libraries implemented in hardware can be seen to have nearly identical performance as the maximum clock rates are not quite achievable for these libraries. +Additional system level application advantages of either the Alveo U200 platform or F1 instances are not captured with this benchmark. + + +## Migration Example + +A detailed working example walking through all the steps required to migrate an application from U200 to F1 instances can be found [here](./Alveo_to_AWS_F1_Migration/example/README.md). + +In this example, the source code for the software program and the FPGA kernels remains identical whether targeting U200 or F1 instances. +Only command line changes are necessary to port the application. + +The Vitis flow leverages dedicated compilation steps to build the software program and FPGA accelerators. These steps are described below. 
+ + + +### Compiling the software program + +The software program is compiled exactly in the same way in both case: + +```bash +g++ -D__USE_XOPEN2K8 -I/$(XILINX_XRT)/include/ -I./src -O3 -Wall -fmessage-length=0 -std=c++11 ../src/host.cpp -L/$(XILINX_XRT)/lib/ -lxilinxopencl -lpthread -lrt -o host +``` + +The software program is linked with the XRT libraries which manages the specific requirements of each FPGA platform, allowing the source code to remain the same for U200 and F1. + +See [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/buildinghostprogram.html#asy1528754332783) for more details about building the host program for the Vitis flow. + + +### Compiling the FPGA binary + +When building the FPGA binary only a few options need to be changed when retargeting from U200 and F1 instances. These options are contained in a specific file (called options.cfg in our example) and which is passed to the Vitis v++ compiler with the `--config` command line option. + +Here is a side-by-side view of the both options.cfg files: + +| Contents of options.cfg for Alveo U200 | Contents of options.cfg for AWS F1 | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| platform=xilinx_u200_xdma_201830_2
[connectivity]
sp=vadd_1.in1:DDR[1]
sp=vadd_1.in2:DDR[1]
sp=vadd_1.out:DDR[1] | platform=xilinx_aws-vu9p-f1_shell-v04261818_201920_2
[connectivity]
sp=vadd_1.in1:DDR[0]
sp=vadd_1.in2:DDR[0]
sp=vadd_1.out:DDR[0] | + +The platform option specifies which acceleration platform is targeted for the build. + +The `sp` option is used to specify the assignment of kernel interfaces to DDR interfaces. The original U200 design is connecting the kernel interfaces to DDR[1] which is located in the shell. Keeping the same settings would produce a working design on F1 instances, but in order to produce exactly the same configuration and target the DDR interface located in the F1 shell, the `sp` options are modified to use DDR[0]. + +Putting all the platform-specific options in a dedicated file allows the v++ build commands to remain strictly identical: + +```bash +// Step 1: compile the kernel from source code +v++ -c -g -t hw -R 1 -k vadd --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --save-temps --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir --config ./options.cfg -I../src ../src/vadd.cpp -o ./vadd.hw.xo + +// Step 2: link the compiled kernel with the shell and produce the FPGA binary +v++ -l -g -t hw -R 1 --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir --config ./options.cfg -I../src vadd.hw.xo -o add.hw.xclbin +``` + +See [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#wrj1504034328013) for more information about the v++ command line options and configuration files. + + + +### Creating the Amazon FPGA Image + +Once you have compiled the host program and the FPGA binary, you are ready to execute the FPGA-accelerated application on a server equipped with an Alveo U200 acceleration card. + +When targeting F1 instances, you need to go through the additional step of creating an Amazon FPGA Image (AFI). This is done with the `create_vitis_afi.sh` command provided by AWS. This command reads in the FPGA binary generated by the v++ linker and requires information about the user’s AWS S3 bucket.
+ +In this example, the command looks as follows: + +```bash +$AWS_FPGA_REPO_DIR/Vitis/tools/create_vitis_afi.sh -xclbin=./vadd.xclbin -o=./vadd -s3_bucket= -s3_dcp_key=f1-dcp-folder -s3_logs_key=f1-logs +``` + +For more details about the `create_vitis_afi.sh` command, you can consult the AWS documentation [here](https://github.com/aws/aws-fpga/blob/master/Vitis/README.md#2-create-an-amazon-fpga-image-afi). + + + +## Summary – Migration Checklist + +Because Vitis provides platform-independent APIs and interfaces to the developer, the process of migrating applications across similar FPGA acceleration cards is greatly facilitated. + +The following summarizes the main requirements and techniques involved in migrating from Alveo U200 to F1 instances and can be used as a checklist to help along the process. + +#### Mandatory changes +* Update the --platform option in the Vitis compilation script – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#qcm1528577331870__section_N10049_N10019_N10001) +* Create an Amazon FPGA Image (AFI) from the FPGA binary (.xclbin) generated by Vitis – more details [here](https://github.com/aws/aws-fpga/blob/master/Vitis/README.md#2-create-an-amazon-fpga-image-afi) + +#### Design specific changes related to DDR mapping +* Use the --sp option to specify the assignment of kernel interfaces to DDR banks – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#clt1568640709907__section_tfc_zxm_1jb) +* Use the XCL_MEM_TOPOLOGY flag in the host source code – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/optimizingperformance.html#utc1504034308941) + +#### Design specific changes related to timing closure +* Use the --frequency option to override the default clock frequency defined on the hardware platform– more details 
[here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#qcm1528577331870__section_frk_xtr_t3b) +* Use the –slr option to map kernels to specific SLRs in the device in order to help with timing closure – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#clt1568640709907__section_m3v_qxm_1jb) +* Apply advanced Vivado options to optimize implementation results - more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/buildingdevicebinary.html#hnw1523048617934) and [here](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug1292-ultrafast-timing-closure-quick-reference.pdf). diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/README.md b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/README.md new file mode 100644 index 00000000..9aa4ec39 --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/README.md @@ -0,0 +1,149 @@ +# Alveo U200 to AWS F1 Migration Example + +This example illustrates how to port a Vitis application developed for an Alveo U200 card to an AWS EC2 F1 instance. + +The Vitis development flow provides platform independent APIs and interfaces to the developer. This greatly facilitates the process of migrating applications across similar FPGA acceleration cards. In this example, the source code for the software program and for the FPGA kernels remains unchanged. Only command line changes are necessary to port the application from Alveo U200 to AWS F1. + + +## Example Overview + +The accelerator used in this example is a simple vector-add kernel. The `src` directory contains the source code for the project: + +- `vadd.cpp` contains the C++ source code of the accelerator which adds 2 arbitrarily sized input vectors. +- `host.cpp` contains the main function running on the host CPU. The host application is written in C++ and uses OpenCL™ APIs to interact with the FPGA accelerator. 
+ +The `u200` and `f1` directories contain the Makefiles and scripts for building for Alveo U200 and AWS F1 respectively. + + + +## Building for Alveo U200 + +*Note: The instructions below assume that the required tools and platforms are installed and that the environment is properly setup to run Vitis.* + +1. Go to the `u200` directory + +2. The example is built with the following commands: + + ```bash + g++ -D__USE_XOPEN2K8 -I/$XILINX_XRT/include/ -I./src -O3 -Wall -fmessage-length=0 -std=c++11 ../src/host.cpp -L/$XILINX_XRT/lib/ -lxilinxopencl -lpthread -lrt -o host + + v++ -c -g -t hw -R 1 -k vadd --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --save-temps --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src ../src/vadd.cpp -o ./vadd.hw.xo + + v++ -l -g -t hw -R 1 --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src vadd.hw.xo -o add.hw.xclbin + ``` + + * The `g++` command compiles the host program and links it with the Xilinx Runtime (XRT) libraries. XRT provides platform-independent APIs to interact with the FPGA, allowing the source code to remain the same for U200 and F1. + * The `v++ -c` command compiles the source code for vector-add kernel (vadd.cpp) and generates the compiled kernel object (.xo). + * The `v++ -l` command links the compiled kernel to the shell and produces the FPGA binary (.xclbin file) which can then be loaded on the U200 acceleration card. + +3. For both `v++`commands, the `--config` option is used to pass the name of a file called `options.cfg` containing additional options specific to building for U200. 
The `options.cfg` file contains the following options: + + ``` + platform=xilinx_u200_xdma_201830_2 + [connectivity] + sp=vadd_1.in1:DDR[1] + sp=vadd_1.in2:DDR[1] + sp=vadd_1.out:DDR[1] + ``` + + * The `platform` option specifies which acceleration platform is targeted for the build. Here we are using the U200 shell. + * The `sp` options are used to specify the assignment of kernel arguments to DDR banks. In this case, we are mapping all three kernel arguments to DDR[1], which is the DDR interface located in the shell on Alveo U200. + + > Putting all the platform-specific options in one file is not mandatory but it is very convenient and facilitates the porting process. With this approach, the main command line can be reused as is for all platforms. Refer to the [Vitis Documentation](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/kme1569523964461.html) for more information on v++ related commands and options. + +4. The Makefile provided in the directory can be used to build the project for U200. Running `make build` will build the host application, compile the kernel and finally create the FPGA binary for U200. + + +## Building for AWS F1 + +In order to port the vector-add example from Alveo U200 to AWS F1, the only change required is in the `options.cfg` file. The source code remains unchanged and the g++ and v++ commands remain identical. + +1. Go to the `f1` directory + +2. The `options.cfg` file for AWS F1 contains the following options: + + ``` + platform=xilinx_aws-vu9p-f1_shell-v04261818_201920_2 + [connectivity] + sp=vadd_1.in1:DDR[0] + sp=vadd_1.in2:DDR[0] + sp=vadd_1.out:DDR[0] + ``` + + * The `platform` option is set to target the AWS F1 shell. The string used corresponds to the name of the latest shell which can be found [here](https://github.com/aws/aws-fpga/tree/master/Vitis/aws_platform) on the aws-fpga repo. + * The `sp` options are set to connect the kernel arguments to DDR[0], which is the DDR interface located in the AWS F1 shell. 
Keeping the same settings as the U200 would produce a working design on AWS F1. But in order to produce exactly the same configuration and target the DDR interface located in the AWS F1 shell, the sp options are modified to use DDR[0]. + + These changes are the only ones needed to port this project from U200 to F1. + +3. You can build the project for AWS F1 using the exact same commands that were used for U200: + + ```bash + export PLATFORM_REPO_PATHS=/home/centos/src/project_data/aws-fpga/Vitis/aws_platform + + g++ -D__USE_XOPEN2K8 -I/$XILINX_XRT/include/ -I./src -O3 -Wall -fmessage-length=0 -std=c++11 ../src/host.cpp -L/$XILINX_XRT/lib/ -lxilinxopencl -lpthread -lrt -o host + + v++ -c -g -t hw -R 1 -k vadd --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --save-temps --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src ../src/vadd.cpp -o ./vadd.hw.xo + + v++ -l -g -t hw -R 1 --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src vadd.hw.xo -o add.hw.xclbin + ``` + + *NOTE: The PLATFORM_REPO_PATHS environment variable is used to specify the directory where the AWS platform (xilinx_aws-vu9p-f1_shell-v04261818_201920_2) is installed.* + +4. When targeting AWS F1, you need to go through the additional step of creating an Amazon FPGA Image (AFI). This is done with the `create_vitis_afi.sh` command provided by AWS. More information about this command is available on the [AWS documentation](https://github.com/aws/aws-fpga/blob/master/Vitis/README.md#2-create-an-amazon-fpga-image-afi). 
+ + Use the command below to generate the AFI and the .awsxclbin file: + + ```bash + $AWS_FPGA_REPO_DIR/Vitis/tools/create_vitis_afi.sh -xclbin=./vadd.xclbin -o=./vadd -s3_bucket=<bucket-name> -s3_dcp_key=f1-dcp-folder -s3_logs_key=f1-logs + ``` + + *NOTE: Make sure to use your S3 bucket information when running the create_vitis_afi command.* + + Check the status of the AFI creation process by using the AFI ID with the following command: + + ```bash + aws ec2 describe-fpga-images --fpga-image-ids <AFI ID> + ``` + + The AFI is ready to use when the state is reported as 'available'. + + ```json + "State": { + "Code": "available" + }, + ``` + + *NOTE: The AFI ID can be found in the _afi_id.txt file created by the create_vitis_afi command.* + + + +## Running the Application on AWS F1 + +1. Execute the following command to source the Vitis runtime environment + + ```bash + source $AWS_FPGA_REPO_DIR/vitis_runtime_setup.sh + ``` + +2. Execute the host application with the .awsxclbin FPGA binary + + ```bash + ./host vadd.awsxclbin + ``` + +3. The messages below will indicate that the program ran successfully. + + ```bash + Found Platform + Platform Name: Xilinx + INFO: Reading ./vadd.awsxclbin + Loading: './vadd.awsxclbin' + TEST PASSED + ``` + + +## Summary + +In the Vitis flow, the user can develop source code which remains mostly agnostic of platform-specific details. This greatly facilitates the process of migrating applications across similar FPGA acceleration cards. In this example, the same source code could be ported from Alveo U200 to AWS F1 without any changes at all. Only a couple of changes to the v++ compilation options were required. + +For a complete application migration checklist, refer to the [Migration between Alveo U200 platform & Amazon EC2 F1 instances](../../Alveo_to_AWS_F1_Migration.md) application note. 
diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/Makefile b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/Makefile new file mode 100644 index 00000000..28023dbd --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/Makefile @@ -0,0 +1,48 @@ +TARGET := hw + +build: xclbin host + +run: build + ./host ./vadd.xclbin + +vadd.xo: ../src/vadd.cpp + v++ -c -g -t $(TARGET) -R 1 -k vadd \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --save-temps \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + ../src/vadd.cpp \ + -o ./vadd.xo + +vadd.xclbin: vadd.xo + v++ -l -g -t $(TARGET) -R 1 \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + vadd.xo \ + -o vadd.xclbin + +host: ../src/host.cpp ../src/host.hpp + g++ -D__USE_XOPEN2K8 \ + -I$(XILINX_XRT)/include/ \ + -I./src \ + -O3 -Wall -fmessage-length=0 -std=c++11\ + ../src/host.cpp \ + -L$(XILINX_XRT)/lib/ \ + -lxilinxopencl -lpthread -lrt \ + -o ./host + +xclbin: vadd.xclbin + +xo: vadd.xo + +clean: + rm -rf temp_dir log_dir report_dir *log host vadd.* *.csv *summary .run .Xil vitis* xclbin *.protoinst *.wdb *.wcfg diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/options.cfg b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/options.cfg new file mode 100644 index 00000000..a5eca86f --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/options.cfg @@ -0,0 +1,6 @@ +platform=xilinx_aws-vu9p-f1_shell-v04261818_201920_2 +[connectivity] +sp=vadd_1.in1:DDR[0] +sp=vadd_1.in2:DDR[0] +sp=vadd_1.out:DDR[0] + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.cpp b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.cpp new file mode 100644 index 00000000..1b426733 --- /dev/null +++ 
b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.cpp @@ -0,0 +1,183 @@ +/********** +Copyright (c) 2018, Xilinx, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********/ + +#include "host.hpp" + +int main(int argc, char** argv) +{ + if (argc != 2) { + std::cout << "Usage: " << argv[0] << " <XCLBIN File>" << std::endl; + return EXIT_FAILURE; + } + + std::string binaryFile = argv[1]; + size_t vector_size_bytes = sizeof(int) * DATA_SIZE; + cl_int err; + unsigned fileBufSize; + // Allocate Memory in Host Memory + std::vector<int, aligned_allocator<int>> source_in1(DATA_SIZE); + std::vector<int, aligned_allocator<int>> source_in2(DATA_SIZE); + std::vector<int, aligned_allocator<int>> source_hw_results(DATA_SIZE); + std::vector<int, aligned_allocator<int>> source_sw_results(DATA_SIZE); + + // Create the test data + for(int i = 0 ; i < DATA_SIZE ; i++){ + source_in1[i] = rand() % DATA_SIZE; + source_in2[i] = rand() % DATA_SIZE; + source_sw_results[i] = source_in1[i] + source_in2[i]; + source_hw_results[i] = 0; + } + +// OPENCL HOST CODE AREA START + +// ------------------------------------------------------------------------------------ +// Step 1: Get All PLATFORMS, then search for Target_Platform_Vendor (CL_PLATFORM_VENDOR) +// Search for Platform: Xilinx +// Check if the current platform matches Target_Platform_Vendor +// ------------------------------------------------------------------------------------ + std::vector<cl::Device> devices = get_devices("Xilinx"); + devices.resize(1); + cl::Device device = devices[0]; + +// ------------------------------------------------------------------------------------ +// Step 1: Create Context +// ------------------------------------------------------------------------------------ + OCL_CHECK(err, cl::Context context(device, NULL, NULL, NULL, &err)); + +// ------------------------------------------------------------------------------------ +// Step 1: Create Command Queue +// ------------------------------------------------------------------------------------ + OCL_CHECK(err, cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE, &err)); + +// ------------------------------------------------------------------ +// Step 1: Load Binary File from disk +// 
------------------------------------------------------------------ + char* fileBuf = read_binary_file(binaryFile, fileBufSize); + cl::Program::Binaries bins{{fileBuf, fileBufSize}}; + +// ------------------------------------------------------------- +// Step 1: Create the program object from the binary and program the FPGA device with it +// ------------------------------------------------------------- + OCL_CHECK(err, cl::Program program(context, devices, bins, NULL, &err)); + +// ------------------------------------------------------------- +// Step 1: Create Kernels +// ------------------------------------------------------------- + OCL_CHECK(err, cl::Kernel krnl_vector_add(program,"vadd", &err)); + +// ================================================================ +// Step 2: Setup Buffers and run Kernels +// ================================================================ +// o) Allocate Memory to store the results +// o) Create Buffers in Global Memory to store data +// ================================================================ + +// ------------------------------------------------------------------ +// Step 2: Create Buffers in Global Memory to store data +// o) buffer_in1 - stores source_in1 +// o) buffer_in2 - stores source_in2 +// o) buffer_ouput - stores Results +// ------------------------------------------------------------------ + +// ....................................................... +// Allocate Global Memory for source_in1 +// ....................................................... + OCL_CHECK(err, cl::Buffer buffer_in1 (context,CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + vector_size_bytes, source_in1.data(), &err)); +// ....................................................... +// Allocate Global Memory for source_in2 +// ....................................................... 
+ OCL_CHECK(err, cl::Buffer buffer_in2 (context,CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + vector_size_bytes, source_in2.data(), &err)); +// ....................................................... +// Allocate Global Memory for sourcce_hw_results +// ....................................................... + OCL_CHECK(err, cl::Buffer buffer_output(context,CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + vector_size_bytes, source_hw_results.data(), &err)); + +// ============================================================================ +// Step 2: Set Kernel Arguments and Run the Application +// o) Set Kernel Arguments +// ---------------------------------------------------- +// Kernel Argument Description +// ---------------------------------------------------- +// in1 (input) --> Input Vector1 +// in2 (input) --> Input Vector2 +// out (output) --> Output Vector +// size (input) --> Size of Vector in Integer +// o) Copy Input Data from Host to Global Memory on the device +// o) Submit Kernels for Execution +// o) Copy Results from Global Memory, device to Host +// ============================================================================ + int size = DATA_SIZE; + OCL_CHECK(err, err = krnl_vector_add.setArg(0, buffer_in1)); + OCL_CHECK(err, err = krnl_vector_add.setArg(1, buffer_in2)); + OCL_CHECK(err, err = krnl_vector_add.setArg(2, buffer_output)); + OCL_CHECK(err, err = krnl_vector_add.setArg(3, size)); + +// ------------------------------------------------------ +// Step 2: Copy Input data from Host to Global Memory on the device +// ------------------------------------------------------ + OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_in1, buffer_in2},0/* 0 means from host*/)); + +// ---------------------------------------- +// Step 2: Submit Kernels for Execution +// ---------------------------------------- + OCL_CHECK(err, err = q.enqueueTask(krnl_vector_add)); + +// -------------------------------------------------- +// Step 2: Copy Results from Device 
Global Memory to Host +// -------------------------------------------------- + OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_output},CL_MIGRATE_MEM_OBJECT_HOST)); + + q.finish(); + +// OPENCL HOST CODE AREA END + + // Compare the results of the Device to the simulation + bool match = true; + for (int i = 0 ; i < DATA_SIZE ; i++){ + if (source_hw_results[i] != source_sw_results[i]){ + std::cout << "Error: Result mismatch" << std::endl; + std::cout << "i = " << i << " CPU result = " << source_sw_results[i] + << " Device result = " << source_hw_results[i] << std::endl; + match = false; + break; + } + } + +// ============================================================================ +// Step 3: Release Allocated Resources +// ============================================================================ + delete[] fileBuf; + + std::cout << "TEST " << (match ? "PASSED" : "FAILED") << std::endl; + return (match ? EXIT_SUCCESS : EXIT_FAILURE); +} + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.hpp b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.hpp new file mode 100644 index 00000000..2f294f4d --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.hpp @@ -0,0 +1,85 @@ +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS + +//OCL_CHECK doesn't work if call has templatized function call +#define OCL_CHECK(error,call) \ + call; \ + if (error != CL_SUCCESS) { \ + printf("%s:%d Error calling " #call ", error code is: %d\n", \ + __FILE__,__LINE__, error); \ + exit(EXIT_FAILURE); \ + } +#define DATA_SIZE 4096 + +#include <iostream> +#include <fstream> +#include <stdlib.h> +#include <unistd.h> +#include <CL/cl2.hpp> + +template <typename T> +struct aligned_allocator +{ + using value_type = T; + T* allocate(std::size_t num) + { + void* ptr = nullptr; + if (posix_memalign(&ptr,4096,num*sizeof(T))) + throw 
std::bad_alloc(); + return reinterpret_cast<T*>(ptr); + } + void deallocate(T* p, std::size_t num) + { + free(p); + } +}; + +std::vector<cl::Device> get_devices(const std::string& vendor_name) { + + size_t i; + cl_int err; + std::vector<cl::Platform> platforms; + OCL_CHECK(err, err = cl::Platform::get(&platforms)); + cl::Platform platform; + for (i = 0 ; i < platforms.size(); i++){ + platform = platforms[i]; + OCL_CHECK(err, std::string platformName = platform.getInfo<CL_PLATFORM_NAME>(&err)); + if (platformName == vendor_name){ + std::cout << "Found Platform" << std::endl; + std::cout << "Platform Name: " << platformName.c_str() << std::endl; + break; + } + } + if (i == platforms.size()) { + std::cout << "Error: Failed to find Xilinx platform" << std::endl; + exit(EXIT_FAILURE); + } + + //Getting ACCELERATOR Devices and selecting 1st such device + std::vector<cl::Device> devices; + OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices)); + return devices; +} + +char* read_binary_file(const std::string &xclbin_file_name, unsigned &nb) +{ + std::cout << "INFO: Reading " << xclbin_file_name << std::endl; + + if(access(xclbin_file_name.c_str(), R_OK) != 0) { + printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str()); + exit(EXIT_FAILURE); + } + //Loading XCL Bin into char buffer + std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n"; + std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary); + bin_file.seekg (0, bin_file.end); + nb = bin_file.tellg(); + bin_file.seekg (0, bin_file.beg); + char *buf = new char [nb]; + bin_file.read(buf, nb); + return buf; +} + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/vadd.cpp b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/vadd.cpp new file mode 100644 index 00000000..805daffd --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/vadd.cpp @@ -0,0 +1,111 @@ +/********** +Copyright (c) 2018, Xilinx, Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********/ + +/******************************************************************************* +Description: + HLS pragmas can be used to optimize the design : improve throughput, reduce latency and + device resource utilization of the resulting RTL code + This is vector addition example to demonstrate how HLS optimizations are used in kernel. 
+*******************************************************************************/ + + +#define BUFFER_SIZE 1024 + +/* + Vector Addition Kernel Implementation + Arguments: + in1 (input) --> Input Vector1 + in2 (input) --> Input Vector2 + out (output) --> Output Vector + size (input) --> Size of Vector in Integer + */ +extern "C" { +void vadd( + const unsigned int *in1, // Read-Only Vector 1 + const unsigned int *in2, // Read-Only Vector 2 + unsigned int *out, // Output Result + int size // Size in integer + ) +{ +// SDAccel kernel must have one and only one s_axilite interface which will be used by host application to configure the kernel. +// Here bundle control is defined which is s_axilite interface and associated with all the arguments (in1, in2, out and size), +// control interface must also be associated with "return". +// All the global memory access arguments must be associated to one m_axi(AXI Master Interface). Here all three arguments(in1, in2, out) are +// associated to bundle gmem which means that a AXI master interface named "gmem" will be created in Kernel and all these variables will be +// accessing global memory through this interface. +// Multiple interfaces can also be created based on the requirements. For example when multiple memory accessing arguments need access to +// global memory simultaneously, user can create multiple master interfaces and can connect to different arguments. 
+#pragma HLS INTERFACE m_axi port=in1 offset=slave bundle=gmem +#pragma HLS INTERFACE m_axi port=in2 offset=slave bundle=gmem +#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem +#pragma HLS INTERFACE s_axilite port=in1 bundle=control +#pragma HLS INTERFACE s_axilite port=in2 bundle=control +#pragma HLS INTERFACE s_axilite port=out bundle=control +#pragma HLS INTERFACE s_axilite port=size bundle=control +#pragma HLS INTERFACE s_axilite port=return bundle=control + + unsigned int v1_buffer[BUFFER_SIZE]; // Local memory to store vector1 + unsigned int v2_buffer[BUFFER_SIZE]; // Local memory to store vector2 + unsigned int vout_buffer[BUFFER_SIZE]; // Local Memory to store result + + + //Per iteration of this loop perform BUFFER_SIZE vector addition + for(int i = 0; i < size; i += BUFFER_SIZE) + { + int chunk_size = BUFFER_SIZE; + //boundary checks + if ((i + BUFFER_SIZE) > size) + chunk_size = size - i; + + // Transferring data in bursts hides the memory access latency as well as improves bandwidth utilization and efficiency of the memory controller. + // It is recommended to infer burst transfers from successive requests of data from consecutive address locations. + // A local memory vl_local is used for buffering the data from a single burst. The entire input vector is read in multiple bursts. + // The choice of LOCAL_MEM_SIZE depends on the specific applications and available on-chip memory on target FPGA. 
+ // burst read of v1 and v2 vector from global memory + read1: for (int j = 0 ; j < chunk_size ; j++){ + v1_buffer[j] = in1[i + j]; + } + read2: for (int j = 0 ; j < chunk_size ; j++){ + v2_buffer[j] = in2[i + j]; + } + + // PIPELINE pragma reduces the initiation interval for loop by allowing the + // concurrent executions of operations + vadd: for (int j = 0 ; j < chunk_size; j ++){ + #pragma HLS PIPELINE II=1 + //perform vector addition + vout_buffer[j] = v1_buffer[j] + v2_buffer[j]; + } + //burst write the result + write: for (int j = 0 ; j < chunk_size ; j++){ + out[i + j] = vout_buffer[j]; + } + } +} +} diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/Makefile b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/Makefile new file mode 100644 index 00000000..28023dbd --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/Makefile @@ -0,0 +1,48 @@ +TARGET := hw + +build: xclbin host + +run: build + ./host ./vadd.xclbin + +vadd.xo: ../src/vadd.cpp + v++ -c -g -t $(TARGET) -R 1 -k vadd \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --save-temps \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + ../src/vadd.cpp \ + -o ./vadd.xo + +vadd.xclbin: vadd.xo + v++ -l -g -t $(TARGET) -R 1 \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + vadd.xo \ + -o vadd.xclbin + +host: ../src/host.cpp ../src/host.hpp + g++ -D__USE_XOPEN2K8 \ + -I$(XILINX_XRT)/include/ \ + -I./src \ + -O3 -Wall -fmessage-length=0 -std=c++11\ + ../src/host.cpp \ + -L$(XILINX_XRT)/lib/ \ + -lxilinxopencl -lpthread -lrt \ + -o ./host + +xclbin: vadd.xclbin + +xo: vadd.xo + +clean: + rm -rf temp_dir log_dir report_dir *log host vadd.* *.csv *summary .run .Xil vitis* xclbin *.protoinst *.wdb *.wcfg diff --git 
a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/options.cfg b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/options.cfg new file mode 100644 index 00000000..c48bf33d --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/options.cfg @@ -0,0 +1,6 @@ +platform=xilinx_u200_xdma_201830_2 +[connectivity] +sp=vadd_1.in1:DDR[1] +sp=vadd_1.in2:DDR[1] +sp=vadd_1.out:DDR[1] + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image01.png b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image01.png new file mode 100644 index 00000000..b760d4c0 Binary files /dev/null and b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image01.png differ diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image02.png b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image02.png new file mode 100644 index 00000000..d0fc2bd2 Binary files /dev/null and b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image02.png differ diff --git a/Vitis/docs/Create_Runtime_AMI.md b/Vitis/docs/Create_Runtime_AMI.md index 6d2fb49f..087a4ba4 100644 --- a/Vitis/docs/Create_Runtime_AMI.md +++ b/Vitis/docs/Create_Runtime_AMI.md @@ -4,6 +4,7 @@ | Vitis Version used for AFI Development | Compatible Xilinx Runtime | |--------------------------------------|-----------------------------| +| 2020.1 | AWS FPGA Developer AMI 1.9.0 (XRT is pre-installed) or [XRT](https://xilinx.github.io/XRT/2020.1/html/build.html) | | 2019.2 | AWS FPGA Developer AMI 1.8.0 (XRT is pre-installed) or [XRT](https://xilinx.github.io/XRT/2019.2/html/build.html) | ## 1. Launch a Runtime Instance & Install Required Packages @@ -31,4 +32,4 @@ $ ./helloworld ./vector_addition.awsxclbin ## 5. Make Runtime AMI available on the AWS Marketplace -* Please see [Section 5 of the AWS Marketplace Seller's Guide](https://awsmp-loadforms.s3.amazonaws.com/AWS_Marketplace_-_Seller_Guide.pdf#page=19) for more details. 
\ No newline at end of file +* Please see [Section 5 of the AWS Marketplace Seller's Guide](https://awsmp-loadforms.s3.amazonaws.com/AWS_Marketplace_-_Seller_Guide.pdf#page=19) for more details. diff --git a/Vitis/docs/Debug_Vitis_Kernel.md b/Vitis/docs/Debug_Vitis_Kernel.md index 839f49ff..4ae82c45 100644 --- a/Vitis/docs/Debug_Vitis_Kernel.md +++ b/Vitis/docs/Debug_Vitis_Kernel.md @@ -72,8 +72,7 @@ Now you are ready to instantiate the ILA Debug core in your RTL Kernel. The RTL ## 3. Host code changes to support debugging -The application host code needs to be modified to ensure you can set up the ILA trigger conditions **prior** to running the kernel. - +The application host code needs to be modified to ensure you can set up the ILA trigger conditions **prior** to running the kernel. The host code shown below introduces the wait for the setup of ILA Trigger conditions and the arming of the ILA. diff --git a/Vitis/docs/XRT_installation_instructions.md b/Vitis/docs/XRT_installation_instructions.md index 2b4c85c9..8ea0cc78 100644 --- a/Vitis/docs/XRT_installation_instructions.md +++ b/Vitis/docs/XRT_installation_instructions.md @@ -6,6 +6,7 @@ | Xilinx Vitis Tool Version | XRT Release Tag | SHA | `xrt` and `xrt-aws` pre-built RPM's (Centos/RHEL) | |---|---|---|---| +|2020.1| [202010.2.6.AWS](https://github.com/Xilinx/XRT/releases/tag/202010.2.6.AWS) | d09c4a458c16e8d843b3165dcf929c38f7a32b6f | [xrt_202010.2.6.0_7.7.1908-x86_64-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.9.0/Patches/XRT_2020_1/xrt_202010.2.6.0_7.7.1908-x86_64-xrt.rpm) [xrt_202010.2.6.0_7.7.1908-x86_64-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.9.0/Patches/XRT_2020_1/xrt_202010.2.6.0_7.7.1908-x86_64-aws.rpm) | |2019.2| [2019.2.0.3](https://github.com/Xilinx/XRT/releases/tag/2019.2.0.3) | 9e13d57c4563e2c19bf5f518993f6e5a8dadc18a | [xrt_201920.2.3.0_7.7.1908-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.8.0/Patches/XRT_2019_2/xrt_201920.2.3.0_7.7.1908-xrt.rpm) 
[xrt_201920.2.3.0_7.7.1908-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.8.0/Patches/XRT_2019_2/xrt_201920.2.3.0_7.7.1908-aws.rpm) | @@ -146,4 +147,4 @@ WARNING: card(s) marked by '*' are not ready, is MPD runing? run 'systemctl stat # Additional Documentation * [XRT Documentation](https://xilinx.github.io/XRT/master/html/) -* [XRT MPD Documentation](https://xilinx.github.io/XRT/master/html/cloud_vendor_support.html) \ No newline at end of file +* [XRT MPD Documentation](https://xilinx.github.io/XRT/master/html/cloud_vendor_support.html) diff --git a/Vitis/examples/xilinx_2020.1 b/Vitis/examples/xilinx_2020.1 new file mode 160000 index 00000000..6dc51743 --- /dev/null +++ b/Vitis/examples/xilinx_2020.1 @@ -0,0 +1 @@ +Subproject commit 6dc5174366f13e541af446213db0c98f401ba1e6 diff --git a/Vitis/kernel_version.txt b/Vitis/kernel_version.txt index 20429c73..eb0a1e6f 100644 --- a/Vitis/kernel_version.txt +++ b/Vitis/kernel_version.txt @@ -4,4 +4,5 @@ 3.10.0-957.5.1.el7.x86_64 3.10.0-957.27.2.el7.x86_64 3.10.0-1062.4.1.el7.x86_64 -3.10.0-1062.9.1.el7.x86_64 \ No newline at end of file +3.10.0-1062.9.1.el7.x86_64 +3.10.0-1127.10.1.el7.x86_64 diff --git a/Vitis/vitis_xrt_version.txt b/Vitis/vitis_xrt_version.txt index e37a320d..3c06762f 100644 --- a/Vitis/vitis_xrt_version.txt +++ b/Vitis/vitis_xrt_version.txt @@ -1 +1,4 @@ -2019.2:9e13d57c4563e2c19bf5f518993f6e5a8dadc18a \ No newline at end of file +2019.2:9e13d57c4563e2c19bf5f518993f6e5a8dadc18a +2020.1:12115fd4054cb46a5ade62fafa74c523f59116e6 +2020.1:d09c4a458c16e8d843b3165dcf929c38f7a32b6f + diff --git a/docs/examples/example_list.md b/docs/examples/example_list.md new file mode 100644 index 00000000..5cff1e6d --- /dev/null +++ b/docs/examples/example_list.md @@ -0,0 +1,28 @@ +## Example Applications List + +| Accelerator Application | Example | Development Environment | Description | +| --------|---------|---------|-------| +| Custom hardware | [cl\_hello\_world](../../hdk/cl/examples/cl_hello_world) | 
HDK - RTL (Verilog) | Simple [getting started example](../../hdk/README.md) with minimal hardware | +| Custom hardware | [cl\_dram\_dma](../../hdk/cl/examples/cl_dram_dma) | HDK - RTL (Verilog) | Demonstrates CL connectivity to the F1 shell and connectivity to/from all DDRs | +| Custom hardware | [IP integration example using a GUI - cl\_dram\_dma\_hlx](../../hdk/cl/examples/cl_dram_dma_hlx) | HLx - Verilog | Demonstrates CL connectivity to the F1 shell and connectivity to/from DRAM using the Vivado IP Integrator GUI | +| Custom hardware | [Virtual Ethernet Application](../../sdk/apps/virtual-ethernet) | [Streaming Data Engine](../../hdk/cl/examples/cl_sde) | The Virtual Ethernet framework facilitates streaming Ethernet frames from a network interface (or any source) into the FPGA for processing and back out to some destination. Possible use cases for this include deep packet inspection, software defined networking, stream encryption or compression, and more. | +| Custom hardware | [Pipelined Workload Applications - cl\_dram\_dma\_data\_retention](../../hdk/docs/data_retention.md)| [HDK](../../hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_retention.c) [SDAccel](../../SDAccel/examples/aws/data_retention) | Demonstrates how to preserve data in DRAMs while swapping out accelerators. 
Applications that use a temporal accelerator pipeline can take advantage of this feature to reduce latency between FPGA image swaps | +| High Level Synthesis | [Digital Up-Converter - cl\_hls\_dds\_hlx](../../hdk/cl/examples/cl_hls_dds_hlx) | HLx - C-to-RTL | Demonstrates an example application written in C that is synthesized to RTL (Verilog) | +| Custom Hardware with Software Defined Acceleration | [RTL Kernels](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/rtl_kernels) | Vitis - RTL (Verilog) + C/C++/OpenCL | These examples demonstrate developing new hardware designs (RTL) in a Software Defined workflow| +| Vitis Compression Libraries | [File Compression using GZip](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/gzip_app) | Vitis - C/C++/OpenCL | This example demonstrates how to use Vitis Libraries to speed up GZIP compression on an FPGA | +| Vitis BLAS libraries | [Matrix Transposer using BLAS](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/transp) | Vitis - C/C++/OpenCL | This example shows how to use Vitis BLAS Libraries to create a Matrix Transposer on an FPGA | +| Vitis Financial libraries | [Monte Carlo European Engine](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/MCEuropeanEngine) | Vitis - C/C++/OpenCL | This example shows how to use Vitis Financial Libraries to accelerate MCEuropean Engine on an FPGA| + +## Application Notes + +App Note | Description | +|---------|---------| +| [Using PCIe Peer-2-Peer connectivity](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIe-Peer2Peer) | This app note shows how to use PCIe P2P connectivity on F1.16XL instances | +| [Using PCIM Port](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIM-Port) | This app note shows how to use the PCIM AXI port to transfer data between card and host memory | +| [Using PCIe User 
Interrupts](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIe-Interrupts) | This app note describes the basic kernel calls needed for a developer to write a custom interrupt service routine (ISR) and provides an example that demonstrates those calls | +| [Using PCIe Write Combining](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIe-Write-Combining) | This app note describes when to use write combining and how to take advantage of write combining in software for a F1 accelerator | + +## Workshops + +* [ReInvent:19 Workshop](https://github.com/awslabs/aws-fpga-app-notes/tree/master/reInvent19_Developer_Workshop) +* [ReInvent:18 Workshop](https://github.com/awslabs/aws-fpga-app-notes/tree/master/reInvent18_Developer_Workshop) diff --git a/hdk/docs/on_premise_licensing_help.md b/docs/on_premise_licensing_help.md similarity index 80% rename from hdk/docs/on_premise_licensing_help.md rename to docs/on_premise_licensing_help.md index cab84d18..d58903e6 100644 --- a/hdk/docs/on_premise_licensing_help.md +++ b/docs/on_premise_licensing_help.md @@ -1,39 +1,42 @@ - # Enabling on-premises development with Xilinx tools **NOTE: If you are developing on the AWS cloud and using AWS FPGA Developer AMI provided on AWS Marketplace, you can skip this document.** This document helps developers who choose to develop on-premises with specifying and licensing AWS-compatible Xilinx tools for use with the AWS FPGA HDK. 
+## Requirements for AWS HDK 1.4.16+ (2020.1) + * Xilinx Vivado or Vitis v2020.1 + * License: EF-VIVADO-SDX-VU9P-OP + * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_Unified_2020.1_0602_1208.tar.gz + * MD5 SUM Value: b018f7b331ab0446137756156ff944d9 + + ## Requirements for AWS HDK 1.4.13+ (2019.2) + * Xilinx Vivado or Vitis v2019.2 + * License: EF-VIVADO-SDX-VU9P-OP + * URL: https://www.xilinx.com/member/forms/download/xef-vitis.html?filename=Xilinx_Vitis_2019.2_1106_2127.tar.gz + * MD5 SUM Value: d63bae9cad9bcaa4b2c7f6df9480eaa6 + ## Requirements for AWS HDK 1.4.11+ (2019.1) * Xilinx Vivado v2019.1 or v2019.1.op (64-bit) * License: EF-VIVADO-SDX-VU9P-OP - * SW Build 2552052 on Fri May 24 14:47:09 MDT 2019 - * IP Build 2548770 on Fri May 24 18:01:18 MDT 2019 * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDAccel_2019.1_0524_1430_Lin64.bin * MD5 SUM Value: aa20eba36ebe480ec7ae59a4a8c85896 ## Requirements for AWS HDK 1.4.8+ (2018.3) * Xilinx Vivado v2018.3 or v2018.3.op (64-bit) * License: EF-VIVADO-SDX-VU9P-OP - * SW Build 2405991 on Thu Dec 6 23:36:41 MST 2018 - * IP Build 2404404 on Fri Dec 7 01:43:56 MST 2018 * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDx_op_Lin_2018.3_1207_2324_Lin64.bin&akdm=0 * MD5 SUM Value: aa20eba36ebe480ec7ae59a4a8c85896 ## Requirements for AWS HDK 1.4.4+ (2018.2) * Xilinx Vivado v2018.2 or v2018.2.op (64-bit) * License: EF-VIVADO-SDX-VU9P-OP - * SW Build 2258646 on Thu Jun 14 20:02:38 MDT 2018 - * IP Build 2256618 on Thu Jun 14 22:10:49 MDT 2018 * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDx_op_Lin_2018.2_0614_1954_Lin64.bin&akdm=0 * MD5 SUM Value: 6b6939e70d4fa90677d2c54a37ec25c7 ## Requirements for AWS HDK 1.3.7+ (2017.4) * Xilinx Vivado v2017.4.op (64-bit) * License: EF-VIVADO-SDX-VU9P-OP - * SW Build 2193838 on Tue Apr 10 18:06:59 MDT 2018 - * IP Build 2189296 on Tue Apr 10 19:39:46 MDT 2018 * URL: 
https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDx_op_2017.4_0411_1_Lin64.bin&akdm=0 * MD5 SUM Value: e0b59c86d5ddee601ab17a069d231207 diff --git a/hdk/README.md b/hdk/README.md index 032d0859..f3b74fa9 100644 --- a/hdk/README.md +++ b/hdk/README.md @@ -27,9 +27,9 @@ * Familiarity with concepts related to designing for FPGAs, DMA, DDR, AXI protocol and linux drivers * RTL simulation * Experience with simulation debug or FPGA runtime waveform viewer debug methods -* Developers not familiar with these areas should start with [software defined acceleration](../SDAccel/README.md) -* Developers with existing RTL IP that are not familiar with the areas listed above should start with RTL Kernel development using [software defined acceleration](../SDAccel/README.md). -* Developers looking for a faster HDK development path, should start with RTL Kernel development using [software defined acceleration](../SDAccel/README.md) +* Developers not familiar with these areas should start with [software defined acceleration](../Vitis/README.md) +* Developers with existing RTL IP that are not familiar with the areas listed above should start with RTL Kernel development using [software defined acceleration](../Vitis/README.md). +* Developers looking for a faster HDK development path, should start with RTL Kernel development using [software defined acceleration](../Vitis/README.md) * The [documents directory](./docs) provides the specification for the AWS Shell (SH) to Custom Logic (CL) interface: * [Shell Interface](./docs/AWS_Shell_Interface_Specification.md) @@ -44,7 +44,8 @@ * Developers should not need to change any file under the `/common` directory * `shell_stable` directory contains the files needed by developers to build a CL using a current production shell. -* The [Custom Logic (cl) directory](./cl) is where the Custom Logic is expected to be developed (For RTL-based development using Verilog or VHDL). 
It includes a number of examples under the [examples directory](./cl/examples), as well as a placeholder for the developer's own Custom Logic under [developer_designs directory](./cl/developer_designs). For more details on the examples, see the [examples table](./cl/examples/cl_examples_list.md). +* The [Custom Logic (cl) directory](./cl) is where the Custom Logic is expected to be developed (For RTL-based development using Verilog or VHDL). It includes a number of examples under the [examples directory](./cl/examples), as well as a placeholder for the developer's own Custom Logic under [developer_designs directory](./cl/developer_designs). +For more details on the examples, see the [examples table](./cl/examples/cl_examples_list.md). ## Getting Started @@ -52,18 +53,19 @@ #### AWS Account, F1/EC2 Instances, On-Premises, AWS IAM Permissions, AWS CLI and S3 Setup (One-time Setup) * [Setup an AWS Account](https://aws.amazon.com/free/) -* Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with Vivado and required licenses. Given the large size of the FPGA used inside the AWS FPGA instances, the implementation tools require 32GiB Memory (ex: c4.4xlarge, m4.2xlarge, r4.xlarge, t2.2xlarge). c4.4xlarge and c4.8xlarge would provide the fastest execution time with 30 and 60GiB of memory respectively. Developers who want to save on cost, would start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. Follow the [On-Premises Instructions](docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. +* Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with Vivado and required licenses. 
Given the large size of the FPGA used inside the AWS FPGA instances, the implementation tools require 32GiB Memory (ex: c4.4xlarge, m4.2xlarge, r4.xlarge, t2.2xlarge). c4.4xlarge and c4.8xlarge would provide the fastest execution time with 30 and 60GiB of memory respectively. Developers who want to save on cost, would start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. Follow the [On-Premises Instructions](../docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. * The compatibility table describes the mapping of developer kit version to [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) version: | Developer Kit Version | Tool Version Supported | Compatible FPGA Developer AMI Version | |-----------|-----------|------| | 1.3.0-1.3.6 | 2017.1(Deprecated) | v1.3.5(Deprecated) | | 1.3.7-1.3.X | 2017.1(Deprecated) | v1.3.5-v1.3.X(Deprecated) | -| 1.3.7-1.3.X | 2017.4 | v1.4.0-v1.4.X (Xilinx SDx 2017.4) | -| 1.4.0-1.4.2 | 2017.4 | v1.4.0-v1.4.X (Xilinx SDx 2017.4) | -| 1.4.3-1.4.7 | 2018.2 | v1.5.0 (Xilinx SDx 2018.2) | -| 1.4.8-1.4.10 | 2018.3 | v1.6.0 (Xilinx SDx 2018.3) | -| 1.4.11-1.4.X | 2019.1 | v1.7.0 (Xilinx SDx 2019.1) | +| 1.3.7-1.4.15a | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado 2017.4) | +| 1.4.3-1.4.15a | 2018.2 | v1.5.0 (Xilinx Vivado 2018.2) | +| 1.4.8-1.4.15a | 2018.3 | v1.6.0 (Xilinx Vivado 2018.3) | +| 1.4.11-1.4.x | 2019.1 | v1.7.0 (Xilinx Vivado 2019.1) | +| 1.4.11-1.4.x | 2019.2 | v1.8.x (Xilinx Vivado 2019.2) | +| 1.4.16-1.4.x | 2020.1 | v1.9.x (Xilinx Vivado 2020.1) | * The FPGA Developer Kit version is listed in [hdk_version.txt](./hdk_version.txt) @@ -73,23 +75,31 @@ * [Setup AWS CLI and S3 Bucket](../SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md) to enable AFI creation. 
* To install the AWS CLI, please follow the [AWS CLI Installation guide](http://docs.aws.amazon.com/cli/latest/userguide/installing.html). - + ``` $ aws configure # to set your credentials (found in your console.aws.amazon.com page) and default region + ``` Use the aws-cli [region](http://docs.aws.amazon.com/cli/latest/userguide/cli-command-line.html) command line argument to override the profile default region. Supported regions include: us-east-1, us-west-2, eu-west-1 and us-gov-west-1. #### Install the HDK and setup environment -The AWS FPGA HDK can be cloned to your EC2 instance or server by executing: +The AWS FPGA HDK can be cloned to your instance by executing: -When using the developer AMI: ```AWS_FPGA_REPO_DIR=/home/centos/src/project_data/aws-fpga``` +> When using the FPGA Developer AMI, add: +> `AWS_FPGA_REPO_DIR=/home/centos/src/project_data/aws-fpga` +```bash $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR $ cd $AWS_FPGA_REPO_DIR $ source hdk_setup.sh +``` -Note that sourcing `hdk_setup.sh` will set required environment variables that are used throughout the examples in the HDK. DDR simulation models and DCP(s) are downloaded from S3 during hdk setup. New terminal or xterm requires `hdk_setup.sh` to be rerun. +Sourcing `hdk_setup.sh` does the following: +* It sets required environment variables that are used throughout the examples in the HDK. +* Downloads DDR simulation models and DCP(s) from S3. + +New terminals or xterm require `hdk_setup.sh` to be rerun so that the correct environment variables are set. #### Review examples @@ -102,15 +112,18 @@ The [Examples readme](./cl/examples/cl_examples_list.md) provides an overview of #### Fast path to running CL Examples on FPGA Instance -For developers that want to skip the development flow and start running the examples on the FPGA instance. You can skip steps 1 through 3 if you are not interested in the development process. 
Step 4 through 6 will show you how to use one of the predesigned AFI examples. By using the public AFIs, developers can skip the build flow steps and jump to step 4. [Public AFIs are available for each example and can be found in the example/README](cl/examples/cl_hello_world/README.md#metadata). +For developers that want to skip the development flow and start running the examples on the FPGA instance. You can skip steps 1 through 3 if you are not interested in the development process. Step 4 through 6 will show you how to use one of the pre-designed AFI examples. +By using the public AFIs, developers can skip the build flow steps and jump to step 4. [Public AFIs are available for each example and can be found in the example/README](cl/examples/cl_hello_world/README.md#metadata). #### Step 1. Pick one of the examples and start in the example directory It is recommended that you complete this step-by-step guide using HDK hello world example. Next use this same guide to develop using the [cl\_dram\_dma](cl/examples/cl_dram_dma). When your ready, copy one of the examples provided and modify the design files, scripts and constraints directory. +``` $ cd $HDK_DIR/cl/examples/cl_hello_world # you can change cl_hello_world to cl_dram_dma, cl_uram_example or cl_hello_world_vhdl $ export CL_DIR=$(pwd) +``` Setting up the CL_DIR environment variable is crucial as the build scripts rely on that value. Each example follows the recommended directory structure to match the expected structure for HDK simulation and build scripts. @@ -121,20 +134,25 @@ Each example follows the recommended directory structure to match the expected s This [checklist](./cl/CHECKLIST_BEFORE_BUILDING_CL.md) should be consulted before you start the build process. **NOTE** *This step requires you to have Xilinx Vivado Tools and Licenses installed* - +``` $ vivado -mode batch # Verify Vivado is installed. 
+``` Executing the `aws_build_dcp_from_cl.sh` script will perform the entire implementation process converting the CL design into a completed Design Checkpoint that meets timing and placement constrains of the target FPGA. The output is a tarball file comprising the DCP file, and other log/manifest files, formatted as `YY_MM_DD-hhmm.Developer_CL.tar`. This file would be submitted to AWS to create an AFI. By default the build script will use Clock Group A Recipe A0 which uses a main clock of 125 MHz. +``` $ cd $CL_DIR/build/scripts $ ./aws_build_dcp_from_cl.sh +``` In order to use a 250 MHz main clock the developer can specify the A1 Clock Group A Recipe as in the following example: +``` $ cd $CL_DIR/build/scripts $ ./aws_build_dcp_from_cl.sh -clock_recipe_a A1 +``` Other clock recipes can be specified as well. More details on the [Clock Group Recipes Table](./docs/clock_recipes.csv) and how to specify different recipes can be found in the following [README](./common/shell_v04261818/new_cl_template/build/README.md). @@ -296,7 +314,7 @@ If fpga-describe-local-image API call returns a status 'Busy', the FPGA is still Now, let us try loading your AFI to FPGA `slot 0`: ``` - $ sudo fpga-load-local-image -S 0 -I agfi-0f0e045f919413242 + $ sudo fpga-load-local-image -S 0 -I agfi-0fcf87119b8e97bf3 ``` @@ -305,11 +323,10 @@ Now, let us try loading your AFI to FPGA `slot 0`: Now, you can verify that the AFI was loaded properly. The output shows the FPGA in the “loaded” state after the FPGA image "load" operation. The "-R" option performs a PCI device remove and recan in order to expose the unique AFI Vendor and Device Id. 
``` $ sudo fpga-describe-local-image -S 0 -R -H - Type FpgaImageSlot FpgaImageId StatusName StatusCode ErrorName ErrorCode ShVersion - AFI 0 agfi-0f0e045f919413242 loaded 0 ok 0 + AFI 0 agfi-0fcf87119b8e97bf3 loaded 0 ok 0 0x04261818 Type FpgaImageSlot VendorId DeviceId DBDF - AFIDEVICE 0 0x6789 0x1d50 0000:00:0f.0 + AFIDEVICE 0 0x1d0f 0xf000 0000:00:1d.0 ``` @@ -338,7 +355,7 @@ Follow the [RTL simulation environment setup](./docs/RTL_Simulating_CL_Designs.m * Before starting your new design review the specification for the AWS Shell (SH) to Custom Logic (CL) [interface](./docs/AWS_Shell_Interface_Specification.md). * Try the [debug flow](docs/Virtual_JTAG_XVC.md) and understand the [shell timeout behavior](docs/HOWTO_detect_shell_timeout.md). -* When your ready, copy an example to [start your own CL design](./cl/developer_designs/Starting_Your_Own_CL.md) and make a simple modification to get familiar with customizing the hardware developer kit for your development needs. +* When you are ready, copy an example to [start your own CL design](./cl/developer_designs/Starting_Your_Own_CL.md) and make a simple modification to get familiar with customizing the hardware developer kit for your development needs. diff --git a/hdk/cl/examples/cl_hello_world/README.md b/hdk/cl/examples/cl_hello_world/README.md index fdf690af..213086ef 100644 --- a/hdk/cl/examples/cl_hello_world/README.md +++ b/hdk/cl/examples/cl_hello_world/README.md @@ -1,7 +1,7 @@ # Hello World CL Example -## :exclamation: NOTE: If this is your first time using F1, you should read [How To Create an Amazon FPGA Image (AFI) From One of The CL Examples: Step-by-Step Guide](./../../../README.md) first!! +## **⚠️ NOTE:** If this is your first time using F1, you should read [How To Create an Amazon FPGA Image (AFI) From One of The CL Examples: Step-by-Step Guide](./../../../README.md) first!! 
## Table of Contents @@ -13,29 +13,24 @@ ## Overview -This simple *hello_world* example builds a Custom Logic (CL) that will enable the instance to "peek" and "poke" registers in the Custom Logic (CL). -These registers will be in the memory space behind AppPF BAR0, which is the ocl\_cl\_ AXI-lite bus on the Shell to CL interface. - +This *hello_world* example builds a Custom Logic (CL) that will enable the instance to "peek" and "poke" registers in the Custom Logic (CL). This example demonstrates a basic use-case of the Virtual LED and Virtual DIP switches. -All of the unused interfaces between AWS Shell and the CL are tied to fixed values, and it is recommended that the developer use similar values for every unused interface in the developer's CL. - - ## Functional Description -The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. The cl_hello_world example implements two registers in the FPGA AppPF BAR0 memory space connected to the OCL AXI-L interface. The two registers are: +The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. +The cl_hello_world example implements two registers in the [FPGA AppPF BAR0 memory space](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) connected to the OCL AXI-L interface. +The two registers are: 1. Hello World Register (offset 0x500) 2. Virtual LED Register (offset 0x504) -Please refer to the [FPGA PCIe memory space overview](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) The Hello World Register is a 32-bit read/write register. However, in order to demonstrate that the register is being accessed correctly, the read data returned for the register will be byte swapped. 
The Virtual LED register is a 16-bit read-only register that shadows the lower 16 bits of the Hello World Register such that it will hold the same value as bits 15:0 of the Hello World Register. -The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consistes of two signals described in the [cl_ports.vh] (./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: +The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consists of two signals described in the [cl_ports.vh](./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: ``` @@ -43,9 +38,12 @@ The cl_hello_world design utilizes the Virtual LED and DIP switch interface whic output logic[15:0] cl_sh_status_vled, //Virtual LEDs, monitored through FPGA management PF and tools ``` -In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. +In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. +In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. +So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. -While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. 
+While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. +While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. ### Unused interfaces diff --git a/hdk/cl/examples/cl_hello_world_vhdl/README.md b/hdk/cl/examples/cl_hello_world_vhdl/README.md index 1722a67c..76e8158b 100644 --- a/hdk/cl/examples/cl_hello_world_vhdl/README.md +++ b/hdk/cl/examples/cl_hello_world_vhdl/README.md @@ -10,14 +10,9 @@ ## Overview -The purpose of this example is to provide an environment for VHDL users which uses the hello_world example. -This hello_world_vhdl example is based upon the main hello_world example except for a VHDL wrapper is provided for VHDL users. -This design can be modified to include or exclude certain interfaces for VHDL logic and mean't to be modified for VHDL designs/users. -Unused interfaces interfaces between AWS Shell and the CL are automatically tied off based upon `define in cl_hello_world_defines.vh. - -This simple *hello_world* example builds a Custom Logic (CL) that will enable the instance to "peek" and "poke" registers in the Custom Logic (C). These registers will be in the memory space behind AppPF BAR0, which is the ocl\_cl\_ AXI-lite bus on the Shell to CL interface. - -This example demonstrate a basic use-case of the Virtual LED and Virtual DIP switches. +The purpose of this example is to provide an environment for VHDL users which uses the `hello_world` example. +This `hello_world_vhdl` example is based upon the main `hello_world` example except that a VHDL wrapper is provided for VHDL users. +This design can be modified to include or exclude certain interfaces for VHDL logic. Please read here for [general instructions to build the CL, register an AFI, and start using it on an F1 instance](./../../../README.md). 
@@ -25,20 +20,20 @@ Please read here for [general instructions to build the CL, register an AFI, and ## Functional Description -The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. The cl_hello_world example implements two registers in the FPGA AppPF BAR0 memory space connected to the OCL AXI-L interface. The two registers are: +The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. +The cl_hello_world example implements two registers in the [FPGA AppPF BAR0 memory space](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) connected to the OCL AXI-L interface. +The two registers are: 1. Hello World Register (offset 0x500) 2. Virtual LED Register (offset 0x504) -Please refer to the [FPGA PCIe memory space overview](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) - -The Hello World logic is incorporated into a verilog module and called out in the VHDL wrapper. However, the debug logic is written in the VHDL wrapper. +The Hello World logic is incorporated into a verilog module and called out in the VHDL wrapper. However, the debug logic is written in the VHDL wrapper. The Hello World Register is a 32-bit read/write register. However, in order to demonstrate that the register is being accessed correctly, the read data returned for the register will be byte swapped. The Virtual LED register is a 16-bit read-only register that shadows the lower 16 bits of the Hello World Register such that it will hold the same value as bits 15:0 of the Hello World Register. 
-The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consistes of two signals described in the [cl_ports.vh] (./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: +The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consists of two signals described in the [cl_ports.vh](./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: ``` @@ -46,11 +41,17 @@ The cl_hello_world design utilizes the Virtual LED and DIP switch interface whic output logic[15:0] cl_sh_status_vled, //Virtual LEDs, monitored through FPGA management PF and tools ``` -In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. +In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. +In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. +So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. + +While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. + +### Unused interfaces -While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. 
+The Hello World example does not use most of AWS Shell interface, hence the unused signals are tied off. +At the end of `cl_hello_world.sv` file, there is a specific `include` command for an interface-specific `.inc` file, to handle the tie-offs for every unused interface. - ### VHDL Wrapper Information Clock/Reset/General Information @@ -61,9 +62,9 @@ MISC Interfaces are not added in wrappers (Interrupts). PCIM hasn't been fully tested in the VHDL flow. Use at your own risk but provide feedback if used. -Below is the hiearchy of the design. +Below is the hierarchy of the design. -cl_hello_world.sv - This module uses `define that are configured in cl_hello_world_defines.sv and ensure to tie off signals to the SH when necessary for seamless usage of the different flows (VHDL Flow this file shouldn't be modified) +cl_hello_world.sv - This module uses `define that are configured in cl_hello_world_defines.sv and ensure to tie off signals to the SH when necessary for seamless usage of the different flows (VHDL Flow this file shouldn't be modified) cl_hello_world_defines.sv - Comment out AXI Interfaces that are not used (AXIL_OCL, AXIL_USR, AXIL_SDA, DMA_PCIS, DDR4_SH, DDR4_CL, PCIM). -cl_vhdl_wrapper.vhd - VHDL users are encouraged to modify this wrapper based upon design requirements. VHDL Wrapper flow Can use generate statements to connect signals from top level ports when AXI Interfaces are used. Not required to use these generates statements but makes code more cleaner. This file currently connects the hello_world module for OCL AXI interface and VLED and VDIP logic and contains debug logic. @@ -79,11 +80,11 @@ Alternatively, you can directly use a pre-generated AFI for this CL. 
| Key | Value | |-----------|------| -| Shell Version | 0x04151701 | +| Shell Version | 0x04261818 | | PCI Device ID | 0xF000 | | PCI Vendor ID | 0x1D0F (Amazon) | | PCI Subsystem ID | 0x1D51 | | PCI Subsystem Vendor ID | 0xFEDD | -| Pre-generated AFI ID | afi-0f0927bc2649e6259 | -| Pre-generated AGFI ID | agfi-0f0e045f919413242 | +| Pre-generated AFI ID | afi-03d11a4ea66e883ef | +| Pre-generated AGFI ID | agfi-0fcf87119b8e97bf3 | diff --git a/hdk/common/verif/scripts/.gitignore b/hdk/common/verif/scripts/.gitignore index bd7e70f1..1366d166 100644 --- a/hdk/common/verif/scripts/.gitignore +++ b/hdk/common/verif/scripts/.gitignore @@ -1,2 +1,2 @@ .done - +tmp diff --git a/hdk/common/verif/tb/scripts/Makefile.common.inc b/hdk/common/verif/tb/scripts/Makefile.common.inc index 9802eb89..c9325c4e 100644 --- a/hdk/common/verif/tb/scripts/Makefile.common.inc +++ b/hdk/common/verif/tb/scripts/Makefile.common.inc @@ -107,58 +107,18 @@ COMMON_LIBLISTS =\ unimacro\ secureip\ xpm - -ifeq ($(VIVADO_TOOL_VERSION), v2017.4) - COMMON_LIBLISTS +=\ - axi_register_slice_v2_1_15\ - axi_infrastructure_v1_1_0\ - axi_crossbar_v2_1_16\ - axi_clock_converter_v2_1_14\ - fifo_generator_v13_2_1\ - fifo_generator_v13_1_4\ - axi_data_fifo_v2_1_14\ - generic_baseblocks_v2_1_0 -else ifeq ($(VIVADO_TOOL_VERSION), v2018.3) - COMMON_LIBLISTS +=\ - axi_register_slice_v2_1_18\ - axi_infrastructure_v1_1_0\ - axi_crossbar_v2_1_19\ - axi_clock_converter_v2_1_17\ - fifo_generator_v13_2_3\ - fifo_generator_v13_1_4\ - axi_data_fifo_v2_1_17\ - generic_baseblocks_v2_1_0 -else ifeq ($(VIVADO_TOOL_VERSION), v2019.1) - COMMON_LIBLISTS +=\ - axi_register_slice_v2_1_19\ - axi_infrastructure_v1_1_0\ - axi_crossbar_v2_1_20\ - axi_clock_converter_v2_1_18\ - fifo_generator_v13_2_4\ - fifo_generator_v13_1_4\ - axi_data_fifo_v2_1_18\ - generic_baseblocks_v2_1_0 -else ifeq ($(VIVADO_TOOL_VERSION), v2019.2) - COMMON_LIBLISTS +=\ - axi_register_slice_v2_1_20\ - axi_infrastructure_v1_1_0\ - axi_crossbar_v2_1_21\ - 
axi_clock_converter_v2_1_19\ - fifo_generator_v13_2_5\ - fifo_generator_v13_1_4\ - axi_data_fifo_v2_1_19\ - generic_baseblocks_v2_1_0 -else - COMMON_LIBLISTS +=\ - axi_register_slice_v2_1_17\ - axi_infrastructure_v1_1_0\ - axi_crossbar_v2_1_18\ - axi_clock_converter_v2_1_16\ - fifo_generator_v13_2_2\ - fifo_generator_v13_1_4\ - axi_data_fifo_v2_1_16\ - generic_baseblocks_v2_1_0 -endif +COMMON_LIBLISTS +=\ + $(shell cd $(COMPLIB_DIR) >/dev/null 2>&1;\ + for i in\ + axi_register_slice_v2_1_\ + axi_infrastructure_v1_1_\ + axi_crossbar_v2_1_\ + axi_clock_converter_v2_1_\ + fifo_generator_v13_2_\ + fifo_generator_v13_1_\ + axi_data_fifo_v2_1_\ + generic_baseblocks_v2_1_;\ + do ls | grep $$i; done) include $(HDK_COMMON_DIR)/verif/tb/scripts/Makefile.$(SIMULATOR).inc @@ -177,3 +137,7 @@ $(HDK_COMMON_DIR)/verif/models/sh_bfm/cl_ports_sh_bfm.vh: $(HDK_SHELL_DESIGN_DIR make_sim_dir: $(HDK_COMMON_DIR)/verif/models/sh_bfm/cl_ports_sh_bfm.vh mkdir -p $(SIM_ROOT) + +show_common_liblists: + @ for i in $(COMMON_LIBLISTS); do echo $$i; done + diff --git a/hdk/docs/AFI_Manifest.md b/hdk/docs/AFI_Manifest.md index 4386b68a..b120f362 100644 --- a/hdk/docs/AFI_Manifest.md +++ b/hdk/docs/AFI_Manifest.md @@ -40,6 +40,7 @@ The manifest file is a text file formatted with key=value pairs. Some keys are m | vivado tool version | field value | |------------------- | -----------| +| 2020.1 | tool_version=v2020.1 | | 2019.2 | tool_version=v2019.2 | | 2019.1 | tool_version=v2019.1 | | 2018.3 | tool_version=v2018.3 | diff --git a/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md b/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md index e2a9fab4..f8a4cb17 100644 --- a/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md +++ b/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md @@ -4,7 +4,7 @@ This document describes the changes required when migrating your design from shell v1.3 to shell v1.4. The HDK build scripts have changed to reflect the new v1.4 shell’s floorplan and newer Vivado tools. 
It’s strongly recommended users move to these scripts. Users who have already customized v1.3 scripts should diff those with the v1.4 scripts and be sure to include all new parameters that have been added to v1.4 scripts. -1. Upgrade Vivado Tools to version 2017.4 or later. Needs [FPGA DEVELOPER AMI 1.4 or later](../../README.md#overviewdevtools) +1. Upgrade Vivado Tools to version 2019.1 or later. Needs [FPGA DEVELOPER AMI 1.4 or later](../../README.md#fpga-developer-ami) 2. The hierarchy for CL & SH modules have changed. Now they are instantiated in "WRAPPER_INST" Module. The paths in your Build scripts, constraints & verification components have to be updated. diff --git a/hdk/docs/HOWTO_detect_shell_timeout.md b/hdk/docs/HOWTO_detect_shell_timeout.md index 2b7c750c..e304009c 100644 --- a/hdk/docs/HOWTO_detect_shell_timeout.md +++ b/hdk/docs/HOWTO_detect_shell_timeout.md @@ -1,18 +1,22 @@ # AXI Slave Timeouts (DMA_PCIS) -* The Shell provides a timeout mechanism which terminates any outstanding AXI transactions after 8 uS. There is a separate timeout per interface. Upon the first timeout, metrics registers are updated with the offending address and a counter is incremented. Upon further timeouts the counter is incremented. These metrics registers can be read via the fpga-describe-local-image found in [Amazon FPGA Image Management Tools README](../../sdk//userspace/fpga_mgmt_tools/README.md) +* The Shell provides a timeout mechanism which terminates any outstanding AXI transactions after 8 uS. + * There is a separate timeout per interface. + * Upon the first timeout, metrics registers are updated with the offending address and a counter is incremented. + * Upon further timeouts the counter is incremented. + * These metrics registers can be read via the [fpga-describe-local-image found in Amazon FPGA Image Management Tools](../../sdk/userspace/fpga_mgmt_tools/README.md) * Timeouts can occur for three reasons: - 1. 
The CL doesn’t respond to the address (reserved address space) + 1. The CL doesn't respond to the address (reserved address space) 2. The CL has a protocol violation on AXI which hangs the bus 3. The CL design’s latency is exceeding the timeout value. For example if the cycle is going to DDR, accumulated DDR arbitration and access latenencies may exceed the timeout value. * Best practice is to ensure addresses to reserved address space are fully decoded in your CL design. -* If accesing DDR, note DMA accesses to DDR will accumulate which can lead to timeouts if the transactions are not completed fast enough. This is especially true for CL designs operating at 125MHz or below. See [cl_dram_dma](../cl/examples/cl_dram_dma). This example illustrates best practice for DMA operations to DDR. +* If accessing DDR, note DMA accesses to DDR will accumulate which can lead to timeouts if the transactions are not completed fast enough. This is especially true for CL designs operating at 125MHz or below. See [cl_dram_dma](../cl/examples/cl_dram_dma). This example illustrates best practice for DMA operations to DDR. * CL designs which have multiple masters to the AXI "fabric" will also incur arbitration delays. * If you suspect a timeout, debug by reading the metrics registers. The saved offending address should help narrow whether this is to DDR or registers/RAMs inside the FPGA. The developer should investigate if design parameters allow for long latency responses to the offending address. If not, then the developer should investigate protocol violations. -* **WARNING**: Once a timeout happens the DMA/PCIS interface may no longer be functional and the AFI/Shell must be re-loaded. This can be done by adding the "-F" option to [fpga-load-local-image](../../sdk/userspace/fpga_mgmt_tools/README.md). +* **WARNING**: Once a timeout happens the DMA/PCIS interface may no longer be functional and the AFI/Shell must be re-loaded. 
This can be done by adding the "-F" option to [fpga-load-local-image](../../sdk/userspace/fpga_mgmt_tools/README.md). # AXI Master Timeouts (PCIM) * AXI Master transactions also have an 8us timeout. Timeout occur when the CL does not respond to some channel within 8us: @@ -74,9 +78,9 @@ DDR3 write-count=0 read-count=0 ``` -* For detailed infomation on metrics, see [Amazon FPGA Image Management Tools README](../../sdk//userspace/fpga_mgmt_tools/README.md) +* For detailed information on metrics, see [Amazon FPGA Image Management Tools README](../../sdk//userspace/fpga_mgmt_tools/README.md) ** NOTE **: The LSB 2 bits of timeout address (sdacl-slave-timeout-addr, virtual-jtag-slave-timeout-addr, ocl-slave-timeout-addr, bar1-slave-timeout-addr and dma-pcis-timeout-addr) in the metrics are used to report whether the timeout occurred due to READ or WRITE transaction. The bits in timeout address should be interpret as follows: > timeout-addr[1:0] == 2'b01 : Interface timed out on READ transaction (Could be either on AR or R channels). > timeout-addr[1:0] == 2'b10 : Interface timed out on WRITE transaction (Could be on AW, W or B channels). - > True 32bit aligned address that triggered first timeout = {timeout-addr[1:0], 2'b00}. \ No newline at end of file + > True 32bit aligned address that triggered first timeout = {timeout-addr[1:0], 2'b00}. diff --git a/hdk/docs/IPI_GUI_Vivado_Setup.md b/hdk/docs/IPI_GUI_Vivado_Setup.md index 5888256c..1451248f 100644 --- a/hdk/docs/IPI_GUI_Vivado_Setup.md +++ b/hdk/docs/IPI_GUI_Vivado_Setup.md @@ -48,7 +48,7 @@ In init.tcl or Vivado\_init.tcl, add the following line based upon the $HDK\_SHE Download, install, and configure the license for Vivado SDx 2017.4, 2018.2, 2018.3 or 2019.1 for Windows. 
More information is provided at: -[On-Premises Licensing Help](./on_premise_licensing_help.md) +[On-Premises Licensing Help](../../docs/on_premise_licensing_help.md) Clone the `https://github.com/aws/aws-fpga` repository either through Github Desktop or Download ZIP and extract to a new folder location on the Windows machine. This is the install location. diff --git a/hdk/docs/RTL_Simulating_CL_Designs.md b/hdk/docs/RTL_Simulating_CL_Designs.md index 957d79a6..d071bd12 100644 --- a/hdk/docs/RTL_Simulating_CL_Designs.md +++ b/hdk/docs/RTL_Simulating_CL_Designs.md @@ -21,7 +21,7 @@ Developers can write their tests in SystemVerilog and/or C languages. If a devel One easy way is to have a pre-installed environment is to use the [AWS FPGA Developer AMI available on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes with pre-installed Vivado tools and license. -For developers who like to work on-premises or different AMI in the cloud, AWS recommends following the [required license for on-premise document](./on_premise_licensing_help.md). +For developers who like to work on-premises or different AMI in the cloud, AWS recommends following the [required license for on-premise document](../../docs/on_premise_licensing_help.md). Please refer to the [release notes](../../RELEASE_NOTES.md) or the [supported Vivado version](../../supported_vivado_versions.txt) for the exact version of Vivado tools, and the required license components. diff --git a/hdk/docs/Virtual_JTAG_XVC.md b/hdk/docs/Virtual_JTAG_XVC.md index 2551e5a8..f5121fb7 100644 --- a/hdk/docs/Virtual_JTAG_XVC.md +++ b/hdk/docs/Virtual_JTAG_XVC.md @@ -188,7 +188,7 @@ The connection Vivado and the target instance can be terminated by closing the X # Embedding Debug Cores in the CL -Before beginning, it should be noted that the following only applies to the HDK flow. 
For adding debug cores to a design using SDAccel, see [Debug_RTL_Kernel.md](../../SDAccel/docs/Debug_RTL_Kernel.md) for instructions on how to do so. +> ⚠️ **NOTE:** Before beginning, it should be noted that the following only applies to the HDK flow. [SDAccel instructions](../../SDAccel/docs/Debug_RTL_Kernel.md) and [Vitis instructions](../../Vitis/docs/Debug_Vitis_Kernel.md) are also available. The Custom Logic (CL) is required to include the [CL Debug Bridge](../common/shell_v04261818/design/ip/cl_debug_bridge/sim/cl_debug_bridge.v) provided by AWS as part of the HDK, and any required standard Xilinx debug IP components like ILAs and VIOs. @@ -217,6 +217,7 @@ cl_debug_bridge CL_DEBUG_BRIDGE ( .bscanid(bscanid) ); ``` +**NOTE:** According to [UG908](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug908-vivado-programming-debugging.pdf), the debug hub clock should be at least 2.5x faster than the JTAG clock frequency. The JTAG clock frequency is fixed in the AWS Shell at 31.25MHz. Therefore the frequency of the clock connected to the cl_debug_bridge should be at least 2.5 x 31.25MHz = 78.125MHz. Otherwise the debug network will not work. However, the minimum clock frequency requirement does not apply for ILA and rest of the CL logic. If CL design is running on a slower clock from the available [clock_recipes](https://github.com/aws/aws-fpga/blob/master/hdk/docs/clock_recipes.csv) then care must be taken that cl_debug_bridge is clocked at 78.125MHz or above speed. The following list describes the steps to successfully setup debug in a CL: @@ -288,7 +289,7 @@ Press CTRL-C to stop the service. No, other customer instances running on the same F1 server do not have access to the Virtual JTAG of your instance. -**Q: I am getting this error:** +**Q: Why am I getting this error?** ``` % fpga-start-virtual-jtag -P 10201 -S 0
Error: (1) internal-error ``` -This could mean there is already a server running with thtat TCP port. Either find this process and kill it, or choose a different TCP port. +This could mean there is already a server running with that TCP port. Either find this process and kill it, or choose a different TCP port. +**Q: Why am I getting this error?** + +``` +ERROR: [Xicom 50-38] xicom: Device:0, user chain number:1, slave index:3. Reading intermittently wrong data from core. Try slower target speed. Make sure design meets timing requirements. +ERROR: [Xicom 50-38] xicom: Device:0, user chain number:1, slave index:3, is not a valid CseXsdb Slave core. +ERROR: [Labtools 27-3176] hw_server failed during internal command. +Resolution: Check that the hw_server is running and the hardware connectivity to the target + +``` + +This means the clock connected to the cl_debug_bridge module is slower than the required minimum of 78.125MHz. Please choose a faster clock to connect to your cl_debug_bridge. **Q: What is XVC and where can I learn about it?** Xilinc Virtual Cable (XVC) is a protocol for transferring JTAG commands over TCP/IP network connection between a debug tool (like Vivado Lab Edition Hardware Manager) and a debug target. More information including a link to the full specification for XVC version 1.0 is available [here](https://www.xilinx.com/products/intellectual-property/xvc.html). 
+ + diff --git a/hdk/docs/ppts/simulation.pptx b/hdk/docs/ppts/simulation.pptx deleted file mode 100644 index 657074a7..00000000 Binary files a/hdk/docs/ppts/simulation.pptx and /dev/null differ diff --git a/hdk/docs/ppts/simulation/Slide1.PNG b/hdk/docs/ppts/simulation/Slide1.PNG deleted file mode 100644 index 02f4e9b1..00000000 Binary files a/hdk/docs/ppts/simulation/Slide1.PNG and /dev/null differ diff --git a/hdk/docs/ppts/simulation/Slide2.PNG b/hdk/docs/ppts/simulation/Slide2.PNG deleted file mode 100644 index bbfd4e62..00000000 Binary files a/hdk/docs/ppts/simulation/Slide2.PNG and /dev/null differ diff --git a/hdk/docs/ppts/simulation/Slide3.PNG b/hdk/docs/ppts/simulation/Slide3.PNG deleted file mode 100644 index 4d91a413..00000000 Binary files a/hdk/docs/ppts/simulation/Slide3.PNG and /dev/null differ diff --git a/hdk/hdk_version.txt b/hdk/hdk_version.txt index ed532a29..654491f1 100644 --- a/hdk/hdk_version.txt +++ b/hdk/hdk_version.txt @@ -1 +1 @@ -HDK_VERSION=1.4.15 +HDK_VERSION=1.4.16 diff --git a/hdk/tests/test_gen_dcp.py b/hdk/tests/test_gen_dcp.py index 0a995623..24ba04ad 100644 --- a/hdk/tests/test_gen_dcp.py +++ b/hdk/tests/test_gen_dcp.py @@ -84,6 +84,7 @@ def set_allowed_warnings(cls): (('.*',), r'^CRITICAL WARNING: \[Opt 31-430\].*'), (('.*',), r'WARNING: \[Vivado 12-3731\].*'), (('.*',), r'WARNING: \[Constraints 18-619\] A clock with name \'CLK_300M_DIMM._DP\'.*'), + (('.*',), r'WARNING: \[Constraints 18-5648\] .*'), (('.*',), r'WARNING: \[Vivado_Tcl 4-391\] The following IPs are missing output products for Implementation target. These output products could be required for synthesis, please generate the output products using the generate_target or synth_ip command before running synth_design.*'), (('.*',), r'WARNING: \[DRC RPBF-3\] IO port buffering.*'), (('.*',), r'WARNING: \[Place 46-14\] The placer has determined that this design is highly congested and may have difficulty routing. 
Run report_design_analysis -congestion for a detailed report\.'), @@ -95,6 +96,8 @@ def set_allowed_warnings(cls): (('.*',), r'WARNING: \[Synth 8-689\] .*'), (('.*',), r'WARNING: \[Synth 8-6896\] .*'), (('.*',), r'WARNING: \[Synth 8-7023\] .*'), + (('.*',), r'WARNING: \[Synth 8-7071\] .*'), + (('.*',), r'WARNING: \[Synth 8-7129\] .*'), (('cl_sde_*',), r'WARNING: \[Vivado 12-180\] No cells matched .*'), (('cl_sde_*',), r'WARNING: \[Vivado 12-1008\] No clocks found for command.*'), (('cl_sde_*',), r'CRITICAL WARNING: \[Designutils 20-1280\] .*'), diff --git a/hdk_setup.sh b/hdk_setup.sh index 969bbf2c..7086176b 100644 --- a/hdk_setup.sh +++ b/hdk_setup.sh @@ -209,13 +209,13 @@ if ! make -s -C $HDK_DIR/common/verif/scripts MODEL_DIR=$models_dir; then return 2 fi -if [[ ":$cl_dir" == ':' ]]; then - info_msg "attention: don't forget to set the cl_dir variable for the directory of your custom logic."; +if [[ ":$CL_DIR" == ':' ]]; then + info_msg "attention: don't forget to set the CL_DIR variable for the directory of your custom logic."; else - info_msg "cl_dir is $cl_dir" - if [ ! -d $cl_dir ]; then - err_msg "cl_dir doesn't exist. set cl_dir to a valid directory." - unset cl_dir + info_msg "CL_DIR is $CL_DIR" + if [ ! -d $CL_DIR ]; then + err_msg "CL_DIR doesn't exist. set CL_DIR to a valid directory." 
+ unset CL_DIR fi fi diff --git a/sdk/linux_kernel_drivers/xocl/10-xocl.rules b/sdk/linux_kernel_drivers/xocl/10-xocl.rules deleted file mode 100644 index 297a72b0..00000000 --- a/sdk/linux_kernel_drivers/xocl/10-xocl.rules +++ /dev/null @@ -1 +0,0 @@ -KERNEL=="renderD*",MODE="0666" diff --git a/sdk/linux_kernel_drivers/xocl/LICENSE b/sdk/linux_kernel_drivers/xocl/LICENSE deleted file mode 100644 index d159169d..00000000 --- a/sdk/linux_kernel_drivers/xocl/LICENSE +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. 
-These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. 
The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. 
- - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. 
- -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. 
However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. 
-You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. 
diff --git a/sdk/linux_kernel_drivers/xocl/Makefile b/sdk/linux_kernel_drivers/xocl/Makefile deleted file mode 100644 index 9a334137..00000000 --- a/sdk/linux_kernel_drivers/xocl/Makefile +++ /dev/null @@ -1,70 +0,0 @@ -# Amazon FPGA Hardware Development Kit -# -# Copyright 2016-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Amazon Software License (the "License"). You may not use -# this file except in compliance with the License. A copy of the License is -# located at -# -# http://aws.amazon.com/asl/ -# -# or in the "license" file accompanying this file. This file is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or -# implied. See the License for the specific language governing permissions and -# limitations under the License. - -XOCL_DIR = $(shell pwd) - -obj-m += xocl.o -ccflags-y := -Iinclude/drm - -xocl-y := \ - xocl_sysfs.o \ - xocl_bo.o \ - xocl_drv.o \ - xocl_xdma.o \ - xocl_ioctl.o \ - xocl_test.o \ - xocl_ctx.o \ - xocl_xvc.o \ - xocl_exec.o \ - libxdma.o - -CONFIG_MODULE_SIG=n -KERNELDIR ?= /lib/modules/$(shell uname -r)/build - -PWD := $(shell pwd) -ROOT := $(dir $(M)) -XILINXINCLUDE := -I$(SDACCEL_DIR)/userspace/include -I$(XOCL_DIR) -XILINXINCLUDE += -I$(XOCL_DIR)/../xdma/ - -all: - echo "include: $(XILINXINCLUDE)" - echo "sdaccel_dir: $(SDACCEL_DIR)" - echo "ROOT: $(ROOT)" - echo "XOCL_DIR: $(XOCL_DIR)" - $(MAKE) -C $(KERNELDIR) M=$(PWD) modules - -install: all - $(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install - depmod -a - install -m 644 10-xocl.rules /etc/udev/rules.d - -rmmod -s xocl || true - -rmmod -s xdma || true - -rmmod -s edma_drv || true - -modprobe xocl - -clean: - rm -rf *.o *.o.d *~ core .depend .*.cmd *.ko *.ko.unsigned *.mod.c .tmp_versions *.symvers .#* *.save *.bak Modules.* modules.order Module.markers *.bin - - -CFLAGS_xocl_xdma.o := $(XILINXINCLUDE) -CFLAGS_xocl_sysfs.o := $(XILINXINCLUDE) -CFLAGS_xocl_bo.o := $(XILINXINCLUDE) -CFLAGS_xocl_drv.o 
:= $(XILINXINCLUDE) -CFLAGS_xocl_ioctl.o := $(XILINXINCLUDE) -CFLAGS_xocl_test.o := $(XILINXINCLUDE) -CFLAGS_xocl_ctx.o := $(XILINXINCLUDE) -CFLAGS_xocl_exec.o := $(XILINXINCLUDE) -CFLAGS_xocl_xvc.o := $(XILINXINCLUDE) -CFLAGS_libxdma.o := $(XILINXINCLUDE) diff --git a/sdk/linux_kernel_drivers/xocl/cdev_sgdma.h b/sdk/linux_kernel_drivers/xocl/cdev_sgdma.h deleted file mode 100644 index d3700260..00000000 --- a/sdk/linux_kernel_drivers/xocl/cdev_sgdma.h +++ /dev/null @@ -1,79 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". 
- * - * Karen Xie - * - ******************************************************************************/ -#ifndef _XDMA_IOCALLS_POSIX_H_ -#define _XDMA_IOCALLS_POSIX_H_ - -#include - - -#define IOCTL_XDMA_PERF_V1 (1) -#define XDMA_ADDRMODE_MEMORY (0) -#define XDMA_ADDRMODE_FIXED (1) - -/* - * S means "Set" through a ptr, - * T means "Tell" directly with the argument value - * G means "Get": reply by setting through a pointer - * Q means "Query": response is on the return value - * X means "eXchange": switch G and S atomically - * H means "sHift": switch T and Q atomically - * - * _IO(type,nr) no arguments - * _IOR(type,nr,datatype) read data from driver - * _IOW(type,nr.datatype) write data to driver - * _IORW(type,nr,datatype) read/write data - * - * _IOC_DIR(nr) returns direction - * _IOC_TYPE(nr) returns magic - * _IOC_NR(nr) returns number - * _IOC_SIZE(nr) returns size - */ - -struct xdma_performance_ioctl -{ - /* IOCTL_XDMA_IOCTL_Vx */ - uint32_t version; - uint32_t transfer_size; - /* measurement */ - uint32_t stopped; - uint32_t iterations; - uint64_t clock_cycle_count; - uint64_t data_cycle_count; - uint64_t pending_count; -}; - - - -/* IOCTL codes */ - -#define IOCTL_XDMA_PERF_START _IOW('q', 1, struct xdma_performance_ioctl *) -#define IOCTL_XDMA_PERF_STOP _IOW('q', 2, struct xdma_performance_ioctl *) -#define IOCTL_XDMA_PERF_GET _IOR('q', 3, struct xdma_performance_ioctl *) -#define IOCTL_XDMA_ADDRMODE_SET _IOW('q', 4, int) -#define IOCTL_XDMA_ADDRMODE_GET _IOR('q', 5, int) -#define IOCTL_XDMA_ALIGN_GET _IOR('q', 6, int) - -#endif /* _XDMA_IOCALLS_POSIX_H_ */ - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/ert.h b/sdk/linux_kernel_drivers/xocl/ert.h deleted file mode 100644 index 6b5c5bda..00000000 --- a/sdk/linux_kernel_drivers/xocl/ert.h +++ /dev/null @@ -1,310 +0,0 @@ -/** - * Copyright (C) 2018 Xilinx, Inc - * - * This file is dual licensed. 
It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - * - * Apache License Verbiage - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * GPL license Verbiage: - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. This program is - * distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - * License for more details. You should have received a copy of the - * GNU General Public License along with this program; if not, write - * to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, - * Boston, MA 02111-1307 USA - * - */ - -/** - * Xilinx SDAccel Embedded Runtime definition - * Copyright (C) 2018, Xilinx Inc - All rights reserved - * - * This file is dual licensed. It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. 
- */ - -#ifndef _ERT_H_ -#define _ERT_H_ - -#if defined(__KERNEL__) -# include -#else -# include -#endif - -/** - * ERT generic packet format - * - * @state: [3-0] current state of a command - * @custom: [11-4] custom per specific commands - * @count: [22-12] number of words in payload (data) - * @opcode: [27-23] opcode identifying specific command - * @type: [31-27] type of command (currently 0) - * @data: count number of words representing packet payload - */ -struct ert_packet { - union { - struct { - uint32_t state:4; /* [3-0] */ - uint32_t custom:8; /* [11-4] */ - uint32_t count:11; /* [22-12] */ - uint32_t opcode:5; /* [27-23] */ - uint32_t type:4; /* [31-27] */ - }; - uint32_t header; - }; - uint32_t data[1]; /* count number of words */ -}; - -/** - * ERT start kernel command format - * - * @state: [3-0] current state of a command - * @extra_cu_masks: [11-10] extra CU masks in addition to mandatory mask - * @count: [22-12] number of words in payload (data) - * @opcode: [27-23] 0, opcode for start_kernel - * @type: [31-27] 0, type of start_kernel - * - * @cu_mask: first mandatory CU mask - * @data: count number of words representing command payload - * - * The packet payload is comprised of 1 mandatory CU mask plus - * extra_cu_masks per header field, followed a CU register map of size - * (count - (1 + extra_cu_masks)) uint32_t words. 
- */ -struct ert_start_kernel_cmd { - union { - struct { - uint32_t state:4; /* [3-0] */ - uint32_t unused:6; /* [9-4] */ - uint32_t extra_cu_masks:2; /* [11-10] */ - uint32_t count:11; /* [22-12] */ - uint32_t opcode:5; /* [27-23] */ - uint32_t type:4; /* [31-27] */ - }; - uint32_t header; - }; - - /* payload */ - uint32_t cu_mask; /* mandatory cu mask */ - uint32_t data[1]; /* count-1 number of words */ -}; - -/** - * ERT configure command format - * - * @state: [3-0] current state of a command - * @count: [22-12] 5, number of words in payload - * @opcode: [27-23] 1, opcode for configure - * @type: [31-27] 0, type of configure - * - * @slot_size: command queue slot size - * @num_cus: number of compute units in program - * @cu_shift: shift value to convert CU idx to CU addr - * @cu_base_addr: base address to add to CU addr for actual physical address - * - * @ert:1 enable embedded HW scheduler - * @polling:1 poll for command completion - * @cu_dma:1 enable CUDMA custom module for HW scheduler - * @cu_isr:1 enable CUISR custom module for HW scheduler - * @cq_int:1 enable interrupt from host to HW scheduler - */ -struct ert_configure_cmd { - union { - struct { - uint32_t state:4; /* [3-0] */ - uint32_t unused:8; /* [11-4] */ - uint32_t count:11; /* [22-12] */ - uint32_t opcode:5; /* [27-23] */ - uint32_t type:4; /* [31-27] */ - }; - uint32_t header; - }; - - /* payload */ - uint32_t slot_size; - uint32_t num_cus; - uint32_t cu_shift; - uint32_t cu_base_addr; - - /* features */ - uint32_t ert:1; - uint32_t polling:1; - uint32_t cu_dma:1; - uint32_t cu_isr:1; - uint32_t cq_int:1; - uint32_t unusedf:27; -}; - -/** - * ERT command state - * - * @ERT_CMD_STATE_NEW: Set by host before submitting a command to scheduler - * @ERT_CMD_STATE_QUEUED: Internal scheduler state - * @ERT_CMD_STATE_RUNNING: Internal scheduler state - * @ERT_CMD_STATE_COMPLETE: Set by scheduler when command completes - * @ERT_CMD_STATE_ERROR: Set by scheduler if command failed - * 
@ERT_CMD_STATE_ABORT: Set by scheduler if command abort - */ -enum ert_cmd_state { - ERT_CMD_STATE_NEW = 1, - ERT_CMD_STATE_QUEUED = 2, - ERT_CMD_STATE_RUNNING = 3, - ERT_CMD_STATE_COMPLETED = 4, - ERT_CMD_STATE_ERROR = 5, - ERT_CMD_STATE_ABORT = 6, -}; - -/** - * Opcode types for commands - * - * @ERT_START_CU: start a workgroup on a CU - * @ERT_START_KERNEL: currently aliased to ERT_START_CU - * @ERT_CONFIGURE: configure command scheduler - */ -enum ert_cmd_opcode { - ERT_START_CU = 0, - ERT_START_KERNEL = 0, - ERT_CONFIGURE = 1, -}; - -/** - * Address constants per spec - */ -#define ERT_WORD_SIZE 4 /* 4 bytes */ -#define ERT_CQ_SIZE 0x10000 /* 64K */ -#define ERT_CQ_BASE_ADDR 0x190000 -#define ERT_CSR_ADDR 0x180000 - -/** - * The STATUS REGISTER is for communicating completed CQ slot indices - * MicroBlaze write, host reads. MB(W) / HOST(COR) - */ -#define ERT_STATUS_REGISTER_ADDR (ERT_CSR_ADDR) -#define ERT_STATUS_REGISTER_ADDR0 (ERT_CSR_ADDR) -#define ERT_STATUS_REGISTER_ADDR1 (ERT_CSR_ADDR + 0x4) -#define ERT_STATUS_REGISTER_ADDR2 (ERT_CSR_ADDR + 0x8) -#define ERT_STATUS_REGISTER_ADDR3 (ERT_CSR_ADDR + 0xC) - -/** - * The CU DMA REGISTER is for communicating which CQ slot is to be started - * on a specific CU. MB selects a free CU on which the command can - * run, then writes the 1< - * Leon Woestenberg - * - ******************************************************************************/ -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include - -#include "libxdma.h" -#include "libxdma_api.h" -#include "cdev_sgdma.h" - -/* SECTION: Module licensing */ - -#ifdef __LIBXDMA_MOD__ -#include "version.h" -#define DRV_MODULE_NAME "libxdma" -#define DRV_MODULE_DESC "Xilinx XDMA Base Driver" -#define DRV_MODULE_RELDATE "Feb. 
2017" - -static char version[] = - DRV_MODULE_DESC " " DRV_MODULE_NAME " v" DRV_MODULE_VERSION "\n"; - -MODULE_AUTHOR("Xilinx, Inc."); -MODULE_DESCRIPTION(DRV_MODULE_DESC); -MODULE_VERSION(DRV_MODULE_VERSION); -MODULE_LICENSE("GPL v2"); -#endif - -/* Module Parameters */ -static unsigned int poll_mode; -module_param(poll_mode, uint, 0644); -MODULE_PARM_DESC(poll_mode, "Set 1 for hw polling, default is 0 (interrupts)"); - -static unsigned int interrupt_mode; -module_param(interrupt_mode, uint, 0644); -MODULE_PARM_DESC(interrupt_mode, "0 - MSI-x , 1 - MSI, 2 - Legacy"); - -static unsigned int enable_credit_mp; -module_param(enable_credit_mp, uint, 0644); -MODULE_PARM_DESC(enable_credit_mp, "Set 1 to enable creidt feature, default is 0 (no credit control)"); - -/* - * xdma device management - * maintains a list of the xdma devices - */ -static LIST_HEAD(xdev_list); -static DEFINE_MUTEX(xdev_mutex); - -static LIST_HEAD(xdev_rcu_list); -static DEFINE_SPINLOCK(xdev_rcu_lock); - -#ifndef list_last_entry -#define list_last_entry(ptr, type, member) \ - list_entry((ptr)->prev, type, member) -#endif - -static inline void xdev_list_add(struct xdma_dev *xdev) -{ - mutex_lock(&xdev_mutex); - if (list_empty(&xdev_list)) - xdev->idx = 0; - else { - struct xdma_dev *last; - - last = list_last_entry(&xdev_list, struct xdma_dev, list_head); - xdev->idx = last->idx + 1; - } - list_add_tail(&xdev->list_head, &xdev_list); - mutex_unlock(&xdev_mutex); - - dbg_init("dev %s, xdev 0x%p, xdma idx %d.\n", - dev_name(&xdev->pdev->dev), xdev, xdev->idx); - - spin_lock(&xdev_rcu_lock); - list_add_tail_rcu(&xdev->rcu_node, &xdev_rcu_list); - spin_unlock(&xdev_rcu_lock); -} - -#undef list_last_entry - -static inline void xdev_list_remove(struct xdma_dev *xdev) -{ - mutex_lock(&xdev_mutex); - list_del(&xdev->list_head); - mutex_unlock(&xdev_mutex); - - spin_lock(&xdev_rcu_lock); - list_del_rcu(&xdev->rcu_node); - spin_unlock(&xdev_rcu_lock); - synchronize_rcu(); -} - -struct xdma_dev 
*xdev_find_by_pdev(struct pci_dev *pdev) -{ - struct xdma_dev *xdev, *tmp; - - mutex_lock(&xdev_mutex); - list_for_each_entry_safe(xdev, tmp, &xdev_list, list_head) { - if (xdev->pdev == pdev) { - mutex_unlock(&xdev_mutex); - return xdev; - } - } - mutex_unlock(&xdev_mutex); - return NULL; -} -EXPORT_SYMBOL_GPL(xdev_find_by_pdev); - -static inline int debug_check_dev_hndl(const char *fname, struct pci_dev *pdev, - void *hndl) -{ - struct xdma_dev *xdev; - - if (!pdev) - return -EINVAL; - - xdev = xdev_find_by_pdev(pdev); - if (!xdev) { - pr_info("%s pdev 0x%p, hndl 0x%p, NO match found!\n", - fname, pdev, hndl); - return -EINVAL; - } - if (xdev != hndl) { - pr_err("%s pdev 0x%p, hndl 0x%p != 0x%p!\n", - fname, pdev, hndl, xdev); - return -EINVAL; - } - - return 0; -} - -#ifdef __LIBXDMA_DEBUG__ -/* SECTION: Function definitions */ -inline void __write_register(const char *fn, u32 value, void *iomem, unsigned long off) -{ - pr_err("%s: w reg 0x%lx(0x%p), 0x%x.\n", fn, off, iomem, value); - iowrite32(value, iomem); -} -#define write_register(v,mem,off) __write_register(__func__, v, mem, off) -#else -#define write_register(v,mem,off) iowrite32(v, mem) -#endif - -inline u32 read_register(void *iomem) -{ - return ioread32(iomem); -} - -static inline u32 build_u32(u32 hi, u32 lo) -{ - return ((hi & 0xFFFFUL) << 16) | (lo & 0xFFFFUL); -} - -static inline u64 build_u64(u64 hi, u64 lo) -{ - return ((hi & 0xFFFFFFFULL) << 32) | (lo & 0xFFFFFFFFULL); -} - -static void check_nonzero_interrupt_status(struct xdma_dev *xdev) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - u32 w; - - w = read_register(®->user_int_enable); - if (w) - pr_info("%s xdma%d user_int_enable = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - - w = read_register(®->channel_int_enable); - if (w) - pr_info("%s xdma%d channel_int_enable = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - - w = 
read_register(®->user_int_request); - if (w) - pr_info("%s xdma%d user_int_request = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - w = read_register(®->channel_int_request); - if (w) - pr_info("%s xdma%d channel_int_request = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - - w = read_register(®->user_int_pending); - if (w) - pr_info("%s xdma%d user_int_pending = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - w = read_register(®->channel_int_pending); - if (w) - pr_info("%s xdma%d channel_int_pending = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); -} - -/* channel_interrupts_enable -- Enable interrupts we are interested in */ -static void channel_interrupts_enable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->channel_int_enable_w1s, XDMA_OFS_INT_CTRL); -} - -/* channel_interrupts_disable -- Disable interrupts we not interested in */ -static void channel_interrupts_disable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->channel_int_enable_w1c, XDMA_OFS_INT_CTRL); -} - -/* user_interrupts_enable -- Enable interrupts we are interested in */ -static void user_interrupts_enable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->user_int_enable_w1s, XDMA_OFS_INT_CTRL); -} - -/* user_interrupts_disable -- Disable interrupts we not interested in */ -static void user_interrupts_disable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->user_int_enable_w1c, XDMA_OFS_INT_CTRL); -} - -/* read_interrupts -- Print the interrupt 
controller status */ -static u32 read_interrupts(struct xdma_dev *xdev) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - u32 lo; - u32 hi; - - /* extra debugging; inspect complete engine set of registers */ - hi = read_register(®->user_int_request); - dbg_io("ioread32(0x%p) returned 0x%08x (user_int_request).\n", - ®->user_int_request, hi); - lo = read_register(®->channel_int_request); - dbg_io("ioread32(0x%p) returned 0x%08x (channel_int_request)\n", - ®->channel_int_request, lo); - - /* return interrupts: user in upper 16-bits, channel in lower 16-bits */ - return build_u32(hi, lo); -} - -void enable_perf(struct xdma_engine *engine) -{ - u32 w; - - w = XDMA_PERF_CLEAR; - write_register(w, &engine->regs->perf_ctrl, - (unsigned long)(&engine->regs->perf_ctrl) - - (unsigned long)(&engine->regs)); - read_register(&engine->regs->identifier); - w = XDMA_PERF_AUTO | XDMA_PERF_RUN; - write_register(w, &engine->regs->perf_ctrl, - (unsigned long)(&engine->regs->perf_ctrl) - - (unsigned long)(&engine->regs)); - read_register(&engine->regs->identifier); - - dbg_perf("IOCTL_XDMA_PERF_START\n"); - -} -EXPORT_SYMBOL_GPL(enable_perf); - -void get_perf_stats(struct xdma_engine *engine) -{ - u32 hi; - u32 lo; - - BUG_ON(!engine); - BUG_ON(!engine->xdma_perf); - - hi = 0; - lo = read_register(&engine->regs->completed_desc_count); - engine->xdma_perf->iterations = build_u64(hi, lo); - - hi = read_register(&engine->regs->perf_cyc_hi); - lo = read_register(&engine->regs->perf_cyc_lo); - - engine->xdma_perf->clock_cycle_count = build_u64(hi, lo); - - hi = read_register(&engine->regs->perf_dat_hi); - lo = read_register(&engine->regs->perf_dat_lo); - engine->xdma_perf->data_cycle_count = build_u64(hi, lo); - - hi = read_register(&engine->regs->perf_pnd_hi); - lo = read_register(&engine->regs->perf_pnd_lo); - engine->xdma_perf->pending_count = build_u64(hi, lo); -} -EXPORT_SYMBOL_GPL(get_perf_stats); - -static void 
engine_reg_dump(struct xdma_engine *engine) -{ - u32 w; - - BUG_ON(!engine); - - w = read_register(&engine->regs->identifier); - pr_info("%s: ioread32(0x%p) = 0x%08x (id).\n", - engine->name, &engine->regs->identifier, w); - w &= BLOCK_ID_MASK; - if (w != BLOCK_ID_HEAD) { - pr_info("%s: engine id missing, 0x%08x exp. & 0x%x = 0x%x\n", - engine->name, w, BLOCK_ID_MASK, BLOCK_ID_HEAD); - return; - } - /* extra debugging; inspect complete engine set of registers */ - w = read_register(&engine->regs->status); - pr_info("%s: ioread32(0x%p) = 0x%08x (status).\n", - engine->name, &engine->regs->status, w); - w = read_register(&engine->regs->control); - pr_info("%s: ioread32(0x%p) = 0x%08x (control)\n", - engine->name, &engine->regs->control, w); - w = read_register(&engine->sgdma_regs->first_desc_lo); - pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_lo)\n", - engine->name, &engine->sgdma_regs->first_desc_lo, w); - w = read_register(&engine->sgdma_regs->first_desc_hi); - pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_hi)\n", - engine->name, &engine->sgdma_regs->first_desc_hi, w); - w = read_register(&engine->sgdma_regs->first_desc_adjacent); - pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_adjacent).\n", - engine->name, &engine->sgdma_regs->first_desc_adjacent, w); - w = read_register(&engine->regs->completed_desc_count); - pr_info("%s: ioread32(0x%p) = 0x%08x (completed_desc_count).\n", - engine->name, &engine->regs->completed_desc_count, w); - w = read_register(&engine->regs->interrupt_enable_mask); - pr_info("%s: ioread32(0x%p) = 0x%08x (interrupt_enable_mask)\n", - engine->name, &engine->regs->interrupt_enable_mask, w); -} - -/** - * engine_status_read() - read status of SG DMA engine (optionally reset) - * - * Stores status in engine->status. 
- * - * @return -1 on failure, status register otherwise - */ -static void engine_status_dump(struct xdma_engine *engine) -{ - u32 v = engine->status; - char buffer[256]; - char *buf = buffer; - int len = 0; - - len = sprintf(buf, "SG engine %s status: 0x%08x: ", engine->name, v); - - if ((v & XDMA_STAT_BUSY)) - len += sprintf(buf + len, "BUSY,"); - if ((v & XDMA_STAT_DESC_STOPPED)) - len += sprintf(buf + len, "DESC_STOPPED,"); - if ((v & XDMA_STAT_DESC_COMPLETED)) - len += sprintf(buf + len, "DESC_COMPL,"); - - /* common H2C & C2H */ - if ((v & XDMA_STAT_COMMON_ERR_MASK)) { - if ((v & XDMA_STAT_ALIGN_MISMATCH)) - len += sprintf(buf + len, "ALIGN_MISMATCH "); - if ((v & XDMA_STAT_MAGIC_STOPPED)) - len += sprintf(buf + len, "MAGIC_STOPPED "); - if ((v & XDMA_STAT_INVALID_LEN)) - len += sprintf(buf + len, "INVLIAD_LEN "); - if ((v & XDMA_STAT_IDLE_STOPPED)) - len += sprintf(buf + len, "IDLE_STOPPED "); - buf[len - 1] = ','; - } - - if ((engine->dir == DMA_TO_DEVICE)) { - /* H2C only */ - if ((v & XDMA_STAT_H2C_R_ERR_MASK)) { - len += sprintf(buf + len, "R:"); - if ((v & XDMA_STAT_H2C_R_UNSUPP_REQ)) - len += sprintf(buf + len, "UNSUPP_REQ "); - if ((v & XDMA_STAT_H2C_R_COMPL_ABORT)) - len += sprintf(buf + len, "COMPL_ABORT "); - if ((v & XDMA_STAT_H2C_R_PARITY_ERR)) - len += sprintf(buf + len, "PARITY "); - if ((v & XDMA_STAT_H2C_R_HEADER_EP)) - len += sprintf(buf + len, "HEADER_EP "); - if ((v & XDMA_STAT_H2C_R_UNEXP_COMPL)) - len += sprintf(buf + len, "UNEXP_COMPL "); - buf[len - 1] = ','; - } - - if ((v & XDMA_STAT_H2C_W_ERR_MASK)) { - len += sprintf(buf + len, "W:"); - if ((v & XDMA_STAT_H2C_W_DECODE_ERR)) - len += sprintf(buf + len, "DECODE_ERR "); - if ((v & XDMA_STAT_H2C_W_SLAVE_ERR)) - len += sprintf(buf + len, "SLAVE_ERR "); - buf[len - 1] = ','; - } - - } else { - /* C2H only */ - if ((v & XDMA_STAT_C2H_R_ERR_MASK)) { - len += sprintf(buf + len, "R:"); - if ((v & XDMA_STAT_C2H_R_DECODE_ERR)) - len += sprintf(buf + len, "DECODE_ERR "); - if ((v & 
XDMA_STAT_C2H_R_SLAVE_ERR)) - len += sprintf(buf + len, "SLAVE_ERR "); - buf[len - 1] = ','; - } - } - - /* common H2C & C2H */ - if ((v & XDMA_STAT_DESC_ERR_MASK)) { - len += sprintf(buf + len, "DESC_ERR:"); - if ((v & XDMA_STAT_DESC_UNSUPP_REQ)) - len += sprintf(buf + len, "UNSUPP_REQ "); - if ((v & XDMA_STAT_DESC_COMPL_ABORT)) - len += sprintf(buf + len, "COMPL_ABORT "); - if ((v & XDMA_STAT_DESC_PARITY_ERR)) - len += sprintf(buf + len, "PARITY "); - if ((v & XDMA_STAT_DESC_HEADER_EP)) - len += sprintf(buf + len, "HEADER_EP "); - if ((v & XDMA_STAT_DESC_UNEXP_COMPL)) - len += sprintf(buf + len, "UNEXP_COMPL "); - buf[len - 1] = ','; - } - - buf[len - 1] = '\0'; - pr_info("%s\n", buffer); -} - -static u32 engine_status_read(struct xdma_engine *engine, bool clear, bool dump) -{ - u32 value; - - BUG_ON(!engine); - - if (dump) - engine_reg_dump(engine); - - /* read status register */ - if (clear) - value = engine->status = - read_register(&engine->regs->status_rc); - else - value = engine->status = read_register(&engine->regs->status); - - if (dump) - engine_status_dump(engine); - - return value; -} - -/** - * xdma_engine_stop() - stop an SG DMA engine - * - */ -static void xdma_engine_stop(struct xdma_engine *engine) -{ - u32 w; - - BUG_ON(!engine); - dbg_tfr("xdma_engine_stop(engine=%p)\n", engine); - - w = 0; - w |= (u32)XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - w |= (u32)XDMA_CTRL_IE_MAGIC_STOPPED; - w |= (u32)XDMA_CTRL_IE_READ_ERROR; - w |= (u32)XDMA_CTRL_IE_DESC_ERROR; - - if (poll_mode) { - w |= (u32) XDMA_CTRL_POLL_MODE_WB; - } else { - w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; - w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; - - /* Disable IDLE STOPPED for MM */ - if ((engine->streaming && (engine->dir == DMA_FROM_DEVICE)) || - (engine->xdma_perf)) - w |= (u32)XDMA_CTRL_IE_IDLE_STOPPED; - } - - dbg_tfr("Stopping SG DMA %s engine; writing 0x%08x to 0x%p.\n", - engine->name, w, (u32 *)&engine->regs->control); - write_register(w, &engine->regs->control, - (unsigned 
long)(&engine->regs->control) - - (unsigned long)(&engine->regs)); - /* dummy read of status register to flush all previous writes */ - dbg_tfr("xdma_engine_stop(%s) done\n", engine->name); -} - -static void engine_start_mode_config(struct xdma_engine *engine) -{ - u32 w; - - BUG_ON(!engine); - - /* If a perf test is running, enable the engine interrupts */ - if (engine->xdma_perf) { - w = XDMA_CTRL_IE_DESC_STOPPED; - w |= XDMA_CTRL_IE_DESC_COMPLETED; - w |= XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - w |= XDMA_CTRL_IE_MAGIC_STOPPED; - w |= XDMA_CTRL_IE_IDLE_STOPPED; - w |= XDMA_CTRL_IE_READ_ERROR; - w |= XDMA_CTRL_IE_DESC_ERROR; - - write_register(w, &engine->regs->interrupt_enable_mask, - (unsigned long)(&engine->regs->interrupt_enable_mask) - - (unsigned long)(&engine->regs)); - } - - /* write control register of SG DMA engine */ - w = (u32)XDMA_CTRL_RUN_STOP; - w |= (u32)XDMA_CTRL_IE_READ_ERROR; - w |= (u32)XDMA_CTRL_IE_DESC_ERROR; - w |= (u32)XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - w |= (u32)XDMA_CTRL_IE_MAGIC_STOPPED; - - if (poll_mode) { - w |= (u32)XDMA_CTRL_POLL_MODE_WB; - } else { - w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; - w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; - - if ((engine->streaming && (engine->dir == DMA_FROM_DEVICE)) || - (engine->xdma_perf)) - w |= (u32)XDMA_CTRL_IE_IDLE_STOPPED; - - /* set non-incremental addressing mode */ - if (engine->non_incr_addr) - w |= (u32)XDMA_CTRL_NON_INCR_ADDR; - } - - dbg_tfr("iowrite32(0x%08x to 0x%p) (control)\n", w, - (void *)&engine->regs->control); - /* start the engine */ - write_register(w, &engine->regs->control, - (unsigned long)(&engine->regs->control) - - (unsigned long)(&engine->regs)); - - /* dummy read of status register to flush all previous writes */ - w = read_register(&engine->regs->status); - dbg_tfr("ioread32(0x%p) = 0x%08x (dummy read flushes writes).\n", - &engine->regs->status, w); -} - -/** - * engine_start() - start an idle engine with its first transfer on queue - * - * The engine will run and process all 
transfers that are queued using - * transfer_queue() and thus have their descriptor lists chained. - * - * During the run, new transfers will be processed if transfer_queue() has - * chained the descriptors before the hardware fetches the last descriptor. - * A transfer that was chained too late will invoke a new run of the engine - * initiated from the engine_service() routine. - * - * The engine must be idle and at least one transfer must be queued. - * This function does not take locks; the engine spinlock must already be - * taken. - * - */ -static struct xdma_transfer *engine_start(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer; - u32 w; - int extra_adj = 0; - - /* engine must be idle */ - BUG_ON(engine->running); - /* engine transfer queue must not be empty */ - BUG_ON(list_empty(&engine->transfer_list)); - /* inspect first transfer queued on the engine */ - transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, - entry); - BUG_ON(!transfer); - - /* engine is no longer shutdown */ - engine->shutdown = ENGINE_SHUTDOWN_NONE; - - dbg_tfr("engine_start(%s): transfer=0x%p.\n", engine->name, transfer); - - /* initialize number of descriptors of dequeued transfers */ - engine->desc_dequeued = 0; - - /* write lower 32-bit of bus address of transfer first descriptor */ - w = cpu_to_le32(PCI_DMA_L(transfer->desc_bus)); - dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_lo)\n", w, - (void *)&engine->sgdma_regs->first_desc_lo); - write_register(w, &engine->sgdma_regs->first_desc_lo, - (unsigned long)(&engine->sgdma_regs->first_desc_lo) - - (unsigned long)(&engine->sgdma_regs)); - /* write upper 32-bit of bus address of transfer first descriptor */ - w = cpu_to_le32(PCI_DMA_H(transfer->desc_bus)); - dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_hi)\n", w, - (void *)&engine->sgdma_regs->first_desc_hi); - write_register(w, &engine->sgdma_regs->first_desc_hi, - (unsigned long)(&engine->sgdma_regs->first_desc_hi) - - (unsigned 
long)(&engine->sgdma_regs)); - - if (transfer->desc_adjacent > 0) { - extra_adj = transfer->desc_adjacent - 1; - if (extra_adj > MAX_EXTRA_ADJ) - extra_adj = MAX_EXTRA_ADJ; - } - dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_adjacent)\n", - extra_adj, (void *)&engine->sgdma_regs->first_desc_adjacent); - write_register(extra_adj, &engine->sgdma_regs->first_desc_adjacent, - (unsigned long)(&engine->sgdma_regs->first_desc_adjacent) - - (unsigned long)(&engine->sgdma_regs)); - - dbg_tfr("ioread32(0x%p) (dummy read flushes writes).\n", - &engine->regs->status); - mmiowb(); - - engine_start_mode_config(engine); - - engine_status_read(engine, 0, 0); - - dbg_tfr("%s engine 0x%p now running\n", engine->name, engine); - /* remember the engine is running */ - engine->running = 1; - return transfer; -} - -/** - * engine_service() - service an SG DMA engine - * - * must be called with engine->lock already acquired - * - * @engine pointer to struct xdma_engine - * - */ -static void engine_service_shutdown(struct xdma_engine *engine) -{ - /* if the engine stopped with RUN still asserted, de-assert RUN now */ - dbg_tfr("engine just went idle, resetting RUN_STOP.\n"); - xdma_engine_stop(engine); - engine->running = 0; - - /* awake task on engine's shutdown wait queue */ - wake_up_interruptible(&engine->shutdown_wq); -} - -struct xdma_transfer *engine_transfer_completion(struct xdma_engine *engine, - struct xdma_transfer *transfer) -{ - BUG_ON(!engine); - BUG_ON(!transfer); - - /* synchronous I/O? 
*/ - /* awake task on transfer's wait queue */ - wake_up_interruptible(&transfer->wq); - - return transfer; -} - -struct xdma_transfer *engine_service_transfer_list(struct xdma_engine *engine, - struct xdma_transfer *transfer, u32 *pdesc_completed) -{ - BUG_ON(!engine); - BUG_ON(!pdesc_completed); - - if (!transfer) { - pr_info("%s xfer empty, pdesc completed %u.\n", - engine->name, *pdesc_completed); - return NULL; - } - - /* - * iterate over all the transfers completed by the engine, - * except for the last (i.e. use > instead of >=). - */ - while (transfer && (!transfer->cyclic) && - (*pdesc_completed > transfer->desc_num)) { - /* remove this transfer from pdesc_completed */ - *pdesc_completed -= transfer->desc_num; - dbg_tfr("%s engine completed non-cyclic xfer 0x%p (%d desc)\n", - engine->name, transfer, transfer->desc_num); - /* remove completed transfer from list */ - list_del(engine->transfer_list.next); - /* add to dequeued number of descriptors during this run */ - engine->desc_dequeued += transfer->desc_num; - /* mark transfer as succesfully completed */ - transfer->state = TRANSFER_STATE_COMPLETED; - - /* Complete transfer - sets transfer to NULL if an async - * transfer has completed */ - transfer = engine_transfer_completion(engine, transfer); - - /* if exists, get the next transfer on the list */ - if (!list_empty(&engine->transfer_list)) { - transfer = list_entry(engine->transfer_list.next, - struct xdma_transfer, entry); - dbg_tfr("Non-completed transfer %p\n", transfer); - } else { - /* no further transfers? */ - transfer = NULL; - } - } - - return transfer; -} - -static void engine_err_handle(struct xdma_engine *engine, - struct xdma_transfer *transfer, u32 desc_completed) -{ - u32 value; - - /* - * The BUSY bit is expected to be clear now but older HW has a race - * condition which could cause it to be still set. If it's set, re-read - * and check again. If it's still set, log the issue. 
- */ - if (engine->status & XDMA_STAT_BUSY) { - value = read_register(&engine->regs->status); - if ((value & XDMA_STAT_BUSY) && printk_ratelimit()) - pr_info("%s has errors but is still BUSY\n", - engine->name); - } - - if (printk_ratelimit()) { - pr_info("%s, s 0x%x, aborted xfer 0x%p, cmpl %d/%d\n", - engine->name, engine->status, transfer, desc_completed, - transfer->desc_num); - } - - /* mark transfer as failed */ - transfer->state = TRANSFER_STATE_FAILED; - xdma_engine_stop(engine); -} - -struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine, - struct xdma_transfer *transfer, u32 *pdesc_completed) -{ - BUG_ON(!engine); - BUG_ON(!transfer); - BUG_ON(!pdesc_completed); - - /* inspect the current transfer */ - if (transfer) { - if (((engine->dir == DMA_FROM_DEVICE) && - (engine->status & XDMA_STAT_C2H_ERR_MASK)) || - ((engine->dir == DMA_TO_DEVICE) && - (engine->status & XDMA_STAT_H2C_ERR_MASK))) { - pr_info("engine %s, status error 0x%x.\n", - engine->name, engine->status); - engine_status_dump(engine); - engine_err_handle(engine, transfer, *pdesc_completed); - goto transfer_del; - } - - if (engine->status & XDMA_STAT_BUSY) - dbg_tfr("Engine %s is unexpectedly busy - ignoring\n", - engine->name); - - /* the engine stopped on current transfer? 
*/ - if (*pdesc_completed < transfer->desc_num) { - transfer->state = TRANSFER_STATE_FAILED; - pr_info("%s, xfer 0x%p, stopped half-way, %d/%d.\n", - engine->name, transfer, *pdesc_completed, - transfer->desc_num); - } else { - dbg_tfr("engine %s completed transfer\n", engine->name); - dbg_tfr("Completed transfer ID = 0x%p\n", transfer); - dbg_tfr("*pdesc_completed=%d, transfer->desc_num=%d", - *pdesc_completed, transfer->desc_num); - - if (!transfer->cyclic) { - /* - * if the engine stopped on this transfer, - * it should be the last - */ - WARN_ON(*pdesc_completed > transfer->desc_num); - } - /* mark transfer as succesfully completed */ - transfer->state = TRANSFER_STATE_COMPLETED; - } - -transfer_del: - /* remove completed transfer from list */ - list_del(engine->transfer_list.next); - /* add to dequeued number of descriptors during this run */ - engine->desc_dequeued += transfer->desc_num; - - /* - * Complete transfer - sets transfer to NULL if an asynchronous - * transfer has completed - */ - transfer = engine_transfer_completion(engine, transfer); - } - - return transfer; -} - -static void engine_service_perf(struct xdma_engine *engine, u32 desc_completed) -{ - BUG_ON(!engine); - - /* performance measurement is running? */ - if (engine->xdma_perf) { - /* a descriptor was completed? */ - if (engine->status & XDMA_STAT_DESC_COMPLETED) { - engine->xdma_perf->iterations = desc_completed; - dbg_perf("transfer->xdma_perf->iterations=%d\n", - engine->xdma_perf->iterations); - } - - /* a descriptor stopped the engine? 
*/ - if (engine->status & XDMA_STAT_DESC_STOPPED) { - engine->xdma_perf->stopped = 1; - /* - * wake any XDMA_PERF_IOCTL_STOP waiting for - * the performance run to finish - */ - wake_up_interruptible(&engine->xdma_perf_wq); - dbg_perf("transfer->xdma_perf stopped\n"); - } - } -} - -static void engine_transfer_dequeue(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer; - - BUG_ON(!engine); - - /* pick first transfer on the queue (was submitted to the engine) */ - transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, - entry); - BUG_ON(!transfer); - BUG_ON(transfer != &engine->cyclic_req->xfer); - dbg_tfr("%s engine completed cyclic transfer 0x%p (%d desc).\n", - engine->name, transfer, transfer->desc_num); - /* remove completed transfer from list */ - list_del(engine->transfer_list.next); -} - -static int engine_ring_process(struct xdma_engine *engine) -{ - struct xdma_result *result; - int start; - int eop_count = 0; - - BUG_ON(!engine); - result = engine->cyclic_result; - BUG_ON(!result); - - /* where we start receiving in the ring buffer */ - start = engine->rx_tail; - - /* iterate through all newly received RX result descriptors */ - dbg_tfr("%s, result %d, 0x%x, len 0x%x.\n", - engine->name, engine->rx_tail, result[engine->rx_tail].status, - result[engine->rx_tail].length); - while (result[engine->rx_tail].status && !engine->rx_overrun) { - /* EOP bit set in result? */ - if (result[engine->rx_tail].status & RX_STATUS_EOP){ - eop_count++; - } - - /* increment tail pointer */ - engine->rx_tail = (engine->rx_tail + 1) % CYCLIC_RX_PAGES_MAX; - - dbg_tfr("%s, head %d, tail %d, 0x%x, len 0x%x.\n", - engine->name, engine->rx_head, engine->rx_tail, - result[engine->rx_tail].status, - result[engine->rx_tail].length); - - /* overrun? 
*/ - if (engine->rx_tail == engine->rx_head) { - dbg_tfr("%s: overrun\n", engine->name); - /* flag to user space that overrun has occurred */ - engine->rx_overrun = 1; - } - } - - return eop_count; -} - -static int engine_service_cyclic_polled(struct xdma_engine *engine) -{ - int eop_count = 0; - int rc = 0; - struct xdma_poll_wb *writeback_data; - u32 sched_limit = 0; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - - while (eop_count == 0) { - if (sched_limit != 0) { - if ((sched_limit % NUM_POLLS_PER_SCHED) == 0) - schedule(); - } - sched_limit++; - - /* Monitor descriptor writeback address for errors */ - if ((writeback_data->completed_desc_count) & WB_ERR_MASK) { - rc = -1; - break; - } - - eop_count = engine_ring_process(engine); - } - - if (eop_count == 0) { - engine_status_read(engine, 1, 0); - if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { - /* transfers on queue? */ - if (!list_empty(&engine->transfer_list)) - engine_transfer_dequeue(engine); - - engine_service_shutdown(engine); - } - } - - return rc; -} - -static int engine_service_cyclic_interrupt(struct xdma_engine *engine) -{ - int eop_count = 0; - struct xdma_transfer *xfer; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - engine_status_read(engine, 1, 0); - - eop_count = engine_ring_process(engine); - /* - * wake any reader on EOP, as one or more packets are now in - * the RX buffer - */ - xfer = &engine->cyclic_req->xfer; - if(enable_credit_mp){ - if (eop_count > 0) { - //engine->eop_found = 1; - } - wake_up_interruptible(&xfer->wq); - }else{ - if (eop_count > 0) { - /* awake task on transfer's wait queue */ - dbg_tfr("wake_up_interruptible() due to %d EOP's\n", eop_count); - engine->eop_found = 1; - wake_up_interruptible(&xfer->wq); - } - } - - /* engine was running but is no longer busy? 
*/ - if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { - /* transfers on queue? */ - if (!list_empty(&engine->transfer_list)) - engine_transfer_dequeue(engine); - - engine_service_shutdown(engine); - } - - return 0; -} - -/* must be called with engine->lock already acquired */ -static int engine_service_cyclic(struct xdma_engine *engine) -{ - int rc = 0; - - dbg_tfr("engine_service_cyclic()"); - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - if (poll_mode) - rc = engine_service_cyclic_polled(engine); - else - rc = engine_service_cyclic_interrupt(engine); - - return rc; -} - - -static void engine_service_resume(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer_started; - - BUG_ON(!engine); - - /* engine stopped? */ - if (!engine->running) { - /* in the case of shutdown, let it finish what's in the Q */ - if (!list_empty(&engine->transfer_list)) { - /* (re)start engine */ - transfer_started = engine_start(engine); - dbg_tfr("re-started %s engine with pending xfer 0x%p\n", - engine->name, transfer_started); - /* engine was requested to be shutdown? */ - } else if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { - engine->shutdown |= ENGINE_SHUTDOWN_IDLE; - /* awake task on engine's shutdown wait queue */ - wake_up_interruptible(&engine->shutdown_wq); - } else { - dbg_tfr("no pending transfers, %s engine stays idle.\n", - engine->name); - } - } else { - /* engine is still running? 
*/ - if (list_empty(&engine->transfer_list)) { - pr_warn("no queued transfers but %s engine running!\n", - engine->name); - WARN_ON(1); - } - } -} - -/** - * engine_service() - service an SG DMA engine - * - * must be called with engine->lock already acquired - * - * @engine pointer to struct xdma_engine - * - */ -static int engine_service(struct xdma_engine *engine, int desc_writeback) -{ - struct xdma_transfer *transfer = NULL; - u32 desc_count = desc_writeback & WB_COUNT_MASK; - u32 err_flag = desc_writeback & WB_ERR_MASK; - int rv = 0; - struct xdma_poll_wb *wb_data; - - BUG_ON(!engine); - - /* If polling detected an error, signal to the caller */ - if (err_flag) - rv = -1; - - /* Service the engine */ - if (!engine->running) { - dbg_tfr("Engine was not running!!! Clearing status\n"); - engine_status_read(engine, 1, 0); - return 0; - } - - /* - * If called by the ISR or polling detected an error, read and clear - * engine status. For polled mode descriptor completion, this read is - * unnecessary and is skipped to reduce latency - */ - if ((desc_count == 0) || (err_flag != 0)) - engine_status_read(engine, 1, 0); - - /* - * engine was running but is no longer busy, or writeback occurred, - * shut down - */ - if ((engine->running && !(engine->status & XDMA_STAT_BUSY)) || - (desc_count != 0)) - engine_service_shutdown(engine); - - /* - * If called from the ISR, or if an error occurred, the descriptor - * count will be zero. In this scenario, read the descriptor count - * from HW. In polled mode descriptor completion, this read is - * unnecessary and is skipped to reduce latency - */ - if (!desc_count) - desc_count = read_register(&engine->regs->completed_desc_count); - dbg_tfr("desc_count = %d\n", desc_count); - - /* transfers on queue? 
*/ - if (!list_empty(&engine->transfer_list)) { - /* pick first transfer on queue (was submitted to the engine) */ - transfer = list_entry(engine->transfer_list.next, - struct xdma_transfer, entry); - - dbg_tfr("head of queue transfer 0x%p has %d descriptors\n", - transfer, (int)transfer->desc_num); - - dbg_tfr("Engine completed %d desc, %d not yet dequeued\n", - (int)desc_count, - (int)desc_count - engine->desc_dequeued); - - engine_service_perf(engine, desc_count); - } - - /* account for already dequeued transfers during this engine run */ - desc_count -= engine->desc_dequeued; - - /* Process all but the last transfer */ - transfer = engine_service_transfer_list(engine, transfer, &desc_count); - - /* - * Process final transfer - includes checks of number of descriptors to - * detect faulty completion - */ - transfer = engine_service_final_transfer(engine, transfer, &desc_count); - - /* Before starting engine again, clear the writeback data */ - if (poll_mode) { - wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - wb_data->completed_desc_count = 0; - } - - /* Restart the engine following the servicing */ - engine_service_resume(engine); - - return 0; -} - -/* engine_service_work */ -static void engine_service_work(struct work_struct *work) -{ - struct xdma_engine *engine; - unsigned long flags; - - engine = container_of(work, struct xdma_engine, work); - BUG_ON(engine->magic != MAGIC_ENGINE); - - /* lock the engine */ - spin_lock_irqsave(&engine->lock, flags); - - dbg_tfr("engine_service() for %s engine %p\n", - engine->name, engine); - if (engine->cyclic_req) - engine_service_cyclic(engine); - else - engine_service(engine, 0); - - /* re-enable interrupts for this engine */ - if (engine->xdev->msix_enabled){ - write_register(engine->interrupt_enable_mask_value, - &engine->regs->interrupt_enable_mask_w1s, - (unsigned long)(&engine->regs->interrupt_enable_mask_w1s) - - (unsigned long)(&engine->regs)); - } else - channel_interrupts_enable(engine->xdev, 
engine->irq_bitmask); - - /* unlock the engine */ - spin_unlock_irqrestore(&engine->lock, flags); -} - -static u32 engine_service_wb_monitor(struct xdma_engine *engine, - u32 expected_wb) -{ - struct xdma_poll_wb *wb_data; - u32 desc_wb = 0; - u32 sched_limit = 0; - unsigned long timeout; - - BUG_ON(!engine); - wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - - /* - * Poll the writeback location for the expected number of - * descriptors / error events This loop is skipped for cyclic mode, - * where the expected_desc_count passed in is zero, since it cannot be - * determined before the function is called - */ - - timeout = jiffies + (POLL_TIMEOUT_SECONDS * HZ); - while (expected_wb != 0) { - desc_wb = wb_data->completed_desc_count; - - if (desc_wb & WB_ERR_MASK) - break; - else if (desc_wb == expected_wb) - break; - - /* RTO - prevent system from hanging in polled mode */ - if (time_after(jiffies, timeout)) { - dbg_tfr("Polling timeout occurred"); - dbg_tfr("desc_wb = 0x%08x, expected 0x%08x\n", desc_wb, - expected_wb); - if ((desc_wb & WB_COUNT_MASK) > expected_wb) - desc_wb = expected_wb | WB_ERR_MASK; - - break; - } - - /* - * Define NUM_POLLS_PER_SCHED to limit how much time is spent - * in the scheduler - */ - - if (sched_limit != 0) { - if ((sched_limit % NUM_POLLS_PER_SCHED) == 0) - schedule(); - } - sched_limit++; - } - - return desc_wb; -} - -static int engine_service_poll(struct xdma_engine *engine, - u32 expected_desc_count) -{ - struct xdma_poll_wb *writeback_data; - u32 desc_wb = 0; - unsigned long flags; - int rv = 0; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - - if ((expected_desc_count & WB_COUNT_MASK) != expected_desc_count) { - dbg_tfr("Queued descriptor count is larger than supported\n"); - return -1; - } - - /* - * Poll the writeback location for the expected number of - * descriptors / error events This loop is skipped for cyclic mode, - 
* where the expected_desc_count passed in is zero, since it cannot be - * determined before the function is called - */ - - desc_wb = engine_service_wb_monitor(engine, expected_desc_count); - - spin_lock_irqsave(&engine->lock, flags); - dbg_tfr("%s service.\n", engine->name); - if (engine->cyclic_req) { - rv = engine_service_cyclic(engine); - } else { - rv = engine_service(engine, desc_wb); - } - spin_unlock_irqrestore(&engine->lock, flags); - - return rv; -} - -static irqreturn_t user_irq_service(int irq, struct xdma_user_irq *user_irq) -{ - unsigned long flags; - - BUG_ON(!user_irq); - - if (user_irq->handler) - return user_irq->handler(user_irq->user_idx, user_irq->dev); - - spin_lock_irqsave(&(user_irq->events_lock), flags); - if (!user_irq->events_irq) { - user_irq->events_irq = 1; - wake_up_interruptible(&(user_irq->events_wq)); - } - spin_unlock_irqrestore(&(user_irq->events_lock), flags); - - return IRQ_HANDLED; -} - -/* - * xdma_isr() - Interrupt handler - * - * @dev_id pointer to xdma_dev - */ -static irqreturn_t xdma_isr(int irq, void *dev_id) -{ - u32 ch_irq; - u32 user_irq; - u32 mask; - struct xdma_dev *xdev; - struct interrupt_regs *irq_regs; - - dbg_irq("(irq=%d, dev 0x%p) <<<< ISR.\n", irq, dev_id); - BUG_ON(!dev_id); - xdev = (struct xdma_dev *)dev_id; - - if (!xdev) { - WARN_ON(!xdev); - dbg_irq("xdma_isr(irq=%d) xdev=%p ??\n", irq, xdev); - return IRQ_NONE; - } - - irq_regs = (struct interrupt_regs *)(xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - - /* read channel interrupt requests */ - ch_irq = read_register(&irq_regs->channel_int_request); - dbg_irq("ch_irq = 0x%08x\n", ch_irq); - - /* - * disable all interrupts that fired; these are re-enabled individually - * after the causing module has been fully serviced. 
- */ - if (ch_irq) - channel_interrupts_disable(xdev, ch_irq); - - /* read user interrupts - this read also flushes the above write */ - user_irq = read_register(&irq_regs->user_int_request); - dbg_irq("user_irq = 0x%08x\n", user_irq); - - if (user_irq) { - int user = 0; - u32 mask = 1; - int max = xdev->h2c_channel_max; - - for (; user < max && user_irq; user++, mask <<= 1) { - if (user_irq & mask) { - user_irq &= ~mask; - user_irq_service(irq, &xdev->user_irq[user]); - } - } - } - - mask = ch_irq & xdev->mask_irq_h2c; - if (mask) { - int channel = 0; - int max = xdev->h2c_channel_max; - - /* iterate over H2C (PCIe read) */ - for (channel = 0; channel < max && mask; channel++) { - struct xdma_engine *engine = &xdev->engine_h2c[channel]; - - /* engine present and its interrupt fired? */ - if((engine->irq_bitmask & mask) && - (engine->magic == MAGIC_ENGINE)) { - mask &= ~engine->irq_bitmask; - dbg_tfr("schedule_work, %s.\n", engine->name); - schedule_work(&engine->work); - } - } - } - - mask = ch_irq & xdev->mask_irq_c2h; - if (mask) { - int channel = 0; - int max = xdev->c2h_channel_max; - - /* iterate over C2H (PCIe write) */ - for (channel = 0; channel < max && mask; channel++) { - struct xdma_engine *engine = &xdev->engine_c2h[channel]; - - /* engine present and its interrupt fired? 
*/ - if((engine->irq_bitmask & mask) && - (engine->magic == MAGIC_ENGINE)) { - mask &= ~engine->irq_bitmask; - dbg_tfr("schedule_work, %s.\n", engine->name); - schedule_work(&engine->work); - } - } - } - - xdev->irq_count++; - return IRQ_HANDLED; -} - -/* - * xdma_user_irq() - Interrupt handler for user interrupts in MSI-X mode - * - * @dev_id pointer to xdma_dev - */ -static irqreturn_t xdma_user_irq(int irq, void *dev_id) -{ - struct xdma_user_irq *user_irq; - - dbg_irq("(irq=%d) <<<< INTERRUPT SERVICE ROUTINE\n", irq); - - BUG_ON(!dev_id); - user_irq = (struct xdma_user_irq *)dev_id; - - return user_irq_service(irq, user_irq); -} - -/* - * xdma_channel_irq() - Interrupt handler for channel interrupts in MSI-X mode - * - * @dev_id pointer to xdma_dev - */ -static irqreturn_t xdma_channel_irq(int irq, void *dev_id) -{ - struct xdma_dev *xdev; - struct xdma_engine *engine; - struct interrupt_regs *irq_regs; - - dbg_irq("(irq=%d) <<<< INTERRUPT service ROUTINE\n", irq); - BUG_ON(!dev_id); - - engine = (struct xdma_engine *)dev_id; - xdev = engine->xdev; - - if (!xdev) { - WARN_ON(!xdev); - dbg_irq("xdma_channel_irq(irq=%d) xdev=%p ??\n", irq, xdev); - return IRQ_NONE; - } - - irq_regs = (struct interrupt_regs *)(xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - - /* Disable the interrupt for this engine */ - write_register(engine->interrupt_enable_mask_value, - &engine->regs->interrupt_enable_mask_w1c, - (unsigned long) - (&engine->regs->interrupt_enable_mask_w1c) - - (unsigned long)(&engine->regs)); - /* Dummy read to flush the above write */ - read_register(&irq_regs->channel_int_pending); - /* Schedule the bottom half */ - schedule_work(&engine->work); - - /* - * RTO - need to protect access here if multiple MSI-X are used for - * user interrupts - */ - xdev->irq_count++; - return IRQ_HANDLED; -} - -/* - * Unmap the BAR regions that had been mapped earlier using map_bars() - */ -static void unmap_bars(struct xdma_dev *xdev, struct pci_dev *dev) -{ - int 
i; - - for (i = 0; i < XDMA_BAR_NUM; i++) { - /* is this BAR mapped? */ - if (xdev->bar[i]) { - /* unmap BAR */ - pci_iounmap(dev, xdev->bar[i]); - /* mark as unmapped */ - xdev->bar[i] = NULL; - } - } -} - -static int map_single_bar(struct xdma_dev *xdev, struct pci_dev *dev, int idx) -{ - resource_size_t bar_start; - resource_size_t bar_len; - resource_size_t map_len; - - bar_start = pci_resource_start(dev, idx); - bar_len = pci_resource_len(dev, idx); - map_len = bar_len; - - xdev->bar[idx] = NULL; - - /* do not map BARs with length 0. Note that start MAY be 0! */ - if (!bar_len) { - //pr_info("BAR #%d is not present - skipping\n", idx); - return 0; - } - - /* BAR size exceeds maximum desired mapping? */ - if (bar_len > INT_MAX) { - pr_info("Limit BAR %d mapping from %llu to %d bytes\n", idx, - (u64)bar_len, INT_MAX); - map_len = (resource_size_t)INT_MAX; - } - /* - * map the full device memory or IO region into kernel virtual - * address space - */ - dbg_init("BAR%d: %llu bytes to be mapped.\n", idx, (u64)map_len); - xdev->bar[idx] = pci_iomap(dev, idx, map_len); - - if (!xdev->bar[idx]) { - pr_info("Could not map BAR %d.\n", idx); - return -1; - } - - pr_info("BAR%d at 0x%llx mapped at 0x%p, length=%llu(/%llu)\n", idx, - (u64)bar_start, xdev->bar[idx], (u64)map_len, (u64)bar_len); - - return (int)map_len; -} - -static int is_config_bar(struct xdma_dev *xdev, int idx) -{ - u32 irq_id = 0; - u32 cfg_id = 0; - int flag = 0; - u32 mask = 0xffff0000; /* Compare only XDMA ID's not Version number */ - struct interrupt_regs *irq_regs = - (struct interrupt_regs *) (xdev->bar[idx] + XDMA_OFS_INT_CTRL); - struct config_regs *cfg_regs = - (struct config_regs *)(xdev->bar[idx] + XDMA_OFS_CONFIG); - - irq_id = read_register(&irq_regs->identifier); - cfg_id = read_register(&cfg_regs->identifier); - - if (((irq_id & mask)== IRQ_BLOCK_ID) && - ((cfg_id & mask)== CONFIG_BLOCK_ID)) { - dbg_init("BAR %d is the XDMA config BAR\n", idx); - flag = 1; - } else { - dbg_init("BAR %d is 
NOT the XDMA config BAR: 0x%x, 0x%x.\n", - idx, irq_id, cfg_id); - flag = 0; - } - - return flag; -} - -static void identify_bars(struct xdma_dev *xdev, int *bar_id_list, int num_bars, - int config_bar_pos) -{ - /* - * The following logic identifies which BARs contain what functionality - * based on the position of the XDMA config BAR and the number of BARs - * detected. The rules are that the user logic and bypass logic BARs - * are optional. When both are present, the XDMA config BAR will be the - * 2nd BAR detected (config_bar_pos = 1), with the user logic being - * detected first and the bypass being detected last. When one is - * omitted, the type of BAR present can be identified by whether the - * XDMA config BAR is detected first or last. When both are omitted, - * only the XDMA config BAR is present. This somewhat convoluted - * approach is used instead of relying on BAR numbers in order to work - * correctly with both 32-bit and 64-bit BARs. - */ - - BUG_ON(!xdev); - BUG_ON(!bar_id_list); - - dbg_init("xdev 0x%p, bars %d, config at %d.\n", - xdev, num_bars, config_bar_pos); - - switch (num_bars) { - case 1: - /* Only one BAR present - no extra work necessary */ - break; - - case 2: - if (config_bar_pos == 0) { - xdev->bypass_bar_idx = bar_id_list[1]; - } else if (config_bar_pos == 1) { - xdev->user_bar_idx = bar_id_list[0]; - } else { - pr_info("2, XDMA config BAR unexpected %d.\n", - config_bar_pos); - } - break; - - case 3: - case 4: - if ((config_bar_pos == 1) || (config_bar_pos == 2)) { - /* user bar at bar #0 */ - xdev->user_bar_idx = bar_id_list[0]; - /* bypass bar at the last bar */ - xdev->bypass_bar_idx = bar_id_list[num_bars - 1]; - } else { - pr_info("3/4, XDMA config BAR unexpected %d.\n", - config_bar_pos); - } - break; - - default: - /* Should not occur - warn user but safe to continue */ - pr_info("Unexpected # BARs (%d), XDMA config BAR only.\n", - num_bars); - break; - - } - pr_info("%d BARs: config %d, user %d, bypass %d.\n", - num_bars, 
config_bar_pos, xdev->user_bar_idx, - xdev->bypass_bar_idx); -} - -/* map_bars() -- map device regions into kernel virtual address space - * - * Map the device memory regions into kernel virtual address space after - * verifying their sizes respect the minimum sizes needed - */ -static int map_bars(struct xdma_dev *xdev, struct pci_dev *dev) -{ - int rv; - int i; - int bar_id_list[XDMA_BAR_NUM]; - int bar_id_idx = 0; - int config_bar_pos = 0; - - /* iterate through all the BARs */ - for (i = 0; i < XDMA_BAR_NUM; i++) { - int bar_len; - - bar_len = map_single_bar(xdev, dev, i); - if (bar_len == 0) { - continue; - } else if (bar_len < 0) { - rv = -EINVAL; - goto fail; - } - - /* Try to identify BAR as XDMA control BAR */ - if ((bar_len >= XDMA_BAR_SIZE) && (xdev->config_bar_idx < 0)) { - - if (is_config_bar(xdev, i)) { - xdev->config_bar_idx = i; - config_bar_pos = bar_id_idx; - pr_info("config bar %d, pos %d.\n", - xdev->config_bar_idx, config_bar_pos); - } - } - - bar_id_list[bar_id_idx] = i; - bar_id_idx++; - } - - /* The XDMA config BAR must always be present */ - if (xdev->config_bar_idx < 0) { - pr_info("Failed to detect XDMA config BAR\n"); - rv = -EINVAL; - goto fail; - } - - identify_bars(xdev, bar_id_list, bar_id_idx, config_bar_pos); - - /* successfully mapped all required BAR regions */ - return 0; - -fail: - /* unwind; unmap any BARs that we did map */ - unmap_bars(xdev, dev); - return rv; -} - -/* - * MSI-X interrupt: - * vectors, followed by vectors - */ - -/* - * RTO - code to detect if MSI/MSI-X capability exists is derived - * from linux/pci/msi.c - pci_msi_check_device - */ - -#ifndef arch_msi_check_device -int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) -{ - return 0; -} -#endif - -/* type = PCI_CAP_ID_MSI or PCI_CAP_ID_MSIX */ -static int msi_msix_capable(struct pci_dev *dev, int type) -{ - struct pci_bus *bus; - int ret; - - if (!dev || dev->no_msi) - return 0; - - for (bus = dev->bus; bus; bus = bus->parent) - if 
(bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return 0; - - ret = arch_msi_check_device(dev, 1, type); - if (ret) - return 0; - - if (!pci_find_capability(dev, type)) - return 0; - - return 1; -} - -static void disable_msi_msix(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - if (xdev->msix_enabled) { - pci_disable_msix(pdev); - xdev->msix_enabled = 0; - } else if (xdev->msi_enabled) { - pci_disable_msi(pdev); - xdev->msi_enabled = 0; - } -} - -static int enable_msi_msix(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - int rv = 0; - - BUG_ON(!xdev); - BUG_ON(!pdev); - - if (!interrupt_mode && msi_msix_capable(pdev, PCI_CAP_ID_MSIX)) { - int req_nvec = xdev->c2h_channel_max + xdev->h2c_channel_max + - xdev->user_max; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - dbg_init("Enabling MSI-X\n"); - rv = pci_alloc_irq_vectors(pdev, req_nvec, req_nvec, - PCI_IRQ_MSIX); -#else - int i; - - dbg_init("Enabling MSI-X\n"); - for (i = 0; i < req_nvec; i++) - xdev->entry[i].entry = i; - - rv = pci_enable_msix(pdev, xdev->entry, req_nvec); -#endif - if (rv < 0) - dbg_init("Couldn't enable MSI-X mode: %d\n", rv); - - xdev->msix_enabled = 1; - - } else if (interrupt_mode == 1 && - msi_msix_capable(pdev, PCI_CAP_ID_MSI)) { - /* enable message signalled interrupts */ - dbg_init("pci_enable_msi()\n"); - rv = pci_enable_msi(pdev); - if (rv < 0) - dbg_init("Couldn't enable MSI mode: %d\n", rv); - xdev->msi_enabled = 1; - - } else { - dbg_init("MSI/MSI-X not detected - using legacy interrupts\n"); - } - - return rv; -} - -static void pci_check_intr_pend(struct pci_dev *pdev) -{ - u16 v; - - pci_read_config_word(pdev, PCI_STATUS, &v); - if (v & PCI_STATUS_INTERRUPT) { - pr_info("%s PCI STATUS Interrupt pending 0x%x.\n", - dev_name(&pdev->dev), v); - pci_write_config_word(pdev, PCI_STATUS, PCI_STATUS_INTERRUPT); - } -} - -static void pci_keep_intx_enabled(struct pci_dev *pdev) -{ - /* workaround to a h/w bug: - * when msix/msi become unavaile, default to legacy. 
- * However the legacy enable was not checked. - * If the legacy was disabled, no ack then everything stuck - */ - u16 pcmd, pcmd_new; - - pci_read_config_word(pdev, PCI_COMMAND, &pcmd); - pcmd_new = pcmd & ~PCI_COMMAND_INTX_DISABLE; - if (pcmd_new != pcmd) { - pr_info("%s: clear INTX_DISABLE, 0x%x -> 0x%x.\n", - dev_name(&pdev->dev), pcmd, pcmd_new); - pci_write_config_word(pdev, PCI_COMMAND, pcmd_new); - } -} - -static void prog_irq_msix_user(struct xdma_dev *xdev, bool clear) -{ - /* user */ - struct interrupt_regs *int_regs = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - u32 i = xdev->c2h_channel_max + xdev->h2c_channel_max; - u32 max = i + xdev->user_max; - int j; - - for (j = 0; i < max; j++) { - u32 val = 0; - int k; - int shift = 0; - - if (clear) - i += 4; - else - for (k = 0; k < 4 && i < max; i++, k++, shift += 8) - val |= (i & 0x1f) << shift; - - write_register(val, &int_regs->user_msi_vector[j], - XDMA_OFS_INT_CTRL + - ((unsigned long)&int_regs->user_msi_vector[j] - - (unsigned long)int_regs)); - - dbg_init("vector %d, 0x%x.\n", j, val); - } -} - -static void prog_irq_msix_channel(struct xdma_dev *xdev, bool clear) -{ - struct interrupt_regs *int_regs = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - u32 max = xdev->c2h_channel_max + xdev->h2c_channel_max; - u32 i; - int j; - - /* engine */ - for (i = 0, j = 0; i < max; j++) { - u32 val = 0; - int k; - int shift = 0; - - if (clear) - i += 4; - else - for (k = 0; k < 4 && i < max; i++, k++, shift += 8) - val |= (i & 0x1f) << shift; - - write_register(val, &int_regs->channel_msi_vector[j], - XDMA_OFS_INT_CTRL + - ((unsigned long)&int_regs->channel_msi_vector[j] - - (unsigned long)int_regs)); - dbg_init("vector %d, 0x%x.\n", j, val); - } -} - -static void irq_msix_channel_teardown(struct xdma_dev *xdev) -{ - struct xdma_engine *engine; - int j = 0; - int i = 0; - - if (!xdev->msix_enabled) - return; - - 
prog_irq_msix_channel(xdev, 1); - - engine = xdev->engine_h2c; - for (i = 0; i < xdev->h2c_channel_max; i++, j++, engine++) { - if (!engine->msix_irq_line) - break; - dbg_sg("Release IRQ#%d for engine %p\n", engine->msix_irq_line, - engine); - free_irq(engine->msix_irq_line, engine); - } - - engine = xdev->engine_c2h; - for (i = 0; i < xdev->c2h_channel_max; i++, j++, engine++) { - if (!engine->msix_irq_line) - break; - dbg_sg("Release IRQ#%d for engine %p\n", engine->msix_irq_line, - engine); - free_irq(engine->msix_irq_line, engine); - } -} - -static int irq_msix_channel_setup(struct xdma_dev *xdev) -{ - int i; - int j = xdev->h2c_channel_max; - int rv = 0; - u32 vector; - struct xdma_engine *engine; - - BUG_ON(!xdev); - if (!xdev->msix_enabled) - return 0; - - engine = xdev->engine_h2c; - for (i = 0; i < xdev->h2c_channel_max; i++, engine++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - vector = pci_irq_vector(xdev->pdev, i); -#else - vector = xdev->entry[i].vector; -#endif - rv = request_irq(vector, xdma_channel_irq, 0, xdev->mod_name, - engine); - if (rv) { - pr_info("requesti irq#%d failed %d, engine %s.\n", - vector, rv, engine->name); - return rv; - } - pr_info("engine %s, irq#%d.\n", engine->name, vector); - engine->msix_irq_line = vector; - } - - engine = xdev->engine_c2h; - for (i = 0; i < xdev->c2h_channel_max; i++, j++, engine++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - vector = pci_irq_vector(xdev->pdev, j); -#else - vector = xdev->entry[j].vector; -#endif - rv = request_irq(vector, xdma_channel_irq, 0, xdev->mod_name, - engine); - if (rv) { - pr_info("requesti irq#%d failed %d, engine %s.\n", - vector, rv, engine->name); - return rv; - } - pr_info("engine %s, irq#%d.\n", engine->name, vector); - engine->msix_irq_line = vector; - } - - return 0; -} - -static void irq_msix_user_teardown(struct xdma_dev *xdev) -{ - int i; - int j = xdev->h2c_channel_max + xdev->c2h_channel_max; - - BUG_ON(!xdev); - - if (!xdev->msix_enabled) - 
return; - - prog_irq_msix_user(xdev, 1); - - for (i = 0; i < xdev->user_max; i++, j++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - u32 vector = pci_irq_vector(xdev->pdev, j); -#else - u32 vector = xdev->entry[j].vector; -#endif - dbg_init("user %d, releasing IRQ#%d\n", i, vector); - free_irq(vector, &xdev->user_irq[i]); - } -} - -static int irq_msix_user_setup(struct xdma_dev *xdev) -{ - int i; - int j = xdev->h2c_channel_max + xdev->c2h_channel_max; - int rv = 0; - - /* vectors set in probe_scan_for_msi() */ - for (i = 0; i < xdev->user_max; i++, j++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - u32 vector = pci_irq_vector(xdev->pdev, j); -#else - u32 vector = xdev->entry[j].vector; -#endif - rv = request_irq(vector, xdma_user_irq, 0, xdev->mod_name, - &xdev->user_irq[i]); - if (rv) { - pr_info("user %d couldn't use IRQ#%d, %d\n", - i, vector, rv); - break; - } - pr_info("%d-USR-%d, IRQ#%d with 0x%p\n", xdev->idx, i, vector, - &xdev->user_irq[i]); - } - - /* If any errors occur, free IRQs that were successfully requested */ - if (rv) { - for (i--, j--; i >= 0; i--, j--) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - u32 vector = pci_irq_vector(xdev->pdev, j); -#else - u32 vector = xdev->entry[j].vector; -#endif - free_irq(vector, &xdev->user_irq[i]); - } - } - - return rv; -} - -static int irq_msi_setup(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - int rv; - - xdev->irq_line = (int)pdev->irq; - rv = request_irq(pdev->irq, xdma_isr, 0, xdev->mod_name, xdev); - if (rv) - dbg_init("Couldn't use IRQ#%d, %d\n", pdev->irq, rv); - else - dbg_init("Using IRQ#%d with 0x%p\n", pdev->irq, xdev); - - return rv; -} - -static int irq_legacy_setup(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - u32 w; - u8 val; - void *reg; - int rv; - - pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &val); - dbg_init("Legacy Interrupt register value = %d\n", val); - if (val > 1) { - val--; - w = (val<<24) | (val<<16) | (val<<8)| val; - /* Program IRQ Block Channel 
vactor and IRQ Block User vector - * with Legacy interrupt value */ - reg = xdev->bar[xdev->config_bar_idx] + 0x2080; // IRQ user - write_register(w, reg, 0x2080); - write_register(w, reg+0x4, 0x2084); - write_register(w, reg+0x8, 0x2088); - write_register(w, reg+0xC, 0x208C); - reg = xdev->bar[xdev->config_bar_idx] + 0x20A0; // IRQ Block - write_register(w, reg, 0x20A0); - write_register(w, reg+0x4, 0x20A4); - } - - xdev->irq_line = (int)pdev->irq; - rv = request_irq(pdev->irq, xdma_isr, IRQF_SHARED, xdev->mod_name, - xdev); - if (rv) - dbg_init("Couldn't use IRQ#%d, %d\n", pdev->irq, rv); - else - dbg_init("Using IRQ#%d with 0x%p\n", pdev->irq, xdev); - - return rv; -} - -static void irq_teardown(struct xdma_dev *xdev) -{ - if (xdev->msix_enabled) { - irq_msix_channel_teardown(xdev); - irq_msix_user_teardown(xdev); - } else if (xdev->irq_line != -1) { - dbg_init("Releasing IRQ#%d\n", xdev->irq_line); - free_irq(xdev->irq_line, xdev); - } -} - -static int irq_setup(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - pci_keep_intx_enabled(pdev); - - if (xdev->msix_enabled) { - int rv = irq_msix_channel_setup(xdev); - if (rv) - return rv; - rv = irq_msix_user_setup(xdev); - if (rv) - return rv; - prog_irq_msix_channel(xdev, 0); - prog_irq_msix_user(xdev, 0); - - return 0; - } else if (xdev->msi_enabled) - return irq_msi_setup(xdev, pdev); - - return irq_legacy_setup(xdev, pdev); -} - -#ifdef __LIBXDMA_DEBUG__ -static void dump_desc(struct xdma_desc *desc_virt) -{ - int j; - u32 *p = (u32 *)desc_virt; - static char * const field_name[] = { - "magic|extra_adjacent|control", "bytes", "src_addr_lo", - "src_addr_hi", "dst_addr_lo", "dst_addr_hi", "next_addr", - "next_addr_pad"}; - char *dummy; - - /* remove warning about unused variable when debug printing is off */ - dummy = field_name[0]; - - for (j = 0; j < 8; j += 1) { - pr_info("0x%08lx/0x%02lx: 0x%08x 0x%08x %s\n", - (uintptr_t)p, (uintptr_t)p & 15, (int)*p, - le32_to_cpu(*p), field_name[j]); - p++; - } - 
pr_info("\n"); -} - -static void transfer_dump(struct xdma_transfer *transfer) -{ - int i; - struct xdma_desc *desc_virt = transfer->desc_virt; - - pr_info("xfer 0x%p, state 0x%x, f 0x%x, dir %d, len %u, last %d.\n", - transfer, transfer->state, transfer->flags, transfer->dir, - transfer->len, transfer->last_in_request); - - pr_info("transfer 0x%p, desc %d, bus 0x%llx, adj %d.\n", - transfer, transfer->desc_num, (u64)transfer->desc_bus, - transfer->desc_adjacent); - for (i = 0; i < transfer->desc_num; i += 1) - dump_desc(desc_virt + i); -} -#endif /* __LIBXDMA_DEBUG__ */ - -/* xdma_desc_alloc() - Allocate cache-coherent array of N descriptors. - * - * Allocates an array of 'number' descriptors in contiguous PCI bus addressable - * memory. Chains the descriptors as a singly-linked list; the descriptor's - * next * pointer specifies the bus address of the next descriptor. - * - * - * @dev Pointer to pci_dev - * @number Number of descriptors to be allocated - * @desc_bus_p Pointer where to store the first descriptor bus address - * - * @return Virtual address of the first descriptor - * - */ -static void transfer_desc_init(struct xdma_transfer *transfer, int count) -{ - struct xdma_desc *desc_virt = transfer->desc_virt; - dma_addr_t desc_bus = transfer->desc_bus; - int i; - int adj = count - 1; - int extra_adj; - u32 temp_control; - - BUG_ON(count > XDMA_TRANSFER_MAX_DESC); - - /* create singly-linked list for SG DMA controller */ - for (i = 0; i < count - 1; i++) { - /* increment bus address to next in array */ - desc_bus += sizeof(struct xdma_desc); - - /* singly-linked list uses bus addresses */ - desc_virt[i].next_lo = cpu_to_le32(PCI_DMA_L(desc_bus)); - desc_virt[i].next_hi = cpu_to_le32(PCI_DMA_H(desc_bus)); - desc_virt[i].bytes = cpu_to_le32(0); - - /* any adjacent descriptors? 
*/ - if (adj > 0) { - extra_adj = adj - 1; - if (extra_adj > MAX_EXTRA_ADJ) - extra_adj = MAX_EXTRA_ADJ; - - adj--; - } else { - extra_adj = 0; - } - - temp_control = DESC_MAGIC | (extra_adj << 8); - - desc_virt[i].control = cpu_to_le32(temp_control); - } - /* { i = number - 1 } */ - /* zero the last descriptor next pointer */ - desc_virt[i].next_lo = cpu_to_le32(0); - desc_virt[i].next_hi = cpu_to_le32(0); - desc_virt[i].bytes = cpu_to_le32(0); - - temp_control = DESC_MAGIC; - - desc_virt[i].control = cpu_to_le32(temp_control); -} - -/* xdma_desc_link() - Link two descriptors - * - * Link the first descriptor to a second descriptor, or terminate the first. - * - * @first first descriptor - * @second second descriptor, or NULL if first descriptor must be set as last. - * @second_bus bus address of second descriptor - */ -static void xdma_desc_link(struct xdma_desc *first, struct xdma_desc *second, - dma_addr_t second_bus) -{ - /* - * remember reserved control in first descriptor, but zero - * extra_adjacent! - */ - /* RTO - what's this about? Shouldn't it be 0x0000c0ffUL? */ - u32 control = le32_to_cpu(first->control) & 0x0000f0ffUL; - /* second descriptor given? 
*/ - if (second) { - /* - * link last descriptor of 1st array to first descriptor of - * 2nd array - */ - first->next_lo = cpu_to_le32(PCI_DMA_L(second_bus)); - first->next_hi = cpu_to_le32(PCI_DMA_H(second_bus)); - WARN_ON(first->next_hi); - /* no second descriptor given */ - } else { - /* first descriptor is the last */ - first->next_lo = 0; - first->next_hi = 0; - } - /* merge magic, extra_adjacent and control field */ - control |= DESC_MAGIC; - - /* write bytes and next_num */ - first->control = cpu_to_le32(control); -} - -/* xdma_desc_adjacent -- Set how many descriptors are adjacent to this one */ -static void xdma_desc_adjacent(struct xdma_desc *desc, int next_adjacent) -{ - int extra_adj = 0; - /* remember reserved and control bits */ - u32 control = le32_to_cpu(desc->control) & 0x0000f0ffUL; - u32 max_adj_4k = 0; - - if (next_adjacent > 0) { - extra_adj = next_adjacent - 1; - if (extra_adj > MAX_EXTRA_ADJ){ - extra_adj = MAX_EXTRA_ADJ; - } - max_adj_4k = (0x1000 - ((le32_to_cpu(desc->next_lo))&0xFFF))/32 - 1; - if (extra_adj>max_adj_4k) { - extra_adj = max_adj_4k; - } - if(extra_adj<0){ - printk("Warning: extra_adj<0, converting it to 0\n"); - extra_adj = 0; - } - } - /* merge adjacent and control field */ - control |= 0xAD4B0000UL | (extra_adj << 8); - /* write control and next_adjacent */ - desc->control = cpu_to_le32(control); -} - -/* xdma_desc_control -- Set complete control field of a descriptor. */ -static void xdma_desc_control_set(struct xdma_desc *first, u32 control_field) -{ - /* remember magic and adjacent number */ - u32 control = le32_to_cpu(first->control) & ~(LS_BYTE_MASK); - - BUG_ON(control_field & ~(LS_BYTE_MASK)); - /* merge adjacent and control field */ - control |= control_field; - /* write control and next_adjacent */ - first->control = cpu_to_le32(control); -} - -/* xdma_desc_clear -- Clear bits in control field of a descriptor. 
*/ -static void xdma_desc_control_clear(struct xdma_desc *first, u32 clear_mask) -{ - /* remember magic and adjacent number */ - u32 control = le32_to_cpu(first->control); - - BUG_ON(clear_mask & ~(LS_BYTE_MASK)); - - /* merge adjacent and control field */ - control &= (~clear_mask); - /* write control and next_adjacent */ - first->control = cpu_to_le32(control); -} - -/* xdma_desc_done - recycle cache-coherent linked list of descriptors. - * - * @dev Pointer to pci_dev - * @number Number of descriptors to be allocated - * @desc_virt Pointer to (i.e. virtual address of) first descriptor in list - * @desc_bus Bus address of first descriptor in list - */ -static inline void xdma_desc_done(struct xdma_desc *desc_virt) -{ - memset(desc_virt, 0, XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc)); -} - -/* xdma_desc() - Fill a descriptor with the transfer details - * - * @desc pointer to descriptor to be filled - * @addr root complex address - * @ep_addr end point address - * @len number of bytes, must be a (non-negative) multiple of 4. - * @dir, dma direction - * is the end point address. If zero, vice versa. 
- * - * Does not modify the next pointer - */ -static void xdma_desc_set(struct xdma_desc *desc, dma_addr_t rc_bus_addr, - u64 ep_addr, int len, int dir) -{ - /* transfer length */ - desc->bytes = cpu_to_le32(len); - if (dir == DMA_TO_DEVICE) { - /* read from root complex memory (source address) */ - desc->src_addr_lo = cpu_to_le32(PCI_DMA_L(rc_bus_addr)); - desc->src_addr_hi = cpu_to_le32(PCI_DMA_H(rc_bus_addr)); - /* write to end point address (destination address) */ - desc->dst_addr_lo = cpu_to_le32(PCI_DMA_L(ep_addr)); - desc->dst_addr_hi = cpu_to_le32(PCI_DMA_H(ep_addr)); - } else { - /* read from end point address (source address) */ - desc->src_addr_lo = cpu_to_le32(PCI_DMA_L(ep_addr)); - desc->src_addr_hi = cpu_to_le32(PCI_DMA_H(ep_addr)); - /* write to root complex memory (destination address) */ - desc->dst_addr_lo = cpu_to_le32(PCI_DMA_L(rc_bus_addr)); - desc->dst_addr_hi = cpu_to_le32(PCI_DMA_H(rc_bus_addr)); - } -} - -/* - * should hold the engine->lock; - */ -static void transfer_abort(struct xdma_engine *engine, - struct xdma_transfer *transfer) -{ - struct xdma_transfer *head; - - BUG_ON(!engine); - BUG_ON(!transfer); - BUG_ON(transfer->desc_num == 0); - - pr_info("abort transfer 0x%p, desc %d, engine desc queued %d.\n", - transfer, transfer->desc_num, engine->desc_dequeued); - - head = list_entry(engine->transfer_list.next, struct xdma_transfer, - entry); - if (head == transfer) - list_del(engine->transfer_list.next); - else - pr_info("engine %s, transfer 0x%p NOT found, 0x%p.\n", - engine->name, transfer, head); - - if (transfer->state == TRANSFER_STATE_SUBMITTED) - transfer->state = TRANSFER_STATE_ABORTED; -} - -/* transfer_queue() - Queue a DMA transfer on the engine - * - * @engine DMA engine doing the transfer - * @transfer DMA transfer submitted to the engine - * - * Takes and releases the engine spinlock - */ -static int transfer_queue(struct xdma_engine *engine, - struct xdma_transfer *transfer) -{ - int rv = 0; - struct xdma_transfer 
*transfer_started; - struct xdma_dev *xdev; - unsigned long flags; - - BUG_ON(!engine); - BUG_ON(!engine->xdev); - BUG_ON(!transfer); - BUG_ON(transfer->desc_num == 0); - dbg_tfr("transfer_queue(transfer=0x%p).\n", transfer); - - xdev = engine->xdev; - if (xdma_device_flag_check(xdev, XDEV_FLAG_OFFLINE)) { - pr_info("dev 0x%p offline, transfer 0x%p not queued.\n", - xdev, transfer); - return -EBUSY; - } - - /* lock the engine state */ - spin_lock_irqsave(&engine->lock, flags); - - engine->prev_cpu = get_cpu(); - put_cpu(); - - /* engine is being shutdown; do not accept new transfers */ - if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { - pr_info("engine %s offline, transfer 0x%p not queued.\n", - engine->name, transfer); - rv = -EBUSY; - goto shutdown; - } - - /* mark the transfer as submitted */ - transfer->state = TRANSFER_STATE_SUBMITTED; - /* add transfer to the tail of the engine transfer queue */ - list_add_tail(&transfer->entry, &engine->transfer_list); - - /* engine is idle? */ - if (!engine->running) { - /* start engine */ - dbg_tfr("transfer_queue(): starting %s engine.\n", - engine->name); - transfer_started = engine_start(engine); - dbg_tfr("transfer=0x%p started %s engine with transfer 0x%p.\n", - transfer, engine->name, transfer_started); - } else { - dbg_tfr("transfer=0x%p queued, with %s engine running.\n", - transfer, engine->name); - } - -shutdown: - /* unlock the engine state */ - dbg_tfr("engine->running = %d\n", engine->running); - spin_unlock_irqrestore(&engine->lock, flags); - return rv; -} - -static void engine_alignments(struct xdma_engine *engine) -{ - u32 w; - u32 align_bytes; - u32 granularity_bytes; - u32 address_bits; - - w = read_register(&engine->regs->alignments); - dbg_init("engine %p name %s alignments=0x%08x\n", engine, - engine->name, (int)w); - - /* RTO - add some macros to extract these fields */ - align_bytes = (w & 0x00ff0000U) >> 16; - granularity_bytes = (w & 0x0000ff00U) >> 8; - address_bits = (w & 0x000000ffU); - - 
dbg_init("align_bytes = %d\n", align_bytes); - dbg_init("granularity_bytes = %d\n", granularity_bytes); - dbg_init("address_bits = %d\n", address_bits); - - if (w) { - engine->addr_align = align_bytes; - engine->len_granularity = granularity_bytes; - engine->addr_bits = address_bits; - } else { - /* Some default values if alignments are unspecified */ - engine->addr_align = 1; - engine->len_granularity = 1; - engine->addr_bits = 64; - } -} - -static void engine_free_resource(struct xdma_engine *engine) -{ - struct xdma_dev *xdev = engine->xdev; - - /* Release memory use for descriptor writebacks */ - if (engine->poll_mode_addr_virt) { - dbg_sg("Releasing memory for descriptor writeback\n"); - dma_free_coherent(&xdev->pdev->dev, - sizeof(struct xdma_poll_wb), - engine->poll_mode_addr_virt, - engine->poll_mode_bus); - dbg_sg("Released memory for descriptor writeback\n"); - engine->poll_mode_addr_virt = NULL; - } - - if (engine->desc) { - dbg_init("device %s, engine %s pre-alloc desc 0x%p,0x%llx.\n", - dev_name(&xdev->pdev->dev), engine->name, - engine->desc, engine->desc_bus); - dma_free_coherent(&xdev->pdev->dev, - XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc), - engine->desc, engine->desc_bus); - engine->desc = NULL; - } - - if (engine->cyclic_result) { - dma_free_coherent(&xdev->pdev->dev, - CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result), - engine->cyclic_result, engine->cyclic_result_bus); - engine->cyclic_result = NULL; - } -} - -static void engine_destroy(struct xdma_dev *xdev, struct xdma_engine *engine) -{ - BUG_ON(!xdev); - BUG_ON(!engine); - - dbg_sg("Shutting down engine %s%d", engine->name, engine->channel); - - /* Disable interrupts to stop processing new events during shutdown */ - write_register(0x0, &engine->regs->interrupt_enable_mask, - (unsigned long)(&engine->regs->interrupt_enable_mask) - - (unsigned long)(&engine->regs)); - - if (enable_credit_mp && engine->streaming && - engine->dir == DMA_FROM_DEVICE) { - u32 reg_value = (0x1 << 
engine->channel) << 16; - struct sgdma_common_regs *reg = (struct sgdma_common_regs *) - (xdev->bar[xdev->config_bar_idx] + - (0x6*TARGET_SPACING)); - write_register(reg_value, ®->credit_mode_enable_w1c, 0); - } - - /* Release memory use for descriptor writebacks */ - engine_free_resource(engine); - - memset(engine, 0, sizeof(struct xdma_engine)); - /* Decrement the number of engines available */ - xdev->engines_num--; -} - -/** - *engine_cyclic_stop() - stop a cyclic transfer running on an SG DMA engine - * - *engine->lock must be taken - */ -struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer = 0; - - /* transfers on queue? */ - if (!list_empty(&engine->transfer_list)) { - /* pick first transfer on the queue (was submitted to engine) */ - transfer = list_entry(engine->transfer_list.next, - struct xdma_transfer, entry); - BUG_ON(!transfer); - - xdma_engine_stop(engine); - - if (transfer->cyclic) { - if (engine->xdma_perf) - dbg_perf("Stopping perf transfer on %s\n", - engine->name); - else - dbg_perf("Stopping cyclic transfer on %s\n", - engine->name); - /* make sure the handler sees correct transfer state */ - transfer->cyclic = 1; - /* - * set STOP flag and interrupt on completion, on the - * last descriptor - */ - xdma_desc_control_set( - transfer->desc_virt + transfer->desc_num - 1, - XDMA_DESC_COMPLETED | XDMA_DESC_STOPPED); - } else { - dbg_sg("(engine=%p) running transfer is not cyclic\n", - engine); - } - } else { - dbg_sg("(engine=%p) found not running transfer.\n", engine); - } - return transfer; -} -EXPORT_SYMBOL_GPL(engine_cyclic_stop); - -static int engine_writeback_setup(struct xdma_engine *engine) -{ - u32 w; - struct xdma_dev *xdev; - struct xdma_poll_wb *writeback; - - BUG_ON(!engine); - xdev = engine->xdev; - BUG_ON(!xdev); - - /* - * RTO - doing the allocation per engine is wasteful since a full page - * is allocated each time - better to allocate one page for the whole - * device during 
probe() and set per-engine offsets here - */ - writeback = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - writeback->completed_desc_count = 0; - - dbg_init("Setting writeback location to 0x%llx for engine %p", - engine->poll_mode_bus, engine); - w = cpu_to_le32(PCI_DMA_L(engine->poll_mode_bus)); - write_register(w, &engine->regs->poll_mode_wb_lo, - (unsigned long)(&engine->regs->poll_mode_wb_lo) - - (unsigned long)(&engine->regs)); - w = cpu_to_le32(PCI_DMA_H(engine->poll_mode_bus)); - write_register(w, &engine->regs->poll_mode_wb_hi, - (unsigned long)(&engine->regs->poll_mode_wb_hi) - - (unsigned long)(&engine->regs)); - - return 0; -} - - -/* engine_create() - Create an SG DMA engine bookkeeping data structure - * - * An SG DMA engine consists of the resources for a single-direction transfer - * queue; the SG DMA hardware, the software queue and interrupt handling. - * - * @dev Pointer to pci_dev - * @offset byte address offset in BAR[xdev->config_bar_idx] resource for the - * SG DMA * controller registers. 
- * @dir: DMA_TO/FROM_DEVICE - * @streaming Whether the engine is attached to AXI ST (rather than MM) - */ -static int engine_init_regs(struct xdma_engine *engine) -{ - u32 reg_value; - int rv = 0; - - write_register(XDMA_CTRL_NON_INCR_ADDR, &engine->regs->control_w1c, - (unsigned long)(&engine->regs->control_w1c) - - (unsigned long)(&engine->regs)); - - engine_alignments(engine); - - /* Configure error interrupts by default */ - reg_value = XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; - reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; - reg_value |= XDMA_CTRL_IE_READ_ERROR; - reg_value |= XDMA_CTRL_IE_DESC_ERROR; - - /* if using polled mode, configure writeback address */ - if (poll_mode) { - rv = engine_writeback_setup(engine); - if (rv) { - dbg_init("%s descr writeback setup failed.\n", - engine->name); - goto fail_wb; - } - } else { - /* enable the relevant completion interrupts */ - reg_value |= XDMA_CTRL_IE_DESC_STOPPED; - reg_value |= XDMA_CTRL_IE_DESC_COMPLETED; - - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) - reg_value |= XDMA_CTRL_IE_IDLE_STOPPED; - } - - /* Apply engine configurations */ - write_register(reg_value, &engine->regs->interrupt_enable_mask, - (unsigned long)(&engine->regs->interrupt_enable_mask) - - (unsigned long)(&engine->regs)); - - engine->interrupt_enable_mask_value = reg_value; - - /* only enable credit mode for AXI-ST C2H */ - if (enable_credit_mp && engine->streaming && - engine->dir == DMA_FROM_DEVICE) { - - struct xdma_dev *xdev = engine->xdev; - u32 reg_value = (0x1 << engine->channel) << 16; - struct sgdma_common_regs *reg = (struct sgdma_common_regs *) - (xdev->bar[xdev->config_bar_idx] + - (0x6*TARGET_SPACING)); - - write_register(reg_value, ®->credit_mode_enable_w1s, 0); - } - - return 0; - -fail_wb: - return rv; -} - -static int engine_alloc_resource(struct xdma_engine *engine) -{ - struct xdma_dev *xdev = engine->xdev; - - engine->desc = dma_alloc_coherent(&xdev->pdev->dev, - 
XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc), - &engine->desc_bus, GFP_KERNEL); - if (!engine->desc) { - pr_warn("dev %s, %s pre-alloc desc OOM.\n", - dev_name(&xdev->pdev->dev), engine->name); - goto err_out; - } - - if (poll_mode) { - engine->poll_mode_addr_virt = dma_alloc_coherent( - &xdev->pdev->dev, - sizeof(struct xdma_poll_wb), - &engine->poll_mode_bus, GFP_KERNEL); - if (!engine->poll_mode_addr_virt) { - pr_warn("%s, %s poll pre-alloc writeback OOM.\n", - dev_name(&xdev->pdev->dev), engine->name); - goto err_out; - } - } - - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { - engine->cyclic_result = dma_alloc_coherent(&xdev->pdev->dev, - CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result), - &engine->cyclic_result_bus, GFP_KERNEL); - - if (!engine->cyclic_result) { - pr_warn("%s, %s pre-alloc result OOM.\n", - dev_name(&xdev->pdev->dev), engine->name); - goto err_out; - } - } - - return 0; - -err_out: - engine_free_resource(engine); - return -ENOMEM; -} - -static int engine_init(struct xdma_engine *engine, struct xdma_dev *xdev, - int offset, enum dma_data_direction dir, int channel) -{ - int rv; - u32 val; - - dbg_init("channel %d, offset 0x%x, dir %d.\n", channel, offset, dir); - - /* set magic */ - engine->magic = MAGIC_ENGINE; - - engine->channel = channel; - - /* engine interrupt request bit */ - engine->irq_bitmask = (1 << XDMA_ENG_IRQ_NUM) - 1; - engine->irq_bitmask <<= (xdev->engines_num * XDMA_ENG_IRQ_NUM); - engine->bypass_offset = xdev->engines_num * BYPASS_MODE_SPACING; - - /* parent */ - engine->xdev = xdev; - /* register address */ - engine->regs = (xdev->bar[xdev->config_bar_idx] + offset); - engine->sgdma_regs = xdev->bar[xdev->config_bar_idx] + offset + - SGDMA_OFFSET_FROM_CHANNEL; - val = read_register(&engine->regs->identifier); - if (val & 0x8000U) - engine->streaming = 1; - - /* remember SG DMA direction */ - engine->dir = dir; - sprintf(engine->name, "%d-%s%d-%s", xdev->idx, - (dir == DMA_TO_DEVICE) ? 
"H2C" : "C2H", channel, - engine->streaming ? "ST" : "MM"); - - dbg_init("engine %p name %s irq_bitmask=0x%08x\n", engine, engine->name, - (int)engine->irq_bitmask); - - /* initialize the deferred work for transfer completion */ - INIT_WORK(&engine->work, engine_service_work); - - if (dir == DMA_TO_DEVICE) - xdev->mask_irq_h2c |= engine->irq_bitmask; - else - xdev->mask_irq_c2h |= engine->irq_bitmask; - xdev->engines_num++; - - rv = engine_alloc_resource(engine); - if (rv) - return rv; - - rv = engine_init_regs(engine); - if (rv) - return rv; - - return 0; -} - -/* transfer_destroy() - free transfer */ -static void transfer_destroy(struct xdma_dev *xdev, struct xdma_transfer *xfer) -{ - /* free descriptors */ - xdma_desc_done(xfer->desc_virt); - - if (xfer->last_in_request && (xfer->flags & XFER_FLAG_NEED_UNMAP)) { - struct sg_table *sgt = xfer->sgt; - - if (sgt->nents) { - pci_unmap_sg(xdev->pdev, sgt->sgl, sgt->nents, - xfer->dir); - sgt->nents = 0; - } - } -} - -static int transfer_build(struct xdma_engine *engine, - struct xdma_request_cb *req, unsigned int desc_max) -{ - struct xdma_transfer *xfer = &req->xfer; - struct sw_desc *sdesc = &(req->sdesc[req->sw_desc_idx]); - int i = 0; - int j = 0; - - for (; i < desc_max; i++, j++, sdesc++) { - dbg_desc("sw desc %d/%u: 0x%llx, 0x%x, ep 0x%llx.\n", - i + req->sw_desc_idx, req->sw_desc_cnt, - sdesc->addr, sdesc->len, req->ep_addr); - - /* fill in descriptor entry j with transfer details */ - xdma_desc_set(xfer->desc_virt + j, sdesc->addr, req->ep_addr, - sdesc->len, xfer->dir); - xfer->len += sdesc->len; - - /* for non-inc-add mode don't increment ep_addr */ - if (!engine->non_incr_addr) - req->ep_addr += sdesc->len; - } - req->sw_desc_idx += desc_max; - return 0; -} - -static int transfer_init(struct xdma_engine *engine, struct xdma_request_cb *req) -{ - struct xdma_transfer *xfer = &req->xfer; - unsigned int desc_max = min_t(unsigned int, - req->sw_desc_cnt - req->sw_desc_idx, - XDMA_TRANSFER_MAX_DESC); - int i = 
0; - int last = 0; - u32 control; - - memset(xfer, 0, sizeof(*xfer)); - - /* initialize wait queue */ - init_waitqueue_head(&xfer->wq); - - /* remember direction of transfer */ - xfer->dir = engine->dir; - - xfer->desc_virt = engine->desc; - xfer->desc_bus = engine->desc_bus; - - transfer_desc_init(xfer, desc_max); - - dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)xfer->desc_bus); - - transfer_build(engine, req, desc_max); - - /* terminate last descriptor */ - last = desc_max - 1; - xdma_desc_link(xfer->desc_virt + last, 0, 0); - /* stop engine, EOP for AXI ST, req IRQ on last descriptor */ - control = XDMA_DESC_STOPPED; - control |= XDMA_DESC_EOP; - control |= XDMA_DESC_COMPLETED; - xdma_desc_control_set(xfer->desc_virt + last, control); - - xfer->desc_num = xfer->desc_adjacent = desc_max; - - dbg_sg("transfer 0x%p has %d descriptors\n", xfer, xfer->desc_num); - /* fill in adjacent numbers */ - for (i = 0; i < xfer->desc_num; i++) - xdma_desc_adjacent(xfer->desc_virt + i, xfer->desc_num - i - 1); - - return 0; -} - -#ifdef __LIBXDMA_DEBUG__ -static void sgt_dump(struct sg_table *sgt) -{ - int i; - struct scatterlist *sg = sgt->sgl; - - pr_info("sgt 0x%p, sgl 0x%p, nents %u/%u.\n", - sgt, sgt->sgl, sgt->nents, sgt->orig_nents); - - for (i = 0; i < sgt->orig_nents; i++, sg = sg_next(sg)) - pr_info("%d, 0x%p, pg 0x%p,%u+%u, dma 0x%llx,%u.\n", - i, sg, sg_page(sg), sg->offset, sg->length, - sg_dma_address(sg), sg_dma_len(sg)); -} - -static void xdma_request_cb_dump(struct xdma_request_cb *req) -{ - int i; - - pr_info("request 0x%p, total %u, ep 0x%llx, sw_desc %u, sgt 0x%p.\n", - req, req->total_len, req->ep_addr, req->sw_desc_cnt, req->sgt); - sgt_dump(req->sgt); - for (i = 0; i < req->sw_desc_cnt; i++) - pr_info("%d/%u, 0x%llx, %u.\n", - i, req->sw_desc_cnt, req->sdesc[i].addr, - req->sdesc[i].len); -} -#endif - -static void xdma_request_free(struct xdma_request_cb *req) -{ - if (((unsigned long)req) >= VMALLOC_START && - ((unsigned long)req) < VMALLOC_END) - 
vfree(req); - else - kfree(req); -} - -static struct xdma_request_cb * xdma_request_alloc(unsigned int sdesc_nr) -{ - struct xdma_request_cb *req; - unsigned int size = sizeof(struct xdma_request_cb) + - sdesc_nr * sizeof(struct sw_desc); - - req = kzalloc(size, GFP_KERNEL); - if (!req) { - req = vmalloc(size); - if (req) - memset(req, 0, size); - } - if (!req) { - pr_info("OOM, %u sw_desc, %u.\n", sdesc_nr, size); - return NULL; - } - - return req; -} - -static struct xdma_request_cb * xdma_init_request(struct sg_table *sgt, - u64 ep_addr) -{ - struct xdma_request_cb *req; - struct scatterlist *sg = sgt->sgl; - int max = sgt->nents; - int extra = 0; - int i, j = 0; - - for (i = 0; i < max; i++, sg = sg_next(sg)) { - unsigned int len = sg_dma_len(sg); - - if (unlikely(len > XDMA_DESC_BLEN_MAX)) - extra += len >> XDMA_DESC_BLEN_BITS; - } - -//pr_info("ep 0x%llx, desc %u+%u.\n", ep_addr, max, extra); - - max += extra; - req = xdma_request_alloc(max); - if (!req) - return NULL; - - req->sgt = sgt; - req->ep_addr = ep_addr; - - for (i = 0, sg = sgt->sgl; i < sgt->nents; i++, sg = sg_next(sg)) { - unsigned int tlen = sg_dma_len(sg); - dma_addr_t addr = sg_dma_address(sg); - - req->total_len += tlen; - while (tlen) { - req->sdesc[j].addr = addr; - if (tlen > XDMA_DESC_BLEN_MAX) { - req->sdesc[j].len = XDMA_DESC_BLEN_MAX; - addr += XDMA_DESC_BLEN_MAX; - tlen -= XDMA_DESC_BLEN_MAX; - } else { - req->sdesc[j].len = tlen; - tlen = 0; - } - j++; - } - } - BUG_ON(j > max); - - req->sw_desc_cnt = j; -#ifdef __LIBXDMA_DEBUG__ - xdma_request_cb_dump(req); -#endif - return req; -} - -ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, - struct sg_table *sgt, bool dma_mapped, int timeout_ms) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - struct xdma_engine *engine; - int rv = 0; - ssize_t done = 0; - struct scatterlist *sg = sgt->sgl; - int nents; - enum dma_data_direction dir = write ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; - struct xdma_request_cb *req = NULL; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - if (write == 1) { - if (channel >= xdev->h2c_channel_max) { - pr_warn("H2C channel %d >= %d.\n", - channel, xdev->h2c_channel_max); - return -EINVAL; - } - engine = &xdev->engine_h2c[channel]; - } else if (write == 0) { - if (channel >= xdev->c2h_channel_max) { - pr_warn("C2H channel %d >= %d.\n", - channel, xdev->c2h_channel_max); - return -EINVAL; - } - engine = &xdev->engine_c2h[channel]; - } else { - pr_warn("write %d, exp. 0|1.\n", write); - return -EINVAL; - } - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - xdev = engine->xdev; - if (xdma_device_flag_check(xdev, XDEV_FLAG_OFFLINE)) { - pr_info("xdev 0x%p, offline.\n", xdev); - return -EBUSY; - } - - /* check the direction */ - if (engine->dir != dir) { - pr_info("0x%p, %s, %d, W %d, 0x%x/0x%x mismatch.\n", - engine, engine->name, channel, write, engine->dir, dir); - return -EINVAL; - } - - if (!dma_mapped) { - nents = pci_map_sg(xdev->pdev, sg, sgt->orig_nents, dir); - if (!nents) { - pr_info("map sgl failed, sgt 0x%p.\n", sgt); - return -EIO; - } - sgt->nents = nents; - } else { - BUG_ON(!sgt->nents); - } - - req = xdma_init_request(sgt, ep_addr); - if (!req) { - rv = -ENOMEM; - goto unmap_sgl; - } - - dbg_tfr("%s, len %u sg cnt %u.\n", - engine->name, req->total_len, req->sw_desc_cnt); - - sg = sgt->sgl; - nents = req->sw_desc_cnt; - while (nents) { - unsigned long flags; - struct xdma_transfer *xfer; - - /* one transfer at a time */ - spin_lock(&engine->desc_lock); - - /* build transfer */ - rv = transfer_init(engine, req); - if (rv < 0) { - spin_unlock(&engine->desc_lock); - goto unmap_sgl; - } - xfer = &req->xfer; - - if (!dma_mapped) - xfer->flags = XFER_FLAG_NEED_UNMAP; - - /* last transfer for the given request? 
*/ - nents -= xfer->desc_num; - if (!nents) { - xfer->last_in_request = 1; - xfer->sgt = sgt; - } - - dbg_tfr("xfer, %u, ep 0x%llx, done %lu, sg %u/%u.\n", - xfer->len, req->ep_addr, done, req->sw_desc_idx, - req->sw_desc_cnt); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); -#endif - - rv = transfer_queue(engine, xfer); - if (rv < 0) { - spin_unlock(&engine->desc_lock); - pr_info("unable to submit %s, %d.\n", engine->name, rv); - goto unmap_sgl; - } - - /* - * When polling, determine how many descriptors have been queued * on the engine to determine the writeback value expected - */ - if (poll_mode) { - unsigned int desc_count; - - spin_lock_irqsave(&engine->lock, flags); - desc_count = xfer->desc_num; - spin_unlock_irqrestore(&engine->lock, flags); - - dbg_tfr("%s poll desc_count=%d\n", - engine->name, desc_count); - rv = engine_service_poll(engine, desc_count); - - } else { - rv = wait_event_interruptible_timeout(xfer->wq, - (xfer->state != TRANSFER_STATE_SUBMITTED), - msecs_to_jiffies(timeout_ms)); - } - - spin_lock_irqsave(&engine->lock, flags); - - switch(xfer->state) { - case TRANSFER_STATE_COMPLETED: - spin_unlock_irqrestore(&engine->lock, flags); - - dbg_tfr("transfer %p, %u, ep 0x%llx compl, +%lu.\n", - xfer, xfer->len, req->ep_addr - xfer->len, done); - done += xfer->len; - rv = 0; - break; - case TRANSFER_STATE_FAILED: - pr_info("xfer 0x%p,%u, failed, ep 0x%llx.\n", - xfer, xfer->len, req->ep_addr - xfer->len); - spin_unlock_irqrestore(&engine->lock, flags); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); - sgt_dump(sgt); -#endif - rv = -EIO; - break; - default: - /* transfer can still be in-flight */ - pr_info("xfer 0x%p,%u, s 0x%x timed out, ep 0x%llx.\n", - xfer, xfer->len, xfer->state, req->ep_addr); - engine_status_read(engine, 0, 1); - //engine_status_dump(engine); - transfer_abort(engine, xfer); - - xdma_engine_stop(engine); - spin_unlock_irqrestore(&engine->lock, flags); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); - sgt_dump(sgt); 
-#endif - rv = -ERESTARTSYS; - break; - } - transfer_destroy(xdev, xfer); - spin_unlock(&engine->desc_lock); - - if (rv < 0) - goto unmap_sgl; - } /* while (sg) */ - -unmap_sgl: - if (!dma_mapped && sgt->nents) { - pci_unmap_sg(xdev->pdev, sgt->sgl, sgt->orig_nents, dir); - sgt->nents = 0; - } - - if (req) - xdma_request_free(req); - - if (rv < 0) - return rv; - - return done; -} -EXPORT_SYMBOL_GPL(xdma_xfer_submit); - -int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine) -{ - u8 *buffer_virt; - u32 max_consistent_size = 128 * 32 * 1024; /* 1024 pages, 4MB */ - dma_addr_t buffer_bus; /* bus address */ - struct xdma_transfer *transfer; - u64 ep_addr = 0; - int num_desc_in_a_loop = 128; - int size_in_desc = engine->xdma_perf->transfer_size; - int size = size_in_desc * num_desc_in_a_loop; - int i; - - BUG_ON(size_in_desc > max_consistent_size); - - if (size > max_consistent_size) { - size = max_consistent_size; - num_desc_in_a_loop = size / size_in_desc; - } - - buffer_virt = dma_alloc_coherent(&xdev->pdev->dev, size, - &buffer_bus, GFP_KERNEL); - - /* allocate transfer data structure */ - transfer = kzalloc(sizeof(struct xdma_transfer), GFP_KERNEL); - BUG_ON(!transfer); - - /* 0 = write engine (to_dev=0) , 1 = read engine (to_dev=1) */ - transfer->dir = engine->dir; - /* set number of descriptors */ - transfer->desc_num = num_desc_in_a_loop; - - /* allocate descriptor list */ - if (!engine->desc) { - engine->desc = dma_alloc_coherent(&xdev->pdev->dev, - num_desc_in_a_loop * sizeof(struct xdma_desc), - &engine->desc_bus, GFP_KERNEL); - BUG_ON(!engine->desc); - dbg_init("device %s, engine %s pre-alloc desc 0x%p,0x%llx.\n", - dev_name(&xdev->pdev->dev), engine->name, - engine->desc, engine->desc_bus); - } - transfer->desc_virt = engine->desc; - transfer->desc_bus = engine->desc_bus; - - transfer_desc_init(transfer, transfer->desc_num); - - dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)transfer->desc_bus); - - for (i = 0; i < transfer->desc_num; 
i++) { - struct xdma_desc *desc = transfer->desc_virt + i; - dma_addr_t rc_bus_addr = buffer_bus + size_in_desc * i; - - /* fill in descriptor entry with transfer details */ - xdma_desc_set(desc, rc_bus_addr, ep_addr, size_in_desc, - engine->dir); - } - - /* stop engine and request interrupt on last descriptor */ - xdma_desc_control_set(transfer->desc_virt, 0); - /* create a linked loop */ - xdma_desc_link(transfer->desc_virt + transfer->desc_num - 1, - transfer->desc_virt, transfer->desc_bus); - - transfer->cyclic = 1; - - /* initialize wait queue */ - init_waitqueue_head(&transfer->wq); - - //printk("=== Descriptor print for PERF \n"); - //transfer_dump(transfer); - - dbg_perf("Queueing XDMA I/O %s request for performance measurement.\n", - engine->dir ? "write (to dev)" : "read (from dev)"); - transfer_queue(engine, transfer); - return 0; - -} -EXPORT_SYMBOL_GPL(xdma_performance_submit); - -static struct xdma_dev *alloc_dev_instance(struct pci_dev *pdev) -{ - int i; - struct xdma_dev *xdev; - struct xdma_engine *engine; - - BUG_ON(!pdev); - - /* allocate zeroed device book keeping structure */ - xdev = kzalloc(sizeof(struct xdma_dev), GFP_KERNEL); - if (!xdev) { - pr_info("OOM, xdma_dev.\n"); - return NULL; - } - spin_lock_init(&xdev->lock); - - xdev->magic = MAGIC_DEVICE; - xdev->config_bar_idx = -1; - xdev->user_bar_idx = -1; - xdev->bypass_bar_idx = -1; - xdev->irq_line = -1; - - /* create a driver to device reference */ - xdev->pdev = pdev; - dbg_init("xdev = 0x%p\n", xdev); - - /* Set up data user IRQ data structures */ - for (i = 0; i < 16; i++) { - xdev->user_irq[i].xdev = xdev; - spin_lock_init(&xdev->user_irq[i].events_lock); - init_waitqueue_head(&xdev->user_irq[i].events_wq); - xdev->user_irq[i].handler = NULL; - xdev->user_irq[i].user_idx = i; /* 0 based */ - } - - engine = xdev->engine_h2c; - for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { - spin_lock_init(&engine->lock); - spin_lock_init(&engine->desc_lock); - 
INIT_LIST_HEAD(&engine->transfer_list); - init_waitqueue_head(&engine->shutdown_wq); - init_waitqueue_head(&engine->xdma_perf_wq); - } - - engine = xdev->engine_c2h; - for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { - spin_lock_init(&engine->lock); - spin_lock_init(&engine->desc_lock); - INIT_LIST_HEAD(&engine->transfer_list); - init_waitqueue_head(&engine->shutdown_wq); - init_waitqueue_head(&engine->xdma_perf_wq); - } - - return xdev; -} - -static int request_regions(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - int rv; - - BUG_ON(!xdev); - BUG_ON(!pdev); - - dbg_init("pci_request_regions()\n"); - rv = pci_request_regions(pdev, xdev->mod_name); - /* could not request all regions? */ - if (rv) { - dbg_init("pci_request_regions() = %d, device in use?\n", rv); - /* assume device is in use so do not disable it later */ - xdev->regions_in_use = 1; - } else { - xdev->got_regions = 1; - } - - return rv; -} - -static int set_dma_mask(struct pci_dev *pdev) -{ - BUG_ON(!pdev); - - dbg_init("sizeof(dma_addr_t) == %ld\n", sizeof(dma_addr_t)); - /* 64-bit addressing capability for XDMA? 
*/ - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - /* query for DMA transfer */ - /* @see Documentation/DMA-mapping.txt */ - dbg_init("pci_set_dma_mask()\n"); - /* use 64-bit DMA */ - dbg_init("Using a 64-bit DMA mask.\n"); - /* use 32-bit DMA for descriptors */ - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - /* use 64-bit DMA, 32-bit for consistent */ - } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { - dbg_init("Could not set 64-bit DMA mask.\n"); - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - /* use 32-bit DMA */ - dbg_init("Using a 32-bit DMA mask.\n"); - } else { - dbg_init("No suitable DMA possible.\n"); - return -EINVAL; - } - - return 0; -} - -static u32 get_engine_channel_id(struct engine_regs *regs) -{ - u32 value; - - BUG_ON(!regs); - - value = read_register(®s->identifier); - - return (value & 0x00000f00U) >> 8; -} - -static u32 get_engine_id(struct engine_regs *regs) -{ - u32 value; - - BUG_ON(!regs); - - value = read_register(®s->identifier); - return (value & 0xffff0000U) >> 16; -} - -static void remove_engines(struct xdma_dev *xdev) -{ - struct xdma_engine *engine; - int i; - - BUG_ON(!xdev); - - /* iterate over channels */ - for (i = 0; i < xdev->h2c_channel_max; i++) { - engine = &xdev->engine_h2c[i]; - if (engine->magic == MAGIC_ENGINE) { - dbg_sg("Remove %s, %d", engine->name, i); - engine_destroy(xdev, engine); - dbg_sg("%s, %d removed", engine->name, i); - } - } - - for (i = 0; i < xdev->c2h_channel_max; i++) { - engine = &xdev->engine_c2h[i]; - if (engine->magic == MAGIC_ENGINE) { - dbg_sg("Remove %s, %d", engine->name, i); - engine_destroy(xdev, engine); - dbg_sg("%s, %d removed", engine->name, i); - } - } -} - -static int probe_for_engine(struct xdma_dev *xdev, enum dma_data_direction dir, - int channel) -{ - struct engine_regs *regs; - int offset = channel * CHANNEL_SPACING; - u32 engine_id; - u32 engine_id_expected; - u32 channel_id; - struct xdma_engine *engine; - int rv; - - /* register offset for the 
engine */ - /* read channels at 0x0000, write channels at 0x1000, - * channels at 0x100 interval */ - if (dir == DMA_TO_DEVICE) { - engine_id_expected = XDMA_ID_H2C; - engine = &xdev->engine_h2c[channel]; - } else { - offset += H2C_CHANNEL_OFFSET; - engine_id_expected = XDMA_ID_C2H; - engine = &xdev->engine_c2h[channel]; - } - - regs = xdev->bar[xdev->config_bar_idx] + offset; - engine_id = get_engine_id(regs); - channel_id = get_engine_channel_id(regs); - - if ((engine_id != engine_id_expected) || (channel_id != channel)) { - dbg_init("%s %d engine, reg off 0x%x, id mismatch 0x%x,0x%x," - "exp 0x%x,0x%x, SKIP.\n", - dir == DMA_TO_DEVICE ? "H2C" : "C2H", - channel, offset, engine_id, channel_id, - engine_id_expected, channel_id != channel); - return -EINVAL; - } - - dbg_init("found AXI %s %d engine, reg. off 0x%x, id 0x%x,0x%x.\n", - dir == DMA_TO_DEVICE ? "H2C" : "C2H", channel, - offset, engine_id, channel_id); - - /* allocate and initialize engine */ - rv = engine_init(engine, xdev, offset, dir, channel); - if (rv != 0) { - pr_info("failed to create AXI %s %d engine.\n", - dir == DMA_TO_DEVICE ? 
"H2C" : "C2H", - channel); - return rv; - } - - return 0; -} - -static int probe_engines(struct xdma_dev *xdev) -{ - int i; - int rv = 0; - - BUG_ON(!xdev); - - /* iterate over channels */ - for (i = 0; i < xdev->h2c_channel_max; i++) { - rv = probe_for_engine(xdev, DMA_TO_DEVICE, i); - if (rv) - break; - } - xdev->h2c_channel_max = i; - - for (i = 0; i < xdev->c2h_channel_max; i++) { - rv = probe_for_engine(xdev, DMA_FROM_DEVICE, i); - if (rv) - break; - } - xdev->c2h_channel_max = i; - - return 0; -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) -{ - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); -} -#else -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) -{ - u16 v; - int pos; - - pos = pci_pcie_cap(pdev); - if (pos > 0) { - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &v); - v |= PCI_EXP_DEVCTL_RELAX_EN; - pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, v); - } -} -#endif - -static void pci_check_extended_tag(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - u16 cap; - u32 v; - void *__iomem reg; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) - pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); -#else - int pos; - - pos = pci_pcie_cap(pdev); - if (pos > 0) - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &cap); - else { - pr_info("pdev 0x%p, unable to access pcie cap.\n", pdev); - return; - } -#endif - - if ((cap & PCI_EXP_DEVCTL_EXT_TAG)) - return; - - /* extended tag not enabled */ - pr_info("0x%p EXT_TAG disabled.\n", pdev); - - if (xdev->config_bar_idx < 0) { - pr_info("pdev 0x%p, xdev 0x%p, config bar UNKNOWN.\n", - pdev, xdev); - return; - } - - reg = xdev->bar[xdev->config_bar_idx] + XDMA_OFS_CONFIG + 0x4C; - v = read_register(reg); - v = (v & 0xFF) | (((u32)32) << 8); - write_register(v, reg, XDMA_OFS_CONFIG + 0x4C); -} - -void *xdma_device_open(const char *mname, struct pci_dev *pdev, int *user_max, - int *h2c_channel_max, int 
*c2h_channel_max) -{ - struct xdma_dev *xdev = NULL; - int rv = 0; - - pr_info("%s device %s, 0x%p.\n", mname, dev_name(&pdev->dev), pdev); - - /* allocate zeroed device book keeping structure */ - xdev = alloc_dev_instance(pdev); - if (!xdev) - return NULL; - xdev->mod_name = mname; - xdev->user_max = *user_max; - xdev->h2c_channel_max = *h2c_channel_max; - xdev->c2h_channel_max = *c2h_channel_max; - - xdma_device_flag_set(xdev, XDEV_FLAG_OFFLINE); - xdev_list_add(xdev); - - if (xdev->user_max == 0 || xdev->user_max > MAX_USER_IRQ) - xdev->user_max = MAX_USER_IRQ; - if (xdev->h2c_channel_max == 0 || - xdev->h2c_channel_max > XDMA_CHANNEL_NUM_MAX) - xdev->h2c_channel_max = XDMA_CHANNEL_NUM_MAX; - if (xdev->c2h_channel_max == 0 || - xdev->c2h_channel_max > XDMA_CHANNEL_NUM_MAX) - xdev->c2h_channel_max = XDMA_CHANNEL_NUM_MAX; - - rv = pci_enable_device(pdev); - if (rv) { - dbg_init("pci_enable_device() failed, %d.\n", rv); - goto err_enable; - } - - /* keep INTx enabled */ - pci_check_intr_pend(pdev); - - /* enable relaxed ordering */ - pci_enable_relaxed_ordering(pdev); - - pci_check_extended_tag(xdev, pdev); - - /* force MRRS to be 512 */ - rv = pcie_set_readrq(pdev, 512); - if (rv) - pr_info("device %s, error set PCI_EXP_DEVCTL_READRQ: %d.\n", - dev_name(&pdev->dev), rv); - - /* enable bus master capability */ - pci_set_master(pdev); - - rv = request_regions(xdev, pdev); - if (rv) - goto err_regions; - - rv = map_bars(xdev, pdev); - if (rv) - goto err_map; - - rv = set_dma_mask(pdev); - if (rv) - goto err_mask; - - check_nonzero_interrupt_status(xdev); - /* explicitely zero all interrupt enable masks */ - channel_interrupts_disable(xdev, ~0); - user_interrupts_disable(xdev, ~0); - read_interrupts(xdev); - - rv = probe_engines(xdev); - if (rv) - goto err_engines; - - rv = enable_msi_msix(xdev, pdev); - if (rv < 0) - goto err_enable_msix; - - rv = irq_setup(xdev, pdev); - if (rv < 0) - goto err_interrupts; - - if (!poll_mode) - channel_interrupts_enable(xdev, ~0); - 
- /* Flush writes */ - read_interrupts(xdev); - - *user_max = xdev->user_max; - *h2c_channel_max = xdev->h2c_channel_max; - *c2h_channel_max = xdev->c2h_channel_max; - - xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); - return (void *)xdev; - -err_interrupts: - irq_teardown(xdev); -err_enable_msix: - disable_msi_msix(xdev, pdev); -err_engines: - remove_engines(xdev); -err_mask: - unmap_bars(xdev, pdev); -err_map: - if (xdev->got_regions) - pci_release_regions(pdev); -err_regions: - if (!xdev->regions_in_use) - pci_disable_device(pdev); -err_enable: - xdev_list_remove(xdev); - kfree(xdev); - return NULL; -} -EXPORT_SYMBOL_GPL(xdma_device_open); - -void xdma_device_close(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - dbg_init("pdev 0x%p, xdev 0x%p.\n", pdev, dev_hndl); - - if (!dev_hndl) - return; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return; - - dbg_sg("remove(dev = 0x%p) where pdev->dev.driver_data = 0x%p\n", - pdev, xdev); - if (xdev->pdev != pdev) { - dbg_sg("pci_dev(0x%lx) != pdev(0x%lx)\n", - (unsigned long)xdev->pdev, (unsigned long)pdev); - } - - channel_interrupts_disable(xdev, ~0); - user_interrupts_disable(xdev, ~0); - read_interrupts(xdev); - - irq_teardown(xdev); - disable_msi_msix(xdev, pdev); - - remove_engines(xdev); - unmap_bars(xdev, pdev); - - if (xdev->got_regions) { - dbg_init("pci_release_regions 0x%p.\n", pdev); - pci_release_regions(pdev); - } - - if (!xdev->regions_in_use) { - dbg_init("pci_disable_device 0x%p.\n", pdev); - pci_disable_device(pdev); - } - - xdev_list_remove(xdev); - - kfree(xdev); -} -EXPORT_SYMBOL_GPL(xdma_device_close); - -void xdma_device_offline(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - struct xdma_engine *engine; - int i; - - if (!dev_hndl) - return; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return; - -pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); - 
xdma_device_flag_set(xdev, XDEV_FLAG_OFFLINE); - - /* wait for all engines to be idle */ - for (i = 0; i < xdev->h2c_channel_max; i++) { - unsigned long flags; - - engine = &xdev->engine_h2c[i]; - - if (engine->magic == MAGIC_ENGINE) { - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown |= ENGINE_SHUTDOWN_REQUEST; - - xdma_engine_stop(engine); - engine->running = 0; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - for (i = 0; i < xdev->c2h_channel_max; i++) { - unsigned long flags; - - engine = &xdev->engine_c2h[i]; - if (engine->magic == MAGIC_ENGINE) { - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown |= ENGINE_SHUTDOWN_REQUEST; - - xdma_engine_stop(engine); - engine->running = 0; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - /* turn off interrupts */ - channel_interrupts_disable(xdev, ~0); - user_interrupts_disable(xdev, ~0); - read_interrupts(xdev); - irq_teardown(xdev); - - pr_info("xdev 0x%p, done.\n", xdev); -} -EXPORT_SYMBOL_GPL(xdma_device_offline); - -void xdma_device_online(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - struct xdma_engine *engine; - unsigned long flags; - int i; - - if (!dev_hndl) - return; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return; - -pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); - - for (i = 0; i < xdev->h2c_channel_max; i++) { - engine = &xdev->engine_h2c[i]; - if (engine->magic == MAGIC_ENGINE) { - engine_init_regs(engine); - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown &= ~ENGINE_SHUTDOWN_REQUEST; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - for (i = 0; i < xdev->c2h_channel_max; i++) { - engine = &xdev->engine_c2h[i]; - if (engine->magic == MAGIC_ENGINE) { - engine_init_regs(engine); - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown &= ~ENGINE_SHUTDOWN_REQUEST; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - /* re-write the interrupt table */ - if 
(!poll_mode) { - irq_setup(xdev, pdev); - - channel_interrupts_enable(xdev, ~0); - user_interrupts_enable(xdev, xdev->mask_irq_user); - read_interrupts(xdev); - } - - xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); -pr_info("xdev 0x%p, done.\n", xdev); -} -EXPORT_SYMBOL_GPL(xdma_device_online); - -int xdma_device_restart(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return -EINVAL; - - pr_info("NOT implemented, 0x%p.\n", xdev); - return -EINVAL; -} -EXPORT_SYMBOL_GPL(xdma_device_restart); - -int xdma_user_isr_register(void *dev_hndl, unsigned int mask, - irq_handler_t handler, void *dev) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - int i; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - for (i = 0; i < xdev->user_max && mask; i++) { - unsigned int bit = (1 << i); - - if ((bit & mask) == 0) - continue; - - mask &= ~bit; - xdev->user_irq[i].handler = handler; - xdev->user_irq[i].dev = dev; - } - - return 0; -} -EXPORT_SYMBOL_GPL(xdma_user_isr_register); - -int xdma_user_isr_enable(void *dev_hndl, unsigned int mask) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - xdev->mask_irq_user |= mask; - /* enable user interrupts */ - user_interrupts_enable(xdev, mask); - read_interrupts(xdev); - - return 0; -} -EXPORT_SYMBOL_GPL(xdma_user_isr_enable); - -int xdma_user_isr_disable(void *dev_hndl, unsigned int mask) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - xdev->mask_irq_user &= ~mask; - user_interrupts_disable(xdev, mask); - read_interrupts(xdev); - - return 0; -} 
-EXPORT_SYMBOL_GPL(xdma_user_isr_disable); - -#ifdef __LIBXDMA_MOD__ -static int __init xdma_base_init(void) -{ - printk(KERN_INFO "%s", version); - return 0; -} - -static void __exit xdma_base_exit(void) -{ - return; -} - -module_init(xdma_base_init); -module_exit(xdma_base_exit); -#endif -/* makes an existing transfer cyclic */ -static void xdma_transfer_cyclic(struct xdma_transfer *transfer) -{ - /* link last descriptor to first descriptor */ - xdma_desc_link(transfer->desc_virt + transfer->desc_num - 1, - transfer->desc_virt, transfer->desc_bus); - /* remember transfer is cyclic */ - transfer->cyclic = 1; -} - -static int transfer_monitor_cyclic(struct xdma_engine *engine, - struct xdma_transfer *transfer, int timeout_ms) -{ - struct xdma_result *result; - int rc = 0; - - BUG_ON(!engine); - BUG_ON(!transfer); - - result = engine->cyclic_result; - BUG_ON(!result); - - if (poll_mode) { - int i ; - for (i = 0; i < 5; i++) { - rc = engine_service_poll(engine, 0); - if (rc) { - pr_info("%s service_poll failed %d.\n", - engine->name, rc); - rc = -ERESTARTSYS; - } - if (result[engine->rx_head].status) - return 0; - } - } else { - if (enable_credit_mp){ - dbg_tfr("%s: rx_head=%d,rx_tail=%d, wait ...\n", - engine->name, engine->rx_head, engine->rx_tail); - rc = wait_event_interruptible_timeout( transfer->wq, - (engine->rx_head!=engine->rx_tail || - engine->rx_overrun), - msecs_to_jiffies(timeout_ms)); - dbg_tfr("%s: wait returns %d, rx %d/%d, overrun %d.\n", - engine->name, rc, engine->rx_head, - engine->rx_tail, engine->rx_overrun); - } else { - rc = wait_event_interruptible_timeout( transfer->wq, - engine->eop_found, - msecs_to_jiffies(timeout_ms)); - dbg_tfr("%s: wait returns %d, eop_found %d.\n", - engine->name, rc, engine->eop_found); - } - } - - return 0; -} - -struct scatterlist *sglist_index(struct sg_table *sgt, unsigned int idx) -{ - struct scatterlist *sg = sgt->sgl; - int i; - - if (idx >= sgt->orig_nents) - return NULL; - - if (!idx) - return sg; - - for (i 
= 0; i < idx; i++, sg = sg_next(sg)) - ; - - return sg; -} - -static int copy_cyclic_to_user(struct xdma_engine *engine, int pkt_length, - int head, char __user *buf, size_t count) -{ - struct scatterlist *sg; - int more = pkt_length; - - BUG_ON(!engine); - BUG_ON(!buf); - - dbg_tfr("%s, pkt_len %d, head %d, user buf idx %u.\n", - engine->name, pkt_length, head, engine->user_buffer_index); - - sg = sglist_index(&engine->cyclic_sgt, head); - if (!sg) { - pr_info("%s, head %d OOR, sgl %u.\n", - engine->name, head, engine->cyclic_sgt.orig_nents); - return -EIO; - } - - /* EOP found? Transfer anything from head to EOP */ - while (more) { - unsigned int copy = more > PAGE_SIZE ? PAGE_SIZE : more; - unsigned int blen = count - engine->user_buffer_index; - int rv; - - if (copy > blen) - copy = blen; - - dbg_tfr("%s sg %d, 0x%p, copy %u to user %u.\n", - engine->name, head, sg, copy, - engine->user_buffer_index); - - rv = copy_to_user(&buf[engine->user_buffer_index], - page_address(sg_page(sg)), copy); - if (rv) { - pr_info("%s copy_to_user %u failed %d\n", - engine->name, copy, rv); - return -EIO; - } - - more -= copy; - engine->user_buffer_index += copy; - - if (engine->user_buffer_index == count) { - /* user buffer used up */ - break; - } - - head++; - if (head >= CYCLIC_RX_PAGES_MAX) { - head = 0; - sg = engine->cyclic_sgt.sgl; - } else - sg = sg_next(sg); - } - - return pkt_length; -} - -static int complete_cyclic(struct xdma_engine *engine, char __user *buf, - size_t count) -{ - struct xdma_result *result; - int pkt_length = 0; - int fault = 0; - int eop = 0; - int head; - int rc = 0; - int num_credit = 0; - unsigned long flags; - - BUG_ON(!engine); - result = engine->cyclic_result; - BUG_ON(!result); - - spin_lock_irqsave(&engine->lock, flags); - - /* where the host currently is in the ring buffer */ - head = engine->rx_head; - - /* iterate over newly received results */ - while (engine->rx_head != engine->rx_tail||engine->rx_overrun) { - - 
WARN_ON(result[engine->rx_head].status==0); - - dbg_tfr("%s, result[%d].status = 0x%x length = 0x%x.\n", - engine->name, engine->rx_head, - result[engine->rx_head].status, - result[engine->rx_head].length); - - if ((result[engine->rx_head].status >> 16) != C2H_WB) { - pr_info("%s, result[%d].status 0x%x, no magic.\n", - engine->name, engine->rx_head, - result[engine->rx_head].status); - fault = 1; - } else if (result[engine->rx_head].length > PAGE_SIZE) { - pr_info("%s, result[%d].len 0x%x, > PAGE_SIZE 0x%lx.\n", - engine->name, engine->rx_head, - result[engine->rx_head].length, PAGE_SIZE); - fault = 1; - } else if (result[engine->rx_head].length == 0) { - pr_info("%s, result[%d].length 0x%x.\n", - engine->name, engine->rx_head, - result[engine->rx_head].length); - fault = 1; - /* valid result */ - } else { - pkt_length += result[engine->rx_head].length; - num_credit++; - /* seen eop? */ - //if (result[engine->rx_head].status & RX_STATUS_EOP) - if (result[engine->rx_head].status & RX_STATUS_EOP){ - eop = 1; - engine->eop_found = 1; - } - - dbg_tfr("%s, pkt_length=%d (%s)\n", - engine->name, pkt_length, - eop ? 
"with EOP" : "no EOP yet"); - } - /* clear result */ - result[engine->rx_head].status = 0; - result[engine->rx_head].length = 0; - /* proceed head pointer so we make progress, even when fault */ - engine->rx_head = (engine->rx_head + 1) % CYCLIC_RX_PAGES_MAX; - - /* stop processing if a fault/eop was detected */ - if (fault || eop){ - break; - } - } - - spin_unlock_irqrestore(&engine->lock, flags); - - if (fault) - return -EIO; - - rc = copy_cyclic_to_user(engine, pkt_length, head, buf, count); - engine->rx_overrun = 0; - /* if copy is successful, release credits */ - if(rc > 0) - write_register(num_credit,&engine->sgdma_regs->credits, 0); - - return rc; -} - -ssize_t xdma_engine_read_cyclic(struct xdma_engine *engine, char __user *buf, - size_t count, int timeout_ms) -{ - int i = 0; - int rc = 0; - int rc_len = 0; - struct xdma_transfer *transfer; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - transfer = &engine->cyclic_req->xfer; - BUG_ON(!transfer); - - engine->user_buffer_index = 0; - - do { - rc = transfer_monitor_cyclic(engine, transfer, timeout_ms); - if (rc < 0) - return rc; - rc = complete_cyclic(engine, buf, count); - if (rc < 0) - return rc; - rc_len += rc; - - i++; - if (i > 10) - break; - } while (!engine->eop_found); - - if(enable_credit_mp) - engine->eop_found = 0; - - return rc_len; -} - -static void sgt_free_with_pages(struct sg_table *sgt, int dir, - struct pci_dev *pdev) -{ - struct scatterlist *sg = sgt->sgl; - int npages = sgt->orig_nents; - int i; - - for (i = 0; i < npages; i++, sg = sg_next(sg)) { - struct page *pg = sg_page(sg); - dma_addr_t bus = sg_dma_address(sg); - - if (pg) { - if (pdev) - pci_unmap_page(pdev, bus, PAGE_SIZE, dir); - __free_page(pg); - } else - break; - } - sg_free_table(sgt); - memset(sgt, 0, sizeof(struct sg_table)); -} - -static int sgt_alloc_with_pages(struct sg_table *sgt, unsigned int npages, - int dir, struct pci_dev *pdev) -{ - struct scatterlist *sg; - int i; - - if (sg_alloc_table(sgt, 
npages, GFP_KERNEL)) { - pr_info("sgt OOM.\n"); - return -ENOMEM; - } - - sg = sgt->sgl; - for (i = 0; i < npages; i++, sg = sg_next(sg)) { - struct page *pg = alloc_page(GFP_KERNEL); - - if (!pg) { - pr_info("%d/%u, page OOM.\n", i, npages); - goto err_out; - } - - if (pdev) { - dma_addr_t bus = pci_map_page(pdev, pg, 0, PAGE_SIZE, - dir); - if (unlikely(pci_dma_mapping_error(pdev, bus))) { - pr_info("%d/%u, page 0x%p map err.\n", - i, npages, pg); - __free_page(pg); - goto err_out; - } - sg_dma_address(sg) = bus; - sg_dma_len(sg) = PAGE_SIZE; - } - sg_set_page(sg, pg, PAGE_SIZE, 0); - } - - sgt->orig_nents = sgt->nents = npages; - - return 0; - -err_out: - sgt_free_with_pages(sgt, dir, pdev); - return -ENOMEM; -} - -int xdma_cyclic_transfer_setup(struct xdma_engine *engine) -{ - struct xdma_dev *xdev; - struct xdma_transfer *xfer; - dma_addr_t bus; - unsigned long flags; - int i; - int rc; - - BUG_ON(!engine); - xdev = engine->xdev; - BUG_ON(!xdev); - - if (engine->cyclic_req) { - pr_info("%s: exclusive access already taken.\n", - engine->name); - return -EBUSY; - } - - spin_lock_irqsave(&engine->lock, flags); - - engine->rx_tail = 0; - engine->rx_head = 0; - engine->rx_overrun = 0; - engine->eop_found = 0; - - rc = sgt_alloc_with_pages(&engine->cyclic_sgt, CYCLIC_RX_PAGES_MAX, - engine->dir, xdev->pdev); - if (rc < 0) { - pr_info("%s cyclic pages %u OOM.\n", - engine->name, CYCLIC_RX_PAGES_MAX); - goto err_out; - } - - engine->cyclic_req = xdma_init_request(&engine->cyclic_sgt, 0); - if (!engine->cyclic_req) { - pr_info("%s cyclic request OOM.\n", engine->name); - rc = -ENOMEM; - goto err_out; - } - -#ifdef __LIBXDMA_DEBUG__ - xdma_request_cb_dump(engine->cyclic_req); -#endif - - rc = transfer_init(engine, engine->cyclic_req); - if (rc < 0) - goto err_out; - - xfer = &engine->cyclic_req->xfer; - - /* replace source addresses with result write-back addresses */ - memset(engine->cyclic_result, 0, - CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result)); - bus = 
engine->cyclic_result_bus; - for (i = 0; i < xfer->desc_num; i++) { - xfer->desc_virt[i].src_addr_lo = cpu_to_le32(PCI_DMA_L(bus)); - xfer->desc_virt[i].src_addr_hi = cpu_to_le32(PCI_DMA_H(bus)); - bus += sizeof(struct xdma_result); - } - /* set control of all descriptors */ - for (i = 0; i < xfer->desc_num; i++) { - xdma_desc_control_clear(xfer->desc_virt + i, LS_BYTE_MASK); - xdma_desc_control_set(xfer->desc_virt + i, - XDMA_DESC_EOP | XDMA_DESC_COMPLETED); - } - - /* make this a cyclic transfer */ - xdma_transfer_cyclic(xfer); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); -#endif - - if(enable_credit_mp){ - //write_register(RX_BUF_PAGES,&engine->sgdma_regs->credits); - write_register(128, &engine->sgdma_regs->credits, 0); - } - - spin_unlock_irqrestore(&engine->lock, flags); - - /* start cyclic transfer */ - transfer_queue(engine, xfer); - - return 0; - - /* unwind on errors */ -err_out: - if (engine->cyclic_req) { - xdma_request_free(engine->cyclic_req); - engine->cyclic_req = NULL; - } - - if (engine->cyclic_sgt.orig_nents) { - sgt_free_with_pages(&engine->cyclic_sgt, engine->dir, - xdev->pdev); - engine->cyclic_sgt.orig_nents = 0; - engine->cyclic_sgt.nents = 0; - engine->cyclic_sgt.sgl = NULL; - } - - spin_unlock_irqrestore(&engine->lock, flags); - - return rc; -} - - -static int cyclic_shutdown_polled(struct xdma_engine *engine) -{ - BUG_ON(!engine); - - spin_lock(&engine->lock); - - dbg_tfr("Polling for shutdown completion\n"); - do { - engine_status_read(engine, 1, 0); - schedule(); - } while (engine->status & XDMA_STAT_BUSY); - - if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { - dbg_tfr("Engine has stopped\n"); - - if (!list_empty(&engine->transfer_list)) - engine_transfer_dequeue(engine); - - engine_service_shutdown(engine); - } - - dbg_tfr("Shutdown completion polling done\n"); - spin_unlock(&engine->lock); - - return 0; -} - -static int cyclic_shutdown_interrupt(struct xdma_engine *engine) -{ - int rc; - - BUG_ON(!engine); - - rc = 
wait_event_interruptible_timeout(engine->shutdown_wq, - !engine->running, msecs_to_jiffies(10000)); - -#if 0 - if (rc) { - dbg_tfr("wait_event_interruptible=%d\n", rc); - return rc; - } -#endif - - if (engine->running) { - pr_info("%s still running?!, %d\n", engine->name, rc); - return -EINVAL; - } - - return rc; -} - -int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) -{ - int rc; - struct xdma_dev *xdev = engine->xdev; - struct xdma_transfer *transfer; - unsigned long flags; - - transfer = engine_cyclic_stop(engine); - - spin_lock_irqsave(&engine->lock, flags); - if (transfer) { - dbg_tfr("%s: stop transfer 0x%p.\n", engine->name, transfer); - if (transfer != &engine->cyclic_req->xfer) { - pr_info("%s unexpected transfer 0x%p/0x%p\n", - engine->name, transfer, - &engine->cyclic_req->xfer); - } - } - /* allow engine to be serviced after stop request */ - spin_unlock_irqrestore(&engine->lock, flags); - - /* wait for engine to be no longer running */ - if (poll_mode) - rc = cyclic_shutdown_polled(engine); - else - rc = cyclic_shutdown_interrupt(engine); - - /* obtain spin lock to atomically remove resources */ - spin_lock_irqsave(&engine->lock, flags); - - if (engine->cyclic_req) { - xdma_request_free(engine->cyclic_req); - engine->cyclic_req = NULL; - } - - if (engine->cyclic_sgt.orig_nents) { - sgt_free_with_pages(&engine->cyclic_sgt, engine->dir, - xdev->pdev); - engine->cyclic_sgt.orig_nents = 0; - engine->cyclic_sgt.nents = 0; - engine->cyclic_sgt.sgl = NULL; - } - - spin_unlock_irqrestore(&engine->lock, flags); - - return 0; -} - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/libxdma.h b/sdk/linux_kernel_drivers/xocl/libxdma.h deleted file mode 100644 index d7620827..00000000 --- a/sdk/linux_kernel_drivers/xocl/libxdma.h +++ /dev/null @@ -1,612 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * 
Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". - * - * Karen Xie - * - ******************************************************************************/ -#ifndef XDMA_LIB_H -#define XDMA_LIB_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Switch debug printing on/off */ -#define XDMA_DEBUG 0 - -/* SECTION: Preprocessor macros/constants */ -#define XDMA_BAR_NUM (6) - -/* maximum amount of register space to map */ -#define XDMA_BAR_SIZE (0x8000UL) - -/* Use this definition to poll several times between calls to schedule */ -#define NUM_POLLS_PER_SCHED 100 - -#define XDMA_CHANNEL_NUM_MAX (4) -/* - * interrupts per engine, rad2_vul.sv:237 - * .REG_IRQ_OUT (reg_irq_from_ch[(channel*2) +: 2]), - */ -#define XDMA_ENG_IRQ_NUM (1) -#define MAX_EXTRA_ADJ (15) -#define RX_STATUS_EOP (1) - -/* Target internal components on XDMA control BAR */ -#define XDMA_OFS_INT_CTRL (0x2000UL) -#define XDMA_OFS_CONFIG (0x3000UL) - -/* maximum number of desc per transfer request */ -#define XDMA_TRANSFER_MAX_DESC (2048) - -/* maximum size of a single DMA transfer descriptor */ -#define XDMA_DESC_BLEN_BITS 28 -#define XDMA_DESC_BLEN_MAX ((1 << (XDMA_DESC_BLEN_BITS)) - 1) - -/* bits of the SG DMA control register */ -#define XDMA_CTRL_RUN_STOP (1UL << 0) 
-#define XDMA_CTRL_IE_DESC_STOPPED (1UL << 1) -#define XDMA_CTRL_IE_DESC_COMPLETED (1UL << 2) -#define XDMA_CTRL_IE_DESC_ALIGN_MISMATCH (1UL << 3) -#define XDMA_CTRL_IE_MAGIC_STOPPED (1UL << 4) -#define XDMA_CTRL_IE_IDLE_STOPPED (1UL << 6) -#define XDMA_CTRL_IE_READ_ERROR (0x1FUL << 9) -#define XDMA_CTRL_IE_DESC_ERROR (0x1FUL << 19) -#define XDMA_CTRL_NON_INCR_ADDR (1UL << 25) -#define XDMA_CTRL_POLL_MODE_WB (1UL << 26) - -/* bits of the SG DMA status register */ -#define XDMA_STAT_BUSY (1UL << 0) -#define XDMA_STAT_DESC_STOPPED (1UL << 1) -#define XDMA_STAT_DESC_COMPLETED (1UL << 2) -#define XDMA_STAT_ALIGN_MISMATCH (1UL << 3) -#define XDMA_STAT_MAGIC_STOPPED (1UL << 4) -#define XDMA_STAT_INVALID_LEN (1UL << 5) -#define XDMA_STAT_IDLE_STOPPED (1UL << 6) - -#define XDMA_STAT_COMMON_ERR_MASK \ - (XDMA_STAT_ALIGN_MISMATCH | XDMA_STAT_MAGIC_STOPPED | \ - XDMA_STAT_INVALID_LEN) - -/* desc_error, C2H & H2C */ -#define XDMA_STAT_DESC_UNSUPP_REQ (1UL << 19) -#define XDMA_STAT_DESC_COMPL_ABORT (1UL << 20) -#define XDMA_STAT_DESC_PARITY_ERR (1UL << 21) -#define XDMA_STAT_DESC_HEADER_EP (1UL << 22) -#define XDMA_STAT_DESC_UNEXP_COMPL (1UL << 23) - -#define XDMA_STAT_DESC_ERR_MASK \ - (XDMA_STAT_DESC_UNSUPP_REQ | XDMA_STAT_DESC_COMPL_ABORT | \ - XDMA_STAT_DESC_PARITY_ERR | XDMA_STAT_DESC_HEADER_EP | \ - XDMA_STAT_DESC_UNEXP_COMPL) - -/* read error: H2C */ -#define XDMA_STAT_H2C_R_UNSUPP_REQ (1UL << 9) -#define XDMA_STAT_H2C_R_COMPL_ABORT (1UL << 10) -#define XDMA_STAT_H2C_R_PARITY_ERR (1UL << 11) -#define XDMA_STAT_H2C_R_HEADER_EP (1UL << 12) -#define XDMA_STAT_H2C_R_UNEXP_COMPL (1UL << 13) - -#define XDMA_STAT_H2C_R_ERR_MASK \ - (XDMA_STAT_H2C_R_UNSUPP_REQ | XDMA_STAT_H2C_R_COMPL_ABORT | \ - XDMA_STAT_H2C_R_PARITY_ERR | XDMA_STAT_H2C_R_HEADER_EP | \ - XDMA_STAT_H2C_R_UNEXP_COMPL) - -/* write error, H2C only */ -#define XDMA_STAT_H2C_W_DECODE_ERR (1UL << 14) -#define XDMA_STAT_H2C_W_SLAVE_ERR (1UL << 15) - -#define XDMA_STAT_H2C_W_ERR_MASK \ - (XDMA_STAT_H2C_W_DECODE_ERR | 
XDMA_STAT_H2C_W_SLAVE_ERR) - -/* read error: C2H */ -#define XDMA_STAT_C2H_R_DECODE_ERR (1UL << 9) -#define XDMA_STAT_C2H_R_SLAVE_ERR (1UL << 10) - -#define XDMA_STAT_C2H_R_ERR_MASK \ - (XDMA_STAT_C2H_R_DECODE_ERR | XDMA_STAT_C2H_R_SLAVE_ERR) - -/* all combined */ -#define XDMA_STAT_H2C_ERR_MASK \ - (XDMA_STAT_COMMON_ERR_MASK | XDMA_STAT_DESC_ERR_MASK | \ - XDMA_STAT_H2C_R_ERR_MASK | XDMA_STAT_H2C_W_ERR_MASK) - -#define XDMA_STAT_C2H_ERR_MASK \ - (XDMA_STAT_COMMON_ERR_MASK | XDMA_STAT_DESC_ERR_MASK | \ - XDMA_STAT_C2H_R_ERR_MASK) - -/* bits of the SGDMA descriptor control field */ -#define XDMA_DESC_STOPPED (1UL << 0) -#define XDMA_DESC_COMPLETED (1UL << 1) -#define XDMA_DESC_EOP (1UL << 4) - -#define XDMA_PERF_RUN (1UL << 0) -#define XDMA_PERF_CLEAR (1UL << 1) -#define XDMA_PERF_AUTO (1UL << 2) - -#define MAGIC_ENGINE 0xEEEEEEEEUL -#define MAGIC_DEVICE 0xDDDDDDDDUL - -/* upper 16-bits of engine identifier register */ -#define XDMA_ID_H2C 0x1fc0U -#define XDMA_ID_C2H 0x1fc1U - -/* for C2H AXI-ST mode */ -#define CYCLIC_RX_PAGES_MAX 256 - -#define LS_BYTE_MASK 0x000000FFUL - -#define BLOCK_ID_MASK 0xFFF00000 -#define BLOCK_ID_HEAD 0x1FC00000 - -#define IRQ_BLOCK_ID 0x1fc20000UL -#define CONFIG_BLOCK_ID 0x1fc30000UL - -#define WB_COUNT_MASK 0x00ffffffUL -#define WB_ERR_MASK (1UL << 31) -#define POLL_TIMEOUT_SECONDS 10 - -#define MAX_USER_IRQ 16 - -#define MAX_DESC_BUS_ADDR (0xffffffffULL) - -#define DESC_MAGIC 0xAD4B0000UL - -#define C2H_WB 0x52B4UL - -#define MAX_NUM_ENGINES (XDMA_CHANNEL_NUM_MAX * 2) -#define H2C_CHANNEL_OFFSET 0x1000 -#define SGDMA_OFFSET_FROM_CHANNEL 0x4000 -#define CHANNEL_SPACING 0x100 -#define TARGET_SPACING 0x1000 - -#define BYPASS_MODE_SPACING 0x0100 - -/* obtain the 32 most significant (high) bits of a 32-bit or 64-bit address */ -#define PCI_DMA_H(addr) ((addr >> 16) >> 16) -/* obtain the 32 least significant (low) bits of a 32-bit or 64-bit address */ -#define PCI_DMA_L(addr) (addr & 0xffffffffUL) - -#ifndef VM_RESERVED - #define 
VMEM_FLAGS (VM_IO | VM_DONTEXPAND | VM_DONTDUMP) -#else - #define VMEM_FLAGS (VM_IO | VM_RESERVED) -#endif - -#ifdef __LIBXDMA_DEBUG__ -#define dbg_io pr_err -#define dbg_fops pr_err -#define dbg_perf pr_err -#define dbg_sg pr_err -#define dbg_tfr pr_err -#define dbg_irq pr_err -#define dbg_init pr_err -#define dbg_desc pr_err -#else -/* disable debugging */ -#define dbg_io(...) -#define dbg_fops(...) -#define dbg_perf(...) -#define dbg_sg(...) -#define dbg_tfr(...) -#define dbg_irq(...) -#define dbg_init(...) -#define dbg_desc(...) -#endif - -/* SECTION: Enum definitions */ -enum transfer_state { - TRANSFER_STATE_NEW = 0, - TRANSFER_STATE_SUBMITTED, - TRANSFER_STATE_COMPLETED, - TRANSFER_STATE_FAILED, - TRANSFER_STATE_ABORTED -}; - -enum shutdown_state { - ENGINE_SHUTDOWN_NONE = 0, /* No shutdown in progress */ - ENGINE_SHUTDOWN_REQUEST = 1, /* engine requested to shutdown */ - ENGINE_SHUTDOWN_IDLE = 2 /* engine has shutdown and is idle */ -}; - -enum dev_capabilities { - CAP_64BIT_DMA = 2, - CAP_64BIT_DESC = 4, - CAP_ENGINE_WRITE = 8, - CAP_ENGINE_READ = 16 -}; - -/* SECTION: Structure definitions */ - -struct config_regs { - u32 identifier; - u32 reserved_1[4]; - u32 msi_enable; -}; - -/** - * SG DMA Controller status and control registers - * - * These registers make the control interface for DMA transfers. - * - * It sits in End Point (FPGA) memory BAR[0] for 32-bit or BAR[0:1] for 64-bit. - * It references the first descriptor which exists in Root Complex (PC) memory. - * - * @note The registers must be accessed using 32-bit (PCI DWORD) read/writes, - * and their values are in little-endian byte ordering. 
- */ -struct engine_regs { - u32 identifier; - u32 control; - u32 control_w1s; - u32 control_w1c; - u32 reserved_1[12]; /* padding */ - - u32 status; - u32 status_rc; - u32 completed_desc_count; - u32 alignments; - u32 reserved_2[14]; /* padding */ - - u32 poll_mode_wb_lo; - u32 poll_mode_wb_hi; - u32 interrupt_enable_mask; - u32 interrupt_enable_mask_w1s; - u32 interrupt_enable_mask_w1c; - u32 reserved_3[9]; /* padding */ - - u32 perf_ctrl; - u32 perf_cyc_lo; - u32 perf_cyc_hi; - u32 perf_dat_lo; - u32 perf_dat_hi; - u32 perf_pnd_lo; - u32 perf_pnd_hi; -} __packed; - -struct engine_sgdma_regs { - u32 identifier; - u32 reserved_1[31]; /* padding */ - - /* bus address to first descriptor in Root Complex Memory */ - u32 first_desc_lo; - u32 first_desc_hi; - /* number of adjacent descriptors at first_desc */ - u32 first_desc_adjacent; - u32 credits; -} __packed; - -struct msix_vec_table_entry { - u32 msi_vec_addr_lo; - u32 msi_vec_addr_hi; - u32 msi_vec_data_lo; - u32 msi_vec_data_hi; -} __packed; - -struct msix_vec_table { - struct msix_vec_table_entry entry_list[32]; -} __packed; - -struct interrupt_regs { - u32 identifier; - u32 user_int_enable; - u32 user_int_enable_w1s; - u32 user_int_enable_w1c; - u32 channel_int_enable; - u32 channel_int_enable_w1s; - u32 channel_int_enable_w1c; - u32 reserved_1[9]; /* padding */ - - u32 user_int_request; - u32 channel_int_request; - u32 user_int_pending; - u32 channel_int_pending; - u32 reserved_2[12]; /* padding */ - - u32 user_msi_vector[8]; - u32 channel_msi_vector[8]; -} __packed; - -struct sgdma_common_regs { - u32 padding[8]; - u32 credit_mode_enable; - u32 credit_mode_enable_w1s; - u32 credit_mode_enable_w1c; -} __packed; - - -/* Structure for polled mode descriptor writeback */ -struct xdma_poll_wb { - u32 completed_desc_count; - u32 reserved_1[7]; -} __packed; - - -/** - * Descriptor for a single contiguous memory block transfer. - * - * Multiple descriptors are linked by means of the next pointer. 
An additional - * extra adjacent number gives the amount of extra contiguous descriptors. - * - * The descriptors are in root complex memory, and the bytes in the 32-bit - * words must be in little-endian byte ordering. - */ -struct xdma_desc { - u32 control; - u32 bytes; /* transfer length in bytes */ - u32 src_addr_lo; /* source address (low 32-bit) */ - u32 src_addr_hi; /* source address (high 32-bit) */ - u32 dst_addr_lo; /* destination address (low 32-bit) */ - u32 dst_addr_hi; /* destination address (high 32-bit) */ - /* - * next descriptor in the single-linked list of descriptors; - * this is the PCIe (bus) address of the next descriptor in the - * root complex memory - */ - u32 next_lo; /* next desc address (low 32-bit) */ - u32 next_hi; /* next desc address (high 32-bit) */ -} __packed; - -/* 32 bytes (four 32-bit words) or 64 bytes (eight 32-bit words) */ -struct xdma_result { - u32 status; - u32 length; - u32 reserved_1[6]; /* padding */ -} __packed; - -struct sw_desc { - dma_addr_t addr; - unsigned int len; -}; - -/* Describes a (SG DMA) single transfer for the engine */ -struct xdma_transfer { - struct list_head entry; /* queue of non-completed transfers */ - struct xdma_desc *desc_virt; /* virt addr of the 1st descriptor */ - dma_addr_t desc_bus; /* bus addr of the first descriptor */ - int desc_adjacent; /* adjacent descriptors at desc_bus */ - int desc_num; /* number of descriptors in transfer */ - enum dma_data_direction dir; - wait_queue_head_t wq; /* wait queue for transfer completion */ - - enum transfer_state state; /* state of the transfer */ - unsigned int flags; -#define XFER_FLAG_NEED_UNMAP 0x1 - int cyclic; /* flag if transfer is cyclic */ - int last_in_request; /* flag if last within request */ - unsigned int len; - struct sg_table *sgt; -}; - -struct xdma_request_cb { - struct sg_table *sgt; - unsigned int total_len; - u64 ep_addr; - - struct xdma_transfer xfer; - - unsigned int sw_desc_idx; - unsigned int sw_desc_cnt; - struct sw_desc 
sdesc[0]; -}; - -struct xdma_engine { - unsigned long magic; /* structure ID for sanity checks */ - struct xdma_dev *xdev; /* parent device */ - char name[5]; /* name of this engine */ - int version; /* version of this engine */ - //dev_t cdevno; /* character device major:minor */ - //struct cdev cdev; /* character device (embedded struct) */ - - /* HW register address offsets */ - struct engine_regs *regs; /* Control reg BAR offset */ - struct engine_sgdma_regs *sgdma_regs; /* SGDAM reg BAR offset */ - u32 bypass_offset; /* Bypass mode BAR offset */ - - /* Engine state, configuration and flags */ - enum shutdown_state shutdown; /* engine shutdown mode */ - enum dma_data_direction dir; - int running; /* flag if the driver started engine */ - int non_incr_addr; /* flag if non-incremental addressing used */ - int streaming; - int addr_align; /* source/dest alignment in bytes */ - int len_granularity; /* transfer length multiple */ - int addr_bits; /* HW datapath address width */ - int channel; /* engine indices */ - int max_extra_adj; /* descriptor prefetch capability */ - int desc_dequeued; /* num descriptors of completed transfers */ - u32 status; /* last known status of device */ - u32 interrupt_enable_mask_value;/* only used for MSIX mode to store per-engine interrupt mask value */ - - /* Transfer list management */ - struct list_head transfer_list; /* queue of transfers */ - - /* Members applicable to AXI-ST C2H (cyclic) transfers */ - struct xdma_result *cyclic_result; - dma_addr_t cyclic_result_bus; /* bus addr for transfer */ - struct xdma_request_cb *cyclic_req; - struct sg_table cyclic_sgt; - u8 eop_found; /* used only for cyclic(rx:c2h) */ - - int rx_tail; /* follows the HW */ - int rx_head; /* where the SW reads from */ - int rx_overrun; /* flag if overrun occured */ - - /* for copy from cyclic buffer to user buffer */ - unsigned int user_buffer_index; - - /* Members associated with polled mode support */ - u8 *poll_mode_addr_virt; /* virt addr for 
descriptor writeback */ - dma_addr_t poll_mode_bus; /* bus addr for descriptor writeback */ - - /* Members associated with interrupt mode support */ - wait_queue_head_t shutdown_wq; /* wait queue for shutdown sync */ - spinlock_t lock; /* protects concurrent access */ - int prev_cpu; /* remember CPU# of (last) locker */ - int msix_irq_line; /* MSI-X vector for this engine */ - u32 irq_bitmask; /* IRQ bit mask for this engine */ - struct work_struct work; /* Work queue for interrupt handling */ - - spinlock_t desc_lock; /* protects concurrent access */ - dma_addr_t desc_bus; - struct xdma_desc *desc; - - /* for performance test support */ - struct xdma_performance_ioctl *xdma_perf; /* perf test control */ - wait_queue_head_t xdma_perf_wq; /* Perf test sync */ -}; - -struct xdma_user_irq { - struct xdma_dev *xdev; /* parent device */ - u8 user_idx; /* 0 ~ 15 */ - u8 events_irq; /* accumulated IRQs */ - spinlock_t events_lock; /* lock to safely update events_irq */ - wait_queue_head_t events_wq; /* wait queue to sync waiting threads */ - irq_handler_t handler; - - void *dev; -}; - -/* XDMA PCIe device specific book-keeping */ -#define XDEV_FLAG_OFFLINE 0x1 -struct xdma_dev { - struct list_head list_head; - struct list_head rcu_node; - - unsigned long magic; /* structure ID for sanity checks */ - struct pci_dev *pdev; /* pci device struct from probe() */ - int idx; /* dev index */ - - const char *mod_name; /* name of module owning the dev */ - - spinlock_t lock; /* protects concurrent access */ - unsigned int flags; - - /* PCIe BAR management */ - void *__iomem bar[XDMA_BAR_NUM]; /* addresses for mapped BARs */ - int user_bar_idx; /* BAR index of user logic */ - int config_bar_idx; /* BAR index of XDMA config logic */ - int bypass_bar_idx; /* BAR index of XDMA bypass logic */ - int regions_in_use; /* flag if dev was in use during probe() */ - int got_regions; /* flag if probe() obtained the regions */ - - int user_max; - int c2h_channel_max; - int h2c_channel_max; - - 
/* Interrupt management */ - int irq_count; /* interrupt counter */ - int irq_line; /* flag if irq allocated successfully */ - int msi_enabled; /* flag if msi was enabled for the device */ - int msix_enabled; /* flag if msi-x was enabled for the device */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4,12,0) - struct msix_entry entry[32]; /* msi-x vector/entry table */ -#endif - struct xdma_user_irq user_irq[16]; /* user IRQ management */ - unsigned int mask_irq_user; - - /* XDMA engine management */ - int engines_num; /* Total engine count */ - u32 mask_irq_h2c; - u32 mask_irq_c2h; - struct xdma_engine engine_h2c[XDMA_CHANNEL_NUM_MAX]; - struct xdma_engine engine_c2h[XDMA_CHANNEL_NUM_MAX]; - - /* SD_Accel specific */ - enum dev_capabilities capabilities; - u64 feature_id; -}; - -static inline int xdma_device_flag_check(struct xdma_dev *xdev, unsigned int f) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->lock, flags); - if (xdev->flags & f) { - spin_unlock_irqrestore(&xdev->lock, flags); - return 1; - } - spin_unlock_irqrestore(&xdev->lock, flags); - return 0; -} - -static inline int xdma_device_flag_test_n_set(struct xdma_dev *xdev, - unsigned int f) -{ - unsigned long flags; - int rv = 0; - - spin_lock_irqsave(&xdev->lock, flags); - if (xdev->flags & f) { - spin_unlock_irqrestore(&xdev->lock, flags); - rv = 1; - } else - xdev->flags |= f; - spin_unlock_irqrestore(&xdev->lock, flags); - return rv; -} - -static inline void xdma_device_flag_set(struct xdma_dev *xdev, unsigned int f) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->lock, flags); - xdev->flags |= f; - spin_unlock_irqrestore(&xdev->lock, flags); -} - -static inline void xdma_device_flag_clear(struct xdma_dev *xdev, unsigned int f) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->lock, flags); - xdev->flags &= ~f; - spin_unlock_irqrestore(&xdev->lock, flags); -} - -void write_register(u32 value, void *iomem); -u32 read_register(void *iomem); - -struct xdma_dev *xdev_find_by_pdev(struct 
pci_dev *pdev); - -void xdma_device_offline(struct pci_dev *pdev, void *dev_handle); -void xdma_device_online(struct pci_dev *pdev, void *dev_handle); - -int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine); -struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine); -void enable_perf(struct xdma_engine *engine); -void get_perf_stats(struct xdma_engine *engine); - -int xdma_cyclic_transfer_setup(struct xdma_engine *engine); -int xdma_cyclic_transfer_teardown(struct xdma_engine *engine); -ssize_t xdma_engine_read_cyclic(struct xdma_engine *, char __user *, size_t, - int); - -#endif /* XDMA_LIB_H */ - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/libxdma_api.h b/sdk/linux_kernel_drivers/xocl/libxdma_api.h deleted file mode 100644 index bf043eb1..00000000 --- a/sdk/linux_kernel_drivers/xocl/libxdma_api.h +++ /dev/null @@ -1,135 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. - * - * Karen Xie - * Leon Woestenberg - * - ******************************************************************************/ - -#ifndef __XDMA_BASE_API_H__ -#define __XDMA_BASE_API_H__ - -#include -#include -#include - -/* - * functions exported by the xdma driver - */ - -typedef struct { - u64 write_submitted; - u64 write_completed; - u64 read_requested; - u64 read_completed; - u64 restart; - u64 open; - u64 close; - u64 msix_trigger; -} xdma_statistics; - -/* - * This struct should be constantly updated by XMDA using u64_stats_* APIs - * The front end will read the structure without locking (That's why updating atomically is a must) - * every time it prints the statistics. 
- */ -//static XDMA_Statistics stats; - -/* - * xdma_device_open - read the pci bars and configure the fpga - * should be called from probe() - * NOTE: - * user interrupt will not enabled until xdma_user_isr_enable() - * is called - * @pdev: ptr to pci_dev - * @mod_name: the module name to be used for request_irq - * @user_max: max # of user/event (interrupts) to be configured - * @channel_max: max # of c2h and h2c channels to be configured - * NOTE: if the user/channel provisioned is less than the max specified, - * libxdma will update the user_max/channel_max - * returns - * a opaque handle (for libxdma to identify the device) - * NULL, in case of error - */ -void *xdma_device_open(const char *mod_name, struct pci_dev *pdev, - int *user_max, int *h2c_channel_max, int *c2h_channel_max); - -/* - * xdma_device_close - prepare fpga for removal: disable all interrupts (users - * and xdma) and release all resources - * should called from remove() - * @pdev: ptr to struct pci_dev - * @tuples: from xdma_device_open() - */ -void xdma_device_close(struct pci_dev *pdev, void *dev_handle); - -/* - * xdma_device_restart - restart the fpga - * @pdev: ptr to struct pci_dev - * TODO: - * may need more refining on the parameter list - * return < 0 in case of error - * TODO: exact error code will be defined later - */ -int xdma_device_restart(struct pci_dev *pdev, void *dev_handle); - -/* - * xdma_user_isr_register - register a user ISR handler - * It is expected that the xdma will register the ISR, and for the user - * interrupt, it will call the corresponding handle if it is registered and - * enabled. - * - * @pdev: ptr to the the pci_dev struct - * @mask: bitmask of user interrupts (0 ~ 15)to be registered - * bit 0: user interrupt 0 - * ... - * bit 15: user interrupt 15 - * any bit above bit 15 will be ignored. 
- * @handler: the correspoinding handler - * a NULL handler will be treated as de-registeration - * @name: to be passed to the handler, ignored if handler is NULL` - * @dev: to be passed to the handler, ignored if handler is NULL` - * return < 0 in case of error - * TODO: exact error code will be defined later - */ -int xdma_user_isr_register(void *dev_hndl, unsigned int mask, - irq_handler_t handler, void *dev); - -/* - * xdma_user_isr_enable/disable - enable or disable user interrupt - * @pdev: ptr to the the pci_dev struct - * @mask: bitmask of user interrupts (0 ~ 15)to be registered - * return < 0 in case of error - * TODO: exact error code will be defined later - */ -int xdma_user_isr_enable(void *dev_hndl, unsigned int mask); -int xdma_user_isr_disable(void *dev_hndl, unsigned int mask); - -/* - * xdma_xfer_submit - submit data for dma operation (for both read and write) - * This is a blocking call - * @channel: channle number (< channel_max) - * == channel_max means libxdma can pick any channel available:q - - * @dir: DMA_FROM/TO_DEVICE - * @offset: offset into the DDR/BRAM memory to read from or write to - * @sg_tbl: the scatter-gather list of data buffers - * @timeout: timeout in mili-seconds, *currently ignored - * return # of bytes transfered or - * < 0 in case of error - * TODO: exact error code will be defined later - */ -ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, - struct sg_table *sgt, bool dma_mapped, int timeout_ms); - - -/////////////////////missing API//////////////////// - -//xdma_get_channle_state - if no interrupt on DMA hang is available -//xdma_channle_restart - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xclfeatures.h b/sdk/linux_kernel_drivers/xocl/xclfeatures.h deleted file mode 100644 index 5709b93c..00000000 --- a/sdk/linux_kernel_drivers/xocl/xclfeatures.h +++ /dev/null @@ -1,146 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This file is dual licensed. 
It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - * - * Apache License Verbiage - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * GPL license Verbiage: - * - * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - * You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - - -/** - * Xilinx SDAccel FPGA BIOS definition - * Copyright (C) 2016-2017, Xilinx Inc - All rights reserved - */ - - -//Layout: At address 0xB0000, we will have the FeatureRomHeader that comprises: -// -//1. First have FeatureRomHeader: 152 bytes of information followed by -//2. Then, as a part of FeatureRomHeader we have the PRRegion struct(s). -// The number of such structs will be same as OCLRegionCount. -//3. After this the freq scaling table is laid out. 
-// - -//#include - -typedef struct PartialRegion { - uint16_t clk[4]; - uint8_t XPR; //0 : non-xpt, 1: xpr -} PRRegion; - -// Each entry represents one row in freq scaling table. -struct FreqScalingTableRow{ - short config0; - short freq; - short config2; -}; - -enum PROMType { - BPI = 0 - ,SPI = 1 - //room for 6 more types of flash devices. -}; - -enum DebugType { - DT_NIFD = 0x01, - DT_FIREWALL = 0x02 - //There is room for future expansion upto 8 IPs -}; - -// This bit mask is used with the FeatureBitMap to calculate 64 bool features -// -// To test if a feature is provided: -// FeatureRomHeader header; -// if (FeatureBitMask::FBM_IS_UNIFIED & header.FeatureBitMap) -// // it is supported -// else -// // it is not supported -// -// To set if a feature is provided: -// header.FeatureBitMap = 0; -// header.FeatureBitMap |= FeatureBitMask::FBM_IS_UNIFIED; -// -enum FeatureBitMask -{ - UNIFIED_PLATFORM = 0x0000000000000001 /* bit 1 : Unified platform */ - ,XARE_ENBLD = 0x0000000000000002 /* bit 2 : Aurora link enabled DSA */ - ,BOARD_MGMT_ENBLD = 0x0000000000000004 /* bit 3 : Has MB based power monitoring */ - ,MB_SCHEDULER = 0x0000000000000008 /* bit 4: Has MB based scheduler */ - ,PROM_MASK = 0x0000000000000070 /* bits 5,6 &7 : 3 bits for PROMType */ - /** ------ Bit 8 unused **/ - ,DEBUG_MASK = 0x000000000000FF00 /* bits 9 through 16 : 8 bits for DebugType */ - - //....more -}; - - - -// In the following data structures, the EntryPointString, MajorVersion, and MinorVersion -// values are all used in the Runtime to identify if the ROM is producing valid data, and -// to pick the schema to read the rest of the data; Ergo, these values shall not change. - -/* - * Struct used for > 2017.2_sdx - * This struct should be used for version (==) 10.0 (Major: 10, Minor: 0) - */ -struct FeatureRomHeader { - unsigned char EntryPointString[4]; // This is "xlnx" - uint8_t MajorVersion; // Feature ROM's major version eg 1 - uint8_t MinorVersion; // minor version eg 2. 
- // -- DO NOT CHANGE THE TYPES ABOVE THIS LINE -- - uint32_t VivadoBuildID; // Vivado Software Build (e.g., 1761098 ). From ./vivado --version - uint32_t IPBuildID; // IP Build (e.g., 1759159 from abve) - uint64_t TimeSinceEpoch; // linux time(NULL) call, at write_dsa_rom invocation - unsigned char FPGAPartName[64]; // The hardware FPGA part. Null termninated - unsigned char VBNVName[64]; // eg : xilinx:xil-accel-rd-ku115:4ddr-xpr:3.4: null terminated - uint8_t DDRChannelCount; // 4 for TUL - uint8_t DDRChannelSize; // 4 (in GB) - uint64_t DRBaseAddress; // The Dynamic Range's (AppPF/CL/Userspace) Base Address - uint64_t FeatureBitMap; // Feature Bit Map, specifies 64 different bool features, maps to enum FeatureBitMask -}; - - -/* - * Struct used for 2017.1_sdx - * This struct should be used for all versions below (<) 10.0 (Major: 10, Minor: 0) -struct FeatureRomHeader { - unsigned char EntryPointString[4]; // This is "xlnx" - uint8_t MajorVersion; // Feature ROM's major version eg 1 - uint8_t MinorVersion; // minor version eg 2. - // -- DO NOT CHANGE THE TYPES ABOVE THIS LINE -- - uint32_t VivadoBuildID; // Vivado Software Build (e.g., 1761098 ). From ./vivado --version - uint32_t IPBuildID; // IP Build (e.g., 1759159 from abve) - uint64_t TimeSinceEpoch; // linux time(NULL) call, at write_dsa_rom invocation - unsigned char FPGAPartName[64]; // The hardware FPGA part. Null termninated - unsigned char VBNVName[64]; // eg : xilinx:xil-accel-rd-ku115:4ddr-xpr:3.4: null terminated - uint8_t DDRChannelCount; // 4 for TUL - uint8_t DDRChannelSize; // 4 (in GB) - uint8_t OCLRegionCount; // Number of OCL regions - uint8_t FPGAType; // maps to enum FPGAGeneration - uint8_t NumFreqTableRows; // Number of rows in freq scaling table. - PRRegion region[1]; // The PRRegion struct, lay them out one after another totalling OCLRegionCount. 
- unsigned char FreqTable[1]; // NumFreqTableRows of FreqScalingTableRow struct -}; -*/ - diff --git a/sdk/linux_kernel_drivers/xocl/xocl_bo.c b/sdk/linux_kernel_drivers/xocl/xocl_bo.c deleted file mode 100644 index b8aedfc6..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_bo.c +++ /dev/null @@ -1,1041 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * Sarabjeet Singh - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#ifdef XOCL_CMA_ALLOC -#include -#endif -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -#define XOCL_DRM_FREE_MALLOC -#elif defined(RHEL_RELEASE_CODE) -#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,4) -#define XOCL_DRM_FREE_MALLOC -#endif -#endif - -#if defined(XOCL_DRM_FREE_MALLOC) -static inline void drm_free_large(void *ptr) -{ - kvfree(ptr); -} - -static inline void *drm_malloc_ab(size_t nmemb, size_t size) -{ - return kvmalloc_array(nmemb, sizeof(struct page *), GFP_KERNEL); -} -#endif - -static inline int xocl_drm_mm_insert_node(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size) -{ -#if defined(XOCL_DRM_FREE_MALLOC) - return drm_mm_insert_node_generic(mm, node, size, PAGE_SIZE, 0, 0); -#else - return drm_mm_insert_node_generic(mm, node, size, PAGE_SIZE, 0, 0, 0); -#endif -} - - -static inline void __user 
*to_user_ptr(u64 address) -{ - return (void __user *)(uintptr_t)address; -} - -static size_t xocl_bo_physical_addr(const struct drm_xocl_bo *xobj) -{ - uint64_t paddr = xobj->mm_node ? xobj->mm_node->start : 0xffffffffffffffffull; - - //Sarab: Need to check for number of hops & size of DDRs - if (xobj->flags & XOCL_BO_ARE) - paddr |= XOCL_ARE_HOP; - return paddr; -} - -void xocl_describe(const struct drm_xocl_bo *xobj) -{ - size_t size_in_kb = xobj->base.size / 1024; - size_t physical_addr = xocl_bo_physical_addr(xobj); - unsigned ddr = xocl_bo_ddr_idx(xobj->flags); - unsigned userptr = xocl_bo_userptr(xobj) ? 1 : 0; - - DRM_DEBUG("%p: H[%p] SIZE[0x%zxKB] D[0x%zx] DDR[%u] UPTR[%u] SGLCOUNT[%u]\n", - xobj, xobj->vmapping, size_in_kb, physical_addr, ddr, userptr, xobj->sgt->orig_nents); -} - -static void xocl_free_mm_node(struct drm_xocl_bo *xobj) -{ - struct drm_xocl_dev *xdev = xobj->base.dev->dev_private; - unsigned ddr = xocl_bo_ddr_idx(xobj->flags); - if (!xobj->mm_node) - return; - - mutex_lock(&xdev->mm_lock); - xdev->mm_usage_stat[ddr].memory_usage -= xobj->base.size; - xdev->mm_usage_stat[ddr].bo_count--; - drm_mm_remove_node(xobj->mm_node); - mutex_unlock(&xdev->mm_lock); - kfree(xobj->mm_node); - xobj->mm_node = NULL; -} - -void xocl_free_bo(struct drm_gem_object *obj) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(obj); - struct drm_xocl_dev *xdev = xobj->base.dev->dev_private; - int npages = obj->size >> PAGE_SHIFT; - DRM_DEBUG("Freeing BO %p\n", xobj); - - if (xobj->vmapping) - vunmap(xobj->vmapping); - xobj->vmapping = NULL; - - if (xobj->pages) { - if (xocl_bo_userptr(xobj)) { - release_pages(xobj->pages, npages, 0); - drm_free_large(xobj->pages); - } -#ifdef XOCL_CMA_ALLOC - else if (xocl_bo_cma(xobj)) { - if (xobj->pages[0]) - cma_release(xdev->cma_blk, xobj->pages[0], npages); - drm_free_large(xobj->pages); - } -#endif - else if (!xocl_bo_import(xobj)) { - drm_gem_put_pages(obj, xobj->pages, false, false); - } - } - xobj->pages = NULL; - - if 
(!xocl_bo_import(xobj)) { - DRM_DEBUG("Freeing regular buffer\n"); - if (xobj->sgt) { - sg_free_table(xobj->sgt); - kfree(xobj->sgt); - xobj->sgt = NULL; - } - xocl_free_mm_node(xobj); - } - else { - DRM_DEBUG("Freeing imported buffer\n"); - if (!(xobj->flags & XOCL_BO_ARE)) - xocl_free_mm_node(xobj); - - if (obj->import_attach) { - DRM_DEBUG("Unnmapping attached dma buf\n"); - dma_buf_unmap_attachment(obj->import_attach, xobj->sgt, DMA_TO_DEVICE); - drm_prime_gem_destroy(obj, NULL); - } - } - - //If it is imported BO then we do not delete SG Table - //And if is imported from ARE device then we do not free the mm_node as well - - //Sarab: Call detach here........ - //to let the exporting device know that importing device do not need it anymore.. - //else free_bo i.e this function is not called for exporting device - //as it assumes that the exported buffer is still being used - //dmabuf->ops->release(dmabuf); - //The drm_driver.gem_free_object callback is responsible for cleaning up the dma_buf attachment and references acquired at import time. - - /* This crashes machine.. Using above code instead - * drm_prime_gem_destroy calls detach function.. 
- struct dma_buf *imported_dma_buf = obj->dma_buf; - if (imported_dma_buf->ops->detach) - imported_dma_buf->ops->detach(imported_dma_buf, obj->import_attach); - */ - - drm_gem_object_release(obj); - kfree(xobj); -} - - -static inline int check_bo_user_flags(const struct drm_device *dev, unsigned flags) -{ - const unsigned ddr_count = xocl_ddr_channel_count(dev); - struct drm_xocl_dev *xdev = dev->dev_private; - unsigned ddr; - - if(ddr_count == 0) - return -EINVAL; - if (flags == 0xffffffff) - return 0; - if (flags == DRM_XOCL_BO_EXECBUF) - return 0; -#ifdef XOCL_CMA_ALLOC - if (flags == DRM_XOCL_BO_CMA) - return 0; -#else - if (flags == DRM_XOCL_BO_CMA) - return -EINVAL; -#endif - ddr = xocl_bo_ddr_idx(flags); - if (ddr == 0xffffffff) - return 0; - if (ddr >= ddr_count) - return -EINVAL; - if (xdev->unified) { - if (xdev->topology.m_data[ddr].m_used != 1) { - printk(KERN_INFO "Bank %d is marked as unused in axlf\n", ddr); - return -EINVAL; - } - } - return 0; -} - - -static struct drm_xocl_bo *xocl_create_bo(struct drm_device *dev, - uint64_t unaligned_size, - unsigned user_flags) -{ - size_t size = PAGE_ALIGN(unaligned_size); - struct drm_xocl_bo *xobj; - struct drm_xocl_dev *xdev = dev->dev_private; - unsigned ddr = xocl_bo_ddr_idx(user_flags); - const unsigned ddr_count = xocl_ddr_channel_count(dev); - int err = 0; - - if (!size) - return ERR_PTR(-EINVAL); - - /* Either none or only one DDR should be specified */ - if (check_bo_user_flags(dev, user_flags)) - return ERR_PTR(-EINVAL); - - xobj = kzalloc(sizeof(*xobj), GFP_KERNEL); - if (!xobj) - return ERR_PTR(-ENOMEM); - - err = drm_gem_object_init(dev, &xobj->base, size); - if (err) - goto out3; - - if (user_flags == DRM_XOCL_BO_EXECBUF) { - xobj->flags = XOCL_BO_EXECBUF; - xobj->mm_node = NULL; - xobj->metadata.state = DRM_XOCL_EXECBUF_STATE_ABORT; - return xobj; - } - -#ifdef XOCL_CMA_ALLOC - if (user_flags == DRM_XOCL_BO_CMA) { - xobj->flags = XOCL_BO_CMA; - xobj->mm_node = NULL; - return xobj; - } -#endif - 
- xobj->mm_node = kzalloc(sizeof(*xobj->mm_node), GFP_KERNEL); - if (!xobj->mm_node) { - err = -ENOMEM; - goto out3; - } - - mutex_lock(&xdev->mm_lock); - if (ddr != 0xffffffff) { - /* Attempt to allocate buffer on the requested DDR */ - DRM_DEBUG("%s:%s:%d: %u\n", __FILE__, __func__, __LINE__, ddr); - err = xocl_drm_mm_insert_node(&xdev->mm[ddr], xobj->mm_node, xobj->base.size); - if (err) - goto out2; - } - else { - /* Attempt to allocate buffer on any DDR */ - for (ddr = 0; ddr < ddr_count; ddr++) { - DRM_DEBUG("%s:%s:%d: %u\n", __FILE__, __func__, __LINE__, ddr); - if(xdev->unified && !xdev->topology.m_data[ddr].m_used) - continue; - err = xocl_drm_mm_insert_node(&xdev->mm[ddr], xobj->mm_node, xobj->base.size); - if (err == 0) - break; - } - if (err) - goto out2; - } - xdev->mm_usage_stat[ddr].memory_usage += xobj->base.size; - xdev->mm_usage_stat[ddr].bo_count++; - mutex_unlock(&xdev->mm_lock); - /* Record the DDR we allocated the buffer on */ - xobj->flags |= (1 << ddr); - - return xobj; -out2: - mutex_unlock(&xdev->mm_lock); - kfree(xobj->mm_node); - drm_gem_object_release(&xobj->base); -out3: - kfree(xobj); - return ERR_PTR(err); -} - -/* - * For ARE device do not reserve DDR space - * In below import it will reuse the mm_node which is already created by other application - */ - -static struct drm_xocl_bo *xocl_create_bo_forARE(struct drm_device *dev, - uint64_t unaligned_size, - struct drm_mm_node *exporting_mm_node) -{ - struct drm_xocl_bo *xobj; - size_t size = PAGE_ALIGN(unaligned_size); - int err = 0; - - if (!size) - return ERR_PTR(-EINVAL); - - xobj = kzalloc(sizeof(*xobj), GFP_KERNEL); - if (!xobj) - return ERR_PTR(-ENOMEM); - - err = drm_gem_object_init(dev, &xobj->base, size); - if (err) - goto out3; - - xobj->mm_node = exporting_mm_node; - if (!xobj->mm_node) { - err = -ENOMEM; - goto out3; - } - - /* Record that this buffer is on remote device to be access over ARE*/ - xobj->flags = XOCL_BO_ARE; - return xobj; -out3: - kfree(xobj); - return 
ERR_PTR(err); -} - - -int xocl_create_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret; - int j; - struct drm_xocl_bo *xobj; - struct page *cpages; - unsigned int page_count; - struct drm_xocl_create_bo *args = data; - unsigned ddr = args->flags & 0xf; - struct drm_xocl_dev *xdev = dev->dev_private; - - if (args->flags && (args->flags != DRM_XOCL_BO_EXECBUF)) { - if (hweight_long(ddr) > 1) - return -EINVAL; - } - - xobj = xocl_create_bo(dev, args->size, args->flags); - - if (IS_ERR(xobj)) { - DRM_DEBUG("object creation failed\n"); - return PTR_ERR(xobj); - } - -#ifdef XOCL_CMA_ALLOC - if (args->flags == DRM_XOCL_BO_CMA) { - page_count = xobj->base.size >> PAGE_SHIFT; - xobj->pages = drm_malloc_ab(page_count, sizeof(*xobj->pages)); - if (!xobj->pages) { - ret = -ENOMEM; - goto out_free; - } - cpages = cma_alloc(xdev->cma_blk, page_count, 0, GFP_KERNEL); - if (!cpages) { - ret = -ENOMEM; - goto out_free; - } - for (j = 0; j < page_count; j++) - xobj->pages[j] = cpages++; - } - else { - xobj->pages = drm_gem_get_pages(&xobj->base); - } -#else - xobj->pages = drm_gem_get_pages(&xobj->base); -#endif - if (IS_ERR(xobj->pages)) { - ret = PTR_ERR(xobj->pages); - goto out_free; - } - - xobj->sgt = drm_prime_pages_to_sg(xobj->pages, xobj->base.size >> PAGE_SHIFT); - if (IS_ERR(xobj->sgt)) { - ret = PTR_ERR(xobj->sgt); - goto out_free; - } - - xobj->vmapping = vmap(xobj->pages, xobj->base.size >> PAGE_SHIFT, VM_MAP, PAGE_KERNEL); - - if (!xobj->vmapping) { - ret = -ENOMEM; - goto out_free; - } - - ret = drm_gem_create_mmap_offset(&xobj->base); - if (ret < 0) - goto out_free; - - ret = drm_gem_handle_create(filp, &xobj->base, &args->handle); - if (ret < 0) - goto out_free; - - xocl_describe(xobj); - drm_gem_object_unreference_unlocked(&xobj->base); - return ret; - -out_free: - xocl_free_bo(&xobj->base); - return ret; -} - -int xocl_userptr_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret; - struct 
drm_xocl_bo *xobj; - unsigned int page_count; - struct drm_xocl_userptr_bo *args = data; - unsigned ddr = args->flags & 0xf; - - if (offset_in_page(args->addr)) - return -EINVAL; - - if (args->flags & DRM_XOCL_BO_EXECBUF) - return -EINVAL; - - if (args->flags & DRM_XOCL_BO_CMA) - return -EINVAL; - - if (args->flags && (hweight_long(ddr) > 1)) - return -EINVAL; - - xobj = xocl_create_bo(dev, args->size, args->flags); - - if (IS_ERR(xobj)) { - DRM_DEBUG("object creation failed\n"); - return PTR_ERR(xobj); - } - - /* Use the page rounded size so we can accurately account for number of pages */ - page_count = xobj->base.size >> PAGE_SHIFT; - - xobj->pages = drm_malloc_ab(page_count, sizeof(*xobj->pages)); - if (!xobj->pages) { - ret = -ENOMEM; - goto out1; - } - ret = get_user_pages_fast(args->addr, page_count, 1, xobj->pages); - - if (ret != page_count) - goto out0; - - xobj->sgt = drm_prime_pages_to_sg(xobj->pages, page_count); - if (IS_ERR(xobj->sgt)) { - ret = PTR_ERR(xobj->sgt); - goto out0; - } - - /* TODO: resolve the cache issue */ - xobj->vmapping = vmap(xobj->pages, page_count, VM_MAP, PAGE_KERNEL); - - if (!xobj->vmapping) { - ret = -ENOMEM; - goto out1; - } - - ret = drm_gem_handle_create(filp, &xobj->base, &args->handle); - if (ret) - goto out1; - - xobj->flags |= XOCL_BO_USERPTR; - xocl_describe(xobj); - drm_gem_object_unreference_unlocked(&xobj->base); - return ret; - -out0: - drm_free_large(xobj->pages); - xobj->pages = NULL; -out1: - xocl_free_bo(&xobj->base); - DRM_DEBUG("handle creation failed\n"); - return ret; -} - - -int xocl_map_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret = 0; - struct drm_xocl_map_bo *args = data; - struct drm_gem_object *obj; - - obj = xocl_gem_object_lookup(dev, filp, args->handle); - if (!obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - if (xocl_bo_userptr(to_xocl_bo(obj))) { - ret = -EPERM; - goto out; - } - /* The mmap offset was set up at 
BO allocation time. */ - args->offset = drm_vma_node_offset_addr(&obj->vma_node); - xocl_describe(to_xocl_bo(obj)); -out: - drm_gem_object_unreference_unlocked(obj); - return ret; -} - -static struct sg_table *alloc_onetime_sg_table(struct page **pages, uint64_t offset, uint64_t size) -{ - int ret; - unsigned int nr_pages; - struct sg_table *sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); - - pages += (offset >> PAGE_SHIFT); - offset &= (~PAGE_MASK); - nr_pages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT; - - ret = sg_alloc_table_from_pages(sgt, pages, nr_pages, offset, size, GFP_KERNEL); - if (ret) - goto cleanup; - return sgt; - -cleanup: - kfree(sgt); - return ERR_PTR(-ENOMEM); -} - -static int acquire_channel(struct drm_xocl_dev *xdev, enum drm_xocl_sync_bo_dir dir) -{ - int channel = 0; - int result = 0; - - if (down_interruptible(&xdev->channel_sem[dir])) { - channel = -ERESTARTSYS; - goto out; - } - - for (channel = 0; channel < xdev->channel; channel++) { - result = test_and_clear_bit(channel, &xdev->channel_bitmap[dir]); - if (result) - break; - } - if (!result) { - // How is this possible? - DRM_ERROR("Failed to acquire a valid channel\n"); - up(&xdev->channel_sem[dir]); - channel = -EIO; - } -out: - return channel; -} - -static void release_channel(struct drm_xocl_dev *xdev, enum drm_xocl_sync_bo_dir dir, int channel) -{ - set_bit(channel, &xdev->channel_bitmap[dir]); - up(&xdev->channel_sem[dir]); -} - - -int xocl_sync_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - const struct drm_xocl_bo *xobj; - struct sg_table *sgt; - u64 paddr = 0; - int channel = 0; - ssize_t ret = 0; - const struct drm_xocl_sync_bo *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - const bool dir = (args->dir == DRM_XOCL_SYNC_BO_TO_DEVICE) ? 
true : false; - struct drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - xobj = to_xocl_bo(gem_obj); - sgt = xobj->sgt; - - //Sarab: If it is a remote BO then why do sync over ARE. - //We should do sync directly using the other device which this bo locally. - //So that txfer is: HOST->PCIE->DDR; Else it will be HOST->PCIE->ARE->DDR - paddr = xocl_bo_physical_addr(xobj); - - if (paddr == 0xffffffffffffffffull) - return -EINVAL; - - /* If device is offline (due to error), reject all DMA requests */ - if (xdev->offline) - return -ENODEV; - - - if ((args->offset >= gem_obj->size) || (args->size > gem_obj->size) || - ((args->offset + args->size) > gem_obj->size)) { - ret = -EINVAL; - goto out; - } - - /* only invalidate the range of addresses requested by the user */ - /* - if (args->dir == DRM_XOCL_SYNC_BO_TO_DEVICE) - flush_kernel_vmap_range(kaddr, args->size); - else if (args->dir == DRM_XOCL_SYNC_BO_FROM_DEVICE) - invalidate_kernel_vmap_range(kaddr, args->size); - else { - ret = -EINVAL; - goto out; - } - */ - paddr += args->offset; - - if (args->offset || (args->size != xobj->base.size)) { - sgt = alloc_onetime_sg_table(xobj->pages, args->offset, args->size); - if (IS_ERR(sgt)) { - ret = PTR_ERR(sgt); - goto out; - } - } - - //drm_clflush_sg(sgt); - channel = acquire_channel(xdev, args->dir); - - if (channel < 0) { - ret = -EINVAL; - goto clear; - } - /* Now perform DMA */ - ret = xdma_migrate_bo(xdev, sgt, dir, paddr, channel); - if (ret >= 0) { - xdev->channel_usage[args->dir][channel] += ret; - ret = (ret == args->size) ? 
0 : -EIO; - } - release_channel(xdev, args->dir, channel); -clear: - if (args->offset || (args->size != xobj->base.size)) { - sg_free_table(sgt); - kfree(sgt); - } -out: - drm_gem_object_unreference_unlocked(gem_obj); - return ret; -} - -int xocl_info_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - const struct drm_xocl_bo *xobj; - struct drm_xocl_info_bo *args = data; - struct drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - xobj = to_xocl_bo(gem_obj); - - args->size = xobj->base.size; - - args->paddr = xocl_bo_physical_addr(xobj); - xocl_describe(xobj); - drm_gem_object_unreference_unlocked(gem_obj); - - return 0; -} - -int xocl_pwrite_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - struct drm_xocl_bo *xobj; - const struct drm_xocl_pwrite_bo *args = data; - struct drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - char __user *user_data = to_user_ptr(args->data_ptr); - int ret = 0; - void *kaddr; - - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - if ((args->offset > gem_obj->size) || (args->size > gem_obj->size) - || ((args->offset + args->size) > gem_obj->size)) { - ret = -EINVAL; - goto out; - } - - if (args->size == 0) { - ret = 0; - goto out; - } - - if (!access_ok(VERIFY_READ, user_data, args->size)) { - ret = -EFAULT; - goto out; - } - - xobj = to_xocl_bo(gem_obj); - - if (xocl_bo_userptr(xobj)) { - ret = -EPERM; - goto out; - } - - kaddr = xobj->vmapping; - kaddr += args->offset; - - ret = copy_from_user(kaddr, user_data, args->size); -out: - drm_gem_object_unreference_unlocked(gem_obj); - - return ret; -} - -int xocl_pread_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - struct drm_xocl_bo *xobj; - const struct drm_xocl_pread_bo *args = data; - struct 
drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - char __user *user_data = to_user_ptr(args->data_ptr); - int ret = 0; - void *kaddr; - - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - if (xocl_bo_userptr(to_xocl_bo(gem_obj))) { - ret = -EPERM; - goto out; - } - - if ((args->offset > gem_obj->size) || (args->size > gem_obj->size) - || ((args->offset + args->size) > gem_obj->size)) { - ret = -EINVAL; - goto out; - } - - if (args->size == 0) { - ret = 0; - goto out; - } - - if (!access_ok(VERIFY_WRITE, user_data, args->size)) { - ret = EFAULT; - goto out; - } - - xobj = to_xocl_bo(gem_obj); - kaddr = xobj->vmapping;; - kaddr += args->offset; - - ret = copy_to_user(user_data, kaddr, args->size); - -out: - drm_gem_object_unreference_unlocked(gem_obj); - - return ret; -} - -struct sg_table *xocl_gem_prime_get_sg_table(struct drm_gem_object *obj) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(obj); - return drm_prime_pages_to_sg(xobj->pages, xobj->base.size >> PAGE_SHIFT); -} - - -static struct drm_xocl_bo *xocl_is_exporting_xare(struct drm_device *dev, struct dma_buf_attachment *attach) -{ - struct drm_gem_object *exporting_gem_obj; - struct drm_device *exporting_drm_dev; - struct drm_xocl_dev *exporting_xdev; - - struct device_driver *importing_dma_driver = dev->dev->driver; - struct dma_buf *exporting_dma_buf = attach->dmabuf; - struct device_driver *exporting_dma_driver = attach->dev->driver; - struct drm_xocl_dev *xdev = dev->dev_private; - - if (!strstr(xdev->header.VBNVName, "-xare")) - return NULL; - - //We don't know yet if the exporting device is Xilinx/XOCL or third party or USB device - //So checking it in below code - if (importing_dma_driver != exporting_dma_driver) - return NULL; - - //Exporting devices have same driver as us. 
So this is Xilinx device - //So now we can get gem_object, drm_device & xocl_dev - exporting_gem_obj = exporting_dma_buf->priv; - exporting_drm_dev = exporting_gem_obj->dev; - exporting_xdev = exporting_drm_dev->dev_private; - //exporting_xdev->header;//This has FeatureROM header - if (strstr(exporting_xdev->header.VBNVName, "-xare")) - return to_xocl_bo(exporting_gem_obj); - - return NULL; -} - -struct drm_gem_object *xocl_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, struct sg_table *sgt) -{ - int ret = 0; - // This is exporting device - struct drm_xocl_bo *exporting_xobj = xocl_is_exporting_xare(dev, attach); - - // For ARE device resue the mm node from exporting xobj - - // For non ARE devices we need to create a full BO but share the SG table - // ???? add flags to create_bo.. for DDR bank?? - - struct drm_xocl_bo *importing_xobj = exporting_xobj ? xocl_create_bo_forARE(dev, attach->dmabuf->size, exporting_xobj->mm_node) : - xocl_create_bo(dev, attach->dmabuf->size, 0); - - if (IS_ERR(importing_xobj)) { - DRM_DEBUG("object creation failed\n"); - return (struct drm_gem_object *)importing_xobj; - } - - importing_xobj->flags |= XOCL_BO_IMPORT; - importing_xobj->sgt = sgt; - importing_xobj->pages = drm_malloc_ab(attach->dmabuf->size >> PAGE_SHIFT, sizeof(*importing_xobj->pages)); - if (!importing_xobj->pages) { - ret = -ENOMEM; - goto out_free; - } - - ret = drm_prime_sg_to_page_addr_arrays(sgt, importing_xobj->pages, - NULL, attach->dmabuf->size >> PAGE_SHIFT); - if (ret) - goto out_free; - - importing_xobj->vmapping = vmap(importing_xobj->pages, importing_xobj->base.size >> PAGE_SHIFT, VM_MAP, - PAGE_KERNEL); - - if (!importing_xobj->vmapping) { - ret = -ENOMEM; - goto out_free; - } - - ret = drm_gem_create_mmap_offset(&importing_xobj->base); - if (ret < 0) - goto out_free; - - xocl_describe(importing_xobj); - return &importing_xobj->base; - -out_free: - xocl_free_bo(&importing_xobj->base); - DRM_ERROR("Buffer import 
failed\n"); - return ERR_PTR(ret); -} - -void *xocl_gem_prime_vmap(struct drm_gem_object *obj) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(obj); - return xobj->vmapping; -} - -void xocl_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - -} - -static int xocl_init_unmgd(struct drm_xocl_unmgd *unmgd, uint64_t data_ptr, uint64_t size, - enum drm_xocl_sync_bo_dir dir) -{ - int ret; - char __user *user_data = to_user_ptr(data_ptr); - - if (!access_ok((dir == DRM_XOCL_SYNC_BO_TO_DEVICE) ? VERIFY_READ : VERIFY_WRITE, user_data, size)) - return -EFAULT; - - memset(unmgd, 0, sizeof(struct drm_xocl_unmgd)); - - unmgd->npages = (((unsigned long)user_data + size + PAGE_SIZE - 1) - - ((unsigned long)user_data & PAGE_MASK)) >> PAGE_SHIFT; - - unmgd->pages = drm_malloc_ab(unmgd->npages, sizeof(*unmgd->pages)); - if (!unmgd->pages) - return -ENOMEM; - - ret = get_user_pages_fast(data_ptr, unmgd->npages, (dir == DRM_XOCL_SYNC_BO_FROM_DEVICE) ? 1 : 0, unmgd->pages); - - if (ret != unmgd->npages) - goto clear_pages; - - unmgd->sgt = alloc_onetime_sg_table(unmgd->pages, data_ptr & ~PAGE_MASK, size); - if (IS_ERR(unmgd->sgt)) { - ret = PTR_ERR(unmgd->sgt); - goto clear_release; - } - - return 0; - -clear_release: - release_pages(unmgd->pages, unmgd->npages, 0); -clear_pages: - drm_free_large(unmgd->pages); - unmgd->pages = NULL; - return ret; -} - -static void xocl_finish_unmgd(struct drm_xocl_unmgd *unmgd) -{ - if (!unmgd->pages) - return; - sg_free_table(unmgd->sgt); - kfree(unmgd->sgt); - release_pages(unmgd->pages, unmgd->npages, 0); - drm_free_large(unmgd->pages); - unmgd->pages = NULL; -} - - -int xocl_pwrite_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - int channel; - struct drm_xocl_unmgd unmgd; - const struct drm_xocl_pwrite_unmgd *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - const enum drm_xocl_sync_bo_dir dir = DRM_XOCL_SYNC_BO_TO_DEVICE; - ssize_t ret = 0; - - if (args->address_space != 0) - return -EFAULT; - - if 
(args->size == 0) - return 0; - - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - ret = xocl_init_unmgd(&unmgd, args->data_ptr, args->size, dir); - if (ret) - return ret; - - channel = acquire_channel(xdev, dir); - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - if (channel < 0) { - ret = -EINVAL; - goto clear; - } - /* Now perform DMA */ - ret = xdma_migrate_bo(xdev, unmgd.sgt, (dir == DRM_XOCL_SYNC_BO_TO_DEVICE), args->paddr, channel); - if (ret >= 0) { - xdev->channel_usage[dir][channel] += ret; - ret = (ret == args->size) ? 0 : -EIO; - } - release_channel(xdev, dir, channel); - DRM_DEBUG("%s:%llx\n", __func__, xdev->channel_usage[dir][channel]); -clear: - xocl_finish_unmgd(&unmgd); - return ret; -} - -int xocl_pread_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - int channel; - struct drm_xocl_unmgd unmgd; - const struct drm_xocl_pwrite_unmgd *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - const enum drm_xocl_sync_bo_dir dir = DRM_XOCL_SYNC_BO_FROM_DEVICE; - ssize_t ret = 0; - - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - if (args->address_space != 0) - return -EFAULT; - - if (args->size == 0) - return 0; - - ret = xocl_init_unmgd(&unmgd, args->data_ptr, args->size, dir); - if (ret) - return ret; - - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - channel = acquire_channel(xdev, dir); - - if (channel < 0) { - ret = -EINVAL; - goto clear; - } - /* Now perform DMA */ - ret = xdma_migrate_bo(xdev, unmgd.sgt, (dir == DRM_XOCL_SYNC_BO_TO_DEVICE), args->paddr, channel); - if (ret >= 0) { - xdev->channel_usage[dir][channel] += ret; - ret = (ret == args->size) ? 
0 : -EIO; - } - release_channel(xdev, dir, channel); - DRM_DEBUG("%s:%llx\n", __func__, xdev->channel_usage[dir][channel]); -clear: - xocl_finish_unmgd(&unmgd); - return ret; -} - -int xocl_usage_stat_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_usage_stat *args = data; - args->mm_channel_count = xocl_ddr_channel_count(dev); - if (args->mm_channel_count > 8) - args->mm_channel_count = 8; - memcpy(args->mm, xdev->mm_usage_stat, sizeof(struct drm_xocl_mm_stat) * args->mm_channel_count); - args->dma_channel_count = xdev->channel; - if (args->dma_channel_count > 8) - args->dma_channel_count = 8; - memcpy(args->h2c, xdev->channel_usage[DRM_XOCL_SYNC_BO_TO_DEVICE], sizeof(unsigned long long) * args->dma_channel_count); - memcpy(args->c2h, xdev->channel_usage[DRM_XOCL_SYNC_BO_FROM_DEVICE], sizeof(unsigned long long) * args->dma_channel_count); - DRM_INFO("%s h2c[0] 0%llx\n", __func__, args->h2c[0]); - DRM_INFO("%s c2h[0] 0%llx\n", __func__, args->c2h[0]); - DRM_INFO("%s\n", __func__); - return 0; -} - - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_ctx.c b/sdk/linux_kernel_drivers/xocl/xocl_ctx.c deleted file mode 100644 index 24af6f2f..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_ctx.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - */ - -#include -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" - - -void xocl_track_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - list_add_tail(&fpriv->link, &xdev->exec.ctx_list); - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); -} - -void xocl_untrack_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - list_del(&fpriv->link); - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); -} - diff --git a/sdk/linux_kernel_drivers/xocl/xocl_drv.c b/sdk/linux_kernel_drivers/xocl/xocl_drv.c deleted file mode 100644 index c97835b0..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_drv.c +++ /dev/null @@ -1,832 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * Hem Neema - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include -#include -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include -#include -#include -#ifdef XOCL_CMA_ALLOC -#include -#endif -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" -#include "xclbin.h" - -#define XOCL_DRIVER_NAME "xocl" -#define XOCL_DRIVER_DESC "Xilinx PCIe Accelerator Device Manager" -#define XOCL_DRIVER_DATE "20171111" -#define XOCL_DRIVER_MAJOR 2017 -#define XOCL_DRIVER_MINOR 4 -#define XOCL_DRIVER_PATCHLEVEL 5 - - -#define XOCL_DRIVER_VERSION \ - __stringify(XOCL_DRIVER_MAJOR) "." \ - __stringify(XOCL_DRIVER_MINOR) "." \ - __stringify(XOCL_DRIVER_PATCHLEVEL) - -#define XOCL_DRIVER_VERSION_NUMBER \ - ((XOCL_DRIVER_MAJOR)*1000 + (XOCL_DRIVER_MINOR)*100 + XOCL_DRIVER_PATCHLEVEL) - - -#define XOCL_FILE_PAGE_OFFSET 0x100000 - -#ifndef VM_RESERVED -#define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP) -#endif - -static const struct pci_device_id pciidlist[] = { - { PCI_DEVICE(0x10ee, 0x4A48), }, - { PCI_DEVICE(0x10ee, 0x4A88), }, - { PCI_DEVICE(0x10ee, 0x4B48), }, - { PCI_DEVICE(0x10ee, 0x4B88), }, - { PCI_DEVICE(0x10ee, 0x6850), }, - { PCI_DEVICE(0x10ee, 0x6890), }, - { PCI_DEVICE(0x10ee, 0x6950), }, - { PCI_DEVICE(0x10ee, 0x6990), }, - { PCI_DEVICE(0x10ee, 0x6A50), }, - { PCI_DEVICE(0x10ee, 0x6A90), }, - { PCI_DEVICE(0x10ee, 0x6E50), }, - { PCI_DEVICE(0x10ee, 0x6B10), }, - { PCI_DEVICE(0x1d0f, 0xf010), }, // shell 1.4 - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, pciidlist); - -static struct cma *xocl_cma = NULL; - -static void xocl_print_dev_info(const struct drm_xocl_dev *xdev) -{ - DRM_INFO("%s [Timestamp 0x%llx]\n", xdev->header.VBNVName, xdev->header.TimeSinceEpoch); - DRM_INFO("%d bi-directional DMA channels\n", xdev->channel); - DRM_INFO("%d DDR channels, Total RAM = %dGB\n", xdev->header.DDRChannelCount, - xdev->header.DDRChannelSize * xdev->header.DDRChannelCount); - DRM_INFO("PCI Resource 0x%llx [Size 0x%llxKB]\n", xdev->res_start, xdev->res_len/1024); -} - -static int 
probe_feature_rom(struct drm_xocl_dev *xdev) -{ - u32 val; - unsigned short ddr = (xdev->ddev->pdev->subsystem_device >> 12) & 0x000f; - val = ioread32(xdev->user_bar + XOCL_FEATURE_ROM); - // Magic number check - if (val != 0x786e6c78) { - if (xdev->ddev->pdev->vendor == 0x1d0f && (xdev->ddev->pdev->device == 0x1042 || xdev->ddev->pdev->device == 0xf010)) { - printk(KERN_INFO "XOCL: Found AWS VU9P Device without featureROM\n"); - //This is AWS device. Fill the FeatureROM struct. Right now it doesn't have FeatureROM - memset(xdev->header.EntryPointString, 0, sizeof(xdev->header.EntryPointString)); - strncpy(xdev->header.EntryPointString, "xlnx", 4); - memset(xdev->header.FPGAPartName, 0, sizeof(xdev->header.FPGAPartName)); - strncpy(xdev->header.FPGAPartName, "AWS VU9P", 8); - memset(xdev->header.VBNVName, 0, sizeof(xdev->header.VBNVName)); - strncpy(xdev->header.VBNVName, "xilinx_aws-vu9p-f1_dynamic_5_0", 35); - xdev->header.MajorVersion = 4; - xdev->header.MinorVersion = 0; - xdev->header.VivadoBuildID = 0xabcd; - xdev->header.IPBuildID = 0xabcd; - xdev->header.TimeSinceEpoch = 0xabcd; - xdev->header.DDRChannelCount = 4; - xdev->header.DDRChannelSize = 16; - xdev->header.FeatureBitMap = 0x0; - printk(KERN_INFO "XOCL: Enabling AWS dynamic 5.0 DSA\n"); - xdev->header.FeatureBitMap = UNIFIED_PLATFORM; - xdev->unified = true; - } else { - DRM_ERROR("XOCL: Probe of Feature ROM failed\n"); - return -ENODEV; - } - } else { - printk(KERN_INFO "XOCL: Printing PCI VendorID: %llx\n", xdev->ddev->pdev->vendor); - printk(KERN_INFO "XOCL: Printing PCI DeviceID: %llx\n", xdev->ddev->pdev->device); - memcpy_fromio(&xdev->header, xdev->user_bar + XOCL_FEATURE_ROM, sizeof(struct FeatureRomHeader)); - // Sanity check - if (strstr(xdev->header.VBNVName, "-xare")) {//This is ARE device - xdev->header.DDRChannelCount = xdev->header.DDRChannelCount - 1; //ARE is mapped like another DDR inside FPGA; map_connects as M04_AXI - } - if (ddr != xdev->header.DDRChannelCount) { - 
DRM_ERROR("XOCL: Feature ROM DDR channel count not consistent\n"); - return -ENODEV; - } - - if(xdev->header.FeatureBitMap & UNIFIED_PLATFORM) { - xdev->unified = true; - } - } - - printk(KERN_INFO "XOCL: ROM magic : %s\n", xdev->header.EntryPointString); - printk(KERN_INFO "XOCL: VBNV: %s", xdev->header.VBNVName); - printk(KERN_INFO "XOCL: DDR channel count : %d\n", xdev->header.DDRChannelCount); - printk(KERN_INFO "XOCL: DDR channel size: %d GB\n", xdev->header.DDRChannelSize); - printk(KERN_INFO "XOCL: Major Version: %d \n", xdev->header.MajorVersion); - printk(KERN_INFO "XOCL: Minor Version: %d \n", xdev->header.MinorVersion); - printk(KERN_INFO "XOCL: IPBuildID: %u\n", xdev->header.IPBuildID); - printk(KERN_INFO "XOCL: TimeSinceEpoch: %llx\n", xdev->header.TimeSinceEpoch); - printk(KERN_INFO "XOCL: FeatureBitMap: %llx\n", xdev->header.FeatureBitMap); - -// if(xdev->header.MajorVersion >= 10) -// printk(KERN_INFO "Printing DRBaseAddress: %llx\n", xdev->header.DRBaseAddress); - return 0; -} - -static int xocl_drm_load(struct drm_device *ddev, unsigned long flags) -{ - struct drm_xocl_dev *xdev; - unsigned i; - int result = 0; - unsigned long long segment = 0; - unsigned short ddr = 0; - unsigned long long ddr_size = 0; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,4,0) - drm_dev_set_unique(ddev, dev_name(ddev->dev)); -#endif - - xdev = devm_kzalloc(ddev->dev, sizeof(*xdev), GFP_KERNEL); - if (!xdev) - return -ENOMEM; - xdev->ddev = ddev; - ddev->dev_private = xdev; - - xdev->res_start = pci_resource_start(xdev->ddev->pdev, 0); - xdev->res_len = pci_resource_end(xdev->ddev->pdev, 0) - xdev->res_start + 1; - - xdev->user_bar = pci_iomap(xdev->ddev->pdev, 0, xdev->res_len); - if (!xdev->user_bar) - return -EIO; - - result = probe_feature_rom(xdev); - if (result) - goto bar_cleanup; - - - if (xdev->unified) { - memset(&xdev->topology, 0, sizeof(struct drm_xocl_mem_topology)); - memset(&xdev->connectivity, 0, sizeof(struct drm_xocl_connectivity)); - 
memset(&xdev->layout, 0, sizeof(struct drm_xocl_layout)); - memset(&xdev->debug_layout, 0, sizeof(struct drm_xocl_debug_layout)); - } else { - printk(KERN_INFO "XOCL : non-unified ddr initialization.\n"); - ddr = xocl_ddr_channel_count(ddev); - ddr_size = xocl_ddr_channel_size(ddev); - - xdev->mm = devm_kzalloc(ddev->dev, sizeof(struct drm_mm) * ddr, GFP_KERNEL); - xdev->mm_usage_stat = devm_kzalloc(ddev->dev, sizeof(struct drm_xocl_mm_stat) * ddr, GFP_KERNEL); - if (!xdev->mm || !xdev->mm_usage_stat) { - result = -ENOMEM; - goto bar_cleanup; - } - - for (i = 0; i < ddr; i++) { - drm_mm_init(&xdev->mm[i], segment, ddr_size); - segment += ddr_size; - } - } - - mutex_init(&xdev->mm_lock); - // Now call XDMA core init - DRM_INFO("Enable XDMA core\n"); - result = xdma_init_glue(xdev); - if (result) { - DRM_ERROR("XDMA device initialization failed with err code: %d\n", result); - goto mm_cleanup; - } - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - - sema_init(&xdev->channel_sem[0], xdev->channel); - sema_init(&xdev->channel_sem[1], xdev->channel); - /* Initialize bit mask to represent individual channels */ - xdev->channel_bitmap[0] = BIT(xdev->channel); - xdev->channel_bitmap[0]--; - xdev->channel_bitmap[1] = xdev->channel_bitmap[0]; - - xdev->channel_usage[0] = devm_kzalloc(ddev->dev, sizeof(unsigned long long) * xdev->channel, GFP_KERNEL); - xdev->channel_usage[1] = devm_kzalloc(ddev->dev, sizeof(unsigned long long) * xdev->channel, GFP_KERNEL); - - if (!xdev->channel_usage[0] || !xdev->channel_usage[1]) { - result = -ENOMEM; - goto xdma_cleanup; - } - - xdev->cma_blk = xocl_cma; - - mutex_init(&xdev->stat_lock); - xdev->offline = false; - xocl_print_dev_info(xdev); - - //Init xocl sysfs - xocl_fini_sysfs(&xdev->ddev->pdev->dev); - result = xocl_init_sysfs(&xdev->ddev->pdev->dev); - if (result) { - DRM_ERROR("failed to create sysds file for xocl: %d\n", result); - goto all_cleanup; - } - - xocl_init_exec(xdev); - xdev->xvc.bar = xdev->user_bar; -#ifdef 
XOCL_BUILTIN_XVC - xocl_xvc_device_init(&xdev->xvc, &xdev->ddev->pdev->dev); -#endif - return result; - -all_cleanup: - mutex_destroy(&xdev->stat_lock); -xdma_cleanup: - xdma_fini_glue(xdev); -mm_cleanup: - if (!xdev->unified) { - for (i = 0; i < ddr; i++) { - drm_mm_takedown(&xdev->mm[i]); - } - } - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); -bar_cleanup: - pci_iounmap(xdev->ddev->pdev, xdev->user_bar); - xdev->user_bar = NULL; - return result; -} - -static int xocl_drm_unload(struct drm_device *drm) -{ - int i = 0; - struct drm_xocl_dev *xdev = drm->dev_private; - const unsigned short ddr = xocl_ddr_channel_count(drm); - - xdev->offline = true; -#ifdef XOCL_BUILTIN_XVC - xocl_xvc_device_fini(&xdev->xvc); -#endif - xocl_fini_exec(xdev); - - if(xdev->unified) { - for (i = 0; i < ddr; i++) { - if(xdev->topology.m_data[i].m_used) - drm_mm_takedown(&xdev->mm[i]); - } - vfree(xdev->topology.m_data); - vfree(xdev->topology.topology); - memset(&xdev->topology, 0, sizeof(xdev->topology)); - vfree(xdev->connectivity.connections); - memset(&xdev->connectivity, 0, sizeof(xdev->connectivity)); - vfree(xdev->layout.layout); - memset(&xdev->layout, 0, sizeof(xdev->layout)); - vfree(xdev->debug_layout.layout); - memset(&xdev->debug_layout, 0, sizeof(xdev->debug_layout)); - } else { - for (i = 0; i < ddr; i++) { - drm_mm_takedown(&xdev->mm[i]); - } - } - - mutex_destroy(&xdev->stat_lock); - mutex_destroy(&xdev->mm_lock); - - pci_iounmap(xdev->ddev->pdev, xdev->user_bar); - xdma_fini_glue(xdev); - xocl_fini_sysfs(&xdev->ddev->pdev->dev); - dev_set_drvdata(&xdev->ddev->pdev->dev, NULL); - return 0; -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -static void xocl_drm_unload2(struct drm_device *drm) -{ - xocl_drm_unload(drm); -} -#endif - -static void xocl_free_object(struct drm_gem_object *obj) -{ - xocl_free_bo(obj); -} - -static int xocl_mmap(struct file *filp, struct vm_area_struct *vma) -{ - int ret; - struct drm_file *priv = filp->private_data; - struct 
drm_device *dev = priv->minor->dev; - struct drm_xocl_dev *xdev = dev->dev_private; - struct mm_struct *mm = current->mm; - unsigned long vsize; - - //DRM_DEBUG("mmap operation 0x%lx 0x%lx 0x%lx\n", vma->vm_start, vma->vm_end, vma->vm_pgoff); - /* If the page offset is > than 4G, then let GEM handle that and do what - * it thinks is best,we will only handle page offsets less than 4G. - */ - if (likely(vma->vm_pgoff >= XOCL_FILE_PAGE_OFFSET)) { - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - /* Clear VM_PFNMAP flag set by drm_gem_mmap() - * we have "struct page" for all backing pages for bo - */ - vma->vm_flags &= ~VM_PFNMAP; - /* Clear VM_IO flag set by drm_gem_mmap() - * it prevents gdb from accessing mapped buffers - */ - vma->vm_flags &= ~VM_IO; - vma->vm_flags |= VM_MIXEDMAP; - vma->vm_flags |= mm->def_flags; - vma->vm_pgoff = 0; - - /* Override pgprot_writecombine() mapping setup by drm_gem_mmap() - * which results in very poor read performance - */ - if (vma->vm_flags & (VM_READ | VM_MAYREAD)) - vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - else - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - return ret; - } - - if (vma->vm_pgoff != 0) - return -EINVAL; - - vsize = vma->vm_end - vma->vm_start; - if (vsize > xdev->res_len) - return -EINVAL; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO; - vma->vm_flags |= VM_RESERVED; - - ret = io_remap_pfn_range(vma, vma->vm_start, - xdev->res_start >> PAGE_SHIFT, - vsize, vma->vm_page_prot); - DRM_INFO("io_remap_pfn_range ret code: %d", ret); - - return ret; - -} - -int xocl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(vma->vm_private_data); - loff_t num_pages; - unsigned int page_offset; - int ret = 0; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) - page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; -#else - page_offset = ((unsigned long)vmf->virtual_address - 
vma->vm_start) >> PAGE_SHIFT; -#endif - - if (!xobj->pages) - return VM_FAULT_SIGBUS; - - num_pages = DIV_ROUND_UP(xobj->base.size, PAGE_SIZE); - if (page_offset > num_pages) - return VM_FAULT_SIGBUS; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) - ret = vm_insert_page(vma, vmf->address, xobj->pages[page_offset]); -#else - ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, xobj->pages[page_offset]); -#endif - switch (ret) { - case -EAGAIN: - case 0: - case -ERESTARTSYS: - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - default: - return VM_FAULT_SIGBUS; - } -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -int xocl_gem_fault2(struct vm_fault *vmf) -{ - return xocl_gem_fault(vmf->vma, vmf); -} -#endif - -static int xocl_info_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - struct drm_xocl_info *obj = data; - struct drm_xocl_dev *xdev = dev->dev_private; - struct pci_dev *pdev = xdev->ddev->pdev; - printk(KERN_INFO "%s %s INFO IOCTL \n", DRV_NAME, __FUNCTION__); - - obj->vendor = pdev->vendor; - obj->device = pdev->device; - obj->subsystem_vendor = pdev->subsystem_vendor; - obj->subsystem_device = pdev->subsystem_device; - obj->driver_version = XOCL_DRIVER_VERSION_NUMBER; - obj->pci_slot = PCI_SLOT(pdev->devfn); - - printk(KERN_INFO "%s %s PCI Slot: %d \n", DRV_NAME, __FUNCTION__, obj->pci_slot); - return 0; -} - -static int xocl_client_open(struct drm_device *dev, struct drm_file *filp) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); - if (!fpriv) - return -ENOMEM; - filp->driver_priv = fpriv; - mutex_init(&fpriv->lock); - atomic_set(&fpriv->trigger, 0); - xocl_track_ctx(xdev, fpriv); - DRM_INFO("Pid %d opened device\n", pid_nr(task_tgid(current))); - return 0; -} - -static void xocl_client_release(struct drm_device *dev, struct drm_file *filp) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx 
*fpriv = filp->driver_priv; - int i; - unsigned bit; - - if (!fpriv) - return; - - xocl_untrack_ctx(xdev, fpriv); - if (!fpriv->eventfd_bitmap) - goto out; - - /* Clear all the eventfd structures */ - mutex_lock(&xdev->exec.user_msix_table_lock); - for (i = XOCL_USER_INTR_START; i < XOCL_USER_INTR_END; i++) { - bit = 1 << i; - if (!(fpriv->eventfd_bitmap & bit)) - continue; - xdma_user_interrupt_config(xdev, i, false); - eventfd_ctx_put(xdev->exec.user_msix_table[i]); - xdev->exec.user_msix_table[i] = NULL; - } - fpriv->eventfd_bitmap = 0; - mutex_unlock(&xdev->exec.user_msix_table_lock); -out: - mutex_destroy(&fpriv->lock); - kfree(fpriv); - filp->driver_priv = NULL; - DRM_INFO("Pid %d closed device\n", pid_nr(task_tgid(current))); -} - -static unsigned int xocl_poll(struct file *filp, poll_table *wait) -{ - int counter; - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx *fpriv = priv->driver_priv; - int ret = 0; - - BUG_ON(!fpriv); - poll_wait(filp, &xdev->exec.poll_wait_queue, wait); - /* - * Mutex lock protects from two threads from the same application - * calling poll concurrently using the same file handle - */ - mutex_lock(&fpriv->lock); - counter = atomic_read(&fpriv->trigger); - if (counter > 0) { - /* - * Use atomic here since the trigger may be incremented by interrupt - * handler running concurrently - */ - atomic_dec(&fpriv->trigger); - ret = POLLIN; - } - mutex_unlock(&fpriv->lock); - return ret; -} - -static const struct drm_ioctl_desc xocl_ioctls[] = { - DRM_IOCTL_DEF_DRV(XOCL_CREATE_BO, xocl_create_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_USERPTR_BO, xocl_userptr_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_MAP_BO, xocl_map_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_SYNC_BO, xocl_sync_bo_ioctl, - 
DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_INFO_BO, xocl_info_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PWRITE_BO, xocl_pwrite_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PREAD_BO, xocl_pread_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_CTX, xocl_ctx_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_INFO, xocl_info_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_READ_AXLF, xocl_read_axlf_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PWRITE_UNMGD, xocl_pwrite_unmgd_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PREAD_UNMGD, xocl_pread_unmgd_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_USAGE_STAT, xocl_usage_stat_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_USER_INTR, xocl_user_intr_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_EXECBUF, xocl_execbuf_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), -}; - -static const struct file_operations xocl_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .mmap = xocl_mmap, - .poll = xocl_poll, - .read = drm_read, - .unlocked_ioctl = drm_ioctl, - .release = drm_release, -}; - -static const struct vm_operations_struct xocl_vm_ops = { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) - .fault = xocl_gem_fault2, -#else - .fault = xocl_gem_fault, -#endif - .open = drm_gem_vm_open, - .close = drm_gem_vm_close, -}; - -static struct drm_driver xocl_drm_driver = { - .driver_features = DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER, - .postclose = xocl_client_release, - .open = xocl_client_open, - .load = xocl_drm_load, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) - .unload = xocl_drm_unload2, -#else - .unload = xocl_drm_unload, -#endif - .gem_free_object = xocl_free_object, - .gem_vm_ops = 
&xocl_vm_ops, - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_import = drm_gem_prime_import, - .gem_prime_export = drm_gem_prime_export, - .gem_prime_get_sg_table = xocl_gem_prime_get_sg_table, - .gem_prime_import_sg_table = xocl_gem_prime_import_sg_table, - .gem_prime_vmap = xocl_gem_prime_vmap, - .gem_prime_vunmap = xocl_gem_prime_vunmap, - .ioctls = xocl_ioctls, - .num_ioctls = ARRAY_SIZE(xocl_ioctls), - .fops = &xocl_driver_fops, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0) - .set_busid = drm_pci_set_busid, -#endif - .name = XOCL_DRIVER_NAME, - .desc = XOCL_DRIVER_DESC, - .date = XOCL_DRIVER_DATE, - .major = XOCL_DRIVER_MAJOR, - .minor = XOCL_DRIVER_MINOR, - .patchlevel = XOCL_DRIVER_PATCHLEVEL, -}; - -// TODO: Umang remove the additional DRM_INFO's once this driver has been -// in production for some time. 07/06/2017. -static int xocl_driver_load(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct drm_device *dev; - int ret; - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - - dev = drm_dev_alloc(&xocl_drm_driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - ret = pci_enable_device(pdev); - if (ret) - goto err_free; - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - dev->pdev = pdev; - pci_set_drvdata(pdev, dev); - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - ret = drm_dev_register(dev, ent->driver_data); - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - if (ret) { - goto err_reg; - } - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - return 0; - -err_reg: - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - pci_disable_device(pdev); -err_free: - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - drm_dev_unref(dev); - return ret; - -} - -static int xocl_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - 
DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - return xocl_driver_load(pdev, ent); -} - -static void xocl_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - pci_disable_device(pdev); - drm_put_dev(dev); -} - -static pci_ers_result_t xocl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct xdma_pci_dev *xpdev = dev_get_drvdata(&pdev->dev); - - switch (state) { - case pci_channel_io_normal: - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - DRM_INFO("dev 0x%p,0x%p, frozen state error, reset controller\n", - pdev, xpdev); - //xdma_dev_disable(xpdev, false); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - DRM_INFO("dev 0x%p,0x%p, failure state error, req. disconnect\n", - pdev, xpdev); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} - -static pci_ers_result_t xocl_slot_reset(struct pci_dev *pdev) -{ - struct drm_device *ddev = pci_get_drvdata(pdev); - - DRM_INFO("0x%p restart after slot reset\n", ddev->dev_private); - pci_restore_state(pdev); - //queue_work(xdma_workq, &dev->reset_work); - return PCI_ERS_RESULT_RECOVERED; -} - -static void xocl_error_resume(struct pci_dev *pdev) -{ - struct drm_device *ddev = pci_get_drvdata(pdev); - - DRM_INFO("dev 0x%p,0x%p.\n", pdev, ddev->dev_private); - pci_cleanup_aer_uncorrect_error_status(pdev); -} - -void xocl_reset_notify(struct pci_dev *pdev, bool prepare) -{ - struct drm_device *ddev = dev_get_drvdata(&pdev->dev); - struct drm_xocl_dev *xdev; - - if(ddev) { - xdev = ddev->dev_private; - } - else { - DRM_ERROR("%s: %s ddev is null", DRV_NAME, __FUNCTION__); - return; - } - - if(xdev) - DRM_INFO("%s: %s dev 0x%p,0x%p, prepare %d.\n", DRV_NAME, __FUNCTION__, - pdev, ddev->dev_private, prepare); - else { - DRM_ERROR("%s: %s xdev is null", DRV_NAME, __FUNCTION__); - return; - } - - if (prepare) { - xdev->offline = true; - 
xdma_device_offline(pdev, xdev->xdma_handle); - } - else { - xdma_device_online(pdev, xdev->xdma_handle); - xdev->offline = false; - } -} -EXPORT_SYMBOL_GPL(xocl_reset_notify); - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -static void xocl_reset_prepare(struct pci_dev *pdev) -{ - xocl_reset_notify(pdev, true); -} - -static void xocl_reset_done(struct pci_dev *pdev) -{ - xocl_reset_notify(pdev, false); -} -#endif - -static const struct pci_error_handlers xocl_err_handler = { - .error_detected = xocl_error_detected, - .slot_reset = xocl_slot_reset, - .resume = xocl_error_resume, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) - .reset_prepare = xocl_reset_prepare, - .reset_done = xocl_reset_done, -#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) - .reset_notify = xocl_reset_notify, -#endif -}; - - -static struct pci_driver xocl_pci_driver = { - .name = XOCL_DRIVER_NAME, - .id_table = pciidlist, - .probe = xocl_pci_probe, - .remove = xocl_pci_remove, - .err_handler = &xocl_err_handler, -}; - -/* init xilinx opencl drm platform */ -static int __init xocl_init(void) -{ - int result; -#ifdef XOCL_BUILTIN_XVC - result = xocl_xvc_chardev_init(); - if (result) { - DRM_ERROR("XVC registration failed with error code: %d\n", result); - return result; - } -#endif - result = pci_register_driver(&xocl_pci_driver); - if (result) { - DRM_ERROR("PCIe registration failed with error code: %d\n", result); - goto unregister_xvc; - } - -#ifdef XOCL_CMA_ALLOC - result = cma_init_reserved_mem(XOCL_CMA_BASE, XOCL_CMA_SIZE, 0, &xocl_cma); - if (result) { - DRM_ERROR("CMA region allocation for PCI Slave failed with error code: %d\n", result); - goto unregister_pci; - } -#endif - return 0; - -unregister_pci: - pci_unregister_driver(&xocl_pci_driver); - -unregister_xvc: -#ifdef XOCL_BUILTIN_XVC - xocl_xvc_chardev_exit(); -#endif - return result; -} - -static void __exit xocl_exit(void) -{ - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - 
pci_unregister_driver(&xocl_pci_driver); -#ifdef XOCL_BUILTIN_XVC - xocl_xvc_chardev_exit(); -#endif -} - -module_init(xocl_init); -module_exit(xocl_exit); - - -MODULE_VERSION(__stringify(XOCL_DRIVER_MAJOR) "." - __stringify(XOCL_DRIVER_MINOR) "." - __stringify(XOCL_DRIVER_PATCHLEVEL)); - -MODULE_DESCRIPTION(XOCL_DRIVER_DESC); -MODULE_AUTHOR("Sonal Santan "); -MODULE_LICENSE("GPL"); - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_drv.h b/sdk/linux_kernel_drivers/xocl/xocl_drv.h deleted file mode 100644 index 20c34c1e..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_drv.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#ifndef _XCL_XOCL_DRV_H_ -#define _XCL_XOCL_DRV_H_ - -#include -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include "xclfeatures.h" -#include "xclbin.h" -#include "xocl_ioctl.h" -#include "xocl_exec.h" -#include "xocl_xvc.h" -#include "libxdma.h" - -#define DRV_NAME "xocl" - -// For CMA kernel command line should be cma=nn[MG]@[start[MG] - -#define XOCL_BO_USERPTR (1 << 31) -#define XOCL_BO_IMPORT (1 << 30) -#define XOCL_BO_EXECBUF (1 << 29) -#define XOCL_BO_CMA (1 << 28) -#define XOCL_BO_DDR0 (1 << 0) -#define XOCL_BO_DDR1 (1 << 1) -#define XOCL_BO_DDR2 (1 << 2) -#define XOCL_BO_DDR3 (1 << 3) -#define XOCL_BO_ARE (1 << 4) //When the BO is imported from an ARE device. This is remote BO to be accessed over ARE - -#define XOCL_CHANNEL_COUNT 4 -#define XOCL_RD_MTX 0 -#define XOCL_WR_MTX 1 - -#define XOCL_CMA_BASE 0x200000000 // (8 GB) -#define XOCL_CMA_SIZE 0x020000000 // (512 MB) -#define XOCL_CMA_NAME "PCISlave" - -#define XOCL_ARE_HOP 0x400000000ull - -#define XOCL_FEATURE_ROM 0x0B0000 -#define XOCL_SCHD_HW 0x180000 -#define XOCL_SCHD_CMD_QUEUE 0x190000 -#define XOCL_SCHD_CMD_STATUS 0x190000 - -struct cma; - -struct drm_xocl_exec_metadata { - enum drm_xocl_execbuf_state state; - unsigned int index; -}; - -struct drm_xocl_bo { - /* drm base object */ - struct drm_gem_object base; - struct drm_mm_node *mm_node; - struct drm_xocl_exec_metadata metadata; - struct page **pages; - struct sg_table *sgt; - void *vmapping; - unsigned flags; -}; - -struct drm_xocl_unmgd { - struct page **pages; - struct sg_table *sgt; - unsigned int npages; - unsigned flags; -}; - -struct drm_xocl_mem_topology { - //TODO : check the first 4 entries - remove unneccessary ones. - int32_t bank_count; - struct mem_data* m_data; - u32 m_data_length; //length of the mem_data section. - uint64_t bank_size; //in KB. Currently only fixed sizes are supported. 
- uint64_t size; - struct mem_topology *topology; -}; - -struct drm_xocl_connectivity { - uint64_t size; - struct connectivity *connections; -}; - -struct drm_xocl_layout { - uint64_t size; - struct ip_layout *layout; -}; - -struct drm_xocl_debug_layout { - uint64_t size; - struct debug_ip_layout *layout; -}; - -struct drm_xocl_dev { - struct drm_device *ddev; - /* The feature Rom header */ - struct FeatureRomHeader header; - /* Number of bidirectional channels */ - unsigned channel; - /* Memory manager array, one per DDR channel */ - struct drm_mm *mm; - /* Memory manager lock */ - struct mutex mm_lock; - /* Semaphore, one for each direction */ - struct semaphore channel_sem[2]; - /* Channel usage bitmasks, one for each direction - * bit 1 indicates channel is free, bit 0 indicates channel is free - */ - volatile unsigned long channel_bitmap[2]; - unsigned long long *channel_usage[2]; - struct drm_xocl_mm_stat *mm_usage_stat; - struct xdma_dev *xdma_handle; - struct cma *cma_blk; - bool offline; - /* Lock for stats */ - struct mutex stat_lock; - void *__iomem user_bar; - phys_addr_t res_start; - resource_size_t res_len; - bool unified; //unified platform, populated from FeatureROM, - u64 unique_id_last_bitstream; - struct xocl_xvc xvc; - struct drm_xocl_exec_core exec; - struct drm_xocl_mem_topology topology; - struct drm_xocl_layout layout; - struct drm_xocl_debug_layout debug_layout; - struct drm_xocl_connectivity connectivity; -}; - -static inline struct drm_gem_object *xocl_gem_object_lookup(struct drm_device *dev, - struct drm_file *filp, - u32 handle) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0) - return drm_gem_object_lookup(filp, handle); -#elif defined(RHEL_RELEASE_CODE) -#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,4) - return drm_gem_object_lookup(filp, handle); -#else - return drm_gem_object_lookup(dev, filp, handle); -#endif -#else - return drm_gem_object_lookup(dev, filp, handle); -#endif -} - -static inline struct drm_xocl_bo 
*to_xocl_bo(struct drm_gem_object *bo) -{ - return (struct drm_xocl_bo *)bo; -} - -static inline struct drm_xocl_dev *bo_xocl_dev(const struct drm_xocl_bo *bo) -{ - return bo->base.dev->dev_private; -} - -static inline unsigned xocl_bo_ddr_idx(unsigned flags) -{ - const unsigned ddr = flags & 0xf; - if (!ddr) - return 0xffffffff; - return __builtin_ctz(ddr); -} - -static inline unsigned short xocl_ddr_channel_count(const struct drm_device *drm) -{ - struct drm_xocl_dev *xdev = drm->dev_private; - struct drm_xocl_mem_topology *topology; - if(!xdev->unified) - return xdev->header.DDRChannelCount; - topology = &xdev->topology; - return topology->bank_count; -} - -static inline unsigned long long xocl_ddr_channel_size(const struct drm_device *drm) -{ - struct drm_xocl_dev *xdev = drm->dev_private; - struct drm_xocl_mem_topology *topology; - - if(!xdev->unified) { - /* Channel size is in GB */ - return xdev->header.DDRChannelSize * 0x40000000ull; - } - topology = &xdev->topology; - return topology->bank_size; -} - -static inline bool xocl_bo_userptr(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_USERPTR); -} - -static inline bool xocl_bo_import(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_IMPORT); -} - -static inline bool xocl_bo_execbuf(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_EXECBUF); -} - -static inline bool xocl_bo_cma(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_CMA); -} - -int xocl_create_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_userptr_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp); -int xocl_sync_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_map_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_info_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_pwrite_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int 
xocl_pread_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_ctx_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_pwrite_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_pread_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_usage_stat_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_read_axlf_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - - -void xocl_describe(const struct drm_xocl_bo *obj); - -void xocl_free_bo(struct drm_gem_object *obj); - -int xocl_migrate_bo(struct drm_device *ddev, const struct drm_xocl_bo *xobj, - enum drm_xocl_sync_bo_dir dir); - -int xocl_user_event(int irq, struct drm_xocl_dev *xdev); - -/** - * DMA-BUF support - */ -struct drm_gem_object *xocl_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, struct sg_table *sgt); - -struct sg_table *xocl_gem_prime_get_sg_table(struct drm_gem_object *obj); - -void *xocl_gem_prime_vmap(struct drm_gem_object *obj); - -void xocl_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); - -/** - * Sysfs related functions - */ -int xocl_init_sysfs(struct device *dev); -void xocl_fini_sysfs(struct device *dev); - -/** - * DEBUG and EXEC support - */ - -int xocl_debug_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_execbuf_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - -int xocl_user_intr_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - -#endif - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_exec.c b/sdk/linux_kernel_drivers/xocl/xocl_exec.c deleted file mode 100644 index c9e7865a..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_exec.c +++ /dev/null @@ -1,1426 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Authors: - * Soren Soe - * - * 
A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -#include -#include -#include -#include -#include "ert.h" -#include "xocl_drv.h" -#include "xocl_exec.h" -#include "xocl_xdma.h" - -//#define SCHED_VERBOSE -#define SCHED_THREAD_ENABLE - -#if 0 -static unsigned long zero = 0; -static unsigned long time_ns(void) -{ - struct timeval now; - do_gettimeofday(&now); - if (!zero) - zero = timeval_to_ns(&now); - return timeval_to_ns(&now) - zero; -} -#endif - -#define sched_error_on(xdev,expr,msg) \ -({ \ - unsigned int ret = 0; \ - if ((expr)) { \ - DRM_INFO("Assertion failed: %s:%d:%s:%s %s\n" \ - ,__FILE__,__LINE__,__FUNCTION__,#expr,msg); \ - xdev->exec.scheduler->error=1; \ - ret = 1; \ - } \ - (ret); \ -}) - - -#ifdef SCHED_VERBOSE -# define SCHED_DEBUG(msg) printk(msg) -# define SCHED_DEBUGF(format,...) printk(format, ##__VA_ARGS__) -#else -# define SCHED_DEBUG(msg) -# define SCHED_DEBUGF(format,...) 
-#endif - -#define XOCL_U32_MASK 0xFFFFFFFF - -/** - * struct xocl_sched: scheduler for xocl_cmd objects - * - * @scheduler_thread: thread associated with this scheduler - * @use_count: use count for this scheduler - * @wait_queue: conditional wait queue for scheduler thread - * @error: set to 1 to indicate scheduler error - * @command_queue: list of command objects managed by scheduler - * @intc: boolean flag set when there is a pending interrupt for command completion - * @poll: number of running commands in polling mode - */ -struct xocl_sched -{ - struct task_struct *scheduler_thread; - unsigned int use_count; - - wait_queue_head_t wait_queue; - unsigned int error; - - struct list_head command_queue; - atomic_t intc; /* pending interrupt */ - atomic_t poll; /* number of cmds to poll */ -}; - -static struct xocl_sched global_scheduler0; - -/** - * Command data used by scheduler - * - * @list: command object moves from list to list - * @bo: underlying drm buffer object - * @xdev: device handle - * @xs: scehduler processing this commands - * @state: state of command object per scheduling - * @cu_idx: index of CU executing this cmd object; used in penguin mode only - * @slot_idx: command queue index of this command object - * @packet: mapped ert packet object from user space - */ -struct xocl_cmd -{ - struct list_head list; - struct drm_xocl_bo *bo; - struct drm_xocl_dev *xdev; - struct xocl_sched *xs; - enum ert_cmd_state state; - int cu_idx; - int slot_idx; - - struct ert_packet *packet; -}; - -/** - * set_cmd_int_state() - Set internal command state used by scheduler only - * - * @xcmd: command to change internal state on - * @state: new command state per ert.h - */ -inline void -set_cmd_int_state(struct xocl_cmd* xcmd, enum ert_cmd_state state) -{ - SCHED_DEBUGF("->set_cmd_int_state(,%d)\n",state); - xcmd->state = state; - SCHED_DEBUG("<-set_cmd_int_state\n"); -} - -/** - * set_cmd_state() - Set both internal and external state of a command - * - * The state is 
reflected externally through the command packet - * as well as being captured in internal state variable - * - * @xcmd: command object - * @state: new state - */ -inline void -set_cmd_state(struct xocl_cmd* xcmd, enum ert_cmd_state state) -{ - SCHED_DEBUGF("->set_cmd_state(,%d)\n",state); - xcmd->state = state; - xcmd->packet->state = state; - SCHED_DEBUG("<-set_cmd_state\n"); -} - -/** - * List of free xocl_cmd objects. - * - * @free_cmds: populated with recycled xocl_cmd objects - * @cmd_mutex: mutex lock for cmd_list - * - * Command objects are recycled for later use and only freed when kernel - * module is unloaded. - */ -static LIST_HEAD(free_cmds); -static DEFINE_MUTEX(free_cmds_mutex); - -/** - * List of new pending xocl_cmd objects - * - * @pending_cmds: populated from user space with new commands for buffer objects - * @num_pending: number of pending commands - * - * Scheduler copies pending commands to its private queue when necessary - */ -static LIST_HEAD(pending_cmds); -static DEFINE_MUTEX(pending_cmds_mutex); -static atomic_t num_pending = ATOMIC_INIT(0); - -/** - * get_free_xocl_cmd() - Get a free command object - * - * Get from free/recycled list or allocate a new command if necessary. - * - * Return: Free command object - */ -static struct xocl_cmd* -get_free_xocl_cmd(void) -{ - struct xocl_cmd* cmd; - SCHED_DEBUG("-> get_free_xocl_cmd\n"); - mutex_lock(&free_cmds_mutex); - cmd=list_first_entry_or_null(&free_cmds,struct xocl_cmd,list); - if (cmd) - list_del(&cmd->list); - mutex_unlock(&free_cmds_mutex); - if (!cmd) - cmd = kmalloc(sizeof(struct xocl_cmd),GFP_KERNEL); - if (!cmd) - return ERR_PTR(-ENOMEM); - SCHED_DEBUGF("<- get_free_xocl_cmd %p\n",cmd); - return cmd; -} - -/** - * add_cmd() - Add a new command to pending list - * - * @xdev: device owning adding the buffer object - * @bo: buffer objects from user space from which new command is created - * - * Scheduler copies pending commands to its internal command queue. 
- * - * Return: 0 on success, -errno on failure - */ -static int -add_cmd(struct drm_xocl_dev *xdev, struct drm_xocl_bo* bo) -{ - struct xocl_cmd *xcmd = get_free_xocl_cmd(); - SCHED_DEBUG("-> add_cmd\n"); - xcmd->bo=bo; - xcmd->xdev=xdev; - xcmd->cu_idx=-1; - xcmd->slot_idx=-1; - xcmd->packet = (struct ert_packet*)bo->vmapping; - xcmd->xs = xdev->exec.scheduler; - set_cmd_state(xcmd,ERT_CMD_STATE_NEW); - mutex_lock(&pending_cmds_mutex); - list_add_tail(&xcmd->list,&pending_cmds); - mutex_unlock(&pending_cmds_mutex); - - /* wake scheduler */ - atomic_inc(&num_pending); - wake_up_interruptible(&xcmd->xs->wait_queue); - - SCHED_DEBUG("<- add_cmd\n"); - return 0; -} - -/** - * recycle_cmd() - recycle a command objects - * - * @xcmd: command object to recycle - * - * Command object is added to the freelist - * - * Return: 0 - */ -static int -recycle_cmd(struct xocl_cmd* xcmd) -{ - SCHED_DEBUGF("recycle %p\n",xcmd); - mutex_lock(&free_cmds_mutex); - list_del(&xcmd->list); - list_add_tail(&xcmd->list,&free_cmds); - mutex_unlock(&free_cmds_mutex); - return 0; -} - -/** - * delete_cmd_list() - reclaim memory for all allocated command objects - */ -static void -delete_cmd_list(void) -{ - struct xocl_cmd *xcmd; - struct list_head *pos, *next; - - mutex_lock(&free_cmds_mutex); - list_for_each_safe(pos, next, &free_cmds) { - xcmd = list_entry(pos, struct xocl_cmd, list); - list_del(pos); - kfree(xcmd); - } - mutex_unlock(&free_cmds_mutex); -} - - - -/** - * struct xocl_sched_ops: scheduler specific operations - * - * Scheduler can operate in MicroBlaze mode (mb/ert) or in penguin mode. This - * struct differentiates specific operations. The struct is per device node, - * meaning that one device can operate in ert mode while another can operate in - * penguin mode. 
- */ -struct xocl_sched_ops -{ - int (*submit) (struct xocl_cmd *xcmd); - void (*query) (struct xocl_cmd *xcmd); -}; - -static struct xocl_sched_ops mb_ops; -static struct xocl_sched_ops penguin_ops; - -/** - * is_ert() - Check if running in embedded (ert) mode. - * - * Return: %true of ert mode, %false otherwise - */ -inline unsigned int -is_ert(struct drm_xocl_dev *xdev) -{ - return xdev->exec.ops == &mb_ops; -} - -/** - * ffs_or_neg_one() - Find first set bit in a 32 bit mask. - * - * @mask: mask to check - * - * First LSBit is at position 0. - * - * Return: Position of first set bit, or -1 if none - */ -inline int -ffs_or_neg_one(u32 mask) -{ - if (!mask) - return -1; - return ffs(mask)-1; -} - -/** - * ffz_or_neg_one() - First first zero bit in bit mask - * - * @mask: mask to check - * Return: Position of first zero bit, or -1 if none - */ -inline int -ffz_or_neg_one(u32 mask) -{ - if (mask==XOCL_U32_MASK) - return -1; - return ffz(mask); -} - - -/** - * slot_size() - slot size per device configuration - * - * Return: Command queue slot size - */ -inline unsigned int -slot_size(struct drm_xocl_dev *xdev) -{ - return ERT_CQ_SIZE / xdev->exec.num_slots; -} - -/** - * cu_mask_idx() - CU mask index for a given cu index - * - * @cu_idx: Global [0..127] index of a CU - * Return: Index of the CU mask containing the CU with cu_idx - */ -inline unsigned int -cu_mask_idx(unsigned int cu_idx) -{ - return cu_idx >> 5; /* 32 cus per mask */ -} - -/** - * cu_idx_in_mask() - CU idx within its mask - * - * @cu_idx: Global [0..127] index of a CU - * Return: Index of the CU within the mask that contains it - */ -inline unsigned int -cu_idx_in_mask(unsigned int cu_idx) -{ - return cu_idx - (cu_mask_idx(cu_idx) << 5); -} - -/** - * cu_idx_from_mask() - Given CU idx within a mask return its global idx [0..127] - * - * @cu_idx: Index of CU with mask identified by mask_idx - * @mask_idx: Mask index of the has CU with cu_idx - * Return: Global cu_idx [0..127] - */ -inline unsigned 
int -cu_idx_from_mask(unsigned int cu_idx, unsigned int mask_idx) -{ - return cu_idx + (mask_idx << 5); -} - -/** - * slot_mask_idx() - Slot mask idx index for a given slot_idx - * - * @slot_idx: Global [0..127] index of a CQ slot - * Return: Index of the slot mask containing the slot_idx - */ -inline unsigned int -slot_mask_idx(unsigned int slot_idx) -{ - return slot_idx >> 5; -} - -/** - * slot_idx_in_mask() - Index of command queue slot within the mask that contains it - * - * @slot_idx: Global [0..127] index of a CQ slot - * Return: Index of slot within the mask that contains it - */ -inline unsigned int -slot_idx_in_mask(unsigned int slot_idx) -{ - return slot_idx - (slot_mask_idx(slot_idx) << 5); -} - -/** - * slot_idx_from_mask_idx() - Given slot idx within a mask, return its global idx [0..127] - * - * @slot_idx: Index of slot with mask identified by mask_idx - * @mask_idx: Mask index of the mask hat has slot with slot_idx - * Return: Global slot_idx [0..127] - */ -inline unsigned int -slot_idx_from_mask_idx(unsigned int slot_idx,unsigned int mask_idx) -{ - return slot_idx + (mask_idx << 5); -} - -/** - * opcode() - Command opcode - * - * @cmd: Command object - * Return: Opcode per command packet - */ -inline u32 -opcode(struct xocl_cmd* xcmd) -{ - return xcmd->packet->opcode; -} - -/** - * payload_size() - Command payload size - * - * @xcmd: Command object - * Return: Size in number of words of command packet payload - */ -inline u32 -payload_size(struct xocl_cmd *xcmd) -{ - return xcmd->packet->count; -} - -/** - * packet_size() - Command packet size - * - * @xcmd: Command object - * Return: Size in number of words of command packet - */ -inline u32 -packet_size(struct xocl_cmd *xcmd) -{ - return payload_size(xcmd) + 1; -} - -/** - * cu_masks() - Number of command packet cu_masks - * - * @xcmd: Command object - * Return: Total number of CU masks in command packet - */ -inline u32 -cu_masks(struct xocl_cmd *xcmd) -{ - struct ert_start_kernel_cmd *sk; - if 
(opcode(xcmd)!=ERT_START_KERNEL) - return 0; - sk = (struct ert_start_kernel_cmd *)xcmd->packet; - return 1 + sk->extra_cu_masks; -} - -/** - * regmap_size() - Size of regmap is payload size (n) minus the number of cu_masks - * - * @xcmd: Command object - * Return: Size of register map in number of words - */ -inline u32 -regmap_size(struct xocl_cmd* xcmd) -{ - return payload_size(xcmd) - cu_masks(xcmd); -} - -/** - * cu_idx_to_addr() - Convert CU idx into it relative bar address. - * - * @xdev: Device handle - * @cu_idx: Global CU idx - * Return: Address of CU relative to bar - */ -inline u32 -cu_idx_to_addr(struct drm_xocl_dev *xdev,unsigned int cu_idx) -{ - return (cu_idx << xdev->exec.cu_shift_offset) + xdev->exec.cu_base_addr; -} - -/** - * cu_idx_to_bitmask() - Compute the cu bitmask for cu_idx - * - * Subtract 32 * lower bitmasks prior to bitmask repsenting - * this index. For example, f.x cu_idx=67 - * 1 << (67 - (67>>5)<<5) = - * 1 << (67 - (2<<5)) = - * 1 << (67 - 64) = - * 1 << 3 = - * 0b1000 for position 4 in third bitmask - * - * @xdev: Device handle - * @cu_idx: Global index [0..127] of CU - * - * This function computes the bitmask for cu_idx in the mask that stores cu_idx - * - * Return: Bitmask with bit set for corresponding CU - */ -inline u32 -cu_idx_to_bitmask(struct drm_xocl_dev *xdev, u32 cu_idx) -{ - return 1 << (cu_idx - (cu_mask_idx(cu_idx)<<5)); -} - - -/** - * configure() - Configure the scheduler - * - * Process the configure command sent from user space. Only one process can - * configure the scheduler, so if scheduler is already configured, the - * function should verify that another process doesn't expect different - * configuration. - * - * Future may need ability to query current configuration so as to keep - * multiple processes in sync. 
- * - * Return: 0 on success, 1 on failure - */ -static int -configure(struct xocl_cmd *xcmd) -{ - struct drm_xocl_dev *xdev=xcmd->xdev; - struct ert_configure_cmd *cfg; - - if (sched_error_on(xdev,opcode(xcmd)!=ERT_CONFIGURE,"expected configure command")) - return 1; - - cfg = (struct ert_configure_cmd *)(xcmd->packet); - - if (xdev->exec.configured==0) { - SCHED_DEBUG("configuring scheduler\n"); - xdev->exec.num_slots = ERT_CQ_SIZE / cfg->slot_size; - xdev->exec.num_cus = cfg->num_cus; - xdev->exec.cu_shift_offset = cfg->cu_shift; - xdev->exec.cu_base_addr = cfg->cu_base_addr; - xdev->exec.num_cu_masks = ((xdev->exec.num_cus-1)>>5) + 1; - - if (cfg->ert) { - SCHED_DEBUG("++ configuring embedded scheduler mode\n"); - xdev->exec.ops = &mb_ops; - xdev->exec.polling_mode = cfg->polling; - xdev->exec.cq_interrupt = cfg->cq_int; - } - else { - SCHED_DEBUG("++ configuring penguin scheduler mode\n"); - xdev->exec.ops = &penguin_ops; - xdev->exec.polling_mode = 1; - } - - DRM_INFO("scheduler config ert(%d) slots(%d), cus(%d), cu_shift(%d), cu_base(0x%x), cu_masks(%d)\n" - ,is_ert(xdev) - ,xdev->exec.num_slots - ,xdev->exec.num_cus - ,xdev->exec.cu_shift_offset - ,xdev->exec.cu_base_addr - ,xdev->exec.num_cu_masks); - - return 0; - } - - DRM_INFO("reconfiguration of scheduler not supported\n"); - - return 1; -} - -/** - * acquire_slot_idx() - Acquire a slot index if available. Update slot status to busy - * so it cannot be reacquired. 
- * - * This function is called from scheduler thread - * - * Return: Command queue slot index, or -1 if none avaiable - */ -static int -acquire_slot_idx(struct drm_xocl_dev *xdev) -{ - unsigned int mask_idx=0, slot_idx=-1; - u32 mask; - SCHED_DEBUG("-> acquire_slot_idx\n"); - for (mask_idx=0; mask_idxexec.num_slot_masks; ++mask_idx) { - mask = xdev->exec.slot_status[mask_idx]; - slot_idx = ffz_or_neg_one(mask); - if (slot_idx==-1 || slot_idx_from_mask_idx(slot_idx,mask_idx)>=xdev->exec.num_slots) - continue; - xdev->exec.slot_status[mask_idx] ^= (1< release_slot_idx slot_status[%d]=0x%x, pos=%d\n" - ,mask_idx,xdev->exec.slot_status[mask_idx],pos); - xdev->exec.slot_status[mask_idx] ^= (1<exec.submitted_cmds[cmd_idx]; - if (sched_error_on(xdev,!xcmd,"no submtted cmd")) - return -1; - return xcmd->cu_idx; -} - -/** - * cu_done() - Check status of CU - * - * @cu_idx: Index of cu to check - * - * This function is called in polling mode only. The cu_idx - * is guaranteed to have been started - * - * Return: %true if cu done, %false otherwise - */ -inline int -cu_done(struct drm_xocl_dev *xdev, unsigned int cu_idx) -{ - u32 cu_addr = cu_idx_to_addr(xdev,cu_idx); - SCHED_DEBUGF("-> cu_done(,%d) checks cu at address 0x%x\n",cu_idx,cu_addr); - /* done is indicated by AP_DONE(2) alone or by AP_DONE(2) | AP_IDLE(4) - * but not by AP_IDLE itself. Since 0x10 | (0x10 | 0x100) = 0x110 - * checking for 0x10 is sufficient. 
*/ - if (ioread32(xdev->user_bar + cu_addr) & 2) { - unsigned int mask_idx = cu_mask_idx(cu_idx); - unsigned int pos = cu_idx_in_mask(cu_idx); - xdev->exec.cu_status[mask_idx] ^= 1<exec.submitted_cmds[cmd_idx]; - u32 opc = 0; - SCHED_DEBUGF("-> cmd_done(,%d)\n",cmd_idx); - - if (sched_error_on(xdev,!xcmd || xcmd->slot_idx!=cmd_idx,"no command or missing slot index")) - return false; - - opc = opcode(xcmd); - if (opc==ERT_START_CU) { - int val = cu_done(xdev,get_cu_idx(xdev,cmd_idx)); - SCHED_DEBUGF("<- cmd_done (cu_done) returns %d\n",val); - return val; - } - if (opc==ERT_CONFIGURE) { - SCHED_DEBUG("<- cmd_done (configure) returns 1\n"); - return true; - } - SCHED_DEBUG("<- cmd_done returns 0\n"); - return false; -} - -/** - * notify_host() - Notify user space that a command is complete. - */ -static void -notify_host(struct xocl_cmd *xcmd) -{ - struct list_head *ptr; - struct drm_xocl_client_ctx *entry; - struct drm_xocl_dev *xdev = xcmd->xdev; - unsigned long flags = 0; - - SCHED_DEBUG("-> notify_host\n"); - - /* now for each client update the trigger counter in the context */ - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - list_for_each(ptr, &xdev->exec.ctx_list) { - entry = list_entry(ptr, struct drm_xocl_client_ctx, link); - atomic_inc(&entry->trigger); - } - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); - /* wake up all the clients */ - wake_up_interruptible(&xdev->exec.poll_wait_queue); - SCHED_DEBUG("<- notify_host\n"); -} - -/** - * mark_cmd_complete() - Move a command to complete state - * - * Commands are marked complete in two ways - * 1. Through polling of CUs or polling of MB status register - * 2. Through interrupts from MB - * In both cases, the completed commands are residing in the completed_cmds - * list and the number of completed commands is reflected in num_completed. - * - * @xcmd: Command to mark complete - * - * The command is removed from the slot it occupies in the device command - * queue. 
The slot is released so new commands can be submitted. The host - * is notified that some command has completed. - */ -static void -mark_cmd_complete(struct xocl_cmd *xcmd) -{ - SCHED_DEBUGF("-> mark_cmd_complete(,%d)\n",xcmd->slot_idx); - xcmd->xdev->exec.submitted_cmds[xcmd->slot_idx] = NULL; - set_cmd_state(xcmd,ERT_CMD_STATE_COMPLETED); - if (xcmd->xdev->exec.polling_mode) - atomic_dec(&xcmd->xs->poll); - release_slot_idx(xcmd->xdev,xcmd->slot_idx); - notify_host(xcmd); - SCHED_DEBUGF("<- mark_cmd_complete\n"); -} - -/** - * mark_mask_complete() - Move all commands in mask to complete state - * - * @mask: Bitmask with queried statuses of commands - * @mask_idx: Index of the command mask. Used to offset the actual cmd slot index - */ -static void -mark_mask_complete(struct drm_xocl_dev *xdev, u32 mask, unsigned int mask_idx) -{ - int bit_idx=0,cmd_idx=0; - SCHED_DEBUGF("-> mark_mask_complete(,0x%x,%d)\n",mask,mask_idx); - if (!mask) - return; - for (bit_idx=0, cmd_idx=mask_idx<<5; bit_idx<32; mask>>=1,++bit_idx,++cmd_idx) - if (mask & 0x1) - mark_cmd_complete(xdev->exec.submitted_cmds[cmd_idx]); - SCHED_DEBUG("<- mark_mask_complete\n"); -} - -/** - * queued_to_running() - Move a command from queued to running state if possible - * - * @xcmd: Command to start - * - * Upon success, the command is not necessarily running. In ert mode the - * command will have been submitted to the embedded scheduler, whereas in - * penguin mode the command has been started on a CU. 
- * - * Return: %true if command was submitted to device, %false otherwise - */ -static int -queued_to_running(struct xocl_cmd *xcmd) -{ - int retval = false; - - SCHED_DEBUG("-> queued_to_running\n"); - - if (opcode(xcmd)==ERT_CONFIGURE) - configure(xcmd); - - if (xcmd->xdev->exec.ops->submit(xcmd)) { - set_cmd_int_state(xcmd,ERT_CMD_STATE_RUNNING); - if (xcmd->xdev->exec.polling_mode) - atomic_inc(&xcmd->xs->poll); - xcmd->xdev->exec.submitted_cmds[xcmd->slot_idx] = xcmd; - retval = true; - } - - SCHED_DEBUGF("<- queued_to_running returns %d\n",retval); - - return retval; -} - -/** - * running_to_complete() - Check status of running commands - * - * @xcmd: Command is in running state - * - * If a command is found to be complete, it marked complete prior to return - * from this function. - */ -static void -running_to_complete(struct xocl_cmd *xcmd) -{ - SCHED_DEBUG("-> running_to_complete\n"); - - xcmd->xdev->exec.ops->query(xcmd); - - SCHED_DEBUG("<- running_to_complete\n"); -} - -/** - * complete_to_free() - Recycle a complete command objects - * - * @xcmd: Command is in complete state - */ -static void -complete_to_free(struct xocl_cmd *xcmd) -{ - SCHED_DEBUG("-> complete_to_free\n"); - - drm_gem_object_unreference_unlocked(&xcmd->bo->base); - recycle_cmd(xcmd); - - SCHED_DEBUG("<- complete_to_free\n"); -} - -/** - * scheduler_queue_cmds() - Queue any pending commands - * - * The scheduler copies pending commands to its internal command queue where - * is is now in queued state. 
- */ -static void -scheduler_queue_cmds(struct xocl_sched *xs) -{ - struct xocl_cmd *xcmd; - - SCHED_DEBUG("-> scheduler_queue_cmds\n"); - mutex_lock(&pending_cmds_mutex); - while (!list_empty(&pending_cmds)) { - xcmd = list_first_entry(&pending_cmds,struct xocl_cmd,list); - if (xcmd->xs != xs) - continue; - list_del(&xcmd->list); - list_add_tail(&xcmd->list,&xs->command_queue); - set_cmd_int_state(xcmd,ERT_CMD_STATE_QUEUED); - atomic_dec(&num_pending); - } - mutex_unlock(&pending_cmds_mutex); - SCHED_DEBUG("<- scheduler_queue_cmds\n"); -} - -/** - * scheduler_iterator_cmds() - Iterate all commands in scheduler command queue - */ -static void -scheduler_iterate_cmds(struct xocl_sched *xs) -{ - struct xocl_cmd *xcmd; - struct list_head *pos, *next; - - SCHED_DEBUG("-> scheduler_iterate_cmds\n"); - list_for_each_safe(pos, next, &xs->command_queue) { - xcmd = list_entry(pos, struct xocl_cmd, list); - - if (xcmd->state == ERT_CMD_STATE_QUEUED) - queued_to_running(xcmd); - if (xcmd->state == ERT_CMD_STATE_RUNNING) - running_to_complete(xcmd); - if (xcmd->state == ERT_CMD_STATE_COMPLETED) - complete_to_free(xcmd); - - } - SCHED_DEBUG("<- scheduler_iterate_cmds\n"); -} - -/** - * scheduler_wait_condition() - Check status of scheduler wait condition - * - * Scheduler must wait (sleep) if - * 1. there are no pending commands - * 2. no pending interrupt from embedded scheduler - * 3. 
no pending complete commands in polling mode - * - * Return: 1 if scheduler must wait, 0 othewise - */ -static int -scheduler_wait_condition(struct xocl_sched *xs) -{ - if (kthread_should_stop() || xs->error) { - SCHED_DEBUG("scheduler wakes kthread_should_stop\n"); - return 0; - } - - if (atomic_read(&num_pending)) { - SCHED_DEBUG("scheduler wakes to copy new pending commands\n"); - return 0; - } - - if (atomic_read(&xs->intc)) { - SCHED_DEBUG("scheduler wakes on interrupt\n"); - atomic_set(&xs->intc,0); - return 0; - } - - if (atomic_read(&xs->poll)) { - SCHED_DEBUG("scheduler wakes to poll\n"); - return 0; - } - - SCHED_DEBUG("scheduler waits ...\n"); - return 1; -} - -/** - * scheduler_wait() - check if scheduler should wait - * - * See scheduler_wait_condition(). - */ -static void -scheduler_wait(struct xocl_sched *xs) -{ - wait_event_interruptible(xs->wait_queue,scheduler_wait_condition(xs)==0); -} - -/** - * scheduler_loop() - Run one loop of the scheduler - */ -static void -scheduler_loop(struct xocl_sched *xs) -{ - SCHED_DEBUG("scheduler_loop\n"); - - scheduler_wait(xs); - - if (xs->error) { - DRM_INFO("scheduler encountered unexpected error and exits\n"); - return; - } - - /* queue new pending commands */ - scheduler_queue_cmds(xs); - - /* iterate all commands */ - scheduler_iterate_cmds(xs); -} - -/** - * scheduler() - Command scheduler thread routine - */ -#if defined(__GNUC__) && !defined(SCHED_THREAD_ENABLE) -__attribute__((unused)) -#endif -static int -scheduler(void* data) -{ - struct xocl_sched *xs = (struct xocl_sched *)data; - while (!kthread_should_stop() && !xs->error) - scheduler_loop(xs); - DRM_INFO("%s:%d scheduler thread exits with value %d\n",__FILE__,__LINE__,xs->error); - return xs->error; -} - -/** - * init_scheduler_thread() - Initialize scheduler thread if necessary - * - * Return: 0 on success, -errno otherwise - */ -static int -init_scheduler_thread(void) -{ -#ifdef SCHED_THREAD_ENABLE - SCHED_DEBUGF("init_scheduler_thread 
use_count=%d\n",global_scheduler0.use_count); - if (global_scheduler0.use_count++) - return 0; - - init_waitqueue_head(&global_scheduler0.wait_queue); - global_scheduler0.error = 0; - - INIT_LIST_HEAD(&global_scheduler0.command_queue); - atomic_set(&global_scheduler0.intc,0); - atomic_set(&global_scheduler0.poll,0); - - global_scheduler0.scheduler_thread = kthread_run(scheduler,(void*)&global_scheduler0,"xocl-scheduler-thread0"); - if (IS_ERR(global_scheduler0.scheduler_thread)) { - int ret = PTR_ERR(global_scheduler0.scheduler_thread); - DRM_ERROR(__func__); - return ret; - } -#endif - return 0; -} - -/** - * fini_scheduler_thread() - Finalize scheduler thread if unused - * - * Return: 0 on success, -errno otherwise - */ -static int -fini_scheduler_thread(void) -{ - int retval = 0; - SCHED_DEBUGF("fini_scheduler_thread use_count=%d\n",global_scheduler0.use_count); - if (--global_scheduler0.use_count) - return 0; - - retval = kthread_stop(global_scheduler0.scheduler_thread); - - /* clear stale command objects if any */ - while (!list_empty(&pending_cmds)) { - struct xocl_cmd *xcmd = list_first_entry(&pending_cmds,struct xocl_cmd,list); - DRM_INFO("deleting stale pending cmd\n"); - list_del(&xcmd->list); - drm_gem_object_unreference_unlocked(&xcmd->bo->base); - } - while (!list_empty(&global_scheduler0.command_queue)) { - struct xocl_cmd *xcmd = list_first_entry(&global_scheduler0.command_queue,struct xocl_cmd,list); - DRM_INFO("deleting stale scheduler cmd\n"); - list_del(&xcmd->list); - drm_gem_object_unreference_unlocked(&xcmd->bo->base); - } - - delete_cmd_list(); - - return retval; -} - - -/** - * mb_query() - Check command status of argument command - * - * @xcmd: Command to check - * - * This function is for ERT mode. In polling mode, check the command status - * register containing the slot assigned to the command. In interrupt mode - * check the interrupting status register. 
The function checks all commands in - * the same command status register as argument command so more than one - * command may be marked complete by this function. - */ -static void -mb_query(struct xocl_cmd *xcmd) -{ - struct drm_xocl_dev *xdev = xcmd->xdev; - unsigned int cmd_mask_idx = slot_mask_idx(xcmd->slot_idx); - - SCHED_DEBUGF("-> mb_query slot_idx=%d, cmd_mask_idx=%d\n",xcmd->slot_idx,cmd_mask_idx); - - if (xdev->exec.polling_mode - || (cmd_mask_idx==0 && atomic_read(&xdev->exec.sr0)) - || (cmd_mask_idx==1 && atomic_read(&xdev->exec.sr1)) - || (cmd_mask_idx==2 && atomic_read(&xdev->exec.sr2)) - || (cmd_mask_idx==3 && atomic_read(&xdev->exec.sr3))) { - u32 csr_addr = ERT_STATUS_REGISTER_ADDR + (cmd_mask_idx<<2); - u32 mask = ioread32(xcmd->xdev->user_bar + csr_addr); - if (mask) - mark_mask_complete(xcmd->xdev,mask,cmd_mask_idx); - SCHED_DEBUGF("++ mb_query csr_addr=0x%x mask=0x%x\n",csr_addr,mask); - } - - SCHED_DEBUGF("<- mb_query\n"); -} - -/** - * penguin_query() - Check command status of argument command - * - * @xcmd: Command to check - * - * Function is called in penguin mode (no embedded scheduler). 
- */ -static void -penguin_query(struct xocl_cmd *xcmd) -{ - u32 opc = opcode(xcmd); - - SCHED_DEBUGF("-> penguin_queury() slot_idx=%d\n",xcmd->slot_idx); - - if (opc==ERT_CONFIGURE || (opc==ERT_START_CU && cu_done(xcmd->xdev,get_cu_idx(xcmd->xdev,xcmd->slot_idx)))) - mark_cmd_complete(xcmd); - - SCHED_DEBUG("<- penguin_queury\n"); -} - -/** - * mb_submit() - Submit a command the embedded scheduler command queue - * - * @xcmd: Command to submit - * Return: %true if successfully submitted, %false otherwise - */ -static int -mb_submit(struct xocl_cmd *xcmd) -{ - u32 slot_addr; - - SCHED_DEBUG("-> mb_submit\n"); - - xcmd->slot_idx = acquire_slot_idx(xcmd->xdev); - if (xcmd->slot_idx<0) { - SCHED_DEBUG("<- mb_submit returns 0\n"); - return 0; - } - - slot_addr = ERT_CQ_BASE_ADDR + xcmd->slot_idx*slot_size(xcmd->xdev); - SCHED_DEBUGF("++ mb_submit slot_idx=%d, slot_addr=0x%x\n",xcmd->slot_idx,slot_addr); - - /* write packet minus header */ - memcpy_toio(xcmd->xdev->user_bar + slot_addr + 4,xcmd->packet->data,(packet_size(xcmd)-1)*sizeof(u32)); - - /* write header */ - iowrite32(xcmd->packet->header,xcmd->xdev->user_bar + slot_addr); - - /* trigger interrupt to embedded scheduler if feature is enabled */ - if (xcmd->xdev->exec.cq_interrupt) { - u32 cq_int_addr = ERT_CQ_STATUS_REGISTER_ADDR + (slot_mask_idx(xcmd->slot_idx)<<2); - u32 mask = 1<slot_idx); - SCHED_DEBUGF("++ mb_submit writes slot mask 0x%x to CQ_INT register at addr 0x%x\n", - mask,cq_int_addr); - iowrite32(mask,xcmd->xdev->user_bar + cq_int_addr); - } - - SCHED_DEBUG("<- mb_submit returns 1\n"); - return 1; -} - -/** - * get_free_cu() - get index of first available CU per command cu mask - * - * @xcmd: command containing CUs to check for availability - * - * This function is called kernel software scheduler mode only, in embedded - * scheduler mode, the hardware scheduler handles the commands directly. - * - * Return: Index of free CU, -1 of no CU is available. 
- */ -static int -get_free_cu(struct xocl_cmd *xcmd) -{ - int mask_idx=0; - SCHED_DEBUG("-> get_free_cu\n"); - for (mask_idx=0; mask_idxxdev->exec.num_cu_masks; ++mask_idx) { - u32 cmd_mask = xcmd->packet->data[mask_idx]; /* skip header */ - u32 busy_mask = xcmd->xdev->exec.cu_status[mask_idx]; - int cu_idx = ffs_or_neg_one((cmd_mask | busy_mask) ^ busy_mask); - if (cu_idx>=0) { - xcmd->xdev->exec.cu_status[mask_idx] ^= 1<xdev->user_bar; - u32 cu_addr = cu_idx_to_addr(xcmd->xdev,cu_idx); - u32 size = regmap_size(xcmd); - struct ert_start_kernel_cmd *ecmd = (struct ert_start_kernel_cmd *)xcmd->packet; - - SCHED_DEBUGF("-> configure_cu cu_idx=%d, cu_addr=0x%x, regmap_size=%d\n" - ,cu_idx,cu_addr,size); - - /* write register map, but skip first word (AP_START) */ - /* can't get memcpy_toio to work */ - /* memcpy_toio(user_bar + cu_addr + 4,ecmd->data + ecmd->extra_cu_masks + 1,(size-1)*4); */ - for (i=1; idata + ecmd->extra_cu_masks + i),user_bar + cu_addr + (i<<2)); - - /* start CU at base + 0x0 */ - iowrite32(0x1,user_bar + cu_addr); - - SCHED_DEBUG("<- configure_cu\n"); -} - -/** - * penguin_submit() - penguin submit of a command - * - * @xcmd: command to submit - * - * Special processing for configure command. Configuration itself is - * done/called by queued_to_running before calling penguin_submit. In penguin - * mode configuration need to ensure that the command is retired properly by - * scheduler, so assign it a slot index and let normal flow continue. 
- * - * Return: %true on successful submit, %false otherwise - */ -static int -penguin_submit(struct xocl_cmd *xcmd) -{ - SCHED_DEBUG("-> penguin_submit\n"); - - /* configuration was done by submit_cmds, ensure the cmd retired properly */ - if (opcode(xcmd)==ERT_CONFIGURE) { - xcmd->slot_idx = acquire_slot_idx(xcmd->xdev); - SCHED_DEBUG("<- penguin_submit (configure)\n"); - return 1; - } - - if (opcode(xcmd)!=ERT_START_CU) - return 0; - - /* extract cu list */ - xcmd->cu_idx = get_free_cu(xcmd); - if (xcmd->cu_idx<0) - return 0; - - xcmd->slot_idx = acquire_slot_idx(xcmd->xdev); - if (xcmd->slot_idx<0) - return 0; - - /* found free cu, transfer regmap and start it */ - configure_cu(xcmd,xcmd->cu_idx); - - SCHED_DEBUGF("<- penguin_submit cu_idx=%d slot=%d\n",xcmd->cu_idx,xcmd->slot_idx); - - return 1; -} - - -/** - * mb_ops: operations for ERT scheduling - */ -static struct xocl_sched_ops mb_ops = { - .submit = mb_submit, - .query = mb_query, -}; - -/** - * penguin_ops: operations for kernel mode scheduling - */ -static struct xocl_sched_ops penguin_ops = { - .submit = penguin_submit, - .query = penguin_query, -}; - -/** - * xocl_user_event() - Interrupt service routine for MB interrupts - * - * Called by xocl_xdma_user_isr() which is our stub for user ISR registered with libxdma - * Kernel doc says eventfd_signal() does not sleep so it should be okay to call this in ISR - * TODO: Add support for locking so xdev->user_msix_table[irq] is not deleted/changed by - * xocl_user_intr_ioctl() while we are using it. - */ -int -xocl_user_event(int irq, struct drm_xocl_dev *xdev) -{ - SCHED_DEBUGF("xocl_user_event %d\n",irq); - if (irq>=XOCL_CSR_INTR0 && irq<=XOCL_CSR_INTR3 && is_ert(xdev) && !xdev->exec.polling_mode) { - - if (irq==0) - atomic_set(&xdev->exec.sr0,1); - else if (irq==1) - atomic_set(&xdev->exec.sr1,1); - else if (irq==2) - atomic_set(&xdev->exec.sr2,1); - else if (irq==3) - atomic_set(&xdev->exec.sr3,1); - - /* wake up all scheduler ... 
currently one only */ - atomic_set(&global_scheduler0.intc,1); - wake_up_interruptible(&global_scheduler0.wait_queue); - return 0; - } - if (!xdev->exec.user_msix_table[irq]) - return -EFAULT; - if (eventfd_signal(xdev->exec.user_msix_table[irq], 1) == 1) - return 0; - return -EFAULT; -} - - -/** - * xocl_execbuf_ioctl() - Entry point for exec buffer. - * - * @dev: Device node calling execbuf - * @data: Payload - * @filp: - * - * Function adds exec buffer to the pending list of commands - * - * Return: 0 on success, -errno otherwise - */ -int -xocl_execbuf_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - struct drm_gem_object *obj; - struct drm_xocl_bo *xobj; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_execbuf *args = data; - int ret = 0; - - SCHED_DEBUG("-> xocl_execbuf_ioctl\n"); - obj = xocl_gem_object_lookup(dev, filp, args->exec_bo_handle); - if (!obj) { - DRM_INFO("Failed to look up GEM BO %d\n", args->exec_bo_handle); - return -ENOENT; - } - - xobj = to_xocl_bo(obj); - if (!xocl_bo_execbuf(xobj)) { - ret = -EINVAL; - goto out; - } - - /* Add the command to pending list */ - if (add_cmd(xdev,xobj)) { - ret = -EINVAL; - goto out; - } - - /* we keep a bo reference which is released later when the bo is retired when task is done */ - SCHED_DEBUG("<- xocl_execbuf_ioctl\n"); - return ret; -out: - drm_gem_object_unreference_unlocked(&xobj->base); - return ret; -} - -/** - * xocl_init_exec() - Initialize the command execution for device - * - * @xdev: Device node to initialize - * - * Return: 0 on success, -errno otherwise - */ -int -xocl_init_exec(struct drm_xocl_dev *xdev) -{ - unsigned int i; - - mutex_init(&xdev->exec.user_msix_table_lock); - spin_lock_init(&xdev->exec.ctx_list_lock); - INIT_LIST_HEAD(&xdev->exec.ctx_list); - init_waitqueue_head(&xdev->exec.poll_wait_queue); - - xdev->exec.scheduler = &global_scheduler0; - - for (i=0; iexec.submitted_cmds[i] = NULL; - - xdev->exec.num_slots = 16; - 
xdev->exec.num_cus = 0; - xdev->exec.cu_base_addr = 0; - xdev->exec.cu_shift_offset = 0; - xdev->exec.cq_interrupt = 0; - xdev->exec.polling_mode = 1; - - for (i=0; iexec.slot_status[i] = 0; - xdev->exec.num_slot_masks = 1; - - for (i=0; iexec.cu_status[i] = 0; - xdev->exec.num_cu_masks = 0; - - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR0, true); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR1, true); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR2, true); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR3, true); - xdev->exec.ops = &penguin_ops; - - atomic_set(&xdev->exec.sr0,0); - atomic_set(&xdev->exec.sr1,0); - atomic_set(&xdev->exec.sr2,0); - atomic_set(&xdev->exec.sr3,0); - - init_scheduler_thread(); - return 0; -} - -/** - * xocl_fini_exec() - Finalize the command execution for device - * - * @xdev: Device node to finalize - * - * Return: 0 on success, -errno otherwise - */ -int xocl_fini_exec(struct drm_xocl_dev *xdev) -{ - int i; - - fini_scheduler_thread(); - - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR0, false); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR1, false); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR2, false); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR3, false); - for (i=0; i<16; i++) { - xdma_user_interrupt_config(xdev, i, false); - if (xdev->exec.user_msix_table[i]) - eventfd_ctx_put(xdev->exec.user_msix_table[i]); - } - mutex_destroy(&xdev->exec.user_msix_table_lock); - - return 0; -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_exec.h b/sdk/linux_kernel_drivers/xocl/xocl_exec.h deleted file mode 100644 index e36f3990..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_exec.h +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * Compute unit execution, interrupt management and client context core data structures. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _XCL_XOCL_EXEC_H_ -#define _XCL_XOCL_EXEC_H_ - -#include -#include -#include -#include - -#define XOCL_CSR_INTR0 0 -#define XOCL_CSR_INTR1 1 -#define XOCL_CSR_INTR2 2 -#define XOCL_CSR_INTR3 3 - -#define XOCL_USER_INTR_START 4 -#define XOCL_USER_INTR_END 16 - -#define XOCL_MAX_SLOTS 128 -#define XOCL_MAX_CUS 128 -#define XOCL_MAX_U32_SLOT_MASKS (((XOCL_MAX_SLOTS-1)>>5) + 1) -#define XOCL_MAX_U32_CU_MASKS (((XOCL_MAX_CUS-1)>>5) + 1) - -struct eventfd_ctx; -struct drm_xocl_dev; - -struct drm_xocl_client_ctx { - struct list_head link; - atomic_t trigger; - /* - * Bitmap to indicate all the user interrupts registered. These are unmanaged - * interrupts directly used by the non-OpenCL application. 
The corresponding - * eventfd objects are stored in drm_xocl_dev::user_msix_table - */ - unsigned int eventfd_bitmap; - struct mutex lock; -}; - -/** - * struct drm_xocl_exec_core: Core data structure for command execution on a device - * - * @user_msix_table: Eventfd table for user interrupts - * @user_msix_table_lock: Eventfd table lock - * @ctx_list: Context list populated with device context - * @ctx_list_lock: Context list lock - * @poll_wait_queue: Wait queue for device polling - * @scheduler: Command queue scheduler - * @submitted_cmds: Tracking of command submitted for execution on this device - * @num_slots: Number of command queue slots - * @num_cus: Number of CUs in loaded program - * @cu_shift_offset: CU idx to CU address shift value - * @cu_base_addr: Base address of CU address space - * @polling_mode: If set then poll for command completion - * @cq_interrupt: If set then trigger interrupt to MB on new commands - * @configured: Flag to indicate that the core data structure has been initialized - * @slot_status: Bitmap to track status (busy(1)/free(0)) slots in command queue - * @num_slot_masks: Number of slots status masks used (computed from @num_slots) - * @cu_status: Bitmap to track status (busy(1)/free(0)) of CUs. Unused in ERT mode. - * @num_cu_masks: Number of CU masks used (computed from @num_cus) - * @sr0: If set, then status register [0..31] is pending with completed commands (ERT only). - * @sr1: If set, then status register [32..63] is pending with completed commands (ERT only). - * @sr2: If set, then status register [64..95] is pending with completed commands (ERT only). - * @sr3: If set, then status register [96..127] is pending with completed commands (ERT only). 
- * @ops: Scheduler operations vtable - */ -struct drm_xocl_exec_core { - struct eventfd_ctx *user_msix_table[16]; - struct mutex user_msix_table_lock; - - struct list_head ctx_list; - spinlock_t ctx_list_lock; - wait_queue_head_t poll_wait_queue; - - struct xocl_sched *scheduler; - - struct xocl_cmd *submitted_cmds[XOCL_MAX_SLOTS]; - - unsigned int num_slots; - unsigned int num_cus; - unsigned int cu_shift_offset; - u32 cu_base_addr; - unsigned int polling_mode; - unsigned int cq_interrupt; - unsigned int configured; - - /* Bitmap tracks busy(1)/free(0) slots in cmd_slots*/ - u32 slot_status[XOCL_MAX_U32_SLOT_MASKS]; - unsigned int num_slot_masks; /* ((num_slots-1)>>5)+1 */ - - u32 cu_status[XOCL_MAX_U32_CU_MASKS]; - unsigned int num_cu_masks; /* ((num_cus-1)>>5+1 */ - - /* Status register pending complete. Written by ISR, cleared by scheduler */ - atomic_t sr0; - atomic_t sr1; - atomic_t sr2; - atomic_t sr3; - - /* Operations for dynamic indirection dependt on MB or kernel scheduler */ - struct xocl_sched_ops* ops; -}; - -int xocl_init_exec(struct drm_xocl_dev *xdev); -int xocl_fini_exec(struct drm_xocl_dev *xdev); - -int xocl_init_test_thread(struct drm_xocl_dev *xdev); -int xocl_fini_test_thread(struct drm_xocl_dev *xdev); - -void xocl_track_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv); -void xocl_untrack_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv); - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.c b/sdk/linux_kernel_drivers/xocl/xocl_ioctl.c deleted file mode 100644 index 0de72558..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.c +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * A GEM style device manager for PCIe based OpenCL accelerators. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" - -static const struct axlf_section_header* get_axlf_section(const struct axlf* top, enum axlf_section_kind kind) -{ - int i = 0; - printk(KERN_INFO "Trying to find section header for axlf section %d", kind); - for(i = 0; i < top->m_header.m_numSections; i++) - { - printk(KERN_INFO "Section is %d",top->m_sections[i].m_sectionKind); - if(top->m_sections[i].m_sectionKind == kind) { - printk(KERN_INFO "Found section header for axlf"); - return &top->m_sections[i]; - } - } - printk(KERN_INFO "Did NOT find section header for axlf section %d", kind); - return NULL; -} - - -static long xclbin_precheck_cleanup(struct drm_device *dev, int preserve_mem) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_mem_topology *topology = &xdev->topology; - long err = 0; - short ddr = 0; - unsigned i = 0; - printk(KERN_INFO "%s XOCL: Existing bank count = %d\n", __FUNCTION__, topology->bank_count); - ddr = 0; - if( !preserve_mem ) { // Data Retention - for (i= 0; i < topology->bank_count; i++) { - if (topology->m_data[i].m_used) { - ddr++; - if (xdev->mm_usage_stat[ddr -1].bo_count !=0 ) { - err = -EBUSY; - printk(KERN_INFO "%s The ddr %d has pre-existing buffer allocations, please exit and re-run.\n", __FUNCTION__, ddr -1); - return err; - } - } - } - - printk(KERN_INFO "XOCL: Marker 2.1\n"); - 
//Cleanup the topology struct from the previous xclbin - ddr = xocl_ddr_channel_count(dev); - printk( KERN_INFO "%s XOCL: xocl_ddr_channel_count(dev): %d\n", __FUNCTION__, ddr ); - for (i = 0; i < ddr; i++) { - if(topology->m_data[i].m_used) { - printk(KERN_INFO "Taking down DDR : %d", i); - drm_mm_takedown(&xdev->mm[i]); - } - } - - vfree(topology->m_data); - vfree(topology->topology); - memset(topology, 0, sizeof(struct drm_xocl_mem_topology)); - } - - vfree(xdev->connectivity.connections); - memset(&xdev->connectivity, 0, sizeof(xdev->connectivity)); - vfree(xdev->layout.layout); - memset(&xdev->layout, 0, sizeof(xdev->layout)); - vfree(xdev->debug_layout.layout); - memset(&xdev->debug_layout, 0, sizeof(xdev->debug_layout)); - - return err; -} - - -int xocl_read_axlf_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - struct drm_xocl_axlf *axlf_obj_ptr = data; - struct drm_xocl_dev *xdev = dev->dev_private; - long err = 0; - unsigned i = 0; - uint64_t copy_buffer_size = 0; - struct axlf* copy_buffer = 0; - const struct axlf_section_header *memHeader = 0; - char __user *buffer =0; - int32_t bank_count = 0; - short ddr = 0; - struct axlf bin_obj; - int preserve_mem; - struct drm_xocl_mem_topology *topology; - struct drm_xocl_mem_topology new_topology; - new_topology.topology = NULL; - new_topology.m_data = NULL; - - printk(KERN_INFO "%s %s READ_AXLF IOCTL \n", DRV_NAME, __FUNCTION__); - - if(!xdev->unified) { - printk(KERN_INFO "XOCL: not unified dsa"); - return err; - } - - printk(KERN_INFO "XOCL: Marker 0 %p\n", data); - if (copy_from_user((void *)&bin_obj, (void*)axlf_obj_ptr->xclbin, sizeof(struct axlf))) - return -EFAULT; - if (memcmp(bin_obj.m_magic, "xclbin2", 8)) - return -EINVAL; - //Ignore timestamp matching for AWS platform - if(bin_obj.m_header.m_featureRomTimeStamp != xdev->header.TimeSinceEpoch && strstr(xdev->header.VBNVName, "xilinx_aw") == NULL) { - printk(KERN_ERR "TimeStamp of ROM did not match Xclbin\n"); - return -EINVAL; 
- } - - printk(KERN_INFO "XOCL: VBNV and TimeStamps matched\n"); - - if(bin_obj.m_uniqueId == xdev->unique_id_last_bitstream) { - printk(KERN_INFO "Skipping repopulating topology, connectivity,ip_layout data\n"); - return err; - } - - //Copy from user space and proceed. - copy_buffer_size = (bin_obj.m_header.m_numSections)*sizeof(struct axlf_section_header) + sizeof(struct axlf); - copy_buffer = (struct axlf*)vmalloc(copy_buffer_size); - if(!copy_buffer) { - printk(KERN_ERR "Unable to create copy_buffer"); - return -EFAULT; - } - printk(KERN_INFO "XOCL: Marker 1\n"); - - if (copy_from_user((void *)copy_buffer, (void *)axlf_obj_ptr->xclbin, copy_buffer_size)) { - err = -EFAULT; - goto done; - } - - buffer = (char __user *)axlf_obj_ptr->xclbin; - err = !access_ok(VERIFY_READ, buffer, bin_obj.m_header.m_length); - if (err) { - err = -EFAULT; - goto done; - } - - //--- - printk(KERN_INFO "Finding MEM_TOPOLOGY section\n"); - memHeader = get_axlf_section(copy_buffer, MEM_TOPOLOGY); - if (memHeader == 0) { - printk(KERN_INFO "Did not find MEM_TOPOLOGY section.\n"); - err = -EINVAL; - goto done; - } - printk(KERN_INFO "XOCL: Marker 2\n"); - - printk(KERN_INFO "%s XOCL: MEM_TOPOLOGY offset = %llx, size = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize); - - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - err = -EINVAL; - goto done; - } - - buffer = (char __user *)axlf_obj_ptr->xclbin; - buffer += memHeader->m_sectionOffset; - - new_topology.topology = vmalloc(memHeader->m_sectionSize); - err = copy_from_user(new_topology.topology, buffer, memHeader->m_sectionSize); - if (err) - goto done; - - get_user(bank_count, buffer); - new_topology.size = memHeader->m_sectionSize; - new_topology.bank_count = bank_count; - new_topology.m_data_length = bank_count*sizeof(struct mem_data); - buffer += offsetof(struct mem_topology, m_mem_data); - new_topology.m_data = vmalloc(new_topology.m_data_length); - err = 
copy_from_user(new_topology.m_data, buffer, bank_count*sizeof(struct mem_data)); - if (err ) - goto done; - - //check for null pointer, then do mem compare - preserve_mem = 0; - if( xdev->topology.topology != NULL ) { - // m_data can be of different length but we would not compare them if topology match fails - if( !memcmp(new_topology.topology, xdev->topology.topology, memHeader->m_sectionSize) && - !memcmp(new_topology.m_data, xdev->topology.m_data, new_topology.bank_count*sizeof(struct mem_data) ) ) { - printk( KERN_INFO "XOCL: MEM_TOPOLOGY match, preserve mem_topology.\n" ); - preserve_mem = 1; - } else { - printk( KERN_INFO "XOCL: MEM_TOPOLOGY mismatch, do not preserve mem_topology.\n" ); - } - } - - //Switching the xclbin, make sure none of the buffers are used. - err = xclbin_precheck_cleanup(dev, preserve_mem); - if(err) - goto done; - - if( !preserve_mem ) { // Data Retention - xdev->topology.topology = new_topology.topology; - xdev->topology.size = new_topology.size; - xdev->topology.bank_count = new_topology.bank_count; - xdev->topology.m_data_length = new_topology.m_data_length; - xdev->topology.m_data = new_topology.m_data; - new_topology.topology = NULL; - new_topology.m_data = NULL; - } - - //---- - printk(KERN_INFO "Finding IP_LAYOUT section\n"); - memHeader = get_axlf_section(copy_buffer, IP_LAYOUT); - if (memHeader == 0) { - printk(KERN_INFO "Did not find IP_LAYOUT section.\n"); - } else { - printk(KERN_INFO "%s XOCL: IP_LAYOUT offset = %llx, size = %llx, xclbin length = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize, bin_obj.m_header.m_length); - - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - printk(KERN_INFO "%s XOCL: IP_LAYOUT section extends beyond xclbin boundary %llx\n", __FUNCTION__, bin_obj.m_header.m_length); - err = -EINVAL; - goto done; - } - printk(KERN_INFO "XOCL: Marker 3.1\n"); - buffer += memHeader->m_sectionOffset; - xdev->layout.layout = 
vmalloc(memHeader->m_sectionSize); - err = copy_from_user(xdev->layout.layout, buffer, memHeader->m_sectionSize); - printk(KERN_INFO "XOCL: Marker 3.2\n"); - if (err) - goto done; - xdev->layout.size = memHeader->m_sectionSize; - printk(KERN_INFO "XOCL: Marker 3.3\n"); - } - - //---- - printk(KERN_INFO "Finding DEBUG_IP_LAYOUT section\n"); - memHeader = get_axlf_section(copy_buffer, DEBUG_IP_LAYOUT); - if (memHeader == 0) { - printk(KERN_INFO "Did not find DEBUG_IP_LAYOUT section.\n"); - } else { - printk(KERN_INFO "%s XOCL: DEBUG_IP_LAYOUT offset = %llx, size = %llx, xclbin length = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize, bin_obj.m_header.m_length); - - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - printk(KERN_INFO "%s XOCL: DEBUG_IP_LAYOUT section extends beyond xclbin boundary %llx\n", __FUNCTION__, bin_obj.m_header.m_length); - err = -EINVAL; - goto done; - } - printk(KERN_INFO "XOCL: Marker 4.1\n"); - buffer = (char __user *)axlf_obj_ptr->xclbin; - buffer += memHeader->m_sectionOffset; - xdev->debug_layout.layout = vmalloc(memHeader->m_sectionSize); - err = copy_from_user(xdev->debug_layout.layout, buffer, memHeader->m_sectionSize); - printk(KERN_INFO "XOCL: Marker 4.2\n"); - if (err) - goto done; - xdev->debug_layout.size = memHeader->m_sectionSize; - printk(KERN_INFO "XOCL: Marker 4.3\n"); - } - - //--- - printk(KERN_INFO "Finding CONNECTIVITY section\n"); - memHeader = get_axlf_section(copy_buffer, CONNECTIVITY); - if (memHeader == 0) { - printk(KERN_INFO "Did not find CONNECTIVITY section.\n"); - } else { - printk(KERN_INFO "%s XOCL: CONNECTIVITY offset = %llx, size = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize); - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - err = -EINVAL; - goto done; - } - buffer = (char __user *)axlf_obj_ptr->xclbin; - buffer += memHeader->m_sectionOffset; - 
xdev->connectivity.connections = vmalloc(memHeader->m_sectionSize); - err = copy_from_user(xdev->connectivity.connections, buffer, memHeader->m_sectionSize); - if (err) - goto done; - xdev->connectivity.size = memHeader->m_sectionSize; - } - - printk(KERN_INFO "XOCL: Marker 5\n"); - - topology = &xdev->topology; - - printk(KERN_INFO "XOCL: Topology Bank count = %d, data_length = %d\n", topology->bank_count, xdev->topology.m_data_length); - - if (!preserve_mem) { // Data Retention - xdev->mm = devm_kzalloc(dev->dev, sizeof(struct drm_mm) * topology->bank_count, GFP_KERNEL); - xdev->mm_usage_stat = devm_kzalloc(dev->dev, sizeof(struct drm_xocl_mm_stat) * topology->bank_count, GFP_KERNEL); - if (!xdev->mm || !xdev->mm_usage_stat) { - err = -ENOMEM; - goto done; - } - } - - //Check if sizes are same across banks. - ddr = 0; - for (i=0; i < topology->bank_count; i++) - { - printk(KERN_INFO "XOCL, DDR Info Index: %d Type:%d Used:%d Size:%llx Base_addr:%llx\n", i, - topology->m_data[i].m_type, topology->m_data[i].m_used, topology->m_data[i].m_size, - topology->m_data[i].m_base_address); - if (topology->m_data[i].m_used) - { - ddr++; - if ((topology->bank_size !=0) && (topology->bank_size != topology->m_data[i].m_size)) { - //we support only same sized banks for initial testing, so return error. - printk(KERN_INFO "%s err: %ld\n", __FUNCTION__, err); - err = -EFAULT; - vfree(xdev->topology.m_data); - memset(&xdev->topology, 0, sizeof(xdev->topology)); - goto done; - } - topology->bank_size = topology->m_data[i].m_size; - } - } - - //xdev->topology.used_bank_count = ddr; - printk(KERN_INFO "XOCL: Unified flow, used bank count :%d bank size(KB):%llx\n", ddr, xdev->topology.bank_size); - - if (!preserve_mem) { // Data Retention - //initialize the used banks and their sizes. Currently only fixed sizes are supported. 
- for (i=0; i < topology->bank_count; i++) - { - if (topology->m_data[i].m_used) { - printk(KERN_INFO "%s Allocating DDR:%d with base_addr:%llx, size %llx \n", __FUNCTION__, i, - topology->m_data[i].m_base_address, topology->m_data[i].m_size*1024); - drm_mm_init(&xdev->mm[i], topology->m_data[i].m_base_address, topology->m_data[i].m_size*1024); - printk(KERN_INFO "drm_mm_init called \n"); - } - } - } - - //Populate with "this" bitstream, so avoid redownload the next time - xdev->unique_id_last_bitstream = bin_obj.m_uniqueId; - -done: - printk(KERN_INFO "%s err: %ld\n", __FUNCTION__, err); - vfree(copy_buffer); - if (new_topology.topology != NULL) - vfree(new_topology.topology); - if (new_topology.m_data != NULL) - vfree(new_topology.m_data); - return err; - -} - -int xocl_ctx_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - unsigned long flags; - int ret = 0; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_ctx *args = data; - - if (args->op == XOCL_CTX_OP_FREE_CTX) { - DRM_INFO("Releasing context for pid %d\n", pid_nr(task_tgid(current))); - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); - return 0; - } - - if (args->op != XOCL_CTX_OP_ALLOC_CTX) - return -EINVAL; - - DRM_INFO("Creating context for pid %d\n", pid_nr(task_tgid(current))); - - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); - return ret; -} - - -int xocl_debug_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret = -EINVAL; - //struct drm_xocl_debug *args = data; - return ret; -} - - - -int xocl_user_intr_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) - -{ - struct eventfd_ctx *trigger; - int ret = 0; - struct drm_xocl_user_intr *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx *fpriv = filp->driver_priv; - - if ((args->msix >= 
XOCL_USER_INTR_END) || (args->msix < XOCL_USER_INTR_START)) - return -EINVAL; - mutex_lock(&xdev->exec.user_msix_table_lock); - if (xdev->exec.user_msix_table[args->msix]) { - ret = -EPERM; - goto out; - } - - if (args->fd < 0) - goto out; - trigger = eventfd_ctx_fdget(args->fd); - if (IS_ERR(trigger)) { - ret = PTR_ERR(trigger); - goto out; - } - xdev->exec.user_msix_table[args->msix] = trigger; - xdma_user_interrupt_config(xdev, args->msix, true); - fpriv->eventfd_bitmap |= (1 << args->msix); -out: - mutex_unlock(&xdev->exec.user_msix_table_lock); - return ret; -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.h b/sdk/linux_kernel_drivers/xocl/xocl_ioctl.h deleted file mode 100644 index 49ffa85c..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.h +++ /dev/null @@ -1,375 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This file is dual licensed. It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - * - * Apache License Verbiage - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * GPL license Verbiage: - * - * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
- * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - * You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * Copyright (C) 2017 Xilinx, Inc. All rights reserved. - * - * Authors: - * Sonal Santan - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#ifndef _XCL_XOCL_IOCTL_H_ -#define _XCL_XOCL_IOCTL_H_ - -#if defined(__KERNEL__) -#include -#elif defined(__cplusplus) -#include -#include -#else -#include -#include -#endif - -enum { - /* GEM core ioctls */ - /* Buffer creation */ - DRM_XOCL_CREATE_BO = 0, - /* Buffer creation from user provided pointer */ - DRM_XOCL_USERPTR_BO, - /* Map buffer into application user space (no DMA is performed) */ - DRM_XOCL_MAP_BO, - /* Sync buffer (like fsync) in the desired direction by using DMA */ - DRM_XOCL_SYNC_BO, - /* Get information about the buffer such as physical address in the device, etc */ - DRM_XOCL_INFO_BO, - /* Update host cached copy of buffer wih user's data */ - DRM_XOCL_PWRITE_BO, - /* Update user's data with host cached copy of buffer */ - DRM_XOCL_PREAD_BO, - /* Other ioctls */ - DRM_XOCL_OCL_RESET, - /* Currently unused */ - DRM_XOCL_CTX, - /* Get information from device */ - DRM_XOCL_INFO, - /* Unmanaged DMA from/to device */ - DRM_XOCL_PREAD_UNMGD, - DRM_XOCL_PWRITE_UNMGD, - /* Various usage metrics */ - DRM_XOCL_USAGE_STAT, - /* Hardware debug command */ - DRM_XOCL_DEBUG, - /* Command to run on one or more CUs */ - DRM_XOCL_EXECBUF, - /* Register eventfd for user interrupts */ - DRM_XOCL_USER_INTR, - /* Read xclbin/axlf */ - DRM_XOCL_READ_AXLF, - DRM_XOCL_NUM_IOCTLS -}; - -enum drm_xocl_sync_bo_dir { - DRM_XOCL_SYNC_BO_TO_DEVICE = 0, - DRM_XOCL_SYNC_BO_FROM_DEVICE -}; - -/* - * Higher 4 bits are for DDR, one for each DDR - * LSB bit for execbuf - */ -#define DRM_XOCL_BO_BANK0 (0x1) -#define DRM_XOCL_BO_BANK1 (0x1 << 1) -#define DRM_XOCL_BO_BANK2 (0x1 << 2) -#define DRM_XOCL_BO_BANK3 (0x1 << 3) -#define DRM_XOCL_BO_CMA (0x1 << 30) -#define DRM_XOCL_BO_EXECBUF (0x1 << 31) - -struct drm_xocl_create_bo { - uint64_t size; - uint32_t handle; - uint32_t flags; -}; - -struct drm_xocl_userptr_bo { - uint64_t addr; - uint64_t size; - uint32_t handle; - uint32_t flags; -}; - -struct drm_xocl_map_bo { - uint32_t handle; - uint32_t pad; - uint64_t offset; 
-}; - -/** - * struct drm_xocl_sync_bo - used for SYNQ_BO IOCTL - * @handle: GEM object handle - * @flags: Unused - * @size: Number of bytes to migrate - * @offset: Offset into the object to write to - * @dir: DRM_XOCL_SYNC_DIR_XXX - */ -struct drm_xocl_sync_bo { - uint32_t handle; - uint32_t flags; - uint64_t size; - uint64_t offset; - enum drm_xocl_sync_bo_dir dir; -}; - -/** - * struct drm_xocl_info_bo - used for INFO_BO IOCTL - * @handle: GEM object handle - * @size: Size of buffer object in bytes - * @paddr: physical address (out) - */ -struct drm_xocl_info_bo { - uint32_t handle; - uint32_t flags; - uint64_t size; - uint64_t paddr; -}; - -struct drm_xocl_axlf { - struct axlf *xclbin; -}; - -/** - * struct drm_xocl_pwrite_bo - used for PWRITE_BO IOCTL - * @handle: GEM object handle - * @pad: Padding - * @offset: Offset into the buffer object to write to - * @size: Length of data to write - * @data_ptr: Pointer to read the data from - */ -struct drm_xocl_pwrite_bo { - uint32_t handle; - uint32_t pad; - uint64_t offset; - uint64_t size; - uint64_t data_ptr; -}; - -/** - * struct drm_xocl_pread_bo - used for PREAD_BO IOCTL - * @handle: GEM object handle - * @pad: Padding - * @offset: Offset into the buffer object to read from - * @size: Length of data to read - * @data_ptr: Pointer to write the data into - */ -struct drm_xocl_pread_bo { - uint32_t handle; - uint32_t pad; - uint64_t offset; - uint64_t size; - uint64_t data_ptr; -}; - -enum drm_xocl_ctx_code { - XOCL_CTX_OP_ALLOC_CTX = 0, - XOCL_CTX_OP_FREE_CTX -}; - -struct drm_xocl_ctx { - enum drm_xocl_ctx_code op; - char uuid[16]; - uint32_t cu_bitmap; - uint32_t flags; -}; - -struct drm_xocl_info { - unsigned short vendor; - unsigned short device; - unsigned short subsystem_vendor; - unsigned short subsystem_device; - unsigned int dma_engine_version; - unsigned int driver_version; - unsigned int pci_slot; - char reserved[64]; -}; - - -/** - * struct drm_xocl_pwrite_unmgd (used with PWRITE_UNMGD IOCTL) - * 
@address_space: Address space in the DSA; currently only 0 is suported - * @pad: Padding - * @offset: Physical address in the specified address space - * @size: Length of data to write - * @data_ptr: Pointer to read the data from - */ -struct drm_xocl_pwrite_unmgd { - uint32_t address_space; - uint32_t pad; - uint64_t paddr; - uint64_t size; - uint64_t data_ptr; -}; - -/** - * struct drm_xocl_pread_unmgd (used for PREAD_UNMGD IOCTL) - * @address_space: Address space in the DSA; currently only 0 is valid - * @pad: Padding - * @offset: Physical address in the specified address space - * @size: Length of data to write - * @data_ptr: Pointer to write the data to - */ -struct drm_xocl_pread_unmgd { - uint32_t address_space; - uint32_t pad; - uint64_t paddr; - uint64_t size; - uint64_t data_ptr; -}; - - -struct drm_xocl_mm_stat { - size_t memory_usage; - unsigned int bo_count; -}; - -/** - * struct drm_xocl_stats (used for STATS IOCTL) - * @address_space: Address space in the DSA; currently only 0 is valid - * @pad: Padding - * @offset: Physical address in the specified address space - * @size: Length of data to write - * @data_ptr: Pointer to write the data to - */ -struct drm_xocl_usage_stat { - unsigned dma_channel_count; - unsigned mm_channel_count; - uint64_t h2c[8]; - uint64_t c2h[8]; - struct drm_xocl_mm_stat mm[8]; -}; - -enum drm_xocl_debug_code { - DRM_XOCL_DEBUG_ACQUIRE_CU = 0, - DRM_XOCL_DEBUG_RELEASE_CU, - DRM_XOCL_DEBUG_NIFD_RD, - DRM_XOCL_DEBUG_NIFD_WR, -}; - -struct drm_xocl_debug { - uint32_t ctx_id; - enum drm_xocl_debug_code code; - unsigned int code_size; - uint64_t code_ptr; -}; - -/** - * Opcodes for the embedded scheduler provided by the client to the driver - */ -enum drm_xocl_execbuf_code { - DRM_XOCL_EXECBUF_RUN_KERNEL = 0, - DRM_XOCL_EXECBUF_RUN_KERNEL_XYZ, - DRM_XOCL_EXECBUF_PING, - DRM_XOCL_EXECBUF_DEBUG, -}; - -/** - * State of exec request managed by the kernel driver - */ -enum drm_xocl_execbuf_state { - DRM_XOCL_EXECBUF_STATE_COMPLETE = 
0, - DRM_XOCL_EXECBUF_STATE_RUNNING, - DRM_XOCL_EXECBUF_STATE_SUBMITTED, - DRM_XOCL_EXECBUF_STATE_QUEUED, - DRM_XOCL_EXECBUF_STATE_ERROR, - DRM_XOCL_EXECBUF_STATE_ABORT, -}; - -/** - * Layout of BO of EXECBUF kind - */ -struct drm_xocl_execbuf_bo { - enum drm_xocl_execbuf_state state; - enum drm_xocl_execbuf_code code; - uint64_t cu_bitmap; - uint64_t token; - char buf[3584]; // inline regmap layout -}; - -struct drm_xocl_execbuf { - uint32_t ctx_id; - uint32_t exec_bo_handle; -}; - -/** - * struct drm_xocl_user_intr (used for XOCL_USER_INTR IOCTL) - * @ctx_id: Context created before with CTX ioctl - * @fd: File descriptor created with eventfd system call - * @msix: User interrupt number (0 to 15) - */ -struct drm_xocl_user_intr { - uint32_t ctx_id; - int fd; - int msix; -}; - - -#define DRM_IOCTL_XOCL_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_CREATE_BO, struct drm_xocl_create_bo) -#define DRM_IOCTL_XOCL_USERPTR_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_USERPTR_BO, struct drm_xocl_userptr_bo) -#define DRM_IOCTL_XOCL_MAP_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_MAP_BO, struct drm_xocl_map_bo) -#define DRM_IOCTL_XOCL_SYNC_BO DRM_IOW (DRM_COMMAND_BASE + \ - DRM_XOCL_SYNC_BO, struct drm_xocl_sync_bo) -#define DRM_IOCTL_XOCL_INFO_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_INFO_BO, struct drm_xocl_info_bo) -#define DRM_IOCTL_XOCL_PWRITE_BO DRM_IOW (DRM_COMMAND_BASE + \ - DRM_XOCL_PWRITE_BO, struct drm_xocl_pwrite_bo) -#define DRM_IOCTL_XOCL_PREAD_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_PREAD_BO, struct drm_xocl_pread_bo) -#define DRM_IOCTL_XOCL_CTX DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_CTX, struct drm_xocl_ctx) -#define DRM_IOCTL_XOCL_INFO DRM_IOR(DRM_COMMAND_BASE + \ - DRM_XOCL_INFO, struct drm_xocl_info) -#define DRM_IOCTL_XOCL_READ_AXLF DRM_IOW(DRM_COMMAND_BASE + \ - DRM_XOCL_READ_AXLF, struct drm_xocl_axlf) -#define DRM_IOCTL_XOCL_PWRITE_UNMGD DRM_IOW (DRM_COMMAND_BASE + \ - DRM_XOCL_PWRITE_UNMGD, struct drm_xocl_pwrite_unmgd) -#define 
DRM_IOCTL_XOCL_PREAD_UNMGD DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_PREAD_UNMGD, struct drm_xocl_pread_unmgd) -#define DRM_IOCTL_XOCL_USAGE_STAT DRM_IOR(DRM_COMMAND_BASE + \ - DRM_XOCL_USAGE_STAT, struct drm_xocl_usage_stat) -#define DRM_IOCTL_XOCL_DEBUG DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_DEBUG, struct drm_xocl_debug) -#define DRM_IOCTL_XOCL_EXECBUF DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_EXECBUF, struct drm_xocl_execbuf) -#define DRM_IOCTL_XOCL_USER_INTR DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_USER_INTR, struct drm_xocl_user_intr) - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xocl_sysfs.c b/sdk/linux_kernel_drivers/xocl/xocl_sysfs.c deleted file mode 100644 index dab368be..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_sysfs.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Umang Parekh - * - * sysfs for the device attributes. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include "xocl_drv.h" - -//-xclbinid-- -static ssize_t xclbinid_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - return sprintf(buf, "%llx\n", xdev->unique_id_last_bitstream); -} - -static DEVICE_ATTR_RO(xclbinid); - -//-Base address-- -static ssize_t dr_base_addr_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - - //TODO: Fix: DRBaseAddress no longer required in feature rom - if(xdev->header.MajorVersion >= 10) - return sprintf(buf, "%llu\n", xdev->header.DRBaseAddress); - else - return sprintf(buf, "%u\n", 0); -} - -static DEVICE_ATTR_RO(dr_base_addr); - - -//-Mem_topology-- -static ssize_t mem_topology_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In mem_topology_show function \n", DRV_NAME, __FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->topology.topology, xdev->topology.size); - printk(KERN_INFO "%s %s Mem-copied %llx bytes \n", DRV_NAME, __FUNCTION__, xdev->topology.size); - return xdev->topology.size; -} - -static DEVICE_ATTR_RO(mem_topology); - -//-Connectivity-- -static ssize_t connectivity_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In connectivity_show function \n", DRV_NAME, __FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->connectivity.connections, xdev->connectivity.size); - return xdev->connectivity.size; -} - -static DEVICE_ATTR_RO(connectivity); - -//-IP_layout-- -static ssize_t ip_layout_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In ip_layout_show function \n", DRV_NAME, 
__FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->layout.layout, xdev->layout.size); - return xdev->layout.size; -} - -static DEVICE_ATTR_RO(ip_layout); - -//- Debug IP_layout-- -static ssize_t debug_ip_layout_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In debug_ip_layout_show function \n", DRV_NAME, __FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->debug_layout.layout, xdev->debug_layout.size); - printk(KERN_INFO "%s %s Mem-copied %llx bytes \n", DRV_NAME, __FUNCTION__, xdev->debug_layout.size); - return xdev->debug_layout.size; -} - -static DEVICE_ATTR_RO(debug_ip_layout); - - -//--- -int xocl_init_sysfs(struct device *dev) -{ - int result = device_create_file(dev, &dev_attr_xclbinid); - if(result) - return result; - result = device_create_file(dev, &dev_attr_dr_base_addr); - if(result) - return result; - result = device_create_file(dev, &dev_attr_connectivity); - if(result) - return result; - result = device_create_file(dev, &dev_attr_ip_layout); - if(result) - return result; - result = device_create_file(dev, &dev_attr_debug_ip_layout); - if(result) - return result; - result = device_create_file(dev, &dev_attr_mem_topology); - return result; -} - -void xocl_fini_sysfs(struct device *dev) -{ - printk(KERN_INFO "%s %s Cleaning up sys files \n", DRV_NAME, __FUNCTION__); - device_remove_file(dev, &dev_attr_xclbinid); - device_remove_file(dev, &dev_attr_dr_base_addr); - device_remove_file(dev, &dev_attr_mem_topology); - device_remove_file(dev, &dev_attr_connectivity); - device_remove_file(dev, &dev_attr_ip_layout); - device_remove_file(dev, &dev_attr_debug_ip_layout); -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_test.c b/sdk/linux_kernel_drivers/xocl/xocl_test.c deleted file mode 100644 index 94c1fee4..00000000 --- 
a/sdk/linux_kernel_drivers/xocl/xocl_test.c +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Copyright (C) 2018 Xilinx, Inc - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include "xocl_drv.h" - -int xocl_test_interval = 5; -bool xocl_test_on = true; - -/** - * TODO: - * Test drm_send_event() with event object initialized with drm_event_reserve_init() - * to send events for CUs - */ -static int xocl_test_thread_main(void *data) -{ -#if 0 - struct timeval now; - struct drm_xocl_dev *xdev = (struct drm_xocl_dev *)data; - int irq = 0; - int count = 0; - while (!kthread_should_stop()) { - ssleep(xocl_test_interval); - do_gettimeofday(&now); - DRM_INFO("irq[%d] tv_sec[%ld]tv_usec[%ld]\n", irq, now.tv_sec, now.tv_usec); - xocl_user_event(irq, xdev); - irq++; - irq &= 0xf; - count++; - } - printk(KERN_INFO "The xocl test thread has terminated."); -#endif - return 0; -} - -int xocl_init_test_thread(struct drm_xocl_dev *xdev) -{ - int ret = 0; -#if 0 - xdev->exec.test_kthread = kthread_run(xocl_test_thread_main, (void *)xdev, "xocl-test-thread"); - DRM_DEBUG(__func__); - if (IS_ERR(xdev->exec.test_kthread)) { - DRM_ERROR(__func__); - ret = PTR_ERR(xdev->exec.test_kthread); - xdev->exec.test_kthread = NULL; - } -#endif - return ret; -} - -int xocl_fini_test_thread(struct drm_xocl_dev *xdev) -{ - int ret = 0; -#if 0 - if (!xdev->exec.test_kthread) - return 0; - ret = kthread_stop(xdev->exec.test_kthread); - ssleep(xocl_test_interval); - 
xdev->exec.test_kthread = NULL; - DRM_DEBUG(__func__); -#endif - return ret; -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xdma.c b/sdk/linux_kernel_drivers/xocl/xocl_xdma.c deleted file mode 100644 index edc47f27..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xdma.c +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include "xocl_drv.h" -#include "xocl_xdma.h" -#include "libxdma_api.h" - -static irqreturn_t xocl_xdma_user_isr(int irq, void *arg) -{ - struct drm_xocl_dev *xdev = (struct drm_xocl_dev *)arg; - xocl_user_event(irq, xdev); - return IRQ_HANDLED; -} - -int xdma_init_glue(struct drm_xocl_dev *xdev) -{ - int ret = 0; - int user = 0; - unsigned short mask = ~0; - xdev->xdma_handle = (struct xdma_dev *) xdma_device_open(DRV_NAME, xdev->ddev->pdev, &user, - &xdev->channel, &xdev->channel); - if (xdev->xdma_handle == NULL) { - DRM_INFO("%s: XDMA Device Open failed. \n", DRV_NAME); - ret = -ENOENT; // TBD: Get the error code from XDMA API. - } - ret = xdma_user_isr_register(xdev->xdma_handle, mask, xocl_xdma_user_isr, xdev); - if (ret) - xdma_device_close(xdev->ddev->pdev, xdev->xdma_handle); - else - DRM_INFO("%s: XDMA Device Open successful. 
\n", DRV_NAME); - return ret; -} - -void xdma_fini_glue(struct drm_xocl_dev *xdev) -{ - unsigned short mask = ~0; - xdma_user_isr_register(xdev->xdma_handle, mask, NULL, xdev); - xdma_device_close(xdev->ddev->pdev, xdev->xdma_handle); - xdev->xdma_handle = NULL; - DRM_INFO("%s: XDMA Device Close successful. \n", DRV_NAME); -} - - -ssize_t xdma_migrate_bo(const struct drm_xocl_dev *xdev, struct sg_table *sgt, bool write, - u64 paddr, int channel) -{ - struct page *pg; - struct scatterlist *sg = sgt->sgl; - int nents = sgt->orig_nents; - pid_t pid = current->pid; - const char* dirstr = write ? "to" : "from"; - int i = 0; - ssize_t ret; - unsigned long long pgaddr; - DRM_DEBUG("%s TID %d, Channel:" - "%d, Offset: 0x%llx, Direction: %d\n", __func__, pid, channel, paddr, write ? 1 : 0); - ret = xdma_xfer_submit(xdev->xdma_handle, channel, write ? 1 : 0, paddr, sgt, false, 10000); - if (ret >= 0) - return ret; - - DRM_ERROR("DMA failed %s device addr 0x%llx, tid %d, channel %d\n", dirstr, paddr, pid, channel); - DRM_ERROR("Dumping SG Page Table\n"); - for (i = 0; i < nents; i++, sg = sg_next(sg)) { - if (!sg) - break; - pg = sg_page(sg); - if (!pg) - continue; - pgaddr = page_to_phys(pg); - DRM_ERROR("%i, 0x%llx\n", i, pgaddr); - } - return ret; -} - - -int xdma_user_interrupt_config(struct drm_xocl_dev *xdev, int user_intr_number, bool enable) -{ - const unsigned int mask = 1 << user_intr_number; - return enable ? 
xdma_user_isr_enable(xdev->xdma_handle, mask) : xdma_user_isr_disable(xdev->xdma_handle, mask); -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xdma.h b/sdk/linux_kernel_drivers/xocl/xocl_xdma.h deleted file mode 100644 index 5e6d1d64..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xdma.h +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _XCL_XOCL_XDMA_H_ -#define _XCL_XOCL_XDMA_H_ - -#include -#include -#include -#include - -int xdma_init_glue(struct drm_xocl_dev *xdev); -void xdma_fini_glue(struct drm_xocl_dev *xdev); -ssize_t xdma_migrate_bo(const struct drm_xocl_dev *xdev, struct sg_table *sgt, bool write, - u64 paddr, int channel); -int xdma_user_interrupt_config(struct drm_xocl_dev *xdev, int user_intr_number, bool enable); -#endif - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xvc.c b/sdk/linux_kernel_drivers/xocl/xocl_xvc.c deleted file mode 100644 index dc543c5c..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xvc.c +++ /dev/null @@ -1,330 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". - * - * Karen Xie - * Sonal Santan - * - ******************************************************************************/ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xocl_xvc.h" -#include "xvc_pcie_ioctl.h" - -#define DEFAULT_XVC_BAR_OFFSET 0xC0000 // DSA 4.1 and 5.0 -#define DEFAULT_XVC_BAR 0 - -#define COMPLETION_LOOP_MAX 100 - -#define XVC_BAR_LENGTH_REG 0x0 -#define XVC_BAR_TMS_REG 0x4 -#define XVC_BAR_TDI_REG 0x8 -#define XVC_BAR_TDO_REG 0xC -#define XVC_BAR_CTRL_REG 0x10 - -#define XVC_DEV_NAME "xvc" - -static dev_t xvc_dev; -static int instance = 0; -static struct class *xvc_class = NULL; - -#ifdef __REG_DEBUG__ -/* SECTION: Function definitions */ -static inline void __write_register(const char *fn, u32 value, void *base, - unsigned int off) -{ - pr_info("%s: 0x%p, W reg 0x%lx, 0x%x.\n", fn, base, off, value); - iowrite32(value, base + off); -} - -static inline u32 __read_register(const char *fn, void *base, unsigned int off) -{ - u32 v = ioread32(base + off); - - pr_info("%s: 0x%p, R reg 0x%lx, 0x%x.\n", fn, base, off, v); - return v; -} -#define write_register(v,base,off) __write_register(__func__, v, base, off) -#define read_register(base,off) __read_register(__func__, base, off) - -#else -#define write_register(v,base,off) iowrite32(v, (base) + (off)) -#define read_register(base,off) ioread32((base) + (off)) -#endif /* #ifdef __REG_DEBUG__ */ - - -static int xvc_shift_bits(void 
*base, u32 tms_bits, u32 tdi_bits, - u32 *tdo_bits) -{ - u32 control; - int count; - - /* set tms bit */ - write_register(tms_bits, base, XVC_BAR_TMS_REG); - /* set tdi bits and shift data out */ - write_register(tdi_bits, base, XVC_BAR_TDI_REG); - /* enable shift operation */ - write_register(0x1, base, XVC_BAR_CTRL_REG); - - /* poll for completion */ - count = COMPLETION_LOOP_MAX; - while (count) { - /* read control reg to check shift operation completion */ - control = read_register(base, XVC_BAR_CTRL_REG); - if ((control & 0x01) == 0) - break; - - count--; - } - - if (!count) { - pr_warn("XVC bar transaction timed out (0x%0X)\n", control); - return -ETIMEDOUT; - } - - /* read tdo bits back out */ - *tdo_bits = read_register(base, XVC_BAR_TDO_REG); - - return 0; -} - -static long xvc_ioctl_helper(struct xocl_xvc *xvc, const void __user *arg) -{ - struct xil_xvc_ioc xvc_obj; - unsigned int opcode; - unsigned int total_bits; - unsigned int total_bytes; - unsigned int bits, bits_left; - unsigned char *buffer = NULL; - unsigned char *tms_buf = NULL; - unsigned char *tdi_buf = NULL; - unsigned char *tdo_buf = NULL; - void __iomem *iobase = xvc->bar + DEFAULT_XVC_BAR_OFFSET; - int rv; - - rv = copy_from_user((void *)&xvc_obj, arg, - sizeof(struct xil_xvc_ioc)); - /* anything not copied ? 
*/ - if (rv) { - pr_info("copy_from_user xvc_obj failed: %d.\n", rv); - goto cleanup; - } - - opcode = xvc_obj.opcode; - - /* Invalid operation type, no operation performed */ - if (opcode != 0x01 && opcode != 0x02) { - pr_info("UNKNOWN opcode 0x%x.\n", opcode); - return -EINVAL; - } - - total_bits = xvc_obj.length; - total_bytes = (total_bits + 7) >> 3; - - buffer = (char *)kmalloc(total_bytes * 3, GFP_KERNEL); - if (!buffer) { - pr_info("OOM %u, op 0x%x, len %u bits, %u bytes.\n", - 3 * total_bytes, opcode, total_bits, total_bytes); - rv = -ENOMEM; - goto cleanup; - } - tms_buf = buffer; - tdi_buf = tms_buf + total_bytes; - tdo_buf = tdi_buf + total_bytes; - - rv = copy_from_user((void *)tms_buf, xvc_obj.tms_buf, total_bytes); - if (rv) { - pr_info("copy tmfs_buf failed: %d/%u.\n", rv, total_bytes); - goto cleanup; - } - rv = copy_from_user((void *)tdi_buf, xvc_obj.tdi_buf, total_bytes); - if (rv) { - pr_info("copy tdi_buf failed: %d/%u.\n", rv, total_bytes); - goto cleanup; - } - - /* set length register to 32 initially if more than one - * word-transaction is to be done */ - if (total_bits >= 32) - write_register(0x20, iobase, XVC_BAR_LENGTH_REG); - - for (bits = 0, bits_left = total_bits; bits < total_bits; bits += 32, - bits_left -= 32) { - unsigned int bytes = bits >> 3; - unsigned int shift_bytes = 4; - u32 tms_store = 0; - u32 tdi_store = 0; - u32 tdo_store = 0; - - if (bits_left < 32) { - /* set number of bits to shift out */ - write_register(bits_left, iobase, XVC_BAR_LENGTH_REG); - shift_bytes = (bits_left + 7) >> 3; - } - - memcpy(&tms_store, tms_buf + bytes, shift_bytes); - memcpy(&tdi_store, tdi_buf + bytes, shift_bytes); - - /* Shift data out and copy to output buffer */ - rv = xvc_shift_bits(iobase, tms_store, tdi_store, &tdo_store); - if (rv < 0) - goto cleanup; - - memcpy(tdo_buf + bytes, &tdo_store, shift_bytes); - } - - /* if testing bar access swap tdi and tdo bufferes to "loopback" */ - if (opcode == 0x2) { - char *tmp = tdo_buf; - - tdo_buf 
= tdi_buf; - tdi_buf = tmp; - } - - rv = copy_to_user((void *)xvc_obj.tdo_buf, tdo_buf, total_bytes); - if (rv) { - pr_info("copy back tdo_buf failed: %d/%u.\n", rv, total_bytes); - rv = -EFAULT; - goto cleanup; - } - -cleanup: - if (buffer) - kfree(buffer); - - mmiowb(); - - return rv; -} - -long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct xocl_xvc *xvc = filp->private_data; - return xvc_ioctl_helper(xvc, (void __user *)arg); -} - -static int char_open(struct inode *inode, struct file *file) -{ - struct xocl_xvc *xvc = NULL; - - /* pointer to containing structure of the character device inode */ - xvc = container_of(inode->i_cdev, struct xocl_xvc, sys_cdev); - /* create a reference to our char device in the opened file */ - file->private_data = xvc; - return 0; -} - -/* - * Called when the device goes from used to unused. - */ -static int char_close(struct inode *inode, struct file *file) -{ - return 0; -} - - -/* - * character device file operations for the XVC - */ -static const struct file_operations xvc_fops = { - .owner = THIS_MODULE, - .open = char_open, - .release = char_close, - .unlocked_ioctl = xvc_ioctl, -}; - -int xocl_xvc_device_init(struct xocl_xvc *xvc, struct device *dev) -{ - int err; -#ifdef __XVC_BAR_NUM__ - xcdev->bar = __XVC_BAR_NUM__; -#endif -#ifdef __XVC_BAR_OFFSET__ - xcdev->base = __XVC_BAR_OFFSET__; -#else - xvc->base = XVC_BAR_OFFSET_DFLT; -#endif - pr_info("xcdev 0x%p, offset 0x%lx.\n", - xvc, xvc->base); - - cdev_init(&xvc->sys_cdev, &xvc_fops); - xvc->sys_cdev.owner = THIS_MODULE; - xvc->instance = instance++; - xvc->sys_cdev.dev = MKDEV(MAJOR(xvc_dev), xvc->instance); - err = cdev_add(&xvc->sys_cdev, xvc->sys_cdev.dev, 1); - if (err) - return err; - - xvc->sys_device = device_create(xvc_class, dev, - xvc->sys_cdev.dev, - NULL, XVC_DEV_NAME "%d", xvc->instance); - if (IS_ERR(xvc->sys_device)) { - err = PTR_ERR(xvc->sys_device); - cdev_del(&xvc->sys_cdev); - } - - if (!err) - pr_info("XVC device 
instance %d initialized\n", xvc->instance); - return err; -} - - -int xocl_xvc_device_fini(struct xocl_xvc *xvc) -{ - device_destroy(xvc_class, xvc->sys_cdev.dev); - cdev_del(&xvc->sys_cdev); - return 0; -} - -int xocl_xvc_chardev_init() -{ - int err = 0; - - err = alloc_chrdev_region(&xvc_dev, 0, 16, XVC_DEV_NAME); - if (err < 0) - goto err_register_chrdev; - - xvc_class = class_create(THIS_MODULE, XVC_DEV_NAME); - if (IS_ERR(xvc_class)) { - err = PTR_ERR(xvc_class); - goto err_class_create; - } - return 0; - -err_class_create: - unregister_chrdev_region(xvc_dev, 16); -err_register_chrdev: - return err; -} - -void xocl_xvc_chardev_exit() -{ - unregister_chrdev_region(xvc_dev, 16); - class_destroy(xvc_class); -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xvc.h b/sdk/linux_kernel_drivers/xocl/xocl_xvc.h deleted file mode 100644 index 76b62c8e..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xvc.h +++ /dev/null @@ -1,44 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". 
- * - * Karen Xie - * Sonal Santan - * - ******************************************************************************/ - -#ifndef _XCL_XOCL_XVC_DRV_H_ -#define _XCL_XOCL_XVC_DRV_H_ - -#define XVC_BAR_OFFSET_DFLT 0x40000 - -struct xocl_xvc { - unsigned long base; /* bar access offset */ - unsigned int instance; - struct cdev sys_cdev; - struct device *sys_device; - void *__iomem bar; -}; - -int xocl_xvc_chardev_init(void); -void xocl_xvc_chardev_exit(void); -int xocl_xvc_device_init(struct xocl_xvc *xvc, struct device *dev); -int xocl_xvc_device_fini(struct xocl_xvc *xvc); - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xvc_pcie_ioctl.h b/sdk/linux_kernel_drivers/xocl/xvc_pcie_ioctl.h deleted file mode 100644 index c81d813b..00000000 --- a/sdk/linux_kernel_drivers/xocl/xvc_pcie_ioctl.h +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef _XIL_XVC_IOCALLS_POSIX_H_ -#define _XIL_XVC_IOCALLS_POSIX_H_ - -#ifndef _WINDOWS -// TODO: Windows build support -#include -#endif - -#define XIL_XVC_MAGIC 0x58564344 // "XVCD" - -struct xil_xvc_ioc { - unsigned opcode; - unsigned length; - unsigned char *tms_buf; - unsigned char *tdi_buf; - unsigned char *tdo_buf; -}; - -#define XDMA_IOCXVC _IOWR(XIL_XVC_MAGIC, 1, struct xil_xvc_ioc) - -#endif // _XIL_XVC_IOCALLS_POSIX_H_ -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/shared/bin/set_common_env_vars.sh b/shared/bin/set_common_env_vars.sh index c249a253..52a07815 100644 --- a/shared/bin/set_common_env_vars.sh +++ b/shared/bin/set_common_env_vars.sh @@ -75,3 +75,6 @@ PYTHONPATH=$python_lib:$PYTHONPATH export PATH=$(echo $PATH | sed -e 's/\(^\|:\)[^:]\+\/shared\/bin\/scripts\(:\|$\)/:/g; s/^://; s/:$//') PATH=$AWS_FPGA_REPO_DIR/shared/bin/scripts:$PATH + +# Enable xilinx licensing +export XILINX_ENABLE_AWS_WHITELIST=095707098027 diff --git a/shared/bin/set_common_functions.sh b/shared/bin/set_common_functions.sh index c7a05c91..94bd19bb 100644 --- a/shared/bin/set_common_functions.sh +++ b/shared/bin/set_common_functions.sh @@ -270,7 +270,7 @@ function patch_AR73068 { elif [[ "${base_vivado_version}" =~ "Vivado v2017.4" ]]; then patch_AR73068_2017_4 else - warn_msg "Unknown Vivado version: ${base_vivado_version}. Not applying Xilinx Patch AR73068" + info_msg "Xilinx Patch AR73068 not applicable for Vivado version: ${base_vivado_version}." 
fi } diff --git a/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py b/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py index 32a5e014..861d72d7 100644 --- a/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py +++ b/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py @@ -414,7 +414,7 @@ def get_sdaccel_example_run_cmd(examplePath, xilinxVersion): run_cmd += " {}".format(((description.get("cmd_args", None).replace(".xclbin",".hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin")).replace("PROJECT",".")).replace("BUILD","./xclbin")) assert run_cmd is not None, "Could not find run_cmd(em_cmd) or (host_exe) in the example description here {}".format(examplePath) - + return run_cmd @staticmethod @@ -663,6 +663,10 @@ def assert_non_zero_file(filter): filenames = glob.glob(filter) + # Removing .link.xclbin found in Vitis2020.1 + + filenames = [x for x in filenames if ".link." not in x] + assert len(filenames) > 0, "No {} file found in {}".format(filter, os.getcwd()) assert len(filenames) == 1, "More than 1 {} file found: {}\n{}".format(filter, len(filenames), filenames) diff --git a/shared/lib/check_src_headers.py b/shared/lib/check_src_headers.py index 8d390a3c..329896eb 100755 --- a/shared/lib/check_src_headers.py +++ b/shared/lib/check_src_headers.py @@ -554,7 +554,7 @@ def check_headers(dir): "sdk/linux_kernel_drivers/xocl/LICENSE$", "sdk/apps/virtual-ethernet/scripts/pktgen-ena-range.pkt", "sdk/apps/virtual-ethernet/scripts/pktgen-ena.pkt", - + "SDAccel/userspace/src/test", "SDAccel/examples/aws/kernel_3ddr_bandwidth/description.json", "SDAccel/examples/aws/helloworld_ocl_runtime/helloworld", @@ -576,6 +576,7 @@ def check_headers(dir): "SDAccel/examples/xilinx", "Vitis/aws_platform", "Vitis/examples/xilinx", + "Vitis/docs/Alveo_to_AWS_F1_Migration/example", ]) file_path_list = sorted(file_provider.get_files(dir)) diff --git a/supported_vivado_versions.txt b/supported_vivado_versions.txt index 38d9300d..26e509fb 100644 --- a/supported_vivado_versions.txt +++ 
b/supported_vivado_versions.txt @@ -1,12 +1,3 @@ -Vivado v2017.4 (64-bit) -Vivado v2017.4.op (64-bit) -Vivado v2018.2_AR71275_op (64-bit) -Vivado v2018.2_AR71715 (64-bit) -Vivado v2018.2.op (64-bit) -Vivado v2018.2 (64-bit) -Vivado v2018.3.op (64-bit) -Vivado v2018.3 (64-bit) -Vivado v2018.3_AR72667 (64-bit) Vivado v2019.1.op (64-bit) Vivado v2019.1 (64-bit) Vivado v2019.1_AR73068 (64-bit) @@ -15,3 +6,4 @@ Vivado v2019.1_AR72668 (64-bit) Vivado v2019.2 (64-bit) Vivado v2019.2_AR73068_op (64-bit) Vivado v2019.2_AR73068 (64-bit) +Vivado v2020.1 (64-bit) diff --git a/vitis_runtime_setup.sh b/vitis_runtime_setup.sh index 57a0b7a0..c6fb0681 100644 --- a/vitis_runtime_setup.sh +++ b/vitis_runtime_setup.sh @@ -152,7 +152,7 @@ check_kernel_ver check_xdma_driver check_edma_driver -if [[ "$VIVADO_TOOL_VERSION" =~ .*2019\.2.* ]]; then +if [[ "$VIVADO_TOOL_VERSION" =~ .*2019\.2.* || "$VIVADO_TOOL_VERSION" =~ .*2020\.1.* ]]; then info_msg "Xilinx Vivado version is $VIVADO_TOOL_VERSION" if [ $override == 1 ]; then @@ -193,7 +193,7 @@ if [[ "$VIVADO_TOOL_VERSION" =~ .*2019\.2.* ]]; then return 1 fi else - err_msg "Xilinx Vivado version is $VIVADO_TOOL_VERSION , only 2019.2 is supported for Vitis " + err_msg "Xilinx Vivado version is $VIVADO_TOOL_VERSION , only 2019.2 or 2020.1 are supported for Vitis " return 1 fi @@ -203,4 +203,4 @@ setup_runtime info_msg "Starting MPD" systemctl is-active --quiet mpd || sudo systemctl start mpd -info_msg "Vitis runtime check PASSED" \ No newline at end of file +info_msg "Vitis runtime check PASSED" diff --git a/vitis_setup.sh b/vitis_setup.sh index 8a4dc548..61948b83 100644 --- a/vitis_setup.sh +++ b/vitis_setup.sh @@ -171,7 +171,7 @@ info_msg " XILINX_VITIS is set to $XILINX_VITIS" # Update Xilinx Vitis Examples from GitHub info_msg "Using Vitis $RELEASE_VER" -if [[ $RELEASE_VER =~ .*2019\.2.* ]]; then +if [[ $RELEASE_VER =~ .*2019\.2.* || $RELEASE_VER =~ .*2020\.1.* ]]; then info_msg "Updating Xilinx Vitis Examples $RELEASE_VER" git submodule 
update --init -- Vitis/examples/xilinx_$RELEASE_VER export VIVADO_TOOL_VER=$RELEASE_VER @@ -183,7 +183,7 @@ if [[ $RELEASE_VER =~ .*2019\.2.* ]]; then fi ln -sf $VITIS_DIR/examples/xilinx_$RELEASE_VER $VITIS_DIR/examples/xilinx else - echo " $RELEASE_VER is not supported (2019.2 is supported).\n" + echo " $RELEASE_VER is not supported (2019.2 or 2020.1 is supported).\n" return 2 fi