diff --git a/.gitignore b/.gitignore index d2850ba8..5b80bc45 100644 --- a/.gitignore +++ b/.gitignore @@ -39,7 +39,7 @@ fio !SDAccel/aws_platform/xilinx_aws-vu9p-f1_1ddr-xpr-2pr_4_0/sw/lib/x86_64/libxilinxopencl.so !SDAccel/aws_platform/xilinx_aws-vu9p-f1_4ddr-xpr-2pr_4_0/sw/lib/x86_64/libxilinxopencl.so !SDAccel/aws_platform/xilinx_aws-vu9p-f1_4ddr-xpr-2pr-debug_4_0/sw/lib/x86_64/libxilinxopencl.so -!SDAccel/aws_platform/xilinx_aws-vu9p-f1_dynamic_5_0/sw/lib/x86_64/libxilinxopencl.so +!SDAccel/aws_platform/xilinx_aws-vu9p-f1_dynamic_5_0/sw/lib/x86_64/libxilinxopencl.so !SDAccel/aws_platform/xilinx_aws-vu9p-f1-04261818_dynamic_5_0/sw/lib/x86_64/libxilinxopencl.so nohup.out @@ -106,6 +106,13 @@ vivado*.log # Patches patches/* +# Temporary files +.batch +.temp + +.python-version + +# FireSim specific awsver.txt sdk/linux_kernel_drivers/xdma/.libxdma.o.d diff --git a/.gitmodules b/.gitmodules index 3cccf78d..3cad5231 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,13 @@ -[submodule "SDAccel/examples/xilinx_2017.4"] - path = SDAccel/examples/xilinx_2017.4 +[submodule "SDAccel/examples/xilinx_2019.1"] + path = SDAccel/examples/xilinx_2019.1 url = https://github.com/Xilinx/SDAccel_Examples.git - branch = aws_2017.4 -[submodule "SDAccel/examples/xilinx_2018.2"] - path = SDAccel/examples/xilinx_2018.2 - url = https://github.com/Xilinx/SDAccel_Examples.git - branch = 2018.2 -[submodule "SDAccel/examples/xilinx_2018.3"] - path = SDAccel/examples/xilinx_2018.3 - url = https://github.com/Xilinx/SDAccel_Examples.git - branch = master +[submodule "Vitis/examples/xilinx_2019.2"] + path = Vitis/examples/xilinx_2019.2 + branch = master + url = https://github.com/Xilinx/Vitis_Accel_Examples +[submodule "Vitis/examples/xilinx_2020.1"] + path = Vitis/examples/xilinx_2020.1 + url = https://github.com/Xilinx/Vitis_Accel_Examples +[submodule "Vitis/examples/xilinx_2020.2"] + path = Vitis/examples/xilinx_2020.2 + url = https://github.com/Xilinx/Vitis_Accel_Examples diff --git 
a/ERRATA.md b/ERRATA.md index a56958a4..5fbf6d82 100644 --- a/ERRATA.md +++ b/ERRATA.md @@ -5,14 +5,35 @@ [Shell\_04261818_Errata](./hdk/docs/AWS_Shell_ERRATA.md) ## HDK -* Multiple SDE instances per CL is not supported in this release. Support planned for future release. +* Multiple SDE instances per CL is not supported in this release. Support is planned for a future release. * DRAM Data retention is not supported for CL designs with less than 4 DDRs enabled -* Combinatorial loops in CL designs are not supported. -* Shell Model (sh_bfm) provided with testbench for design simulations, continues to drive read data on PCIM AXI rdata channel even when rready is de-asserted. Will be fixed in future release. +* Combinatorial loops in CL designs are not supported. +* Connecting one of the clocks provided from the shell (clk_main_a0, clk_extra_a1, etc...) directly to a BUFG in the CL is not supported by the Xilinx tools and may result in a non-functional clock. To work around this limitation, it is recommended to use an MMCM to feed the BUFG (clk_from_shell -> MMCM -> BUFG). Please refer to [Xilinx AR# 73360](https://www.xilinx.com/support/answers/73360.html) for further details. + +### Xilinx Design Advisory for UltraScale/UltraScale+ DDR4/DDR3 IP - Memory IP Timing Exceptions (AR# 73068) +AWS EC2 F1 customers using the DDR4 IP in customer logic (HDK or SDAccel/Vitis designs) may be impacted by a recent design advisory from Xilinx. + +AWS customers may experience hardware failures including: post calibration data errors and DQS gate tracking issues. The error condition is build dependent and errors would need to be detected on the first write/read access after a successful calibration to prevent further data corruption. + +To detect if your build is impacted by this bug, AWS recommends that all EC2 F1 customers utilizing the DDR4 IP in their designs run a TCL script on the design checkpoint (DCP) to determine whether the design is susceptible to this issue. 
If the check passes, your design is safe to use as the hardware will function properly. +If the check fails, the design is susceptible to the issue and will need to be regenerated using the same tool version with the AR 73068 patch. +For designs under development, we recommend applying the patch to your on-premises tools or update to developer kit v1.4.15. +For additional details, please refer to the [Xilinx Answer Record #73068](https://www.xilinx.com/support/answers/73068.html) + +We recommend using [Developer Kit Release v1.4.15a](https://github.com/aws/aws-fpga/releases/tag/v1.4.15a) or newer to allow for patching and fixing the DDR4 IP timing exception by re-generating the IP. + +### 2019.1 +* Vivado `compile_simlib` command fails to generate the following verilog IP libraries for the following simulators. +* Please refer to the Xilinx Answer record for details. + +| Library(verilog) | Simulator | Xilinx Answer Record | +|---|---|---| +| `sync_ip` | Cadence IES | [AR72795](https://www.xilinx.com/support/answers/72795.html) | +| `hdmi_gt_controller_v1_0_0` | Synopsys VCS | [AR72601](https://www.xilinx.com/support/answers/72601.html) | ## SDK ## SDAccel (For additional restrictions see [SDAccel ERRATA](./SDAccel/ERRATA.md)) * Virtual Ethernet is not supported when using SDAccel * DRAM Data retention is not supported for kernels that provision less than 4 DDRs -* Combinatorial loops in CL designs are not supported. +* Combinatorial loops in CL designs are not supported. 
diff --git a/FAQs.md b/FAQs.md index 4a12d4eb..52e5ce16 100644 --- a/FAQs.md +++ b/FAQs.md @@ -19,18 +19,17 @@ ## General F1 FAQs -**Q: How is developing a FPGA design for the cloud different from the common practice outside the cloud?** +**Q: How is developing an FPGA design for the cloud different from the common practice outside the cloud?** AWS designed its FPGA instances to provide a developer experience with ease of use and as similar as possible to on-premises development environment with the following differences (advantages): - - Developers don’t need to purchase / design / bringup or debug the physical hardware where the FPGA is hosted, nor the platform/server hardware: all the hardware is verified, monitored, and maintained by AWS. -- AWS provides an [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) that contains Xilinx Vivado development environment, with all the needed licenses. By using the FPGA developer AMI developers have a choice to a wide range of instance (different CPU and Memory configuration) allowing developers to optimize their development flow. +- AWS provides an [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) that contains Xilinx Vivado development environment, with all the needed licenses. By using the FPGA Developer AMI developers have a choice to a wide range of instance (different CPU and Memory configuration) allowing developers to optimize their development flow. - AWS provides cloud based debug tools: [Virtual JTAG](./hdk/docs/Virtual_JTAG_XVC.md) which is equivalent to debug using JTAG with on-premises development, and Virtual LED together with Virtual DIP Switch emulation the LED and DIP switches in typical development board. -- For developers who want to develop on-premises, Xilinx provides an [on-premises license](./hdk/docs/on_premise_licensing_help.md ) that matches all the needed components needed to be licensed for F1 development on premises. 
+- For developers who want to develop on-premises, Xilinx provides an [on-premises license](docs/on_premise_licensing_help.md ) that matches all the needed components needed to be licensed for F1 development on premises. - The developers' output is a Design Checkpoint (DCP) and not an FPGA bitstream: The FPGA bitstream is actually generated by AWS after the developer submits the DCP. @@ -46,30 +45,32 @@ There are two parts to answer this question: For developers that are familiar with AWS, there is almost no additional time to get right into F1 development environment, as long as the documentation and guidances in the [FPGA HDK/SDK](https://github.com/aws/aws-fpga) are followed. -For developers who are new to AWS, there is typically a one to two days ramp on AWS general topics such as launching EC2 instance, setting up S3 storage and its permissions, using AWS console, etc… For new developers to AWS, we recommend to start with the [FPGA Developer Forum](https://forums.aws.amazon.com/ann.jspa?annID=4448) +For developers who are new to AWS, there is typically a one to two days ramp on AWS general topics such as launching EC2 instance, setting up S3 storage and its permissions, using AWS console, etc… For new developers to AWS, we recommend starting with the [FPGA Developer Forum](https://forums.aws.amazon.com/ann.jspa?annID=4448) - On-going development flow: -Once developers complete their DCP, they submit the design through an AWS EC2 API to create the Amazon FGPA Image (aka AFI, this API call can take a few hours to complete, and the status of the process is reported in the S3 log bucket provides by the developers. AWS is working to improve the turn time of AFI generation. +Once developers create their DCP, they submit the design through an AWS EC2 API to create the Amazon FPGA Image (aka AFI). This API call can take a few hours to complete, and the status of the process is reported in the S3 log bucket provided by the developers. 
AWS is working to improve the turn time of AFI generation. **Q: What new skill sets will be required from an FPGA developer in the cloud?** -As AWS has taken all the non-differentiating, heavy lifting of hardware design, debug and implementation of PCIe tuning, FPGA I/O assignment, power, thermal management, and runtime health monitoring. Therefore AWS FPGA developers can focus on their own differentiating logic, instead of spending time on hardware bringup/debug and maintenance. +AWS takes care of all the non-differentiating, heavy lifting of hardware design, debug and implementation of PCIe tuning, FPGA I/O assignment, power, thermal management, and runtime health monitoring. + +This enables AWS FPGA developers to focus on their own differentiating logic, instead of spending time on hardware bring-up/debug and maintenance. -On the business side, AWS Marketplace (MP) provides FPGA developers the opportunity to sell hardware accelerations to all of AWS users: Ramping on AWS MP services, capabilities and commercial opportunities are recommended knowledge for developers interested in selling their AFIs on AWS MP. Education and research institutes can use AWS MP to distribute their research work ; having access to vast amounts of free [public data-sets](https://aws.amazon.com/public-datasets/ ) can be of value when running research hardware accelerations on AWS. +On the business side, AWS Marketplace (MP) provides FPGA developers the opportunity to sell hardware accelerations to all of AWS users: Ramping on AWS MP services, capabilities and commercial opportunities are recommended knowledge for developers interested in selling their AFIs on AWS MP. Education and research institutes can use AWS MP to distribute their research work. Having access to vast amounts of free [public data-sets](https://aws.amazon.com/public-datasets/ ) can be of value when running research hardware accelerations on AWS. 
Finally, AWS consulting and technology partners can offer their services through the [AWS Partner Network](https://aws.amazon.com/ec2/instance-types/f1/partners/) to AWS users that don’t have specific FPGA development knowledge, in order to develop FPGA accelerations in the cloud by themselves. -**Q: How is deployment FPGA in the cloud different compared to on-premises?** +**Q: How is developing on FPGAs in the cloud different from on-premises?** With AWS, FPGAs developers have a few advantages: - Low entry bar: AWS FPGAs are charged on an hourly rate instead of the many thousands of dollars spent on hardware/licenses and 12+ months time it takes to design/manufacture and ship a production-ready FPGA hardware solution. -- Scalability and Elasticity: Developers can ramp up / down the number of deployed FPGAs within seconds based on offered load. +- Scalability and Elasticity: Developers can ramp up / down the number of deployed FPGAs within seconds based on required load. - Share: FPGA developers can share their designs easily through AWS Marketplace or APN. This is important for businesses as well as education and research use. @@ -80,11 +81,11 @@ With AWS, FPGAs developers have a few advantages: The HDK includes the following main components: -1) Documentation for the Shell interface and other Custom Logic implementation guidelines, the Shell models needed for Custom Logic development, simulation models for the Shell, software for exercising. +1) Documentation for the Shell interface and other Custom Logic implementation guidelines, shell models needed for Custom Logic development, simulation models for the shell, scripts for building and simulating, etc. 2) Custom Logic examples, a getting started guide for building your own Custom Logic, and examples for starting a Custom Logic Design. -3) Scripts for building and submitting Amazon FPGA Image (AFI) from a Custom Logic. +3) Scripts for building and creating Amazon FPGA Images (AFI) from a Custom Logic. 
4) Reference software drivers to be used in conjunction with the Custom Logic examples. @@ -93,7 +94,7 @@ The HDK includes the following main components: **Q: What is in the AWS Shell?** -The AWS Shell is the part of the FPGA that is provided and managed by AWS: it implements the non-differentiated development and heavy lifting tasks like setting up the PCIe interface, FPGA image download, security, health monitoring, metrics and debug hooks. +The AWS Shell is the part of the FPGA that is provided and managed by AWS: it implements the non-differentiated development and heavy lifting tasks like setting up the PCIe interface, security, health monitoring, metrics and debug hooks. Every FPGA deployed in AWS cloud includes an AWS Shell, and the developer Custom Logic (CL) interfaces with the available AWS Shell interfaces. @@ -102,7 +103,15 @@ Every FPGA deployed in AWS cloud includes an AWS Shell, and the developer Custom It is the compiled FPGA code that is loaded into an FPGA in AWS for performing the Custom Logic (CL) function created by the developer. AFIs are maintained by AWS according and associated with the AWS account that created them. The AFI includes the CL and AWS FPGA Shell. An AFI ID is used to reference a particular AFI from an F1 instance. -The developer can create multiple AFIs at no extra cost, up to a defined limited (typically 100 AFIs per region per AWS account). An AFI can be loaded into as many FPGAs as needed. +The developer can create multiple AFIs at no extra cost, up to a defined limit (typically 500 AFIs per region per AWS account). An AFI can be loaded into as many FPGAs as needed. + +**Q: How do I increase my AFI limit?** + +You can increase your AFI limit by creating an [AWS Support Case](https://console.aws.amazon.com/support/home#/case/create). +1. Select the `Service Limit Increase` tab +2. In the `Limit Type`, select `EC2 FPGA` +3. Select the region(s) where you want your limit to be increased +4. 
Add justification for the limit increase. **Q: What regions are supported?** @@ -110,12 +119,12 @@ The developer can create multiple AFIs at no extra cost, up to a defined limited AWS FPGA generation and EC2 F1 instances are supported in us-east-1 (N. Virginia), us-west-2 (Oregon), eu-west-1 (Ireland) and us-gov-west-1 (GovCloud US). - **Q: What is the process for creating an AFI?** -The AFI process starts by creating Custom Logic (CL) code that conforms to the [Shell Specification](./hdk/docs/AWS_Shell_Interface_Specification.md). Then, the CL must be compiled using the HDK scripts which leverages Vivado tools to create a Design Checkpoint (DCP). That DCP is submitted to AWS for generating an AFI using the `aws ec2 create-fpga-image` API. - -Use the AWS CLI `describe-fpga-images` API to get information about the created AFIs using the AFI ID provided by `create-fpga-image`, or to list available AFIs for your account. See [describe-fpga-images](./hdk/docs/describe_fpga_images.md) document for details on how to use this API. +* The AFI process starts by creating Custom Logic (CL) code that conforms to the [Shell Specification](./hdk/docs/AWS_Shell_Interface_Specification.md). +* Then, the CL must be compiled using the HDK scripts which leverages Vivado tools to create a Design Checkpoint (DCP). +* That DCP is submitted to AWS for generating an AFI using the `aws ec2 create-fpga-image` API. + * Use the AWS CLI `describe-fpga-images` API to get information about the created AFIs using the AFI ID provided by `create-fpga-image`, or to list available AFIs for your account. See [describe-fpga-images](./hdk/docs/describe_fpga_images.md) document for details on how to use this API. **Q: Can I load an AFI on every region AWS FPGA is supported?** @@ -138,19 +147,13 @@ Yes, use [delete-fpga-image](./hdk/docs/delete_fpga_image.md) to delete an AFI i Use [delete-fpga-image](./hdk/docs/delete_fpga_image.md) carefully. 
Once all AFIs of the same global AFI ID are deleted, the AFIs cannot be recovered from deletion. Review [IAM policy best practices](http://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege) to restrict access to this API. -**Q: Can I share an AFI with other AWS accounts?** - -Yes, sharing allows accounts other than the owner account to load and use an AFI. Use [modify-fpga-image-attribute](./hdk/docs/fpga_image_attributes.md) API to update `loadPermission` attribute to grant/remove AFI load permission. AWS AFIs support two load permission types: -* `UserId`: share AFI with specific AWS accounts using account IDs. -* `UserGroups`: only supports `all` group to make an AFI public or private. - -Use [reset-fpga-image-attribute](./hdk/docs/fpga_image_attributes.md) API to revoke all load permissions. +**Q: How do I increase my AFI limit?** -**Q: Can I delete an AFI?** +AFI limit increases may be requested by opening up a Support Case from your [EC2 Support Console](https://console.aws.amazon.com/support/cases#/create) -Yes, use [delete-fpga-image](./hdk/docs/delete_fpga_image.md) to delete an AFI in a specific region. Deleting an AFI in one region does not affect AFIs in other regions. +Select a `Service limit increase` of the Limit Type - `EC2 FPGA` for the region where a limit increase is needed. -Use [delete-fpga-image](./hdk/docs/delete_fpga_image.md) carefully. Once all AFIs of the same global AFI ID are deleted, the AFIs cannot be recovered from deletion. Review [IAM policy best practices](http://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege) to resrict access to this API. +You will hear back from our support team once the limit is increased. 
**Q: Can I bring my own bitstream for loading on an F1 FPGA?** @@ -175,7 +178,7 @@ AWS prefers not to limit developers to a specific template in terms of how we ad If you decide to use the [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ), Xilinx licenses for simulation, encryption, SDAccel and Design Checkpoint generation are included at no additional cost. -If you want to run using other methods or on a local machine, you will need to obtain any necessary licenses, specifically you will need to have setup the appropriate Xilinx Vivado license. For more details, please refer to [On-premises licensing help](./hdk/docs/on_premise_licensing_help.md) +If you want to run using other methods or on a local machine, you will need to obtain any necessary licenses, specifically you will need to have setup the appropriate Xilinx Vivado license. For more details, please refer to [On-premises licensing help](docs/on_premise_licensing_help.md) **Q: Does AWS provide physical FPGA boards for on-premises development?** @@ -185,20 +188,25 @@ No. AWS supports a cloud-only development model and provides the necessary eleme **Q: Do I need to design for a specific power envelope?** -Yes, the Xilinx UltraScale+ FPGA devices used on the F1 instances have a maximum power limit that must be maintained. If a loaded AFI consumes maximum power, the F1 instance will automatically gate the input clocks provided to the AFI in order to prevent errors within the FPGA. Developers are provided warnings when power (Vccint) is greater than 85 watts. Above that level, the CL is in danger of being clock gated. [Additional details on AFI power](hdk/docs/afi_power.md) - +Yes, the Xilinx UltraScale+ FPGA devices used on the F1 instances have a maximum power limit that must be maintained. +If a loaded AFI consumes maximum power, the F1 instance will automatically gate the input clocks provided to the AFI in order to prevent errors within the FPGA. 
+Developers are provided warnings when power (Vccint) is greater than 85 watts. Above that level, the CL is in danger of being clock gated. -**Q: What IP blocks are provided in the HDK?** +[Additional details on AFI power](hdk/docs/afi_power.md) -The HDK includes IP for AWS Shell and DRAM interface controllers. Inside the Shell, there is a PCIe interface, DMA Engine, and one DRAM interface controller. These blocks are only accessible via the AXI interfaces defined by the Shell-Custom Logic interface. The HDK provides additional IP blocks for the other DRAM interfaces, enabling up to 3 additional DRAM interfaces instantiated by the developer in the Custom Logic region. - **Note** * future versions of the HDK will include IP for the FPGA Link interface.* +**Q: What IP blocks are provided in the HDK?** +The HDK includes IP for AWS Shell and DRAM interface controllers. +Inside the Shell, there is a PCIe interface, DMA Engine, and one DRAM interface controller. +These blocks are only accessible via the AXI interfaces defined by the Shell-Custom Logic interface. +The HDK provides additional IP blocks for the other DRAM interfaces, enabling up to 3 additional DRAM interfaces instantiated by the developer in the Custom Logic region. **Q: Can I use other IP blocks from Xilinx or other 3rd parties?** -Yes. Developers are free to use any IP blocks within the Custom Logic region. Those can be 3rd party IPs or IP available in the Vivado IP catalog. +Yes. Developers are free to use any IP blocks within the Custom Logic region. +Those can be 3rd party IPs or IP available in the Vivado IP catalog. **Note** * AWS supports only the IP blocks contained in the HDK.* @@ -207,19 +215,23 @@ Yes. Developers are free to use any IP blocks within the Custom Logic region. 
Th ## Getting Started FAQs **Q: What AWS knowledge do I need to learn before I can develop accelerators and run on AWS F1 instances?** -[AWS Getting Started Resource Center](https://aws.amazon.com/getting-started/) has lots of resources to help developers get started. For F1 development, launching linux virtual machines (EC2) and storing and retrieving files from S3 are required skills. +[AWS Getting Started Resource Center](https://aws.amazon.com/getting-started/) has lots of resources to help developers get started. +For F1 development, launching EC2 instances and storing and retrieving files from S3 are required skills. **Q: What do I need to get started on building accelerators for FPGA instances?** -Getting started requires downloading the latest HDK and SDK from the AWS FPGA GitHub repository. The HDK and SDK provide the needed code and information for building FPGA code. The HDK provides all the information needed for developing an FPGA image from source code, while the SDK provides all the runtime software for managing the Amazon FPGA Image (AFI) loaded into the F1 instance FPGA. +Getting started requires downloading the latest HDK and SDK from the [AWS FPGA GitHub repository](https://github.com/aws/aws-fpga). +The HDK and SDK provide the needed code and information for building FPGA code. The HDK provides all the information needed for developing an FPGA image from source code, while the SDK provides all the runtime software for managing the Amazon FPGA Image (AFI) loaded into the F1 instance FPGA. -Typically, FPGA development process requires a simulator to perform functional test on the source code, and a Vivado tool set for synthesis of source code into compiled FPGA code. The FPGA Developer AMI provided by AWS includes the complete Xilinx Vivado tools for simulation (XSIM) and synthesis of FPGA. 
+Typically, FPGA development process requires a simulator to perform functional test on the source code, and a Vivado tool set for synthesis of source code into compiled FPGA code. +The FPGA Developer AMI provided by AWS includes the complete Xilinx Vivado tools for simulation (XSIM) and synthesis of FPGA. **Q: How do I develop accelerator code for an FPGA in an F1 instance?** -Start with the [Shell interface specification](./hdk/docs/AWS_Shell_Interface_Specification.md). This document describes the interface between Custom Logic and the AWS Shell. All Custom Logic for an accelerator resides within the Custom Logic region of the F1 FPGA. +Start with the [Shell interface specification](./hdk/docs/AWS_Shell_Interface_Specification.md). +This document describes the interface between Custom Logic and the AWS Shell. All Custom Logic for an accelerator resides within the Custom Logic region of the F1 FPGA. The [HDK README](./hdk/README.md) walks the developer through the steps to build an FPGA image from one of the provided examples as well as starting a new code. @@ -265,12 +277,19 @@ We recommend using the latest available version to be able to use the expanding ## Marketplace FAQs **Q: What does publishing my AFI/AMI to AWS Marketplace enables?** -FPGA Developers can share or sell their AFI/AMI using the AWS Marketplace to other AWS users. Once in Marketplace, AWS users can launch an F1 instance with that AFI/AMI combination with the 1-click deployment feature. Marketplace Sellers can take advantage of the Management Portal to better build and analyze their business, using it to drive marketing activities and customer adoption. The metering, billing, collections, and disbursement of payments are managed by AWS, allowing developers to focus on marketing their solution. Please check out [AWS Marketplace Tour](https://aws.amazon.com/marketplace/management/tour/) for more details on how to become an AWS Marketplace seller, how to set pricing and collect metrics. 
+FPGA Developers can share or sell their AFI/AMI using the AWS Marketplace to other AWS users. +Once in Marketplace, AWS users can launch an F1 instance with that AFI/AMI combination with the 1-click deployment feature. +Marketplace Sellers can take advantage of the Management Portal to better build and analyze their business, using it to drive marketing activities and customer adoption. +The metering, billing, collections, and disbursement of payments are managed by AWS, allowing developers to focus on marketing their solution. + +Please check out [AWS Marketplace Tour](https://aws.amazon.com/marketplace/management/tour/) for more details on how to become an AWS Marketplace seller, how to set pricing and collect metrics. **Q: How can I publish my AFI to AWS Marketplace?** -First, you need to [register as a Marketplace Seller](https://aws.amazon.com/marketplace/management/register/). In parallel you should create an AMI that includes the drivers and runtime libraries needed to use your AFI. Finally, follow the [standard flow](https://aws.amazon.com/marketplace/help/200940360) to publish your AMI on AWS marketplace, providing the associated AFI IDs. In other words, AFIs are not published directly on AWS marketplace, rather AFI(s) should be associated with an AMI that gets published. +* First, you need to [register as a Marketplace Seller](https://aws.amazon.com/marketplace/management/register/). +* In parallel you should create an AMI that includes the drivers and runtime libraries needed to use your AFI. +* Finally, follow the [standard flow](https://aws.amazon.com/marketplace/help/200940360) to publish your AMI on AWS marketplace, providing the associated AFI IDs. In other words, AFIs are not published directly on AWS marketplace, rather AFI(s) should be associated with an AMI that gets published. **Q: Do AWS Marketplace customers see FPGA source code or a bitstream?** @@ -281,7 +300,11 @@ Neither, no FPGA internal design code is exposed. 
AWS Marketplace customers that ## F1 Instance and Runtime Tools FAQs **Q: What OS can run on the F1 instance?** -CentOS 7.x is supported and tested on AWS EC2 F1 instance. Please see [release notes](./RELEASE_NOTES.md) for a description of compatible kernel & OS versions supported by a specific Developer kit version. Developers can utilize the source code in the SDK directory to compile other variants of Linux for use on F1. Windows OSs are not supported on F1. +CentOS 7.x is supported and tested on AWS EC2 F1 instance. +Please see [release notes](./RELEASE_NOTES.md) for a description of compatible Kernel & OS versions supported by a specific Developer kit version. +Developers can utilize the source code in the SDK directory to compile other variants of Linux for use on F1. + +NOTE: Windows OSs are not supported on F1. **Q: What are the interfaces between the F1 instance host CPU and the FPGAs?** @@ -293,7 +316,6 @@ The first is the FPGA Image Management Tools. These APIs are detailed in the [SD The second type of interface is direct address access to the Application PCIe Physical Functions (PF) of the FPGA. There is no API for this access. Rather, there is direct access to resources in the Custom Logic (CL) region or Shell that can be accessed by software written on the instance. For example, the ChipScope software (Virtual JTAG) uses address space in a PF to provide FPGA debug support. Developers can create any API to the resources in their CL. See the [Shell Interface Specification](./hdk/docs/AWS_Shell_Interface_Specification.md) for more details on the address space mapping as seen from the instance. 
- **Q: Can I integrate the FPGA Image Management Tools in my application?** Yes, In addition to providing the [FPGA Management Tools](./sdk/userspace/fpga_mgmt_tools) as linux shell commands, the [SDK Userspace](./sdk/userspace) directory includes files in the `include` and `hal` to integrate the FPGA Management Tools into the developer's application(a) and avoid calling linux shell commands. @@ -326,8 +348,8 @@ The AWS infrastructure scrubs FPGA state on termination of an F1 instance and an **Q: How do the FPGAs connect to the x86 CPU?** -Each FPGA in F1 is connected to the instance CPU via a x16 PCIe Gen3 interface. Physical Functions (PF) within the FPGA are directly mapped into the F1 instance. Software on the instance can directly access the address in the PF to take advantage of the high performance PCIe interface. - +Each FPGA in F1 is connected to the instance CPU via a x16 PCIe Gen3 interface. +Physical Functions (PF) within the FPGA are directly mapped into the F1 instance. Software on the instance can directly access the address in the PF to take advantage of the high performance PCIe interface. **Q: Can the FPGAs on F1 directly access Amazon’s network?** @@ -346,8 +368,10 @@ No. The FPGAs do not have direct access to the SSDs on F1. The SSDs on F1 are hi ## Development Languages FAQs **Q: Which HDL languages are supported?** -For RTL level development: Verilog and VHDL are both supported in the FPGA Developer AMI and in generating a Design Checkpoint. The Xilinx Vivado tools and simulator support mixed mode simulation of Verilog and VHDL. The AWS Shell is written in Verilog. Support for mixed mode simulation may vary if developers use other simulators. Check your simulator documentation for Verilog/VHDL/System Verilog support. - +For RTL level development: Verilog and VHDL are both supported in the FPGA Developer AMI and in generating a Design Checkpoint. +The Xilinx Vivado tools and simulator support mixed mode simulation of Verilog and VHDL. 
+The AWS Shell is written in Verilog. Support for mixed mode simulation may vary if developers use other simulators. +Check your simulator documentation for Verilog/VHDL/System Verilog support. **Q: Is OpenCL and/or SDAccel Supported?** @@ -355,41 +379,44 @@ For RTL level development: Verilog and VHDL are both supported in the FPGA Devel Yes. Please review the [SDAccel README to get started](SDAccel/README.md) - **Q: Can I use High Level Synthesis(HLS) Tools to generate an AFI?** -Yes. Vivado HLS and SDAccel are directly supported through the FPGA Developer AMI. Any other HLS tool that generates compatible Verilog or VHDL for Vivado input can also be used for writing in HLS. - +Yes. Vivado HLS and SDAccel are directly supported through the FPGA Developer AMI. +Any other HLS tool that generates compatible Verilog or VHDL for Vivado input can also be used for writing in HLS. **Q: What RTL simulators are supported?** The FPGA Developer AMI has built-in support for the Xilinx XSIM simulator. All licensing and software for XSIM is included in the FPGA Developer AMI when launched. -Support for other simulators is included through the bring-your-own license in the FPGA Developer AMI. AWS tests the HDK with Synopsys VCS, Mentor Questa/ModelSim, and Cadence Incisive. Licenses for these simulators must be acquired by the developer and are not available with AWS FPGA Developer AMI. +AWS tests the HDK with Synopsys VCS, Mentor Questa/ModelSim, and Cadence Incisive. Licenses for these simulators must be acquired by the developer and are not available with AWS FPGA Developer AMI. ## FPGA Specific FAQs **Q: What FPGA is used in AWS EC2 F1 instance?** -The FPGA for F1 is the Xilinx Ultrascale+ VU9P device with the -2 speed grade. The HDK scripts have the compile scripts needed for the VU9P device. +The FPGA for F1 is the Xilinx Ultrascale+ VU9P device with the -2 speed grade. +The HDK scripts have the compile scripts needed for the VU9P device. 
**Q: What is FPGA Direct and how fast is it?** -FPGA Direct is FPGA to FPGA low latency high throughput peer communication through the PCIe links on each FPGA, where all FPGAs shared the same memory space. The PCIe BAR space in the Application PF (see [Shell Interface specification](./hdk/docs/AWS_Shell_Interface_Specification.md) for more details) allows the developer to map regions of the Custom Logic, such as external DRAM space, to other FPGAs. The implementation of communication protocol and data transfer engine across the PCIe interface using FPGA direct is left to the developer. +FPGA Direct is FPGA to FPGA low latency high throughput peer communication through the PCIe links on each FPGA, where all FPGAs share the same memory space. +The PCIe BAR space in the Application PF (see [Shell Interface specification](./hdk/docs/AWS_Shell_Interface_Specification.md) for more details) allows the developer to map regions of the Custom Logic, such as external DRAM space, to other FPGAs. +The implementation of communication protocol and data transfer engine across the PCIe interface using FPGA Direct is left to the developer. **Q: What is FPGA Link and how fast is it?** -FPGA Link is based on 4 x 100Gbps links on each FPGA card. The FPGA Link is organized as a ring, with 2 x 100Gbps links to each adjacent card. This enables each FPGA card to send/receive data from an adjacent card at 200Gbps speeds. This is a unsupported feature planned for future release. Details on the FPGA Link interface will be provided in the Shell Interface specification when available. +FPGA Link is based on 4 x 100Gbps links on each FPGA card. The FPGA Link is organized as a ring, with 2 x 100Gbps links to each adjacent card. This enables each FPGA card to send/receive data from an adjacent card at 200Gbps speeds. +This is an unsupported feature planned for a future release. Details on the FPGA Link interface will be provided in the Shell Interface specification when available.
**Q: What protocol is used for FPGA link?** The FPGA link is a generic raw streaming interface, no transport protocol is provided for it by AWS. It is expected that developers would take advantage of standard PCIe protocol, Ethernet protocol, or Xilinx's (reliable) Aurora protocol layer for this interface. -This is a unsupported feature planned for future release. Details on the Shell Interface to the FPGA Link IP blocks are provided in the [Shell Interface specification](./hdk/docs/AWS_Shell_Interface_Specification.md) when available. +This is an unsupported feature planned for future release. Details on the Shell Interface to the FPGA Link IP blocks are provided in the [Shell Interface specification](./hdk/docs/AWS_Shell_Interface_Specification.md) when available. **Q: What clock speed does the FPGA utilize?** @@ -458,13 +485,37 @@ Parent process (pid 8160) has died. This helper process will now exit *For On Premise runs:* -You would need a valid [on premise license](./hdk/docs/on_premise_licensing_help.md) provided by Xilinx. +You would need a valid [on premise license](docs/on_premise_licensing_help.md) provided by Xilinx. *For runs using the FPGA Developer AMI:* Please contact us through [AWS FPGA Developers forum](https://forums.aws.amazon.com/forum.jspa?forumID=243) - **Q: Why does Vivado in GUI mode show up blank ? or Why does Vivado in GUI mode show up as an empty window?** We have seen this issue when running RDP in 32 bit color mode where Vivado shows up as a blank window. Please modify RDP options to choose any color depth less than 32 bit and try re-connecting. + +**Q: Why did my AFI creation fail with `***ERROR***: DCP has DNA_PORT instantiation, ingestion failed, exiting`?** + +AWS does not support creating AFIs with the Device DNA instantiated within your design. Please create your design without instantiating the DNA_PORT primitive to be able to create your AFI. + +**Q: How do I know which HDK version I have on my instance/machine?**
+ +Look for the ./hdk/hdk_version.txt file. + +**Q: How do I know what my Shell version is?** + +The Shell version of an FPGA slot is available through the FPGA Image Management tools after an AFI has been loaded. +See the description of `fpga-describe-local-image` for more details on retrieving the shell version from a slot. +Prior to loading an AFI, the state of the FPGA (including shell version) is undefined and non-deterministic. + +**Q: How do I know what version of FPGA Image management tools are running on my instance?** + +The FPGA Image management tools version is reported with any command executed from these tools. +See the description of `fpga-describe-local-image` for more details. + +**Q: How do I update my existing design with a new release?** + +1. Start by pulling changes from a new [aws-fpga github release](https://github.com/aws/aws-fpga) +2. If the [AWS Shell Interface Specification](./hdk/docs/AWS_Shell_Interface_Specification.md) has changed, update your CL design to conform to the new specification. +3. Follow the process for AFI generation diff --git a/Jenkinsfile b/Jenkinsfile index 05a558f3..4f980364 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -19,6 +19,8 @@ properties([parameters([ booleanParam(name: 'test_sdaccel_scripts', defaultValue: true, description: 'Test SDAccel setup scripts'), booleanParam(name: 'test_all_sdaccel_examples_fdf', defaultValue: false, description: 'Run Full Developer Flow testing of all SDAccel examples. This overrides test_helloworld_sdaccel_example'), booleanParam(name: 'test_helloworld_sdaccel_example_fdf', defaultValue: true, description: 'Run Full Developer Flow testing of the Hello World SDAccel example'), + booleanParam(name: 'test_all_vitis_examples_fdf', defaultValue: false, description: 'Run Full Developer Flow testing of all Vitis examples. 
This overrides test_helloworld_sdaccel_example'), + booleanParam(name: 'test_helloworld_vitis_example_fdf', defaultValue: true, description: 'Run Full Developer Flow testing of the Hello World Vitis example'), booleanParam(name: 'debug_dcp_gen', defaultValue: false, description: 'Only run FDF on cl_hello_world. Overrides test_*.'), booleanParam(name: 'debug_fdf_uram', defaultValue: false, description: 'Debug the FDF for cl_uram_example.'), booleanParam(name: 'fdf_ddr_comb', defaultValue: false, description: 'run FDF for cl_dram_dma ddr combinations.'), @@ -44,9 +46,11 @@ boolean test_hdk_fdf = params.get('test_hdk_fdf') boolean test_sdaccel_scripts = params.get('test_sdaccel_scripts') boolean test_all_sdaccel_examples_fdf = params.get('test_all_sdaccel_examples_fdf') boolean test_helloworld_sdaccel_example_fdf = params.get('test_helloworld_sdaccel_example_fdf') +boolean test_all_vitis_examples_fdf = params.get('test_all_vitis_examples_fdf') +boolean test_helloworld_vitis_example_fdf = params.get('test_helloworld_vitis_example_fdf') boolean disable_runtime_tests = params.get('disable_runtime_tests') -def runtime_sw_cl_names = ['cl_dram_dma', 'cl_hello_world'] +def runtime_sw_cl_names = ['cl_dram_dma', 'cl_hello_world', 'cl_sde'] def dcp_recipe_cl_names = ['cl_dram_dma', 'cl_hello_world'] def dcp_recipe_scenarios = [ // Default values are tested in FDF: A0-B0-C0-DEFAULT @@ -68,6 +72,7 @@ def fdf_test_names = [ 'cl_dram_dma[A1-B0-C0-DEFAULT]', 'cl_hello_world[A0-B0-C0-DEFAULT]', 'cl_hello_world_vhdl', + 'cl_sde[A0-B0-C0-DEFAULT]', 'cl_uram_example[2]', 'cl_uram_example[3]', 'cl_uram_example[4]' @@ -75,7 +80,7 @@ def fdf_test_names = [ boolean debug_dcp_gen = params.get('debug_dcp_gen') if (debug_dcp_gen) { - fdf_test_names = ['cl_hello_world[A0-B0-C0-DEFAULT]'] + fdf_test_names = ['cl_sde[A0-B0-C0-DEFAULT]'] test_markdown_links = false test_sims = false test_runtime_software = false @@ -121,61 +126,85 @@ task_label = [ ] // Put the latest version last -def 
xilinx_versions = [ '2017.4', '2018.2', '2018.3' ] +def xilinx_versions = [ '2019.1', '2019.2', '2020.1' , '2020.2' ] + +def vitis_versions = ['2019.2', '2020.1' , '2020.2' ] // We want the default to be the latest. def default_xilinx_version = xilinx_versions.last() def dsa_map = [ - '2017.4' : [ 'DYNAMIC_5_0' : 'dyn'], - '2018.2' : [ 'DYNAMIC_5_0' : 'dyn'], - '2018.3' : [ 'DYNAMIC_5_0' : 'dyn'] + '2019.1' : [ 'DYNAMIC_5_0' : 'dyn'], +] + +def xsa_map = [ + '2019.2' : [ 'DYNAMIC':'dyn'], + '2020.1' : [ 'DYNAMIC':'dyn'], + '2020.2' : [ 'DYNAMIC':'dyn'] ] def sdaccel_example_default_map = [ - '2017.4' : [ - 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl', - 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl', - 'kernel_3ddr_bandwidth_4ddr': 'SDAccel/examples/aws/kernel_3ddr_bandwidth', - 'Kernel_Global_Bw_4ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/kernel_global_bandwidth', + '2019.1' : [ + 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/hello_world/helloworld_ocl', + 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl_5.0_shell', + 'Kernel_Global_Bw_4ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/kernel_global_bandwidth_5.0_shell', 'RTL_Vadd_Debug': 'SDAccel/examples/xilinx/getting_started/rtl_kernel/rtl_vadd_hw_debug' + ] +] + +def vitis_example_default_map = [ + '2019.2' : [ + 'Hello_World_1ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_helloworld', + 'Gmem_2Banks_2ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_gmem_2banks', + 'Kernel_Global_Bw_4ddr': 'Vitis/examples/xilinx/cpp_kernels/kernel_global_bandwidth', + 'RTL_Vadd_Debug': 'Vitis/examples/xilinx/rtl_kernels/rtl_vadd_hw_debug' ], - '2018.2' : [ - 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl', - 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl', - 'kernel_3ddr_bandwidth_4ddr': 
'SDAccel/examples/aws/kernel_3ddr_bandwidth', - 'Kernel_Global_Bw_4ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/kernel_global_bandwidth', - 'RTL_Vadd_Debug': 'SDAccel/examples/xilinx/getting_started/rtl_kernel/rtl_vadd_hw_debug' + '2020.1' : [ + 'Hello_World_1ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_helloworld', + 'Gmem_2Banks_2ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_gmem_2banks', + 'Kernel_Global_Bw_4ddr': 'Vitis/examples/xilinx/cpp_kernels/kernel_global_bandwidth', + 'RTL_Vadd_Debug': 'Vitis/examples/xilinx/rtl_kernels/rtl_vadd_hw_debug', + 'gemm_blas': 'Vitis/examples/xilinx/library_examples/gemm', + 'gzip_app': 'Vitis/examples/xilinx/library_examples/gzip_app' ], - '2018.3' : [ - 'Hello_World_1ddr': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl', - 'Gmem_2Banks_2ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/gmem_2banks_ocl', - 'Kernel_Global_Bw_4ddr': 'SDAccel/examples/xilinx/getting_started/kernel_to_gmem/kernel_global_bandwidth', - 'RTL_Vadd_Debug': 'SDAccel/examples/xilinx/getting_started/rtl_kernel/rtl_vadd_hw_debug' + '2020.2' : [ + 'Hello_World_1ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_helloworld', + 'Gmem_2Banks_2ddr': 'Vitis/examples/xilinx/ocl_kernels/cl_gmem_2banks', + 'Kernel_Global_Bw_4ddr': 'Vitis/examples/xilinx/cpp_kernels/kernel_global_bandwidth', + 'RTL_Vadd_Debug': 'Vitis/examples/xilinx/rtl_kernels/rtl_vadd_hw_debug', + 'gemm_blas': 'Vitis/examples/xilinx/library_examples/gemm', + 'gzip_app': 'Vitis/examples/xilinx/library_examples/gzip_app' ] ] def simulator_tool_default_map = [ - '2017.4' : [ - 'vivado': 'xilinx/SDx/2017.4_04112018', - 'vcs': 'vcs-mx/L-2016.06-1', - 'questa': 'questa/10.6b', - 'ies': 'incisive/15.20.063' - ], - '2018.2' : [ - 'vivado': 'xilinx/SDx/2018.2_06142018', - 'vcs': 'vcs-mx/N-2017.12-SP1-1', + '2019.1' : [ + 'vivado': 'xilinx/SDx/2019.1.op2552052', + 'vcs': 'synopsys/vcs-mx/N-2017.12-SP2', 'questa': 'questa/10.6c_1', 'ies': 'incisive/15.20.063' ], - '2018.3' 
: [ - 'vivado': 'xilinx/SDx/2018.3_1207', - 'vcs': 'vcs-mx/N-2017.12-SP1-1', - 'questa': 'questa/10.6c_1', + '2019.2' : [ + 'vivado': 'xilinx/Vivado/2019.2', + 'vcs': 'synopsys/vcs-mx/O-2018.09-SP2-1', + 'questa': 'questa/2019.2', 'ies': 'incisive/15.20.063' + ], + '2020.1' : [ + 'vivado': 'xilinx/Vivado/2020.1', + 'vcs': 'synopsys/vcs-mx/P-2019.06-SP1-1', + 'questa': 'questa/2019.4', + 'ies': 'incisive/15.20.079' + ], + '2020.2' : [ + 'vivado': 'xilinx/Vivado/2020.2', + 'vcs': 'synopsys/vcs-mx/Q-2020.03', + 'questa': 'questa/2020.2', + 'ies': 'incisive/15.20.083' ] ] +// ies 073 is not available for download // Get serializable entry set @NonCPS def entrySet(m) {m.collect {k, v -> [key: k, value: v]}} @@ -196,7 +225,7 @@ def get_task_label(Map args=[ : ]) { } if (params.internal_simulations) { echo "internal simulation agent requested" - task_label = 'f1' + task_label = 'f1_3rd_party_sims' } echo "Label Requested: $task_label" @@ -269,7 +298,7 @@ def test_run_py_bindings() { try { sh """ set -e - source $WORKSPACE/shared/tests/bin/setup_test_sdk_env_al2.sh "py_bindings" + source $WORKSPACE/shared/tests/bin/setup_test_sdk_env.sh "py_bindings" python2.7 -m pytest -v $WORKSPACE/${test} --junit-xml $WORKSPACE/${report_file} """ } catch (exc) { @@ -367,7 +396,7 @@ def test_fpga_all_slots() { } catch (exception) { echo "Test FPGA Tools All Slots failed" - input message: "1 slot FPGA Tools test failed. Click Proceed or Abort when you are done debugging on the instance." + input message: "All slot FPGA Tools test failed. Click Proceed or Abort when you are done debugging on the instance." 
throw exception } finally { @@ -395,7 +424,6 @@ def test_run_non_root_access() { source $WORKSPACE/shared/tests/bin/setup_test_sdk_env.sh newgrp fpgauser export SDK_DIR="${WORKSPACE}/sdk" - source $WORKSPACE/shared/tests/bin/setup_test_env.sh python2.7 -m pytest -v $WORKSPACE/${test} --junit-xml $WORKSPACE/${report_file} """ } catch (exc) { @@ -492,7 +520,7 @@ if (test_fpga_tools) { if (test_sims) { all_tests['Run Sims'] = { stage('Run Sims') { - def cl_names = ['cl_uram_example', 'cl_dram_dma', 'cl_hello_world', 'cl_sde'] + def cl_names = ['cl_vhdl_hello_world', 'cl_uram_example', 'cl_dram_dma', 'cl_hello_world', 'cl_sde'] def simulators = ['vivado'] def sim_nodes = [:] if(params.internal_simulations) { @@ -505,7 +533,16 @@ if (test_sims) { String xilinx_version = y String cl_name = x String simulator = z - String node_name = "Sim ${cl_name} ${xilinx_version}" + if((cl_name == 'cl_vhdl_hello_world') && (simulator == 'ies')) { + println ("Skipping Simulator: ${simulator} CL: ${cl_name}") + continue; + } + String cl_dir_name = cl_name + if(cl_name == 'cl_vhdl_hello_world') { + cl_dir_name = "cl_hello_world_vhdl" + } + + String node_name = "Sim ${cl_name} ${xilinx_version} ${simulator}" String key = "test_${cl_name}__" String report_file = "test_sims_${cl_name}_${xilinx_version}.xml" def tool_module_map = simulator_tool_default_map.get(xilinx_version) @@ -525,19 +562,21 @@ if (test_sims) { sh """ set -e module purge - module load python/2.7.9 + module load python/3.7.2 + module load python/2.7.14 + module load batch module load ${vivado_module} module load ${vcs_module} module load ${questa_module} module load ${ies_module} source $WORKSPACE/hdk_setup.sh - python2.7 -m pytest -v $WORKSPACE/hdk/tests/simulation_tests/test_sims.py -k \"${key}\" --junit-xml $WORKSPACE/${report_file} --simulator ${simulator} + python2.7 -m pytest -v $WORKSPACE/hdk/tests/simulation_tests/test_sims.py -k \"${key}\" --junit-xml $WORKSPACE/${report_file} --simulator ${simulator} --batch 
'TRUE' """ } else { sh """ set -e source $WORKSPACE/shared/tests/bin/setup_test_hdk_env.sh - python2.7 -m pytest -v $WORKSPACE/hdk/tests/simulation_tests/test_sims.py -k \"${key}\" --junit-xml $WORKSPACE/${report_file} --simulator ${simulator} + python2.7 -m pytest -v $WORKSPACE/hdk/tests/simulation_tests/test_sims.py -k \"${key}\" --junit-xml $WORKSPACE/${report_file} --simulator ${simulator} --batch 'FALSE' """ } } catch (exc) { @@ -545,7 +584,7 @@ if (test_sims) { throw exc } finally { run_junit(report_file) - archiveArtifacts artifacts: "hdk/cl/examples/${cl_name}/**/*.sim.log", fingerprint: true + archiveArtifacts artifacts: "hdk/cl/examples/${cl_dir_name}/**/*.sim.log", fingerprint: true } } } @@ -587,15 +626,15 @@ if (test_xdma) { //============================================================================= // Python Binding Test //============================================================================= -if (test_py_bindings) { - all_tests['Test Python Bindings'] = { - stage('Test Python Bindings') { - node('f1.2xl_runtime_test_al2') { - test_run_py_bindings() - } - } - } -} +// if (test_py_bindings) { +// all_tests['Test Python Bindings'] = { +// stage('Test Python Bindings') { +// node('f1.2xl_runtime_test_al2') { +// test_run_py_bindings() +// } +// } +// } +// } //============================================================================= // Precompiled Runtime Tests @@ -861,113 +900,369 @@ if (test_hdk_fdf) { // SDAccel Tests //============================================================================= -if (test_sdaccel_scripts) { - all_tests['Test SDAccel Scripts'] = { - stage('Test SDAccel Scripts') { - def nodes = [:] - for (def xilinx_version in xilinx_versions) { +// if (test_sdaccel_scripts) { +// all_tests['Test SDAccel Scripts'] = { +// stage('Test SDAccel Scripts') { +// def nodes = [:]git +// for (def xilinx_version in xilinx_versions) { +// +// String node_label = get_task_label(task: 'source_scripts', xilinx_version: 
xilinx_version) +// String node_name = "Test SDAccel Scripts ${xilinx_version}" +// nodes[node_name] = { +// node(node_label) { +// String report_file = "test_sdaccel_scripts_${xilinx_version}.xml" +// checkout scm +// try { +// sh """ +// set -e +// source $WORKSPACE/shared/tests/bin/setup_test_env.sh +// python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_sdaccel_scripts.py --junit-xml $WORKSPACE/${report_file} +// """ +// } finally { +// run_junit(report_file) +// } +// } +// } +// } +// parallel nodes +// } +// } +// } + +// if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { +// all_tests['Run SDAccel Tests'] = { +// String sdaccel_examples_list = 'sdaccel_examples_list.json' +// +// def sdaccel_all_version_stages = [:] +// +// for (def version in xilinx_versions) { +// +// String xilinx_version = version +// String sdaccel_base_stage_name = "SDx FDF $xilinx_version" +// String sdaccel_find_stage_name = "SDx Find tests $xilinx_version" +// +// sdaccel_all_version_stages[sdaccel_base_stage_name] = { +// stage (sdaccel_find_stage_name) { +// +// node(get_task_label(task: 'find_tests', xilinx_version: xilinx_version)) { +// +// checkout scm +// String report_file = "test_find_sdaccel_examples_${xilinx_version}.xml" +// +// try { +// sh """ +// rm -rf ${sdaccel_examples_list} +// """ +// } catch(error) { +// // Ignore any errors +// echo "Failed to clean ${sdaccel_examples_list}" +// } +// +// try { +// sh """ +// set -e +// source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh +// python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_find_sdaccel_examples.py --junit-xml $WORKSPACE/${report_file} --xilinxVersion ${xilinx_version} +// """ +// } catch (exc) { +// echo "Could not find tests. Please check the repository." 
+// throw exc +// } finally { +// run_junit(report_file) +// archiveArtifacts artifacts: "${sdaccel_examples_list}.*", fingerprint: true +// +// } +// +// // Only run the hello world test by default +// //def example_map = [ 'Hello_World': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl' ] +// def example_map = sdaccel_example_default_map.get(xilinx_version) +// +// // Run all examples when parameter set +// if (test_all_sdaccel_examples_fdf) { +// example_map = readJSON file: sdaccel_examples_list +// } +// +// def sdaccel_build_stages = [:] +// +// for ( def e in entrySet(example_map) ) { +// +// String test_key = e.key +// def dsa_map_for_version = dsa_map.get(xilinx_version) +// +// // dsa = [ 4DDR: 4ddr ] +// for ( def dsa in entrySet(dsa_map_for_version) ) { +// +// String build_name = "SDx ${e.key}_${dsa.value}_${xilinx_version}" +// String example_path = e.value +// +// String dsa_name = dsa.key +// String dsa_rte_name = dsa.value +// +// String sw_emu_stage_name = "SDx SW_EMU ${build_name}" +// String hw_emu_stage_name = "SDx HW_EMU ${build_name}" +// String hw_stage_name = "SDx HW ${build_name}" +// String create_afi_stage_name = "SDx AFI ${build_name}" +// String run_example_stage_name = "SDx RUN ${build_name}" +// +// String sw_emu_report_file = "sdaccel_sw_emu_${e.key}_${dsa.value}_${xilinx_version}.xml" +// String hw_emu_report_file = "sdaccel_hw_emu_${e.key}_${dsa.value}_${xilinx_version}.xml" +// String hw_report_file = "sdaccel_hw_${e.key}_${dsa.value}_${xilinx_version}.xml" +// String create_afi_report_file = "sdaccel_create_afi_${e.key}_${dsa.value}_${xilinx_version}.xml" +// String run_example_report_file = "sdaccel_run_${e.key}_${dsa.value}_${xilinx_version}.xml" +// +// String description_file = "${example_path}/description.json" +// def description_json = ["targets":["hw","hw_emu","sw_emu"]] +// +// try { +// description_json = readJSON file: description_file +// } +// catch (exc) { +// echo "Could not read the file: 
${description_file}" +// throw exc +// } +// +// boolean test_sw_emu_supported = true +// boolean test_hw_emu_supported = true +// +// if(description_json["targets"]) { +// if(description_json["targets"].contains("sw_emu")) { +// test_sw_emu_supported = true +// echo "Description file ${description_file} has target sw_emu" +// } else { +// test_sw_emu_supported = false +// echo "Description file ${description_file} does not have target sw_emu" +// } +// if(description_json["targets"].contains("hw_emu")) { +// test_hw_emu_supported = true +// echo "Description file ${description_file} has target sw_emu" +// } else { +// test_hw_emu_supported = false +// echo "Description file ${description_file} does not have target sw_emu" +// } +// } else { +// echo "Description json did not have a 'target' key" +// } +// +// sdaccel_build_stages[build_name] = { +// if(test_sw_emu_supported) { +// stage(sw_emu_stage_name) { +// node(get_task_label(task: 'sdaccel_builds', xilinx_version: xilinx_version)) { +// checkout scm +// try { +// sh """ +// set -e +// source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh +// export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} +// python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_build_sdaccel_example.py::TestBuildSDAccelExample::test_sw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${sw_emu_report_file} --timeout=14400 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} +// """ +// } catch (error) { +// echo "${sw_emu_stage_name} SW EMU Build generation failed" +// archiveArtifacts artifacts: "${example_path}/**", fingerprint: true +// throw error +// } finally { +// run_junit(sw_emu_report_file) +// git_cleanup() +// } +// } +// } +// } +// +// if(test_hw_emu_supported) { +// stage(hw_emu_stage_name) { +// node(get_task_label(task: 'sdaccel_builds', xilinx_version: xilinx_version)) { +// checkout scm +// try { +// sh """ +// set -e +// source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh +// 
export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} +// python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_build_sdaccel_example.py::TestBuildSDAccelExample::test_hw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_emu_report_file} --timeout=21600 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} +// """ +// } catch (error) { +// echo "${hw_emu_stage_name} HW EMU Build generation failed" +// archiveArtifacts artifacts: "${example_path}/**", fingerprint: true +// throw error +// } finally { +// run_junit(hw_emu_report_file) +// git_cleanup() +// } +// } +// } +// } +// +// stage(hw_stage_name) { +// node(get_task_label(task: 'sdaccel_builds', xilinx_version: xilinx_version)) { +// checkout scm +// try { +// sh """ +// set -e +// source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh +// export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} +// python2.7 -m pytest -s -v $WORKSPACE/SDAccel/tests/test_build_sdaccel_example.py::TestBuildSDAccelExample::test_hw_build --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_report_file} --timeout=36000 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} +// """ +// } catch (error) { +// echo "${hw_stage_name} HW Build generation failed" +// archiveArtifacts artifacts: "${example_path}/**", fingerprint: true +// throw error +// } finally { +// run_junit(hw_report_file) +// git_cleanup() +// } +// } +// } +// +// stage(create_afi_stage_name) { +// node(get_task_label(task: 'create_afi', xilinx_version: xilinx_version)) { +// +// checkout scm +// try { +// sh """ +// set -e +// source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh +// export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} +// python2.7 -m pytest -s -v $WORKSPACE/SDAccel/tests/test_create_sdaccel_afi.py::TestCreateSDAccelAfi::test_create_sdaccel_afi --examplePath ${example_path} --junit-xml $WORKSPACE/${create_afi_report_file} --timeout=18000 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} +// """ +// } catch 
(error) { +// echo "${create_afi_stage_name} Create AFI failed" +// archiveArtifacts artifacts: "${example_path}/**", fingerprint: true +// throw error +// } finally { +// +// String to_aws_dir = "${example_path}/to_aws" +// +// if (fileExists(to_aws_dir)) { +// sh "rm -rf ${to_aws_dir}" +// } +// run_junit(create_afi_report_file) +// git_cleanup() +// } +// } +// } +// +// stage(run_example_stage_name) { +// +// if(disable_runtime_tests) { +// echo "Runtime tests disabled. Not running ${run_example_stage_name}" +// } else { +// node(get_task_label(task: 'runtime', xilinx_version: xilinx_version)) { +// +// checkout scm +// try { +// sh """ +// set -e +// source $WORKSPACE/shared/tests/bin/setup_test_runtime_sdaccel_env.sh +// export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} +// python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_run_sdaccel_example.py::TestRunSDAccelExample::test_run_sdaccel_example --examplePath ${example_path} --junit-xml $WORKSPACE/${run_example_report_file} --timeout=14400 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} +// """ +// } catch (error) { +// echo "${run_example_stage_name} Runtime example failed" +// archiveArtifacts artifacts: "${example_path}/**", fingerprint: true +// input message: "SDAccel Runtime test failed. Click Proceed or Abort when you are done debugging on the instance." 
+// throw error +// } finally { +// run_junit(run_example_report_file) +// git_cleanup() +// } +// } +// } //else +// +// } +// +// } // sdaccel_build_stages[ e.key ] +// +// } //for ( def dsa in entrySet(dsa_map_for_version) ) { +// } // for ( e in list_map ) +// +// parallel sdaccel_build_stages +// } +// } +// } +// } //for (def xilinx_version in xilinx_versions) { +// parallel sdaccel_all_version_stages +// } +// } - String node_label = get_task_label(task: 'source_scripts', xilinx_version: xilinx_version) - String node_name = "Test SDAccel Scripts ${xilinx_version}" - nodes[node_name] = { - node(node_label) { - String report_file = "test_sdaccel_scripts_${xilinx_version}.xml" - checkout scm - try { - sh """ - set -e - source $WORKSPACE/shared/tests/bin/setup_test_env.sh - python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_sdaccel_scripts.py --junit-xml $WORKSPACE/${report_file} - """ - } finally { - run_junit(report_file) - } - } - } - } - parallel nodes - } - } -} - -if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { - all_tests['Run SDAccel Tests'] = { - String sdaccel_examples_list = 'sdaccel_examples_list.json' +//============================================================================= +// Vitis Tests +//============================================================================= +if (test_helloworld_vitis_example_fdf || test_all_vitis_examples_fdf) { + all_tests['Run Vitis Tests'] = { + String vitis_examples_list = 'vitis_examples_list.json' - def sdaccel_all_version_stages = [:] + def vitis_all_version_stages = [:] - for (def version in xilinx_versions) { + for (def version in vitis_versions) { String xilinx_version = version - String sdaccel_base_stage_name = "SDx FDF $xilinx_version" - String sdaccel_find_stage_name = "SDx Find tests $xilinx_version" + String vitis_base_stage_name = "Vitis FDF $xilinx_version" + String vitis_find_stage_name = "Vitis Find tests $xilinx_version" - 
sdaccel_all_version_stages[sdaccel_base_stage_name] = { - stage (sdaccel_find_stage_name) { + vitis_all_version_stages[vitis_base_stage_name] = { + stage (vitis_find_stage_name) { node(get_task_label(task: 'find_tests', xilinx_version: xilinx_version)) { checkout scm - String report_file = "test_find_sdaccel_examples_${xilinx_version}.xml" + String report_file = "test_find_vitis_examples_${xilinx_version}.xml" try { sh """ - rm -rf ${sdaccel_examples_list} + rm -rf ${vitis_examples_list} """ } catch(error) { // Ignore any errors - echo "Failed to clean ${sdaccel_examples_list}" + echo "Failed to clean ${vitis_examples_list}" } try { sh """ set -e - source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh - python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_find_sdaccel_examples.py --junit-xml $WORKSPACE/${report_file} + source $WORKSPACE/shared/tests/bin/setup_test_build_vitis_env.sh + python2.7 -m pytest -v $WORKSPACE/Vitis/tests/test_find_vitis_examples.py --junit-xml $WORKSPACE/${report_file} --xilinxVersion ${xilinx_version} """ } catch (exc) { echo "Could not find tests. Please check the repository." 
throw exc } finally { run_junit(report_file) + archiveArtifacts artifacts: "${vitis_examples_list}.*", fingerprint: true + } - // Only run the hello world test by default - //def example_map = [ 'Hello_World': 'SDAccel/examples/xilinx/getting_started/host/helloworld_ocl' ] - def example_map = sdaccel_example_default_map.get(xilinx_version) + def example_map = vitis_example_default_map.get(xilinx_version) // Run all examples when parameter set - if (test_all_sdaccel_examples_fdf) { - example_map = readJSON file: sdaccel_examples_list + if (test_all_vitis_examples_fdf) { + example_map = readJSON file: vitis_examples_list } - def sdaccel_build_stages = [:] + def vitis_build_stages = [:] for ( def e in entrySet(example_map) ) { String test_key = e.key - def dsa_map_for_version = dsa_map.get(xilinx_version) + def xsa_map_for_version = xsa_map.get(xilinx_version) // dsa = [ 4DDR: 4ddr ] - for ( def dsa in entrySet(dsa_map_for_version) ) { + for ( def dsa in entrySet(xsa_map_for_version) ) { - String build_name = "SDx ${e.key}_${dsa.value}_${xilinx_version}" + String build_name = "Vitis ${e.key}_${dsa.value}_${xilinx_version}" String example_path = e.value String dsa_name = dsa.key String dsa_rte_name = dsa.value - String sw_emu_stage_name = "SDx SW_EMU ${build_name}" - String hw_emu_stage_name = "SDx HW_EMU ${build_name}" - String hw_stage_name = "SDx HW ${build_name}" - String create_afi_stage_name = "SDx AFI ${build_name}" - String run_example_stage_name = "SDx RUN ${build_name}" + String sw_emu_stage_name = "Vitis SW_EMU ${build_name}" + String hw_emu_stage_name = "Vitis HW_EMU ${build_name}" + String hw_stage_name = "Vitis HW ${build_name}" + String create_afi_stage_name = "Vitis AFI ${build_name}" + String run_example_stage_name = "Vitis RUN ${build_name}" - String sw_emu_report_file = "sdaccel_sw_emu_${e.key}_${dsa.value}_${xilinx_version}.xml" - String hw_emu_report_file = "sdaccel_hw_emu_${e.key}_${dsa.value}_${xilinx_version}.xml" - String hw_report_file = 
"sdaccel_hw_${e.key}_${dsa.value}_${xilinx_version}.xml" - String create_afi_report_file = "sdaccel_create_afi_${e.key}_${dsa.value}_${xilinx_version}.xml" - String run_example_report_file = "sdaccel_run_${e.key}_${dsa.value}_${xilinx_version}.xml" + String sw_emu_report_file = "vitis_sw_emu_${e.key}_${dsa.value}_${xilinx_version}.xml" + String hw_emu_report_file = "vitis_hw_emu_${e.key}_${dsa.value}_${xilinx_version}.xml" + String hw_report_file = "vitis_hw_${e.key}_${dsa.value}_${xilinx_version}.xml" + String create_afi_report_file = "vitis_create_afi_${e.key}_${dsa.value}_${xilinx_version}.xml" + String run_example_report_file = "vitis_run_${e.key}_${dsa.value}_${xilinx_version}.xml" String description_file = "${example_path}/description.json" def description_json = ["targets":["hw","hw_emu","sw_emu"]] @@ -980,7 +1275,8 @@ if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { throw exc } - boolean test_sw_emu_supported = true + boolean test_sw_emu_supported = false + boolean test_hw_emu_supported = false if(description_json["targets"]) { if(description_json["targets"].contains("sw_emu")) { @@ -990,11 +1286,21 @@ if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { test_sw_emu_supported = false echo "Description file ${description_file} does not have target sw_emu" } + if(description_json["targets"].contains("hw_emu")) { + test_hw_emu_supported = true + echo "Description file ${description_file} has target hw_emu" + } else { + test_hw_emu_supported = false + echo "Description file ${description_file} does not have target hw_emu" + } } else { echo "Description json did not have a 'target' key" + test_sw_emu_supported = true + test_hw_emu_supported = true } - sdaccel_build_stages[build_name] = { + vitis_build_stages[build_name] = { + if(test_sw_emu_supported) { stage(sw_emu_stage_name) { node(get_task_label(task: 'sdaccel_builds', xilinx_version: xilinx_version)) { @@ -1002,9 +1308,8 @@ if 
(test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { try { sh """ set -e - source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh - export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} - python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_build_sdaccel_example.py::TestBuildSDAccelExample::test_sw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${sw_emu_report_file} --timeout=14400 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} + source $WORKSPACE/shared/tests/bin/setup_test_build_vitis_env.sh + python2.7 -m pytest -v $WORKSPACE/Vitis/tests/test_build_vitis_example.py::TestBuildVitisExample::test_sw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${sw_emu_report_file} --timeout=14400 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} """ } catch (error) { echo "${sw_emu_stage_name} SW EMU Build generation failed" @@ -1018,23 +1323,24 @@ if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { } } - stage(hw_emu_stage_name) { - node(get_task_label(task: 'sdaccel_builds', xilinx_version: xilinx_version)) { - checkout scm - try { - sh """ - set -e - source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh - export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} - python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_build_sdaccel_example.py::TestBuildSDAccelExample::test_hw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_emu_report_file} --timeout=21600 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} - """ - } catch (error) { - echo "${hw_emu_stage_name} HW EMU Build generation failed" - archiveArtifacts artifacts: "${example_path}/**", fingerprint: true - throw error - } finally { - run_junit(hw_emu_report_file) - git_cleanup() + if(test_hw_emu_supported) { + stage(hw_emu_stage_name) { + node(get_task_label(task: 'sdaccel_builds', xilinx_version: xilinx_version)) { + checkout scm + try { + sh """ + set -e + source 
$WORKSPACE/shared/tests/bin/setup_test_build_vitis_env.sh + python2.7 -m pytest -v $WORKSPACE/Vitis/tests/test_build_vitis_example.py::TestBuildVitisExample::test_hw_emu --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_emu_report_file} --timeout=21600 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} + """ + } catch (error) { + echo "${hw_emu_stage_name} HW EMU Build generation failed" + archiveArtifacts artifacts: "${example_path}/**", fingerprint: true + throw error + } finally { + run_junit(hw_emu_report_file) + git_cleanup() + } } } } @@ -1045,9 +1351,8 @@ if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { try { sh """ set -e - source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh - export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} - python2.7 -m pytest -s -v $WORKSPACE/SDAccel/tests/test_build_sdaccel_example.py::TestBuildSDAccelExample::test_hw_build --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_report_file} --timeout=36000 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} + source $WORKSPACE/shared/tests/bin/setup_test_build_vitis_env.sh + python2.7 -m pytest -s -v $WORKSPACE/Vitis/tests/test_build_vitis_example.py::TestBuildVitisExample::test_hw_build --examplePath ${example_path} --junit-xml $WORKSPACE/${hw_report_file} --timeout=36000 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} """ } catch (error) { echo "${hw_stage_name} HW Build generation failed" @@ -1067,9 +1372,8 @@ if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { try { sh """ set -e - source $WORKSPACE/shared/tests/bin/setup_test_build_sdaccel_env.sh - export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} - python2.7 -m pytest -s -v $WORKSPACE/SDAccel/tests/test_create_sdaccel_afi.py::TestCreateSDAccelAfi::test_create_sdaccel_afi --examplePath ${example_path} --junit-xml $WORKSPACE/${create_afi_report_file} --timeout=18000 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} + 
source $WORKSPACE/shared/tests/bin/setup_test_build_vitis_env.sh + python2.7 -m pytest -s -v $WORKSPACE/Vitis/tests/test_create_vitis_afi.py::TestCreateVitisAfi::test_create_vitis_afi --examplePath ${example_path} --junit-xml $WORKSPACE/${create_afi_report_file} --timeout=18000 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} """ } catch (error) { echo "${create_afi_stage_name} Create AFI failed" @@ -1099,14 +1403,13 @@ if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { try { sh """ set -e - source $WORKSPACE/shared/tests/bin/setup_test_runtime_sdaccel_env.sh - export AWS_PLATFORM=\$AWS_PLATFORM_${dsa_name} - python2.7 -m pytest -v $WORKSPACE/SDAccel/tests/test_run_sdaccel_example.py::TestRunSDAccelExample::test_run_sdaccel_example --examplePath ${example_path} --junit-xml $WORKSPACE/${run_example_report_file} --timeout=14400 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} + source $WORKSPACE/shared/tests/bin/setup_test_runtime_vitis_env.sh + python2.7 -m pytest -v $WORKSPACE/Vitis/tests/test_run_vitis_example.py::TestRunVitisExample::test_run_vitis_example --examplePath ${example_path} --junit-xml $WORKSPACE/${run_example_report_file} --timeout=14400 --rteName ${dsa_rte_name} --xilinxVersion ${xilinx_version} """ } catch (error) { echo "${run_example_stage_name} Runtime example failed" archiveArtifacts artifacts: "${example_path}/**", fingerprint: true - input message: "SDAccel Runtime test failed. Click Proceed or Abort when you are done debugging on the instance." + input message: "Vitis Runtime test failed. Click Proceed or Abort when you are done debugging on the instance." 
throw error } finally { run_junit(run_example_report_file) @@ -1117,20 +1420,19 @@ if (test_helloworld_sdaccel_example_fdf || test_all_sdaccel_examples_fdf) { } - } // sdaccel_build_stages[ e.key ] + } // vitis_build_stages[ e.key ] - } //for ( def dsa in entrySet(dsa_map_for_version) ) { + } //for ( def dsa in entrySet(xsa_map_for_version) ) { } // for ( e in list_map ) - parallel sdaccel_build_stages + parallel vitis_build_stages } } } } //for (def xilinx_version in xilinx_versions) { - parallel sdaccel_all_version_stages + parallel vitis_all_version_stages } } - //============================================================================= // SDK Tests //============================================================================= diff --git a/Jenkinsfile_int_sims b/Jenkinsfile_int_sims new file mode 100644 index 00000000..51a2d46f --- /dev/null +++ b/Jenkinsfile_int_sims @@ -0,0 +1,226 @@ +#!/usr/bin/env groovy + +//============================================================================= +// Pipeline parameters +//============================================================================= +properties([parameters([ + string(name: 'branch', defaultValue: ''), + booleanParam(name: 'test_sims', defaultValue: true, description: 'Run all Simulations'), + booleanParam(name: 'internal_simulations', defaultValue: true, description: 'This option asks for default agent from Jenkins') +])]) + +//============================================================================= +// Configuration +//============================================================================= + +boolean test_sims = params.get('test_sims') + +//============================================================================= +// Globals +//============================================================================= + +// Map that contains stages of tests +def all_tests = [:] + +// Task to Label map +task_label = [ + 'create_afi': 't2.l_50', + 'simulation': 'z1d.l', + 'dcp_gen': 'z1d.2xl', + 
'runtime': 'f1.2xl', + 'runtime_all_slots': 'f1.16xl', + 'source_scripts': 'c4.xl', + 'md_links': 'c4.xl', + 'find_tests': 't2.l_50', + 'sdaccel_builds': 'z1d.2xl' +] + +// Put the latest version last +def xilinx_versions = [ '2020.2' ] + +// We want the default to be the latest. +def default_xilinx_version = xilinx_versions.last() + +def simulator_tool_default_map = [ + '2019.2' : [ + 'vivado': 'xilinx/Vivado/2019.2', + 'vcs': 'synopsys/vcs-mx/O-2018.09-SP2-1', + 'questa': 'questa/2019.2', + 'ies': 'incisive/15.20.063' + ], + '2020.1' : [ + 'vivado': 'xilinx/Vivado/2020.1', + 'vcs': 'synopsys/vcs-mx/P-2019.06-SP1-1', + 'questa': 'questa/2019.4', + 'ies': 'incisive/15.20.079' + ], + '2020.2' : [ + 'vivado': 'xilinx/Vivado/2020.2', + 'vcs': 'synopsys/vcs/Q-2020.03', + 'questa': 'questa/2019.4_3', + 'ies': 'incisive/15.20.083' + ] +] + + +// Get serializable entry set +@NonCPS def entrySet(m) {m.collect {k, v -> [key: k, value: v]}} + +@NonCPS +def is_public_repo() { + echo "Change URL: ${env.CHANGE_URL}" + return (env.CHANGE_URL =~ /^(\S+)?aws-fpga\/pull\/(\d+)$/) +} + +def get_task_label(Map args=[ : ]) { + String task_label = args.xilinx_version + '_' + task_label[args.task] + + if (params.internal_simulations) { + echo "internal simulation agent requested" + task_label = 'f1_3rd_party_sims' + } + + echo "Label Requested: $task_label" + return task_label +} + +def abort_previous_running_builds() { + def hi = Hudson.instance + def pname = env.JOB_NAME.split('/')[0] + + hi.getItem(pname).getItem(env.JOB_BASE_NAME).getBuilds().each{ build -> + def executor = build.getExecutor() + + if (build.number != currentBuild.number && build.number < currentBuild.number && executor != null) { + executor.interrupt( + Result.ABORTED, + new CauseOfInterruption.UserInterruption("Aborted by #${currentBuild.number}")) + println("Aborted previous running build #${build.number}") + } else { + println("Build is not running or is current build, not aborting - #${build.number}") + } + } +} 
+ +// Wait for input if we are running on a public repo to avoid malicious PRS +if (is_public_repo()) { + input "Running on a public repository, do you want to proceed with running the tests?" +} else { + echo "Running on a private repository" +} + + +//Abort previous builds on PR when we push new commits +// env.CHANGE_ID is only available on PR's and not on branch builds +if (env.CHANGE_ID) { + abort_previous_running_builds() +} + + +def run_junit(String report_file) { + + if (fileExists(report_file)) { + junit healthScaleFactor: 10.0, testResults: report_file + } else { + echo "Pytest wasn't run for stage. Report file not generated: ${report_file}" + } +} + +def git_cleanup() { + sh """ + set -e + sudo git reset --hard + sudo git clean -fdx + """ +} + +//============================================================================= +// Simulations +//============================================================================= +if (test_sims) { + all_tests['Run Sims'] = { + stage('Run Sims') { + def cl_names = ['cl_vhdl_hello_world', 'cl_uram_example', 'cl_dram_dma', 'cl_hello_world', 'cl_sde'] + def simulators = ['vivado'] + def sim_nodes = [:] + if(params.internal_simulations) { + simulators = ['vcs', 'ies', 'questa', 'vivado'] + } + + for (x in cl_names) { + for (y in xilinx_versions) { + for (z in simulators) { + String xilinx_version = y + String cl_name = x + String simulator = z + if((cl_name == 'cl_vhdl_hello_world') && (simulator == 'ies')) { + println ("Skipping Simulator: ${simulator} CL: ${cl_name}") + continue; + } + String cl_dir_name = cl_name + if(cl_name == 'cl_vhdl_hello_world') { + cl_dir_name = "cl_hello_world_vhdl" + } + String node_name = "Sim ${cl_name} ${xilinx_version} ${simulator}" + String key = "test_${cl_name}__" + String report_file = "test_sims_${cl_name}_${xilinx_version}.xml" + def tool_module_map = simulator_tool_default_map.get(xilinx_version) + String vcs_module = tool_module_map.get('vcs') + String questa_module = 
tool_module_map.get('questa') + String ies_module = tool_module_map.get('ies') + String vivado_module = tool_module_map.get('vivado') + + if(params.internal_simulations) { + report_file = "test_sims_${cl_name}_${xilinx_version}_${simulator}.xml" + } + sim_nodes[node_name] = { + node(get_task_label(task: 'simulation', xilinx_version: xilinx_version)) { + checkout scm + try { + if(params.internal_simulations) { + sh """ + set -e + module purge + module load python/3.7.2 + module load python/2.7.14 + module load slurm + module load ${vivado_module} + module load ${vcs_module} + module load ${questa_module} + module load ${ies_module} + source $WORKSPACE/hdk_setup.sh + python2.7 -m pytest -v $WORKSPACE/hdk/tests/simulation_tests/test_sims.py -k \"${key}\" --junit-xml $WORKSPACE/${report_file} --simulator ${simulator} --batch 'TRUE' + """ + } else { + sh """ + set -e + source $WORKSPACE/shared/tests/bin/setup_test_hdk_env.sh + python2.7 -m pytest -v $WORKSPACE/hdk/tests/simulation_tests/test_sims.py -k \"${key}\" --junit-xml $WORKSPACE/${report_file} --simulator ${simulator} --batch 'FALSE' + """ + } + } catch (exc) { + echo "${node_name} failed" + throw exc + } finally { + run_junit(report_file) + archiveArtifacts artifacts: "hdk/cl/examples/${cl_dir_name}/**/*.sim.log", fingerprint: true + } + } + } + } + } + } + + parallel sim_nodes + } + } +} + + +//============================================================================= +// SDK Tests +//============================================================================= + + +// Run the tests here +parallel all_tests diff --git a/README.md b/README.md index c1a7f432..179a835a 100644 --- a/README.md +++ b/README.md @@ -8,192 +8,199 @@ Below is the standard aws-fpga documentation from upstream. # Table of Contents -1. 
[Overview of AWS EC2 FPGA Development Kit](#overviewdevkit) - - [Development environments](#overviewdevenv) - - [Runtime environments](#overviewrunenv) - - [Example applications](#overviewexapps) - - [Development tools](#overviewdevtools) -2. [Getting Started](#gettingstarted) -3. [FPGA Developer AMI available on AWS Marketplace](#devAmi) -4. [FPGA Hardware Development Kit (HDK)](#fpgahdk) -5. [FPGA Software Development Kit (SDK)](#fpgasdk) -6. [OpenCL Development Environment with Amazon EC2 F1 FPGA Instances to accelerate your C/C++ applications](#sdaccel) -7. [Developer Support](#devSupport) -8. [Recommended Documentation](#doccontents) -9. [Github tips and tricks](#githubtipstricks) - - - +1. [Overview of AWS EC2 FPGA Development Kit](#overview-of-aws-ec2-fpga-development-kit) + - [Development Flow](#development-flow) + - [Development environments](#development-environments) + - [FPGA Developer AMI](#fpga-developer-ami) + - [FPGA Hardware Development Kit (HDK)](#hardware-development-kit-hdk) + - [FPGA Software Development Kit (SDK)](#runtime-tools-sdk) + - [Software Defined Development Environment](#software-defined-development-environment) +1. [Amazon EC2 F1 platform features](#amazon-ec2-f1-platform-features) +1. [Getting Started](#getting-started) + - [Getting Familiar with AWS](#getting-familiar-with-aws) + - [First time setup](#setting-up-development-environment-for-the-first-time) + - [Quickstarts](#quickstarts) + - [How To's](#how-tos) +1. [Documentation Overview](#documentation-overview) +1. [Developer Support](#developer-support) + # Overview of AWS EC2 FPGA Development Kit -The AWS EC2 FPGA Development Kit is provided by AWS to support development and runtime on [AWS FPGA instances](https://aws.amazon.com/ec2/instance-types/f1/). Amazon EC2 FPGA instances are high-performance compute instances with field programmable gate arrays (FPGAs) that are programmed to create custom hardware accelerations in EC2. 
F1 instances are easy to program and AWS provides everything needed to develop, simulate, debug, compile and run hardware accelerated applications. Using the [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ), developers create an FPGA design. Once the FPGA design (also called CL - Custom logic) is complete, developers create the Amazon FPGA Image (AFI), and easily deploy it to the F1 instance. AFIs are reusable, shareable and can be deployed in a scalable and secure way. -![Alt text](hdk/docs/images/f1-Instance-How-it-Works-flowchart.jpg) +The AWS EC2 FPGA Development Kit is a set of development and runtime tools to develop, simulate, debug, compile and run hardware accelerated applications on [Amazon EC2 F1 instances](https://aws.amazon.com/ec2/instance-types/f1/). +It is distributed between this GitHub repository and the FPGA Developer AMI ([CentOS](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ)/[AL2](https://aws.amazon.com/marketplace/pp/B08NTMMZ7X)), which AWS provides with no software charge for the development tools. 
- -## Overview of Development Environments - -| Development Environment | Description | Accelerator Language | Development Tool | Debug Options| Typical Developer / FPGA Experience | -| --------|---------|---------|-------|-------|-------| -| [Software Defined Accelerator Development - SDAccel](SDAccel/README.md) | Development experience leverages an optimized compiler to allow easy new accelerator development or migration of existing C/C++/openCL, Verilog/VHDL to AWS FPGA instances | C/C++/OpenCL, Verilog/VHDL (RTL) | SDx/Vivado (GUI or scipt) | SW/HW Emulation, Simulation, GDB, Virtual JTAG (Chipscope) | SW or HW Developer with zero FPGA experience | -| [Hardware Accelerator Development - HDK](hdk/README.md) | Fully custom hardware development experience provides hardware developers with the tools required for developing AFIs for AWS FPGA instances | Verilog/VHDL | Vivado | Simulation, Virtual JTAG | HW Developer with advanced FPGA experience | -| [IP Integrator or High Level Synthesis (HLx)](hdk/docs/IPI_GUI_Vivado_Setup.md) | Graphical interface development experience for integrating IP and high level synthesis development | Verilog/VHDL/C | Vivado (GUI) | Simulation, Virtual JTAG | HW Developer with intermediate FPGA experience | - - -## Overview of Runtime Environments - -| Runtime Environment | Hardware Interface | Host Code Language | FPGA Tools | -| --------|---------|---------|-------| -| [C/C++ Software Defined Accelerator Development](SDAccel/README.md) | OpenCL APIs, [XOCL Driver](./sdk/linux_kernel_drivers/xocl), [HAL](SDAccel/userspace/src2) | C/C++ | [SDK](./sdk), SDx | -| [Hardware Accelerator Development](hdk/README.md) | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | C/C++ | [SDK](./sdk), Vivado | -| [IP Integrator or High Level Synthesis (HLx)](hdk/docs/IPI_GUI_Vivado_Setup.md) | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | C/C++ | [SDK](./sdk), 
Vivado | - - -## Overview of Development Tools - -| Tool | Development/Runtime | Tool location | Description | -| --------|---------|---------|---------| -| SDx 2017.4 & 2018.2 | Development | [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | Used for [Software Defined Accelerator Development](SDAccel/README.md) | -| Vivado 2017.4 & 2018.2 | Development | [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | Used for [Hardware Accelerator Development](hdk/README.md) | -| FPGA AFI Management Tools | Runtime | [SDK - fpga\_mgmt\_tools](sdk/userspace/fpga_mgmt_tools) | Command-line tools used for FPGA management while running on the F1 instance | -| Virtual JTAG | Development (Debug) | [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | Runtime debug waveform | -| wait\_for\_afi | Development | [wait\_for\_afi.py](shared/bin/scripts/wait_for_afi.py) | Helper script that notifies via email on AFI generation completion | -| notify\_via\_sns | Development | [notify\_via\_sns.py](shared/bin/scripts/notify_via_sns.py) | Notifies developer when design build process completes | -| AFI Administration | Development | [Copy](hdk/docs/copy_fpga_image.md), [Delete](hdk/docs/delete_fpga_image.md), [Describe](hdk/docs/describe_fpga_images.md), [Attributes](hdk/docs/fpga_image_attributes.md) | AWS CLI EC2 commands for managing your AFIs | - - -NOTE: For on-premises development, SDx/Vivado must have the correct license and use one of the [supported versions of SDx/Vivado](./supported_vivado_versions.txt). The FPGA HDK+SDK [Release Notes](./RELEASE_NOTES.md) may contain additional information. 
The following links have more information on on-premises development: [Vivado requirements](hdk/docs/on_premise_licensing_help.md) and [SDx requirements](SDAccel/docs/On_Premises_Development_Steps.md) - - -## Overview of Example Applications -| Accelerator Application | Example | Development Environment | Description | -| --------|---------|---------|-------| -| Custom hardware | [cl\_hello\_world](hdk/cl/examples/cl_hello_world) | HDK - RTL (Verilog) | Simple [getting started example](hdk/README.md) with minimal hardware | -| Custom hardware | [cl\_dram\_dma](hdk/cl/examples/cl_dram_dma) | HDK - RTL (Verilog) | Demonstrates CL connectivity to the F1 shell and connectivity to/from all DDRs | -| Custom hardware IP integration example using a GUI | [cl\_dram\_dma\_hlx](hdk/cl/examples/cl_dram_dma_hlx) | HLx - Verilog | Demonstrates CL connectivity to the F1 shell and connectivity to/from DRAM using the Vivado IP Integrator GUI | -| Virtual Ethernet Application | [Example Application](sdk/apps/virtual-ethernet) | [HDK SDE Example](hdk/cl/examples/cl_sde) | The Virtual Ethernet framework facilitates streaming Ethernet frames from a network interface (or any source) into the FPGA for processing and back out to some destination. Possible use cases for this include deep packet inspection, software defined networking, stream encryption or compression, and more. | -| Pipelined Workload Applications | [cl\_dram\_dma\_data\_retention](hdk/docs/data_retention.md)| [HDK](hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_retention.c) [SDAccel](SDAccel/examples/aws/data_retention) | Demonstrates how to preserve data in DRAMs while swapping out accelerators. 
Applications that use a temporal accelerator pipeline can take advantage of this feature to reduce latency between FPGA image swaps | -| Digital Up-Converter using High Level Synthesis | [cl\_hls\_dds\_hlx](hdk/cl/examples/cl_hls_dds_hlx) | HLx - C-to-RTL | Demonstrates an example application written in C that is synthesized to RTL (Verilog) | -| Security | [AES, RSA, SHA1](https://github.com/Xilinx/SDAccel_Examples/tree/2018.2/security) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration to speed up security software algorithms | -| Computer Vision | [Affine, Convolve, Huffman, IDCT](https://github.com/Xilinx/SDAccel_Examples/tree/master/vision) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration to speed up image detection algorithms | -| Misc Algorithms | [Kmeans, SmithWaterman, MatrixMult](https://github.com/Xilinx/SDAccel_Examples/tree/master/acceleration) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of applying hardware acceleration to a variety of sorting and search algorithms | -| Financial | [Blacksholes, Heston](https://github.com/KitAway/FinancialModels_AmazonF1) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration on Monte Carlo financial models | -| Custom Hardware with Software Defined Acceleration | [RTL Kernels](https://github.com/Xilinx/SDAccel_Examples/tree/master/getting_started/rtl_kernel) | SDAccel - RTL (Verilog) + C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates a quick method for developing new or migrating existing hardware designs (RTL) | -| File Compression | [GZip](https://github.com/Xilinx/Applications/tree/master/GZip) | SDAccel - C/C++/OpenCL | Developed using software defined 
acceleration, this example demonstrates methods of using hardware acceleration to speed up GZIP compression on an FPGA | -| WebP Image Compression | [WebP](https://github.com/Xilinx/Applications/tree/master/webp) | SDAccel - C/C++/OpenCL | Developed using software defined acceleration, this example demonstrates methods of using hardware acceleration to speed up WebP encoder application on an FPGA | - - -# Getting Started +⚠️ NOTE: The developer kit is supported for Linux operating systems only. -### New to AWS? -If you have never used AWS before, we recommend you start with [AWS getting started training](https://aws.amazon.com/getting-started/), and focus on the basics of the [AWS EC2](https://aws.amazon.com/ec2/) and [AWS S3](https://aws.amazon.com/s3/) services. Understanding the fundamentals of these services will make it easier to work with AWS FPGAs. +## Development Flow +After creating an FPGA design (also called CL - Custom logic), developers can create an Amazon FPGA Image (AFI) and easily deploy it to an F1 instance. AFIs are reusable, shareable and can be deployed in a scalable and secure way. -AWS FPGA generation and EC2 F1 instances are supported in the us-east-1 (N. Virginia), us-west-2 (Oregon), eu-west-1 (Ireland) and us-gov-west-1 ([GovCloud US](https://aws.amazon.com/govcloud-us/)) [regions](https://aws.amazon.com/about-aws/global-infrastructure/). +![Alt text](hdk/docs/images/f1-Instance-How-it-Works-flowchart.jpg) +## Development Environments -### New to AWS FPGAs and setting up a development environment? -The developer kit is supported for Linux operating systems only. You have the choice to develop on AWS EC2 using the [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) or on-premises. Within a linux environment, you can execute `git clone https://github.com/aws/aws-fpga.git` to download the latest release to your EC2 Instance or local server. 
Help on cloning from github is available [here](https://help.github.com/articles/which-remote-url-should-i-use/). When using a SSH connection, execute `git clone git@github.com:aws/aws-fpga.git`. [To get help with connecting to Github via SSH](https://help.github.com/articles/connecting-to-github-with-ssh/). +| Development Environment | Description | Accelerator Language | Hardware Interface | Debug Options| Typical Developer | +| --------|---------|-------|---------|-------|-------| +| Software Defined Accelerator Development using [Vitis](Vitis/README.md)/[SDAccel](SDAccel/README.md)| Development experience leverages an optimized compiler to allow easy new accelerator development or migration of existing C/C++/openCL, Verilog/VHDL to AWS FPGA instances | C/C++/OpenCL, Verilog/VHDL (RTL) | OpenCL APIs and XRT | SW/HW Emulation, Simulation, GDB, Virtual JTAG (Chipscope) | SW or HW Developer with zero FPGA experience | +| [Hardware Accelerator Development using Vivado](hdk/README.md) | Fully custom hardware development experience provides hardware developers with the tools required for developing AFIs for AWS FPGA instances | Verilog/VHDL | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | Simulation, Virtual JTAG | HW Developer with advanced FPGA experience | +| [IP Integrator/High Level Design(HLx) using Vivado](hdk/docs/IPI_GUI_Vivado_Setup.md) | Graphical interface development experience for integrating IP and high level synthesis development | Verilog/VHDL/C | [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md), [peek/poke](sdk/userspace/README.md) | Simulation, Virtual JTAG | HW Developer with intermediate FPGA experience | -Before you start your first AWS FPGA design, we recommend that you go through one of the step-by-step guides. The guides will walk through development steps for hello world examples. 
Based on the tables above, pick the development environment that best fits your needs and use the guide to get started: - * For fastest way to get started on FPGA accelerator development, start with the software defined development environment. The guide starts with the [SW Hello World example](SDAccel/README.md). - * Next use the same guide to develop using the C/C++/openCL/RTL based [80+ examples on github](./SDAccel/examples/xilinx_2017.4). - * For custom hardware development (HDK) environment, start with the [HDK Hello World example](hdk/README.md). - * Next use the same guide to develop using the [cl\_dram\_dma](hdk/cl/examples/cl_dram_dma). +> For on-premise development, SDAccel/Vitis/Vivado must have the [correct license and use one of the supported tool versions](./docs/on_premise_licensing_help.md). -### In-depth training and resources -Once you have completed your hello world examples, we recommend diving deeper into a training workshop or application notes - * Software-defined [re:Invent 2017 Workshop](https://github.com/awslabs/aws-fpga-app-notes/blob/master/reInvent17_Developer_Workshop/README.md) demonstrates a video encoder acceleration and how to debug and optimize your accelerator. - * Custom hardware developers need to learn about how the hardware accelerator interfaces to the F1 Shell - * [Shell Interface](hdk/docs/AWS_Shell_Interface_Specification.md) - * [Shell Address Map](hdk/docs/AWS_Fpga_Pcie_Memory_Map.md) - * [Programmer view of the FPGA](./hdk/docs/Programmer_View.md) - * [Virtual JTAG](hdk/docs/Virtual_JTAG_XVC.md) - * [Application for methods of interfacing the host application to the Hardware accelerator](https://github.com/awslabs/aws-fpga-app-notes) +## FPGA Developer AMI - -# FPGA Developer AMI +The [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) is available on the AWS marketplace without a software charge and includes tools needed for developing FPGA Designs to run on AWS F1. 
-The [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) is available on the AWS marketplace without a software charge and includes free tools and drivers needed for FPGA development on EC2 instances. FPGA development runs on several [EC2 instance types](https://aws.amazon.com/ec2/instance-types/). Given the large size of the FPGA used inside the AWS FPGA instances, the implementation tools require 32GiB Memory (ex: z1d.xlarge, z1d.2xlarge, c5.4xlarge, m5.2xlarge, r5.xlarge, t2.2xlarge). z1d.xlarge/c5.4xlarge and z1d.2xlarge/c5.8xlarge would provide the fastest execution time with 30GiB+ and 60GiB+ of memory respectively. Developers who want to save on cost, could start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. +Given the large size of the FPGA used inside AWS F1 Instances, Xilinx tools work best with 32GiB Memory. +z1d.xlarge/c5.4xlarge and z1d.2xlarge/c5.8xlarge instance types would provide the fastest execution time with 30GiB+ and 60GiB+ of memory respectively. +Developers who want to save on cost, could start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. -Currently, AWS marketplace includes multiple versions of the FPGA developer AMI, supporting Xilinx SDx 2017.4 and 2018.2 toolchain versions. The following compatibility table describes the mapping of currently supported developer kit versions to AMI versions: +AWS marketplace offers multiple versions of the FPGA Developer AMI. 
The following compatibility table describes the mapping of currently supported developer kit versions to AMI versions: -| Developer Kit Version | Tool Version Supported | Compatible FPGA developer AMI Version | +| Developer Kit Version | Tool Version Supported | Compatible FPGA Developer AMI Version | |-----------|-----------|------| -| 1.3.7-1.3.X | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado/SDx 2017.4) | -| 1.4.X | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado/SDx 2017.4) | -| 1.4.3+ | 2018.2 | v1.5.0-v1.5.X (Xilinx Vivado/SDx 2018.2) | +| 1.4.18+ | 2020.2 | v1.10.X (Xilinx Vivado/Vitis 2020.2) | +| 1.4.16+ | 2020.1 | v1.9.0-v1.9.X (Xilinx Vivado/Vitis 2020.1) | +| 1.4.13+ | 2019.2 | v1.8.0-v1.8.X (Xilinx Vivado/Vitis 2019.2) | +| 1.4.11+ | 2019.1 | v1.7.0-v1.7.X (Xilinx Vivado/SDx 2019.1) | +| 1.4.8 - 1.4.15a | 2018.3 | v1.6.0-v1.6.X (Xilinx Vivado/SDx 2018.3) | +| 1.4.3 - 1.4.15a | 2018.2 | v1.5.0-v1.5.X (Xilinx Vivado/SDx 2018.2) | +| 1.3.7 - 1.4.15a | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado/SDx 2017.4) | + +⚠️ Developer kit release v1.4.16 removed support for the Xilinx 2017.4, 2018.2 and 2018.3 toolsets. +While developer kit releases from v1.4.16 onwards do not support these older Xilinx tools, you can still use them with HDK releases v1.4.15a or earlier. +Please check out [the latest v1.4.15a release tag from GitHub](https://github.com/aws/aws-fpga/releases/tag/v1.4.15a) to use the Xilinx 2017.4, 2018.2 and 2018.3 toolsets. -Developer kit versions prior to v1.3.7 and Developer AMI prior to v1.4 (2017.1) reached end-of-life. See [AWS forum announcement](https://forums.aws.amazon.com/ann.jspa?annID=6068) for additional details. +⚠️ Developer kit versions prior to v1.3.7 and Developer AMI prior to v1.4 (2017.1) reached end-of-life. See [AWS forum announcement](https://forums.aws.amazon.com/ann.jspa?annID=6068) for additional details.
- If developing using SDAccel environment please refer to this [Runtime Compatibility Table](SDAccel/docs/Create_Runtime_AMI.md#runtime-ami-compatability-table) +For software-defined development please look at the runtime compatibility table based on the Xilinx toolset in use: +[SDAccel](SDAccel/docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table) or [Vitis](Vitis/docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table) - -# Hardware Development Kit (HDK) +## Hardware Development Kit (HDK) -The [HDK directory](./hdk/README.md) contains useful information, examples, and scripts for developers wanting to start building Amazon FPGA Images (AFI). It includes the development environment, simulation, build and AFI creation scripts. The HDK can be installed on any on-premises server or an EC2 instance. The developer kit is not required if you plan to use a pre-built AFI shared from another developer. +The [HDK directory](./hdk/README.md) contains documentation, examples, simulation, build and AFI creation scripts to start building Amazon FPGA Images (AFI). +The HDK can be installed on any on-premises server or an EC2 instance. +The developer kit is not required if you plan to use a pre-built AFI shared from another developer. - -# Software-defined Development Environment +## Software-defined Development Environment -The software-defined development environment allows customers to compile their C/C++/OpenCL code into the FPGA as kernels, and use OpenCL APIs to pass data to the FPGA. Software developers with no FPGA experience will find a familiar development experience that supercharges cloud applications. +The software-defined development environment allows customers to compile their C/C++/OpenCL code into the FPGA as kernels, and use OpenCL APIs to pass data to the FPGA. +Software developers with no FPGA experience will find a familiar development experience that supercharges cloud applications. 
-In addition, this development environment (also called SDAccel) allows the mix of C/C++ and RTL accelerator designs into a C/C++ software based development environment. This method enables faster prototyping using C/C++ while supporting manual optimization of critical blocks within RTL. This approach is similar to optimizing time critical functions using software compiler optimization methods. +In addition, this development environment allows for a mix of C/C++ and RTL accelerator designs into a C/C++ software based development environment. This method enables faster prototyping using C/C++ while supporting manual optimization of critical blocks within RTL. This approach is similar to optimizing time critical functions using software compiler optimization methods. -This developer kit has 80+ examples to help you get started on FPGA acceleration. To get started, review the [Software-defined development environment readme](SDAccel/README.md). +To get started with Xilinx SDAccel, review the [Software-defined development environment readme](SDAccel/README.md). +To get started with Xilinx Vitis, review the [Vitis unified development environment readme](Vitis/README.md). - -# Runtime Tools (SDK) +## Runtime Tools (SDK) The [SDK directory](./sdk/README.md) includes the runtime environment required to run on EC2 FPGA instances. It includes the drivers and tools to manage the AFIs that are loaded on the FPGA instance. The SDK isn't required during the AFI development process; it is only required once an AFI is loaded onto an EC2 FPGA instance. The following sdk resources are provided: * Linux Kernel Drivers - The developer kit includes three drivers: * [XDMA Driver](sdk/linux_kernel_drivers/xdma/README.md) - DMA interface to/from HDK accelerators. - * [XOCL Driver](sdk/linux_kernel_drivers/xocl) - DMA interface with software defined accelerators (also called hardware kernels). * [FPGA Libraries](sdk/userspace/fpga_libs) - APIs used by C/C++ host applications. 
* [FPGA Management Tools](sdk/userspace/fpga_mgmt_tools/README.md) - AFI management APIs for runtime loading/clearing FPGA image, gathering metrics and debug interface on the F1 instance. - -# Developer Support +# Amazon EC2 F1 Platform Features +* 1-8 Xilinx UltraScale+ VU9P based FPGA slots +* Per FPGA Slot, Interfaces available for Custom Logic(CL): + * One x16 PCIe Gen 3 Interface + * Four DDR4 RDIMM interfaces (with ECC) + * AXI4 protocol support on all interfaces +* User-defined clock frequency driving all CL to Shell interfaces +* Multiple free running auxiliary clocks +* PCI-E endpoint presentation to Custom Logic(CL) + * Management PF (physical function) + * Application PF +* Virtual JTAG, Virtual LED, Virtual DIP Switches +* PCI-E interface between Shell(SH) and Custom Logic(CL). + * SH to CL inbound 512-bit AXI4 interface + * CL to SH outbound 512-bit AXI4 interface + * Multiple 32-bit AXI-Lite buses for register access, mapped to different PCIe BARs + * Maximum payload size set by the Shell + * Maximum read request size set by the Shell + * AXI4 error handling +* DDR interface between SH and CL + * CL to SH 512-bit AXI4 interface + * 1 DDR controller implemented in the SH (always available) + * 3 DDR controllers implemented in the CL (configurable number of implemented controllers allowed) + +# Getting Started + +### Getting familiar with AWS +If you have never used AWS before, we recommend you start with [AWS getting started training](https://aws.amazon.com/getting-started/), and focus on the basics of the [AWS EC2](https://aws.amazon.com/ec2/) and [AWS S3](https://aws.amazon.com/s3/) services. +Understanding the fundamentals of these services will make it easier to work with AWS F1 and the FPGA Developer Kit. + +FPGA Image generation and EC2 F1 instances are supported in the us-east-1 (N. 
Virginia), us-west-2 (Oregon), eu-west-1 (Ireland) and us-gov-west-1 ([GovCloud US](https://aws.amazon.com/govcloud-us/)) [regions](https://aws.amazon.com/about-aws/global-infrastructure/). -The [**Amazon FPGA Development User Forum**](https://forums.aws.amazon.com/forum.jspa?forumID=243&start=0) is the first place to go to post questions, learn from other users and read announcements from the EC2 FPGA team. +> ⚠️ NOTE: By default, your AWS Account will have an EC2 F1 Instance launch limit of 0. +> Before using F1 instances, you will have to open a [Support Case](https://console.aws.amazon.com/support/home#/case/create) to increase the EC2 Instance limits to allow launching F1 instances. -* Click the "Watch" button in GitHub upper right corner to get regular updates. -* We recommend you will join the [AWS forum](https://forums.aws.amazon.com/forum.jspa?forumID=243) to engage with the FPGA developer community and get help when needed (both AWS and Xilinx engineers monitor this forum). -* In case you can't see "Your Stuff" details, you will need to logout using the logout button on the forums page and log back in again. +### Setting up development environment for the first time + +You have the choice to develop on AWS EC2 using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) or on-premise. + +> ℹ️ INFO: We suggest starting with the FPGA Developer AMI with [build instances](#fpga-developer-ami) on EC2 as it has Xilinx tools and licenses set up for you to be able to quickly get into development. + +> ℹ️ INFO: For on-premise development, you will need to have [Xilinx tools and licenses available for you to use](./docs/on_premise_licensing_help.md). + +1. Start a Build Instance first to start your development. + > 💡 TIP: This instance does not have to be an F1 instance. You only require an F1 instance to run your AFIs (Amazon FPGA Images) once you have gone through your design build and AFI creation steps.
+ + > ℹ️ INFO: If you need to follow GUI Development flows, please check out our [Developer Resources](./developer_resources/README.md) where we provide step-by-step guides to setting up a GUI Desktop. +1. Clone the [FPGA Developer Kit](https://github.com/aws/aws-fpga) on your instance. + ```git clone https://github.com/aws/aws-fpga.git``` +1. Follow the quickstarts from the next section. + +### Quickstarts +Before you create your own AWS FPGA design, we recommend that you go through one of the step-by-step Quickstart guides: + +| Description | Quickstart | Next Steps | +|----|----|----| +| Software Defined Accelerator Development using Xilinx Vitis | [Vitis hello_world Quickstart](Vitis/README.md) | [60+ Vitis examples](./Vitis/examples/), [Vitis Library Examples](./docs/examples/example_list.md) | +| Software Defined Accelerator Development using Xilinx SDAccel | [SDAccel hello_world Quickstart](SDAccel/README.md) | [60+ SDAccel examples](./SDAccel/examples/) | +| Custom Hardware Development(HDK) | [HDK hello_world Quickstart](hdk/README.md) | [CL to Shell and DRAM connectivity example](./hdk/cl/examples/cl_dram_dma), [Virtual Ethernet Application](./sdk/apps/virtual-ethernet) using the [Streaming Data Engine](./hdk/cl/examples/cl_sde) | +| IP Integrator/High Level Design(HLx) | [IPI hello_world Quickstart](hdk/cl/examples/cl_hello_world_hlx/README.md) | [IPI GUI Examples](hdk/docs/IPI_GUI_Examples.md) | + +ℹ️ INFO: For more in-depth applications and examples of using High-Level Synthesis, Vitis Libraries, App Notes and Workshops, please refer to our [Example List](./docs/examples/example_list.md). + +### How Tos +| How To | Description | +|----|----| +| [Migrate Alveo U200 designs to F1](./Vitis/docs/Alveo_to_AWS_F1_Migration.md) | This application note shows the ease of migrating an Alveo U200 design to F1.
| - # Documentation Overview -The documentation is located throughout this developer kit, therefore, to help developers find information quicker the table below consolidates a list of key documents: +Documentation is located throughout this developer kit and the table below consolidates a list of key documents to help developers find information: | Topic | Document Name | Description | |-----------|-----------|------| -| Developer Kit Features | [RELEASE\_NOTES](./RELEASE_NOTES.md), [Errata](./ERRATA.md) | Release notes and Errata for all developer kit features, excluding the shell | -| Frequently asked questions | [FAQ](./FAQs.md), [Errata](./ERRATA.md) | Q/A are added based on developer feedback and common AWS forum questions | -| F1 Shell (HDK) | [AWS\_Shell\_RELEASE\_NOTES](./hdk/docs/AWS_Shell_RELEASE_NOTES.md), [AWS\_Shell\_ERRATA](./hdk/docs/AWS_Shell_ERRATA.md) | Release notes and Errata for F1 shell | -| F1 Shell (HDK) | [AWS\_Shell\_Interface\_Specification](hdk/docs/AWS_Shell_Interface_Specification.md) | Shell-CL interface specification for HDK developers building AFI | -| AWS setup | [Setup\_AWS\_CLI\_and\_S3\_Bucket](SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md) | Setup instructions for preparing for AFI creation | -| SDx graphical interface (SDAccel) | [README\_GUI](SDAccel/docs/README_GUI.md) | Instructions using the SDx GUI for software defined acceleration development and debug | -| Software defined acceleration using RTL (SDAccel) | [Debug\_RTL\_Kernel](SDAccel/docs/Debug_RTL_Kernel.md) | Instructions on debugging RTL Kernel | -| Software defined acceleration Run time (SDAccel) | [Create\_Runtime\_AMI](SDAccel/docs/Create_Runtime_AMI.md) | Instructions on creating a runtime AMI | -| Host Application (HDK) | [Programmer\_View](hdk/docs/Programmer_View.md) | Host application to CL interface specification | -| CL Debug (HDK) | [Virtual\_JTAG\_XVC](hdk/docs/Virtual_JTAG_XVC.md) | Debugging CL using Virtual JTAG (Chipscope) | -| CL/Shell Simulation 
(HDK) | [RTL\_Simulating\_CL\_Designs](hdk/docs/RTL_Simulating_CL_Designs.md) | Shell-CL simulation specification | -| Driver (HDK) | [README](sdk/linux_kernel_drivers/xdma/README.md) | Describes the DMA driver (XDMA) used by HDK examples and includes a link to an installation guide | -| Shell Timeout and AXI Protocol Protection | [HOWTO\_detect\_shell\_timeout](hdk/docs/HOWTO_detect_shell_timeout.md) | The shell will terminate transactions after a time period or on an illegal transaction. This describes how to detect and gather data to help debug CL issues caused by timeouts. | -| AFI Power | [afi\_power](hdk/docs/afi_power.md) | Helps developers with understanding AFI power and preventing power violations on the F1 instance | -| AFI Management | [README](sdk/userspace/fpga_mgmt_tools/README.md) | CLI documentation for managing AFI on the F1 instance | -| AFI Administration | [copy\_fpga\_image](hdk/docs/copy_fpga_image.md), [delete\_fpga\_image](hdk/docs/delete_fpga_image.md), [describe\_fpga\_images](hdk/docs/describe_fpga_images.md), [fpga\_image\_attributes](hdk/docs/fpga_image_attributes.md) | CLI documentation for administering AFIs | -| AFI Creation Error Codes | [create\_fpga\_image\_error\_codes](hdk/docs/create_fpga_image_error_codes.md) | CLI documentation for managing AFIs | -| Developing on-premises | [HDK: on\_premise\_licensing\_help](hdk/docs/on_premise_licensing_help.md), [SDAccel: On\_Premises\_Development\_Steps](SDAccel/docs/On_Premises_Development_Steps.md) | Guidance for developer wanting to develop AFIs from on-premises instead of using the [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) running on AWS EC2 | - - - -# Github tips and tricks - * [Cloning the repository](https://help.github.com/articles/cloning-a-repository/) - * [Forking the repository](https://help.github.com/articles/fork-a-repo/) - * [Searching code](https://help.github.com/articles/searching-code/) and [advanced search 
syntax](https://help.github.com/articles/understanding-the-search-syntax/) - * [Finding files](https://help.github.com/articles/finding-files-on-github/) - * Simply replace github.com with gitprint.com to generate a printable PDF +| AWS setup | [Setup AWS CLI and S3 Bucket](./SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md) | Setup instructions for preparing for AFI creation | +| Developer Kit | [RELEASE NOTES](./RELEASE_NOTES.md) | Release notes for all developer kit features, excluding the shell | +| Developer Kit | [Errata](./ERRATA.md) | Errata for all developer kit features, excluding the shell | +| F1 Shell | [AWS Shell RELEASE NOTES](./hdk/docs/AWS_Shell_RELEASE_NOTES.md) | Release notes for F1 shell | +| F1 Shell | [AWS Shell ERRATA](./hdk/docs/AWS_Shell_ERRATA.md) | Errata for F1 shell | +| F1 Shell | [AWS Shell Interface Specification](./hdk/docs/AWS_Shell_Interface_Specification.md) | Shell-CL interface specification for HDK developers building AFI | +| F1 Shell - Timeout and AXI Protocol Protection | [How to detect a shell timeout](hdk/docs/HOWTO_detect_shell_timeout.md) | The shell will terminate transactions after a time period or on an illegal transaction. This describes how to detect and gather data to help debug CL issues caused by timeouts.
| +| Vitis | [Debug Vitis Kernel](./Vitis/docs/Debug_Vitis_Kernel.md) | Instructions on debugging Vitis Kernel | +| Vitis | [Create Runtime AMI](./Vitis/docs/Create_Runtime_AMI.md) | Instructions on creating a runtime AMI when using Xilinx Vitis| +| Vitis | [XRT Instructions](./Vitis/docs/XRT_installation_instructions.md) | Instructions on building, installing XRT with MPD daemon considerations for F1 | +| SDAccel | [Debug RTL Kernel](./SDAccel/docs/Debug_RTL_Kernel.md) | Instructions on debugging RTL Kernel with SDAccel | +| SDAccel | [Create Runtime AMI](./SDAccel/docs/Create_Runtime_AMI.md) | Instructions on creating a runtime AMI when using Xilinx SDAccel| +| HDK - Host Application | [Programmer View](./hdk/docs/Programmer_View.md) | Host application to CL interface specification | +| HDK - CL Debug | [Debug using Virtual JTAG](./hdk/docs/Virtual_JTAG_XVC.md) | Debugging CL using Virtual JTAG (Chipscope) | +| HDK - Simulation | [Simulating CL Designs](./hdk/docs/RTL_Simulating_CL_Designs.md) | Shell-CL simulation specification | +| HDK - Driver | [README](./sdk/linux_kernel_drivers/xdma/README.md) | Describes the DMA driver (XDMA) used by HDK examples and includes a link to an installation guide | +| AFI | [AFI Management SDK](./sdk/userspace/fpga_mgmt_tools/README.md) | CLI documentation for managing AFI on the F1 instance | +| AFI - EC2 CLI | [copy\_fpga\_image](./hdk/docs/copy_fpga_image.md), [delete\_fpga\_image](./hdk/docs/delete_fpga_image.md), [describe\_fpga\_images](./hdk/docs/describe_fpga_images.md), [fpga\_image\_attributes](./hdk/docs/fpga_image_attributes.md) | CLI documentation for administering AFIs | +| AFI - Creation Error Codes | [create\_fpga\_image\_error\_codes](hdk/docs/create_fpga_image_error_codes.md) | CLI documentation for managing AFIs | +| AFI - Power | [FPGA Power, recovering from clock gating](./hdk/docs/afi_power.md) | Helps developers with understanding FPGA power usage, preventing power violations on the F1 instance and 
recovering from a clock gated slot. | +| On-premise Development | [Tools, Licenses required for on-premise development](./docs/on_premise_licensing_help.md) | Guidance for developers wanting to develop AFIs on-premises instead of using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) | +| Frequently asked questions | [FAQ](./FAQs.md)| Q/A are added based on developer feedback and common AWS forum questions | +# Developer Support +* The [**Amazon FPGA Development User Forum**](https://forums.aws.amazon.com/forum.jspa?forumID=243&start=0) is the first place to go to post questions, learn from other users and read announcements. + * We recommend joining the [AWS forums](https://forums.aws.amazon.com/forum.jspa?forumID=243) to engage with the FPGA developer community, AWS and Xilinx engineers to get help. +* You could also file a [GitHub Issue](https://github.com/aws/aws-fpga/issues) for support. We prefer the forums as this helps the entire community learn from issues, feedback and answers. + * Click the "Watch" button in the upper right corner of GitHub to get regular updates. diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 86f06856..38d40175 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,33 +1,118 @@ - # AWS EC2 FPGA HDK+SDK Release Notes +## Release 1.4.18 (See [ERRATA](./ERRATA.md) for unsupported features) +* FPGA developer kit now supports Xilinx Vivado/Vitis 2020.2 + +## Release 1.4.17 (See [ERRATA](./ERRATA.md) for unsupported features) +* Updated XDMA Driver to allow builds on newer kernels +* Updated documentation on Alveo U200 to F1 platform porting +* Added Vitis 2019.2 Patching for AR#73068 + +## Release 1.4.16 (See [ERRATA](./ERRATA.md) for unsupported features) +* FPGA developer kit now supports Xilinx Vivado/Vitis 2020.1 + * To upgrade, use [Developer AMI v1.9.0](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on the AWS Marketplace. +* Updated Vitis examples to include usage of Vitis Libraries.
+* Added documentation and examples to show Xilinx Alveo design migration to F1. + +## Release 1.4.15a (See [ERRATA](./ERRATA.md) for unsupported features) +* Fixed Xilinx AR#73068 patching + * DDR4 IP needs to be regenerated for the patch to take effect. +* Updated cl_dram_dma public AFI. + +## Release 1.4.15 (See [ERRATA](./ERRATA.md) for unsupported features) +* Added Xilinx AR#73068 patching +* Added DMA range error to the interrupt status register metrics +* Enhanced DDR model rebuild qualifiers in hdk_setup.sh +* Updated Virtual JTAG Documentation + +## Release 1.4.14 (See [ERRATA](./ERRATA.md) for unsupported features) +* Updated Vitis Platform file to fix a DDR bandwidth issue +* Added Vitis Debug Documentation + +## Release 1.4.13 (See [ERRATA](./ERRATA.md) for unsupported features) +* FPGA developer kit now supports Xilinx Vivado/Vitis 2019.2 +* To upgrade, use [Developer AMI v1.8.0](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on the AWS Marketplace. + +## Release 1.4.12 (See [ERRATA](./ERRATA.md) for unsupported features) +* Added supported versions for BJS AMI's +* Added link to the re:Invent 19 F1 workshop +* Fixed missing extern C declaration by PR #473 +* Documentation Path fixes from PR #466, #468 and #470 + +## Release 1.4.11 (See [ERRATA](./ERRATA.md) for unsupported features) +* FPGA developer kit now supports Xilinx SDx/Vivado 2019.1 + * We recommend developers upgrade to v1.4.11 to benefit from the new features, bug fixes, and optimizations. + * To upgrade, use [Developer AMI v1.7.0](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on the AWS Marketplace. The Developer Kit scripts (hdk_setup.sh or sdaccel_setup.sh) will detect the tool version and update the environment based on requirements needed for Xilinx 2019.1 tools. +* New functionality: + * Added a [developer resources section](./developer_resources/README.md) that provides guides on how to setup your own GUI Desktop and compute cluster environment. 
+ * Developers can now ask for AFI limit increases via the [AWS Support Center Console](https://console.aws.amazon.com/support/cases#/create). + * Create a case to increase your `EC2 FPGA` service limit from the console. + * HLx IPI flow updates + * HLx support for AXI Fast Memory mode. + * HLx support for 3rd party simulations. + * HLx support for changes in shell and AWS IP updates(e.g. sh_ddr). +* Bug Fixes: + * Documentation fixes in the [Shell Interface Specification](./hdk/docs/AWS_Shell_Interface_Specification.md) + * Fixes for forum questions + * [Unable to compile aws_v1_0_vl_rfs.sv in Synopsys VCS](https://forums.aws.amazon.com/thread.jspa?threadID=308829&tstart=0) + * [Use fpga_mgmt init in HLx runtime](https://forums.aws.amazon.com/thread.jspa?messageID=912063) + * New XRT versions added to the [XRT Installation Instructions](./SDAccel/docs/XRT_installation_instructions.md) to fix segmentation faults when using xclbin instead of awsxclbin files. +* Deprecations: + * Removed GUI Setup scripts from AMI v1.7.0 onwards. See the [developer resources section](./developer_resources/README.md) that provides guides on how to setup your own GUI Desktop and compute cluster environment. 
+* Package versions used for validation + + | Package | AMI 1.7.0 [2019.1] | AMI 1.6.0 [2018.3] |AMI 1.5.0 [2018.2] | AMI 1.4.0 [2017.4] | + |---------|---|------------------------|------------------------|-----------------------| + | OS | Centos 7.6 | Centos 7.6 | Centos 7.5, 7.6 | Centos 7.4 | + | kernel | 3.10.0-957.27.2.el7.x86_64 | 3.10.0-957.5.1.el7.x86_64 | 3.10.0-862.11.6.el7.x86_64, 3.10.0-957.1.3.el7.x86_64 | 3.10.0-693.21.1.el7.x86_64 | + | kernel-devel | 3.10.0-957.27.2.el7.x86_64 | 3.10.0-957.5.1.el7.x86_64 | 3.10.0-862.11.6.el7.x86_64, 3.10.0-957.1.3.el7.x86_64 | 3.10.0-693.21.1.el7.x86_64 | + | LIBSTDC++ | libstdc++-4.8.5-36.el7_6.2.x86_64 | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-16.el7_4.2.x86_64 | + +## Release 1.4.10 (See [ERRATA](./ERRATA.md) for unsupported features) +* New functionality: + * SDK now sorts the slots in DBDF order. Any scripts or integration maintainers should note that the slot order will be different from previous versions and should make any updates accordingly. + +* Bug Fixes: + * Fixes a bug in the [Automatic Traffic Generator (ATG)](./hdk/cl/examples/cl_dram_dma/design/cl_tst.sv). In SYNC mode, the ATG did not wait for write response transaction before issuing read transactions. 
+ * Released [Xilinx runtime (XRT) version 2018.3.3.2](https://github.com/Xilinx/XRT/releases/tag/2018.3.3.2) to fix the following error: + `symbol lookup error: /opt/xilinx/xrt/lib/libxrt_aws.so: undefined symbol: uuid_parse!` + * This release fixes a bug wherein concurrent AFI load requests on two or more slots caused a race condition which sometimes resulted in Error: `(20) pci-device-missing` + * This release fixes an issue with the coding style of logic which could infer a latch during synthesis in [sde_ps_acc module](./hdk/cl/examples/cl_sde/design/sde_ps_acc.sv) within the cl_sde example + +* Package versions used for validation + + | Package | AMI 1.6.0 [2018.3] |AMI 1.5.0 [2018.2] | AMI 1.4.0 [2017.4] | + |---------|------------------------|------------------------|-----------------------| + | OS | Centos 7.6 | Centos 7.5, 7.6 | Centos 7.4 | + | kernel | 3.10.0-957.5.1.el7.x86_64 | 3.10.0-862.11.6.el7.x86_64, 3.10.0-957.1.3.el7.x86_64 | 3.10.0-693.21.1.el7.x86_64 | + | kernel-devel | 3.10.0-957.5.1.el7.x86_64 | 3.10.0-862.11.6.el7.x86_64, 3.10.0-957.1.3.el7.x86_64 | 3.10.0-693.21.1.el7.x86_64 | + | LIBSTDC++ | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-16.el7_4.2.x86_64 | + -## AWS EC2 F1 Platform Features: - * 1-8 Xilinx UltraScale+ VU9P based FPGA slots - * Per FPGA Slot, Interfaces available for Custom Logic(CL): - * One x16 PCIe Gen 3 Interface - * Four DDR4 RDIMM interfaces (with ECC) - * AXI4 protocol support on all interfaces - * User-defined clock frequency driving all CL to Shell interfaces - * Multiple free running auxiliary clocks - * PCI-E endpoint presentation to Custom Logic(CL) - * Management PF (physical function) - * Application PF - * Virtual JTAG, Virtual LED, Virtual DIP Switches - * PCI-E interface between Shell(SH) and Custom Logic(CL).
- * SH to CL inbound 512-bit AXI4 interface - * CL to SH outbound 512-bit AXI4 interface - * Multiple 32-bit AXI-Lite buses for register access, mapped to different PCIe BARs - * Maximum payload size set by the Shell - * Maximum read request size set by the Shell - * AXI4 error handling - * DDR interface between SH and CL - * CL to SH 512-bit AXI4 interface - * 1 DDR controller implemented in the SH (always available) - * 3 DDR controllers implemented in the CL (configurable number of implemented controllers allowed) +## Release 1.4.9 (See [ERRATA](./ERRATA.md) for unsupported features) + * New functionality: + * Improved AFI load times for pipelined accelerator designs. For more details please see [Amazon FPGA image (AFI) pre-fetch and caching features](./hdk/docs/load_times.md). + * Ease of Use features: + * [Improved SDK Error messaging](./sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c) + * [Improved documentation](./hdk/docs/IPI_GUI_Vivado_Setup.md#switching-between-hdk-and-hlx-flows) to help with transition from [HLX to HDK command line flows](https://forums.aws.amazon.com/thread.jspa?threadID=302718&tstart=0) and vice versa + * Incorporates feedback from [aws-fpga Issue 458](https://github.com/aws/aws-fpga/issues/458) by making the ```init_ddr``` function, used in design simulations to initialize DDR, more generic by moving out ATG deselection logic to a new ```deselect_atg_hw``` task + + * Bug Fixes: + * Fixed Shell simulation model (sh_bfm) issue on PCIM AXI read data channel back pressure which was described in HDK 1.4.8 Errata. + * Fixed HDK simulation example which [demonstrates DMA and PCIM traffic in parallel](./hdk/cl/examples/cl_dram_dma/verif/tests/test_dma_pcim_concurrent.sv). 
+ + * Package versions used for validation + + | Package | AMI 1.6.0 [2018.3] |AMI 1.5.0 [2018.2] | AMI 1.4.0 [2017.4] | + |---------|------------------------|------------------------|-----------------------| + | OS | Centos 7.6 | Centos 7.5, 7.6 | Centos 7.4 | + | kernel | 3.10.0-957.5.1.el7.x86_64 | 3.10.0-862.11.6.el7.x86_64, 3.10.0-957.1.3.el7.x86_64 | 3.10.0-693.21.1.el7.x86_64 | + | kernel-devel | 3.10.0-957.5.1.el7.x86_64 | 3.10.0-862.11.6.el7.x86_64, 3.10.0-957.1.3.el7.x86_64 | 3.10.0-693.21.1.el7.x86_64 | + | LIBSTDC++ | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-16.el7_4.2.x86_64 | + ## Release 1.4.8 (See [ERRATA](./ERRATA.md) for unsupported features) - * FPGA developer kit supports Xilinx SDx/Vivado 2018.3 + * FPGA developer kit supports Xilinx SDx/Vivado 2018.3 * We recommend developers upgrade to v1.4.8 to benefit from the new features, bug fixes, and optimizations. To upgrade, use [Developer AMI v1.6.0](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on AWS Marketplace. The Developer Kit scripts (hdk_setup.sh or sdaccel_setup.sh) will detect the tool version and update the environment based on requirements needed for Xilinx 2018.3 tools. 
* Ease of Use features: * Support for importing results into SDx GUI - By importing results from a script-based flow into SDx IDE, developers can leverage the tools for debug/profiling while keeping flexibility of the script-based flow @@ -58,9 +143,6 @@ | LIBSTDC++ | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-36.el7.x86_64 | libstdc++-4.8.5-16.el7_4.2.x86_64 | - - - ## Release 1.4.7 (See [ERRATA](./ERRATA.md) for unsupported features) * Adds [Xilinx Runtime (XRT)](https://github.com/Xilinx/XRT/releases/tag/2018.2_XDF.RC5) Support for Linux kernel 3.10.0-957.1.3.el7.x86_64 & Centos 7.6 @@ -107,7 +189,7 @@ ## Release 1.4.5 (See [ERRATA](./ERRATA.md) for unsupported features) -* [Documents SDAccel Runtime compatibility](SDAccel/docs/Create_Runtime_AMI.md#runtime-ami-compatability-table) +* [Documents SDAccel Runtime compatibility](SDAccel/docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table) * [Enables SDK FPGA Mgmt tool access to Non-root users](sdk/README.md#using-fpga-as-non-root-user) * Fixed issues * [HLX simulation failure](https://forums.aws.amazon.com/thread.jspa?threadID=293313&tstart=0) @@ -120,7 +202,7 @@ ## Release 1.4.3 (See [ERRATA](./ERRATA.md) for unsupported features) * [DRAM Data Retention](hdk/docs/data_retention.md) - With DRAM data retention, developers can simply load a new AFI and continue using the data that is persistently kept in the DRAM attached to the FPGA, eliminating unnecessary data movements and greatly improving the overall application performance. * [Virtual Ethernet](./sdk/apps/virtual-ethernet/README.md) - Provides a low latency network interface for EC2 F1, that enables high performance hardware acceleration to ethernet based applications on AWS like firewalls, routers and advanced security virtual appliances. With Virtual Ethernet, developers are able to create F1 accelerators that process ethernet packets directly from user-space on the FPGA with high throughput and low-latency. 
-* [Developer AMI v1.5](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) with Vivado/SDx 2018.2 tools - New FPGA developer AMI supporting Vivado 2018.2 for faster compile times, higher frequencies and improved timing closure +* [Developer AMI v1.5](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) with Vivado/SDx 2018.2 tools - New FPGA Developer AMI supporting Vivado 2018.2 for faster compile times, higher frequencies and improved timing closure ## Release 1.4.2 (See [ERRATA](./ERRATA.md) for unsupported features) * Fixed SDAccel XOCL driver compile fails that occur on linux kernels greater than 3.10.0-862.3.3.el7.x86_64 @@ -148,55 +230,6 @@ * Release 1.4.0 greatly improves the performance of the DMA (for interrupt driven DMA on the cl\_dram\_dma example design). This is accomplished through a combination of shell changes to relax DMA timeouts and a new XDMA software driver option. We have ported the relevant HDK examples to the XDMA driver in this release. EDMA is still supported, and developers can freely choose which DMA driver to use as part of their host application. -## Supported Tools and Environment - -* The HDK and SDK are designed for **Linux** environment and has not been tested on other platforms -* The First installation of AWS FPGA SDK requires having gcc installed on the instance. If it's not available, try `sudo yum update && sudo yum group install "Development Tools"` -* The HDK build step requires having Xilinx's Vivado tool and Vivado License Management running. These are provided with AWS FPGA Developer AMI at no additional cost -* This release is tested and validated with Xilinx 2017.4 SDx/Vivado -* Developers that choose to develop on-premises need to have Xilinx license 'EF-VIVADO-SDX-VU9P-OP' installed. 
For more help, please refer to the [on-premises licensing help](./hdk/docs/on_premise_licensing_help.md) -* The following simulators are supported with this HDK: -**Vivado XSIM RTL simulator -** Mentor Graphics' Questa RTL simulator (with a separate license from MentorGraphics) -** Synopsys' VCS RTL simulator (with a separate license from Synopsys) - -## License Requirements - -The HDK and SDK in the FPGA development kit have different licenses. For more details please refer to the [HDK License](./hdk/LICENSE.txt) and the [SDK License](./sdk/LICENSE.txt). - -## FAQs - -**Q: How do I know which HDK version I have on my instance/machine? ** - -Look for the ./hdk/hdk_version.txt file. - -**Q: How do I know what my Shell version is? ** - -The Shell version of an FPGA slot is available through the FPGA Image Management tools after an AFI has been loaded. See the description of `fpga-describe-local-image` for more details on retrieving the shell version from a slot. Prior to loading an AFI, the state of the FPGA (including shell version) is undefined and non-deterministic. - -**Q: How do I know what version of FPGA Image management tools are running on my instance? ** - -The FPGA Image management tools version is reported with any command executed from these tools. See the description of `fpga-describe-local-image` for more details. - -**Q: How do I update my existing design with this release?** - -1. Start by either cloning the entire GitHub structure for the HDK release or downloading new directories that have changed. AWS recommends an entire GitHub clone to ensure no files are missed -2. Update the CL design to conform to the new AWS_Shell_Interface_Specification TODO: add link. TODO: need a doc to outline what changes are a MUST in this upgrade, and which ones are optional? -3. Follow the process for AFI generation outlined in aws-fpga/hdk/cl/examples/readme.md -4. 
Update FPGA Image Management Tools to the version included in aws-fpga/sdk/management -TODO: SDaccel design have different steps? - -**Q: How do I get support?** - -The FPGA Development forum provides an easy access to Developer support. It's the first place to go to post questions, suggestions and receive important announcements from the AWS FPGA team. To gain access to the user forum, please go to https://forums.aws.amazon.com/index.jspa and login. To be notified of important messages you will need to click the “Watch Forum” button on the right side of the screen. - -**Q: How do I know which HDK GitHub release I am working with? ** - -See the release notes at the top of the GitHub directory to identify the version of your GitHub clone. - -TODO: The following major features are included in this HDK release: - - ## Previous release notes ## Release 1.3.X Details (See [ERRATA](./ERRATA.md) for unsupported features) @@ -224,7 +257,7 @@ The following major features are included in this HDK release: * Restrictions on URAM have been updated to enable 100% of the URAM with a CL to be utilized. See documentation on enabling URAM utilization: [URAM_options](./hdk/docs/URAM_Options.md) ### 5. Vivado IP Integrator (IPI) and GUI Workflow -* Vivado graphical design canvas and project-based flow is now supported. This flow allows developers to create CL logic as either RTL or complex subsystems based on an IP centric block diagram. Prior experience in RTL or system block designs is recommended. The [IP Integrator and GUI Vivado workflow](hdk/docs/IPI_GUI_Vivado_Setup.md) enables a unified graphical environment to guide the developer through the common steps to design, implement, and verify FGPAs. To get started, start with the [README that will take you through getting started steps and documents on IPI](hdk/docs/IPI_GUI_Vivado_Setup.md) +* Vivado graphical design canvas and project-based flow is now supported. 
This flow allows developers to create CL logic as either RTL or complex subsystems based on an IP centric block diagram. Prior experience in RTL or system block designs is recommended. The [IP Integrator and GUI Vivado workflow](hdk/docs/IPI_GUI_Vivado_Setup.md) enables a unified graphical environment to guide the developer through the common steps to design, implement, and verify FPGAs. To get started, start with the [README that will take you through getting started steps and documents on IPI](hdk/docs/IPI_GUI_Vivado_Setup.md) ### 6. Build Flow improvments * See [Build_Scripts](./hdk/common/shell_v04261818/build/scripts) diff --git a/SDAccel/FAQ.md b/SDAccel/FAQ.md index adc3d021..3f41a2d6 100644 --- a/SDAccel/FAQ.md +++ b/SDAccel/FAQ.md @@ -1,51 +1,59 @@ # Frequently Asked Questions (FAQ) -## Q: When I run my application on F1, I see these errors: ERROR: Failed to load xclbin ERROR: No program executable for device ERROR: buffer (2) is not resident in device (0)", how to debug these errors? -A: First double check that your AFI has been generated successfully by reviewing the SDAccel README. Second, check that you are running your application on F1 using sudo. Lastly, check that your AWS CLI (configure) was configured using output format as json. +## Q: When I run my application on F1, I see these errors: ERROR: Failed to load xclbin ERROR: No program executable for device ERROR: buffer (2) is not resident in device (0)", how to debug these errors? +A: +* Check that your AFI has been generated successfully by reviewing the SDAccel README. +* Check that you are running your application on F1 as super user(sudo). +* Lastly, check that your AWS CLI (configure) was configured using output format as json. ## Q: During AFI generation (create_sdaccel_afi.sh), how do I resolve this error: "An error occurred (AuthFailure) when calling the CreateFpgaImage operation: AWS was not able to validate the provided access credentials"? 
-A: For an AFI generation to complete all errors must be resolved. This error ("An error occurred (AuthFailure) when calling the CreateFpgaImage operation: AWS was not able to validate the provided access credentials") message means your AWS credentials were not setup properly or your IAM does not have access to the API (CreateFpgaImage). Here is some additional info on how to setup IAM privileges. -http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ec2-api-permissions.html +A: + +This error message means your AWS credentials or IAM role were not set up correctly to have access to the API (CreateFpgaImage). +AWS Accounts require IAM permissions to access API functions. To test your IAM permissions use [DescribeFpgaImages API](https://github.com/aws/aws-fpga/blob/master/hdk/docs/describe_fpga_images.md) + +To set up IAM privileges, please check the [EC2 API Permissions documentation](http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ec2-api-permissions.html) -AWS Accounts require IAM permisions to access API functions. To test your IAM permissions use DescribeFpgaImage API: -https://github.com/aws/aws-fpga/blob/master/hdk/docs/describe_fpga_images.md ## Q: During AFI generation (create_sdaccel_afi.sh), my AFI failed to generate and I see this error message in the log: "Provided clocks configuration is illegal. See AWS FPGA HDK documentation for supported clocks configuration. Frequency 0 is lower than minimal supported frequency of 80", how do I debug this message? -A: Please confirm that you successfully compiled your kernel for HW. For the quick start examples, you will need to have completed the quick start and successfully passed this command: make TARGETS=hw DEVICES=$AWS_PLATFORM all +A: +* Please confirm that you successfully compiled your kernel for HW. 
+* For the quick start examples, you will need to have completed the quick start and successfully passed this command: `make TARGETS=hw DEVICES=$AWS_PLATFORM all` -## Q: What is a xclbin or binary container on SDAccel? +## Q: What is a xclbin or binary container on SDAccel? What is an awsxclbin? A: The [xclbin](https://www.xilinx.com/html_docs/xilinx2017_2/sdaccel_doc/topics/design-flows/concept-create-compute-unit-binary.html) file or the "Binary Container" is a binary library of kernel compute units that will be loaded together into an OpenCL context for a specific device. -AWS uses a modified version of the xclbin called awsxclbin. The awsxclbin contains the xclbin metadata and AFI ID. +AWS uses a modified version of the xclbin called awsxclbin. The awsxclbin contains the xclbin metadata and AFI ID. ## Q: What can we investigate when xocc fails with a path not meeting timing? A: An example is WARNING: [XOCC 60-732] Link warning: One or more timing paths failed timing targeting MHz for . The frequency is being automatically changed to MHz to enable proper functionality. 1. Generally speaking, lowering the clock will make the design functionally operational in terms of operations (since there will not be timing failures) but the design might not operate at the performance needed due this clock frequency change. We can review what can be done. -1. If CLOCK_NAME is `kernel clock 'DATA_CLK'` then this is the clock that drives the kernels. Try reducing the kernel clock frequency see --kernel_frequency option to xocc in [latest SDAccel Environment User Guide] +1. If CLOCK_NAME is `kernel clock 'DATA_CLK'` then this is the clock that drives the kernels. Try reducing the kernel clock frequency see --kernel_frequency option to xocc in the [latest SDAccel Environment User Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_1/ug1023-sdaccel-user-guide.pdf). 1. 
If CLOCK_NAME is `system clock 'clk_main_a0'` then this is the clock clk_main_a0 which drives the AXI interconnect between the AWS Shell and the rest of the platform (SDAccel peripherals and user kernels). Using --kernel_frequency as above does not have any direct effect but might have side effect in changing the topology/placement of the design and improve this issue. 1. If OCL/C/C++ kernels were also used, investigate VHLS reports / correlate with kernel source code to see if there are functions with large number of statements in basic block, examples: might have unrolled loops with large loop-count, might have a 100++ latency; the VHLS runs and log files are located in the directory named `_xocc*compile*` 1. Try `xocc -O3` to run bitstream creation process with higher efforts. -1. Open a Vivado implementation project using ```vivado `find -name ipiimpl.xpr` ``` to analyze the design; needs Vivado knowledge; see [UltraFast Design Methodology Guide for the Vivado][latest UG949] +1. Open a Vivado implementation project using ```vivado `find -name ipiimpl.xpr` ``` to analyze the design; needs Vivado knowledge; see [UltraFast Design Methodology Guide for the Vivado](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_1/ug949-vivado-design-methodology.pdf) ## Q: xocc issues message WARNING: [XOCC 204-69] Unable to schedule ...due to limited memory ports. A: This may lower the performance of the implementation. -Details on this are provided in [Debug HLS Performance: Limited memory ports] +Details on this are provided in [the SDAccel HLS Debug document](docs/SDAccel_HLS_Debug.md) ## Q: xocc fails due to routing/resource overflow -A: Examine utilization reports. If OCL/C/C++ kernels were also used, look into the source code for excessive unroll happening. +A: Examine utilization reports. If OCL/C/C++ kernels were also used, look into the source code for excessive unroll happening. ## Q: How do I open the design as a Vivado project (.xpr)? 
A: There are 2 Vivado project files: 1. CL Design - from command line: ```vivado `find -name ipiprj.xpr\` ``` to see the connectivity of the created design -1. Implementation project - from command line: ```vivado `find -name ipiimpl.xpr\` ``` to analyze the design in the place and routing design phases. For an additional Vivado Design reference, see [UltraFast Design Methodology Guide for the Vivado][latest UG949] +1. Implementation project - from command line: ```vivado `find -name ipiimpl.xpr\` ``` to analyze the design in the place and routing design phases. + 1. For an additional Vivado Design reference, see the [UltraFast Design Methodology Guide for the Vivado](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_1/ug949-vivado-design-methodology.pdf) ## Q: What should I do if FPGA instance execution gets the wrong results or gets stuck? A: 1. Verify hw_emu works as expected -1. See "Chapter 4 - Debugging Applications in the SDAccel Environment" in [latest SDAccel Environment User Guide] +1. See the "Debugging Applications in the SDAccel Environment" chapter in the [latest SDAccel Environment User Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_1/ug1023-sdaccel-user-guide.pdf). ## Q: Bitstream creation fails to create design less that 60 MHz? A: SDAccel flow does not allow clocks running less than 60 MHz kernel clock, therefore, you will need to debug further using [HLS Debug suggestions](./docs/SDAccel_HLS_Debug.md) @@ -63,19 +71,21 @@ A: Please make sure you executed the following commands before launching the SDx ## Q: How do I debug error: `No current synthesis run set`? A: You may have run the previous [HDK IPI examples](../hdk/docs/IPI_GUI_Vivado_Setup.md) and created a `Vivado_init.tcl` file in `~/.Xilinx/Vivado`. It is recommended to remove it before switching from hardware development flow to SDAccel. 
-# Additional Resources +## Q: I am getting an error: `symbol lookup error: /opt/xilinx/xrt/lib/libxrt_aws.so: undefined symbol: uuid_parse` What should I do? +A: This error occurred because the XRT RPM was built without linking in a library needed for the uuid symbols. + To fix it, use the latest XRT RPMs documented in the [XRT installation document](docs/XRT_installation_instructions.md). -The [AWS SDAccel README]. +## Q: What is the lowest frequency SDAccel design supported on the AWS F1 Platform? +A: We support creating AFIs from CLs that have been built to work at frequencies no lower than 80MHz. + Re-clocking/Loading a dynamic clock frequency lower than 80MHz will also result in an error. -Xilinx web portal for [Xilinx SDAccel documentation] and for [Xilinx SDAccel GitHub repository] +# Additional Resources -Links pointing to **latest** version of the user guides - * [UG1023: SDAccel Environment User Guide][latest SDAccel Environment User Guide] - * [UG1021: SDAccel Environment Tutorial: Getting Started Guide (including emulation/build/running on H/W flow)][latest UG1021] - * [UG1207: SDAccel Environment Optimization Guide][latest SDAccel Environment Optimization Guide] - * [UG949: UltraFast Design Methodology Guide for the Vivado Design Suite][latest UG949] +* The [AWS SDAccel README](README.md). 
+* Xilinx web portal for [Xilinx SDAccel documentation](https://www.xilinx.com/products/design-tools/software-zone/sdaccel.html?resultsTablePreSelect=xlnxdocumenttypes:SeeAll#documentation) +* [Xilinx SDAccel GitHub repository](https://github.com/Xilinx/SDAccel_Examples) -Links pointing to **2017.4** version of the user guides +* Links pointing to **2017.4** version of the user guides * [UG1023: SDAccel Environment User Guide][UG1023 2017.4] * [UG1021: SDAccel Environment Tutorial: Getting Started Guide (including emulation/build/running on H/W flow)][UG1021 2017.4] * [UG1207: SDAccel Environment Optimization Guide][UG1207 2017.4] diff --git a/SDAccel/Makefile b/SDAccel/Makefile index 436c6882..284cbe04 100644 --- a/SDAccel/Makefile +++ b/SDAccel/Makefile @@ -40,9 +40,9 @@ $(info OS is $(OS)) MODULE := ifeq ($(RELEASE_VER),2017.4) DSA := $(DSA) - SRC_DIR = src2 + SRC_DIR = src XRT_HAL_LIB = libxrt-aws.so - EXE = awssak2 + EXE = awssak MODULE = xocl ifeq ($(OS),Ubuntu) GLIBCPP_PATH = lib/lnx64.o/Ubuntu diff --git a/SDAccel/README.md b/SDAccel/README.md index 2bfdfa23..da59da6e 100644 --- a/SDAccel/README.md +++ b/SDAccel/README.md @@ -43,7 +43,7 @@ It is highly recommended you read the documentation and utilize software and har * Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with SDAccel and required licenses. * You may use this F1 instance to [build your host application and Xilinx FPGA binary](#createapp), however, it is more cost efficient to either: * Launch the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on a compute EC2 instance, with a minimum of 30GiB RAM), **OR** - * Follow the [On-Premises Instructions](../hdk/docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. + * Follow the [On-Premises Instructions](../docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. 
* Setup AWS IAM permissions for creating FPGA Images (CreateFpgaImage and DescribeFpgaImages). [EC2 API Permissions are described in more detail](http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ec2-api-permissions.html). It is highly recommended that you validate your AWS IAM permissions prior to proceeding with this quick start. By calling the [DescribeFpgaImages API](../hdk/docs/describe_fpga_images.md) you can check that your IAM permissions are correct. * [Setup AWS CLI and S3 Bucket](docs/Setup_AWS_CLI_and_S3_Bucket.md) to enable AFI creation. * Install optional [packages](packages.txt) required to run all examples. If you do not install these packages, some examples may not work properly. The setup scripts will warn you of any missing packages. @@ -63,9 +63,7 @@ It is highly recommended you read the documentation and utilize software and har $ cd $AWS_FPGA_REPO_DIR $ source sdaccel_setup.sh ``` - * This section describes the valid platforms for shell_v04261818 - * Xilinx Tool 2017.4 Platform: - * AWS_PLATFORM_DYNAMIC_5_0 - (Default) AWS F1 platform dynamically optimized for multi DDR use cases. + * Valid platforms for shell_v04261818: `AWS_PLATFORM_DYNAMIC_5_0` (Default) AWS F1 platform dynamically optimized for multi DDR use cases. * Changing to a different platform can be accomplished by setting AWS_PLATFORM environment variable. 
Only one platform is supported for this example:   ``` @@ -91,7 +89,7 @@ For CPU-based (SW) emulation, both the host code and the FPGA binary code are co The instructions below describe how to run the SDAccel SW Emulation flow using the Makefile provided with a simple "hello world" example ``` - $ cd $SDACCEL_DIR/examples/xilinx/getting_started/host/helloworld_ocl/ + $ cd $SDACCEL_DIR/examples/xilinx/getting_started/hello_world/helloworld_ocl/ $ make clean $ make check TARGETS=sw_emu DEVICES=$AWS_PLATFORM all ``` @@ -106,7 +104,7 @@ The SDAccel hardware emulation flow enables the developer to check the correctne The instructions below describe how to run the HW Emulation flow using the Makefile provided with a simple "hello world" example: ``` - $ cd $SDACCEL_DIR/examples/xilinx/getting_started/host/helloworld_ocl/ + $ cd $SDACCEL_DIR/examples/xilinx/getting_started/hello_world/helloworld_ocl/ $ make clean $ make check TARGETS=hw_emu DEVICES=$AWS_PLATFORM all ``` @@ -120,7 +118,7 @@ The SDAccel system build flow enables the developer to build their host applicat The instructions below describe how to build the Xilinx FPGA Binary and host application using the Makefile provided with a simple "hello world" example: ``` - $ cd $SDACCEL_DIR/examples/xilinx/getting_started/host/helloworld_ocl/ + $ cd $SDACCEL_DIR/examples/xilinx/getting_started/hello_world/helloworld_ocl/ $ make clean $ make TARGETS=hw DEVICES=$AWS_PLATFORM all ``` @@ -139,7 +137,7 @@ This assumes you have: The [create_sdaccel_afi.sh](./tools/create_sdaccel_afi.sh) script is provided to facilitate AFI creation from a Xilinx FPGA Binary, it: * Takes in your Xilinx FPGA Binary \*.xclbin file -* Calls *aws ec2 create_fgpa_image* to generate an AFI under the hood +* Calls *aws ec2 create_fpga_image* to generate an AFI under the hood * Generates a \_afi_id.txt which contains the identifiers for your AFI * Creates an AWS FPGA Binary file with an \*.awsxclbin extension that is composed of: Metadata and 
AGFI-ID. * **This \*.awsxclbin is the AWS FPGA Binary file that will need to be loaded by your host application to the FPGA** @@ -193,15 +191,14 @@ For help with AFI creation issues, see [create-fpga-image error codes](../hdk/do # 3. Run the FPGA accelerated application on Amazon FPGA instances -Here are the steps: -* Start an FPGA instance using [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) and check the AMI [compatiability table](../README.md#devAmi) and [runtime compatilibility table](docs/Create_Runtime_AMI.md#runtime-ami-compatability-table). Alternatively, you can [create your own Runtime AMI](docs/Create_Runtime_AMI.md) for running your SDAccel applications on Amazon FPGA instances. +* Start an FPGA instance using [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) and check the AMI [compatibility table](../README.md#fpga-developer-ami) and [runtime compatibility table](docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table). Alternatively, you can [create your own Runtime AMI](docs/Create_Runtime_AMI.md) for running your SDAccel applications on Amazon FPGA instances. * *Assuming the developer flow (compilation) was done on a separate instance you will need to:* * Copy the compiled host executable (exe) to the new instance * Copy the \*.awsxclbin AWS FPGA binary file to the new instance - * Depending on the host code, the \*.awsxclbin may need to named .hw..awsxclbin . 
Ex: ```vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin``` + * Depending on the host code, the \*.awsxclbin may need to be named &lt;host_exe&gt;.hw.&lt;platform&gt;.awsxclbin. For example: ```vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin``` * Copy any data files required for execution to the new instance * [Clone the github repository to the new F1 instance and install runtime drivers](#gitsetenv) - * Clone the github repository to the new F1 instance and install runtime drivers + ``` $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR $ cd $AWS_FPGA_REPO_DIR @@ -212,7 +209,7 @@ Here are the steps: * Source the Runtime Environment & Execute your Host Application: ``` - $ sudo sh + $ sudo -E /bin/bash # source $AWS_FPGA_REPO_DIR/sdaccel_runtime_setup.sh # Other runtime env settings needed by the host app should be setup after this step # ./helloworld ``` diff --git a/SDAccel/docs/Create_Runtime_AMI.md b/SDAccel/docs/Create_Runtime_AMI.md index 0adf3f95..eadaf4ec 100644 --- a/SDAccel/docs/Create_Runtime_AMI.md +++ b/SDAccel/docs/Create_Runtime_AMI.md @@ -1,19 +1,20 @@ # Create a Runtime AMI Starting with an Amazon Linux AMI or Ubuntu -## Runtime AMI Compatability Table +## Runtime AMI Compatibility Table | SDx Version used for AFI Development | Compatible SDAccel Runtime | |--------------------------------------|-----------------------------| | 2017.4 | Runtime installed by sourcing "sdaccel_setup.sh" while using HDK Ver 1.4.X when environment variable RELEASE_VER=2017.4 | - | 2018.2 | AWS FPGA Developer AMI 1.5.0 ( XRT is pre-installed) or [Runtime installed with XRT Version 2.1.0](https://www.xilinx.com/html_docs/xilinx2018_2_xdf/sdaccel_doc/ejy1538090924727.html) | - | 2018.3 | AWS FPGA Developer AMI 1.6.0 ( XRT is pre-installed) or [Runtime installed with XRT Version 2.1.0](https://xilinx.github.io/XRT/2018.3/html/build.html) | + | 2018.2 | AWS FPGA Developer AMI 1.5.0 (XRT is pre-installed) or [Runtime installed with XRT Version 
2.1.0](https://www.xilinx.com/html_docs/xilinx2018_2_xdf/sdaccel_doc/ejy1538090924727.html) | + | 2018.3 | AWS FPGA Developer AMI 1.6.0 (XRT is pre-installed) or [Runtime installed with XRT Version 2.1.0](https://xilinx.github.io/XRT/2018.3/html/build.html) | + | 2019.1 | AWS FPGA Developer AMI 1.7.0 (XRT is pre-installed) or [Runtime installed with XRT Version 2.1.0](https://xilinx.github.io/XRT/2019.1/html/build.html) | ## 1. Launch a Runtime Instance & Install Required Packages * Please note Amazon Linux 2 or Amazon Linux are not supported by Xilinx XRT at this time. Please use Centos/RHEL or Ubuntu when using Xilinx XRT Runtimes for the AFIs generated using Xilinx SDx 2018.2 and 2018.3 toolsets. -* Launch an F1 instance using an [Amazon Linux AMI](https://aws.amazon.com/marketplace/pp/B00635Y2IW) or [Centos 7](https://aws.amazon.com/marketplace/pp/B00O7WM7QW) -* Install the required updates +* Launch an F1 instance using [Centos 7](https://aws.amazon.com/marketplace/pp/B00O7WM7QW) or Amazon Linux AMIs +* Update to get the latest packages. ```` $ sudo yum update @@ -54,11 +55,11 @@ * Using an instance running [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) or an on-premises machine with access to a Xilinx SDAccel Tools Installation, first source $AWS_FPGA_REPO_DIR/sdaccel_setup.sh and then run following commands: -* if using Ubuntu or debian distribution set GLIBPATH env variable to Ubuntu. If using any other OS distribution set GLIBPATH to default. +* If using Ubuntu or Debian distributions set GLIBPATH env variable to Ubuntu. If using any other OS distribution set GLIBPATH to default. -* set env variable 'XLNXRTE' to intended runtime install directory path. +* Set env variable 'XLNXRTE' to intended runtime install directory path. 
-### **For Vivado SDX 2017.4** +### **Xilinx SDx 2017.4:** ```` $ export GLIBPATH= @@ -67,8 +68,8 @@ $ mkdir -p $XLNXRTE/lib/lnx64.o $ mkdir -p $XLNXRTE/runtime/bin $ mkdir -p $XLNXRTE/runtime/lib/x86_64 - $ cp $SDACCEL_DIR/userspace/src2/libxrt-aws.so $XLNXRTE/runtime/platforms/xilinx_aws-vu9p-f1-04261818_dynamic_5_0/driver/ - $ cp $SDACCEL_DIR/tools/awssak2/xbsak $XLNXRTE/runtime/bin/ + $ cp $SDACCEL_DIR/userspace/src/libxrt-aws.so $XLNXRTE/runtime/platforms/xilinx_aws-vu9p-f1-04261818_dynamic_5_0/driver/ + $ cp $SDACCEL_DIR/tools/awssak/xbsak $XLNXRTE/runtime/bin/ $ cp $XIILNX_SDX/lib/lnx64.o/$GLIBPATH/libstdc++.so* xlnxrte/lib/x86_64/ $ cp $XIILNX_SDX/runtime/bin/xclbinsplit xlnxrte/runtime/bin/ $ cp $XIILNX_SDX/runtime/bin/xclbincat xlnxrte/runtime/bin/ @@ -79,14 +80,12 @@ * You may need to update path in $XLNXRTE/setup.sh and $XLNXRTE/setup.csh script to match your runtime instance. * Copy $XLNXRTE directory created to $HOME on your Runtime Instance. -### **For Vivado SDX 2018.2** - - Please refer [installing Xilinx SDx 2018.2 XRT](https://www.xilinx.com/html_docs/xilinx2018_2_xdf/sdaccel_doc/ejy1538090924727.html) for instructions on how to install XRT on your AMI. +### **Xilinx SDx 2018.2:** [Install 2018.2 XRT](https://www.xilinx.com/html_docs/xilinx2018_2_xdf/sdaccel_doc/ejy1538090924727.html). -### **For Vivado SDX 2018.3** - - Please refer [installing Xilinx SDx 2018.3 XRT](https://xilinx.github.io/XRT/2018.3/html/build.html) for instructions on how to install runtime on your AMI. +### **Xilinx SDx 2018.3:** [Install 2018.3 XRT](https://xilinx.github.io/XRT/2018.3/html/build.html). +### **Xilinx SDx 2019.1:** [Install 2019.1 XRT](https://xilinx.github.io/XRT/2019.1/html/build.html). + ## 3. Install Runtime Drivers and run your FPGA accelerated application on your Runtime Instance. 
* Log back on to the Runtime Instance: diff --git a/SDAccel/docs/README_GUI.md b/SDAccel/docs/README_GUI.md index 0818eaac..4d3f5485 100644 --- a/SDAccel/docs/README_GUI.md +++ b/SDAccel/docs/README_GUI.md @@ -7,7 +7,7 @@ The guide explains how to: 1. Verify the application 1. Build the application to execute on FPGA hardware -**Note**: It is highly recommended to review the [SDAccel Guide][SDAccel_Guide] to fully understand the SDAccel flow before using the GUI. +**Note**: It is highly recommended to review the [AWS F1 SDAccel Guide](SDAccel_Guide_AWS_F1.md) to fully understand the SDAccel flow before using the GUI. ## Cloning the aws-fpga Git repository The AWS Github repository contains the example used in this tutorial. @@ -29,13 +29,9 @@ The SDAccel examples from the github are downloaded by the above steps. However, First change directory to **helloworld_ocl** example. ``` - $ cd /SDAccel/examples/xilinx_2017.4/getting_started/host/helloworld_ocl -``` -The github examples use common header files and those needs to be copied in the local project source folder to make it easier to use. -Type the command **make local-files** to copy all necessary files in the local directory. -``` - $ make local-files + $ cd /SDAccel/examples/xilinx/getting_started/hello_world/helloworld_ocl ``` + The SDAccel GUI is invoked with the **sdx** command. ``` @@ -46,9 +42,9 @@ To debug using gdb inside from SDX gui few additional commands are needed to exe ``` $ mv /usr/local/Modules/init init.bak - $ unset –f switchml - $ unset –f _moduleraw - $ unset –f module + $ unset -f switchml + $ unset -f _moduleraw + $ unset -f module $ sdx ``` @@ -62,7 +58,7 @@ We will now cover the following steps: Add workspace inside the current directory named "GUI_test" as shown below. A new directory **GUI_test** will be created and used to store all logfiles of our runs. -![](./figure/gui_fig_1.JPG) +![](./figure/gui_fig_1.png)
@@ -71,7 +67,7 @@ You will get a Welcome screen. You need to set Platform path by selecting **Add
-![](./figure/gui_fig_2.JPG) +![](./figure/gui_fig_2.png) @@ -79,7 +75,7 @@ Click on the **plus** sign as shown below. -![](./figure/gui_fig_3.JPG) +![](./figure/gui_fig_3.png) diff --git a/SDAccel/docs/SDAccel_Guide_AWS_F1.md b/SDAccel/docs/SDAccel_Guide_AWS_F1.md index 98cffd63..66bf0a73 100644 --- a/SDAccel/docs/SDAccel_Guide_AWS_F1.md +++ b/SDAccel/docs/SDAccel_Guide_AWS_F1.md @@ -164,42 +164,19 @@ Conversely, code which is simply a few lines of basic operations, and has no tas # Additional Resources -The [AWS SDAccel README]. - -Xilinx web portal for [Xilinx SDAccel documentation] and for [Xilinx SDAccel GitHub repository] - - -Links pointing to **2017.4** version of the user guides -1. [UG1023: SDAccel Environment User Guide][UG1023 2017.4] -1. [UG1021: SDAccel Environment Tutorial: Getting Started Guide (including emulation/build/running on H/W flow)][UG1021 2017.4] -1. [UG1207: SDAccel Environment Optimization Guide][UG1207 2017.4] -1. [UG949: UltraFast Design Methodology Guide for the Vivado Design Suite][UG949 2017.4] -1. 
[UG1238: SDx Development Environment Release Notes, Installation, and Licensing Guide][UG1238 2017.4] - - - -[SDAccel_landing_page]: https://www.xilinx.com/products/design-tools/software-zone/sdaccel.html -[VHLS_landing_page]: https://www.xilinx.com/products/design-tools/vivado/integration/esl-design.html -[Vivado_landing_page]: https://www.xilinx.com/products/design-tools/vivado.html - -[SDAccel Environment User Guide]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1023-sdaccel-user-guide.pdf -[UG1021]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1021-sdaccel-intro-tutorial.pdf -[SDAccel Environment Optimization Guide]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1207-sdaccel-optimization-guide.pdf -[UG949]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug949-vivado-design-methodology.pdf -[UG902]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug902-vivado-high-level-synthesis.pdf - -[UG1023 2017.4]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1023-sdaccel-user-guide.pdf -[UG1021 2017.4]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1021-sdaccel-intro-tutorial.pdf -[UG1207 2017.4]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1207-sdaccel-optimization-guide.pdf -[UG1238 2017.4]:http://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1238-sdx-rnil.pdf -[Xilinx SDAccel documentation]: https://www.xilinx.com/products/design-tools/software-zone/sdaccel.html#documentation -[Xilinx SDAccel GitHub repository]: https://github.com/Xilinx/SDAccel_Examples -[UG949 2017.4]: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug949-vivado-design-methodology.pdf - -[AWS SDAccel Readme]: ../README.md -[OnPremiseDev]: ./On_Premises_Development_Steps.md -[Power_Analysis]: ./SDAccel_Power_Analysis.md -[GUI_README]: ./README_GUI.md -[FAQ]:../FAQ.md +* 
The [AWS SDAccel README](../README.md). +* Xilinx web portal for [Xilinx SDAccel documentation](https://www.xilinx.com/products/design-tools/software-zone/sdaccel.html?resultsTablePreSelect=xlnxdocumenttypes:SeeAll#documentation) +* [Xilinx SDAccel GitHub repository](https://github.com/Xilinx/SDAccel_Examples) +* [Xilinx SDAccel landing page](https://www.xilinx.com/products/design-tools/software-zone/sdaccel.html) +* [Vivado HLS landing page](https://www.xilinx.com/products/design-tools/vivado/integration/esl-design.html) +* [Vivado landing page](https://www.xilinx.com/products/design-tools/vivado.html) +* [SDAccel Environment User Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1023-sdaccel-user-guide.pdf) +* [SDAccel Intro Tutorial](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1021-sdaccel-intro-tutorial.pdf) +* [SDAccel Environment Optimization Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug1207-sdaccel-optimization-guide.pdf) +* [UltraFast Design Methodology Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug949-vivado-design-methodology.pdf) +* [Vivado High Level Synthesis User Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_4/ug902-vivado-high-level-synthesis.pdf) +* [On Premise Development steps](On_Premises_Development_Steps.md) +* [SDAccel Power Analysis](SDAccel_Power_Analysis.md) +* [FAQ](../FAQ.md) diff --git a/SDAccel/docs/SDAccel_HLS_Debug.md b/SDAccel/docs/SDAccel_HLS_Debug.md index dffbba14..6b0d4901 100755 --- a/SDAccel/docs/SDAccel_HLS_Debug.md +++ b/SDAccel/docs/SDAccel_HLS_Debug.md @@ -1,4 +1,4 @@ -# Debug HLS Performance: Limited memory ports. +# Debug HLS Performance: Limited memory ports In an ideal FPGA implementation, the kernel will process 1 data sample per clock cycle. 
In the High-Level Synthesis (HLS) technology used in SDAccel, this is referred to an II=1 implementation, where II is the Initiation Interval of design, or the number of clock cycles before the design can read new data inputs. diff --git a/SDAccel/docs/SDAccel_Migrate_dynamic_DSA.md b/SDAccel/docs/SDAccel_Migrate_dynamic_DSA.md index 7dac1ee4..b48b9afe 100644 --- a/SDAccel/docs/SDAccel_Migrate_dynamic_DSA.md +++ b/SDAccel/docs/SDAccel_Migrate_dynamic_DSA.md @@ -47,7 +47,7 @@ set_property sdx_kernel_type rtl [ipx::current_core] * Profiling hardware no longer pre-built in the platform. Instead, it is added compile time to the design. * This requires an update to the xocc command options. * (2017.4) Add the -profile_kernel option the xocc command to enable profile instrumentation when compiling the kernel; set profile=true in the sdaccel.ini file to collect profile data when running the application. - + * (2019.1) Add the -profile_kernel option to the xocc command to enable profile instrumentation when compiling the kernel; set profile=true in the xrt.ini file to collect profile data when running the application. ## Additional resources * [SDAccel Development Enviroment - Changes for 2017.4](https://www.xilinx.com/html_docs/xilinx2017_4/sdaccel_doc/jdl1512623841682.html) * [SDAccel Development Enviroment - Whats new for 2017.4](https://www.xilinx.com/html_docs/xilinx2017_4/sdaccel_doc/rke1512623904797.html) diff --git a/SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md b/SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md index 2b1bdeb4..288e6527 100644 --- a/SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md +++ b/SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md @@ -1,7 +1,7 @@ ## Setup CLI and Create S3 Bucket -The developer is required to create a S3 bucket for the AFI generation. The bucket will contain a tar file and logs which are generated from the AFI creation service. +The developer is required to create an S3 bucket for the AFI generation.
The bucket will contain a tar file and logs which are generated from the AFI creation service. -To install the AWS CLI, please follow the instructions here: (http://docs.aws.amazon.com/cli/latest/userguide/installing.html). +To install the AWS CLI, please follow the [instructions here](http://docs.aws.amazon.com/cli/latest/userguide/installing.html). The AWS SDAccel scripts require JSON output format and the scripts will not work properly if you use any other output format types (ex: text, table). JSON is the default output format of the AWS CLI. diff --git a/SDAccel/docs/XRT_installation_instructions.md b/SDAccel/docs/XRT_installation_instructions.md index c0125c13..a6503c98 100644 --- a/SDAccel/docs/XRT_installation_instructions.md +++ b/SDAccel/docs/XRT_installation_instructions.md @@ -1,122 +1,61 @@ -# XRT Installation Instructions - -# Installing Xilinx Runtime (XRT) 2018.3 RC3 Patch 1 - - * Applicable SDx Tool Version: 2018.3 - - * XRT Release Tag: 2018.3.3.1 (SHA: 48cafdc100b29843fd013d371ffba0141db06b7a) - - * [Xilinx Runtime (XRT) 2018.3 RC3 Patch 1 release](https://github.com/Xilinx/XRT/releases/tag/2018.3.3.1) - - ### Instructions to build & install XRT - - Pre-requisite commands used to build XRT for AWS F1 platform for this release - - ``` - git clone http://www.github.com/aws/aws-fpga.git - cd aws-fpga - source sdaccel_setup.sh - mkdir $SDACCEL_DIR/Runtime - cd $SDACCEL_DIR/Runtime - export XRT_PATH="${SDACCEL_DIR}/Runtime/XRT_20183rc3p1 " - git clone http://www.github.com/Xilinx/XRT.git -b 2018.3.3.1 ${XRT_PATH} - cd ${XRT_PATH} - sudo ./src/runtime_src/tools/scripts/xrtdeps.sh - cd build - - ``` - - Follow [Xilinx's instructions to build & install XRT on Centos/Redhat & Ubuntu/Debian](https://xilinx.github.io/XRT/master/html/build.html#xrt-for-pcie-platforms) to build XRT for supported OS. 
- - ### Install on Centos/RedHat Linux using prebuilt RPM - - ``` - curl -s https://s3.amazonaws.com/aws-fpga-developer-ami/1.6.0/Patches/XRT_2018_3_RC3_Patch1/xrt_201803.2.1.0_7.5.1804-xrt.rpm -o xrt_201803.2.1.0_7.5.1804-xrt.rpm - curl -s https://s3.amazonaws.com/aws-fpga-developer-ami/1.6.0/Patches/XRT_2018_3_RC3_Patch1/xrt_201803.2.1.0_7.5.1804-aws.rpm -o xrt_201803.2.1.0_7.5.1804-aws.rpm - sudo yum remove -y xrt-aws - sudo yum remove -y xrt - sudo yum install -y xrt_201803.2.1.0_7.5.1804-xrt.rpm - sudo yum install -y xrt_201803.2.1.0_7.5.1804-aws.rpm - - ``` - -# Installing Xilinx Runtime (XRT) 2018.2_XDF.RC4 - - * Applicable SDx Tool Version: 2018.2 - - * XRT Release Tag: 2018.2_XDF.RC4 (SHA: 343186f76f59edd01bc48d84cf67fe22a0a3f338) - - * [Xilinx Runtime (XRT) 2018.2_XDF.RC4 release](https://github.com/Xilinx/XRT/tree/2018.2_XDF.RC4) - - ### Instructions to build & install XRT - - Pre-requisite commands used to build XRT for AWS F1 platform for this release - - ``` - git clone http://www.github.com/aws/aws-fpga.git - cd aws-fpga - source sdaccel_setup.sh - mkdir $SDACCEL_DIR/Runtime - cd $SDACCEL_DIR/Runtime - export XRT_PATH="${SDACCEL_DIR}/Runtime/XRT_20182rc4" - git clone http://www.github.com/Xilinx/XRT.git -b 2018.2_XDF.RC4 ${XRT_PATH} - cd ${XRT_PATH} - sudo ./src/runtime_src/tools/scripts/xrtdeps.sh - cd build - - ``` - Follow [ Xilinx's instructions to build & install XRT on Centos/RedHat & Ubuntu/Debian](https://www.xilinx.com/html_docs/xilinx2018_2_xdf/sdaccel_doc/ejy1538090924727.html) to build XRT for supported OS. 
- - ### Install on Centos/RedHat Linux using prebuilt RPMs - - Run following commands to download and install XRT 2018.2_XDF.RC4 for 'Centos/RHEL' - - ``` - curl -s https://s3.amazonaws.com/aws-fpga-developer-ami/1.5.0/Patches/xrt_201802.2.1.0_7.5.1804-xrt.rpm -o xrt_201802.2.1.0_7.5.1804-xrt.rpm - curl -s https://s3.amazonaws.com/aws-fpga-developer-ami/1.5.0/Patches/xrt_201802.2.1.0_7.5.1804-aws.rpm -o xrt_201802.2.1.0_7.5.1804-aws.rpm - sudo yum remove -y xrt - sudo yum install -y xrt_201802.2.1.0_7.5.1804-xrt.rpm - sudo yum install -y xrt_201802.2.1.0_7.5.1804-aws.rpm - - ``` - -# Installing Xilinx Runtime (XRT) 2018.2_XDF.RC5 - - * Applicable SDx Tool Version: 2018.2 - - * XRT Release Tag: 2018.2_XDF.RC5 (SHA: 65ffad62f427c0bd1bc65b6ea555a810295468b7) - - * [Xilinx Runtime (XRT) 2018.2_XDF.RC5 release](https://github.com/Xilinx/XRT/releases/tag/2018.2_XDF.RC5) - - ### Instructions to build & install XRT - - Pre-requisite commands used to build XRT for AWS F1 platform for this release - - ``` - git clone http://www.github.com/aws/aws-fpga.git - cd aws-fpga - source sdaccel_setup.sh - mkdir $SDACCEL_DIR/Runtime - cd $SDACCEL_DIR/Runtime - export XRT_PATH="${SDACCEL_DIR}/Runtime/XRT_20182rc5 " - git clone http://www.github.com/Xilinx/XRT.git -b 2018.2_XDF.RC5 ${XRT_PATH} - cd ${XRT_PATH} - sudo ./src/runtime_src/tools/scripts/xrtdeps.sh - cd build - - ``` - Follow [ Xilinx's instructions to build & install XRT on Centos/RedHat & Ubuntu/Debian](https://www.xilinx.com/html_docs/xilinx2018_2_xdf/sdaccel_doc/ejy1538090924727.html) to build XRT for supported OS. 
- - ### Install on Centos/RedHat Linux using prebuilt RPMs - - Run following commands to download and install XRT 2018.2_XDF.RC5 for 'Centos/RHEL' - - ``` - curl -s https://s3.amazonaws.com/aws-fpga-developer-ami/1.5.0/Patches/XRT_2018_2_XDF_RC5/xrt_201802.2.1.0_7.5.1804-xrt.rpm -o xrt_201802.2.1.0_7.5.1804-xrt.rpm - curl -s https://s3.amazonaws.com/aws-fpga-developer-ami/1.5.0/Patches/XRT_2018_2_XDF_RC5/xrt_201802.2.1.0_7.5.1804-aws.rpm -o xrt_201802.2.1.0_7.5.1804-aws.rpm - sudo yum remove -y xrt-aws - sudo yum remove -y xrt - sudo yum install -y xrt_201802.2.1.0_7.5.1804-xrt.rpm - sudo yum install -y xrt_201802.2.1.0_7.5.1804-aws.rpm - - ``` +# Xilinx Runtime (XRT) and SDx Tool versions + +* Xilinx Runtime versions match with the tool that you created your SDAccel AFI with. +* We provide pre-built RPM's for Centos/RHEL and instructions for building XRT +* Use the below table as reference to install and use the correct XRT version for your applications. + +| Xilinx SDx Tool Version | XRT Release Tag | SHA | `xrt` and `xrt-aws` pre-built RPM's (Centos/RHEL) | +|---|---|---|---| +|2019.1| [2019.1.0.3](https://github.com/Xilinx/XRT/tree/2019.1.0.3) | 89e25d51313daac5c322dfb4e84707829306d3fe | [xrt_201910.2.2.0_7.7.1908-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.7.0/Patches/XRT_2019_1_0_3/xrt_201910.2.2.0_7.7.1908-xrt.rpm) [xrt_201910.2.2.0_7.7.1908-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.7.0/Patches/XRT_2019_1_0_3/xrt_201910.2.2.0_7.7.1908-aws.rpm) | +|2018.3| [2018.3_RC5](https://github.com/Xilinx/XRT/releases/tag/2018.3_RC5) | 8654da1f0d2bd196c9887bdcfe1479103a93e90a | [xrt_201830.2.1.0_7.6.1810-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.6.0/Patches/XRT_2018_3_RC5/xrt_201830.2.1.0_7.6.1810-xrt.rpm) [xrt_201830.2.1.0_7.6.1810-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.6.0/Patches/XRT_2018_3_RC5/xrt_201830.2.1.0_7.6.1810-aws.rpm) | +|2018.2| 
[2018.2_XDF.RC5](https://github.com/Xilinx/XRT/releases/tag/2018.2_XDF.RC5) | 65ffad62f427c0bd1bc65b6ea555a810295468b7 | [xrt_201802.2.1.0_7.5.1804-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.5.0/Patches/XRT_2018_2_XDF_RC5/xrt_201802.2.1.0_7.5.1804-xrt.rpm) [xrt_201802.2.1.0_7.5.1804-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.5.0/Patches/XRT_2018_2_XDF_RC5/xrt_201802.2.1.0_7.5.1804-aws.rpm) | +|2017.4| N/A** | N/A** | N/A** | +** Use XOCL for 2017.4 + +# Centos/RHEL build and install steps + +```bash +XRT_RELEASE_TAG=2019.1.0.3 # Substitute the XRT Release Tag for your SDx tool version from the table above + +git clone https://github.com/aws/aws-fpga.git + +cd aws-fpga +source sdaccel_setup.sh +cd $SDACCEL_DIR/Runtime +export XRT_PATH="${SDACCEL_DIR}/Runtime/${XRT_RELEASE_TAG}" +git clone http://www.github.com/Xilinx/XRT.git -b ${XRT_RELEASE_TAG} ${XRT_PATH} + +cd ${XRT_PATH} +sudo ./src/runtime_src/tools/scripts/xrtdeps.sh + +cd build +scl enable devtoolset-6 bash +./build.sh + +cd Release +sudo yum reinstall xrt_*.rpm -y +``` + +# Centos/RHEL pre-built RPM install steps + +### 2019.1 + +```bash +curl -s https://aws-fpga-developer-ami.s3.amazonaws.com/1.7.0/Patches/XRT_2019_1_0_3/xrt_201910.2.2.0_7.7.1908-xrt.rpm -o xrt.rpm +curl -s https://aws-fpga-developer-ami.s3.amazonaws.com/1.7.0/Patches/XRT_2019_1_0_3/xrt_201910.2.2.0_7.7.1908-aws.rpm -o xrt-aws.rpm +sudo yum reinstall xrt*.rpm -y +``` +### 2018.3 + +```bash +curl -s https://aws-fpga-developer-ami.s3.amazonaws.com/1.6.0/Patches/XRT_2018_3_RC5/xrt_201830.2.1.0_7.6.1810-xrt.rpm -o xrt.rpm +curl -s https://aws-fpga-developer-ami.s3.amazonaws.com/1.6.0/Patches/XRT_2018_3_RC5/xrt_201830.2.1.0_7.6.1810-aws.rpm -o xrt-aws.rpm +sudo yum reinstall xrt*.rpm -y +``` +### 2018.2 + +```bash +curl -s https://aws-fpga-developer-ami.s3.amazonaws.com/1.5.0/Patches/XRT_2018_2_XDF_RC5/xrt_201802.2.1.0_7.5.1804-xrt.rpm -o xrt.rpm +curl -s 
https://aws-fpga-developer-ami.s3.amazonaws.com/1.5.0/Patches/XRT_2018_2_XDF_RC5/xrt_201802.2.1.0_7.5.1804-aws.rpm -o xrt-aws.rpm +sudo yum reinstall xrt*.rpm -y +``` diff --git a/SDAccel/docs/figure/gui_fig_1.png b/SDAccel/docs/figure/gui_fig_1.png new file mode 100644 index 00000000..b98b468a Binary files /dev/null and b/SDAccel/docs/figure/gui_fig_1.png differ diff --git a/SDAccel/docs/figure/gui_fig_2.png b/SDAccel/docs/figure/gui_fig_2.png new file mode 100644 index 00000000..884371fb Binary files /dev/null and b/SDAccel/docs/figure/gui_fig_2.png differ diff --git a/SDAccel/docs/figure/gui_fig_3.png b/SDAccel/docs/figure/gui_fig_3.png new file mode 100644 index 00000000..83d61d20 Binary files /dev/null and b/SDAccel/docs/figure/gui_fig_3.png differ diff --git a/SDAccel/docs/README_third_party.md b/SDAccel/examples/3rd_party/README.md similarity index 80% rename from SDAccel/docs/README_third_party.md rename to SDAccel/examples/3rd_party/README.md index 1e8fc873..d50cca85 100644 --- a/SDAccel/docs/README_third_party.md +++ b/SDAccel/examples/3rd_party/README.md @@ -2,7 +2,6 @@ * In the interest of providing more examples for the user, we present this guide that tells how to port third party OpenCL examples to the SDAccel flow. * In this guide, we show the changes necessary to port third party host code and kernel code for 2 different examples. * We also show some [differences between the third party OpenCL and Xilinx SDAccel implementations](#xilinx-and-third-party-implementation-differences) that the user should be aware of. -* There is a third example (matrix_mult, not discussed here) available at SDAccel/examples/3rd_party. ## The file structure of the third party examples used in this guide. * The following shows the common file structure of the third party examples used in this guide. @@ -27,16 +26,16 @@ common/src/AOCLUtils/options.cpp ## Changes to the host code. 
- * The changes needed for the **vector_addition** host code can be found [here](../examples/3rd_party/vector_addition) in the file named vector_addition_main.cpp.diff. - * The changes needed for the **fft1d** host code can be found [here](../examples/3rd_party/fft1d) in the file named fft1d_main.cpp.diff. - * All the modified dependency files can be found in the [SDAccel/examples/3rd_party/common](../examples/3rd_party/common) directory. + * The changes needed for the **vector_addition** host code can be found [here](vector_addition) in the file named vector_addition_main.cpp.diff. + * The changes needed for the **fft1d** host code can be found [here](fft1d) in the file named fft1d_main.cpp.diff. + * All the modified dependency files can be found in the [SDAccel/examples/3rd_party/common](common) directory. ## Changes to the kernel code. * The kernel code, found in the <example_name>/device directory, will most likely need modifications. * The **vector addition** kernel does not need changes. * The **fft1d** example needs several changes due to the differences between the third party and Xilinx implementations. -* The changes needed for the fft1d.cl file are found [here](../examples/3rd_party/fft1d) in the file named fft1d_fft1d.cl.diff. +* The changes needed for the fft1d.cl file are found [here](fft1d) in the file named fft1d_fft1d.cl.diff. * See table below regarding [implementation differences between third party and Xilinx](#xilinx-and-third-party-implementation-differences). * The <example_name>/device/twid_radix4_8.cl file will get many warnings about casting from double to float. @@ -46,18 +45,18 @@ common/src/AOCLUtils/options.cpp sed 's/\([0-9]\)\( \{0,\}[,}]\)/\1f\2/g' twid_radix4_8.cl > tmp mv tmp twid_radix4_8.cl ``` -* The script above can be found [here](../examples/3rd_party/fft1d) named cast_float_const.sh. +* The script above can be found [here](fft1d) named cast_float_const.sh. ## Changes to the Makefile. 
* The third party Makefile can be replaced by a version that is similar to the SDAccel example Makefiles. -* For example, for the third party **vector_addition** code, the Makefile can be found [here](../examples/3rd_party/vector_addition). -* The **fft1d** example Makefile can be found [here](../examples/3rd_party/fft1d). +* For example, for the third party **vector_addition** code, the Makefile can be found [here](vector_addition). +* The **fft1d** example Makefile can be found [here](fft1d). ## Compiling and running. * The steps to compile and run would be the same as those used for the SDAccel examples with the exception that the host program would need the -hw=<mode> switch when running in emulation mode. -* For the complete guide on compiling and running the SDAccel examples, see [this](../README.md). +* For the complete guide on compiling and running the SDAccel examples, see [this](../../README.md). * To run in software emulation mode, use the following commands. ``` @@ -80,8 +79,7 @@ make TARGETS=hw DEVICES=$AWS_PLATFORM all ./main ``` -* For more information on running this example on an F1 instance, see [this](../README.md#runonf1). - +* For more information on running this example on an F1 instance, see [this](../../README.md#runonf1). ## Xilinx and third party Implementation Differences #### Host Code @@ -105,18 +103,12 @@ make TARGETS=hw DEVICES=$AWS_PLATFORM all | declares and initializes an struct object together | declare an struct object and then initialize it separately | ## SUPPORT -For more information check here: -[SDAccel User Guides][] +For more information check the [SDAccel User Guides](http://www.xilinx.com/support/documentation-navigation/development-tools/software-development/sdaccel.html?resultsTablePreSelect=documenttype:SeeAll#documentation) -For questions and to get help on this project or your own projects, visit the [SDAccel Forums][]. 
+For questions and to get help on this project or your own projects, visit the [SDAccel Forums](https://forums.xilinx.com/t5/SDAccel/bd-p/SDx) ## REVISION HISTORY Date | Readme Version | Revision Description --------|----------------|------------------------- SEP2017 | 1.0 | Initial release - - - -[SDAccel Forums]: https://forums.xilinx.com/t5/SDAccel/bd-p/SDx -[SDAccel User Guides]: http://www.xilinx.com/support/documentation-navigation/development-tools/software-development/sdaccel.html?resultsTablePreSelect=documenttype:SeeAll#documentation diff --git a/SDAccel/examples/3rd_party/matrix_mult/Makefile b/SDAccel/examples/3rd_party/matrix_mult/Makefile deleted file mode 100644 index d459aff2..00000000 --- a/SDAccel/examples/3rd_party/matrix_mult/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -## Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. -## -## Licensed under the Amazon Software License (the "License"). You may not use -## this file except in compliance with the License. A copy of the License is -## located at -## -## http://aws.amazon.com/asl/ -## -## or in the "license" file accompanying this file. This file is distributed on -## an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or -## implied. See the License for the specific language governing permissions and -## limitations under the License. 
- - -COMMON_REPO := $(SDACCEL_DIR)/examples/xilinx - -include $(COMMON_REPO)/utility/boards.mk -include $(COMMON_REPO)/libs/xcl/xcl.mk -include $(COMMON_REPO)/libs/opencl/opencl.mk - -main_SRCS=$(wildcard host/src/*.cpp ../common/src/AOCLUtils/*.cpp) $(xcl_SRCS) -main_HDRS=$(xcl_HDRS) - -main_CXXFLAGS=$(xcl_CXXFLAGS) $(opencl_CXXFLAGS) -Ihost/inc/ -I../common/inc/ -main_LDFLAGS=$(opencl_LDFLAGS) -lrt - -EXES=main - -# Kernel -matrix_mult_SRCS=./device/matrix_mult.cl -matrix_mult_CLFLAGS= -k matrix_mult -Ihost/inc/ -#Specifyinng Fifo depth for Dataflow -matrix_mult_CLFLAGS+=--xp "param:compiler.xclDataflowFifoDepth=32" - -XOS=matrix_mult - -# xclbin -matrix_mult_XOS=matrix_mult - -XCLBINS=matrix_mult - -# check -check_EXE=main -check_XCLBINS=matrix_mult - -CHECKS=check - -# Compilation flags -ifeq ($(DEBUG),1) -CXXFLAGS += -g -else -CXXFLAGS += -O2 -endif - -# Compiler -#CXX := g++ - -include $(COMMON_REPO)/utility/rules.mk diff --git a/SDAccel/examples/3rd_party/matrix_mult/README.md b/SDAccel/examples/3rd_party/matrix_mult/README.md deleted file mode 100644 index 228c3161..00000000 --- a/SDAccel/examples/3rd_party/matrix_mult/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Third party matrix multiply OpcnCL example. -## Compiling and running. -* The steps to compile and run are the same as those used for the SDAccel examples with the exception that the host program would need the -hw=<mode> switch when running in emulation mode. - - -* For the complete guide on compiling and running the SDAccel examples, see [this](../../../README.md). - -* To run in software emulation mode, use the following commands. - ``` -make clean -source $XILINX_SDX/settings64.sh -make TARGETS=sw_emu DEVICES=$AWS_PLATFORM all -./main -hw=sw_emu -``` - -* To run in hardware emulation mode, use the following commands. - ``` -make clean -source $XILINX_SDX/settings64.sh -make TARGETS=hw_emu DEVICES=$AWS_PLATFORM all -./main -hw=hw_emu -``` - -* To run on an F1 instance, use the following commands. 
- ``` -make clean -source $XILINX_SDX/settings64.sh -make TARGETS=hw DEVICES=$AWS_PLATFORM all -./main -``` - -* For more information on running this example on an F1 instance, see [this](../../../README.md#runonf1). \ No newline at end of file diff --git a/SDAccel/examples/3rd_party/matrix_mult/device/matrix_mult.cl b/SDAccel/examples/3rd_party/matrix_mult/device/matrix_mult.cl deleted file mode 100644 index 42dc429f..00000000 --- a/SDAccel/examples/3rd_party/matrix_mult/device/matrix_mult.cl +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (C) 2013-2016 Altera Corporation, San Jose, California, USA. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to -// whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// This agreement shall be governed in all respects by the laws of the State of California and -// by the laws of the United States of America. 
- -// This kernel computes C = A * B, where -// A is a N x K matrix -// B is a K x M matrix -// C is a N x M matrix -// All dimensions must be a multiple of BLOCK_SIZE (defined below). -// -// The ND-range is two-dimensional and corresponds to the dimensions of matrix -// C. Each work-item computes one element of the output matrix. -// -// The implemented algorithm uses blocking to take advantage of data reuse -// across multiple elements in matrix C. This is just like the standard loop -// tiling optimization often used in matrix multiplication implementations. -// -// This kernel is intended to be compiled with the following compiler flags: -// --no-interleaving default -// This flag indicates that the global memory is divided into two logical -// banks and allows the host program to assign buffers to specific buffers. -// This allows the host to manage the load on each memory bank, usually -// to maximize the memory bandwidth usage. -// -// This flag is used for matrix multiplication because there are -// two primary memory accesses: reads from matrix A and reads from -// matrix B. To maximize memory bandwidth, the two input matrices -// are placed in different memory banks, which ensures that there is no -// contention when trying to read elements from both matrices -// simultaneously. -// -// -fp-relaxed=true -// This flag enables the order of additions in the dot product -// computation within a block to be rearranged. This enables the additions -// to be computed more efficiently in hardware, using a tree structure -// instead of a vine. -// -// As a simple example, take the addition of four values: a0 + a1 + a2 + a3. -// The default implementation (without -fp-relaxed=true) is: -// (((a0 + a1) + a2) + a3) -// which matches the standard ordering of operations. 
In hardware, this -// looks like: -// a0 a1 -// |-+-| -// | a2 -// |-+-| -// | a3 -// |-+-| -// | -// -// With -fp-relaxed=true, the implementation is a balanced tree: -// ((a0 + a1) + (a2 + a3)) -// In hardware, this looks like: -// a0 a1 a2 a3 -// |-+-| |-+-| -// | | -// |----+----| -// | -// -// There are two values that need to be defined in the preprocessor. -// BLOCK_SIZE -// The dimension of the block used in the core computation -// is BLOCK_SIZE x BLOCK_SIZE. This is defined in the host -// include file because the host needs to know too (just to -// ensure that the matrix sizes are a multiple of the block -// size. -// SIMD_WORK_ITEMS -// This value tells the compiler how many work-items in the work-group -// in a SIMD fashion. In the context of matrix multiplication, this -// value indicates how many output elements will be computed -// in a SIMD manner. BLOCK_SIZE must be a multiple of SIMD_WORK_ITEMS. -// See the Optimization Guide for details about this attribute. -// -// The combination of these values determines the number of floating-point -// operations per cycle. - -#include "matrixMult.h" - -#ifndef SIMD_WORK_ITEMS -#define SIMD_WORK_ITEMS 4 // default value -#endif - -__kernel -__attribute((reqd_work_group_size(BLOCK_SIZE,BLOCK_SIZE,1))) -__attribute((num_simd_work_items(SIMD_WORK_ITEMS))) -void matrix_mult( // Input and output matrices - __global float *restrict C, - __global float *A, - __global float *B, - // Widths of matrices. 
- int A_width, int B_width) -{ - // Local storage for a block of input matrices A and B - __local float A_local[BLOCK_SIZE][BLOCK_SIZE]; - __local float B_local[BLOCK_SIZE][BLOCK_SIZE]; - - // Block index - int block_x = get_group_id(0); - int block_y = get_group_id(1); - - // Local ID index (offset within a block) - int local_x = get_local_id(0); - int local_y = get_local_id(1); - - // Compute loop bounds - int a_start = A_width * BLOCK_SIZE * block_y; - int a_end = a_start + A_width - 1; - int b_start = BLOCK_SIZE * block_x; - - float running_sum = 0.0f; - - // Compute the matrix multiplication result for this output element. Each - // loop iteration processes one block of the matrix. - for (int a = a_start, b = b_start; a <= a_end; a += BLOCK_SIZE, b += (BLOCK_SIZE * B_width)) - { - // Load the matrices to local memory. Note that the (x, y) indices - // are swapped for A_local and B_local. This affects the reads from - // A_local and B_local below and result in more efficient hardware. - // - // This is actually an optimization that the compiler can perform, - // but is shown here for illustration purposes. - A_local[local_y][local_x] = A[a + A_width * local_y + local_x]; - B_local[local_x][local_y] = B[b + B_width * local_y + local_x]; - - // Wait for the entire block to be loaded. - barrier(CLK_LOCAL_MEM_FENCE); - - // Do the dot product accumulation within this block. Fully unroll the loop. - // As a result of the swap of indices above, memory accesses to - // A_local and B_local are very efficient because each loop iteration - // accesses consecutive elements. This can be seen by unrolling the - // loop and analyzing the regions that are loaded: - // A_local[local_y][0..BLOCK_SIZE-1] and - // B_local[local_x][0..BLOCK_SIZE-1] - __attribute__((opencl_unroll_hint())) - for (int k = 0; k < BLOCK_SIZE; ++k) - { - running_sum += A_local[local_y][k] * B_local[local_x][k]; - } - - // Wait for the block to be fully consumed before loading the next - // block. 
- barrier(CLK_LOCAL_MEM_FENCE); - } - - // Store result in matrix C - C[get_global_id(1) * get_global_size(0) + get_global_id(0)] = running_sum; -} diff --git a/SDAccel/examples/3rd_party/matrix_mult/host/inc/matrixMult.h b/SDAccel/examples/3rd_party/matrix_mult/host/inc/matrixMult.h deleted file mode 100644 index 76f8f0ad..00000000 --- a/SDAccel/examples/3rd_party/matrix_mult/host/inc/matrixMult.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (C) 2013-2016 Altera Corporation, San Jose, California, USA. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to -// whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// This agreement shall be governed in all respects by the laws of the State of California and -// by the laws of the United States of America. - -#ifndef MATRIXMULT_H -#define MATRIXMULT_H - -// Block size. Affects the kernel, so if this value changes, the kernel -// needs to be recompiled. 
-#ifndef BLOCK_SIZE -#define BLOCK_SIZE 64 // default value -#endif - -#endif - diff --git a/SDAccel/examples/3rd_party/matrix_mult/host/src/main.cpp b/SDAccel/examples/3rd_party/matrix_mult/host/src/main.cpp deleted file mode 100644 index 4481372d..00000000 --- a/SDAccel/examples/3rd_party/matrix_mult/host/src/main.cpp +++ /dev/null @@ -1,549 +0,0 @@ -// Copyright (C) 2013-2016 Altera Corporation, San Jose, California, USA. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to -// whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// This agreement shall be governed in all respects by the laws of the State of California and -// by the laws of the United States of America. - -/////////////////////////////////////////////////////////////////////////////////// -// This host program executes a matrix multiplication kernel to perform: -// C = A * B -// where A is a N x K matrix, B is a K x M matrix and C is a N x M matrix. 
-// All dimensions must be a multiple of BLOCK_SIZE, which affects the -// underlying kernel. -// -// This host program supports partitioning the problem across multiple OpenCL -// devices if available. If there are M available devices, the problem is -// divided so that each device operates on N/M rows (with -// processed by each device is . The host program -// assumes that all devices are of the same type (that is, the same binary can -// be used), but the code can be generalized to support different device types -// easily. -// -// Verification is performed against the same computation on the host CPU. -/////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include -#include "CL/opencl.h" -#include "AOCLUtils/aocl_utils.h" -#include "matrixMult.h" - -using namespace aocl_utils; - -// OpenCL runtime configuration -cl_platform_id platform = NULL; -unsigned num_devices = 0; -scoped_array device; // num_devices elements -cl_context context = NULL; -scoped_array queue; // num_devices elements -cl_program program = NULL; -scoped_array kernel; // num_devices elements -#if USE_SVM_API == 0 -scoped_array input_a_buf; // num_devices elements -scoped_array input_b_buf; // num_devices elements -scoped_array output_buf; // num_devices elements -#endif /* USE_SVM_API == 0 */ - -// Problem data. 
-unsigned A_height = 32 * BLOCK_SIZE; -unsigned A_width = 16 * BLOCK_SIZE; -const unsigned &B_height = A_width; -unsigned B_width = 16 * BLOCK_SIZE; -const unsigned &C_height = A_height; -const unsigned &C_width = B_width; -std::string hwtype = "hw"; - -#if USE_SVM_API == 0 -scoped_array > input_a; // num_devices elements -scoped_aligned_ptr input_b; -scoped_array > output; // num_devices elements -#else -scoped_array > input_a; // num_devices elements -scoped_SVM_aligned_ptr input_b; -scoped_array > output; // num_devices elements -#endif /* USE_SVM_API == 0 */ -scoped_array ref_output; -scoped_array rows_per_device; // num_devices elements - -// Function prototypes -float rand_float(); -bool init_opencl(); -void init_problem(); -void run(); -void compute_reference(); -void verify(); -void cleanup(); - -// Entry point. -int main(int argc, char **argv) { - Options options(argc, argv); - - if(options.has("ah")) { - A_height = options.get("ah"); - } - if(options.has("aw")) { - A_width = options.get("aw"); - } - if(options.has("bw")) { - B_width = options.get("bw"); - } - if(options.has("hw")) { - hwtype = options.get("hw"); - } - - printf("Matrix sizes:\n A: %d x %d\n B: %d x %d\n C: %d x %d\n", - A_height, A_width, B_height, B_width, C_height, C_width); - - // Spot check matrix sizes. They all must be a multiple of BLOCK_SIZE, - // although it is relatively straightforward to handle non-multiples - // by adding padding. For simplicity, this example does not pad. - if((A_height % BLOCK_SIZE) != 0 || (A_width % BLOCK_SIZE) != 0 || - (B_height % BLOCK_SIZE) != 0 || (B_width % BLOCK_SIZE) != 0 || - (C_height % BLOCK_SIZE) != 0 || (C_width % BLOCK_SIZE) != 0) { - printf("Matrix sizes must be a multiple of %d.\n", BLOCK_SIZE); - return -1; - } - - // Initialize OpenCL. - if(!init_opencl()) { - return -1; - } - - // Initialize the problem data. - // Requires the number of devices to be known. - init_problem(); - - // Run the kernel. 
- run(); - - // Free the resources allocated - cleanup(); - - return 0; -} - -/////// HELPER FUNCTIONS /////// - -// Randomly generate a floating-point number between -10 and 10. -float rand_float() { - return float(rand()) / float(RAND_MAX) * 20.0f - 10.0f; -} - -// Initializes the OpenCL objects. -bool init_opencl() { - cl_int status; - - printf("Initializing OpenCL\n"); - - if(!setCwdToExeDir()) { - return false; - } - - // Get the OpenCL platform. - platform = findPlatform("Xilinx"); - if(platform == NULL) { - printf("ERROR: Unable to find Xilinx OpenCL platform.\n"); - return false; - } - - // Query the available OpenCL device. - device.reset(getDevices(platform, CL_DEVICE_TYPE_ALL, &num_devices)); - printf("Platform: %s\n", getPlatformName(platform).c_str()); - printf("Using %d device(s)\n", num_devices); - for(unsigned i = 0; i < num_devices; ++i) { - printf(" %s\n", getDeviceName(device[i]).c_str()); - } - - // Create the context. - context = clCreateContext(NULL, num_devices, device, &oclContextCallback, NULL, &status); - checkError(status, "Failed to create context"); - - // Create the program for all device. Use the first device as the - // representative device (assuming all device are of the same type). - std::string fname = "xclbin/matrix_mult."+ hwtype + "." + VERSION_STR; - std::string binary_file = getBoardBinaryFile(fname.c_str(), device[0]); - printf("Using XCLBIN: %s\n", binary_file.c_str()); - program = createProgramFromBinary(context, binary_file.c_str(), device, num_devices); - - // Build the program that was just created. - status = clBuildProgram(program, 0, NULL, "", NULL, NULL); - checkError(status, "Failed to build program"); - - // Create per-device objects. 
- queue.reset(num_devices); - kernel.reset(num_devices); - rows_per_device.reset(num_devices); -#if USE_SVM_API == 0 - input_a_buf.reset(num_devices); - input_b_buf.reset(num_devices); - output_buf.reset(num_devices); -#endif /* USE_SVM_API == 0 */ - - const unsigned num_block_rows = C_height / BLOCK_SIZE; - - for(unsigned i = 0; i < num_devices; ++i) { - // Command queue. - queue[i] = clCreateCommandQueue(context, device[i], CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue"); - - // Kernel. - const char *kernel_name = "matrix_mult"; - kernel[i] = clCreateKernel(program, kernel_name, &status); - checkError(status, "Failed to create kernel"); - - // Determine the number of rows processed by this device. - // First do this computation in block-rows. - rows_per_device[i] = num_block_rows / num_devices; // this is the number of block-rows - - // Spread out the remainder of the block-rows over the first - // N % num_devices. - if(i < (num_block_rows % num_devices)) { - rows_per_device[i]++; - } - - // Multiply by BLOCK_SIZE to get the actual number of rows. - rows_per_device[i] *= BLOCK_SIZE; - -#if USE_SVM_API == 0 - // Input buffers. - // For matrix A, each device only needs the rows corresponding - // to the rows of the output matrix. We specifically - // assign this buffer to the first bank of global memory. - input_a_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY , // remove for now // | CL_MEM_BANK_1_ALTERA, - rows_per_device[i] * A_width * sizeof(float), NULL, &status); - checkError(status, "Failed to create buffer for input A"); - - // For matrix B, each device needs the whole matrix. We specifically - // assign this buffer to the second bank of global memory. - input_b_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY, // remove for now // | CL_MEM_BANK_2_ALTERA, - B_height * B_width * sizeof(float), NULL, &status); - checkError(status, "Failed to create buffer for input B"); - - // Output buffer. 
This is matrix C, for the rows that are computed by this - // device. We assign this buffer to the first bank of global memory, - // although it is not material to performance to do so because - // the reads from the input matrices are far more frequent than the - // write to the output matrix. - output_buf[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, // remove for now // | CL_MEM_BANK_1_ALTERA, - rows_per_device[i] * C_width * sizeof(float), NULL, &status); - checkError(status, "Failed to create buffer for output"); -#else - cl_device_svm_capabilities caps = 0; - - status = clGetDeviceInfo( - device[i], - CL_DEVICE_SVM_CAPABILITIES, - sizeof(cl_device_svm_capabilities), - &caps, - 0 - ); - checkError(status, "Failed to get device info"); - - if (!(caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER)) { - printf("The host was compiled with USE_SVM_API, however the device currently being targeted does not support SVM.\n"); - // Free the resources allocated - cleanup(); - return false; - } -#endif /* USE_SVM_API == 0 */ - } - - return true; -} - -// Initialize the data for the problem. Requires num_devices to be known. -void init_problem() { - if(num_devices == 0) { - checkError(-1, "No devices"); - } - - // Generate input matrices A and B. For matrix A, we divide up the host - // buffers so that the buffers are aligned for each device. The whole of - // matrix B is used by each device, so it does not need to be divided. 
- printf("Generating input matrices\n"); - input_a.reset(num_devices); - output.reset(num_devices); -#if USE_SVM_API == 0 - for(unsigned i = 0; i < num_devices; ++i) { - input_a[i].reset(rows_per_device[i] * A_width); - output[i].reset(rows_per_device[i] * C_width); - - for(unsigned j = 0; j < rows_per_device[i] * A_width; ++j) { - input_a[i][j] = rand_float(); - } - } - - input_b.reset(B_height * B_width); - for(unsigned i = 0; i < B_height * B_width; ++i) { - input_b[i] = rand_float(); - } -#else - for(unsigned i = 0; i < num_devices; ++i) { - input_a[i].reset(context, rows_per_device[i] * A_width); - output[i].reset(context, rows_per_device[i] * C_width); - - cl_int status; - - status = clEnqueueSVMMap(queue[i], CL_TRUE, CL_MAP_WRITE, - (void *)input_a[i], rows_per_device[i] * A_width * sizeof(float), 0, NULL, NULL); - checkError(status, "Failed to map input A"); - - for(unsigned j = 0; j < rows_per_device[i] * A_width; ++j) { - input_a[i][j] = rand_float(); - } - - status = clEnqueueSVMUnmap(queue[i], (void *)input_a[i], 0, NULL, NULL); - checkError(status, "Failed to unmap input A"); - } - - input_b.reset(context, B_height * B_width); - - cl_int status; - - for (unsigned i = 0; i < num_devices; ++i) { - status = clEnqueueSVMMap(queue[i], CL_TRUE, CL_MAP_WRITE, - (void *)input_b, B_height * B_width * sizeof(float), 0, NULL, NULL); - checkError(status, "Failed to map input B"); - } - - for(unsigned i = 0; i < B_height * B_width; ++i) { - input_b[i] = rand_float(); - } - - for (unsigned i = 0; i < num_devices; ++i) { - status = clEnqueueSVMUnmap(queue[i], (void *)input_b, 0, NULL, NULL); - checkError(status, "Failed to unmap input B"); - } -#endif /* USE_SVM_API == 0 */ -} - -void run() { - cl_int status; - -#if USE_SVM_API == 0 - // Transfer inputs to each device. Each of the host buffers supplied to - // clEnqueueWriteBuffer here is already aligned to ensure that DMA is used - // for the host-to-device transfer. 
- for(unsigned i = 0; i < num_devices; ++i) { - status = clEnqueueWriteBuffer(queue[i], input_a_buf[i], CL_FALSE, - 0, rows_per_device[i] * A_width * sizeof(float), input_a[i], 0, NULL, NULL); - checkError(status, "Failed to transfer input A"); - - status = clEnqueueWriteBuffer(queue[i], input_b_buf[i], CL_FALSE, - 0, B_width * B_height * sizeof(float), input_b, 0, NULL, NULL); - checkError(status, "Failed to transfer input B"); - } - - // Wait for all queues to finish. - for(unsigned i = 0; i < num_devices; ++i) { - clFinish(queue[i]); - } -#endif /* USE_SVM_API == 0 */ - - // Launch kernels. - // This is the portion of time that we'll be measuring for throughput - // benchmarking. - scoped_array kernel_event(num_devices); - - const double start_time = getCurrentTimestamp(); - for(unsigned i = 0; i < num_devices; ++i) { - // Set kernel arguments. - unsigned argi = 0; - -#if USE_SVM_API == 0 - status = clSetKernelArg(kernel[i], argi++, sizeof(cl_mem), &output_buf[i]); - checkError(status, "Failed to set argument %d", argi - 1); - - status = clSetKernelArg(kernel[i], argi++, sizeof(cl_mem), &input_a_buf[i]); - checkError(status, "Failed to set argument %d", argi - 1); - - status = clSetKernelArg(kernel[i], argi++, sizeof(cl_mem), &input_b_buf[i]); - checkError(status, "Failed to set argument %d", argi - 1); -#else - status = clSetKernelArgSVMPointer(kernel[i], argi++, (void*)output[i]); - checkError(status, "Failed to set argument %d", argi - 1); - - status = clSetKernelArgSVMPointer(kernel[i], argi++, (void*)input_a[i]); - checkError(status, "Failed to set argument %d", argi - 1); - - status = clSetKernelArgSVMPointer(kernel[i], argi++, (void*)input_b); - checkError(status, "Failed to set argument %d", argi - 1); -#endif /* USE_SVM_API == 0 */ - - status = clSetKernelArg(kernel[i], argi++, sizeof(A_width), &A_width); - checkError(status, "Failed to set argument %d", argi - 1); - - status = clSetKernelArg(kernel[i], argi++, sizeof(B_width), &B_width); - 
checkError(status, "Failed to set argument %d", argi - 1); - - // Enqueue kernel. - // Use a global work size corresponding to the size of the output matrix. - // Each work-item computes the result for one value of the output matrix, - // so the global work size has the same dimensions as the output matrix. - // - // The local work size is one block, so BLOCK_SIZE x BLOCK_SIZE. - // - // Events are used to ensure that the kernel is not launched until - // the writes to the input buffers have completed. - const size_t global_work_size[2] = {C_width, rows_per_device[i]}; - const size_t local_work_size[2] = {BLOCK_SIZE, BLOCK_SIZE}; - printf("Launching for device %d (global size: %zd, %zd)\n", i, global_work_size[0], global_work_size[1]); - - status = clEnqueueNDRangeKernel(queue[i], kernel[i], 2, NULL, - global_work_size, local_work_size, 0, NULL, &kernel_event[i]); - checkError(status, "Failed to launch kernel"); - } - - // Wait for all kernels to finish. - clWaitForEvents(num_devices, kernel_event); - - const double end_time = getCurrentTimestamp(); - const double total_time = end_time - start_time; - - // Wall-clock time taken. - printf("\nTime: %0.3f ms\n", total_time * 1e3); - - // Get kernel times using the OpenCL event profiling API. - for(unsigned i = 0; i < num_devices; ++i) { - cl_ulong time_ns = getStartEndTime(kernel_event[i]); - printf("Kernel time (device %d): %0.3f ms\n", i, double(time_ns) * 1e-6); - } - - // Compute the throughput (GFLOPS). - // There are C_width * C_height output values, with each value - // computed using A_width multiplies and adds. - const float flops = (float)(2.0f * C_width * C_height * A_width / total_time); - printf("\nThroughput: %0.2f GFLOPS\n\n", flops * 1e-9); - - // Release kernel events. - for(unsigned i = 0; i < num_devices; ++i) { - clReleaseEvent(kernel_event[i]); - } - - // Read the result. 
- for(unsigned i = 0; i < num_devices; ++i) { -#if USE_SVM_API == 0 - status = clEnqueueReadBuffer(queue[i], output_buf[i], CL_TRUE, - 0, rows_per_device[i] * C_width * sizeof(float), output[i], 0, NULL, NULL); - checkError(status, "Failed to read output matrix"); -#else - status = clEnqueueSVMMap(queue[i], CL_TRUE, CL_MAP_READ, - (void *)output[i], rows_per_device[i] * C_width * sizeof(float), 0, NULL, NULL); - checkError(status, "Failed to map output"); -#endif /* USE_SVM_API == 0 */ - } - - // Verify results. - compute_reference(); - verify(); -#if USE_SVM_API == 1 - for (unsigned i = 0; i < num_devices; ++i) { - status = clEnqueueSVMUnmap(queue[i], (void *)output[i], 0, NULL, NULL); - checkError(status, "Failed to unmap output"); - } -#endif /* USE_SVM_API == 1 */ -} - -void compute_reference() { - // Compute the reference output. - printf("Computing reference output\n"); - ref_output.reset(C_height * C_width); - - for(unsigned y = 0, dev_index = 0; y < C_height; ++dev_index) { - for(unsigned yy = 0; yy < rows_per_device[dev_index]; ++yy, ++y) { - for(unsigned x = 0; x < C_width; ++x) { - // Compute result for C(y, x) - float sum = 0.0f; - for(unsigned k = 0; k < A_width; ++k) { - sum += input_a[dev_index][yy * A_width + k] * input_b[k * B_width + x]; - } - ref_output[y * C_width + x] = sum; - } - } - } -} - -void verify() { - printf("Verifying\n"); - - // Compute the L^2-Norm of the difference between the output and reference - // output matrices and compare it against the L^2-Norm of the reference. 
- float diff = 0.0f; - float ref = 0.0f; - for(unsigned y = 0, dev_index = 0; y < C_height; ++dev_index) { - for(unsigned yy = 0; yy < rows_per_device[dev_index]; ++yy, ++y) { - for(unsigned x = 0; x < C_width; ++x) { - const float o = output[dev_index][yy * C_width + x]; - const float r = ref_output[y * C_width + x]; - const float d = o - r; - diff += d * d; - ref += r * r; - } - } - } - - const float diff_l2norm = sqrtf(diff); - const float ref_l2norm = sqrtf(ref); - const float error = diff_l2norm / ref_l2norm; - const bool pass = error < 1e-6; - printf("Verification: %s\n", pass ? "PASS" : "FAIL"); - if(!pass) { - printf("Error (L^2-Norm): %0.3g\n", error); - } -} - -// Free the resources allocated during initialization -void cleanup() { - for(unsigned i = 0; i < num_devices; ++i) { - if(kernel && kernel[i]) { - clReleaseKernel(kernel[i]); - } - if(queue && queue[i]) { - clReleaseCommandQueue(queue[i]); - } -#if USE_SVM_API == 0 - if(input_a_buf && input_a_buf[i]) { - clReleaseMemObject(input_a_buf[i]); - } - if(input_b_buf && input_b_buf[i]) { - clReleaseMemObject(input_b_buf[i]); - } - if(output_buf && output_buf[i]) { - clReleaseMemObject(output_buf[i]); - } -#else - if(input_a[i].get()) - input_a[i].reset(); - if(output[i].get()) - output[i].reset(); -#endif /* USE_SVM_API == 0 */ - } -#if USE_SVM_API == 1 - if(input_b.get()) - input_b.reset(); -#endif /* USE_SVM_API == 1 */ - - if(program) { - clReleaseProgram(program); - } - if(context) { - clReleaseContext(context); - } -} - diff --git a/SDAccel/examples/3rd_party/vector_addition/vector_addition_main.cpp.diff b/SDAccel/examples/3rd_party/vector_addition/vector_addition_main.cpp.diff index 9d850916..94075d89 100644 --- a/SDAccel/examples/3rd_party/vector_addition/vector_addition_main.cpp.diff +++ b/SDAccel/examples/3rd_party/vector_addition/vector_addition_main.cpp.diff @@ -1,22 +1,45 @@ ---- third_party/vector_add/host/src/main.cpp 2017-05-09 22:47:50.000000000 +0000 -+++ 
sdaccel/vector_add/host/src/main.cpp 2017-09-12 18:55:22.172000000 +0000 -@@ -70,0 +71,2 @@ +--- third_party/vector_add/host/src/main.cpp 2018-02-12 17:55:18.000000000 +0000 ++++ aws/vector_add/host/src/main.cpp 2019-09-15 22:53:15.593553611 +0000 +@@ -67,6 +67,7 @@ + #endif /* USE_SVM_API == 0 */ + scoped_array > ref_output; // num_devices elements + scoped_array n_per_device; // num_devices elements +std::string hwtype = "hw"; -+ -@@ -85,0 +88,3 @@ + + // Function prototypes + float rand_float(); +@@ -84,6 +85,9 @@ + N = options.get("n"); + } + + if(options.has("hw")) { + hwtype = options.get("hw"); + } -@@ -123 +128 @@ -- platform = findPlatform("Intel"); -+ platform = findPlatform("Xilinx"); -@@ -125 +130 @@ -- printf("ERROR: Unable to find Intel FPGA OpenCL platform.\n"); + // Initialize OpenCL. + if(!init_opencl()) { + return -1; +@@ -120,9 +124,9 @@ + } + + // Get the OpenCL platform. +- platform = findPlatform("Intel(R) FPGA SDK for OpenCL(TM)"); ++ platform = findPlatform("Xilinx"); + if(platform == NULL) { +- printf("ERROR: Unable to find Intel(R) FPGA OpenCL platform.\n"); + printf("ERROR: Unable to find Xilinx FPGA OpenCL platform.\n"); -@@ -143,2 +148,4 @@ + return false; + } + +@@ -140,8 +144,10 @@ + + // Create the program for all device. Use the first device as the + // representative device (assuming all device are of the same type). - std::string binary_file = getBoardBinaryFile("vector_add", device[0]); - printf("Using AOCX: %s\n", binary_file.c_str()); + std::string fname = "xclbin/vector_add."+ hwtype + "." + VERSION_STR; + printf("Looking for %s.\n",fname.c_str()); + std::string binary_file = getBoardBinaryFile(fname.c_str(), device[0]); + printf("Using XCLBIN: %s\n", binary_file.c_str()); + program = createProgramFromBinary(context, binary_file.c_str(), device, num_devices); + + // Build the program that was just created.
diff --git a/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/helloworld b/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/helloworld old mode 100644 new mode 100755 similarity index 100% rename from SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/helloworld rename to SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/helloworld diff --git a/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/helloworld_ocl_afi-ids.txt b/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/helloworld_ocl_afi-ids.txt similarity index 100% rename from SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/helloworld_ocl_afi-ids.txt rename to SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/helloworld_ocl_afi-ids.txt diff --git a/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/helloworld_ocl_agfi-ids.txt b/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/helloworld_ocl_agfi-ids.txt similarity index 100% rename from SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/helloworld_ocl_agfi-ids.txt rename to SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/helloworld_ocl_agfi-ids.txt diff --git a/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/sdaccel.ini b/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/sdaccel.ini new file mode 100644 index 00000000..c75131c1 --- /dev/null +++ b/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/sdaccel.ini @@ -0,0 +1,2 @@ +[Debug] +profile=true diff --git a/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin b/SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin similarity index 100% rename from SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin rename to 
SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin diff --git a/SDAccel/examples/aws/helloworld_ocl_runtime/README.md b/SDAccel/examples/aws/helloworld_ocl_runtime/README.md index 1782662b..a8c9ad2a 100644 --- a/SDAccel/examples/aws/helloworld_ocl_runtime/README.md +++ b/SDAccel/examples/aws/helloworld_ocl_runtime/README.md @@ -31,23 +31,19 @@ vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin --awsxclbin ## Execution - -#### :exclamation: PLEASE NOTE: xclbin & awsxclbin file formats have changed for SDx 2018.3. xclbin & awsxclbin files generated using earlier SDx versions are not compatible with 2018.3 based XRTs. If you are using a 2018.3 based XRT, please copy over awsxclbin & helloworld executable files provided in the 2018.3 subdirectory to this folder. - -Command sequence +#### :exclamation: PLEASE NOTE: xclbin & awsxclbin file formats have changed from SDx 2018.3 onwards. xclbin & awsxclbin files generated using earlier SDx versions are not compatible with 2018.3/2019.1 based XRTs. If you are using a 2018.3/2019.1 based XRT, please copy over awsxclbin & helloworld executable files provided in the 2018.3_2019.1 subdirectory to this folder. ``` sudo fpga-clear-local-image -S 0 - >>$sudo sh -sh-4.2# source $AWS_FPGA_REPO_DIR/sdaccel_runtime_setup.sh -sh-4.2# ./helloworld - +sudo -E /bin/bash +source $AWS_FPGA_REPO_DIR/sdaccel_runtime_setup.sh +./helloworld ``` ## Hello World Example Metadata -| Key | Region | Value for 2017.4 or 2018.2 | Value for 2018.3 | +| Key | Region | SDx 2017.4 or 2018.2 | SDx 2018.3 or 2019.1 | |--------|---------|-----------------------------|------------------| |afi id | us-east-1(N. 
Virginia) | afi-0532379b26ea13f26 | afi-0c8210915ce9bab5c | |afi id | us-west-2(oregon) | afi-0ab098d3fbfc43c7e | afi-01e237aa978aa74de | diff --git a/SDAccel/examples/aws/helloworld_ocl_runtime/sdaccel.ini b/SDAccel/examples/aws/helloworld_ocl_runtime/sdaccel.ini new file mode 100644 index 00000000..c75131c1 --- /dev/null +++ b/SDAccel/examples/aws/helloworld_ocl_runtime/sdaccel.ini @@ -0,0 +1,2 @@ +[Debug] +profile=true diff --git a/SDAccel/examples/xilinx_2017.4 b/SDAccel/examples/xilinx_2017.4 deleted file mode 160000 index cd196250..00000000 --- a/SDAccel/examples/xilinx_2017.4 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cd196250dfdd63491080e8c6f3e79fe6d1718997 diff --git a/SDAccel/examples/xilinx_2018.2 b/SDAccel/examples/xilinx_2018.2 deleted file mode 160000 index 70a0f3ed..00000000 --- a/SDAccel/examples/xilinx_2018.2 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 70a0f3edc6d78f3de13806ec7a7a01d1fbe0d2bd diff --git a/SDAccel/examples/xilinx_2018.3 b/SDAccel/examples/xilinx_2018.3 deleted file mode 160000 index b2884db9..00000000 --- a/SDAccel/examples/xilinx_2018.3 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b2884db9768d6589ae094cd06d9b491b3bd39816 diff --git a/SDAccel/examples/xilinx_2019.1 b/SDAccel/examples/xilinx_2019.1 new file mode 160000 index 00000000..0ec1aef5 --- /dev/null +++ b/SDAccel/examples/xilinx_2019.1 @@ -0,0 +1 @@ +Subproject commit 0ec1aef54f3bf17c78581630d687b13fadae9616 diff --git a/SDAccel/kernel_version.txt b/SDAccel/kernel_version.txt index 2ecfd922..20429c73 100644 --- a/SDAccel/kernel_version.txt +++ b/SDAccel/kernel_version.txt @@ -2,3 +2,6 @@ 3.10.0-693.21.1.el7.x86_64 3.10.0-957.1.3.el7.x86_64 3.10.0-957.5.1.el7.x86_64 +3.10.0-957.27.2.el7.x86_64 +3.10.0-1062.4.1.el7.x86_64 +3.10.0-1062.9.1.el7.x86_64 \ No newline at end of file diff --git a/SDAccel/sdaccel_xrt_version.txt b/SDAccel/sdaccel_xrt_version.txt index 4518becc..7322b406 100644 --- a/SDAccel/sdaccel_xrt_version.txt +++ b/SDAccel/sdaccel_xrt_version.txt @@ -1,4 
+1,6 @@ 2018.2:343186f76f59edd01bc48d84cf67fe22a0a3f338 2018.2:65ffad62f427c0bd1bc65b6ea555a810295468b7 -2018.3:3636217b633930ed4815abd598324691ca25c2f3 -2018.3:48cafdc100b29843fd013d371ffba0141db06b7a +2018.3:8654da1f0d2bd196c9887bdcfe1479103a93e90a +2019.1:e21b8a5b208618834760593bbb15063f7e399642 +2019.1:dd210161e204e882027d22132725d8ffdf285149 +2019.1:89e25d51313daac5c322dfb4e84707829306d3fe \ No newline at end of file diff --git a/SDAccel/tests/test_find_sdaccel_examples.py b/SDAccel/tests/test_find_sdaccel_examples.py index f0c5c758..4d9f1c94 100644 --- a/SDAccel/tests/test_find_sdaccel_examples.py +++ b/SDAccel/tests/test_find_sdaccel_examples.py @@ -44,6 +44,8 @@ class TestFindSDAccelExamples(AwsFpgaTestBase): NOTE: Cannot have an __init__ method. ''' + ADD_XILINX_VERSION = True + @classmethod def setup_class(cls): ''' @@ -52,7 +54,7 @@ def setup_class(cls): AwsFpgaTestBase.setup_class(cls, __file__) return - def test_find_example_makefiles(self): + def test_find_example_makefiles(self, xilinxVersion): assert os.path.exists(self.xilinx_sdaccel_examples_dir), "The Xilinx SDAccel example dir does not exist: {}".format(self.xilinx_sdaccel_examples_dir) assert os.listdir(self.xilinx_sdaccel_examples_dir) != [], "Xilinx SDAccel example submodule not cloned or does not exist" @@ -61,13 +63,34 @@ def test_find_example_makefiles(self): xilinx_sdaccel_example_map = {} for root, dirs, files in os.walk(self.xilinx_sdaccel_examples_dir): - for file in files: - if file.endswith('Makefile'): - makefile_path = root + "/Makefile" - - # If the Makefile has a docs target, it's not the makefile we want to read - if 'docs:' not in open(makefile_path).read(): - xilinx_examples_makefiles.append(root) + ignore = False + + if os.path.exists(root + "/description.json") and os.path.exists(root + "/Makefile"): + with open(root + "/description.json", "r") as description_file: + description = json.load(description_file) + + if "containers" in description: + if 
len(description["containers"]) > 1: + ignore = True + logger.info("Ignoring {} as >1 containers found in description.json.".format(root)) + + else: + ignore = True + logger.info("Ignoring {} as no containers found in description.json.".format(root)) + continue + + if "nboard" in description: + if "xilinx_aws-vu9p-f1-04261818" in description["nboard"]: + ignore = True + logger.info("Ignoring {} as F1 device found in nboard.".format(root)) + continue + else: + ignore = True + logger.warn("Ignoring: {} as no Makefile/description.json exist".format(root)) + + if not ignore: + xilinx_examples_makefiles.append(root) + logger.info("Adding: " + root) assert len(xilinx_examples_makefiles) != 0, "Could not find any Xilinx SDAccel example in %s" % self.xilinx_sdaccel_examples_dir @@ -84,4 +107,8 @@ def test_find_example_makefiles(self): with open(self.xilinx_sdaccel_examples_list_file, 'w') as outfile: json.dump(xilinx_sdaccel_example_map, outfile) + # Also write the archive file + with open(self.xilinx_sdaccel_examples_list_file + "." + xilinxVersion, 'w') as archive_file: + json.dump(xilinx_sdaccel_example_map, archive_file) + assert os.path.getsize(self.xilinx_sdaccel_examples_list_file) > 0, "%s is a non zero file. 
We need to have some data in the file" % self.xilinx_sdaccel_examples_list_file diff --git a/SDAccel/tests/test_run_sdaccel_example.py b/SDAccel/tests/test_run_sdaccel_example.py index 4bf1ecaf..201fb5c1 100644 --- a/SDAccel/tests/test_run_sdaccel_example.py +++ b/SDAccel/tests/test_run_sdaccel_example.py @@ -68,13 +68,14 @@ def setup_class(cls): return + @pytest.mark.flaky(reruns=2, reruns_delay=2) def test_run_sdaccel_example(self, examplePath, rteName, xilinxVersion): os.chdir(self.get_sdaccel_example_fullpath(examplePath)) (rc, stdout_lines, stderr_lines) = self.run_cmd("make exe") assert rc == 0 - em_run_cmd = self.get_sdaccel_example_run_cmd(examplePath) + em_run_cmd = self.get_sdaccel_example_run_cmd(examplePath, xilinxVersion) check_runtime_script = os.path.join(AwsFpgaTestBase.WORKSPACE,'sdaccel_runtime_setup.sh') self.get_sdaccel_aws_xclbin_file(examplePath, rteName, xilinxVersion) diff --git a/SDAccel/tools/awssak/Makefile b/SDAccel/tools/awssak/Makefile index 8b1fff39..019ae6fa 100644 --- a/SDAccel/tools/awssak/Makefile +++ b/SDAccel/tools/awssak/Makefile @@ -24,14 +24,15 @@ CXX := g++ CXXFLAGS := -Wall -Werror -std=c++11 ROOT = $(SDACCEL_DIR) -HAL_INC := -I../../include -I$(ROOT)/userspace/include +HAL_INC := -I$(SDACCEL_DIR)/userspace/src -I$(SDACCEL_DIR)/userspace/include -I$(SDK_DIR)/userspace/include -I$(SDK_DIR)/linux_kernel_drivers CXXFLAGS += $(HAL_INC) ifeq ($(ec2),1) -AWSBM_HAL_LIBNAME := $(ROOT)/userspace/src/libawsxcldrv.a +AWS_HAL_LIBNAME := $(ROOT)/userspace/src/libxrt-aws.a else -AWSBM_HAL_LIBNAME := $(ROOT)/userspace/src/libawsbmdrv.a +AWS_HAL_LIBNAME := $(ROOT)/userspace/src/libxrtbm-aws.a +CXXFLAGS += -DINTERNAL_RELEASE endif ifeq ($(debug),1) @@ -59,8 +60,8 @@ all : $(EXENAME) $(CXX) $(CXXFLAGS) $(MYCFLAGS) $(MYCXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(MYCFLAGS) $(MYCXXFLAGS) -c -MM $< -o $(patsubst %.o, %.d, $@) -$(EXENAME): $(OBJS) $(AWSBM_HAL_LIBNAME) - $(CXX) -o $@ $(OBJS) $(AWSBM_HAL_LIBNAME) $(LDFLAGS) $(LDLIBS) -lrt 
+$(EXENAME): $(OBJS) $(AWS_HAL_LIBNAME) + $(CXX) -o $@ $(OBJS) $(AWS_HAL_LIBNAME) $(LDFLAGS) $(LDLIBS) -lrt -pthread clean: rm -rf *.o *.d $(EXENAME) diff --git a/SDAccel/tools/awssak/main.cpp b/SDAccel/tools/awssak/main.cpp index 91f95bd6..09b96f57 100644 --- a/SDAccel/tools/awssak/main.cpp +++ b/SDAccel/tools/awssak/main.cpp @@ -1,9 +1,7 @@ /** * Copyright (C) 2017-2018 Xilinx, Inc - * Author: Sonal Santan - * Simple command line utility to interact with SDX PCIe devices - * - * Code copied verbatim from SDAccel xbsak implementation + * Author: Sonal Santan, Ryan Radjabi + * Simple command line utility to inetract with SDX PCIe devices * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. A copy of the @@ -18,921 +16,9 @@ * under the License. */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xclhal.h" -#include "xcl_axi_checker_codes.h" -#include "memaccess.h" - - -#define TO_STRING(x) #x -#define OCL_NUM_CLOCKS 2 - -class Timer { - std::chrono::high_resolution_clock::time_point mTimeStart; -public: - Timer() { - reset(); - } - long long stop() { - std::chrono::high_resolution_clock::time_point timeEnd = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(timeEnd - mTimeStart).count(); - } - void reset() { - mTimeStart = std::chrono::high_resolution_clock::now(); - } -}; - -/* - * Simple command line tool to query and interact with SDx PCIe devices - * The tool statically links with xcldma HAL driver inorder to avoid - * dependencies on environment variables like XILINX_OPENCL, LD_LIBRARY_PATH, etc. 
- * TODO: - * Rewrite the command line parsing to provide interface like Android adb: - * xcldev [options] - */ - -namespace xcldev { - enum command { - FLASH, - PROGRAM, - CLOCK, - BOOT, - HELP, - QUERY, - RESET, - RUN, - FAN, - DMATEST, - LIST, - MEM, - STATUS, - CMD_MAX - }; - - enum subcommand { - MEM_READ = 0, - MEM_WRITE, - STATUS_APM, - STATUS_LAPC - }; - enum statusmask { - STATUS_NONE_MASK = 0x0, - STATUS_APM_MASK = 0x1, - STATUS_LAPC_MASK = 0x2 - }; - - static const std::pair map_pairs[] = { - std::make_pair("flash", FLASH), - std::make_pair("program", PROGRAM), - std::make_pair("clock", CLOCK), - std::make_pair("boot", BOOT), - std::make_pair("help", HELP), - std::make_pair("query", QUERY), - std::make_pair("reset", RESET), - std::make_pair("run", RUN), - std::make_pair("fan", FAN), - std::make_pair("dmatest", DMATEST), - std::make_pair("list", LIST), - std::make_pair("mem", MEM), - std::make_pair("status", STATUS) - }; - - static const std::pair subcmd_pairs[] = { - std::make_pair("read", MEM_READ), - std::make_pair("write", MEM_WRITE), - std::make_pair("apm", STATUS_APM), - std::make_pair("lapc", STATUS_LAPC) - }; - - - static const std::map commandTable(map_pairs, map_pairs + sizeof(map_pairs) / sizeof(map_pairs[0])); - - class device { - unsigned int m_idx; - xclDeviceHandle m_handle; - xclDeviceInfo2 m_devinfo; - bool m_multiclock; - - public: - device(unsigned int idx, const char* log) : m_idx(idx), m_handle(nullptr), m_devinfo{}, - m_multiclock(false) { - m_handle = xclOpen(m_idx, log, XCL_QUIET); - if (!m_handle) - throw std::runtime_error("Failed to open device index, " + std::to_string(m_idx)); - if (xclGetDeviceInfo2(m_handle, &m_devinfo)) - throw std::runtime_error("Unable to query device index, " + std::to_string(m_idx)); -// const unsigned id = (m_devinfo.mDeviceId << 16) | m_devinfo.mSubsystemId; - m_multiclock = true; - } - - bool isMultipleOCLClockSupported() { return m_multiclock; } - - device(device&& rhs) : m_idx(rhs.m_idx), 
m_handle(rhs.m_handle), m_devinfo(std::move(rhs.m_devinfo)) { - } - - device(const device &dev) = delete; - device& operator=(const device &dev) = delete; - - ~device() { - xclClose(m_handle); - } - - const char *name() const { - return m_devinfo.mName; - } - - int flash(const std::string& mcs1, const std::string& mcs2) { - return xclUpgradeFirmware2(m_handle, mcs1.c_str(), mcs2.c_str()); - } - - int reclock2(unsigned regionIndex, const unsigned short *freq) { - return xclReClock2(m_handle, 2, freq); - } - - std::string parseCUStatus(unsigned val) const { - char delim = '('; - std::string status; - if (val & 0x1) { - status += delim; - status += "START"; - delim = '|'; - } - if (val & 0x2) { - status += delim; - status += "DONE"; - delim = '|'; - } - if (val & 0x4) { - status += delim; - status += "IDLE"; - delim = '|'; - } - if (val & 0x8) { - status += delim; - status += "READY"; - delim = '|'; - } - if (val & 0x10) { - status += delim; - status += "RESTART"; - delim = '|'; - } - if (status.size()) - status += ')'; - else if (val == 0x0) - status = "(--)"; - else - status = "(?\?)"; - return status; - } - - std::ostream& dump(std::ostream& ostr) const { - ostr << "DSA name: " << m_devinfo.mName << "\n"; - ostr << "HAL ver: " << m_devinfo.mHALMajorVersion << "." 
<< m_devinfo.mHALMinorVersion << "\n"; - ostr << "Vendor: " << std::hex << m_devinfo.mVendorId << std::dec << "\n"; - ostr << "Device: " << std::hex << m_devinfo.mDeviceId << std::dec << "\n"; - ostr << "Device ver: " << m_devinfo.mDeviceVersion << "\n"; - ostr << "SDevice: " << std::hex << m_devinfo.mSubsystemId << std::dec << "\n"; - ostr << "SVendor: " << std::hex << m_devinfo.mSubsystemVendorId << std::dec << "\n"; - ostr << "DDR size: " << "0x" << std::hex << m_devinfo.mDDRSize/1024 << std::dec << " KB\n"; - ostr << "DDR count: " << m_devinfo.mDDRBankCount << "\n"; - ostr << "Data alignment: " << m_devinfo.mDataAlignment << "\n"; - ostr << "DDR free size: " << "0x" << std::hex << m_devinfo.mDDRFreeSize/1024 << std::dec << " KB\n"; - ostr << "Min xfer size: " << m_devinfo.mMinTransferSize << "\n"; - ostr << "OnChip Temp: " << m_devinfo.mOnChipTemp << " C\n"; - //ostr << "Fan Temp: " << m_devinfo.mFanTemp<< " C\n"; - ostr << "VCC INT: " << m_devinfo.mVInt << " mV\n"; - ostr << "VCC AUX: " << m_devinfo.mVAux << " mV\n"; - ostr << "VCC BRAM: " << m_devinfo.mVBram << " mV\n"; - if (m_multiclock) { - ostr << "OCL freq1: " << m_devinfo.mOCLFrequency[0] << " MHz\n"; - ostr << "OCL freq2: " << m_devinfo.mOCLFrequency[1] << " MHz\n"; - } - else { - ostr << "OCL freq: " << m_devinfo.mOCLFrequency[0] << " MHz\n"; - } - ostr << "PCIe: " << "GEN" << m_devinfo.mPCIeLinkSpeed << " x " << m_devinfo.mPCIeLinkWidth << "\n"; - ostr << "DMA threads: " << m_devinfo.mDMAThreads << "\n"; - //ostr << "Fan Speed: " << m_devinfo.mFanSpeed << "\n"; - ostr << "MIG Calibrated: " << std::boolalpha << m_devinfo.mMigCalib << std::noboolalpha << "\n"; - - ostr << "CU Status:\n"; - unsigned buf[16]; - for (unsigned i = 0; i < 4; i++) { - xclRead(m_handle, XCL_ADDR_KERNEL_CTRL, i * 4096, static_cast(buf), 16); - ostr << " " << std::setw(7) << i << ": 0x" << std::hex << buf[0] << std::dec << " " << parseCUStatus(buf[0]) << "\n"; - } - return ostr; - } - - int program(const std::string& xclbin, 
unsigned region) { - std::ifstream stream(xclbin.c_str()); - - if(!stream.is_open()) { - std::cout << "ERROR: Cannot open " << xclbin << ". Check that it exists and is readable." << std::endl; - return -ENOENT; - } - - char temp[8]; - stream.read(temp, 8); - if (std::strncmp(temp, "xclbin0", 8) && std::strncmp(temp, "xclbin2", 8)) - return -EINVAL; - - stream.seekg(0, stream.end); - int length = stream.tellg(); - stream.seekg(0, stream.beg); - - char *buffer = new char[length]; - stream.read(buffer, length); - const xclBin *header = (const xclBin *)buffer; - int result = xclLockDevice(m_handle); - if (result) - return result; - result = xclLoadXclBin(m_handle, header); - delete [] buffer; - return result; - } - - int boot() { - return xclBootFPGA(m_handle); - } - - int reset(unsigned region) { - return xclResetDevice(m_handle, XCL_RESET_KERNEL); - } - - int run(unsigned region, unsigned cu) { - std::cout << "ERROR: Not implemented\n"; - return -1; - } - - int fan(unsigned speed) { - std::cout << "ERROR: Not implemented\n"; - return -1; - } - - int dmatest(unsigned long long blockSize) { - void *buf = 0; - if (posix_memalign(&buf, 4096, blockSize)) - return -1; - std::memset(buf, 0, blockSize); - - double bw = m_devinfo.mDDRSize; - bw /= 0x100000; // Convert to MB - - // Use plain POSIX open/pwrite/close. 
- // std::ofstream causes libstdc++ to use AIO with xcldma on CentOS 6.x (but not on Ubuntu 15.10) - std::string baseName("/dev/xdma"); - baseName += std::to_string(m_idx); - baseName += "_h2c_0"; - int fd = open(baseName.c_str(), O_WRONLY); - if (fd < 0) { - std::cout << "Unable to open device node " << baseName << "\n"; - return -1; - } - std::cout << "INFO: Zeroing DDR with " << blockSize/1024 << " KB blocks using DMA channel 0 ...\n"; - ssize_t count = 0; - Timer tim; - for (uint64_t phy = 0; phy < m_devinfo.mDDRSize; phy += blockSize) { - count += pwrite(fd, (const char *)buf, blockSize, phy); - } - double elapsed = tim.stop(); - close(fd); - bw /= elapsed; - bw *= 1000000; // Convert from microseconds to seconds - if (count != (ssize_t)m_devinfo.mDDRSize) { - std::cout << "DMA error\n"; - return -1; - } - std::cout << "INFO: Host -> PCIe -> MIG write bandwidth " << bw << " MB/s\n"; - - baseName.pop_back(); - baseName += "1"; - bw = m_devinfo.mDDRSize; - bw /= 0x100000; // Convert to MB - - // Use plain POSIX open/pwrite/close. 
- // std::ofstream causes libstdc++ to use AIO with xcldma on CentOS 6.x (but not on Ubuntu 15.10) - fd = open(baseName.c_str(), O_WRONLY); - if (fd < 0) { - std::cout << "Unable to open device node " << baseName << "\n"; - return -1; - } - - std::cout << "INFO: Zeroing DDR with " << blockSize/1024 << " KB blocks using DMA channel 1 ...\n"; - count = 0; - tim.reset(); - for (uint64_t phy = 0; phy < m_devinfo.mDDRSize; phy += blockSize) { - count += pwrite(fd, (const char *)buf, blockSize, phy); - } - elapsed = tim.stop(); - close(fd); - bw /= elapsed; - bw *= 1000000; // Convert from microseconds to seconds - if (count != (ssize_t)m_devinfo.mDDRSize) { - std::cout << "DMA error\n"; - return -1; - } - std::cout << "INFO: Host -> PCIe -> MIG write bandwidth " << bw << " MB/s\n"; - - // Now read bandwidth - bw = m_devinfo.mDDRSize; - bw /= 0x100000; // Convert to MB - baseName.erase(baseName.size() - 6); - baseName += "_c2h_0"; - fd = open(baseName.c_str(), O_RDONLY); - if (fd < 0) { - std::cout << "Unable to open device node " << baseName << "\n"; - return -1; - } - std::cout << "INFO: Reading back " << blockSize/1024 << " KB blocks from DDR using DMA channel 0 ...\n"; - count = 0; - tim.reset(); - for (uint64_t phy = 0; phy < m_devinfo.mDDRSize; phy += blockSize) { - count += pread(fd, (char *)buf, blockSize, phy); - } - elapsed = tim.stop(); - close(fd); - bw /= elapsed; - bw *= 1000000; // Convert from microseconds to seconds - if (count != (ssize_t)m_devinfo.mDDRSize) { - std::cout << "DMA error\n"; - return -1; - } - std::cout << "INFO: Host <- PCIe <- MIG read bandwidth " << bw << " MB/s\n"; - - baseName.pop_back(); - baseName += "1"; - bw = m_devinfo.mDDRSize; - bw /= 0x100000; // Convert to MB - - // Use plain POSIX open/pwrite/close. 
- // std::ofstream causes libstdc++ to use AIO with xcldma on CentOS 6.x (but not on Ubuntu 15.10) - fd = open(baseName.c_str(), O_RDONLY); - if (fd < 0) { - std::cout << "Unable to open device node " << baseName << "\n"; - return -1; - } - - std::cout << "INFO: Reading back " << blockSize/1024 << " KB blocks from DDR using DMA channel 1 ...\n"; - count = 0; - tim.reset(); - for (uint64_t phy = 0; phy < m_devinfo.mDDRSize; phy += blockSize) { - count += pread(fd, (char *)buf, blockSize, phy); - } - elapsed = tim.stop(); - close(fd); - bw /= elapsed; - bw *= 1000000; // Convert from microseconds to seconds - if (count != (ssize_t)m_devinfo.mDDRSize) { - std::cout << "DMA error\n"; - return -1; - } - std::cout << "INFO: Host <- PCIe <- MIG read bandwidth " << bw << " MB/s\n"; - - free(buf); - return 0; - } - int memread(std::string aFilename, unsigned long long aStartAddr = 0, unsigned long long aSize = 0) { - std::string baseName("/dev/xdma"); - baseName += std::to_string(m_idx); - return memaccess(baseName, m_devinfo.mDDRSize, m_devinfo.mDataAlignment).read(aFilename, aStartAddr, aSize); - } - - int memwrite(unsigned long long aStartAddr, unsigned long long aSize, unsigned int aPattern) { - std::string baseName("/dev/xdma"); - baseName += std::to_string(m_idx); - return memaccess(baseName, m_devinfo.mDDRSize, m_devinfo.mDataAlignment).write(aStartAddr, aSize, aPattern); - } - - int readAPMCounters() { - static const char* apmSlotNames [XAPM_MAX_NUMBER_SLOTS] = { - "OCL Master-0", - "XDMA ", - "OCL Master-1", - "OCL Master-2", - "OCL Master-3", - "Reserved ", - "Reserved ", - "Reserved " - }; - xclDebugCountersResults debugResults = {0}; - xclDebugReadIPStatus(m_handle, XCL_DEBUG_READ_TYPE_APM, &debugResults); - std::cout << "APM Counters\n"; - std::cout << "Slot " << std::setw(20) << " Write Bytes" << std::setw(16) << " Write Trans." << std::setw(16) << " Read Bytes" << std::setw(16) << " Read Tranx." 
<< std::endl; - for (int i = 0; i()); - std::cout << " Other violations: \n"; - std::cout << " " << xclAXICheckerCodes::decodeAXICheckerCodes(tCummStatus); - violations_found = true; - } - } - if (!violations_found && !invalid_codes) { - std::cout << "No AXI violations found \n"; - } - if (violations_found && aVerbose && !invalid_codes) { - std::cout << "\n"; - std::cout << "Slot " << std::setw(20) << "Overall Status" << std::setw(16) << "Snapshot0" << std::setw(16) << " Snapshot1" << std::setw(16) << " Snapshot2" << std::setw(16) << " Snapshot3"; - std::cout << std::setw(16) << " Cumulative0" << std::setw(16) << " Cumulative1 " << std::setw(16) << " Cumulative2" << std::setw(16) << " Cumulative3" << std::endl; - for (int i = 0; i [options]\n\n"; - std::cout << "Command and option summary:\n"; - std::cout << " boot [-d device]\n"; - std::cout << " clock [-d device] [-r region] [-f clock1_freq_MHz] [-g clock2_freq_MHz]\n"; - std::cout << " dmatest [-d device] [-b [0x]block_size_KB]\n"; - std::cout << " mem --read [-d device] [-a [0x]start_addr] [-i size_bytes] [-o output filename]\n"; - std::cout << " mem --write [-d device] [-a [0x]start_addr] [-i size_bytes] [-e pattern_byte]\n"; -// std::cout << " fan [-d device] -s speed\n"; - std::cout << " flash [-d device] -m primary_mcs [-n secondary_mcs]\n"; - std::cout << " help\n"; - std::cout << " list\n"; - std::cout << " program [-d device] [-r region] -p xclbin\n"; - std::cout << " query [-d device [-r region]]\n"; - std::cout << " reset [-d device] [-r region]\n"; - std::cout << " status --apm\n"; - std::cout << " status --lapc\n"; -// std::cout << " run -d device [-r region] -c compunit\n"; TODO - std::cout << "\nExamples:\n"; - std::cout << "List all devices\n"; - std::cout << " " << exe << " list\n"; - std::cout << "Boot device 1 from PROM and retrain the PCIe link without rebooting the host\n"; - std::cout << " " << exe << " boot -d 1\n"; - std::cout << "Change the clock frequency of region 0 in device 0 to 100 
MHz\n"; - std::cout << " " << exe << " clock -f 100\n"; - std::cout << "For device 0 which supports multiple clocks, change the clock 1 to 200MHz and clock 2 to 250MHz\n"; - std::cout << " " << exe << " clock -f 200 -g 250\n"; - std::cout << "Download the accelerator program for device 2\n"; - std::cout << " " << exe << " program -d 2 -p a.xclbin\n"; - std::cout << "Run DMA test on device 1 with 32 KB blocks of buffer\n"; - std::cout << " " << exe << " dmatest -d 1 -b 0x20\n"; - std::cout << "Read 256 bytes from DDR starting at 0x1000 into file read.out\n"; - std::cout << " " << exe << " mem --read -a 0x1000 -i 256 -o read.out\n"; - std::cout << " " << "Default values for address is 0x0, size is DDR size and file is memread.out\n"; - std::cout << "Write 256 bytes to DDR starting at 0x1000 with byte 0xaa \n"; - std::cout << " " << exe << " mem --write -a 0x1000 -i 256 -e 0xaa\n"; - std::cout << " " << "Default values for address is 0x0, size is DDR size and pattern is 0x0\n"; - std::cout << "Read AXI Performance Monitor counters on the base platform (applicable only if APMs are available on base platform)\n"; - std::cout << " " << exe << " status --apm\n"; - std::cout << "Read AXI violation codes detected by Light weight AXI Protocol Checker (applicable only if LAPC IP available on base platform)\n"; - std::cout << " " << exe << " status --lapc\n"; - } -} - +#include "awssak.h" int main(int argc, char *argv[]) { - unsigned index = 0xffffffff; - unsigned regionIndex = 0xffffffff; - unsigned computeIndex = 0xffffffff; - unsigned short targetFreq[2] = {0, 0}; - unsigned fanSpeed = 0; - unsigned long long startAddr = 0; - unsigned int pattern_byte = 0; - size_t sizeInBytes = 0; - std::string outMemReadFile = "memread.out"; - std::string mcsFile1, mcsFile2; - std::string xclbin; - unsigned long long blockSize = 0x4000000; - - int c; - - const std::string exe(argv[0]); - if (argc == 1) { - xcldev::printHelp(exe); - return 1; - } - - argv++; - const auto v = 
xcldev::commandTable.find(*argv); - if (v == xcldev::commandTable.end()) { - std::cout << "ERROR: Unknown comand \'" << *argv << "\'\n"; - xcldev::printHelp(exe); - return 1; - } - - const xcldev::command cmd = v->second; - std::string cmdname = v->first; - xcldev::subcommand subcmd = xcldev::MEM_READ; - unsigned int ipmask = static_cast(xcldev::STATUS_NONE_MASK); - argc--; - - if (cmd == xcldev::HELP) { - xcldev::printHelp(exe); - return 1; - } - - argv[0] = const_cast(exe.c_str()); - static struct option long_options[] = { - {"read", no_argument, 0, xcldev::MEM_READ}, - {"write", no_argument, 0, xcldev::MEM_WRITE}, - {"apm", no_argument, 0, xcldev::STATUS_APM}, - {"lapc", no_argument, 0, xcldev::STATUS_LAPC} - }; - int long_index; - const char* short_options = "a:d:e:i:r:p:f:g:m:n:c:s:b:o:"; //don't add numbers - while ((c = getopt_long(argc, argv, short_options, long_options, &long_index)) != -1) - { - if (cmd == xcldev::LIST) { - std::cout << "ERROR: 'list' command does not accept any options\n"; - return -1; - } - switch (c) - { - //Deal with long options. 
Long options return the value set in option::val - case xcldev::MEM_READ : { - //--read - if (cmd != xcldev::MEM) { - std::cout << "ERROR: Option '" << long_options[long_index].name << "' cannot be used with command " << cmdname << "\n"; - return -1; - } - subcmd = xcldev::MEM_READ; - break; - } - case xcldev::MEM_WRITE : { - //--write - if (cmd != xcldev::MEM) { - std::cout << "ERROR: Option '" << long_options[long_index].name << "' cannot be used with command " << cmdname << "\n"; - return -1; - } - subcmd = xcldev::MEM_WRITE; - break; - } - case xcldev::STATUS_APM : { - //--apm - if (cmd != xcldev::STATUS) { - std::cout << "ERROR: Option '" << long_options[long_index].name << "' cannot be used with command " << cmdname << "\n"; - return -1; - } - ipmask |= static_cast(xcldev::STATUS_APM_MASK); - break; - } - case xcldev::STATUS_LAPC : { - //--lapc - if (cmd != xcldev::STATUS) { - std::cout << "ERROR: Option '" << long_options[long_index].name << "' cannot be used with command " << cmdname << "\n"; - return -1; - } - ipmask |= static_cast(xcldev::STATUS_LAPC_MASK); - break; - } - //short options are dealt here - case 'a':{ - if (cmd != xcldev::MEM) { - std::cout << "ERROR: '-a' not applicable for this command\n"; - return -1; - } - size_t idx = 0; - try { - startAddr = std::stoll(optarg, &idx, 0); - } - catch (const std::exception& ex) { - //out of range, invalid argument ex - std::cout << "ERROR: Value supplied to -" << (char)c << " option is invalid\n"; - return -1; - } - if (idx < strlen(optarg)) { - std::cout << "ERROR: Value supplied to -" << (char)c << " option is invalid\n"; - return -1; - } - break; - } - case 'o': - if (cmd != xcldev::MEM || subcmd != xcldev::MEM_READ) { - std::cout << "ERROR: '-o' not applicable for this command\n"; - return -1; - } - outMemReadFile = optarg; - break; - case 'e': { - if (cmd != xcldev::MEM || subcmd != xcldev::MEM_WRITE) { - std::cout << "ERROR: '-e' not applicable for this command\n"; - return -1; - } - size_t idx = 0; 
- try { - pattern_byte = std::stoi(optarg, &idx, 0); - } - catch (const std::exception& ex) { - //out of range, invalid argument ex - std::cout << "ERROR: Value supplied to -" << (char)c << " option must be a value between 0 and 255\n"; - return -1; - } - if (pattern_byte > 0xff || idx < strlen(optarg)) { - std::cout << "ERROR: Value supplied to -" << (char)c << " option must be a value between 0 and 255\n"; - return -1; - } - break; - } - case 'i': { - if (cmd != xcldev::MEM) { - std::cout << "ERROR: '-i' not applicable for this command\n"; - return -1; - } - size_t idx = 0; - try { - sizeInBytes = std::stoll(optarg, &idx, 0); - } - catch (const std::exception& ex) { - //out of range, invalid argument ex - std::cout << "ERROR: Value supplied to -" << (char)c << " option is invalid\n"; - return -1; - } - if (idx < strlen(optarg)) { - std::cout << "ERROR: Value supplied to -" << (char)c << " option is invalid\n"; - return -1; - } - break; - } - case 'd': - index = std::atoi(optarg); - break; - case 'r': - if ((cmd == xcldev::FLASH) || (cmd == xcldev::BOOT) || (cmd == xcldev::DMATEST)) { - std::cout << "ERROR: '-r' not applicable for this command\n"; - return -1; - } - regionIndex = std::atoi(optarg); - break; - case 'p': - if (cmd != xcldev::PROGRAM) { - std::cout << "ERROR: '-p' only allowed with 'program' command\n"; - return -1; - } - xclbin = optarg; - break; - case 'f': - if (cmd != xcldev::CLOCK) { - std::cout << "ERROR: '-f' only allowed with 'clock' command\n"; - return -1; - } - targetFreq[0] = std::atoi(optarg); - break; - case 'g': - if (cmd != xcldev::CLOCK) { - std::cout << "ERROR: '-g' only allowed with 'clock' command\n"; - return -1; - } - targetFreq[1] = std::atoi(optarg); - break; - case 'm': - if (cmd != xcldev::FLASH) { - std::cout << "ERROR: '-m' only allowed with 'flash' command\n"; - return -1; - } - mcsFile1 = optarg; - break; - case 'n': - if (cmd != xcldev::FLASH) { - std::cout << "ERROR: '-n' only allowed with 'flash' command\n"; - return 
-1; - } - mcsFile2 = optarg; - break; - case 'c': - if (cmd != xcldev::RUN) { - std::cout << "ERROR: '-c' only allowed with 'run' command\n"; - return -1; - } - computeIndex = std::atoi(optarg); - break; - case 's': - if (cmd != xcldev::FAN) { - std::cout << "ERROR: '-s' only allowed with 'fan' command\n"; - return -1; - } - fanSpeed = std::atoi(optarg); - break; - case 'b': - { - if (cmd != xcldev::DMATEST) { - std::cout << "ERROR: '-b' only allowed with 'dmatest' command\n"; - return -1; - } - std::string tmp(optarg); - if ((tmp[0] == '0') && (std::tolower(tmp[1]) == 'x')) { - blockSize = std::stoll(tmp, 0, 16); - } - else { - blockSize = std::stoll(tmp, 0, 10); - } - - if (blockSize & (blockSize - 1)) { - std::cout << "ERROR: block size should be power of 2\n"; - return -1; - } - blockSize *= 1024; // convert kilo bytes to bytes - break; - } - default: - xcldev::printHelp(exe); - return 1; - } - } - - if (optind != argc) { - std::cout << "ERROR: Illegal command \'" << argv[optind++] << "\'\n"; - } - - if (index == 0xffffffff) index = 0; - - if (regionIndex == 0xffffffff) regionIndex = 0; - - switch (cmd) { - case xcldev::FLASH: - { - if (mcsFile1.size() == 0) { - std::cout << "ERROR: Please specify mcs file with '-m' switch\n"; - return -1; - } - break; - } - case xcldev::BOOT: - case xcldev::RUN: - case xcldev::FAN: - case xcldev::DMATEST: - case xcldev::QUERY: - case xcldev::STATUS: - break; - case xcldev::PROGRAM: - { - if (xclbin.size() == 0) { - std::cout << "ERROR: Please specify xclbin file with '-p' switch\n"; - return -1; - } - break; - } - case xcldev::CLOCK: - { - if (!targetFreq[0] && !targetFreq[1]) { - std::cout << "ERROR: Please specify frequency(ies) with '-f' and or '-g' switch(es)\n"; - return -1; - } - break; - } - default: - break; - } - - std::vector> deviceVec; - - try { - unsigned int count = xclProbe(); - if (count == 0) { - std::cout << "ERROR: No devices found\n"; - return 1; - } - - for (unsigned i = 0; i < count; i++) { - 
deviceVec.emplace_back(new xcldev::device(i, nullptr)); - } - } - catch (const std::exception& ex) { - std::cout << ex.what() << std::endl; - return 1; - } - - std::cout << "INFO: Found " << deviceVec.size() << " device(s)\n"; - - if (cmd == xcldev::LIST) { - for (unsigned i = 0; i < deviceVec.size(); i++) { - std::cout << '[' << i << "] " << deviceVec[i]->name() << std::endl; - } - return 0; - } - - if (index >= deviceVec.size()) { - std::cout << "ERROR: Device index " << index << " out of range\n"; - return 1; - } - - int result = 0; - - switch (cmd) - { - case xcldev::BOOT: - result = deviceVec[index]->boot(); - break; - case xcldev::CLOCK: - result = deviceVec[index]->reclock2(regionIndex, targetFreq); - break; - case xcldev::FAN: - result = deviceVec[index]->fan(fanSpeed); - break; - case xcldev::FLASH: - result = deviceVec[index]->flash(mcsFile1, mcsFile2); - break; - case xcldev::PROGRAM: - result = deviceVec[index]->program(xclbin, regionIndex); - break; - case xcldev::QUERY: - deviceVec[index]->dump(std::cout); - break; - case xcldev::RESET: - result = deviceVec[index]->reset(regionIndex); - break; - case xcldev::RUN: - result = deviceVec[index]->run(regionIndex, computeIndex); - break; - case xcldev::DMATEST: - result = deviceVec[index]->dmatest(blockSize); - break; - case xcldev::MEM: - if (subcmd == xcldev::MEM_READ) { - result = deviceVec[index]->memread(outMemReadFile, startAddr, sizeInBytes); - } - else if (subcmd == xcldev::MEM_WRITE) { - result = deviceVec[index]->memwrite(startAddr, sizeInBytes, pattern_byte); - } - break; - case xcldev::STATUS: - if (ipmask == xcldev::STATUS_NONE_MASK) { - //if no ip specified then read all - ipmask = static_cast(xcldev::STATUS_APM_MASK); - if (!(getuid() && geteuid())) { - ipmask |= static_cast(xcldev::STATUS_LAPC_MASK); - } - } - if (ipmask & static_cast(xcldev::STATUS_APM_MASK)) { - result = deviceVec[index]->readAPMCounters(); - } - if (ipmask & static_cast(xcldev::STATUS_LAPC_MASK)) { - result = 
deviceVec[index]->readLAPCheckers(1); - } - break; - default: - std::cout << "ERROR: Not implemented\n"; - result = -1; - } - - if(result == 0) { - std::cout << "INFO: xbsak " << v->first << " successful." << std::endl; - } else { - std::cout << "ERROR: xbsak " << v->first << " failed." << std::endl; - } - - return result; + return xcldev::xclXbsak(argc, argv); } diff --git a/SDAccel/tools/awssak/memaccess.h b/SDAccel/tools/awssak/memaccess.h deleted file mode 100644 index b85ed53f..00000000 --- a/SDAccel/tools/awssak/memaccess.h +++ /dev/null @@ -1,159 +0,0 @@ -/** - * Copyright (C) 2017-2018 Xilinx, Inc - * Author: Sonal Santan - * Simple command line utility to inetract with SDX PCIe devices - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ -#include -#include -#include -#include -#include -#include - -#include "xclhal.h" -namespace xcldev { - class memaccess { - std::string mDevName; - size_t mDDRSize, mDataAlignment; - public: - memaccess(std::string aDevName, size_t aDDRSize, size_t aDataAlignment) : - mDevName(aDevName), mDDRSize(aDDRSize), mDataAlignment (aDataAlignment) {} - - int read(std::string aFilename, unsigned long long aStartAddr = 0, unsigned long long aSize = 0) { - void *buf = 0; - unsigned long long size; - unsigned long long blockSize = 0x20000; - if (posix_memalign(&buf, 4096, blockSize)) - return -1; - std::memset(buf, 0, blockSize); - - //sanity check - if (aStartAddr > mDDRSize) { - std::cout << "Start address " << std::hex << aStartAddr << - " is greater than device memory " << std::hex << mDDRSize << std::endl; - return -1; - } - //sanity check - if (aSize > mDDRSize || aStartAddr+aSize > mDDRSize) { - std::cout << "Read size " << std::dec << aSize << " from address 0x" << std::hex << aStartAddr << - " goes beyond the device memory" << std::endl; - return -1; - } - - unsigned long long endAddr = aSize == 0 ? mDDRSize : aStartAddr+aSize; - - size = endAddr-aStartAddr; - std::ofstream outFile(aFilename, std::ofstream::out | std::ofstream::binary); - - // Use plain POSIX open/pwrite/close. - std::string baseName = mDevName; - baseName += "_c2h_0"; - int fd = open(baseName.c_str(), O_RDONLY); - if (fd < 0) { - std::cout << "Unable to open device node " << baseName << "\n"; - return -1; - } - size_t count = size; - uint64_t incr; - size_t nRead = 0; - for (uint64_t phy = aStartAddr; phy < aStartAddr+size; phy += incr) { - incr = (count >= blockSize) ? 
blockSize : count; - nRead = pread(fd, (char *)buf, incr, phy); - if (nRead == (size_t)-1) { - //error - std::cout << "Error (" << strerror (errno) << ") reading " << incr << " bytes from DDR at offset " << std::hex << phy << std::dec << "\n"; - return -1; - } - count -= nRead; - if (nRead) { - outFile.write((const char*)(char*)buf, nRead); - if ((outFile.rdstate() & std::ifstream::failbit) != 0) { - std::cout << "Error writing to file \n"; - } - } - std::cout << "INFO: Read block 0x" << std::hex << nRead << " total 0x" < mDDRSize) { - std::cout << "Start address " << std::hex << aStartAddr << - " is greater than device memory " << std::hex << mDDRSize << std::endl; - return -1; - } - //sanity check - if (aSize > mDDRSize || aStartAddr+aSize > mDDRSize) { - std::cout << "Read size " << std::dec << aSize << " from address 0x" << std::hex << aStartAddr << - " goes beyond the device memory" << std::endl; - return -1; - } - - endAddr = aSize == 0 ? mDDRSize : aStartAddr + aSize; - size = endAddr-aStartAddr; - - // Use plain POSIX open/pwrite/close. - std::string baseName = mDevName; - baseName += "_h2c_0"; - - std::cout << "INFO: Writing DDR with " << std::dec << size << " bytes of pattern: 0x" - << std::hex << aPattern << " from address 0x" <= blockSize) ? blockSize : count; - size_t nWrite = pwrite(wfd, (const char *)buf, incr, phy); - if (nWrite == (size_t)-1) { - //error - std::cout << "Error (" << strerror (errno) << ") writing " << incr << " bytes to DDR at offset " << std::hex << phy << std::dec << "\n"; - return -1; - } - count -= nWrite; - } - - close(wfd); - if (count != 0) { - std::cout << "Error! 
Written " << std::dec << size-count << " bytes, requested " << size << std::endl; - return -1; - } - return count; - } - }; -} diff --git a/SDAccel/tools/awssak2/LICENSE-2.0.txt b/SDAccel/tools/awssak2/LICENSE-2.0.txt deleted file mode 100644 index d6456956..00000000 --- a/SDAccel/tools/awssak2/LICENSE-2.0.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/SDAccel/tools/awssak2/Makefile b/SDAccel/tools/awssak2/Makefile deleted file mode 100644 index 391ad587..00000000 --- a/SDAccel/tools/awssak2/Makefile +++ /dev/null @@ -1,71 +0,0 @@ -# Amazon FPGA Hardware Development Kit -# -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Amazon Software License (the "License"). You may not use -# this file except in compliance with the License. A copy of the License is -# located at -# -# http://aws.amazon.com/asl/ -# -# or in the "license" file accompanying this file. This file is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or -# implied. See the License for the specific language governing permissions and -# limitations under the License. 
- -EXENAME := xbsak - -CXX_EXT := cpp -CL_EXT := cl - -AR := ar -CXX := g++ - -CXXFLAGS := -Wall -Werror -std=c++11 - -ROOT = $(SDACCEL_DIR) -HAL_INC := -I$(SDACCEL_DIR)/userspace/src2 -I$(SDACCEL_DIR)/userspace/include -I$(SDK_DIR)/userspace/include -I$(SDK_DIR)/linux_kernel_drivers - -CXXFLAGS += $(HAL_INC) - -ifeq ($(ec2),1) -AWS_HAL_LIBNAME := $(ROOT)/userspace/src2/libxrt-aws.a -else -AWS_HAL_LIBNAME := $(ROOT)/userspace/src2/libxrtbm-aws.a -CXXFLAGS += -DINTERNAL_RELEASE -endif - -ifeq ($(debug),1) - CXXFLAGS += -g -D_DEBUG -else - CXXFLAGS += -O2 -DNDEBUG -endif - -SRCS := $(wildcard *.$(CXX_EXT)) -OBJS := $(patsubst %.$(CXX_EXT), %.o, $(SRCS)) - --include $(OBJS:.o=.d) - -AWS_FPGA_MGMTLIB := fpga_mgmt -AWS_FPGA_MGMTLIB_DIR := $(SDK_DIR)/userspace/lib - -ifeq ($(ec2),1) -LDFLAGS += -L$(AWS_FPGA_MGMTLIB_DIR) -LDLIBS += -l$(AWS_FPGA_MGMTLIB) -endif - -all : $(EXENAME) - -%.o: %.$(CXX_EXT) - $(CXX) $(CXXFLAGS) $(MYCFLAGS) $(MYCXXFLAGS) -c $< -o $@ - $(CXX) $(CXXFLAGS) $(MYCFLAGS) $(MYCXXFLAGS) -c -MM $< -o $(patsubst %.o, %.d, $@) - -$(EXENAME): $(OBJS) $(AWS_HAL_LIBNAME) - $(CXX) -o $@ $(OBJS) $(AWS_HAL_LIBNAME) $(LDFLAGS) $(LDLIBS) -lrt -pthread - -clean: - rm -rf *.o *.d $(EXENAME) - -.PHONY: all - -.DEFAULT_GOAL := all diff --git a/SDAccel/tools/awssak2/main.cpp b/SDAccel/tools/awssak2/main.cpp deleted file mode 100644 index 09b96f57..00000000 --- a/SDAccel/tools/awssak2/main.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Copyright (C) 2017-2018 Xilinx, Inc - * Author: Sonal Santan, Ryan Radjabi - * Simple command line utility to inetract with SDX PCIe devices - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. 
A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -#include "awssak.h" - -int main(int argc, char *argv[]) -{ - return xcldev::xclXbsak(argc, argv); -} diff --git a/SDAccel/userspace/include/xcl_app_debug.h b/SDAccel/userspace/include/xcl_app_debug.h index 71d5c20b..0c3fb604 100644 --- a/SDAccel/userspace/include/xcl_app_debug.h +++ b/SDAccel/userspace/include/xcl_app_debug.h @@ -1,8 +1,23 @@ /** - * Copyright (C) 2015-2018 Xilinx, Inc + * Copyright (C) 2016-2018 Xilinx, Inc * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +/** * Xilinx SDAccel HAL userspace driver extension APIs * Performance Monitoring Exposed Parameters + * Copyright (C) 2015-2018, Xilinx Inc - All rights reserved * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. 
A copy of the @@ -24,13 +39,14 @@ extern "C" { #endif -/************************ APM Debug Counters ********************************/ -#define XAPM_DEBUG_METRIC_COUNTERS_PER_SLOT 4 //debug is only interested in 4 metric counters +/************************ SPM Debug Counters ********************************/ +//debug is only interested in 4 metric counters: wb,wt,rb,rt,outstanding,lwa,lwd,lra,lrd +#define XSPM_DEBUG_SAMPLE_COUNTERS_PER_SLOT 9 /* * LAPC related defs here */ -#define XLAPC_MAX_NUMBER_SLOTS 4 +#define XLAPC_MAX_NUMBER_SLOTS 31 #define XLAPC_STATUS_PER_SLOT 9 /* Metric counters per slot */ @@ -47,15 +63,24 @@ extern "C" { /********************** Definitions: Enums, Structs ***************************/ enum xclDebugReadType { XCL_DEBUG_READ_TYPE_APM = 0, - XCL_DEBUG_READ_TYPE_LAPC = 1 + XCL_DEBUG_READ_TYPE_LAPC = 1, + XCL_DEBUG_READ_TYPE_SPM = 2 }; /* Debug counter results */ typedef struct { - unsigned int WriteBytes[XAPM_MAX_NUMBER_SLOTS]; - unsigned int WriteTranx[XAPM_MAX_NUMBER_SLOTS]; - unsigned int ReadBytes[XAPM_MAX_NUMBER_SLOTS]; - unsigned int ReadTranx[XAPM_MAX_NUMBER_SLOTS]; + unsigned int WriteBytes[XSPM_MAX_NUMBER_SLOTS]; + unsigned int WriteTranx[XSPM_MAX_NUMBER_SLOTS]; + unsigned int ReadBytes[XSPM_MAX_NUMBER_SLOTS]; + unsigned int ReadTranx[XSPM_MAX_NUMBER_SLOTS]; + + unsigned int OutStandCnts[XSPM_MAX_NUMBER_SLOTS]; + unsigned int LastWriteAddr[XSPM_MAX_NUMBER_SLOTS]; + unsigned int LastWriteData[XSPM_MAX_NUMBER_SLOTS]; + unsigned int LastReadAddr[XSPM_MAX_NUMBER_SLOTS]; + unsigned int LastReadData[XSPM_MAX_NUMBER_SLOTS]; + unsigned int NumSlots; + char DevUserName[256]; } xclDebugCountersResults; enum xclCheckerType { @@ -67,6 +92,8 @@ typedef struct { unsigned int OverallStatus[XLAPC_MAX_NUMBER_SLOTS]; unsigned int CumulativeStatus[XLAPC_MAX_NUMBER_SLOTS][4]; unsigned int SnapshotStatus[XLAPC_MAX_NUMBER_SLOTS][4]; + unsigned int NumSlots; + char DevUserName[256]; } xclDebugCheckersResults; #ifdef __cplusplus diff --git 
a/SDAccel/userspace/include/xcl_app_debug2.h b/SDAccel/userspace/include/xcl_app_debug2.h deleted file mode 100755 index 0c3fb604..00000000 --- a/SDAccel/userspace/include/xcl_app_debug2.h +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -/** - * Xilinx SDAccel HAL userspace driver extension APIs - * Performance Monitoring Exposed Parameters - * Copyright (C) 2015-2018, Xilinx Inc - All rights reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ - -#ifndef _XCL_DEBUG_H_ -#define _XCL_DEBUG_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/************************ SPM Debug Counters ********************************/ -//debug is only interested in 4 metric counters: wb,wt,rb,rt,outstanding,lwa,lwd,lra,lrd -#define XSPM_DEBUG_SAMPLE_COUNTERS_PER_SLOT 9 - -/* - * LAPC related defs here - */ -#define XLAPC_MAX_NUMBER_SLOTS 31 -#define XLAPC_STATUS_PER_SLOT 9 - -/* Metric counters per slot */ -#define XLAPC_OVERALL_STATUS 0 -#define XLAPC_CUMULATIVE_STATUS_0 1 -#define XLAPC_CUMULATIVE_STATUS_1 2 -#define XLAPC_CUMULATIVE_STATUS_2 3 -#define XLAPC_CUMULATIVE_STATUS_3 4 -#define XLAPC_SNAPSHOT_STATUS_0 5 -#define XLAPC_SNAPSHOT_STATUS_1 6 -#define XLAPC_SNAPSHOT_STATUS_2 7 -#define XLAPC_SNAPSHOT_STATUS_3 8 - -/********************** Definitions: Enums, Structs ***************************/ -enum xclDebugReadType { - XCL_DEBUG_READ_TYPE_APM = 0, - XCL_DEBUG_READ_TYPE_LAPC = 1, - XCL_DEBUG_READ_TYPE_SPM = 2 -}; - -/* Debug counter results */ -typedef struct { - unsigned int WriteBytes[XSPM_MAX_NUMBER_SLOTS]; - unsigned int WriteTranx[XSPM_MAX_NUMBER_SLOTS]; - unsigned int ReadBytes[XSPM_MAX_NUMBER_SLOTS]; - unsigned int ReadTranx[XSPM_MAX_NUMBER_SLOTS]; - - unsigned int OutStandCnts[XSPM_MAX_NUMBER_SLOTS]; - unsigned int LastWriteAddr[XSPM_MAX_NUMBER_SLOTS]; - unsigned int LastWriteData[XSPM_MAX_NUMBER_SLOTS]; - unsigned int LastReadAddr[XSPM_MAX_NUMBER_SLOTS]; - unsigned int LastReadData[XSPM_MAX_NUMBER_SLOTS]; - unsigned int NumSlots; - char DevUserName[256]; -} xclDebugCountersResults; - -enum xclCheckerType { -XCL_CHECKER_MEMORY = 0, -}; - -/* Debug checker results */ -typedef struct { - unsigned int OverallStatus[XLAPC_MAX_NUMBER_SLOTS]; - unsigned int CumulativeStatus[XLAPC_MAX_NUMBER_SLOTS][4]; - unsigned int SnapshotStatus[XLAPC_MAX_NUMBER_SLOTS][4]; - unsigned int NumSlots; - char DevUserName[256]; -} xclDebugCheckersResults; - -#ifdef __cplusplus -} -#endif -#endif diff --git 
a/SDAccel/userspace/include/xclbin.h b/SDAccel/userspace/include/xclbin.h index f9af178f..c94ee9d1 100644 --- a/SDAccel/userspace/include/xclbin.h +++ b/SDAccel/userspace/include/xclbin.h @@ -1,20 +1,32 @@ /** - * Copyright (C) 2015-2018 Xilinx, Inc + * Copyright (C) 2015-2018 Xilinx, Inc + * Xilinx SDAccel xclbin container definition * - * Xilinx SDAccel xclbin container definition + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at + * Apache License Verbiage * - * http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. -*/ + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * GPL license Verbiage: + * + * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ #ifndef _XCLBIN_H_ #define _XCLBIN_H_ @@ -30,31 +42,32 @@ #include #endif +#define ENABLE_MTAG + #ifdef __cplusplus extern "C" { #endif + /** + * Container format for Xilinx bitstreams, metadata and other + * binary blobs. + * Every segment must be aligned at 8 byte boundary with null byte padding + * between adjacent segments if required. + * For segements which are not present both offset and length must be 0 in + * the header. + * Currently only xclbin0\0 is recognized as file magic. In future if/when file + * format is updated the magic string will be changed to xclbin1\0 and so on. + */ enum XCLBIN_MODE { - // Use with flat flow where the full FPGA is re-programmed XCLBIN_FLAT, - // Used with Partial Reconfig flow XCLBIN_PR, - // Unused at the moment XCLBIN_TANDEM_STAGE2, - // Unused at the moment XCLBIN_TANDEM_STAGE2_WITH_PR, - // Used in HW emulation XCLBIN_HW_EMU, - // Used in SW emulation XCLBIN_SW_EMU, XCLBIN_MODE_MAX }; - /** - * Note that xclBin format has been deprecated from 2017.1 release onwards. - * SDAccel has switched to AXLF also known as xclbin2 -- look for struct axlf - * below. 
- */ struct xclBin { char m_magic[8]; /* should be xclbin0\0 */ @@ -98,15 +111,35 @@ extern "C" { enum axlf_section_kind { BITSTREAM = 0, - CLEARING_BITSTREAM = 1, - EMBEDDED_METADATA = 2, - FIRMWARE = 3, - DEBUG_DATA = 4 + CLEARING_BITSTREAM, + EMBEDDED_METADATA, + FIRMWARE, + DEBUG_DATA, + SCHED_FIRMWARE, + MEM_TOPOLOGY, + CONNECTIVITY, + IP_LAYOUT, + DEBUG_IP_LAYOUT, + DESIGN_CHECK_POINT + }; + + enum MEM_TYPE { + MEM_DDR3, + MEM_DDR4, + MEM_DRAM, + MEM_STREAMING, + MEM_PREALLOCATED_GLOB, + MEM_ARE //Aurora + }; + + enum IP_TYPE { + IP_MB = 0, + IP_KERNEL //kernel instance }; struct axlf_section_header { uint32_t m_sectionKind; /* Section type */ - char m_sectionName[16]; /* Examples: "stage2", "clear1", "clear2", "ocl1", "ocl2, "ublaze" */ + char m_sectionName[16]; /* Examples: "stage2", "clear1", "clear2", "ocl1", "ocl2, "ublaze", "sched" */ uint64_t m_sectionOffset; /* File offset of section data */ uint64_t m_sectionSize; /* Size of section data */ }; @@ -114,7 +147,7 @@ extern "C" { struct axlf_header { uint64_t m_length; /* Total size of the xclbin file */ uint64_t m_timeStamp; /* Number of seconds since epoch when xclbin was created */ - uint64_t m_featureRomTimeStamp; /* TimeSinceEpoch of the Feature ROM in the DSA */ + uint64_t m_featureRomTimeStamp; /* TimeSinceEpoch of the featureRom */ uint32_t m_version; /* Tool version used to create xclbin */ uint32_t m_mode; /* XCLBIN_MODE */ uint64_t m_platformId; /* 64 bit platform ID: vendor-device-subvendor-subdev */ @@ -127,19 +160,99 @@ extern "C" { struct axlf { char m_magic[8]; /* Should be "xclbin2\0" */ - unsigned char m_cipher[32]; /* HMAC output digest */ + unsigned char m_cipher[32]; /* Hmac output digest */ unsigned char m_keyBlock[256]; /* Signature for validation of binary */ - uint64_t m_uniqueId; /* axlf's uniqueId, use it to skip re-download etc */ + uint64_t m_uniqueId; /* axlf's uniqueId, use it to skip redownload etc */ struct axlf_header m_header; /* Inline header */ struct 
axlf_section_header m_sections[1]; /* One or more section headers follow */ }; - //xilinx internal + /**** BEGIN : Xilinx internal section *****/ + + /* bitstream information */ struct xlnx_bitstream { uint8_t m_freq[8]; char bits[1]; }; + /**** MEMORY TOPOLOGY SECTION ****/ + struct mem_data { + uint8_t m_type; //enum corresponding to mem_type. + uint8_t m_used; //if 0 this bank is not present + uint64_t m_size; //in KB + uint64_t m_base_address; +#ifdef ENABLE_MTAG + unsigned char m_tag[16]; //Initially: BANK0,1,2,3, has to be null terminated. +#endif + }; + + struct mem_topology { + int32_t m_count; //Number of mem_data + struct mem_data m_mem_data[1]; //Should be sorted on mem_type + }; + + /**** CONNECTIVITY SECTION ****/ + /* Connectivity of each argument of Kernel. It will be in terms of argument + * index associated. For associating kernel instances with arguments and + * banks, start at the connectivity section. Using the m_ip_layout_index + * access the ip_data.m_name. Now we can associate this kernel instance + * with its original kernel name and get the connectivity as well. This + * enables us to form related groups of kernel instances. + */ + + struct connection { + int32_t arg_index; //From 0 to n, may not be contiguous as scalars skipped + int32_t m_ip_layout_index; //index into the ip_layout section. ip_layout.m_ip_data[index].m_type == IP_KERNEL + int32_t mem_data_index; //index of the m_mem_data . Flag error is m_used false. + }; + + struct connectivity { + int32_t m_count; + struct connection m_connection[1]; + }; + + + /**** IP_LAYOUT SECTION ****/ + /* IPs on AXI lite - their types, names, and base addresses.*/ + struct ip_data { + uint32_t m_type; //map to IP_TYPE enum + uint32_t properties; //32 bits to indicate ip specific property. eg if m_type == IP_KERNEL then bit 0 is for interrupt. + uint64_t m_base_address; + uint8_t m_name[64]; //eg Kernel name corresponding to KERNEL instance, can embed CU name in future. 
+ }; + + struct ip_layout { + int32_t m_count; + struct ip_data m_ip_data[1]; //All the ip_data needs to be sorted by m_base_address. + }; + + /*** Debug IP section layout ****/ + enum DEBUG_IP_TYPE { + UNDEFINED = 0, + LAPC, + ILA, + AXI_MM_MONITOR, + AXI_TRACE_FUNNEL, + AXI_MONITOR_FIFO_LITE, + AXI_MONITOR_FIFO_FULL + }; + + struct debug_ip_data { + uint8_t m_type; // type of enum DEBUG_IP_TYPE + uint8_t m_index; + uint8_t m_properties; + uint8_t m_reserved[5]; + uint64_t m_base_address; + uint8_t m_name[128]; + }; + + struct debug_ip_layout { + uint16_t m_count; + struct debug_ip_data m_debug_ip_data[1]; + }; + + /**** END : Xilinx internal section *****/ + # ifdef __cplusplus namespace xclbin { inline const axlf_section_header* diff --git a/SDAccel/userspace/include/xclbin2.h b/SDAccel/userspace/include/xclbin2.h deleted file mode 100644 index c94ee9d1..00000000 --- a/SDAccel/userspace/include/xclbin2.h +++ /dev/null @@ -1,273 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * Xilinx SDAccel xclbin container definition - * - * This file is dual licensed. It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - * - * Apache License Verbiage - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * GPL license Verbiage: - * - * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - * You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _XCLBIN_H_ -#define _XCLBIN_H_ - -#if defined(__KERNEL__) -#include -#elif defined(__cplusplus) -#include -#include -#include -#else -#include -#include -#endif - -#define ENABLE_MTAG - -#ifdef __cplusplus -extern "C" { -#endif - - /** - * Container format for Xilinx bitstreams, metadata and other - * binary blobs. - * Every segment must be aligned at 8 byte boundary with null byte padding - * between adjacent segments if required. - * For segements which are not present both offset and length must be 0 in - * the header. - * Currently only xclbin0\0 is recognized as file magic. In future if/when file - * format is updated the magic string will be changed to xclbin1\0 and so on. 
- */ - enum XCLBIN_MODE { - XCLBIN_FLAT, - XCLBIN_PR, - XCLBIN_TANDEM_STAGE2, - XCLBIN_TANDEM_STAGE2_WITH_PR, - XCLBIN_HW_EMU, - XCLBIN_SW_EMU, - XCLBIN_MODE_MAX - }; - - - struct xclBin { - char m_magic[8]; /* should be xclbin0\0 */ - uint64_t m_length; /* total size of the xclbin file */ - uint64_t m_timeStamp; /* number of seconds since epoch when xclbin was created */ - uint64_t m_version; /* tool version used to create xclbin */ - unsigned m_mode; /* XCLBIN_MODE */ - char m_nextXclBin[24]; /* Name of next xclbin file in the daisy chain */ - uint64_t m_metadataOffset; /* file offset of embedded metadata */ - uint64_t m_metadataLength; /* size of the embedded metdata */ - uint64_t m_primaryFirmwareOffset; /* file offset of bitstream or emulation archive */ - uint64_t m_primaryFirmwareLength; /* size of the bistream or emulation archive */ - uint64_t m_secondaryFirmwareOffset; /* file offset of clear bitstream if any */ - uint64_t m_secondaryFirmwareLength; /* size of the clear bitstream */ - uint64_t m_driverOffset; /* file offset of embedded device driver if any (currently unused) */ - uint64_t m_driverLength; /* size of the embedded device driver (currently unused) */ - - // Extra debug information for hardware and hardware emulation debug - - uint64_t m_dwarfOffset ; - uint64_t m_dwarfLength ; - uint64_t m_ipiMappingOffset ; - uint64_t m_ipiMappingLength ; - }; - - /* - * AXLF LAYOUT - * ----------- - * - * ----------------------------------------- - * | Magic | - * ----------------------------------------- - * | Header | - * ----------------------------------------- - * | One or more section headers | - * ----------------------------------------- - * | Matching number of sections with data | - * ----------------------------------------- - * - */ - - enum axlf_section_kind { - BITSTREAM = 0, - CLEARING_BITSTREAM, - EMBEDDED_METADATA, - FIRMWARE, - DEBUG_DATA, - SCHED_FIRMWARE, - MEM_TOPOLOGY, - CONNECTIVITY, - IP_LAYOUT, - DEBUG_IP_LAYOUT, - 
DESIGN_CHECK_POINT - }; - - enum MEM_TYPE { - MEM_DDR3, - MEM_DDR4, - MEM_DRAM, - MEM_STREAMING, - MEM_PREALLOCATED_GLOB, - MEM_ARE //Aurora - }; - - enum IP_TYPE { - IP_MB = 0, - IP_KERNEL //kernel instance - }; - - struct axlf_section_header { - uint32_t m_sectionKind; /* Section type */ - char m_sectionName[16]; /* Examples: "stage2", "clear1", "clear2", "ocl1", "ocl2, "ublaze", "sched" */ - uint64_t m_sectionOffset; /* File offset of section data */ - uint64_t m_sectionSize; /* Size of section data */ - }; - - struct axlf_header { - uint64_t m_length; /* Total size of the xclbin file */ - uint64_t m_timeStamp; /* Number of seconds since epoch when xclbin was created */ - uint64_t m_featureRomTimeStamp; /* TimeSinceEpoch of the featureRom */ - uint32_t m_version; /* Tool version used to create xclbin */ - uint32_t m_mode; /* XCLBIN_MODE */ - uint64_t m_platformId; /* 64 bit platform ID: vendor-device-subvendor-subdev */ - uint64_t m_featureId; /* 64 bit feature id */ - unsigned char m_platformVBNV[64]; /* e.g. xilinx:xil-accel-rd-ku115:4ddr-xpr:3.4: null terminated */ - char m_next_axlf[16]; /* Name of next xclbin file in the daisy chain */ - char m_debug_bin[16]; /* Name of binary with debug information */ - uint32_t m_numSections; /* Number of section headers */ - }; - - struct axlf { - char m_magic[8]; /* Should be "xclbin2\0" */ - unsigned char m_cipher[32]; /* Hmac output digest */ - unsigned char m_keyBlock[256]; /* Signature for validation of binary */ - uint64_t m_uniqueId; /* axlf's uniqueId, use it to skip redownload etc */ - struct axlf_header m_header; /* Inline header */ - struct axlf_section_header m_sections[1]; /* One or more section headers follow */ - }; - - /**** BEGIN : Xilinx internal section *****/ - - /* bitstream information */ - struct xlnx_bitstream { - uint8_t m_freq[8]; - char bits[1]; - }; - - /**** MEMORY TOPOLOGY SECTION ****/ - struct mem_data { - uint8_t m_type; //enum corresponding to mem_type. 
- uint8_t m_used; //if 0 this bank is not present - uint64_t m_size; //in KB - uint64_t m_base_address; -#ifdef ENABLE_MTAG - unsigned char m_tag[16]; //Initially: BANK0,1,2,3, has to be null terminated. -#endif - }; - - struct mem_topology { - int32_t m_count; //Number of mem_data - struct mem_data m_mem_data[1]; //Should be sorted on mem_type - }; - - /**** CONNECTIVITY SECTION ****/ - /* Connectivity of each argument of Kernel. It will be in terms of argument - * index associated. For associating kernel instances with arguments and - * banks, start at the connectivity section. Using the m_ip_layout_index - * access the ip_data.m_name. Now we can associate this kernel instance - * with its original kernel name and get the connectivity as well. This - * enables us to form related groups of kernel instances. - */ - - struct connection { - int32_t arg_index; //From 0 to n, may not be contiguous as scalars skipped - int32_t m_ip_layout_index; //index into the ip_layout section. ip_layout.m_ip_data[index].m_type == IP_KERNEL - int32_t mem_data_index; //index of the m_mem_data . Flag error is m_used false. - }; - - struct connectivity { - int32_t m_count; - struct connection m_connection[1]; - }; - - - /**** IP_LAYOUT SECTION ****/ - /* IPs on AXI lite - their types, names, and base addresses.*/ - struct ip_data { - uint32_t m_type; //map to IP_TYPE enum - uint32_t properties; //32 bits to indicate ip specific property. eg if m_type == IP_KERNEL then bit 0 is for interrupt. - uint64_t m_base_address; - uint8_t m_name[64]; //eg Kernel name corresponding to KERNEL instance, can embed CU name in future. - }; - - struct ip_layout { - int32_t m_count; - struct ip_data m_ip_data[1]; //All the ip_data needs to be sorted by m_base_address. 
- }; - - /*** Debug IP section layout ****/ - enum DEBUG_IP_TYPE { - UNDEFINED = 0, - LAPC, - ILA, - AXI_MM_MONITOR, - AXI_TRACE_FUNNEL, - AXI_MONITOR_FIFO_LITE, - AXI_MONITOR_FIFO_FULL - }; - - struct debug_ip_data { - uint8_t m_type; // type of enum DEBUG_IP_TYPE - uint8_t m_index; - uint8_t m_properties; - uint8_t m_reserved[5]; - uint64_t m_base_address; - uint8_t m_name[128]; - }; - - struct debug_ip_layout { - uint16_t m_count; - struct debug_ip_data m_debug_ip_data[1]; - }; - - /**** END : Xilinx internal section *****/ - -# ifdef __cplusplus - namespace xclbin { - inline const axlf_section_header* - get_axlf_section(const axlf* top, axlf_section_kind kind) - { - auto begin = top->m_sections; - auto end = begin + top->m_header.m_numSections; - auto itr = std::find_if(begin,end,[kind](const axlf_section_header& sec) { return sec.m_sectionKind==kind; }); - return (itr!=end) ? &(*itr) : nullptr; - } - } -# endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SDAccel/userspace/include/xclhal.h b/SDAccel/userspace/include/xclhal.h index f5c8cb3f..1758350a 100644 --- a/SDAccel/userspace/include/xclhal.h +++ b/SDAccel/userspace/include/xclhal.h @@ -1,6 +1,5 @@ -/** +/* * Copyright (C) 2015-2018 Xilinx, Inc - * * Xilinx SDAccel HAL userspace driver APIs * * Licensed under the Apache License, Version 2.0 (the "License"). You may @@ -16,8 +15,8 @@ * under the License. */ -#ifndef _XCL_HAL_H_ -#define _XCL_HAL_H_ +#ifndef _XCL_HAL2_H_ +#define _XCL_HAL2_H_ #ifdef __cplusplus #include @@ -40,320 +39,795 @@ #include "xclperf.h" #include "xcl_app_debug.h" +#include "xclerr.h" #ifdef __cplusplus extern "C" { #endif - typedef void * xclDeviceHandle; - - struct xclBin; - struct axlf; - /** - * Structure used to obtain various bits of information from the device. 
- */ - - struct xclDeviceInfo2 { - unsigned mMagic; // = 0X586C0C6C; XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); - char mName[256]; - unsigned short mHALMajorVersion; - unsigned short mHALMinorVersion; - unsigned short mVendorId; - unsigned short mDeviceId; - unsigned short mSubsystemId; - unsigned short mSubsystemVendorId; - unsigned short mDeviceVersion; - size_t mDDRSize; // Size of DDR memory - size_t mDataAlignment; // Minimum data alignment requirement for host buffers - size_t mDDRFreeSize; // Total unused/available DDR memory - size_t mMinTransferSize; // Minimum DMA buffer size - unsigned short mDDRBankCount; - unsigned short mOCLFrequency[4]; - unsigned short mPCIeLinkWidth; - unsigned short mPCIeLinkSpeed; - unsigned short mDMAThreads; - short mOnChipTemp; - short mFanTemp; - unsigned short mVInt; - unsigned short mVAux; - unsigned short mVBram; - float mCurrent; -// unsigned short mCurrent; // Change float to short after driver unification since it changes the ABI - unsigned short mNumClocks; - unsigned short mFanSpeed; - bool mMigCalib; - // More properties here - }; - - enum xclMemoryDomains { - XCL_MEM_HOST_RAM = 0x00000000, - XCL_MEM_DEVICE_RAM = 0x00000001, - XCL_MEM_DEVICE_BRAM = 0x00000002, - XCL_MEM_SVM = 0x00000003, - XCL_MEM_CMA = 0x00000004, - XCL_MEM_DEVICE_REG = 0x00000005 - }; - - enum xclDDRFlags { - XCL_DEVICE_RAM_BANK0 = 0, - XCL_DEVICE_RAM_BANK1 = 1, - XCL_DEVICE_RAM_BANK2 = 2, - XCL_DEVICE_RAM_BANK3 = 3 - }; - - enum xclBRAMFlags { - XCL_DEVICE_BRAM0 = 0, - XCL_DEVICE_BRAM1 = 1, - XCL_DEVICE_BRAM2 = 2, - XCL_DEVICE_BRAM3 = 3, - }; - - /** - * Define address spaces on the device AXI bus. The enums are used in xclRead() and xclWrite() - * to pass relative offsets. 
- */ - - enum xclAddressSpace { - XCL_ADDR_SPACE_DEVICE_FLAT = 0, // Absolute address space - XCL_ADDR_SPACE_DEVICE_RAM = 1, // Address space for the DDR memory - XCL_ADDR_KERNEL_CTRL = 2, // Address space for the OCL Region control port - XCL_ADDR_SPACE_DEVICE_PERFMON = 3, // Address space for the Performance monitors - XCL_ADDR_SPACE_DEVICE_REG = 4, // Address space for device registers. - XCL_ADDR_SPACE_DEVICE_CHECKER = 5, // Address space for protocol checker - - XCL_ADDR_SPACE_MAX = 8 - }; - - /** - * Defines verbosity levels which are passed to xclOpen during device creation time - */ - - enum xclVerbosityLevel { - XCL_QUIET = 0, - XCL_INFO = 1, - XCL_WARN = 2, - XCL_ERROR = 3 - }; - - enum xclResetKind { - XCL_RESET_KERNEL, - XCL_RESET_FULL - }; - - // VERSION 1.0 APIs - // ---------------- - - /** - * @defgroup devman DEVICE MANAGMENT APIs - * -------------------------------------- - * APIs to open, close, query and program the device - * @{ - */ - - /** - * Open a device and obtain its handle. - * "deviceIndex" is 0 for first device, 1 for the second device and so on - * "logFileName" is optional and if not NULL should be used to log messages - * "level" specifies the verbosity level for the messages being logged to logFileName - */ - - XCL_DRIVER_DLLESPEC xclDeviceHandle xclOpen(unsigned deviceIndex, const char *logFileName, xclVerbosityLevel level); - - /** - * Close an opened device - */ - - XCL_DRIVER_DLLESPEC void xclClose(xclDeviceHandle handle); - - /** - * Obtain various bits of information from the device - */ - - XCL_DRIVER_DLLESPEC int xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info); - - /** - * Download bitstream to the device. The bitstream is passed in memory in xclBin format. The bitstream - * may be PR bistream for devices which support PR and full bitstream for devices which require full - * configuration. 
- */ - - XCL_DRIVER_DLLESPEC int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer); - - /** @} */ - - /** - * @defgroup bufman BUFFER MANAGMENT APIs - * -------------------------------------- - * - * Buffer management APIs are used for managing device memory. The board vendors are expected to - * provide a memory manager with the following 4 APIs. The xclCopyXXX functions will be used by - * runtime to migrate buffers between host and device memory. - * @{ - */ - - /** - * Allocate a buffer on the device DDR and return its address - */ - - XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size); - - /** - * Allocate a buffer on the device DDR bank and return its address - */ - - XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, - xclMemoryDomains domain, - unsigned flags); - - /** - * Free a previously allocated buffer on the device DDR - */ - - XCL_DRIVER_DLLESPEC void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf); - - /** - * Copy host buffer contents to previously allocated device memory. "seek" specifies how many bytes to skip - * at the beginning of the destination before copying "size" bytes of host buffer. - */ - - XCL_DRIVER_DLLESPEC size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, - const void *src, size_t size, size_t seek); - - /** - * Copy contents of previously allocated device memory to host buffer. "skip" specifies how many bytes to skip - * from the beginning of the source before copying "size" bytes of device buffer. - */ - - XCL_DRIVER_DLLESPEC size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, - uint64_t src, size_t size, size_t skip); - - /** @} */ - - /** - * @defgroup readwrite DEVICE READ AND WRITE APIs - * ---------------------------------------------- - * - * These functions are used to read and write peripherals sitting on the address map. An implementation - * may use these to implement xclCopyXXX functions. 
OpenCL runtime will be using the BUFFER MANAGEMNT - * APIs described above to manage OpenCL buffers. It would use xclRead/xclWrite to program and manage - * peripherals on the card. For programming the Kernel, OpenCL runtime uses the kernel control register - * map generated by the OpenCL compiler. - * Note that the offset is wrt the address space - * @{ - */ +/** + * DOC: Xilinx Accelerator Hardware Abstraction Library Interface Definitions + * + * Header file *xclhal.h* defines data structures and function signatures exported by + * Hardware Abstraction Library (HAL). HAL is part of software stack which is integrated + * into Xilinx reference platform. + */ + +/** + * typedef xclDeviceHandle - opaque device handle + * + * A device handle of xclDeviceHandle kind is obtained by opening a device. Clients pass this + * device handle to refer to the opened device in all future interaction with HAL. + */ +typedef void * xclDeviceHandle; + +struct xclBin; +struct axlf; + +/** + * Structure used to obtain various bits of information from the device. 
+ */ + +struct xclDeviceInfo2 { + unsigned mMagic; // = 0X586C0C6C; XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); + char mName[256]; + unsigned short mHALMajorVersion; + unsigned short mHALMinorVersion; + unsigned short mVendorId; + unsigned short mDeviceId; + unsigned short mSubsystemId; + unsigned short mSubsystemVendorId; + unsigned short mDeviceVersion; + size_t mDDRSize; // Size of DDR memory + size_t mDataAlignment; // Minimum data alignment requirement for host buffers + size_t mDDRFreeSize; // Total unused/available DDR memory + size_t mMinTransferSize; // Minimum DMA buffer size + unsigned short mDDRBankCount; + unsigned short mOCLFrequency[4]; + unsigned short mPCIeLinkWidth; + unsigned short mPCIeLinkSpeed; + unsigned short mDMAThreads; + short mOnChipTemp; + short mFanTemp; + unsigned short mVInt; + unsigned short mVAux; + unsigned short mVBram; + float mCurrent; + unsigned short mNumClocks; + unsigned short mFanSpeed; + bool mMigCalib; + // More properties here +}; + +/** + * xclMemoryDomains is for support of legacy APIs + * It is not used in BO APIs where we instead use xclBOKind + */ +enum xclMemoryDomains { + XCL_MEM_HOST_RAM = 0x00000000, + XCL_MEM_DEVICE_RAM = 0x00000001, + XCL_MEM_DEVICE_BRAM = 0x00000002, + XCL_MEM_SVM = 0x00000003, + XCL_MEM_CMA = 0x00000004, + XCL_MEM_DEVICE_REG = 0x00000005 +}; + +/* byte-0 lower 4 bits for DDR Flags are one-hot encoded */ +enum xclDDRFlags { + XCL_DEVICE_RAM_BANK0 = 0x00000000, + XCL_DEVICE_RAM_BANK1 = 0x00000002, + XCL_DEVICE_RAM_BANK2 = 0x00000004, + XCL_DEVICE_RAM_BANK3 = 0x00000008 +}; + +/** + * xclBOKind defines Buffer Object Kind which represents a fragment of device accessible + * memory and the corresponding backing host memory. + * + * 1. Shared virtual memory (SVM) class of systems like CAPI or MPSoc with SMMU. BOs + * have a common host RAM backing store. + * XCL_BO_SHARED_VIRTUAL + * + * 2.
Shared physical memory class of systems like Zynq (or MPSoc with pass through SMMU) + * with Linux CMA buffer allocation. BOs have common host CMA allocated backing store. + * XCL_BO_SHARED_PHYSICAL + * + * 3. Shared virtual memory (SVM) class of systems with dedicated RAM and device MMU. BOs + * have a device RAM dedicated backing store and another host RAM allocated backing store. + * The buffers are sync'd via DMA. Both physical buffers use the same virtual address, + * hence giving the effect of SVM. + * XCL_BO_MIRRORED_VIRTUAL + * + * 4. Dedicated memory class of devices like PCIe card with DDR. BOs have a device RAM + * dedicated backing store and another host RAM allocated backing store. The buffers + * are sync'd via DMA + * XCL_BO_DEVICE_RAM + * + * 5. Dedicated onchip memory class of devices like PCIe card with BRAM. BOs have a device + * BRAM dedicated backing store and another host RAM allocated backing store. The buffers + * are sync'd via DMA + * XCL_BO_DEVICE_BRAM + */ + +enum xclBOKind { + XCL_BO_SHARED_VIRTUAL = 0, + XCL_BO_SHARED_PHYSICAL, + XCL_BO_MIRRORED_VIRTUAL, + XCL_BO_DEVICE_RAM, + XCL_BO_DEVICE_BRAM, + XCL_BO_DEVICE_PREALLOCATED_BRAM, +}; + +enum xclBOSyncDirection { + XCL_BO_SYNC_BO_TO_DEVICE = 0, + XCL_BO_SYNC_BO_FROM_DEVICE, +}; + +/** + * Define address spaces on the device AXI bus. The enums are used in xclRead() and xclWrite() + * to pass relative offsets.
+ */ + +enum xclAddressSpace { + XCL_ADDR_SPACE_DEVICE_FLAT = 0, // Absolute address space + XCL_ADDR_SPACE_DEVICE_RAM = 1, // Address space for the DDR memory + XCL_ADDR_KERNEL_CTRL = 2, // Address space for the OCL Region control port + XCL_ADDR_SPACE_DEVICE_PERFMON = 3, // Address space for the Performance monitors + XCL_ADDR_SPACE_DEVICE_CHECKER = 5, // Address space for protocol checker + XCL_ADDR_SPACE_MAX = 8 +}; + +/** + * Defines verbosity levels which are passed to xclOpen during device creation time + */ + +enum xclVerbosityLevel { + XCL_QUIET = 0, + XCL_INFO = 1, + XCL_WARN = 2, + XCL_ERROR = 3 +}; + +enum xclResetKind { + XCL_RESET_KERNEL, + XCL_RESET_FULL +}; + +struct xclDeviceUsage { + size_t h2c[8]; + size_t c2h[8]; + size_t ddrMemUsed[8]; + unsigned ddrBOAllocated[8]; + unsigned totalContexts; + uint64_t xclbinId[4]; +}; + +struct xclBOProperties { + uint32_t handle; + uint32_t flags; + uint64_t size; + uint64_t paddr; + xclBOKind domain; // not implemented +}; + +/** + * DOC: HAL Device Management APIs + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +/** + * xclProbe() - Enumerate devices found in the system + * + * Return: count of devices found + */ +XCL_DRIVER_DLLESPEC unsigned xclProbe(); + +/** + * xclOpen() - Open a device and obtain its handle. + * + * @deviceIndex: Slot number of device 0 for first device, 1 for the second device... + * @logFileName: Log file to use for optional logging + * @level: Severity level of messages to log + * + * Return: Device handle + */ +XCL_DRIVER_DLLESPEC xclDeviceHandle xclOpen(unsigned deviceIndex, const char *logFileName, + xclVerbosityLevel level); + +/** + * xclClose() - Close an opened device + * + * @handle: Device handle + */ +XCL_DRIVER_DLLESPEC void xclClose(xclDeviceHandle handle); + +/** + * xclResetDevice() - Reset a device or its CL + * + * @handle: Device handle + * @kind: Reset kind +* Return: 0 on success or appropriate error number + * + * Reset the device. 
All running kernels will be killed and buffers in DDR will be + * purged. A device may be reset if a user's application dies without waiting for + * running kernel(s) to finish. + */ +XCL_DRIVER_DLLESPEC int xclResetDevice(xclDeviceHandle handle, xclResetKind kind); + +/** + * xclGetDeviceInfo2() - Obtain various bits of information from the device + * + * @handle: Device handle + * @info: Information record + * Return: 0 on success or appropriate error number + */ +XCL_DRIVER_DLLESPEC int xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info); + +/** + * xclGetUsageInfo() - Obtain usage information from the device + * + * @handle: Device handle + * @info: Information record + * Return: 0 on success or appropriate error number + */ +XCL_DRIVER_DLLESPEC int xclGetUsageInfo(xclDeviceHandle handle, xclDeviceUsage *info); + +/** + * xclGetErrorStatus() - Obtain error information from the device + * + * @handle: Device handle + * @info: Information record + * Return: 0 on success or appropriate error number + */ +XCL_DRIVER_DLLESPEC int xclGetErrorStatus(xclDeviceHandle handle, xclErrorStatus *info); + +/** + * xclLoadXclBin() - Download FPGA image (xclbin) to the device + * + * @handle: Device handle + * @buffer: Pointer to device image (xclbin) in memory + * Return: 0 on success or appropriate error number + * + * Download FPGA image (AXLF) to the device. The PR bitstream is encapsulated inside + * xclbin as a section. xclbin may also contain other sections which are suitably + * handled by the driver.
+ */ +XCL_DRIVER_DLLESPEC int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer); + +/** + * xclReClock2() - Configure PR region frequencies + * + * @handle: Device handle + * @region: PR region (always 0) + * @targetFreqMHz: Array of target frequencies in order for the Clock Wizards driving + * the PR region + * Return: 0 on success or appropriate error number + */ +XCL_DRIVER_DLLESPEC int xclReClock2(xclDeviceHandle handle, unsigned short region, + const unsigned short *targetFreqMHz); + +/** + * xclLockDevice() - Get exclusive ownership of the device + * + * @handle: Device handle + * Return: 0 on success or appropriate error number + * + * The lock is necessary before performing buffer migration, register access or + * bitstream downloads. + */ +XCL_DRIVER_DLLESPEC int xclLockDevice(xclDeviceHandle handle); - XCL_DRIVER_DLLESPEC size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, - const void *hostBuf, size_t size); +/** + * xclUnlockDevice() - Release exclusive ownership of the device + * + * @handle: Device handle + * Return: 0 on success or appropriate error number + */ +XCL_DRIVER_DLLESPEC int xclUnlockDevice(xclDeviceHandle handle); - XCL_DRIVER_DLLESPEC size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, - void *hostbuf, size_t size); +/* + * Update the device BPI PROM with new image + */ +XCL_DRIVER_DLLESPEC int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName); - /** @} */ +/* + * Update the device PROM with new image with clearing bitstream + */ +XCL_DRIVER_DLLESPEC int xclUpgradeFirmware2(xclDeviceHandle handle, const char *file1, const char* file2); - // EXTENSIONS FOR PARTIAL RECONFIG FLOW - // ------------------------------------ - // TODO: Deprecate this.
Update the device PROM with new base bitsream - XCL_DRIVER_DLLESPEC int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName); +/* + * Update the device SPI PROM with new image + */ +XCL_DRIVER_DLLESPEC int xclUpgradeFirmwareXSpi(xclDeviceHandle handle, const char *fileName, int index); - // Update the device PROM with new base bitsream(s). - XCL_DRIVER_DLLESPEC int xclUpgradeFirmware2(xclDeviceHandle handle, const char *file1, const char* file2); +/** + * xclBootFPGA() - Boot the FPGA from PROM + * + * @handle: Device handle + * Return: 0 on success or appropriate error number + * + * This should only be called when there are no other clients. It will cause PCIe bus re-enumeration + */ +XCL_DRIVER_DLLESPEC int xclBootFPGA(xclDeviceHandle handle); - //TODO: Deprecate this. Update the device PROM for XSpi - XCL_DRIVER_DLLESPEC int xclUpgradeFirmwareXSpi(xclDeviceHandle handle, const char *fileName, int index); +/* + * Write to /sys/bus/pci/devices//remove and initiate a pci rescan by + * writing to /sys/bus/pci/rescan. + */ +XCL_DRIVER_DLLESPEC int xclRemoveAndScanFPGA(); - //Test the flash - XCL_DRIVER_DLLESPEC int xclTestXSpi(xclDeviceHandle handle, int slave_index); +/* + * Get the version number. 1 => Hal1 ; 2 => Hal2 + */ +XCL_DRIVER_DLLESPEC unsigned int xclVersion(); - // Boot the FPGA with new bitsream in PROM. 
This will break the PCIe link and render the device - // unusable till a reboot of the host - XCL_DRIVER_DLLESPEC int xclBootFPGA(xclDeviceHandle handle); +/* End HAL Device Management APIs */ - // NEW APIs in VERSION 1.1 - // ----------------------- +/** + * DOC: HAL Buffer Management APIs + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Buffer management APIs are used for managing device memory and migrating buffers + * between host and device memory + */ - /** - * @addtogroup devman - * @{ - */ +/** + * xclAllocBO() - Allocate a BO of requested size with appropriate flags + * + * @handle: Device handle + * @size: Size of buffer + * @domain: Memory domain + * @flags: Specify bank information, etc + * Return: BO handle + */ +XCL_DRIVER_DLLESPEC unsigned int xclAllocBO(xclDeviceHandle handle, size_t size, xclBOKind domain, + unsigned flags); - /** - * Reset the device. All running kernels will be killed and buffers in DDR will be purged. - * A device would be reset if a user's application dies without waiting for running kernel(s) - * to finish. - */ +/** + * xclAllocUserPtrBO() - Allocate a BO using userptr provided by the user + * + * @handle: Device handle + * @userptr: Pointer to 4K aligned user memory + * @size: Size of buffer + * @flags: Specify bank information, etc + * Return: BO handle + */ +XCL_DRIVER_DLLESPEC unsigned int xclAllocUserPtrBO(xclDeviceHandle handle, void *userptr, size_t size, + unsigned flags); - XCL_DRIVER_DLLESPEC int xclResetDevice(xclDeviceHandle handle, xclResetKind kind); +/** + * xclFreeBO() - Free a previously allocated BO + * + * @handle: Device handle + * @boHandle: BO handle + */ +XCL_DRIVER_DLLESPEC void xclFreeBO(xclDeviceHandle handle, unsigned int boHandle); - /** - * Set the OCL region clock frequencies. Currently only 2 clocks are supported but - * targetFreqMHz should be an array with 4 elements (for 4 clocks). A value of 0 for - * the frequncy indicates that the particular clock frequency should not be changed. 
- */ +/** + * xclWriteBO() - Copy-in user data to host backing storage of BO + * + * @handle: Device handle + * @boHandle: BO handle + * @src: Source data pointer + * @size: Size of data to copy + * @seek: Offset within the BO + * Return: 0 on success or appropriate error number + * + * Copy host buffer contents to previously allocated device memory. ``seek`` specifies how many bytes + * to skip at the beginning of the BO before copying-in ``size`` bytes of host buffer. + */ +XCL_DRIVER_DLLESPEC size_t xclWriteBO(xclDeviceHandle handle, unsigned int boHandle, + const void *src, size_t size, size_t seek); - XCL_DRIVER_DLLESPEC int xclReClock2(xclDeviceHandle handle, unsigned short region, - const unsigned short *targetFreqMHz); +/** + * xclReadBO() - Copy-out user data from host backing storage of BO + * + * @handle: Device handle + * @boHandle: BO handle + * @dst: Destination data pointer + * @size: Size of data to copy + * @skip: Offset within the BO + * Return: 0 on success or appropriate error number + * + * Copy contents of previously allocated device memory to host buffer. ``skip`` specifies how many bytes + * to skip from the beginning of the BO before copying-out ``size`` bytes of device buffer. + */ +XCL_DRIVER_DLLESPEC size_t xclReadBO(xclDeviceHandle handle, unsigned int boHandle, + void *dst, size_t size, size_t skip); - /** - * Return a count of devices found in the system - */ - XCL_DRIVER_DLLESPEC unsigned xclProbe(); +/** + * xclMapBO() - Memory map BO into user's address space + * + * @handle: Device handle + * @boHandle: BO handle + * @write: READ only or READ/WRITE mapping + * Return: Memory mapped buffer + * + * Map the contents of the buffer object into host memory + * To unmap the buffer call POSIX unmap() on mapped void * pointer returned from xclMapBO + */ +XCL_DRIVER_DLLESPEC void *xclMapBO(xclDeviceHandle handle, unsigned int boHandle, bool write); - /** - * Get exclusive ownership of the device. 
The lock is necessary before performing buffer - * migration, register access or bitstream downloads. - */ - XCL_DRIVER_DLLESPEC int xclLockDevice(xclDeviceHandle handle); +/** + * xclSyncBO() - Synchronize buffer contents in requested direction + * + * @handle: Device handle + * @boHandle: BO handle + * @dir: To device or from device + * @size: Size of data to synchronize + * @offset: Offset within the BO + * Return: 0 on success or standard errno + * + * Synchronize the buffer contents between host and device. Depending on the memory model this may + * require DMA to/from device or CPU cache flushing/invalidation + */ +XCL_DRIVER_DLLESPEC int xclSyncBO(xclDeviceHandle handle, unsigned int boHandle, xclBOSyncDirection dir, + size_t size, size_t offset); + +/** + * xclExportBO() - Obtain DMA-BUF file descriptor for a BO + * + * @handle: Device handle + * @boHandle: BO handle which needs to be exported + * Return: File handle to the BO or standard errno + * + * Export a BO for import into another device or Linux subsystem which accepts DMA-BUF fd + * This operation is backed by Linux DMA-BUF framework + */ +XCL_DRIVER_DLLESPEC int xclExportBO(xclDeviceHandle handle, unsigned int boHandle); + +/** + * xclImportBO() - Obtain BO handle for a BO represented by DMA-BUF file descriptor + * + * @handle: Device handle + * @fd: File handle to foreign BO owned by another device which needs to be imported + * @flags: Unused + * Return: BO handle of the imported BO + * + * Import a BO exported by another device. + * This operation is backed by Linux DMA-BUF framework + */ +XCL_DRIVER_DLLESPEC unsigned int xclImportBO(xclDeviceHandle handle, int fd, unsigned flags); + +/** + * xclGetBOProperties() - Obtain xclBOProperties struct for a BO + * + * @handle: Device handle + * @boHandle: BO handle + * @properties: BO properties struct pointer + * Return: 0 on success + * + * This is the preferred method for obtaining BO property information.
+ */ +XCL_DRIVER_DLLESPEC int xclGetBOProperties(xclDeviceHandle handle, unsigned int boHandle, xclBOProperties *properties); + +/* + * xclGetBOSize() - Retrieve size of a BO + * + * + * @handle: Device handle + * @boHandle: BO handle + * Return size_t size of the BO on success + * + * This API will be deprecated in the future. New clients should use xclGetBOProperties instead + */ +inline XCL_DRIVER_DLLESPEC size_t xclGetBOSize(xclDeviceHandle handle, unsigned int boHandle) +{ + xclBOProperties p; + return !xclGetBOProperties(handle, boHandle, &p) ? (size_t)p.size : -1; +} + +/* + * Get the physical address on the device + * + * This function will be deprecated in the future. New clients should use xclGetBOProperties instead. + * + * @handle: Device handle + * @boHandle: BO handle + * @return uint64_t address of the BO on success + */ +inline XCL_DRIVER_DLLESPEC uint64_t xclGetDeviceAddr(xclDeviceHandle handle, unsigned int boHandle) +{ + xclBOProperties p; + return !xclGetBOProperties(handle, boHandle, &p) ? p.paddr : -1; +} + +/* End HAL Buffer Management APIs */ + +/** + * DOC: HAL Legacy Buffer Management APIs + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Do *not* develop new features using the following 5 API's. These are for backwards + * compatibility with classic HAL interface and will be deprecated in future. New clients + * should use BO based APIs defined above + * + */ + +/** + * xclAllocDeviceBuffer() - Allocate a buffer on the device + * + * @handle: Device handle + * @size: Size of buffer + * Return: Physical address of buffer on device or 0xFFFFFFFFFFFFFFFF in case of failure + * + * Allocate a buffer on the device DDR and return its address. This API will be deprecated in future. + * Use xclAllocBO() in all new code.
+ */ +XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size); + +/** + * xclAllocDeviceBuffer2() - Allocate a buffer on the device on a specific DDR + * + * @handle: Device handle + * @size: Size of buffer + * @domain: Memory domain + * @flags: Desired DDR bank as a bitmap. + * Return: Physical address of buffer on device or 0xFFFFFFFFFFFFFFFF in case of failure + * + * Allocate a buffer on a specific device DDR and return its address. This API will be deprecated in future. + * Use xclAllocBO() in all new code. + */ +XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, + xclMemoryDomains domain, + unsigned flags); + +/** + * xclFreeDeviceBuffer() - Free a previously allocated buffer on the device + * + * @handle: Device handle + * @buf: Physical address of buffer + * + * The physical address should have been previously allocated by xclAllocDeviceBuffer() or xclAllocDeviceBuffer2(). + * The address should point to the beginning of the buffer and not at an offset in the buffer. This API will + * be deprecated in future. Use xclFreeBO() together with BO allocation APIs. + */ +XCL_DRIVER_DLLESPEC void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf); + +/** + * xclCopyBufferHost2Device() - Write to device memory + * + * @handle: Device handle + * @dest: Physical address in the device + * @src: Source buffer pointer + * @size: Size of data to synchronize + * @seek: Seek within the segment pointed to physical address + * Return: Size of data moved or standard error number + * + * Copy host buffer contents to previously allocated device memory. ``seek`` specifies how many bytes to skip + * at the beginning of the destination before copying ``size`` bytes of host buffer. This API will be + * deprecated in future. Use xclSyncBO() together with other BO APIs.
+ */ +XCL_DRIVER_DLLESPEC size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, + const void *src, size_t size, size_t seek); + +/** + * xclCopyBufferDevice2Host() - Read from device memory + * + * @handle: Device handle + * @dest: Destination buffer pointer + * @src: Physical address in the device + * @size: Size of data to synchronize + * @skip: Skip within the segment pointed to physical address + * Return: Size of data moved or standard error number + * + * Copy contents of previously allocated device memory to host buffer. ``skip`` specifies how many bytes to skip + * from the beginning of the source before copying ``size`` bytes of device buffer. This API will be + * deprecated in future. Use xclSyncBO() together with other BO APIs. + */ +XCL_DRIVER_DLLESPEC size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, + uint64_t src, size_t size, size_t skip); + +/* End HAL Legacy Buffer Management APIs */ + + +/** + * DOC: HAL Unmanaged DMA APIs + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Unmanaged DMA APIs are for exclusive use by the debuggers and tools. The APIs allow clients to read/write + * from/to absolute device address. No checks are performed if a buffer was allocated before at the specified + * location or if the address is valid. Users who want to take over the full memory management of the device + * may use this API to synchronize their buffers between host and device. + */ + +/** + * xclUnmgdPread() - Perform unmanaged device memory read operation + * + * @handle: Device handle + * @flags: Unused + * @buf: Destination data pointer + * @size: Size of data to copy + * @offset: Absolute offset inside device + * Return: size of bytes read or appropriate error number + * + * This API may be used to perform DMA operation from absolute location specified. Users + * may use this if they want to perform their own device memory management -- not using the buffer + * object (BO) framework defined before.
+ */ +XCL_DRIVER_DLLESPEC ssize_t xclUnmgdPread(xclDeviceHandle handle, unsigned flags, void *buf, + size_t size, uint64_t offset); + +/** + * xclUnmgdPwrite() - Perform unmanaged device memory write operation + * + * @handle: Device handle + * @flags: Unused + * @buf: Source data pointer + * @size: Size of data to copy + * @offset: Absolute offset inside device + * Return: size of bytes written or appropriate error number + * + * This API may be used to perform DMA operation to an absolute location specified. Users + * may use this if they want to perform their own device memory management -- not using the buffer + * object (BO) framework defined before. + */ +XCL_DRIVER_DLLESPEC ssize_t xclUnmgdPwrite(xclDeviceHandle handle, unsigned flags, const void *buf, + size_t size, uint64_t offset); + +/* End HAL Unmanaged DMA APIs */ + +/* + * DOC: HAL Register read/write APIs + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * These functions are used to read and write peripherals sitting on the address map. OpenCL runtime + * will be using the BUFFER MANAGEMENT APIs described above to manage OpenCL buffers. It would use + * xclRead/xclWrite to program and manage peripherals on the card. For programming the Kernel, OpenCL + * runtime uses the kernel control register map generated by the xocc compiler. + * Note that the offset is wrt the address space. + */ + +/** + * xclWrite() - Perform register write operation + * + * @handle: Device handle + * @space: Address space + * @offset: Offset in the address space + * @hostBuf: Source data pointer + * @size: Size of data to copy + * Return: size of bytes written or appropriate error number + * + * This API may be used to write to device registers exposed on PCIe BAR. Offset is relative to the + * the address space. A device may have many address spaces.
+ */ + +XCL_DRIVER_DLLESPEC size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, + const void *hostBuf, size_t size); + +/** + * xclRead() - Perform register read operation + * + * @handle: Device handle + * @space: Address space + * @offset: Offset in the address space + * @hostbuf: Destination data pointer + * @size: Size of data to copy + * Return: size of bytes read or appropriate error number + * + * This API may be used to read from device registers exposed on PCIe BAR. Offset is relative to the + * address space. A device may have many address spaces. + */ +XCL_DRIVER_DLLESPEC size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, + void *hostbuf, size_t size); + +/* End HAL Register read/write APIs */ + +/* + * TODO: + * Define the following APIs + * + * 1. Host accessible pipe APIs: pread/pwrite + * 2. Accelerator status, start, stop APIs + * 3. Context creation APIs to support multiple clients + * 4. Multiple OCL Region support + * 5. DPDK style buffer management and device polling + * + */ + +/** + * DOC: HAL Compute Unit Execution Management APIs + * + * These APIs are under development. These functions will be used to start compute + * units and wait for them to finish. + */ + +/** + * xclExecBuf() - Submit an execution request to the embedded (or software) scheduler + * + * @handle: Device handle + * @cmdBO: BO handle containing command packet + * Return: 0 or standard error number + * + * This API is EXPERIMENTAL in this release. Submit an exec buffer for execution. The exec + * buffer layout is defined by struct ert_packet which is defined in file *ert.h*. The BO + * should been allocated with DRM_XOCL_BO_EXECBUF flag.
+ */ +XCL_DRIVER_DLLESPEC int xclExecBuf(xclDeviceHandle handle, unsigned int cmdBO); + +/** + * xclExecWait() - Wait for one or more execution events on the device + * + * @handle: Device handle + * @timeoutMilliSec: How long to wait for + * Return: Same code as poll system call + * + * This API is EXPERIMENTAL in this release. + * Wait for notification from the hardware. The function essentially calls "poll" system + * call on the driver file handle. The return value has same semantics as poll system call. + * If return value is > 0 caller should check the status of submitted exec buffers. + */ +XCL_DRIVER_DLLESPEC int xclExecWait(xclDeviceHandle handle, int timeoutMilliSec); + +/** + * xclRegisterInterruptNotify() - register *eventfd* file handle for a MSIX interrupt + * + * @handle: Device handle + * @userInterrupt: MSIX interrupt number + * @fd: Eventfd handle + * Return: 0 on success or standard errno + * + * Support for non managed interrupts (interrupts from custom IPs). fd should be obtained from + * eventfd system call. Caller should use standard poll/read eventfd framework in order to wait for + * interrupts. The handles are automatically unregistered on process exit. + */ +XCL_DRIVER_DLLESPEC int xclRegisterInterruptNotify(xclDeviceHandle handle, unsigned int userInterrupt, int fd); + +/* End HAL Compute Unit Execution Management APIs */ + +/** + * @defgroup perfmon PERFORMANCE MONITORING OPERATIONS + * --------------------------------------------------- + * + * These functions are used to read and write to the performance monitoring infrastructure. + * OpenCL runtime will be using the BUFFER MANAGEMNT APIs described above to manage OpenCL buffers. + * It would use these functions to initialize and sample the performance monitoring on the card.
+ * Note that the offset is wrt the address space + */ - /** @} */ +/* Write host event to device tracing (Zynq only) */ +XCL_DRIVER_DLLESPEC void xclWriteHostEvent(xclDeviceHandle handle, xclPerfMonEventType type, + xclPerfMonEventID id); - /** - * @defgroup perfmon PERFORMANCE MONITORING OPERATIONS - * --------------------------------------------------- - * - * These functions are used to read and write to the performance monitoring infrastructure. - * OpenCL runtime will be using the BUFFER MANAGEMNT APIs described above to manage OpenCL buffers. - * It would use these functions to initialize and sample the performance monitoring on the card. - * Note that the offset is wrt the address space - */ +XCL_DRIVER_DLLESPEC size_t xclGetDeviceTimestamp(xclDeviceHandle handle); - /* Write host event to device tracing (Zynq only) */ - XCL_DRIVER_DLLESPEC void xclWriteHostEvent(xclDeviceHandle handle, xclPerfMonEventType type, - xclPerfMonEventID id); +XCL_DRIVER_DLLESPEC double xclGetDeviceClockFreqMHz(xclDeviceHandle handle); - XCL_DRIVER_DLLESPEC size_t xclGetDeviceTimestamp(xclDeviceHandle handle); +XCL_DRIVER_DLLESPEC double xclGetReadMaxBandwidthMBps(xclDeviceHandle handle); - XCL_DRIVER_DLLESPEC double xclGetDeviceClockFreqMHz(xclDeviceHandle handle); +XCL_DRIVER_DLLESPEC double xclGetWriteMaxBandwidthMBps(xclDeviceHandle handle); - XCL_DRIVER_DLLESPEC double xclGetReadMaxBandwidthMBps(xclDeviceHandle handle); +XCL_DRIVER_DLLESPEC void xclSetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type, + uint32_t numSlots); - XCL_DRIVER_DLLESPEC double xclGetWriteMaxBandwidthMBps(xclDeviceHandle handle); +XCL_DRIVER_DLLESPEC uint32_t xclGetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type); - XCL_DRIVER_DLLESPEC void xclSetOclRegionProfilingNumberSlots(xclDeviceHandle handle, - uint32_t numSlots); +XCL_DRIVER_DLLESPEC void xclGetProfilingSlotName(xclDeviceHandle handle, xclPerfMonType type, + uint32_t slotnum, char* slotName, uint32_t length); - 
XCL_DRIVER_DLLESPEC size_t xclPerfMonClockTraining(xclDeviceHandle handle, xclPerfMonType type); +XCL_DRIVER_DLLESPEC size_t xclPerfMonClockTraining(xclDeviceHandle handle, xclPerfMonType type); - XCL_DRIVER_DLLESPEC size_t xclPerfMonStartCounters(xclDeviceHandle handle, xclPerfMonType type); +XCL_DRIVER_DLLESPEC size_t xclPerfMonStartCounters(xclDeviceHandle handle, xclPerfMonType type); - XCL_DRIVER_DLLESPEC size_t xclPerfMonStopCounters(xclDeviceHandle handle, xclPerfMonType type); +XCL_DRIVER_DLLESPEC size_t xclPerfMonStopCounters(xclDeviceHandle handle, xclPerfMonType type); - XCL_DRIVER_DLLESPEC size_t xclPerfMonReadCounters(xclDeviceHandle handle, xclPerfMonType type, - xclCounterResults& counterResults); +XCL_DRIVER_DLLESPEC size_t xclPerfMonReadCounters(xclDeviceHandle handle, xclPerfMonType type, + xclCounterResults& counterResults); - XCL_DRIVER_DLLESPEC size_t xclDebugReadIPStatus(xclDeviceHandle handle, xclDebugReadType type, - void* debugResults); +XCL_DRIVER_DLLESPEC size_t xclDebugReadIPStatus(xclDeviceHandle handle, xclDebugReadType type, + void* debugResults); - XCL_DRIVER_DLLESPEC size_t xclPerfMonStartTrace(xclDeviceHandle handle, xclPerfMonType type, - uint32_t startTrigger); +XCL_DRIVER_DLLESPEC size_t xclPerfMonStartTrace(xclDeviceHandle handle, xclPerfMonType type, + uint32_t startTrigger); - XCL_DRIVER_DLLESPEC size_t xclPerfMonStopTrace(xclDeviceHandle handle, xclPerfMonType type); +XCL_DRIVER_DLLESPEC size_t xclPerfMonStopTrace(xclDeviceHandle handle, xclPerfMonType type); - XCL_DRIVER_DLLESPEC uint32_t xclPerfMonGetTraceCount(xclDeviceHandle handle, xclPerfMonType type); +XCL_DRIVER_DLLESPEC uint32_t xclPerfMonGetTraceCount(xclDeviceHandle handle, xclPerfMonType type); - XCL_DRIVER_DLLESPEC size_t xclPerfMonReadTrace(xclDeviceHandle handle, xclPerfMonType type, - xclTraceResultsVector& traceVector); +XCL_DRIVER_DLLESPEC size_t xclPerfMonReadTrace(xclDeviceHandle handle, xclPerfMonType type, + xclTraceResultsVector& traceVector); - /** @} 
*/ +/** @} */ #ifdef __cplusplus } diff --git a/SDAccel/userspace/include/xclhal2.h b/SDAccel/userspace/include/xclhal2.h deleted file mode 100644 index eed9198c..00000000 --- a/SDAccel/userspace/include/xclhal2.h +++ /dev/null @@ -1,836 +0,0 @@ -/* - * Copyright (C) 2015-2018 Xilinx, Inc - * Xilinx SDAccel HAL userspace driver APIs - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -#ifndef _XCL_HAL2_H_ -#define _XCL_HAL2_H_ - -#ifdef __cplusplus -#include -#include -#else -#include -#include -#endif - -#if defined(_WIN32) -#ifdef XCL_DRIVER_DLL_EXPORT -#define XCL_DRIVER_DLLESPEC __declspec(dllexport) -#else -#define XCL_DRIVER_DLLESPEC __declspec(dllimport) -#endif -#else -#define XCL_DRIVER_DLLESPEC __attribute__((visibility("default"))) -#endif - - -#include "xclperf2.h" -#include "xcl_app_debug2.h" -#include "xclerr.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * DOC: Xilinx Accelerator Hardware Abstraction Library Interface Definitions - * - * Header file *xclhal2.h* defines data structures and function signatures exported by - * Hardware Abstraction Library (HAL). HAL is part of software stack which is integrated - * into Xilinx reference platform. - */ - -/** - * typedef xclDeviceHandle - opaque device handle - * - * A device handle of xclDeviceHandle kind is obtained by opening a device. Clients pass this - * device handle to refer to the opened device in all future interaction with HAL. 
- */ -typedef void * xclDeviceHandle; - -struct xclBin; -struct axlf; - -/** - * Structure used to obtain various bits of information from the device. - */ - -struct xclDeviceInfo2 { - unsigned mMagic; // = 0X586C0C6C; XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); - char mName[256]; - unsigned short mHALMajorVersion; - unsigned short mHALMinorVersion; - unsigned short mVendorId; - unsigned short mDeviceId; - unsigned short mSubsystemId; - unsigned short mSubsystemVendorId; - unsigned short mDeviceVersion; - size_t mDDRSize; // Size of DDR memory - size_t mDataAlignment; // Minimum data alignment requirement for host buffers - size_t mDDRFreeSize; // Total unused/available DDR memory - size_t mMinTransferSize; // Minimum DMA buffer size - unsigned short mDDRBankCount; - unsigned short mOCLFrequency[4]; - unsigned short mPCIeLinkWidth; - unsigned short mPCIeLinkSpeed; - unsigned short mDMAThreads; - short mOnChipTemp; - short mFanTemp; - unsigned short mVInt; - unsigned short mVAux; - unsigned short mVBram; - float mCurrent; - unsigned short mNumClocks; - unsigned short mFanSpeed; - bool mMigCalib; - // More properties here -}; - -/** - * xclMemoryDomains is for support of legacy APIs - * It is not used in BO APIs where we instead use xclBOKind - */ -enum xclMemoryDomains { - XCL_MEM_HOST_RAM = 0x00000000, - XCL_MEM_DEVICE_RAM = 0x00000001, - XCL_MEM_DEVICE_BRAM = 0x00000002, - XCL_MEM_SVM = 0x00000003, - XCL_MEM_CMA = 0x00000004, - XCL_MEM_DEVICE_REG = 0x00000005 -}; - -/* byte-0 lower 4 bits for DDR Flags are one-hot encoded */ -enum xclDDRFlags { - XCL_DEVICE_RAM_BANK0 = 0x00000000, - XCL_DEVICE_RAM_BANK1 = 0x00000002, - XCL_DEVICE_RAM_BANK2 = 0x00000004, - XCL_DEVICE_RAM_BANK3 = 0x00000008 -}; - -/** - * xclBOKind defines Buffer Object Kind which represents a fragment of device accesible - * memory and the corresponding backing host memory. - * - * 1. Shared virtual memory (SVM) class of systems like CAPI or MPSoc with SMMU. 
BOs - * have a common host RAM backing store. - * XCL_BO_SHARED_VIRTUAL - * - * 2. Shared physical memory class of systems like Zynq (or MPSoc with pass though SMMU) - * with Linux CMA buffer allocation. BOs have common host CMA allocated backing store. - * XCL_BO_SHARED_PHYSICAL - * - * 3. Shared virtual memory (SVM) class of systems with dedicated RAM and device MMU. BOs - * have a device RAM dedicated backing store and another host RAM allocated backing store. - * The buffers are sync'd via DMA. Both physical buffers use the same virtual address, - * hence giving the effect of SVM. - * XCL_BO_MIRRORED_VIRTUAL - * - * 4. Dedicated memory class of devices like PCIe card with DDR. BOs have a device RAM - * dedicated backing store and another host RAM allocated backing store. The buffers - * are sync'd via DMA - * XCL_BO_DEVICE_RAM - * - * 5. Dedicated onchip memory class of devices like PCIe card with BRAM. BOs have a device - * BRAM dedicated backing store and another host RAM allocated backing store. The buffers - * are sync'd via DMA - * XCL_BO_DEVICE_BRAM - */ - -enum xclBOKind { - XCL_BO_SHARED_VIRTUAL = 0, - XCL_BO_SHARED_PHYSICAL, - XCL_BO_MIRRORED_VIRTUAL, - XCL_BO_DEVICE_RAM, - XCL_BO_DEVICE_BRAM, - XCL_BO_DEVICE_PREALLOCATED_BRAM, -}; - -enum xclBOSyncDirection { - XCL_BO_SYNC_BO_TO_DEVICE = 0, - XCL_BO_SYNC_BO_FROM_DEVICE, -}; - -/** - * Define address spaces on the device AXI bus. The enums are used in xclRead() and xclWrite() - * to pass relative offsets. 
- */ - -enum xclAddressSpace { - XCL_ADDR_SPACE_DEVICE_FLAT = 0, // Absolute address space - XCL_ADDR_SPACE_DEVICE_RAM = 1, // Address space for the DDR memory - XCL_ADDR_KERNEL_CTRL = 2, // Address space for the OCL Region control port - XCL_ADDR_SPACE_DEVICE_PERFMON = 3, // Address space for the Performance monitors - XCL_ADDR_SPACE_DEVICE_CHECKER = 5, // Address space for protocol checker - XCL_ADDR_SPACE_MAX = 8 -}; - -/** - * Defines verbosity levels which are passed to xclOpen during device creation time - */ - -enum xclVerbosityLevel { - XCL_QUIET = 0, - XCL_INFO = 1, - XCL_WARN = 2, - XCL_ERROR = 3 -}; - -enum xclResetKind { - XCL_RESET_KERNEL, - XCL_RESET_FULL -}; - -struct xclDeviceUsage { - size_t h2c[8]; - size_t c2h[8]; - size_t ddrMemUsed[8]; - unsigned ddrBOAllocated[8]; - unsigned totalContexts; - uint64_t xclbinId[4]; -}; - -struct xclBOProperties { - uint32_t handle; - uint32_t flags; - uint64_t size; - uint64_t paddr; - xclBOKind domain; // not implemented -}; - -/** - * DOC: HAL Device Management APIs - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -/** - * xclProbe() - Enumerate devices found in the system - * - * Return: count of devices found - */ -XCL_DRIVER_DLLESPEC unsigned xclProbe(); - -/** - * xclOpen() - Open a device and obtain its handle. - * - * @deviceIndex: Slot number of device 0 for first device, 1 for the second device... - * @logFileName: Log file to use for optional logging - * @level: Severity level of messages to log - * - * Return: Device handle - */ -XCL_DRIVER_DLLESPEC xclDeviceHandle xclOpen(unsigned deviceIndex, const char *logFileName, - xclVerbosityLevel level); - -/** - * xclClose() - Close an opened device - * - * @handle: Device handle - */ -XCL_DRIVER_DLLESPEC void xclClose(xclDeviceHandle handle); - -/** - * xclResetDevice() - Reset a device or its CL - * - * @handle: Device handle - * @kind: Reset kind -* Return: 0 on success or appropriate error number - * - * Reset the device. 
All running kernels will be killed and buffers in DDR will be - * purged. A device may be reset if a user's application dies without waiting for - * running kernel(s) to finish. - */ -XCL_DRIVER_DLLESPEC int xclResetDevice(xclDeviceHandle handle, xclResetKind kind); - -/** - * xclGetDeviceInfo2() - Obtain various bits of information from the device - * - * @handle: Device handle - * @info: Information record - * Return: 0 on success or appropriate error number - */ -XCL_DRIVER_DLLESPEC int xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info); - -/** - * xclGetUsageInfo() - Obtain usage information from the device - * - * @handle: Device handle - * @info: Information record - * Return: 0 on success or appropriate error number - */ -XCL_DRIVER_DLLESPEC int xclGetUsageInfo(xclDeviceHandle handle, xclDeviceUsage *info); - -/** - * xclGetErrorStatus() - Obtain error information from the device - * - * @handle: Device handle - * @info: Information record - * Return: 0 on success or appropriate error number - */ -XCL_DRIVER_DLLESPEC int xclGetErrorStatus(xclDeviceHandle handle, xclErrorStatus *info); - -/** - * xclLoadXclBin() - Download FPGA image (xclbin) to the device - * - * @handle: Device handle - * @buffer: Pointer to device image (xclbin) in memory - * Return: 0 on success or appropriate error number - * - * Download FPGA image (AXLF) to the device. The PR bitstream is encapsulated inside - * xclbin as a section. xclbin may also contains other sections which are suitably - * handled by the driver. 
- */ -XCL_DRIVER_DLLESPEC int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer); - -/** - * xclReClock2() - Configure PR region frequncies - * - * @handle: Device handle - * @region: PR region (always 0) - * @targetFreqMHz: Array of target frequencies in order for the Clock Wizards driving - * the PR region - * Return: 0 on success or appropriate error number - */ -XCL_DRIVER_DLLESPEC int xclReClock2(xclDeviceHandle handle, unsigned short region, - const unsigned short *targetFreqMHz); - -/** - * xclLockDevice() - Get exclusive ownership of the device - * - * @handle: Device handle - * Return: 0 on success or appropriate error number - * - * The lock is necessary before performing buffer migration, register access or - * bitstream downloads. - */ -XCL_DRIVER_DLLESPEC int xclLockDevice(xclDeviceHandle handle); - -/** - * xclUnlockDevice() - Release exclusive ownership of the device - * - * @handle: Device handle - * Return: 0 on success or appropriate error number - */ -XCL_DRIVER_DLLESPEC int xclUnlockDevice(xclDeviceHandle handle); - -/* - * Update the device BPI PROM with new image - */ -XCL_DRIVER_DLLESPEC int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName); - -/* - * Update the device PROM with new image with clearing bitstream - */ -XCL_DRIVER_DLLESPEC int xclUpgradeFirmware2(xclDeviceHandle handle, const char *file1, const char* file2); - -/* - * Update the device SPI PROM with new image - */ -XCL_DRIVER_DLLESPEC int xclUpgradeFirmwareXSpi(xclDeviceHandle handle, const char *fileName, int index); - -/** - * xclBootFPGA() - Boot the FPGA from PROM - * - * @handle: Device handle - * Return: 0 on success or appropriate error number - * - * This should only be called when there are no other clients. It will cause PCIe bus re-enumeration - */ -XCL_DRIVER_DLLESPEC int xclBootFPGA(xclDeviceHandle handle); - -/* - * Write to /sys/bus/pci/devices//remove and initiate a pci rescan by - * writing to /sys/bus/pci/rescan. 
- */ -XCL_DRIVER_DLLESPEC int xclRemoveAndScanFPGA(); - -/* - * Get the version number. 1 => Hal1 ; 2 => Hal2 - */ -XCL_DRIVER_DLLESPEC unsigned int xclVersion(); - -/* End HAL Device Management APIs */ - -/** - * DOC: HAL Buffer Management APIs - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * Buffer management APIs are used for managing device memory and migrating buffers - * between host and device memory - */ - -/** - * xclAllocBO() - Allocate a BO of requested size with appropriate flags - * - * @handle: Device handle - * @size: Size of buffer - * @domain: Memory domain - * @flags: Specify bank information, etc - * Return: BO handle - */ -XCL_DRIVER_DLLESPEC unsigned int xclAllocBO(xclDeviceHandle handle, size_t size, xclBOKind domain, - unsigned flags); - -/** - * xclAllocUserPtrBO() - Allocate a BO using userptr provided by the user - * - * @handle: Device handle - * @userptr: Pointer to 4K aligned user memory - * @size: Size of buffer - * @flags: Specify bank information, etc - * Return: BO handle - */ -XCL_DRIVER_DLLESPEC unsigned int xclAllocUserPtrBO(xclDeviceHandle handle, void *userptr, size_t size, - unsigned flags); - -/** - * xclFreeBO() - Free a previously allocated BO - * - * @handle: Device handle - * @boHandle: BO handle - */ -XCL_DRIVER_DLLESPEC void xclFreeBO(xclDeviceHandle handle, unsigned int boHandle); - -/** - * xclWriteBO() - Copy-in user data to host backing storage of BO - * - * @handle: Device handle - * @boHandle: BO handle - * @src: Source data pointer - * @size: Size of data to copy - * @seek: Offset within the BO - * Return: 0 on success or appropriate error number - * - * Copy host buffer contents to previously allocated device memory. ``seek`` specifies how many bytes - * to skip at the beginning of the BO before copying-in ``size`` bytes of host buffer. 
- */ -XCL_DRIVER_DLLESPEC size_t xclWriteBO(xclDeviceHandle handle, unsigned int boHandle, - const void *src, size_t size, size_t seek); - -/** - * xclReadBO() - Copy-out user data from host backing storage of BO - * - * @handle: Device handle - * @boHandle: BO handle - * @dst: Destination data pointer - * @size: Size of data to copy - * @skip: Offset within the BO - * Return: 0 on success or appropriate error number - * - * Copy contents of previously allocated device memory to host buffer. ``skip`` specifies how many bytes - * to skip from the beginning of the BO before copying-out ``size`` bytes of device buffer. - */ -XCL_DRIVER_DLLESPEC size_t xclReadBO(xclDeviceHandle handle, unsigned int boHandle, - void *dst, size_t size, size_t skip); - -/** - * xclMapBO() - Memory map BO into user's address space - * - * @handle: Device handle - * @boHandle: BO handle - * @write: READ only or READ/WRITE mapping - * Return: Memory mapped buffer - * - * Map the contents of the buffer object into host memory - * To unmap the buffer call POSIX unmap() on mapped void * pointer returned from xclMapBO - */ -XCL_DRIVER_DLLESPEC void *xclMapBO(xclDeviceHandle handle, unsigned int boHandle, bool write); - -/** - * xclSyncBO() - Synchronize buffer contents in requested direction - * - * @handle: Device handle - * @boHandle: BO handle - * @dir: To device or from device - * @size: Size of data to synchronize - * @offset: Offset within the BO - * Return: 0 on success or standard errno - * - * Synchronize the buffer contents between host and device. 
Depending on the memory model this may - * require DMA to/from device or CPU cache flushing/invalidation - */ -XCL_DRIVER_DLLESPEC int xclSyncBO(xclDeviceHandle handle, unsigned int boHandle, xclBOSyncDirection dir, - size_t size, size_t offset); - -/** - * xclExportBO() - Obtain DMA-BUF file descriptor for a BO - * - * @handle: Device handle - * @boHandle: BO handle which needs to be exported - * Return: File handle to the BO or standard errno - * - * Export a BO for import into another device or Linux subsystem which accepts DMA-BUF fd - * This operation is backed by Linux DMA-BUF framework - */ -XCL_DRIVER_DLLESPEC int xclExportBO(xclDeviceHandle handle, unsigned int boHandle); - -/** - * xclImportBO() - Obtain BO handle for a BO represented by DMA-BUF file descriptor - * - * @handle: Device handle - * @fd: File handle to foreign BO owned by another device which needs to be imported - * @flags: Unused - * Return: BO handle of the imported BO - * - * Import a BO exported by another device. * - * This operation is backed by Linux DMA-BUF framework - */ -XCL_DRIVER_DLLESPEC unsigned int xclImportBO(xclDeviceHandle handle, int fd, unsigned flags); - -/** - * xclGetBOProperties() - Obtain xclBOProperties struct for a BO - * - * @handle: Device handle - * @boHandle: BO handle - * @properties: BO properties struct pointer - * Return: 0 on success - * - * This is the prefered method for obtaining BO property information. - */ -XCL_DRIVER_DLLESPEC int xclGetBOProperties(xclDeviceHandle handle, unsigned int boHandle, xclBOProperties *properties); - -/* - * xclGetBOSize() - Retrieve size of a BO - * - * - * @handle: Device handle - * @boHandle: BO handle - * Return size_t size of the BO on success - * - * This API will be deprecated in the future. New clients should use xclGetBOProperties instead - */ -inline XCL_DRIVER_DLLESPEC size_t xclGetBOSize(xclDeviceHandle handle, unsigned int boHandle) -{ - xclBOProperties p; - return !xclGetBOProperties(handle, boHandle, &p) ? 
(size_t)p.size : -1; -} - -/* - * Get the physical address on the device - * - * This function will be deprecated in the future. New clinets should use xclGetBOProperties instead. - * - * @handle: Device handle - * @boHandle: BO handle - * @return uint64_t address of the BO on success - */ -inline XCL_DRIVER_DLLESPEC uint64_t xclGetDeviceAddr(xclDeviceHandle handle, unsigned int boHandle) -{ - xclBOProperties p; - return !xclGetBOProperties(handle, boHandle, &p) ? p.paddr : -1; -} - -/* End HAL Buffer Management APIs */ - -/** - * DOC: HAL Legacy Buffer Management APIs - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * Do *not* develop new features using the following 5 API's. These are for backwards - * compatibility with classic HAL interface and will be deprecated in future. New clients - * should use BO based APIs defined above - * - */ - -/** - * xclAllocDeviceBuffer() - Allocate a buffer on the device - * - * @handle: Device handle - * @size: Size of buffer - * Return: Physical address of buffer on device or 0xFFFFFFFFFFFFFFFF in case of failure - * - * Allocate a buffer on the device DDR and return its address. This API will be deprecated in future. - * Use xclAllocBO() in all new code. - */ -XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size); - -/** - * xclAllocDeviceBuffer2() - Allocate a buffer on the device on a specific DDR - * - * @handle: Device handle - * @size: Size of buffer - * @domain: Memory domain - * @flags: Desired DDR bank as a bitmap. - * Return: Physical address of buffer on device or 0xFFFFFFFFFFFFFFFF in case of failure - * - * Allocate a buffer on a specific device DDR and return its address. This API will be deprecated in future. - * Use xclAllocBO() in all new code. 
- */ -XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, - xclMemoryDomains domain, - unsigned flags); - -/** - * xclFreeDeviceBuffer() - Free a previously buffer on the device - * - * @handle: Device handle - * @buf: Physical address of buffer - * - * The physical address should have been previously allocated by xclAllocDeviceBuffe() or xclAllocDeviceBuffer2(). - * The address should point to the beginning of the buffer and not at an offset in the buffer. This API will - * be deprecated in future. Use xclFreeBO() together with BO allocation APIs. - */ -XCL_DRIVER_DLLESPEC void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf); - -/** - * xclCopyBufferHost2Device() - Write to device memory - * - * @handle: Device handle - * @dest: Physical address in the device - * @src: Source buffer pointer - * @size: Size of data to synchronize - * @seek: Seek within the segment pointed to physical address - * Return: Size of data moved or standard error number - * - * Copy host buffer contents to previously allocated device memory. ``seek`` specifies how many bytes to skip - * at the beginning of the destination before copying ``size`` bytes of host buffer. This API will be - * deprecated in future. Use xclSyncBO() together with other BO APIs. - */ -XCL_DRIVER_DLLESPEC size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, - const void *src, size_t size, size_t seek); - -/** - * xclCopyBufferDevice2Host() - Read from device memory - * - * @handle: Device handle - * @dest: Destination buffer pointer - * @src: Physical address in the device - * @size: Size of data to synchronize - * @skip: Skip within the segment pointed to physical address - * Return: Size of data moved or standard error number - * - * Copy contents of previously allocated device memory to host buffer. ``skip`` specifies how many bytes to skip - * from the beginning of the source before copying ``size`` bytes of device buffer. 
This API will be - * deprecated in future. Use xclSyncBO() together with other BO APIs. - */ -XCL_DRIVER_DLLESPEC size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, - uint64_t src, size_t size, size_t skip); - -/* End HAL Legacy Buffer Management APIs */ - - -/** - * DOC: HAL Unmanaged DMA APIs - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * Unmanaged DMA APIs are for exclusive use by the debuggers and tools. The APIs allow clinets to read/write - * from/to absolute device address. No checks are performed if a buffer was allocated before at the specified - * location or if the address is valid. Users who want to take over the full memory managemnt of the device - * may use this API to synchronize their buffers between host and device. - */ - -/** - * xclUnmgdPread() - Perform unmanaged device memory read operation - * - * @handle: Device handle - * @flags: Unused - * @buf: Destination data pointer - * @size: Size of data to copy - * @offset: Absolute offset inside device - * Return: size of bytes read or appropriate error number - * - * This API may be used to perform DMA operation from absolute location specified. Users - * may use this if they want to perform their own device memory management -- not using the buffer - * object (BO) framework defined before. - */ -XCL_DRIVER_DLLESPEC ssize_t xclUnmgdPread(xclDeviceHandle handle, unsigned flags, void *buf, - size_t size, uint64_t offset); - -/** - * xclUnmgdPwrite() - Perform unmanaged device memory read operation - * - * @handle: Device handle - * @flags: Unused - * @buf: Source data pointer - * @size: Size of data to copy - * @offset: Absolute offset inside device - * Return: size of bytes written or appropriate error number - * - * This API may be used to perform DMA operation to an absolute location specified. Users - * may use this if they want to perform their own device memory management -- not using the buffer - * object (BO) framework defined before. 
- */ -XCL_DRIVER_DLLESPEC ssize_t xclUnmgdPwrite(xclDeviceHandle handle, unsigned flags, const void *buf, - size_t size, uint64_t offset); - -/* End HAL Unmanaged DMA APIs */ - -/* - * DOC: HAL Register read/write APIs - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * These functions are used to read and write peripherals sitting on the address map. OpenCL runtime - * will be using the BUFFER MANAGEMNT APIs described above to manage OpenCL buffers. It would use - * xclRead/xclWrite to program and manage peripherals on the card. For programming the Kernel, OpenCL - * runtime uses the kernel control register map generated by the xocc compiler. - * Note that the offset is wrt the address space. - */ - -/** - * xclWrite() - Perform register write operation - * - * @handle: Device handle - * @space: Address space - * @offset: Offset in the address space - * @hostBuf: Source data pointer - * @size: Size of data to copy - * Return: size of bytes written or appropriate error number - * - * This API may be used to write to device registers exposed on PCIe BAR. Offset is relative to the - * the address space. A device may have many address spaces. - */ - -XCL_DRIVER_DLLESPEC size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, - const void *hostBuf, size_t size); - -/** - * xclRead() - Perform register read operation - * - * @handle: Device handle - * @space: Address space - * @offset: Offset in the address space - * @hostbuf: Destination data pointer - * @size: Size of data to copy - * Return: size of bytes written or appropriate error number - * - * This API may be used to read from device registers exposed on PCIe BAR. Offset is relative to the - * the address space. A device may have many address spaces. - */ -XCL_DRIVER_DLLESPEC size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, - void *hostbuf, size_t size); - -/* HAL Register read/write APIs */ - -/* - * TODO: - * Define the following APIs - * - * 1. 
Host accessible pipe APIs: pread/pwrite - * 2. Accelerator status, start, stop APIs - * 3. Context creation APIs to support multiple clients - * 4. Multiple OCL Region support - * 5. DPDK style buffer management and device polling - * - */ - -/** - * DOC: HAL Compute Unit Execution Management APIs - * - * These APIs are under development. These functions will be used to start compute - * units and wait for them to finish. - */ - -/** - * xclExecBuf() - Submit an execution request to the embedded (or software) scheduler - * - * @handle: Device handle - * @cmdBO: BO handle containing command packet - * Return: 0 or standard error number - * - * This API is EXPERIMENTAL in this release. Submit an exec buffer for execution. The exec - * buffer layout is defined by struct ert_packet which is defined in file *ert.h*. The BO - * should been allocated with DRM_XOCL_BO_EXECBUF flag. - */ -XCL_DRIVER_DLLESPEC int xclExecBuf(xclDeviceHandle handle, unsigned int cmdBO); - -/** - * xclExecWait() - Wait for one or more execution events on the device - * - * @handle: Device handle - * @timeoutMilliSec: How long to wait for - * Return: Same code as poll system call - * - * This API is EXPERIMENTAL in this release - * Wait for notification from the hardware. The function essentially calls "poll" system - * call on the driver file handle. The return value has same semantics as poll system call. - * If return value is > 0 caller should check the status of submitted exec buffers - */ -XCL_DRIVER_DLLESPEC int xclExecWait(xclDeviceHandle handle, int timeoutMilliSec); - -/** - * xclRegisterInterruptNotify() - register *eventfd* file handle for a MSIX interrupt - * - * @handle: Device handle - * @userInterrupt: MSIX interrupt number - * @fd: Eventfd handle - * Return: 0 on success or standard errno - * - * Support for non managed interrupts (interrupts from custom IPs). fd should be obtained from - * eventfd system call. 
Caller should use standard poll/read eventfd framework in order to wait for - * interrupts. The handles are automatically unregistered on process exit. - */ -XCL_DRIVER_DLLESPEC int xclRegisterInterruptNotify(xclDeviceHandle handle, unsigned int userInterrupt, int fd); - -/* HAL Compute Unit Execution Management APIs */ - -/** - * @defgroup perfmon PERFORMANCE MONITORING OPERATIONS - * --------------------------------------------------- - * - * These functions are used to read and write to the performance monitoring infrastructure. - * OpenCL runtime will be using the BUFFER MANAGEMNT APIs described above to manage OpenCL buffers. - * It would use these functions to initialize and sample the performance monitoring on the card. - * Note that the offset is wrt the address space - */ - -/* Write host event to device tracing (Zynq only) */ -XCL_DRIVER_DLLESPEC void xclWriteHostEvent(xclDeviceHandle handle, xclPerfMonEventType type, - xclPerfMonEventID id); - -XCL_DRIVER_DLLESPEC size_t xclGetDeviceTimestamp(xclDeviceHandle handle); - -XCL_DRIVER_DLLESPEC double xclGetDeviceClockFreqMHz(xclDeviceHandle handle); - -XCL_DRIVER_DLLESPEC double xclGetReadMaxBandwidthMBps(xclDeviceHandle handle); - -XCL_DRIVER_DLLESPEC double xclGetWriteMaxBandwidthMBps(xclDeviceHandle handle); - -XCL_DRIVER_DLLESPEC void xclSetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type, - uint32_t numSlots); - -XCL_DRIVER_DLLESPEC uint32_t xclGetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type); - -XCL_DRIVER_DLLESPEC void xclGetProfilingSlotName(xclDeviceHandle handle, xclPerfMonType type, - uint32_t slotnum, char* slotName, uint32_t length); - -XCL_DRIVER_DLLESPEC size_t xclPerfMonClockTraining(xclDeviceHandle handle, xclPerfMonType type); - -XCL_DRIVER_DLLESPEC size_t xclPerfMonStartCounters(xclDeviceHandle handle, xclPerfMonType type); - -XCL_DRIVER_DLLESPEC size_t xclPerfMonStopCounters(xclDeviceHandle handle, xclPerfMonType type); - -XCL_DRIVER_DLLESPEC size_t 
xclPerfMonReadCounters(xclDeviceHandle handle, xclPerfMonType type, - xclCounterResults& counterResults); - -XCL_DRIVER_DLLESPEC size_t xclDebugReadIPStatus(xclDeviceHandle handle, xclDebugReadType type, - void* debugResults); - -XCL_DRIVER_DLLESPEC size_t xclPerfMonStartTrace(xclDeviceHandle handle, xclPerfMonType type, - uint32_t startTrigger); - -XCL_DRIVER_DLLESPEC size_t xclPerfMonStopTrace(xclDeviceHandle handle, xclPerfMonType type); - -XCL_DRIVER_DLLESPEC uint32_t xclPerfMonGetTraceCount(xclDeviceHandle handle, xclPerfMonType type); - -XCL_DRIVER_DLLESPEC size_t xclPerfMonReadTrace(xclDeviceHandle handle, xclPerfMonType type, - xclTraceResultsVector& traceVector); - -/** @} */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SDAccel/userspace/include/xclperf.h b/SDAccel/userspace/include/xclperf.h index d4db9e12..38fe91b3 100644 --- a/SDAccel/userspace/include/xclperf.h +++ b/SDAccel/userspace/include/xclperf.h @@ -1,6 +1,5 @@ /** * Copyright (C) 2015-2018 Xilinx, Inc - * * Xilinx SDAccel HAL userspace driver extension APIs * Performance Monitoring Exposed Parameters * @@ -21,7 +20,7 @@ #define _XCL_PERF_H_ // DSA version (e.g., XCL_PLATFORM=xilinx_adm-pcie-7v3_1ddr_1_1) - +// Simply a default as its read from the device using lspci (see CR 870994) #define DSA_MAJOR_VERSION 1 #define DSA_MINOR_VERSION 1 @@ -34,6 +33,9 @@ #define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT 0 #define XPAR_AXI_PERF_MON_0_HOST_SLOT 1 +#define XPAR_SPM0_HOST_SLOT 0 +#define XPAR_SPM0_FIRST_KERNEL_SLOT 1 + #define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT2 2 #define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT3 3 #define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT4 4 @@ -72,7 +74,7 @@ /* AXI Stream FIFOs */ #define XPAR_AXI_PERF_MON_0_TRACE_NUMBER_FIFO 3 -#define XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH 128 +#define XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH 64 #define XPAR_AXI_PERF_MON_0_TRACE_NUMBER_SAMPLES 4096 #define MAX_TRACE_NUMBER_SAMPLES 8192 @@ -81,7 +83,11 @@ #define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_2 
0x012000 // CR 877788: the extra 0x80001000 is a bug in Vivado where the AXI4 base address is not set correctly // TODO: remove it once that bug is fixed! -#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL (0x2000000000 + 0x80001000) +//#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL (0x2000000000 + 0x80001000) +#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL 0x2000000000 +// Default for new monitoring +//#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL2 (0x0400000000 + 0x80001000) +#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL2 0x0400000000 /********************* APM 1: Monitor PCIe DMA Masters ************************/ @@ -167,6 +173,8 @@ /************************ APM Profile Counters ********************************/ #define XAPM_MAX_NUMBER_SLOTS 8 +// Max slots = floor(max slots on trace funnel / 2) = floor(63 / 2) = 31 +#define XSPM_MAX_NUMBER_SLOTS 31 #define XAPM_METRIC_COUNTERS_PER_SLOT 8 /* Metric counters per slot */ @@ -213,6 +221,7 @@ /* Cycles to add to timestamp if overflow occurs */ #define LOOP_ADD_TIME (1<<16) +#define LOOP_ADD_TIME_SPM (1<<44) /********************** Definitions: Enums, Structs ***************************/ @@ -255,12 +264,20 @@ enum xclPerfMonEventType { XCL_PERF_MON_END_EVENT = 0x5 }; +/* + * Xocc follows this convention + * Even IDs are Reads + * Odd IDs are Writes + */ +#define IS_WRITE(x) ((x) & 1) +#define IS_READ(x) (!((x) & 1)) + /* * Performance monitor IDs for host SW events * NOTE: HW events start at 0, SDSoC SW events start at 4000 */ enum xclPerfMonEventID { - XCL_PERF_MON_IGNORE_EVENT = 0, + XCL_PERF_MON_HW_EVENT = 0, XCL_PERF_MON_GENERAL_ID = 3000, XCL_PERF_MON_QUEUE_ID = 3001, XCL_PERF_MON_READ_ID = 3002, @@ -305,46 +322,42 @@ enum xclPerfMonEventID { XCL_PERF_MON_CU5_ID = 3205, XCL_PERF_MON_CU6_ID = 3206, XCL_PERF_MON_CU7_ID = 3207, - XCL_PERF_MON_PROGRAM_END = 4090 + XCL_PERF_MON_PROGRAM_END = 4090, + XCL_PERF_MON_IGNORE_EVENT = 4095 }; /* Performance monitor counter results */ typedef struct { //unsigned 
int NumSlots; float SampleIntervalUsec; - unsigned int WriteBytes[XAPM_MAX_NUMBER_SLOTS]; - unsigned int WriteTranx[XAPM_MAX_NUMBER_SLOTS]; - unsigned int WriteLatency[XAPM_MAX_NUMBER_SLOTS]; - unsigned short WriteMinLatency[XAPM_MAX_NUMBER_SLOTS]; - unsigned short WriteMaxLatency[XAPM_MAX_NUMBER_SLOTS]; - unsigned int ReadBytes[XAPM_MAX_NUMBER_SLOTS]; - unsigned int ReadTranx[XAPM_MAX_NUMBER_SLOTS]; - unsigned int ReadLatency[XAPM_MAX_NUMBER_SLOTS]; - unsigned short ReadMinLatency[XAPM_MAX_NUMBER_SLOTS]; - unsigned short ReadMaxLatency[XAPM_MAX_NUMBER_SLOTS]; + unsigned int WriteBytes[XSPM_MAX_NUMBER_SLOTS]; + unsigned int WriteTranx[XSPM_MAX_NUMBER_SLOTS]; + unsigned int WriteLatency[XSPM_MAX_NUMBER_SLOTS]; + unsigned short WriteMinLatency[XSPM_MAX_NUMBER_SLOTS]; + unsigned short WriteMaxLatency[XSPM_MAX_NUMBER_SLOTS]; + unsigned int ReadBytes[XSPM_MAX_NUMBER_SLOTS]; + unsigned int ReadTranx[XSPM_MAX_NUMBER_SLOTS]; + unsigned int ReadLatency[XSPM_MAX_NUMBER_SLOTS]; + unsigned short ReadMinLatency[XSPM_MAX_NUMBER_SLOTS]; + unsigned short ReadMaxLatency[XSPM_MAX_NUMBER_SLOTS]; } xclCounterResults; /* Performance monitor trace results */ typedef struct { - unsigned char LogID; /* 0: event flags, 1: host timestamp */ + xclPerfMonEventID EventID; + xclPerfMonEventType EventType; + unsigned long long Timestamp; unsigned char Overflow; - unsigned char WriteStartEvent; - unsigned char WriteEndEvent; - unsigned char ReadStartEvent; - unsigned short Timestamp; - unsigned int HostTimestamp; - unsigned char RID[XAPM_MAX_NUMBER_SLOTS]; - unsigned char ARID[XAPM_MAX_NUMBER_SLOTS]; - unsigned char BID[XAPM_MAX_NUMBER_SLOTS]; - unsigned char AWID[XAPM_MAX_NUMBER_SLOTS]; - unsigned char EventFlags[XAPM_MAX_NUMBER_SLOTS]; - unsigned char ExtEventFlags[XAPM_MAX_NUMBER_SLOTS]; - unsigned char WriteAddrLen[XAPM_MAX_NUMBER_SLOTS]; - unsigned char ReadAddrLen[XAPM_MAX_NUMBER_SLOTS]; - unsigned short WriteBytes[XAPM_MAX_NUMBER_SLOTS]; - unsigned short ReadBytes[XAPM_MAX_NUMBER_SLOTS]; - 
unsigned short WriteAddrId[XAPM_MAX_NUMBER_SLOTS]; - unsigned short ReadAddrId[XAPM_MAX_NUMBER_SLOTS]; + unsigned int TraceID; + unsigned char Error; + unsigned char Reserved; + // Used in HW Emulation + unsigned long long HostTimestamp; + unsigned char EventFlags; + unsigned char WriteAddrLen; + unsigned char ReadAddrLen; + unsigned short WriteBytes; + unsigned short ReadBytes; } xclTraceResults; typedef struct { diff --git a/SDAccel/userspace/include/xclperf2.h b/SDAccel/userspace/include/xclperf2.h deleted file mode 100755 index 38fe91b3..00000000 --- a/SDAccel/userspace/include/xclperf2.h +++ /dev/null @@ -1,369 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * Xilinx SDAccel HAL userspace driver extension APIs - * Performance Monitoring Exposed Parameters - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ - -#ifndef _XCL_PERF_H_ -#define _XCL_PERF_H_ - -// DSA version (e.g., XCL_PLATFORM=xilinx_adm-pcie-7v3_1ddr_1_1) -// Simply a default as its read from the device using lspci (see CR 870994) -#define DSA_MAJOR_VERSION 1 -#define DSA_MINOR_VERSION 1 - -/************************ APM 0: Monitor MIG Ports ****************************/ - -#define XPAR_AXI_PERF_MON_0_NUMBER_SLOTS 2 - -#define XPAR_AXI_PERF_MON_0_SLOT0_NAME "OCL Region" -#define XPAR_AXI_PERF_MON_0_SLOT1_NAME "Host" -#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT 0 -#define XPAR_AXI_PERF_MON_0_HOST_SLOT 1 - -#define XPAR_SPM0_HOST_SLOT 0 -#define XPAR_SPM0_FIRST_KERNEL_SLOT 1 - -#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT2 2 -#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT3 3 -#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT4 4 -#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT5 5 -#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT6 6 -#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT7 7 - -#define XPAR_AXI_PERF_MON_0_SLOT2_NAME "OCL Region Master 2" -#define XPAR_AXI_PERF_MON_0_SLOT3_NAME "OCL Region Master 3" -#define XPAR_AXI_PERF_MON_0_SLOT4_NAME "OCL Region Master 4" -#define XPAR_AXI_PERF_MON_0_SLOT5_NAME "OCL Region Master 5" -#define XPAR_AXI_PERF_MON_0_SLOT6_NAME "OCL Region Master 6" -#define XPAR_AXI_PERF_MON_0_SLOT7_NAME "OCL Region Master 7" - -#define XPAR_AXI_PERF_MON_0_SLOT0_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_0_SLOT1_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_0_SLOT2_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_0_SLOT3_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_0_SLOT4_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_0_SLOT5_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_0_SLOT6_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_0_SLOT7_DATA_WIDTH 512 - -/* Profile */ -#define XPAR_AXI_PERF_MON_0_IS_EVENT_COUNT 1 -#define XPAR_AXI_PERF_MON_0_HAVE_SAMPLED_COUNTERS 1 -#define XPAR_AXI_PERF_MON_0_NUMBER_COUNTERS (XPAR_AXI_PERF_MON_0_NUMBER_SLOTS * XAPM_METRIC_COUNTERS_PER_SLOT) - -/* Trace */ -#define XPAR_AXI_PERF_MON_0_IS_EVENT_LOG 1 
-#define XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS 1 -#define XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN 1 -// 2 DDR platform -#define XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS_2DDR 0 -#define XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN_2DDR 1 - -/* AXI Stream FIFOs */ -#define XPAR_AXI_PERF_MON_0_TRACE_NUMBER_FIFO 3 -#define XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH 64 -#define XPAR_AXI_PERF_MON_0_TRACE_NUMBER_SAMPLES 4096 -#define MAX_TRACE_NUMBER_SAMPLES 8192 - -#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_0 0x010000 -#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_1 0x011000 -#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_2 0x012000 -// CR 877788: the extra 0x80001000 is a bug in Vivado where the AXI4 base address is not set correctly -// TODO: remove it once that bug is fixed! -//#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL (0x2000000000 + 0x80001000) -#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL 0x2000000000 -// Default for new monitoring -//#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL2 (0x0400000000 + 0x80001000) -#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL2 0x0400000000 - -/********************* APM 1: Monitor PCIe DMA Masters ************************/ - -#define XPAR_AXI_PERF_MON_1_NUMBER_SLOTS 2 - -#define XPAR_AXI_PERF_MON_1_SLOT0_NAME "DMA AXI4 Master" -#define XPAR_AXI_PERF_MON_1_SLOT1_NAME "DMA AXI4-Lite Master" -#define XPAR_AXI_PERF_MON_1_SLOT2_NAME "Null" -#define XPAR_AXI_PERF_MON_1_SLOT3_NAME "Null" -#define XPAR_AXI_PERF_MON_1_SLOT4_NAME "Null" -#define XPAR_AXI_PERF_MON_1_SLOT5_NAME "Null" -#define XPAR_AXI_PERF_MON_1_SLOT6_NAME "Null" -#define XPAR_AXI_PERF_MON_1_SLOT7_NAME "Null" - -#define XPAR_AXI_PERF_MON_1_SLOT0_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_1_SLOT1_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_1_SLOT2_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_1_SLOT3_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_1_SLOT4_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_1_SLOT5_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_1_SLOT6_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_1_SLOT7_DATA_WIDTH 512 - -/* 
Profile */ -#define XPAR_AXI_PERF_MON_1_IS_EVENT_COUNT 1 -#define XPAR_AXI_PERF_MON_1_HAVE_SAMPLED_COUNTERS 1 -#define XPAR_AXI_PERF_MON_1_NUMBER_COUNTERS (XPAR_AXI_PERF_MON_1_NUMBER_SLOTS * XAPM_METRIC_COUNTERS_PER_SLOT) -#define XPAR_AXI_PERF_MON_1_SCALE_FACTOR 1 - -/* Trace */ -#define XPAR_AXI_PERF_MON_1_IS_EVENT_LOG 0 -#define XPAR_AXI_PERF_MON_1_SHOW_AXI_IDS 0 -#define XPAR_AXI_PERF_MON_1_SHOW_AXI_LEN 0 - -/* AXI Stream FIFOs */ -#define XPAR_AXI_PERF_MON_1_TRACE_NUMBER_FIFO 0 -#define XPAR_AXI_PERF_MON_1_TRACE_WORD_WIDTH 0 -#define XPAR_AXI_PERF_MON_1_TRACE_NUMBER_SAMPLES 0 - -/************************ APM 2: Monitor OCL Region ***************************/ - -#define XPAR_AXI_PERF_MON_2_NUMBER_SLOTS 1 - -#define XPAR_AXI_PERF_MON_2_SLOT0_NAME "Kernel0" -#define XPAR_AXI_PERF_MON_2_SLOT1_NAME "Kernel1" -#define XPAR_AXI_PERF_MON_2_SLOT2_NAME "Kernel2" -#define XPAR_AXI_PERF_MON_2_SLOT3_NAME "Kernel3" -#define XPAR_AXI_PERF_MON_2_SLOT4_NAME "Kernel4" -#define XPAR_AXI_PERF_MON_2_SLOT5_NAME "Kernel5" -#define XPAR_AXI_PERF_MON_2_SLOT6_NAME "Kernel6" -#define XPAR_AXI_PERF_MON_2_SLOT7_NAME "Kernel7" - -#define XPAR_AXI_PERF_MON_2_SLOT0_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_2_SLOT1_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_2_SLOT2_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_2_SLOT3_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_2_SLOT4_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_2_SLOT5_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_2_SLOT6_DATA_WIDTH 512 -#define XPAR_AXI_PERF_MON_2_SLOT7_DATA_WIDTH 512 - -/* Profile */ -#define XPAR_AXI_PERF_MON_2_IS_EVENT_COUNT 0 -#define XPAR_AXI_PERF_MON_2_HAVE_SAMPLED_COUNTERS 0 -#define XPAR_AXI_PERF_MON_2_NUMBER_COUNTERS 0 -#define XPAR_AXI_PERF_MON_2_SCALE_FACTOR 1 - -/* Trace */ -#define XPAR_AXI_PERF_MON_2_IS_EVENT_LOG 1 -#define XPAR_AXI_PERF_MON_2_SHOW_AXI_IDS 0 -#define XPAR_AXI_PERF_MON_2_SHOW_AXI_LEN 0 - -/* AXI Stream FIFOs */ -/* NOTE: number of FIFOs is dependent upon the number of compute units being monitored */ 
-//#define XPAR_AXI_PERF_MON_2_TRACE_NUMBER_FIFO 2 -#define XPAR_AXI_PERF_MON_2_TRACE_WORD_WIDTH 64 -#define XPAR_AXI_PERF_MON_2_TRACE_NUMBER_SAMPLES 4096 - -#define XPAR_AXI_PERF_MON_2_TRACE_OFFSET_0 0x01000 -#define XPAR_AXI_PERF_MON_2_TRACE_OFFSET_1 0x02000 -#define XPAR_AXI_PERF_MON_2_TRACE_OFFSET_2 0x03000 - -/************************ APM Profile Counters ********************************/ - -#define XAPM_MAX_NUMBER_SLOTS 8 -// Max slots = floor(max slots on trace funnel / 2) = floor(63 / 2) = 31 -#define XSPM_MAX_NUMBER_SLOTS 31 -#define XAPM_METRIC_COUNTERS_PER_SLOT 8 - -/* Metric counters per slot */ -#define XAPM_METRIC_WRITE_BYTES 0 -#define XAPM_METRIC_WRITE_TRANX 1 -#define XAPM_METRIC_WRITE_LATENCY 2 -#define XAPM_METRIC_READ_BYTES 3 -#define XAPM_METRIC_READ_TRANX 4 -#define XAPM_METRIC_READ_LATENCY 5 -#define XAPM_METRIC_WRITE_MIN_MAX 6 -#define XAPM_METRIC_READ_MIN_MAX 7 - -#define XAPM_METRIC_COUNT0_NAME "Write Byte Count" -#define XAPM_METRIC_COUNT1_NAME "Write Transaction Count" -#define XAPM_METRIC_COUNT2_NAME "Total Write Latency" -#define XAPM_METRIC_COUNT3_NAME "Read Byte Count" -#define XAPM_METRIC_COUNT4_NAME "Read Transaction Count" -#define XAPM_METRIC_COUNT5_NAME "Total Read Latency" -#define XAPM_METRIC_COUNT6_NAME "Min/Max Write Latency" -#define XAPM_METRIC_COUNT7_NAME "Min/Max Read Latency" - -/************************ APM Debug Counters ********************************/ -#define XAPM_DEBUG_METRIC_COUNTERS_PER_SLOT 4 //debug is only interested in 4 metric counters - -/************************ APM Trace Stream ************************************/ - -/* Bit locations of trace flags */ -#define XAPM_READ_LAST 6 -#define XAPM_READ_FIRST 5 -#define XAPM_READ_ADDR 4 -#define XAPM_RESPONSE 3 -#define XAPM_WRITE_LAST 2 -#define XAPM_WRITE_FIRST 1 -#define XAPM_WRITE_ADDR 0 - -/* Bit locations of external event flags */ -#define XAPM_EXT_START 2 -#define XAPM_EXT_STOP 1 -#define XAPM_EXT_EVENT 0 - -/* Total number of bits per slot */ -#define 
FLAGS_PER_SLOT 7 -#define EXT_EVENTS_PER_SLOT 3 - -/* Cycles to add to timestamp if overflow occurs */ -#define LOOP_ADD_TIME (1<<16) -#define LOOP_ADD_TIME_SPM (1<<44) - -/********************** Definitions: Enums, Structs ***************************/ - -/* Performance monitor type or location */ -enum xclPerfMonType { - XCL_PERF_MON_MEMORY = 0, - XCL_PERF_MON_HOST_INTERFACE = 1, - XCL_PERF_MON_OCL_REGION = 2, - XCL_PERF_MON_TOTAL_PROFILE = 3 -}; - -/* Performance monitor start event */ -enum xclPerfMonStartEvent { - XCL_PERF_MON_START_ADDR = 0, - XCL_PERF_MON_START_FIRST_DATA = 1 -}; - -/* Performance monitor end event */ -enum xclPerfMonEndEvent { - XCL_PERF_MON_END_LAST_DATA = 0, - XCL_PERF_MON_END_RESPONSE = 1 -}; - -/* Performance monitor counter types */ -enum xclPerfMonCounterType { - XCL_PERF_MON_WRITE_BYTES = 0, - XCL_PERF_MON_WRITE_TRANX = 1, - XCL_PERF_MON_WRITE_LATENCY = 2, - XCL_PERF_MON_READ_BYTES = 3, - XCL_PERF_MON_READ_TRANX = 4, - XCL_PERF_MON_READ_LATENCY = 5 -}; - -/* - * Performance monitor event types - * NOTE: these are the same values used by SDSoC - */ -enum xclPerfMonEventType { - XCL_PERF_MON_START_EVENT = 0x4, - XCL_PERF_MON_END_EVENT = 0x5 -}; - -/* - * Xocc follows this convention - * Even IDs are Reads - * Odd IDs are Writes - */ -#define IS_WRITE(x) ((x) & 1) -#define IS_READ(x) (!((x) & 1)) - -/* - * Performance monitor IDs for host SW events - * NOTE: HW events start at 0, SDSoC SW events start at 4000 - */ -enum xclPerfMonEventID { - XCL_PERF_MON_HW_EVENT = 0, - XCL_PERF_MON_GENERAL_ID = 3000, - XCL_PERF_MON_QUEUE_ID = 3001, - XCL_PERF_MON_READ_ID = 3002, - XCL_PERF_MON_WRITE_ID = 3003, - XCL_PERF_MON_API_GET_PLATFORM_ID = 3005, - XCL_PERF_MON_API_GET_PLATFORM_INFO_ID = 3006, - XCL_PERF_MON_API_GET_DEVICE_ID = 3007, - XCL_PERF_MON_API_GET_DEVICE_INFO_ID = 3008, - XCL_PERF_MON_API_BUILD_PROGRAM_ID = 3009, - XCL_PERF_MON_API_CREATE_CONTEXT_ID = 3010, - XCL_PERF_MON_API_CREATE_CONTEXT_TYPE_ID = 3011, - 
XCL_PERF_MON_API_CREATE_COMMAND_QUEUE_ID = 3012, - XCL_PERF_MON_API_CREATE_PROGRAM_BINARY_ID = 3013, - XCL_PERF_MON_API_CREATE_BUFFER_ID = 3014, - XCL_PERF_MON_API_CREATE_IMAGE_ID = 3015, - XCL_PERF_MON_API_CREATE_KERNEL_ID = 3016, - XCL_PERF_MON_API_KERNEL_ARG_ID = 3017, - XCL_PERF_MON_API_WAIT_FOR_EVENTS_ID = 3018, - XCL_PERF_MON_API_READ_BUFFER_ID = 3019, - XCL_PERF_MON_API_WRITE_BUFFER_ID = 3020, - XCL_PERF_MON_API_READ_IMAGE_ID = 3021, - XCL_PERF_MON_API_WRITE_IMAGE_ID = 3022, - XCL_PERF_MON_API_MIGRATE_MEM_ID = 3023, - XCL_PERF_MON_API_MIGRATE_MEM_OBJECTS_ID = 3024, - XCL_PERF_MON_API_MAP_BUFFER_ID = 3025, - XCL_PERF_MON_API_UNMAP_MEM_OBJECT_ID = 3026, - XCL_PERF_MON_API_NDRANGE_KERNEL_ID = 3027, - XCL_PERF_MON_API_TASK_ID = 3028, - XCL_PERF_MON_KERNEL0_ID = 3100, - XCL_PERF_MON_KERNEL1_ID = 3101, - XCL_PERF_MON_KERNEL2_ID = 3102, - XCL_PERF_MON_KERNEL3_ID = 3103, - XCL_PERF_MON_KERNEL4_ID = 3104, - XCL_PERF_MON_KERNEL5_ID = 3105, - XCL_PERF_MON_KERNEL6_ID = 3106, - XCL_PERF_MON_KERNEL7_ID = 3107, - XCL_PERF_MON_CU0_ID = 3200, - XCL_PERF_MON_CU1_ID = 3201, - XCL_PERF_MON_CU2_ID = 3202, - XCL_PERF_MON_CU3_ID = 3203, - XCL_PERF_MON_CU4_ID = 3204, - XCL_PERF_MON_CU5_ID = 3205, - XCL_PERF_MON_CU6_ID = 3206, - XCL_PERF_MON_CU7_ID = 3207, - XCL_PERF_MON_PROGRAM_END = 4090, - XCL_PERF_MON_IGNORE_EVENT = 4095 -}; - -/* Performance monitor counter results */ -typedef struct { - //unsigned int NumSlots; - float SampleIntervalUsec; - unsigned int WriteBytes[XSPM_MAX_NUMBER_SLOTS]; - unsigned int WriteTranx[XSPM_MAX_NUMBER_SLOTS]; - unsigned int WriteLatency[XSPM_MAX_NUMBER_SLOTS]; - unsigned short WriteMinLatency[XSPM_MAX_NUMBER_SLOTS]; - unsigned short WriteMaxLatency[XSPM_MAX_NUMBER_SLOTS]; - unsigned int ReadBytes[XSPM_MAX_NUMBER_SLOTS]; - unsigned int ReadTranx[XSPM_MAX_NUMBER_SLOTS]; - unsigned int ReadLatency[XSPM_MAX_NUMBER_SLOTS]; - unsigned short ReadMinLatency[XSPM_MAX_NUMBER_SLOTS]; - unsigned short ReadMaxLatency[XSPM_MAX_NUMBER_SLOTS]; -} xclCounterResults; 
- -/* Performance monitor trace results */ -typedef struct { - xclPerfMonEventID EventID; - xclPerfMonEventType EventType; - unsigned long long Timestamp; - unsigned char Overflow; - unsigned int TraceID; - unsigned char Error; - unsigned char Reserved; - // Used in HW Emulation - unsigned long long HostTimestamp; - unsigned char EventFlags; - unsigned char WriteAddrLen; - unsigned char ReadAddrLen; - unsigned short WriteBytes; - unsigned short ReadBytes; -} xclTraceResults; - -typedef struct { - unsigned int mLength; - //unsigned int mNumSlots; - xclTraceResults mArray[MAX_TRACE_NUMBER_SAMPLES]; -} xclTraceResultsVector; - -#endif diff --git a/SDAccel/userspace/src/Makefile b/SDAccel/userspace/src/Makefile index aa0c8195..f95abfd6 100644 --- a/SDAccel/userspace/src/Makefile +++ b/SDAccel/userspace/src/Makefile @@ -1,6 +1,6 @@ # Amazon FPGA Hardware Development Kit # -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2016-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Amazon Software License (the "License"). You may not use # this file except in compliance with the License. A copy of the License is @@ -14,7 +14,7 @@ # limitations under the License. -# AWS HAL Driver Makefile +# AWS Bare-metal HAL Driver Makefile # set ec2=1 to compile for F1 instance CXX := g++ @@ -23,25 +23,24 @@ AR := ar ARFLAGS := rcv ifeq ($(ec2),1) - CXXFLAGS := -Werror -std=c++11 - STLIB = libawsxcldrv.a - SHLIB = libawsxcldrv.so + CXXFLAGS := -Werror -std=c++11 + STLIB = libxrt-aws.a + SHLIB = libxrt-aws.so else # For bare metal testing, i.e. 
in non EC2 environment - CXXFLAGS := -Werror -std=c++11 -DINTERNAL_TESTING - STLIB = libawsbmdrv.a - SHLIB = libawsbmdrv.so + CXXFLAGS := -Werror -std=c++11 -DINTERNAL_TESTING + STLIB = libxrtbm-aws.a + SHLIB = libxrtbm-aws.so endif LIBS := $(STLIB) $(SHLIB) -XCLHAL_VER =-DXCLHAL_MAJOR_VER=1 -DXCLHAL_MINOR_VER=1 +XCLHAL_VER = -DXCLHAL_MAJOR_VER=2 -DXCLHAL_MINOR_VER=1 # Include XCLHAL includes, AWS fpga_pci/mgmt and AWS kernel drivers SHIM_INC := -I../include -I$(SDK_DIR)/userspace/include -I$(SDK_DIR)/linux_kernel_drivers - -CXXFLAGS += $(CXXFLAGS) $(XCLHAL_VER) $(SHIM_INC) -fpic -fvisibility=hidden -lrt +CXXFLAGS += $(CXXFLAGS) $(XCLHAL_VER) $(SHIM_INC) -fpic -fvisibility=hidden -lrt -Wall ifeq ($(debug),1) CXXFLAGS += -g -DDEBUG @@ -50,7 +49,7 @@ else endif SRCS := $(wildcard *.$(CXX_EXT)) -OBJS := $(patsubst %.$(CXX_EXT), %.o , $(SRCS)) +OBJS := $(patsubst %.$(CXX_EXT), %.o, $(SRCS)) -include $(OBJS:.o=.d) diff --git a/SDAccel/userspace/src2/README.md b/SDAccel/userspace/src/README.md similarity index 91% rename from SDAccel/userspace/src2/README.md rename to SDAccel/userspace/src/README.md index b1c756fa..624b1504 100644 --- a/SDAccel/userspace/src2/README.md +++ b/SDAccel/userspace/src/README.md @@ -1,6 +1,6 @@ # SDAccel Hardware Abstraction Layer for AWS FPGA -This directory includes the source code and binary files for mapping SDAccel/OpenCL runtime library call to AWS specific hardware. This API is documented in [xclhal2.h](../include/xclhal2.h). +This directory includes the source code and binary files for mapping SDAccel/OpenCL runtime library call to AWS specific hardware. This API is documented in [xclhal.h](../include/xclhal.h). It supports the following functionality. 
diff --git a/SDAccel/userspace/src2/awssak.cpp b/SDAccel/userspace/src/awssak.cpp similarity index 100% rename from SDAccel/userspace/src2/awssak.cpp rename to SDAccel/userspace/src/awssak.cpp diff --git a/SDAccel/userspace/src2/awssak.h b/SDAccel/userspace/src/awssak.h similarity index 99% rename from SDAccel/userspace/src2/awssak.h rename to SDAccel/userspace/src/awssak.h index 930567ee..47c26e5e 100644 --- a/SDAccel/userspace/src2/awssak.h +++ b/SDAccel/userspace/src/awssak.h @@ -38,7 +38,7 @@ #include #include -#include "xclbin2.h" +#include "xclbin.h" #include "xcl_axi_checker_codes.h" #include "scan.h" #include "dmatest.h" diff --git a/SDAccel/userspace/src2/awssak_debug.cpp b/SDAccel/userspace/src/awssak_debug.cpp similarity index 99% rename from SDAccel/userspace/src2/awssak_debug.cpp rename to SDAccel/userspace/src/awssak_debug.cpp index 2e4820cb..a03aba32 100644 --- a/SDAccel/userspace/src2/awssak_debug.cpp +++ b/SDAccel/userspace/src/awssak_debug.cpp @@ -29,7 +29,7 @@ #include #include -#include "xclbin2.h" +#include "xclbin.h" #include "scan.h" #include "awssak.h" diff --git a/SDAccel/userspace/src2/awssak_utils.cpp b/SDAccel/userspace/src/awssak_utils.cpp similarity index 100% rename from SDAccel/userspace/src2/awssak_utils.cpp rename to SDAccel/userspace/src/awssak_utils.cpp diff --git a/SDAccel/userspace/src2/awssak_utils.h b/SDAccel/userspace/src/awssak_utils.h similarity index 100% rename from SDAccel/userspace/src2/awssak_utils.h rename to SDAccel/userspace/src/awssak_utils.h diff --git a/SDAccel/userspace/src/datamover.h b/SDAccel/userspace/src/datamover.h deleted file mode 100644 index 1572fc3c..00000000 --- a/SDAccel/userspace/src/datamover.h +++ /dev/null @@ -1,198 +0,0 @@ -/** - * Copyright (C) 2016-2018 Xilinx, Inc - * Author: Sonal Santan - * XDMA HAL multi-threading safe, multi-channel DMA read/write support - * - * Licensed under the Apache License, Version 2.0 (the "License"). 
You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -#ifndef _XDMA_DATA_MOVER_H_ -#define _XDMA_DATA_MOVER_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Work around GCC 4.8 + XDMA BAR implementation bugs -// With -O3 PCIe BAR read/write are not reliable hence force -O2 as max -// optimization level for pcieBarRead() and pcieBarWrite() -#if defined(__GNUC__) && defined(NDEBUG) -#define SHIM_O2 __attribute__ ((optimize("-O2"))) -#else -#define SHIM_O2 -#endif - -#if defined(AWS_EDMA) - #define DMA_PATHNAME "/dev/edma" - #define DMA_PATHH2C "_queue_" - #define DMA_PATHC2H "_queue_" -#else - #define DMA_PATHNAME "/dev/xdma" - #define DMA_PATHH2C "_h2c_" - #define DMA_PATHC2H "_c2h_" -#endif - -namespace awsbwhal { - class DMAChannelManager - { - public: - DMAChannelManager(unsigned deviceIndex, unsigned count, std::ios_base::openmode mode) : mCount(count) { - std::string baseName(DMA_PATHNAME); - baseName += std::to_string(deviceIndex); - assert((mode == std::ios_base::in) || (mode == std::ios_base::out)); - const char *suffix = (mode == std::ios_base::out) ? DMA_PATHH2C : DMA_PATHC2H; - baseName += suffix; - for (mIndex = 0; mIndex < static_cast(mCount); ++mIndex) { - std::string fileName(baseName); - fileName += std::to_string(mIndex); - mChannel.push_back(open(fileName.c_str(), (mode == std::ios_base::out) ? 
O_WRONLY : O_RDONLY)); - } - --mIndex; - } - - ~DMAChannelManager() { - unlock(); - for (unsigned i = 0; i < mCount; i++) { - close(mChannel[i]); - } - } - - bool isGood() const { - for (unsigned i = 0; i < mCount; i++) { - if (mChannel[i] < 0) - return false; - } - return true; - } - - void releaseDMAChannel(int channel) { - std::lock_guard lck(mMtx); - mChannel[++mIndex] = channel; - mCV.notify_one(); - } - - int acquireDMAChannel() { - std::unique_lock lck(mMtx); - while(mIndex < 0) { - mCV.wait(lck); - } - return mChannel[mIndex--]; - } - - bool lock() const { - for (unsigned i = 0; i < mCount; i++) { - if (!flock(mChannel[i], LOCK_EX | LOCK_NB)) - continue; - // Unable to lock channel i, unlock all channels locked so far - for (unsigned j = 0; j < i; j++) { - flock(mChannel[j], LOCK_UN); - } - return false; - } - return true; - } - - void unlock() const { - for (unsigned i = 0; i < mCount; i++) { - flock(mChannel[i], LOCK_UN); - } - } - - unsigned channelCount() const { - return mCount; - } - - private: - std::mutex mMtx; - std::condition_variable mCV; - std::vector mChannel; - const unsigned mCount; - int mIndex; - }; - - class DataMover { - public: - DataMover(unsigned index, unsigned count) : mWrite(index, count, std::ios_base::out), - mRead(index, count, std::ios_base::in) {} - - // TODO: Make pwrite64 and pread64 use RAII for the channel resource - ssize_t pwrite64(const void* buf, size_t count, off64_t offset) { - if(count == 0) // Nothing to do - return 0; - int fd = mWrite.acquireDMAChannel(); - ssize_t rc = pwrite(fd, buf, count, offset); - mWrite.releaseDMAChannel(fd); - return rc; - } - ssize_t pread64(void* buf, size_t count, off64_t offset) { - if(count == 0) // Nothing to do - return 0; - int fd = mRead.acquireDMAChannel(); - ssize_t rc = pread(fd, buf, count, offset); - mRead.releaseDMAChannel(fd); - return rc; - } - // Like memset but using pwrite - void pset64(const void* buf, size_t count, off64_t offset, unsigned rep) { - if(count == 0) // 
Nothing to do - return; - int fd = mWrite.acquireDMAChannel(); - off64_t curr = offset; - while (rep-- > 0) { -#ifndef RDI_COVERITY -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wunused-result" - pwrite(fd, buf, count, curr); -# pragma GCC diagnostic pop - curr += count; -#endif - } - mWrite.releaseDMAChannel(fd); - } - bool isGood() { - return (mWrite.isGood() && mRead.isGood()); - } - - int lock() { - if (mWrite.lock() && mRead.lock()) - return true; - unlock(); - return false; - } - - void unlock() { - mWrite.unlock(); - mRead.unlock(); - } - - unsigned channelCount() const { - return mWrite.channelCount() + mRead.channelCount(); - } - - private: - DMAChannelManager mWrite; - DMAChannelManager mRead; - }; -} - - -#endif diff --git a/SDAccel/userspace/src2/dd.cpp b/SDAccel/userspace/src/dd.cpp similarity index 100% rename from SDAccel/userspace/src2/dd.cpp rename to SDAccel/userspace/src/dd.cpp diff --git a/SDAccel/userspace/src2/dd.h b/SDAccel/userspace/src/dd.h similarity index 100% rename from SDAccel/userspace/src2/dd.h rename to SDAccel/userspace/src/dd.h diff --git a/SDAccel/userspace/src/debug.cpp b/SDAccel/userspace/src/debug.cpp index 3cfbb202..2c28ba8e 100644 --- a/SDAccel/userspace/src/debug.cpp +++ b/SDAccel/userspace/src/debug.cpp @@ -1,6 +1,7 @@ /* * Copyright (C) 2017-2018 Xilinx, Inc * Debug functionality to AWS hal driver + * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. 
A copy of the * License is located at @@ -16,8 +17,8 @@ #include "shim.h" -#include "datamover.h" #include "perfmon_parameters.h" +#include "xclbin.h" #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #ifndef _WINDOWS // TODO: Windows build support @@ -47,22 +49,69 @@ namespace awsbwhal { // Helper functions // **************** - uint64_t AwsXcl::getProtocolCheckerBaseAddress(int type) { - switch (type) { - case 0: - return LAPC0_BASE; - case 1: - return LAPC1_BASE; - case 2: - return LAPC2_BASE; - case 3: - return LAPC3_BASE; - }; - return 0; + void AwsXcl::readDebugIpLayout() + { + if (mIsDebugIpLayoutRead) + return; + + // + // Profiling - addresses and names + // Parsed from debug_ip_layout.rtd contained in xclbin + if (mLogStream.is_open()) { + mLogStream << "debug_ip_layout: reading profile addresses and names..." << std::endl; + } + mMemoryProfilingNumberSlots = getIPCountAddrNames(AXI_MM_MONITOR, mPerfMonBaseAddress, mPerfMonSlotName); + mIsDeviceProfiling = (mMemoryProfilingNumberSlots > 0); + + std::string fifoName; + uint64_t fifoCtrlBaseAddr = mOffsets[XCL_ADDR_SPACE_DEVICE_PERFMON]; + getIPCountAddrNames(AXI_MONITOR_FIFO_LITE, &fifoCtrlBaseAddr, &fifoName); + mPerfMonFifoCtrlBaseAddress = fifoCtrlBaseAddr; + + uint64_t fifoReadBaseAddr = XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL2; + getIPCountAddrNames(AXI_MONITOR_FIFO_FULL, &fifoReadBaseAddr, &fifoName); + mPerfMonFifoReadBaseAddress = fifoReadBaseAddr; + + if (mLogStream.is_open()) { + for (unsigned int i = 0; i < mMemoryProfilingNumberSlots; ++i) { + mLogStream << "debug_ip_layout: AXI_MM_MONITOR slot " << i << ": " + << "base address = 0x" << std::hex << mPerfMonBaseAddress[i] + << ", name = " << mPerfMonSlotName[i] << std::endl; + } + mLogStream << "debug_ip_layout: AXI_MONITOR_FIFO_LITE: " + << "base address = 0x" << std::hex << fifoCtrlBaseAddr << std::endl; + mLogStream << "debug_ip_layout: AXI_MONITOR_FIFO_FULL: " + << "base address = 0x" << std::hex << fifoReadBaseAddr 
<< std::endl; + } + + // Only need to read it once + mIsDebugIpLayoutRead = true; } - uint32_t AwsXcl::getCheckerNumberSlots(int type) { - return getBankCount(); + // Gets the information about the specified IP from the sysfs debug_ip_table. + // The IP types are defined in xclbin.h + uint32_t AwsXcl::getIPCountAddrNames(int type, uint64_t *baseAddress, std::string * portNames) { + debug_ip_layout *map; + std::string path = "/sys/bus/pci/devices/" + mDevUserName + "/debug_ip_layout"; + std::ifstream ifs(path.c_str(), std::ifstream::binary); + uint32_t count = 0; + char buffer[4096]; + if( ifs ) { + //sysfs max file size is 4096 + ifs.read(buffer, 4096); + if (ifs.gcount() > 0) { + map = (debug_ip_layout*)(buffer); + for( unsigned int i = 0; i < map->m_count; i++ ) { + if (map->m_debug_ip_data[i].m_type == type) { + if(baseAddress)baseAddress[count] = map->m_debug_ip_data[i].m_base_address; + if(portNames) portNames[count] = (char*)map->m_debug_ip_data[i].m_name; + ++count; + } + } + } + ifs.close(); + } + return count; } // Read APM performance counters @@ -85,13 +134,14 @@ namespace awsbwhal { LAPC_SNAPSHOT_STATUS_2_OFFSET, LAPC_SNAPSHOT_STATUS_3_OFFSET }; - uint32_t numSlots = getCheckerNumberSlots(0); - + uint64_t baseAddress[XLAPC_MAX_NUMBER_SLOTS]; + uint32_t numSlots = getIPCountAddrNames(LAPC, baseAddress, nullptr); uint32_t temp[XLAPC_STATUS_PER_SLOT]; - for (int s = 0; s < numSlots; ++s) { - uint64_t baseAddress = getProtocolCheckerBaseAddress(s); + aCheckerResults->NumSlots = numSlots; + snprintf(aCheckerResults->DevUserName, 256, "%s", mDevUserName.c_str()); + for (uint32_t s = 0; s < numSlots; ++s) { for (int c=0; c < XLAPC_STATUS_PER_SLOT; c++) - size += xclRead(XCL_ADDR_SPACE_DEVICE_CHECKER, baseAddress+statusRegisters[c], &temp[c], 4); + size += xclRead(XCL_ADDR_SPACE_DEVICE_CHECKER, baseAddress[s]+statusRegisters[c], &temp[c], 4); aCheckerResults->OverallStatus[s] = temp[XLAPC_OVERALL_STATUS]; std::copy(temp+XLAPC_CUMULATIVE_STATUS_0, 
temp+XLAPC_SNAPSHOT_STATUS_0, aCheckerResults->CumulativeStatus[s]); @@ -102,6 +152,7 @@ namespace awsbwhal { } // Read APM performance counters + size_t AwsXcl::xclDebugReadCounters(xclDebugCountersResults* aCounterResults) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() @@ -110,51 +161,47 @@ namespace awsbwhal { } size_t size = 0; - uint32_t scaleFactor = getPerfMonByteScaleFactor(XCL_PERF_MON_MEMORY); - uint64_t baseAddress = getPerfMonBaseAddress(XCL_PERF_MON_MEMORY); - - uint64_t metricAddress[] = { - // Slot 0 - baseAddress + XAPM_MC0_OFFSET, baseAddress + XAPM_MC1_OFFSET, - baseAddress + XAPM_MC3_OFFSET, baseAddress + XAPM_MC4_OFFSET, - // Slot 1 - baseAddress + XAPM_MC6_OFFSET, baseAddress + XAPM_MC7_OFFSET, - baseAddress + XAPM_MC9_OFFSET, baseAddress + XAPM_MC10_OFFSET, - // Slot 2 - baseAddress + XAPM_MC12_OFFSET, baseAddress + XAPM_MC13_OFFSET, - baseAddress + XAPM_MC15_OFFSET, baseAddress + XAPM_MC16_OFFSET, - // Slot 3 - baseAddress + XAPM_MC18_OFFSET, baseAddress + XAPM_MC19_OFFSET, - baseAddress + XAPM_MC21_OFFSET, baseAddress + XAPM_MC22_OFFSET, - // Slot 4 - baseAddress + XAPM_MC24_OFFSET, baseAddress + XAPM_MC25_OFFSET, - baseAddress + XAPM_MC27_OFFSET, baseAddress + XAPM_MC28_OFFSET, - // Slot 5 - baseAddress + XAPM_MC30_OFFSET, baseAddress + XAPM_MC31_OFFSET, - baseAddress + XAPM_MC33_OFFSET, baseAddress + XAPM_MC34_OFFSET, - // Slot 6 - baseAddress + XAPM_MC36_OFFSET, baseAddress + XAPM_MC37_OFFSET, - baseAddress + XAPM_MC39_OFFSET, baseAddress + XAPM_MC40_OFFSET, - // Slot 7 - baseAddress + XAPM_MC42_OFFSET, baseAddress + XAPM_MC43_OFFSET, - baseAddress + XAPM_MC45_OFFSET, baseAddress + XAPM_MC46_OFFSET, + + uint64_t spm_offsets[] = { + XSPM_SAMPLE_WRITE_BYTES_OFFSET, + XSPM_SAMPLE_WRITE_TRANX_OFFSET, + XSPM_SAMPLE_READ_BYTES_OFFSET, + XSPM_SAMPLE_READ_TRANX_OFFSET, + XSPM_SAMPLE_OUTSTANDING_COUNTS_OFFSET, + XSPM_SAMPLE_LAST_WRITE_ADDRESS_OFFSET, + XSPM_SAMPLE_LAST_WRITE_DATA_OFFSET, + 
XSPM_SAMPLE_LAST_READ_ADDRESS_OFFSET, + XSPM_SAMPLE_LAST_READ_DATA_OFFSET }; // Read all metric counters - uint32_t countnum = 0; - uint32_t numSlots = getPerfMonNumberSlots(XCL_PERF_MON_MEMORY); + uint64_t baseAddress[XSPM_MAX_NUMBER_SLOTS]; + uint32_t numSlots = getIPCountAddrNames(AXI_MM_MONITOR, baseAddress, nullptr); - uint32_t temp[XAPM_DEBUG_METRIC_COUNTERS_PER_SLOT]; + uint32_t temp[XSPM_DEBUG_SAMPLE_COUNTERS_PER_SLOT]; + aCounterResults->NumSlots = numSlots; + snprintf(aCounterResults->DevUserName, 256, "%s", mDevUserName.c_str()); for (uint32_t s=0; s < numSlots; s++) { - for (int c=0; c < XAPM_DEBUG_METRIC_COUNTERS_PER_SLOT; c++) - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, metricAddress[countnum++], &temp[c], 4); + uint32_t sampleInterval; + // Read sample interval register to latch the sampled metric counters + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress[s] + XSPM_SAMPLE_OFFSET, + &sampleInterval, 4); + + for (int c=0; c < XSPM_DEBUG_SAMPLE_COUNTERS_PER_SLOT; c++) + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress[s]+spm_offsets[c], &temp[c], 4); - aCounterResults->WriteBytes[s] = temp[0] * scaleFactor; + aCounterResults->WriteBytes[s] = temp[0]; aCounterResults->WriteTranx[s] = temp[1]; - aCounterResults->ReadBytes[s] = temp[2] * scaleFactor; + aCounterResults->ReadBytes[s] = temp[2]; aCounterResults->ReadTranx[s] = temp[3]; + aCounterResults->OutStandCnts[s] = temp[4]; + aCounterResults->LastWriteAddr[s] = temp[5]; + aCounterResults->LastWriteData[s] = temp[6]; + aCounterResults->LastReadAddr[s] = temp[7]; + aCounterResults->LastReadData[s] = temp[8]; } return size; } @@ -166,10 +213,12 @@ size_t xclDebugReadIPStatus(xclDeviceHandle handle, xclDebugReadType type, void* if (!drv) return -1; switch (type) { - case XCL_DEBUG_READ_TYPE_APM : - return drv->xclDebugReadCounters(reinterpret_cast(debugResults)); case XCL_DEBUG_READ_TYPE_LAPC : return drv->xclDebugReadCheckers(reinterpret_cast(debugResults)); + case 
XCL_DEBUG_READ_TYPE_SPM : + return drv->xclDebugReadCounters(reinterpret_cast(debugResults)); + default : + break; }; return -1; } diff --git a/SDAccel/userspace/src2/dmatest.h b/SDAccel/userspace/src/dmatest.h similarity index 99% rename from SDAccel/userspace/src2/dmatest.h rename to SDAccel/userspace/src/dmatest.h index de98ff80..3793a275 100644 --- a/SDAccel/userspace/src2/dmatest.h +++ b/SDAccel/userspace/src/dmatest.h @@ -25,7 +25,7 @@ #include #include -#include "xclhal2.h" +#include "xclhal.h" namespace xcldev { class Timer { diff --git a/SDAccel/userspace/src2/memaccess.h b/SDAccel/userspace/src/memaccess.h similarity index 99% rename from SDAccel/userspace/src2/memaccess.h rename to SDAccel/userspace/src/memaccess.h index 5d2b5715..abfc6826 100644 --- a/SDAccel/userspace/src2/memaccess.h +++ b/SDAccel/userspace/src/memaccess.h @@ -29,8 +29,9 @@ #include #include -#include "xclhal2.h" -#include "xclbin2.h" +#include "xclhal.h" +#include "xclbin.h" + namespace xcldev { class memaccess { xclDeviceHandle mHandle; diff --git a/SDAccel/userspace/src/memorymanager.cpp b/SDAccel/userspace/src/memorymanager.cpp deleted file mode 100644 index 748fea01..00000000 --- a/SDAccel/userspace/src/memorymanager.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * Author: Sonal Santan - * XDMA HAL Driver layered on top of XDMA kernel driver - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ - -#include "memorymanager.h" -#include -#include - -/* - * Define GCC version macro so we can use newer C++11 features - * if possible - */ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) - - -awsbwhal::MemoryManager::MemoryManager(uint64_t size, uint64_t start, - unsigned alignment) : mSize(size), mStart(start), mAlignment(alignment), - mCoalesceThreshold(4), mFreeSize(0) -{ - assert(start % alignment == 0); - mFreeBufferList.push_back(std::make_pair(mStart, mSize)); - mFreeSize = mSize; -} - -awsbwhal::MemoryManager::~MemoryManager() -{ - - -} - -uint64_t -awsbwhal::MemoryManager::alloc(size_t size) -{ - if (size == 0) - size = mAlignment; - - uint64_t result = mNull; - const size_t mod_size = size % mAlignment; - const size_t pad = (mod_size > 0) ? (mAlignment - mod_size) : 0; - size += pad; - - std::lock_guard lock(mMemManagerMutex); - for (PairList::iterator i = mFreeBufferList.begin(), e = mFreeBufferList.end(); i != e; ++i) { - if (i->second < size) - continue; - result = i->first; - if (i->second > size) { - // Resize the existing entry in freelist - i->first += size; - i->second -= size; - } - else { - // remove the exact match found - mFreeBufferList.erase(i); - } - mBusyBufferList.push_back(std::make_pair(result, size)); - mFreeSize -= size; - break; - } - return result; -} - -void -awsbwhal::MemoryManager::free(uint64_t buf) -{ - std::lock_guard lock(mMemManagerMutex); - PairList::iterator i = find(buf); - if (i == mBusyBufferList.end()) - return; - mFreeSize += i->second; - mFreeBufferList.push_back(std::make_pair(i->first, i->second)); - mBusyBufferList.erase(i); - if (mFreeBufferList.size() > mCoalesceThreshold) { - coalesce(); - } -} - - -void -awsbwhal::MemoryManager::coalesce() -{ - // First sort the free buffers and then attempt to coalesce the neighbors - mFreeBufferList.sort(); - - PairList::iterator curr = mFreeBufferList.begin(); - PairList::iterator next = curr; - ++next; - 
PairList::iterator last = mFreeBufferList.end(); - while (next != last) { - if ((curr->first + curr->second) != next->first) { - // Non contiguous blocks - curr = next; - ++next; - continue; - } - // Coalesce curr and next - curr->second += next->second; - mFreeBufferList.erase(next); - next = curr; - ++next; - } -} - -// Caller should have acquired the mutex lock before calling find(); -awsbwhal::MemoryManager::PairList::iterator -awsbwhal::MemoryManager::find(uint64_t buf) -{ -#if GCC_VERSION >= 40800 - PairList::iterator i = std::find_if(mBusyBufferList.begin(), mBusyBufferList.end(), [&] (const PairList::value_type& s) - { return s.first == buf; }); -#else - PairList::iterator i = mBusyBufferList.begin(); - PairList::iterator last = mBusyBufferList.end(); - while(i != last) { - if (i->first == buf) - break; - ++i; - } -#endif - return i; -} - -void -awsbwhal::MemoryManager::reset() -{ - std::lock_guard lock(mMemManagerMutex); - mFreeBufferList.clear(); - mBusyBufferList.clear(); - mFreeBufferList.push_back(std::make_pair(mStart, mSize)); - mFreeSize = 0; -} - -std::pair -awsbwhal::MemoryManager::lookup(uint64_t buf) -{ - std::lock_guard lock(mMemManagerMutex); - PairList::iterator i = find(buf); - if (i != mBusyBufferList.end()) - return *i; - // Compiler bug -- Some versions of GCC C++11 compiler do not - // like mNull directly inside std::make_pair, so capture mNull - // in a temporary - const uint64_t v = mNull; - return std::make_pair(v, v); -} - - -bool -awsbwhal::MemoryManager::reserve(uint64_t base, size_t size) -{ - assert(size); - if (size > mSize) - return false; - - if (base < mStart) - return false; - - if (base > (mStart + mSize)) - return false; - - const size_t mod_size = size % mAlignment; - const size_t pad = (mod_size > 0) ? 
(mAlignment - mod_size) : 0; - size += pad; - - std::lock_guard lock(mMemManagerMutex); - for (PairList::iterator i = mFreeBufferList.begin(), e = mFreeBufferList.end(); i != e; ++i) { - if (i->second < size) - continue; - if (i->first > base) - continue; - if ((base + size) > (i->first + i->second)) - continue; - uint64_t a = i->first; - uint64_t b = i->second; - - i->second = base - i->first; - if ((i->first == base) && (i->second == 0)) { - //Exact match - mFreeBufferList.erase(i); - break; - } - if (i->first == base) { - // Hole at the end; Resize exisiting entry - i->first = base + size; - break; - } - if ((i->first + i->second) == (base + size)) { - // Hole in the beginning; Resize exisiting entry - i->second -= size; - break; - } - // We have holes on both sides - // Resize hole in the beginning - i->second = base - i->first; - - // Now create an entry for the hole at the end - b = b + a - base - size; - a = base + size; - mFreeBufferList.insert(++i, std::make_pair(a, b)); - } - mBusyBufferList.push_back(std::make_pair(base, size)); - mFreeSize -= size; - return true; -} diff --git a/SDAccel/userspace/src/memorymanager.h b/SDAccel/userspace/src/memorymanager.h deleted file mode 100644 index 44dbf25e..00000000 --- a/SDAccel/userspace/src/memorymanager.h +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * Author: Sonal Santan - * Simple usermode XDMA DDR memory manager used by HAL - * Eventually the common code here will be used by all HAL drivers. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -#ifndef _XDMA_MEMORY_MANAGER_H_ -#define _XDMA_MEMORY_MANAGER_H_ - -#include -#include -#include "xclhal.h" - -namespace awsbwhal { - class MemoryManager { - std::mutex mMemManagerMutex; - std::list > mFreeBufferList; - std::list > mBusyBufferList; - const uint64_t mSize; - const uint64_t mStart; - const uint64_t mAlignment; - const unsigned mCoalesceThreshold; - uint64_t mFreeSize; - - typedef std::list > PairList; - - public: - static const uint64_t mNull = 0xffffffffffffffffull; - - public: - MemoryManager(uint64_t size, uint64_t start, unsigned alignment); - ~MemoryManager(); - uint64_t alloc(size_t size); - void free(uint64_t buf); - void reset(); - std::pairlookup(uint64_t buf); - bool reserve(uint64_t base, size_t size); - - uint64_t size() const { - return mSize; - } - - uint64_t start() const { - return mStart; - } - - uint64_t freeSize() const { - return mFreeSize; - } - - static bool isNullAlloc(const std::pair& buf) { - return ((buf.first == mNull) || (buf.second == mNull)); - } - - private: - /* Note that these should be called after acquiring mMemManagerMutex */ - void coalesce(); - PairList::iterator find(uint64_t buf); - }; -} - -#endif diff --git a/SDAccel/userspace/src/perf.cpp b/SDAccel/userspace/src/perf.cpp index f26815c6..74a0d994 100644 --- a/SDAccel/userspace/src/perf.cpp +++ b/SDAccel/userspace/src/perf.cpp @@ -1,8 +1,7 @@ -/** +/* * Copyright (C) 2017-2018 Xilinx, Inc * Performance Monitoring using PCIe for AWS HAL Driver - * Author: Paul Schumacher - * + * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. 
A copy of the * License is located at @@ -17,11 +16,12 @@ */ #include "shim.h" -#include "datamover.h" #include "perfmon_parameters.h" +#include "xocl/xocl_ioctl.h" #include #include +#include #include #include @@ -31,12 +31,20 @@ #include #include #include +#include #ifdef _WINDOWS #define __func__ __FUNCTION__ #endif namespace awsbwhal { + + static int unmgdPread(int fd, void *buffer, size_t size, uint64_t addr) + { + drm_xocl_pread_unmgd unmgd = { 0, 0, addr, size, reinterpret_cast(buffer) }; + return ioctl(fd, DRM_IOCTL_XOCL_PREAD_UNMGD, &unmgd); + } + // **************** // Helper functions // **************** @@ -45,8 +53,9 @@ namespace awsbwhal { return mDeviceInfo.mDDRBankCount; } - void AwsXcl::xclSetOclRegionProfilingNumberSlots(uint32_t numSlots) { - mOclRegionProfilingNumberSlots = numSlots; + void AwsXcl::xclSetProfilingNumberSlots(xclPerfMonType type, uint32_t numSlots) { + if (type == XCL_PERF_MON_OCL_REGION) + mOclRegionProfilingNumberSlots = numSlots; } // Get host timestamp to write to APM @@ -61,37 +70,18 @@ namespace awsbwhal { return (uint64_t) now.tv_sec * 1000000000UL + (uint64_t) now.tv_nsec; } - uint64_t AwsXcl::getPerfMonBaseAddress(xclPerfMonType type) { - if (type == XCL_PERF_MON_MEMORY) return PERFMON0_OFFSET; - if (type == XCL_PERF_MON_HOST_INTERFACE) return PERFMON1_OFFSET; - if (type == XCL_PERF_MON_OCL_REGION) return PERFMON2_OFFSET; + uint64_t AwsXcl::getPerfMonBaseAddress(xclPerfMonType type, uint32_t slotNum) { + if (type == XCL_PERF_MON_MEMORY) return mPerfMonBaseAddress[slotNum]; return 0; } uint64_t AwsXcl::getPerfMonFifoBaseAddress(xclPerfMonType type, uint32_t fifonum) { - if (type == XCL_PERF_MON_MEMORY) { - return PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_0; - } - if (type == XCL_PERF_MON_OCL_REGION) { - if (fifonum == 0) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_0); - if (fifonum == 1) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_1); - if (fifonum == 2) return (PERFMON2_OFFSET + 
XPAR_AXI_PERF_MON_2_TRACE_OFFSET_2); - return 0; - } + if (type == XCL_PERF_MON_MEMORY) return mPerfMonFifoCtrlBaseAddress; return 0; } uint64_t AwsXcl::getPerfMonFifoReadBaseAddress(xclPerfMonType type, uint32_t fifonum) { - if (type == XCL_PERF_MON_MEMORY) { - // Use AXI-MM to access trace FIFO - return XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL; - } - if (type == XCL_PERF_MON_OCL_REGION) { - if (fifonum == 0) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_0); - if (fifonum == 1) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_1); - if (fifonum == 2) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_2); - return 0; - } + if (type == XCL_PERF_MON_MEMORY) return mPerfMonFifoReadBaseAddress; return 0; } @@ -296,33 +286,39 @@ namespace awsbwhal { size_t AwsXcl::xclPerfMonStartCounters(xclPerfMonType type) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Start device counters..." << std::endl; + << type << ", Start device counters..." << std::endl; } + // Update addresses for debug/profile IP + readDebugIpLayout(); + + if (!mIsDeviceProfiling) + return 0; + size_t size = 0; uint32_t regValue; - uint64_t baseAddress = getPerfMonBaseAddress(type); - - // 1. Reset APM metric counters - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + uint64_t baseAddress; + uint32_t numSlots = getPerfMonNumberSlots(type); - regValue = regValue | XAPM_CR_MCNTR_RESET_MASK; - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + for (uint32_t i = 0; i < numSlots; i++) { + baseAddress = getPerfMonBaseAddress(type, i); - regValue = regValue & ~(XAPM_CR_MCNTR_RESET_MASK); - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + // 1. Reset AXI - MM monitor metric counters + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - // 2. 
Start APM metric counters - regValue = regValue | XAPM_CR_MCNTR_ENABLE_MASK; - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + regValue = regValue | XSPM_CR_COUNTER_RESET_MASK; + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - // 3. Specify APM metric counters to _not_ reset after reading - regValue = 0x0; - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SICR_OFFSET, ®Value, 4); + regValue = regValue & ~(XSPM_CR_COUNTER_RESET_MASK); + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - // 4. Read from sample register to ensure total time is read again at end - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SR_OFFSET, ®Value, 4); + // 2. Start AXI-MM monitor metric counters + regValue = regValue | XSPM_CR_COUNTER_ENABLE_MASK; + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); + // 3. Read from sample register to ensure total time is read again at end + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_SAMPLE_OFFSET, ®Value, 4); + } return size; } @@ -330,19 +326,26 @@ namespace awsbwhal { size_t AwsXcl::xclPerfMonStopCounters(xclPerfMonType type) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Stop and reset device counters..." << std::endl; + << type << ", Stop and reset device counters..." << std::endl; } + if (!mIsDeviceProfiling) + return 0; + size_t size = 0; uint32_t regValue; - uint64_t baseAddress = getPerfMonBaseAddress(type); + uint64_t baseAddress; + uint32_t numSlots = getPerfMonNumberSlots(type); - // 1. 
Stop APM metric counters - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + for (uint32_t i = 0; i < numSlots; i++) { + baseAddress = getPerfMonBaseAddress(type, i); - regValue = regValue & ~(XAPM_CR_MCNTR_ENABLE_MASK); - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + // 1. Stop SPM metric counters + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); + regValue = regValue & ~(XSPM_CR_COUNTER_ENABLE_MASK); + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); + } return size; } @@ -350,92 +353,63 @@ namespace awsbwhal { size_t AwsXcl::xclPerfMonReadCounters(xclPerfMonType type, xclCounterResults& counterResults) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << ", " << &counterResults - << ", Read device counters..." << std::endl; + << ", " << type << ", " << &counterResults + << ", Read device counters..." 
<< std::endl; } // Initialize all values in struct to 0 memset(&counterResults, 0, sizeof(xclCounterResults)); - size_t size = 0; - uint32_t scaleFactor = getPerfMonByteScaleFactor(type); - uint64_t baseAddress = getPerfMonBaseAddress(type); - - uint64_t intervalAddress = baseAddress + XAPM_SR_OFFSET; - uint64_t metricAddress[] = { - // Slot 0 - baseAddress + XAPM_SMC0_OFFSET, baseAddress + XAPM_SMC1_OFFSET, - baseAddress + XAPM_SMC2_OFFSET, baseAddress + XAPM_SMC3_OFFSET, - baseAddress + XAPM_SMC4_OFFSET, baseAddress + XAPM_SMC5_OFFSET, - baseAddress + XAPM_SMC48_OFFSET, baseAddress + XAPM_SMC49_OFFSET, - // Slot 1 - baseAddress + XAPM_SMC6_OFFSET, baseAddress + XAPM_SMC7_OFFSET, - baseAddress + XAPM_SMC8_OFFSET, baseAddress + XAPM_SMC9_OFFSET, - baseAddress + XAPM_SMC10_OFFSET, baseAddress + XAPM_SMC11_OFFSET, - baseAddress + XAPM_SMC50_OFFSET, baseAddress + XAPM_SMC51_OFFSET, - // Slot 2 - baseAddress + XAPM_SMC12_OFFSET, baseAddress + XAPM_SMC13_OFFSET, - baseAddress + XAPM_SMC14_OFFSET, baseAddress + XAPM_SMC15_OFFSET, - baseAddress + XAPM_SMC16_OFFSET, baseAddress + XAPM_SMC17_OFFSET, - baseAddress + XAPM_SMC52_OFFSET, baseAddress + XAPM_SMC53_OFFSET, - // Slot 3 - baseAddress + XAPM_SMC18_OFFSET, baseAddress + XAPM_SMC19_OFFSET, - baseAddress + XAPM_SMC20_OFFSET, baseAddress + XAPM_SMC21_OFFSET, - baseAddress + XAPM_SMC22_OFFSET, baseAddress + XAPM_SMC23_OFFSET, - baseAddress + XAPM_SMC54_OFFSET, baseAddress + XAPM_SMC55_OFFSET, - // Slot 4 - baseAddress + XAPM_SMC24_OFFSET, baseAddress + XAPM_SMC25_OFFSET, - baseAddress + XAPM_SMC26_OFFSET, baseAddress + XAPM_SMC27_OFFSET, - baseAddress + XAPM_SMC28_OFFSET, baseAddress + XAPM_SMC29_OFFSET, - baseAddress + XAPM_SMC56_OFFSET, baseAddress + XAPM_SMC57_OFFSET, - // Slot 5 - baseAddress + XAPM_SMC30_OFFSET, baseAddress + XAPM_SMC31_OFFSET, - baseAddress + XAPM_SMC32_OFFSET, baseAddress + XAPM_SMC33_OFFSET, - baseAddress + XAPM_SMC34_OFFSET, baseAddress + XAPM_SMC35_OFFSET, - baseAddress + XAPM_SMC58_OFFSET, 
baseAddress + XAPM_SMC59_OFFSET, - // Slot 6 - baseAddress + XAPM_SMC36_OFFSET, baseAddress + XAPM_SMC37_OFFSET, - baseAddress + XAPM_SMC38_OFFSET, baseAddress + XAPM_SMC39_OFFSET, - baseAddress + XAPM_SMC40_OFFSET, baseAddress + XAPM_SMC41_OFFSET, - baseAddress + XAPM_SMC60_OFFSET, baseAddress + XAPM_SMC61_OFFSET, - // Slot 7 - baseAddress + XAPM_SMC42_OFFSET, baseAddress + XAPM_SMC43_OFFSET, - baseAddress + XAPM_SMC44_OFFSET, baseAddress + XAPM_SMC45_OFFSET, - baseAddress + XAPM_SMC46_OFFSET, baseAddress + XAPM_SMC47_OFFSET, - baseAddress + XAPM_SMC62_OFFSET, baseAddress + XAPM_SMC63_OFFSET - }; + if (!mIsDeviceProfiling) + return 0; - // Read sample interval register - // NOTE: this also latches the sampled metric counters + size_t size = 0; + uint64_t baseAddress; uint32_t sampleInterval; - size_t ret = xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, intervalAddress, &sampleInterval, 4); - if (ret < 0) return ret; - counterResults.SampleIntervalUsec = sampleInterval / xclGetDeviceClockFreqMHz(); - - // Read all sampled metric counters - uint32_t countnum = 0; uint32_t numSlots = getPerfMonNumberSlots(type); - //counterResults.NumSlots = numSlots; - - uint32_t temp[XAPM_METRIC_COUNTERS_PER_SLOT]; - for (uint32_t s=0; s < numSlots; s++) { - for (int c=0; c < XAPM_METRIC_COUNTERS_PER_SLOT; c++) - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, metricAddress[countnum++], &temp[c], 4); + for (uint32_t s = 0; s < numSlots; s++) { + baseAddress = getPerfMonBaseAddress(type, s); + + // Read sample interval register + // NOTE: this also latches the sampled metric counters + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress + XSPM_SAMPLE_OFFSET, + &sampleInterval, 4); + // Need to do this for every xilmon + if (s == 0) { + counterResults.SampleIntervalUsec = sampleInterval / xclGetDeviceClockFreqMHz(); + } - counterResults.WriteBytes[s] = temp[XAPM_METRIC_WRITE_BYTES] * scaleFactor; - counterResults.WriteTranx[s] = temp[XAPM_METRIC_WRITE_TRANX]; - 
counterResults.WriteLatency[s] = temp[XAPM_METRIC_WRITE_LATENCY]; - counterResults.WriteMinLatency[s] = (temp[XAPM_METRIC_WRITE_MIN_MAX] & XAPM_MIN_LATENCY_MASK) >> XAPM_MIN_LATENCY_SHIFT; - counterResults.WriteMaxLatency[s] = (temp[XAPM_METRIC_WRITE_MIN_MAX] & XAPM_MAX_LATENCY_MASK) >> XAPM_MAX_LATENCY_SHIFT; + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress + XSPM_SAMPLE_WRITE_BYTES_OFFSET, + &counterResults.WriteBytes[s], 4); + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress + XSPM_SAMPLE_WRITE_TRANX_OFFSET, + &counterResults.WriteTranx[s], 4); + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress + XSPM_SAMPLE_WRITE_LATENCY_OFFSET, + &counterResults.WriteLatency[s], 4); + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress + XSPM_SAMPLE_READ_BYTES_OFFSET, + &counterResults.ReadBytes[s], 4); + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress + XSPM_SAMPLE_READ_TRANX_OFFSET, + &counterResults.ReadTranx[s], 4); + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, + baseAddress + XSPM_SAMPLE_READ_LATENCY_OFFSET, + &counterResults.ReadLatency[s], 4); - counterResults.ReadBytes[s] = temp[XAPM_METRIC_READ_BYTES] * scaleFactor; - counterResults.ReadTranx[s] = temp[XAPM_METRIC_READ_TRANX]; - counterResults.ReadLatency[s] = temp[XAPM_METRIC_READ_LATENCY]; - counterResults.ReadMinLatency[s] = (temp[XAPM_METRIC_READ_MIN_MAX] & XAPM_MIN_LATENCY_MASK) >> XAPM_MIN_LATENCY_SHIFT; - counterResults.ReadMaxLatency[s] = (temp[XAPM_METRIC_READ_MIN_MAX] & XAPM_MAX_LATENCY_MASK) >> XAPM_MAX_LATENCY_SHIFT; + if (mLogStream.is_open()) { + mLogStream << "Reading ...SlotNum : " << s << std::endl; + mLogStream << "Reading ...WriteBytes : " << counterResults.WriteBytes[s] << std::endl; + mLogStream << "Reading ...WriteTranx : " << counterResults.WriteTranx[s] << std::endl; + mLogStream << "Reading ...WriteLatency : " << counterResults.WriteLatency[s] << std::endl; + mLogStream << "Reading ...ReadBytes : " << counterResults.ReadBytes[s] << 
std::endl; + mLogStream << "Reading ...ReadTranx : " << counterResults.ReadTranx[s] << std::endl; + mLogStream << "Reading ...ReadLatency : " << counterResults.ReadLatency[s] << std::endl; + } } - return size; } @@ -447,85 +421,30 @@ namespace awsbwhal { size_t AwsXcl::xclPerfMonClockTraining(xclPerfMonType type) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Send clock training..." << std::endl; - } - - size_t size = 0; - uint64_t baseAddress = getPerfMonBaseAddress(type); - - // Send host timestamps to target device - // NOTE: this is used for training to interpolate between time domains - for (int i=0; i < 3; i++) { -#if 1 - uint64_t hostTimeNsec = getHostTraceTimeNsec(); - - uint32_t hostTimeHigh = hostTimeNsec >> 32; - uint32_t hostTimeLow = hostTimeNsec & 0xffffffff; -#else - // Test values - uint32_t hostTimeHigh = 0xf00df00d; - uint32_t hostTimeLow = 0xdeadbeef; -#endif - - // Send upper then lower 32 bits of host timestamp to APM SW data register - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SWD_OFFSET, &hostTimeHigh, 4); - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SWD_OFFSET, &hostTimeLow, 4); - - if (mLogStream.is_open()) { - mLogStream << " Host timestamp: 0x" << std::hex << hostTimeHigh - << " " << hostTimeLow << std::dec << std::endl; - } + << type << ", Send clock training..." << std::endl; } - - return size; + // We're snapping first event to start of cu. + return 1; } // Start trace performance monitoring size_t AwsXcl::xclPerfMonStartTrace(xclPerfMonType type, uint32_t startTrigger) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << ", " << startTrigger - << ", Start device tracing..." << std::endl; + << ", " << type << ", " << startTrigger + << ", Start device tracing..." 
<< std::endl; } - size_t size = 0; - uint32_t regValue; - uint64_t ctrlAddress = getPerfMonBaseAddress(type) + XAPM_CTL_OFFSET; - xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? - XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; - - // 1. Reset APM trace stream FIFO - size += xclRead(addressSpace, ctrlAddress, ®Value, 4); - - regValue = regValue | XAPM_CR_FIFO_RESET_MASK; - size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); + // Update addresses for debug/profile IP + readDebugIpLayout(); - regValue = regValue & ~(XAPM_CR_FIFO_RESET_MASK); - size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); - - // 2. Start APM event log - regValue = regValue | XAPM_CR_EVENTLOG_ENABLE_MASK; - size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); + if (!mIsDeviceProfiling) + return 0; - // 3. Reset trace FIFOs + size_t size = 0; + xclPerfMonGetTraceCount(type); size += resetFifos(type); - - // 4. Send host timestamps to target device - size += xclPerfMonClockTraining(type); - - // 5. Disable host monitoring on slot 1 - // TODO: replace check for value of startTrigger (temp way - // of keeping slot 1 enabled in 06_perfmon test) - if ((type == XCL_PERF_MON_MEMORY) && (startTrigger == 0)) { - regValue = 0xFFFFFF0F; - uint64_t enableTraceAddress = getPerfMonBaseAddress(type) + XAPM_ENT_OFFSET; - size += xclWrite(addressSpace, enableTraceAddress, ®Value, 4); - } - - // 6. Write to event trace trigger register - // TODO: add support for triggering in device here - //size += xclWrite(addressSpace, TBD, &startTrigger, 4); - + xclPerfMonGetTraceCount(type); return size; } @@ -533,23 +452,15 @@ namespace awsbwhal { size_t AwsXcl::xclPerfMonStopTrace(xclPerfMonType type) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Stop and reset device tracing..." << std::endl; + << type << ", Stop and reset device tracing..." 
<< std::endl; } - size_t size = 0; - uint32_t regValue; - uint64_t ctrlAddress = getPerfMonBaseAddress(type) + XAPM_CTL_OFFSET; - xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? - XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; - - // 1. Stop APM event log and metric counters - size += xclRead(addressSpace, ctrlAddress, ®Value, 4); - - regValue = regValue & ~(XAPM_CR_EVENTLOG_ENABLE_MASK); - size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); + if (!mIsDeviceProfiling) + return 0; + size_t size = 0; + xclPerfMonGetTraceCount(type); size += resetFifos(type); - return size; } @@ -557,28 +468,26 @@ namespace awsbwhal { uint32_t AwsXcl::xclPerfMonGetTraceCount(xclPerfMonType type) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << std::endl; + << ", " << type << std::endl; } + if (!mIsDeviceProfiling) + return 0; + xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? - XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; + XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; - // Only read first FIFO (and assume the others have the same # words) - // NOTE: we do this for speed improvements - uint32_t fifoCount; + uint32_t fifoCount = 0; + uint32_t numSamples = 0; + uint32_t numBytes = 0; xclRead(addressSpace, getPerfMonFifoBaseAddress(type, 0) + AXI_FIFO_RLR, &fifoCount, 4); // Read bits 22:0 per AXI-Stream FIFO product guide (PG080, 10/1/14) - uint32_t numBytes = fifoCount & 0x7FFFFF; - - uint32_t numSamples = 0; - if (type == XCL_PERF_MON_MEMORY) - numSamples = numBytes / (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH/8); - else - numSamples = numBytes >> 2; + numBytes = fifoCount & 0x7FFFFF; + numSamples = numBytes / (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 8); if (mLogStream.is_open()) { mLogStream << " No. 
of trace samples = " << std::dec << numSamples - << " (fifoCount = 0x" << std::hex << fifoCount << ")" << std::dec << std::endl; + << " (fifoCount = 0x" << std::hex << fifoCount << ")" << std::dec << std::endl; } return numSamples; @@ -588,40 +497,28 @@ namespace awsbwhal { size_t AwsXcl::xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << ", " << &traceVector - << ", Reading device trace stream..." << std::endl; + << ", " << type << ", " << &traceVector + << ", Reading device trace stream..." << std::endl; } traceVector.mLength = 0; + if (!mIsDeviceProfiling) + return 0; uint32_t numSamples = xclPerfMonGetTraceCount(type); if (numSamples == 0) return 0; - uint64_t fifoReadAddress[] = {0, 0, 0}; + uint64_t fifoReadAddress[] = { 0, 0, 0 }; if (type == XCL_PERF_MON_MEMORY) { fifoReadAddress[0] = getPerfMonFifoReadBaseAddress(type, 0) + AXI_FIFO_RDFD_AXI_FULL; } else { - for (int i=0; i < 3; i++) + for (int i = 0; i < 3; i++) fifoReadAddress[i] = getPerfMonFifoReadBaseAddress(type, i) + AXI_FIFO_RDFD; } - xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? 
- XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; - uint32_t numSlots = getPerfMonNumberSlots(type); - uint32_t numFifos = getPerfMonNumberFifos(type); - size_t size = 0; -#ifndef _WINDOWS - // TODO: Windows build support - // runtime array size is not supported - uint32_t temp[numFifos]; - memset(&temp, 0, numFifos*sizeof(uint32_t)); -#else - uint32_t temp[3]; - memset(&temp, 0, 3*sizeof(uint32_t)); -#endif // Limit to max number of samples so we don't overrun trace buffer on host uint32_t maxSamples = getPerfMonNumberSamples(type); @@ -636,23 +533,12 @@ namespace awsbwhal { // Create trace buffer on host (requires alignment) const int BUFFER_BYTES = MAX_TRACE_NUMBER_SAMPLES * bytesPerSample; const int BUFFER_WORDS = MAX_TRACE_NUMBER_SAMPLES * wordsPerSample; -#ifndef _WINDOWS -// TODO: Windows build support -// alignas is defined in c++11 #if GCC_VERSION >= 40800 - alignas(AXI_FIFO_RDFD_AXI_FULL) uint32_t hostbuf[BUFFER_WORDS]; + alignas(AXI_FIFO_RDFD_AXI_FULL)uint32_t hostbuf[BUFFER_WORDS]; #else AlignedAllocator alignedBuffer(AXI_FIFO_RDFD_AXI_FULL, BUFFER_WORDS); uint32_t* hostbuf = alignedBuffer.getBuffer(); #endif -#else - uint32_t hostbuf[BUFFER_WORDS]; -#endif - - // ****************************** - // Read all words from trace FIFO - // ****************************** - if (type == XCL_PERF_MON_MEMORY) { memset((void *)hostbuf, 0, BUFFER_BYTES); // Iterate over chunks @@ -660,18 +546,18 @@ namespace awsbwhal { uint32_t chunkSizeWords = 256 * wordsPerSample; if (chunkSizeWords > 1024) chunkSizeWords = 1024; uint32_t chunkSizeBytes = 4 * chunkSizeWords; - uint32_t words=0; + uint32_t words = 0; // Read trace a chunk of bytes at a time if (numWords > chunkSizeWords) { - for (; words < (numWords-chunkSizeWords); words += chunkSizeWords) { + for (; words < (numWords - chunkSizeWords); words += chunkSizeWords) { if (mLogStream.is_open()) { mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x" - << std::hex << fifoReadAddress[0] << " 
and writing it to 0x" - << (void *)(hostbuf + words) << std::dec << std::endl; + << std::hex << fifoReadAddress[0] << " and writing it to 0x" + << (void *)(hostbuf + words) << std::dec << std::endl; } - if (mDataMover->pread64((void *)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) + if (awsbwhal::unmgdPread(mUserHandle, (void *)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) return 0; size += chunkSizeBytes; @@ -684,11 +570,11 @@ namespace awsbwhal { if (mLogStream.is_open()) { mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x" - << std::hex << fifoReadAddress[0] << " and writing it to 0x" - << (void *)(hostbuf + words) << std::dec << std::endl; + << std::hex << fifoReadAddress[0] << " and writing it to 0x" + << (void *)(hostbuf + words) << std::dec << std::endl; } - if (mDataMover->pread64((void *)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) + if (awsbwhal::unmgdPread(mUserHandle, (void *)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) return 0; size += chunkSizeBytes; @@ -697,127 +583,45 @@ namespace awsbwhal { if (mLogStream.is_open()) { mLogStream << __func__ << ": done reading " << size << " bytes " << std::endl; } - } // ****************************** // Read & process all trace FIFOs // ****************************** - for (uint32_t wordnum=0; wordnum < numSamples; wordnum++) { - if (type == XCL_PERF_MON_MEMORY) { - uint32_t index = wordsPerSample * wordnum; - bool allZeros = true; - for (uint32_t fifonum=0; fifonum < numFifos; fifonum++) { - temp[fifonum] = *(hostbuf + index + fifonum); - allZeros &= (temp[fifonum] == 0); - } - if (allZeros) - continue; - } - else { - // NOTE: Using AXI-Lite so we use the same address with burst length of 1 word - for (uint32_t fifonum=0; fifonum < numFifos; fifonum++) - size += xclRead(addressSpace, fifoReadAddress[fifonum], &temp[fifonum], 4); - } - + for (uint32_t wordnum = 0; wordnum < numSamples; wordnum++) { + uint32_t index = wordsPerSample 
* wordnum; xclTraceResults results; - // Assign to all 0s to avoid uninitialized variables - memset(&results, 0, sizeof(xclTraceResults)); + uint64_t temp = 0; - uint64_t temp64 = ((uint64_t)temp[1] << 32) | temp[0]; - results.LogID = temp64 & 0x1; - results.Timestamp = (temp64 >> 1) & 0xFFFF; - results.Overflow = (temp64 >> 17) & 0x1; - results.ReadStartEvent = XCL_PERF_MON_START_ADDR; - results.WriteStartEvent = XCL_PERF_MON_START_ADDR; - results.WriteEndEvent = XCL_PERF_MON_END_LAST_DATA; - - if (results.LogID != 0) { - results.HostTimestamp = (temp64 >> 18) & 0xFFFFFFFF; - } - else { - for (uint32_t s=0; s < numSlots; s++) { - uint32_t b = getPerfMonSlotStartBit(type, s); - - if (b >= 32) - temp64 = ((((uint64_t)temp[2] << 32) | temp[1]) >> (b-32)); - else - temp64 = ((((uint64_t)temp[1] << 32) | temp[0]) >> b); - - results.ExtEventFlags[s] = temp64 & 0x7; - results.EventFlags[s] = (temp64 >> 3) & 0x7F; - - if (getPerfMonShowIDS(type)) { - if (getPerfMonShowLEN(type)) { - results.ReadAddrLen[s] = (temp64 >> 10) & 0xFF; - results.WriteAddrLen[s] = (temp64 >> 18) & 0xFF; - - // TODO: assumes AXI ID width of 5 - results.RID[s] = (temp64 >> 26) & 0x1F; - results.ARID[s] = (temp64 >> 31) & 0x1F; - results.BID[s] = (temp64 >> 36) & 0x1F; - results.AWID[s] = (temp64 >> 41) & 0x1F; - } - else { - // TODO: assumes AXI ID width of 5 - results.RID[s] = (temp64 >> 10) & 0x1F; - results.ARID[s] = (temp64 >> 15) & 0x1F; - results.BID[s] = (temp64 >> 20) & 0x1F; - results.AWID[s] = (temp64 >> 25) & 0x1F; - } - } - else { - if (getPerfMonShowLEN(type)) { - results.ReadAddrLen[s] = (temp64 >> 10) & 0xFF; - results.WriteAddrLen[s] = (temp64 >> 18) & 0xFF; - } - } - - // # bytes = burst length * bytes/burst = (addr len + 1) * bytes/burst - uint32_t dataWidth = getPerfMonSlotDataWidth(type, s); - results.ReadBytes[s] = (results.ReadAddrLen[s] + 1) * (dataWidth/8); - results.WriteBytes[s] = (results.WriteAddrLen[s] + 1) * (dataWidth/8); - } // for slot - } // if-else logID != 0 + 
temp = *(hostbuf + index) | (uint64_t)*(hostbuf + index + 1) << 32; + if (!temp) + continue; + // Initialize result to 0 + memset(&results, 0, sizeof(xclTraceResults)); + // SDSoC Packet Format + results.Timestamp = temp & 0x1FFFFFFFFFFF; + results.EventType = ((temp >> 45) & 0xF) ? XCL_PERF_MON_END_EVENT : + XCL_PERF_MON_START_EVENT; + results.TraceID = (temp >> 49) & 0xFFF; + results.Reserved = (temp >> 61) & 0x1; + results.Overflow = (temp >> 62) & 0x1; + results.Error = (temp >> 63) & 0x1; + results.EventID = XCL_PERF_MON_HW_EVENT; traceVector.mArray[wordnum] = results; - // Log values (if requested) -#if 1 if (mLogStream.is_open()) { mLogStream << " Trace sample " << std::dec << wordnum << ": "; - for (int fifonum=numFifos-1; fifonum >= 0; fifonum--) - mLogStream << dec2bin(temp[fifonum]) << " "; + mLogStream << dec2bin(uint32_t(temp >> 32)) << " " << dec2bin(uint32_t(temp & 0xFFFFFFFF)); + mLogStream << std::endl; + mLogStream << " Timestamp : " << results.Timestamp << " "; + mLogStream << "Event Type : " << results.EventType << " "; + mLogStream << "slotID : " << results.TraceID << " "; + mLogStream << "Start, Stop : " << static_cast(results.Reserved) << " "; + mLogStream << "Overflow : " << static_cast(results.Overflow) << " "; + mLogStream << "Error : " << static_cast(results.Error) << " "; mLogStream << std::endl; - - if (results.LogID == 1) { - mLogStream << std::hex << " Host Timestamp: " << results.HostTimestamp << std::endl; - } - else { - if (type == XCL_PERF_MON_OCL_REGION) { - mLogStream << " Ext Event flags: "; - for (int slot=numSlots-1; slot >= 0; slot--) - mLogStream << dec2bin(results.ExtEventFlags[slot], 3) << " "; - } - else { - mLogStream << " Event flags: "; - for (int slot=numSlots-1; slot >= 0; slot--) - mLogStream << dec2bin(results.EventFlags[slot], 7) << " "; - } - - mLogStream << "(ReadAddrLen[0] = " << (int)(results.ReadAddrLen[0]) - << ", WriteAddrLen[0] = " << (int)(results.WriteAddrLen[0]) - << ", ReadAddrLen[1] = " << 
(int)(results.ReadAddrLen[1]) - << ", WriteAddrLen[1] = " << (int)(results.WriteAddrLen[1]); - - if (getPerfMonShowIDS(type)) { - mLogStream << ", RID: " << (int)results.RID[0] << ", ARID: " << (int)results.ARID[0] - << ", BID: " << (int)results.BID[0] << ", AWID: " << (int)results.AWID[0]; - } - mLogStream << ")" << std::endl; - } } -#endif - } // for wordnum + } return size; } // end xclPerfMonReadTrace @@ -930,12 +734,26 @@ size_t xclGetDeviceTimestamp(xclDeviceHandle handle) } -void xclSetOclRegionProfilingNumberSlots(xclDeviceHandle handle, uint32_t numSlots) +void xclSetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type, uint32_t numSlots) { awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); if (!drv) return; - return drv->xclSetOclRegionProfilingNumberSlots(numSlots); + return drv->xclSetProfilingNumberSlots(type, numSlots); +} + + +uint32_t xclGetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type) +{ + return 2; +} + + +void xclGetProfilingSlotName(xclDeviceHandle handle, xclPerfMonType type, uint32_t slotnum, + char* slotName, uint32_t length) +{ + const char* name = (slotnum == XPAR_SPM0_HOST_SLOT) ? "Host" : "Kernels"; + strncpy(slotName, name, length); } @@ -944,3 +762,5 @@ void xclWriteHostEvent(xclDeviceHandle handle, xclPerfMonEventType type, { // don't do anything } + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/SDAccel/userspace/src/perfmon_parameters.h b/SDAccel/userspace/src/perfmon_parameters.h index 0fc547c8..4a2f417b 100644 --- a/SDAccel/userspace/src/perfmon_parameters.h +++ b/SDAccel/userspace/src/perfmon_parameters.h @@ -1,8 +1,7 @@ -/** - * Copyright (C) 2016-2018 Xilinx, Inc - * Author: Sonal Santan - * Performance Monitoring Internal Parameters - * XDMA HAL multi-threading safe, multi-channel DMA read/write support +/* + * Copyright (C) 2018 Xilinx, Inc + * Performance Monitoring Internal Parameters using PCIe for AWS HAL Driver. 
+ * NOTE: partially taken from file xaxipmon_hw.h in v5.0 of APM driver * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. A copy of the @@ -28,12 +27,57 @@ /* Address offsets in core */ #define AXI_FIFO_RDFR 0x18 +#define AXI_FIFO_RDFO 0x1c #define AXI_FIFO_RDFD 0x20 #define AXI_FIFO_RDFD_AXI_FULL 0x1000 +#define AXI_FIFO_TDFD 0x10 #define AXI_FIFO_RLR 0x24 #define AXI_FIFO_SRR 0x28 #define AXI_FIFO_RESET_VALUE 0xA5 +/************************ SDx Performance Monitor(SPM) ************************/ + +/* Address offsets in core */ +#define XSPM_CONTROL_OFFSET 0x08 +#define XSPM_TRACE_CTRL_OFFSET 0x10 +#define XSPM_EVENT_OFFSET 0x18 +#define XSPM_SAMPLE_OFFSET 0x20 +#define XSPM_FIFO_COUNTS_OFFSET 0x28 +#define XSPM_FIFO_READ_COUNTS_OFFSET 0x30 +#define XSPM_WRITE_BYTES_OFFSET 0x40 +#define XSPM_WRITE_TRANX_OFFSET 0x44 +#define XSPM_WRITE_LATENCY_OFFSET 0x48 +#define XSPM_READ_BYTES_OFFSET 0x4C +#define XSPM_READ_TRANX_OFFSET 0x50 +#define XSPM_READ_LATENCY_OFFSET 0x54 +//#define XSPM_MIN_MAX_WRITE_LATENCY_OFFSET 0x58 +//#define XSPM_MIN_MAX_READ_LATENCY_OFFSET 0x5C +#define XSPM_OUTSTANDING_COUNTS_OFFSET 0x58 +#define XSPM_LAST_WRITE_ADDRESS_OFFSET 0x5C +#define XSPM_LAST_WRITE_DATA_OFFSET 0x60 +#define XSPM_LAST_READ_ADDRESS_OFFSET 0x64 +#define XSPM_LAST_READ_DATA_OFFSET 0x68 +#define XSPM_SAMPLE_WRITE_BYTES_OFFSET 0x80 +#define XSPM_SAMPLE_WRITE_TRANX_OFFSET 0x84 +#define XSPM_SAMPLE_WRITE_LATENCY_OFFSET 0x88 +#define XSPM_SAMPLE_READ_BYTES_OFFSET 0x8C +#define XSPM_SAMPLE_READ_TRANX_OFFSET 0x90 +#define XSPM_SAMPLE_READ_LATENCY_OFFSET 0x94 +//#define XSPM_SAMPLE_MIN_MAX_WRITE_LATENCY_OFFSET 0x98 +//#define XSPM_SAMPLE_MIN_MAX_READ_LATENCY_OFFSET 0x9C +#define XSPM_SAMPLE_OUTSTANDING_COUNTS_OFFSET 0x98 +#define XSPM_SAMPLE_LAST_WRITE_ADDRESS_OFFSET 0x9C +#define XSPM_SAMPLE_LAST_WRITE_DATA_OFFSET 0xA0 +#define XSPM_SAMPLE_LAST_READ_ADDRESS_OFFSET 0xA4 +#define 
XSPM_SAMPLE_LAST_READ_DATA_OFFSET 0xA8 + +/* SPM Control Register masks */ +#define XSPM_CR_RESET_ON_SAMPLE_MASK 0x00000010 +#define XSPM_CR_FIFO_RESET_MASK 0x00000008 +#define XSPM_CR_TRACE_ENABLE_MASK 0x00000004 +#define XSPM_CR_COUNTER_RESET_MASK 0x00000002 +#define XSPM_CR_COUNTER_ENABLE_MASK 0x00000001 + /************************ APM Constant Definitions ****************************/ /* Register offsets of AXIMONITOR in the Device Config */ @@ -113,7 +157,7 @@ #define XAPM_SINC8_OFFSET 0x0284 /**< Sampled Incrementer 8 Register */ #define XAPM_SMC9_OFFSET 0x0290 /**< Sampled Metric Counter 9 Register */ #define XAPM_SINC9_OFFSET 0x0294 /**< Sampled Incrementer 9 Register */ - + #define XAPM_MC10_OFFSET 0x01A0 /**< Metric Counter 10 Register */ #define XAPM_MC11_OFFSET 0x01B0 /**< Metric Counter 11 Register */ #define XAPM_MC12_OFFSET 0x0500 /**< Metric Counter 12 Register */ @@ -152,7 +196,7 @@ #define XAPM_MC45_OFFSET 0x0990 /**< Metric Counter 45 Register */ #define XAPM_MC46_OFFSET 0x09A0 /**< Metric Counter 46 Register */ #define XAPM_MC47_OFFSET 0x09B0 /**< Metric Counter 47 Register */ - + #define XAPM_SMC10_OFFSET 0x02A0 /**< Sampled Metric Counter 10 Register */ #define XAPM_SMC11_OFFSET 0x02B0 /**< Sampled Metric Counter 11 Register */ #define XAPM_SMC12_OFFSET 0x0600 /**< Sampled Metric Counter 12 Register */ @@ -208,7 +252,7 @@ #define XAPM_SMC61_OFFSET 0x0A58 /**< Sampled Metric Counter 61 Register */ #define XAPM_SMC62_OFFSET 0x0AB4 /**< Sampled Metric Counter 62 Register */ #define XAPM_SMC63_OFFSET 0x0AB8 /**< Sampled Metric Counter 63 Register */ - + #define XAPM_CTL_OFFSET 0x0300 /**< Control Register */ #define XAPM_ID_OFFSET 0x0304 /**< Latency ID Register */ #define XAPM_IDMASK_OFFSET 0x0308 /**< ID Mask Register */ diff --git a/SDAccel/userspace/src2/scan.cpp b/SDAccel/userspace/src/scan.cpp similarity index 100% rename from SDAccel/userspace/src2/scan.cpp rename to SDAccel/userspace/src/scan.cpp diff --git a/SDAccel/userspace/src2/scan.h 
b/SDAccel/userspace/src/scan.h similarity index 100% rename from SDAccel/userspace/src2/scan.h rename to SDAccel/userspace/src/scan.h diff --git a/SDAccel/userspace/src/shim.cpp b/SDAccel/userspace/src/shim.cpp index 5b81680a..2441035b 100644 --- a/SDAccel/userspace/src/shim.cpp +++ b/SDAccel/userspace/src/shim.cpp @@ -19,24 +19,21 @@ */ #include "shim.h" -#include "memorymanager.h" -#include "datamover.h" #include /* * Define GCC version macro so we can use newer C++11 features * if possible */ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) -//TODO: umang #ifdef INTERNAL_TESTING #define ACCELERATOR_BAR 0 #define MMAP_SIZE_USER 0x400000 #endif -/* Kernels expect all buffers to be aligned at 4KB for AXI burst to/fro DDR */ +/* Aligning access to FPGA DRAM space to 4096 Byte */ #define DDR_BUFFER_ALIGNMENT 0x1000 #include @@ -55,206 +52,216 @@ #include #include #include +#include #include #include "xclbin.h" -#include "xdma/xdma_ioctl.h" +#include "xocl/xocl_ioctl.h" +#include "scan.h" +#include "awssak.h" #ifdef INTERNAL_TESTING - -#include "internal_use_only_mgmt/mgmt-ioctl.h" - +#include "driver/aws/kernel/include/mgmt-ioctl.h" #else - -#define XCLBIN_DOWNLOAD_RETRY 10 -#define XCLBIN_DOWNLOAD_WAIT 1 -#include -#include +#define AWSMGMT_NUM_SUPPORTED_CLOCKS 4 +#define AWSMGMT_NUM_ACTUAL_CLOCKS 3 // TODO - define this in a header file -extern const char* get_afi_from_xclBin(const xclBin *buffer); -extern const char *get_afi_from_axlf(const axlf *buffer); - +extern char* get_afi_from_xclBin(const xclBin *); +extern char* get_afi_from_axlf(const axlf *); +// define DEFAULT_GLOBAL_AFI "agfi-069ddd533a748059b" // 1.4 shell +#define DEFAULT_GLOBAL_AFI "agfi-0cc0ac6a40aa73ce8" // 1.4 shell 4-ddr data retention enabled #endif namespace awsbwhal { - const unsigned AwsXcl::TAG = 0X586C0C6C; // XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); + // This 
list will get populated in xclProbe + // 0 -> /dev/dri/renderD129 + // 1 -> /dev/dri/renderD130 static std::mutex deviceListMutex; - static std::vector deviceList; - - - static int findDMADevice(unsigned short domain, unsigned char bus, unsigned char dev, unsigned char func) - { - int i; - char file_name_buf[128]; - for (i = 0; i < 16; i++) { - std::sprintf((char *)&file_name_buf, "/dev/xdma%d_user", i); - int fd = open(file_name_buf, O_RDWR); - if (fd < 0) - continue; - xdma_ioc_info obj; - std::memset(&obj, 0, sizeof(xdma_ioc_info)); - obj.base.command = XDMA_IOCINFO; - obj.base.magic = 0X586C0C6C; - int ret = ioctl(fd, XDMA_IOCINFO, &obj); - close(fd); - if (ret) - continue; - if (obj.domain != domain) - continue; - if (obj.bus != bus) - continue; - if (obj.dev != dev) - continue; - if (obj.func != func) - continue; - return i; - } - return -1; - } + // static std::vector> deviceList; - int AwsXcl::setDDRCount(const axlf* buffer) - { - const char* str = (const char*) buffer->m_header.m_platformVBNV; - if(strstr(str, "1ddr-xpr")) { - m4DDR = false; - } else - m4DDR = true; - return 0; - } + const unsigned AwsXcl::TAG = 0X586C0C6C; // XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); +#ifdef INTERNAL_TESTING int AwsXcl::xclLoadAxlf(const axlf *buffer) { + if ( mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buffer << std::endl; + } + + if ( !mLocked) + return -EPERM; + + std::cout << "Downloading xclbin ...\n" << std::endl; + const unsigned cmd = AWSMGMT_IOCICAPDOWNLOAD_AXLF; + awsmgmt_ioc_bitstream_axlf obj = { const_cast(buffer) }; + int ret = ioctl(mMgtHandle, cmd, &obj); + if ( 0 != ret) + return ret; + + // If it is an XPR DSA, zero out the DDR again as downloading the XCLBIN + // reinitializes the DDR and results in ECC error. 
+ if ( isXPR()) { if ( mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buffer << std::endl; + mLogStream << __func__ << "XPR Device found, zeroing out DDR again.." << std::endl; } - if ( !mLocked) - return -EPERM; - - //set 1 or 4 ddr. - if(setDDRCount(buffer)) - return -EINVAL; - -#ifdef INTERNAL_TESTING - const unsigned cmd = AWSMGMT_IOCICAPDOWNLOAD_AXLF; - awsmgmt_ioc_bitstream_axlf obj = { const_cast(buffer) }; - return ioctl(mMgtHandle, cmd, &obj); -#else - const char* afi_id = get_afi_from_axlf(buffer); - if (!afi_id) - return -EINVAL; - - //skip redownload. - fpga_mgmt_image_info info; - int result = fpga_mgmt_describe_local_image(mBoardNumber, &info, 0); - if(!result && (info.status == FPGA_STATUS_LOADED)) { - if(strncmp(afi_id, info.ids.afi_id, sizeof(info.ids.afi_id)) == 0) { - //existing afi matched. - uint16_t status = 0; - result = fpga_mgmt_get_vDIP_status(mBoardNumber, &status); - if(result) { - printf("Error: can not get virtual DIP Switch state\n"); - return result; - } - //Set bit 0 to 1 - status |= (1 << 0); - result = fpga_mgmt_set_vDIP(mBoardNumber, status); - if(result) { - printf("Error trying to set virtual DIP Switch \n"); - return result; - } - std::this_thread::sleep_for(std::chrono::microseconds(250)); - //pulse the changes in. - result = fpga_mgmt_get_vDIP_status(mBoardNumber, &status); - if(result) { - printf("Error: can not get virtual DIP Switch state\n"); - return result; - } - //Set bit 0 to 0 - status &= ~(1 << 0); - result = fpga_mgmt_set_vDIP(mBoardNumber, status); - if(result) { - printf("Error trying to set virtual DIP Switch \n"); - return result; - } - std::this_thread::sleep_for(std::chrono::microseconds(250)); - - printf("Successfully skipped reloading of local image.\n"); - return result; - } - } - - //proceed with download. 
- result = fpga_mgmt_load_local_image(mBoardNumber, const_cast(afi_id)); - if (result) - return -EBUSY; - for (int i = 0; i < XCLBIN_DOWNLOAD_RETRY; i++) { - std::this_thread::sleep_for(std::chrono::seconds(XCLBIN_DOWNLOAD_WAIT)); - //fpga_mgmt_image_info info; - std::memset(&info, 0, sizeof(struct fpga_mgmt_image_info)); - result = fpga_mgmt_describe_local_image(mBoardNumber, &info, 0); - if (result) - return -EBUSY; - if ((info.status == FPGA_STATUS_LOADED) && !std::strcmp(info.ids.afi_id, afi_id)) - return 0; + if ( zeroOutDDR() == false) { + if ( mLogStream.is_open()) { + mLogStream << __func__ << "zeroing out DDR failed" << std::endl; + } + return -EIO; } - return -EBUSY; + } + + drm_xocl_axlf axlf_obj = {const_cast(buffer)}; + ret = ioctl(mUserHandle, DRM_IOCTL_XOCL_READ_AXLF, &axlf_obj); + return ret; + } #endif + + int AwsXcl::xclGetXclBinIdFromSysfs(uint64_t &xclbin_id_from_sysfs) + { + const std::string devPath = "/sys/bus/pci/devices/" + xcldev::pci_device_scanner::device_list[ mBoardNumber ].user_name; + std::string binid_path = devPath + "/xclbinid"; + struct stat sb; + if( stat( binid_path.c_str(), &sb ) < 0 ) { + std::cout << "ERROR: failed to stat " << binid_path << std::endl; + return errno; + } + std::ifstream ifs( binid_path.c_str(), std::ifstream::binary ); + if( !ifs.good() ) { + return errno; + } + char* fileReadBuf = new char[sb.st_size]; + memset(fileReadBuf, 0, sb.st_size); + ifs.read( fileReadBuf, sb.st_size ); + if( ifs.gcount() > 0 ) { + std::string tmp_hex_string = fileReadBuf; + xclbin_id_from_sysfs = std::stoi(std::string(fileReadBuf),nullptr,16); + } else { // xclbinid exists, but no data read or reported + std::cout << "WARNING: 'xclbinid' invalid, unable to report xclbinid. Has the bitstream been loaded? 
See 'xbsak program'.\n"; + } + delete [] fileReadBuf; + ifs.close(); + return 0; } int AwsXcl::xclLoadXclBin(const xclBin *buffer) { - const char *xclbininmemory = reinterpret_cast(buffer); - - if (!memcmp(xclbininmemory, "xclbin2", 8)){ - return xclLoadAxlf(reinterpret_cast(xclbininmemory)); - } + char *xclbininmemory = reinterpret_cast (const_cast (buffer)); +#ifdef INTERNAL_TESTING + if (!memcmp(xclbininmemory, "xclbin2", 8)) { + return xclLoadAxlf(reinterpret_cast(xclbininmemory)); + } - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buffer << std::endl; - } + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buffer << std::endl; + } - if (!mLocked) - return -EPERM; + if (!mLocked) + return -EPERM; -#ifdef INTERNAL_TESTING - const unsigned cmd = AWSMGMT_IOCICAPDOWNLOAD; - awsmgmt_ioc_bitstream obj = {const_cast(buffer)}; - return ioctl(mMgtHandle, cmd, &obj); + const unsigned cmd = AWSMGMT_IOCICAPDOWNLOAD; + awsmgmt_ioc_bitstream obj = {const_cast(buffer)}; + return ioctl(mMgtHandle, cmd, &obj); #else - const char* afi_id = get_afi_from_xclBin(buffer); - if (!afi_id) - return -EINVAL; - int result = fpga_mgmt_load_local_image(mBoardNumber, const_cast(afi_id)); - if (result) - return -EBUSY; - for (int i = 0; i < 10; i++) { - std::this_thread::sleep_for(std::chrono::seconds(1)); - fpga_mgmt_image_info info; - std::memset(&info, 0, sizeof(struct fpga_mgmt_image_info)); - result = fpga_mgmt_describe_local_image(mBoardNumber, &info, 0); - if (result) - return -EBUSY; - if (!std::strcmp(info.ids.afi_id, afi_id)) - return 0; - } - return -EBUSY; - // TODO - add printout and eror case handing + if (!memcmp(xclbininmemory, "xclbin2", 8)) { + int retVal = 0; + axlf *axlfbuffer = reinterpret_cast(const_cast (buffer)); + fpga_mgmt_image_info orig_info; + char* afi_id = get_afi_from_axlf(axlfbuffer); + std::memset(&orig_info, 0, sizeof(struct fpga_mgmt_image_info)); + 
fpga_mgmt_describe_local_image(mBoardNumber, &orig_info, 0); + + uint64_t xclbin_id_from_sysfs; + if( int retVal = xclGetXclBinIdFromSysfs( xclbin_id_from_sysfs ) != 0 ) + return retVal; + + if ( (xclbin_id_from_sysfs == 0) || (axlfbuffer->m_uniqueId != xclbin_id_from_sysfs) || checkAndSkipReload(afi_id, &orig_info) ) { + // force data retention option + union fpga_mgmt_load_local_image_options opt; + fpga_mgmt_init_load_local_image_options(&opt); + opt.flags = FPGA_CMD_DRAM_DATA_RETENTION; + opt.afi_id = afi_id; + opt.slot_id = mBoardNumber; + retVal = fpga_mgmt_load_local_image_with_options(&opt); + if (retVal == FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE || + retVal == FPGA_ERR_DRAM_DATA_RETENTION_FAILED || + retVal == FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED) { + std::cout << "INFO: Could not load AFI for data retention, code: " << retVal + << " - Loading in classic mode." << std::endl; + retVal = fpga_mgmt_load_local_image(mBoardNumber, afi_id); + } + // check retVal from image load + if (retVal) { + std::cout << "Failed to load AFI, error: " << retVal << std::endl; + return -retVal; + } + retVal = sleepUntilLoaded( std::string(afi_id) ); + if (!retVal) { + drm_xocl_axlf axlf_obj = { reinterpret_cast(const_cast(buffer)) }; + retVal = ioctl(mUserHandle, DRM_IOCTL_XOCL_READ_AXLF, &axlf_obj); + if (retVal) { + std::cout << "IOCTL DRM_IOCTL_XOCL_READ_AXLF Failed: " << retVal << std::endl; + } else { + std::cout << "AFI load complete." << std::endl; + } + } + } + return retVal; + } else { + char* afi_id = get_afi_from_xclBin(buffer); + return fpga_mgmt_load_local_image(mBoardNumber, afi_id); + } #endif } - /* Accessing F1 FPGA memory space (i.e. 
OpenCL Global Memory) is mapped through AppPF BAR4 * all offsets are relative to the base address available in AppPF BAR4 * SDAcell XCL_ADDR_SPACE_DEVICE_RAM enum maps to AwsXcl::ocl_global_mem_bar, which is the * handle for AppPF BAR4 */ + size_t AwsXcl::xclReadModifyWrite(uint64_t offset, const void *hostBuf, size_t size) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " + << offset << ", " << hostBuf << ", " << size << std::endl; + } +#if GCC_VERSION >= 40800 + alignas(DDR_BUFFER_ALIGNMENT) char buffer[DDR_BUFFER_ALIGNMENT]; +#else + AlignedAllocator alignedBuffer(DDR_BUFFER_ALIGNMENT, DDR_BUFFER_ALIGNMENT); + char* buffer = alignedBuffer.getBuffer(); +#endif + + const size_t mod_size = offset % DDR_BUFFER_ALIGNMENT; + // Read back one full aligned block starting from preceding aligned address + const uint64_t mod_offset = offset - mod_size; + if (xclRead(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset, buffer, DDR_BUFFER_ALIGNMENT) != DDR_BUFFER_ALIGNMENT) + return -1; + + // Update the local copy of buffer with user requested data + const size_t copy_size = (size + mod_size > DDR_BUFFER_ALIGNMENT) ? 
DDR_BUFFER_ALIGNMENT - mod_size : size; + std::memcpy(buffer + mod_size, hostBuf, copy_size); + + // Write back the updated aligned block + if (xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset, buffer, DDR_BUFFER_ALIGNMENT) != DDR_BUFFER_ALIGNMENT) + return -1; + + // Write any remaining blocks over DDR_BUFFER_ALIGNMENT size + if (size + mod_size > DDR_BUFFER_ALIGNMENT) { + size_t write_size = xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset + DDR_BUFFER_ALIGNMENT, + (const char *)hostBuf + copy_size, size - copy_size); + if (write_size != (size - copy_size)) + return -1; + } + return size; + } /* Accessing F1 FPGA memory space mapped through AppPF PCIe BARs - * space = XCL_ADDR_SPACE_DEVICE_RAM maps to AppPF PCIe BAR4, (sh_cl_dma_pcis_ bus), with AwsXcl::ocl_global_mem_bar as handle - * space = XCL_ADDR_KERNEL_CTRL maps to AppPF PCIe BAR0 (sh_cl_ocl bus), with AwsXcl::ocl_kernel_bar as handle - * all offsets are relative to the base address available in AppPF - */ + * space = XCL_ADDR_SPACE_DEVICE_RAM maps to AppPF PCIe BAR4, (sh_cl_dma_pcis_ bus), with AwsXcl::ocl_global_mem_bar as handle + * space = XCL_ADDR_KERNEL_CTRL maps to AppPF PCIe BAR0 (sh_cl_ocl bus), with AwsXcl::ocl_kernel_bar as handle + * all offsets are relative to the base address available in AppPF + */ size_t AwsXcl::xclWrite(xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", " @@ -262,35 +269,23 @@ namespace awsbwhal { } if (!mLocked) - return -EPERM; + return -1; switch (space) { - case XCL_ADDR_SPACE_DEVICE_RAM: - { - const size_t totalSize = size; - const char *curr = static_cast(hostBuf); - while (size > maxDMASize) { - if (mDataMover->pwrite64(curr,maxDMASize,offset) < 0) - return -EIO; - offset += maxDMASize; - curr += maxDMASize; - size -= maxDMASize; - } - if (mDataMover->pwrite64(curr,size,offset) < 0) - return -EIO; - return totalSize; - } + + /* Current 
release now includes performance monitors */ case XCL_ADDR_SPACE_DEVICE_PERFMON: { #ifdef INTERNAL_TESTING - const unsigned int pf_bar = ACCELERATOR_BAR; + if (pcieBarWrite(ACCELERATOR_BAR, offset, hostBuf, size) == 0) { + return size; + } #else - const unsigned int pf_bar = APP_PF_BAR0; -#endif - if (pcieBarWrite(pf_bar, offset, hostBuf, size) == 0) { + if (pcieBarWrite(APP_PF_BAR0, offset, hostBuf, size) == 0) { return size; } - return -EIO; +#endif + return -1; } case XCL_ADDR_KERNEL_CTRL: { @@ -306,22 +301,23 @@ namespace awsbwhal { } } #ifdef INTERNAL_TESTING - const unsigned int pf_bar = ACCELERATOR_BAR; + if (pcieBarWrite(ACCELERATOR_BAR, offset, hostBuf, size) == 0) { #else - const unsigned int pf_bar = APP_PF_BAR0; + if (pcieBarWrite(APP_PF_BAR0, offset, hostBuf, size) == 0) { + #endif - if (pcieBarWrite(pf_bar, offset, hostBuf, size) == 0) { return size; } - return -EIO; + return -1; } default: { - return -EINVAL; + return -1; } } } + size_t AwsXcl::xclRead(xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size) { if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", " @@ -329,41 +325,26 @@ namespace awsbwhal { } switch (space) { - case XCL_ADDR_SPACE_DEVICE_RAM: - { - const size_t totalSize = size; - char *curr = static_cast(hostBuf); - while (size > maxDMASize) { - if (mDataMover->pread64(curr,maxDMASize,offset) < 0) - return -EIO; - offset += maxDMASize; - curr += maxDMASize; - size -= maxDMASize; - } - if (mDataMover->pread64(curr,size,offset) < 0) - return -EIO; - return totalSize; - } case XCL_ADDR_SPACE_DEVICE_PERFMON: { -#ifdef INTERNAL_TESTING - const unsigned int pf_bar = ACCELERATOR_BAR; +#ifdef INTERNAL_TESTING + if (pcieBarRead(ACCELERATOR_BAR, offset, hostBuf, size) == 0) { + return size; + } #else - const unsigned int pf_bar = APP_PF_BAR0; -#endif - if (pcieBarRead(pf_bar, offset, hostBuf, size) == 0) { + if (pcieBarRead(APP_PF_BAR0, offset, hostBuf, size) == 0) { return 
size; } - return -EIO; +#endif + return -1; } case XCL_ADDR_KERNEL_CTRL: { -#ifdef INTERNAL_TESTING - const unsigned int pf_bar = ACCELERATOR_BAR; +#ifdef INTERNAL_TESTING + int result = pcieBarRead(ACCELERATOR_BAR, offset, hostBuf, size); #else - const unsigned int pf_bar = APP_PF_BAR0; + int result = pcieBarRead(APP_PF_BAR0, offset, hostBuf, size); #endif - int result = pcieBarRead(pf_bar, offset, hostBuf, size); if (mLogStream.is_open()) { const unsigned *reg = static_cast(hostBuf); size_t regSize = size / 4; @@ -374,138 +355,116 @@ namespace awsbwhal { << std::hex << offset + i << std::dec << ", 0x" << std::hex << reg[i] << std::dec << std::endl; } } - return !result ? size : -EIO; + return !result ? size : 0; } default: { - return -EINVAL; + return -1; } } } uint64_t AwsXcl::xclAllocDeviceBuffer(size_t size) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << size << std::endl; - } - - if (size == 0) - size = DDR_BUFFER_ALIGNMENT; + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << size << std::endl; + } - uint64_t result = MemoryManager::mNull; + uint64_t result = mNullAddr; + unsigned boHandle = xclAllocBO(size, XCL_BO_DEVICE_RAM, 0x0); + if (boHandle == mNullBO) + return result; - if(!is4DDR()) { - return mDDRMemoryManager[0]->alloc(size); - } - - for (auto i : mDDRMemoryManager) { - result = i->alloc(size); - if (result != MemoryManager::mNull) - break; - } + drm_xocl_info_bo boInfo = {boHandle, 0, 0, 0}; + if (ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &boInfo)) return result; + + void *hbuf = xclMapBO(boHandle, true); + if (hbuf == MAP_FAILED) { + xclFreeBO(boHandle); + return mNullAddr; + } + mLegacyAddressTable.insert(boInfo.paddr, size, std::make_pair(boHandle, (char *)hbuf)); + return boInfo.paddr; } uint64_t AwsXcl::xclAllocDeviceBuffer2(size_t size, xclMemoryDomains domain, unsigned flags) { - if (mLogStream.is_open()) { - mLogStream << 
__func__ << ", " << std::this_thread::get_id() << ", " << size << ", " - << domain << ", " << flags << std::endl; - } + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << size << ", " + << domain << ", " << flags << std::endl; + } - if (domain != XCL_MEM_DEVICE_RAM) - return MemoryManager::mNull; + uint64_t result = mNullAddr; + if (domain != XCL_MEM_DEVICE_RAM) + return result; - if (size == 0) - size = DDR_BUFFER_ALIGNMENT; + unsigned ddr = 1; + ddr <<= flags; + unsigned boHandle = xclAllocBO(size, XCL_BO_DEVICE_RAM, ddr); + if (boHandle == mNullBO) + return result; - if(!is4DDR() && flags > 0) { - std::cout << "Trying to allocate past the 1 bank on the 1 DDR device " << std::endl; - return MemoryManager::mNull; - } + drm_xocl_info_bo boInfo = {boHandle, 0, 0, 0}; + if (ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &boInfo)) + return result; - if (flags >= mDDRMemoryManager.size()) { - return MemoryManager::mNull; - } - return mDDRMemoryManager[flags]->alloc(size); + void *hbuf = xclMapBO(boHandle, true); + if (hbuf == MAP_FAILED) { + xclFreeBO(boHandle); + return mNullAddr; + } + mLegacyAddressTable.insert(boInfo.paddr, size, std::make_pair(boHandle, (char *)hbuf)); + return boInfo.paddr; } - void AwsXcl::xclFreeDeviceBuffer(uint64_t buf) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buf << std::endl; - } - - uint64_t size = 0; - for (auto i : mDDRMemoryManager) { - size += i->size(); - if (buf < size) { - i->free(buf); - } - } + void AwsXcl::xclFreeDeviceBuffer(uint64_t buf) + { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buf << std::endl; + } + + std::pair bo = mLegacyAddressTable.erase(buf); + drm_xocl_info_bo boInfo = {bo.first, 0, 0, 0}; + if (!ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &boInfo)) { + munmap(bo.second, boInfo.size); + } + xclFreeBO(bo.first); } - size_t 
AwsXcl::xclCopyBufferHost2Device(uint64_t dest, const void *src, size_t size, size_t seek) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " - << src << ", " << size << ", " << seek << std::endl; - } -#ifdef DEBUG - { - // Ensure that this buffer was allocated by memory manager before - const uint64_t v = MemoryManager::mNull; - std::pair buf = std::make_pair(v, v); - uint64_t high = 0; - for (auto i : mDDRMemoryManager) { - high += i->size(); - if (dest < high) { - buf = i->lookup(dest); - break; - } - } - if (MemoryManager::isNullAlloc(buf)) - return -1; - - if (buf.second < (size + seek)) - return -1; - } -#endif - dest += seek; - return xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, dest, src, size); + size_t AwsXcl::xclCopyBufferHost2Device(uint64_t dest, const void *src, size_t size, size_t seek) + { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " + << src << ", " << size << ", " << seek << std::endl; + } + + std::pair bo = mLegacyAddressTable.find(dest); + std::memcpy(bo.second + seek, src, size); + int result = xclSyncBO(bo.first, XCL_BO_SYNC_BO_TO_DEVICE, size, seek); + if (result) + return result; + return size; } - size_t AwsXcl::xclCopyBufferDevice2Host(void *dest, uint64_t src, size_t size, size_t skip) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " - << src << ", " << size << ", " << skip << std::endl; - } - - -#ifdef DEBUG - { - // Ensure that this buffer was allocated by memory manager before - const uint64_t v = MemoryManager::mNull; - std::pair buf = std::make_pair(v, v); - uint64_t high = 0; - for (auto i : mDDRMemoryManager) { - high += i->size(); - if (src < high) { - buf = i->lookup(src); - break; - } - } - if (MemoryManager::isNullAlloc(buf)) - return -1; - - if (buf.second < (size + skip)) - return -1; - } -#endif - src += skip; - return 
xclRead(XCL_ADDR_SPACE_DEVICE_RAM, src, dest, size); + size_t AwsXcl::xclCopyBufferDevice2Host(void *dest, uint64_t src, size_t size, size_t skip) + { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " + << src << ", " << size << ", " << skip << std::endl; + } + + std::pair bo = mLegacyAddressTable.find(src); + int result = xclSyncBO(bo.first, XCL_BO_SYNC_BO_FROM_DEVICE, size, skip); + if (result) + return result; + std::memcpy(dest, bo.second + skip, size); + return size; } + AwsXcl *AwsXcl::handleCheck(void *handle) { // Sanity checks if (!handle) @@ -520,46 +479,37 @@ namespace awsbwhal { } unsigned AwsXcl::xclProbe() { -#if 0 - char file_name_buf[128]; - unsigned i = 0; - for (i = 0; i < 16; i++) { -#if defined(INTERNAL_TESTING) - std::sprintf((char *)&file_name_buf, "/dev/awsmgmt%d", i); -#elif defined(AWS_EDMA) - std::sprintf((char *)&file_name_buf, "/dev/edma%u_queue0", i); -#else - std::sprintf((char *)&file_name_buf, "/dev/xdma%u_user", i); + std::lock_guard lock(awsbwhal::deviceListMutex); + if(xcldev::pci_device_scanner::device_list.size() == 0) { + xcldev::pci_device_scanner devices; + devices.scan(true); + } + + unsigned i = 0; +#ifdef INTERNAL_TESTING + char file_name_buf[128]; + for (i = 0; i < 16; i++) { + std::sprintf((char *)&file_name_buf, "/dev/awsmgmt%d", i); + int fd = open(file_name_buf, O_RDWR); + if (fd < 0) { + return i; + } + close(fd); + } + if (i != xcldev::pci_device_scanner::device_list.size()) { + std::cout << "ERROR xclProbe: Num of FPGA userPF device do not match num of mgmtPF devices" << std::endl; + std::cout << "ERROR xclProbe: Num of userPF, mgmtPF devices = " << std::dec << xcldev::pci_device_scanner::device_list.size() << std::dec << i << std::endl; + return 0; + } #endif - int fd = open(file_name_buf, O_RDWR); - if (fd < 0) { - return i; - } - close(fd); - } + i = xcldev::pci_device_scanner::device_list.size(); + #ifndef INTERNAL_TESTING - if (fpga_mgmt_init() 
|| fpga_pci_init() ) { - std::cout << "xclProbe failed to initialized fpga libraries" << std::endl; - return 0; - } - std::cout << "xclProbe found " << i << "FPGA slots with EDMA driver running" << std::endl; + std::cout << "xclProbe found " << std::dec << i << " FPGA slots with xocl driver running" << std::endl; #else - std::cout << "xclProbe found " << i << "FPGA slots with baremetal driver running" << std::endl; -#endif - return i; + std::cout << "xclProbe found " << std::dec << i << " FPGA slots with awsmgmt & xocl driver running" << std::endl; #endif - } - - void AwsXcl::initMemoryManager() - { - if (!mDeviceInfo.mDDRBankCount) - return; - const uint64_t bankSize = mDeviceInfo.mDDRSize / mDeviceInfo.mDDRBankCount; - uint64_t start = 0x0; - for (unsigned i = 0; i < mDeviceInfo.mDDRBankCount; i++) { - mDDRMemoryManager.push_back(new MemoryManager(bankSize, start, DDR_BUFFER_ALIGNMENT)); - start += bankSize; - } + return i; } AwsXcl::~AwsXcl() @@ -574,124 +524,158 @@ namespace awsbwhal { if (mMgtHandle > 0) close(mMgtHandle); #else - if (ocl_kernel_bar > PCI_BAR_HANDLE_INIT) +//# error "INTERNAL_TESTING macro disabled. AMZN code goes here. 
" + if (ocl_kernel_bar >=0) fpga_pci_detach(ocl_kernel_bar); - if (ocl_global_mem_bar > PCI_BAR_HANDLE_INIT) + if (ocl_global_mem_bar>=0) fpga_pci_detach(ocl_global_mem_bar); - if (sda_mgmt_bar > PCI_BAR_HANDLE_INIT) + if (sda_mgmt_bar>=0) fpga_pci_detach(sda_mgmt_bar); - ocl_kernel_bar = PCI_BAR_HANDLE_INIT; - ocl_global_mem_bar = PCI_BAR_HANDLE_INIT; - sda_mgmt_bar = PCI_BAR_HANDLE_INIT; + ocl_kernel_bar = -1; + ocl_global_mem_bar = -1; + sda_mgmt_bar = -1; #endif - delete mDataMover; - - for (auto i : mDDRMemoryManager) { - delete i; - } if (mLogStream.is_open()) { mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; mLogStream.close(); } + + fpga_mgmt_close(); } AwsXcl::AwsXcl(unsigned index, const char *logfileName, xclVerbosityLevel verbosity) : mTag(TAG), mBoardNumber(index), - mDMADeviceNodeNumber(deviceList[index]), - maxDMASize(0xfa0000), - mLocked(false), - mOffsets{0x0, 0x0, 0x0, 0x0}, - mOclRegionProfilingNumberSlots(XPAR_AXI_PERF_MON_2_NUMBER_SLOTS), - m4DDR(true) + maxDMASize(0xfa0000), + mLocked(false), + mOffsets{0x0, 0x0, 0x0, 0x0}, + mOclRegionProfilingNumberSlots(XPAR_AXI_PERF_MON_2_NUMBER_SLOTS) { - int slot_id = mBoardNumber; - mDataMover = new DataMover(mDMADeviceNodeNumber, 4 /* 1 channel each dir */); - if (logfileName && (logfileName[0] != '\0')) { - mLogStream.open(logfileName); - mLogStream << "FUNCTION, THREAD ID, ARG..." 
<< std::endl; - mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; +#ifndef INTERNAL_TESTING + loadDefaultAfiIfCleared(); +#endif + const std::string devName = "/dev/dri/renderD" + std::to_string(xcldev::pci_device_scanner::device_list[mBoardNumber].user_instance); +#ifndef INTERNAL_TESTING + mUserHandle = open(devName.c_str(), O_RDWR); + if(mUserHandle <= 0) { + std::cout << "WARNING: AwsXcl - Cannot open userPF: " << devName << std::endl; } +#endif + + fpga_mgmt_init(); #ifdef INTERNAL_TESTING - char file_name_buf[128]; - std::sprintf((char *)&file_name_buf, "/dev/xdma%d_user", mDMADeviceNodeNumber); - mUserHandle = open(file_name_buf, O_RDWR | O_SYNC); - mUserMap = (char *)mmap(0, MMAP_SIZE_USER, PROT_READ | PROT_WRITE, MAP_SHARED, mUserHandle, 0); - if (mUserMap == MAP_FAILED) { - close(mUserHandle); - mUserHandle = -1; + if(mUserHandle > 0) { + mUserMap = (char *)mmap(0, MMAP_SIZE_USER, PROT_READ | PROT_WRITE, MAP_SHARED, mUserHandle, 0); + if (mUserMap == MAP_FAILED) { + std::cout << "Map failed: " << devName << std::endl; + close(mUserHandle); + mUserHandle = -1; + } } + char file_name_buf[128]; std::fill(&file_name_buf[0], &file_name_buf[0] + 128, 0); std::sprintf((char *)&file_name_buf, "/dev/awsmgmt%d", mBoardNumber); mMgtHandle = open(file_name_buf, O_RDWR | O_SYNC); - if (xclGetDeviceInfo2(&mDeviceInfo)) { - close(mUserHandle); - mUserHandle = -1; + if(mMgtHandle > 0) { + if (xclGetDeviceInfo2(&mDeviceInfo)) { + close(mMgtHandle); + mMgtHandle = -1; + } + } else { + std::cout << "Cannot open mgmtPF: " << devName << std::endl; } #else - ocl_kernel_bar = PCI_BAR_HANDLE_INIT; - ocl_global_mem_bar = PCI_BAR_HANDLE_INIT; - sda_mgmt_bar = PCI_BAR_HANDLE_INIT; + int slot_id = mBoardNumber; + ocl_kernel_bar = -1; + ocl_global_mem_bar = -1; + sda_mgmt_bar = -1; if (xclGetDeviceInfo2(&mDeviceInfo)) { - // print error; - } - else - if (fpga_pci_attach(slot_id, FPGA_APP_PF, APP_PF_BAR0, 0, &ocl_kernel_bar) ) { - ocl_kernel_bar = 
PCI_BAR_HANDLE_INIT; - // print error - } - else - if (fpga_pci_attach(slot_id, FPGA_APP_PF, APP_PF_BAR4, 0, &ocl_global_mem_bar) ) { - fpga_pci_detach(ocl_kernel_bar); - ocl_kernel_bar = PCI_BAR_HANDLE_INIT; - ocl_global_mem_bar = PCI_BAR_HANDLE_INIT; - sda_mgmt_bar = PCI_BAR_HANDLE_INIT; - // print error - } - else - if (fpga_pci_attach(slot_id, FPGA_MGMT_PF, MGMT_PF_BAR4, 0, &sda_mgmt_bar) ) { - // print error + std::cout << "ERROR AwsXcl: DeviceInfo failed for slot# " << std::dec << slot_id << std::endl; + } else if (fpga_pci_attach(slot_id, FPGA_APP_PF, APP_PF_BAR0, 0, &ocl_kernel_bar) ) { + ocl_kernel_bar = -1; + std::cout << "ERROR AwsXcl: PCI kernel bar attach failed for slot# " << std::dec << slot_id << std::endl; + } else if (fpga_pci_attach(slot_id, FPGA_APP_PF, APP_PF_BAR4, 0, &ocl_global_mem_bar) ) { + fpga_pci_detach(ocl_kernel_bar); + ocl_kernel_bar = -1; + ocl_global_mem_bar = -1; + sda_mgmt_bar = -1; + std::cout << "ERROR AwsXcl: PCI global bar attach failed for slot# " << std::dec << slot_id << std::endl; + } else if (fpga_pci_attach(slot_id, FPGA_MGMT_PF, MGMT_PF_BAR4, 0, &sda_mgmt_bar) ) { fpga_pci_detach(ocl_kernel_bar); fpga_pci_detach(ocl_global_mem_bar); - ocl_kernel_bar = PCI_BAR_HANDLE_INIT; - ocl_global_mem_bar = PCI_BAR_HANDLE_INIT; - sda_mgmt_bar = PCI_BAR_HANDLE_INIT; - } + ocl_kernel_bar = -1; + ocl_global_mem_bar = -1; + sda_mgmt_bar = -1; + std::cout << "ERROR AwsXcl: PCI mgmt bar attach failed for slot# " << std::dec << slot_id << std::endl; + } #endif - initMemoryManager(); + + // + // Profiling - defaults + // Class-level defaults: mIsDebugIpLayoutRead = mIsDeviceProfiling = false + mDevUserName = xcldev::pci_device_scanner::device_list[mBoardNumber].user_name; + mMemoryProfilingNumberSlots = 0; + mPerfMonFifoCtrlBaseAddress = 0x00; + mPerfMonFifoReadBaseAddress = 0x00; + // + // Profiling - defaults + // Class-level defaults: mIsDebugIpLayoutRead = mIsDeviceProfiling = false + mDevUserName = 
xcldev::pci_device_scanner::device_list[mBoardNumber].user_name; + mMemoryProfilingNumberSlots = 0; + mPerfMonFifoCtrlBaseAddress = 0x00; + mPerfMonFifoReadBaseAddress = 0x00; + + // + // Profiling - defaults + // Class-level defaults: mIsDebugIpLayoutRead = mIsDeviceProfiling = false + mDevUserName = xcldev::pci_device_scanner::device_list[mBoardNumber].user_name; + mMemoryProfilingNumberSlots = 0; + mPerfMonFifoCtrlBaseAddress = 0x00; + mPerfMonFifoReadBaseAddress = 0x00; } bool AwsXcl::isGood() const { - if (!mDataMover) - return false; #ifdef INTERNAL_TESTING - if (mUserHandle < 0) + if (mUserHandle < 0) { + std::cout << "AwsXcl: Bad handle. No userPF Handle" << std::endl; return false; - if (mMgtHandle < 0) + } + if (mMgtHandle < 0) { + std::cout << "AwsXcl: Bad handle. No mgmtPF Handle" << std::endl; return false; + } #else - if (ocl_kernel_bar < 0) - return false; - if (ocl_global_mem_bar < 0) - return false; - if (sda_mgmt_bar < 0) + if (ocl_kernel_bar < 0) { + std::cout << "WARNING: AwsXcl isGood: kernel, global & mgmt bar are: " << std::hex << ocl_kernel_bar << ", " << std::hex << ocl_global_mem_bar << ", " << sda_mgmt_bar << std::endl; + return false; + } + if (ocl_global_mem_bar < 0) { + std::cout << "WARNING: AwsXcl isGood: kernel, global & mgmt bar are: " << std::hex << ocl_kernel_bar << ", " << std::hex << ocl_global_mem_bar << ", " << sda_mgmt_bar << std::endl; + return false; + } + if (sda_mgmt_bar < 0) { + std::cout << "WARNING: AwsXcl isGood: kernel, global & mgmt bar are: " << std::hex << ocl_kernel_bar << ", " << std::hex << ocl_global_mem_bar << ", " << sda_mgmt_bar << std::endl; + return false; + } + if (mUserHandle <= 0) { + std::cout << "WARNING: AwsXcl isGood: invalid user handle." 
<< std::endl; return false; + } #endif - return mDataMover->isGood(); - // TODO: Add sanity check for card state + return true; } - int AwsXcl::pcieBarRead(unsigned int pf_bar, unsigned long long offset, void* buffer, unsigned long long length) { - const char *mem = 0; + int AwsXcl::pcieBarRead(int bar_num, unsigned long long offset, void* buffer, unsigned long long length) { char *qBuf = (char *)buffer; - switch (pf_bar) { + switch (bar_num) { #ifdef INTERNAL_TESTING + const char *mem = 0; case 0: { if ((length + offset) > MMAP_SIZE_USER) { @@ -714,7 +698,7 @@ namespace awsbwhal { #ifdef INTERNAL_TESTING *(unsigned *)qBuf = *(unsigned *)(mem + offset); #else - fpga_pci_peek(ocl_kernel_bar, (uint64_t)offset,(uint32_t*)qBuf); + fpga_pci_peek(ocl_kernel_bar, (uint64_t)offset,(uint32_t*)qBuf); #endif offset += 4; qBuf += 4; @@ -725,7 +709,7 @@ namespace awsbwhal { *qBuf = *(mem + offset); #else - // TODO - add support for sub 4-byte read in AWS platform + // TODO - add support for sub 4-byte read in AWS platform #endif offset++; qBuf++; @@ -734,210 +718,197 @@ namespace awsbwhal { // std::memcpy(buffer, mem + offset, length); return 0; - } + } - int AwsXcl::pcieBarWrite(unsigned int pf_bar, unsigned long long offset, const void* buffer, - unsigned long long length) { - char *mem = 0; - char *qBuf = (char *)buffer; - switch (pf_bar) { + int AwsXcl::pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length) { + char *qBuf = (char *)buffer; + switch (bar_num) { #ifdef INTERNAL_TESTING - case ACCELERATOR_BAR: - { - if ((length + offset) > MMAP_SIZE_USER) { - return -1; - } - mem = mUserMap; - break; - } + char *mem = 0; + case 0: + { + if ((length + offset) > MMAP_SIZE_USER) { + return -1; + } + mem = mUserMap; #else - case APP_PF_BAR0: - { - - break; - } + case APP_PF_BAR0: + { #endif - default: - { - return -1; - } - } + break; + } + default: + { + return -1; + } + } - while (length >= 4) { + while (length >= 4) { #ifdef 
INTERNAL_TESTING - *(unsigned *)(mem + offset) = *(unsigned *)qBuf; + *(unsigned *)(mem + offset) = *(unsigned *)qBuf; #else - fpga_pci_poke(ocl_kernel_bar, uint64_t (offset), *((uint32_t*) qBuf)); + fpga_pci_poke(ocl_kernel_bar, uint64_t (offset), *((uint32_t*) qBuf)); #endif - offset += 4; - qBuf += 4; - length -= 4; - } - while (length) { + offset += 4; + qBuf += 4; + length -= 4; + } + while (length) { #ifdef INTERNEL_TESTING - *(mem + offset) = *qBuf; + *(mem + offset) = *qBuf; #else - std::cout << "xclWrite - unsupported write with length not multiple of 4-bytes" << std::endl; + std::cout << "xclWrite - unsupported write with length not multiple of 4-bytes" << std::endl; #endif - offset++; - qBuf++; - length--; - } - return 0; + offset++; + qBuf++; + length--; } - bool AwsXcl::zeroOutDDR() - { - // Zero out the FPGA external DRAM Content so memory controller - // will not complain about ECC error from memory regions not - // initialized before - // In AWS F1 FPGA, the DRAM is clear before loading new AFI - // hence this API is redundant and will return false to - // make sure developers dont assume it works - - // static const unsigned long long BLOCK_SIZE = 0x4000000; +// std::memcpy(mem + offset, buffer, length); + return 0; + } + + bool AwsXcl::zeroOutDDR() + { + // Zero out the FPGA external DRAM Content so memory controller + // will not complain about ECC error from memory regions not + // initialized before + // In AWS F1 FPGA, the DRAM is clear before loading new AFI + // hence this API is redundant and will return false to + // make sure developers dont assume it works + + // static const unsigned long long BLOCK_SIZE = 0x4000000; // void *buf = 0; // if (posix_memalign(&buf, DDR_BUFFER_ALIGNMENT, BLOCK_SIZE)) // return false; // memset(buf, 0, BLOCK_SIZE); // mDataMover->pset64(buf, BLOCK_SIZE, 0, mDeviceInfo.mDDRSize/BLOCK_SIZE); // free(buf); - return false; - } + return false; + } - /* Locks a given FPGA Slot - * By levering the available lock 
infrastrucutre for the DMA - * Driver associated with the given FPGA slot - */ - bool AwsXcl::xclLockDevice() - { - if (mDataMover->lock() == false) - return false; + /* Locks a given FPGA Slot + * By levering the available lock infrastrucutre for the DMA + * Driver associated with the given FPGA slot + */ + bool AwsXcl::xclLockDevice() + { #ifdef INTERNAL_TESTING - if (flock(mUserHandle, LOCK_EX | LOCK_NB) == -1) { - mDataMover->unlock(); - return false; - } #else // FIXME: do we need to flock the ocl_kernel interface as well ? // #endif - mLocked = true; + mLocked = true; // return zeroOutDDR(); - return true; - } + return true; + } - const std::string AwsXcl::getDSAName(unsigned short deviceId, unsigned short subsystemId) - { - // Hard coded to AWS DSA name - return "xilinx:aws-vu9p-f1:4ddr-xpr-2pr:4.0"; - } + std::string AwsXcl::getDSAName(unsigned short deviceId, unsigned short subsystemId) + { + std::string dsa("xilinx_aws-vu9p-f1-04261818_dynamic_5_0"); + return dsa; + } - int AwsXcl::xclGetDeviceInfo2(xclDeviceInfo2 *info) - { - std::memset(info, 0, sizeof(xclDeviceInfo2)); - info->mMagic = 0X586C0C6C; - info->mHALMajorVersion = XCLHAL_MAJOR_VER; - info->mHALMajorVersion = XCLHAL_MINOR_VER; - info->mMinTransferSize = DDR_BUFFER_ALIGNMENT; - info->mDMAThreads = mDataMover->channelCount(); + int AwsXcl::xclGetDeviceInfo2(xclDeviceInfo2 *info) + { + std::memset(info, 0, sizeof(xclDeviceInfo2)); + info->mMagic = 0X586C0C6C; + info->mHALMajorVersion = XCLHAL_MAJOR_VER; + info->mHALMajorVersion = XCLHAL_MINOR_VER; + info->mMinTransferSize = DDR_BUFFER_ALIGNMENT; + info->mDMAThreads = 4;//AWS has four threads. 
Others have only two threads #ifdef INTERNAL_TESTING - xdma_ioc_info obj = {{0X586C0C6C, XDMA_IOCINFO}}; - /* Calling the underlying DMA driver to extract - * DMA specific configuration - * A non-zero value reprent at error - */ - int ret = ioctl(mUserHandle, XDMA_IOCINFO, &obj); - // Log the return value for further debug - if (ret) - return ret; - - awsmgmt_ioc_info mgmt_info_obj; - ret = ioctl(mMgtHandle, AWSMGMT_IOCINFO, &mgmt_info_obj); - if (ret) - return ret; - - for (int i = 0; i < 4 ; ++i) { - info->mOCLFrequency[i] = mgmt_info_obj.ocl_frequency[i]; - } - info->mVendorId = obj.vendor; - info->mDeviceId = obj.device; - info->mSubsystemId = obj.subsystem_device; - info->mSubsystemVendorId = obj.subsystem_vendor; - info->mDeviceVersion = obj.subsystem_device & 0x00ff; - info->mPCIeLinkWidth = mgmt_info_obj.pcie_link_width; - info->mPCIeLinkSpeed = mgmt_info_obj.pcie_link_speed; + /* Sarab disabling xdma ioctl + xdma_ioc_info obj = {{0X586C0C6C, XDMA_IOCINFO}}; + /--* Calling the underlying DMA driver to extract + * DMA specific configuration + * A non-zero value reprent at error + *--/ + int ret = ioctl(mUserHandle, XDMA_IOCINFO, &obj); + // Log the return value for further debug + if (ret) + return ret; + info->mVendorId = obj.vendor; + info->mDeviceId = obj.device; + info->mSubsystemId = obj.subsystem_device; + info->mSubsystemVendorId = obj.subsystem_vendor; + info->mDeviceVersion = obj.subsystem_device & 0x00ff; + */ + awsmgmt_ioc_info mgmt_info_obj; + int ret = ioctl(mMgtHandle, AWSMGMT_IOCINFO, &mgmt_info_obj); + if (ret) + return ret; + + info->mVendorId = mgmt_info_obj.vendor; + info->mDeviceId = mgmt_info_obj.device; + info->mSubsystemId = mgmt_info_obj.subsystem_device; + info->mSubsystemVendorId = mgmt_info_obj.subsystem_vendor; + info->mDeviceVersion = mgmt_info_obj.subsystem_device & 0x00ff; + info->mPCIeLinkWidth = mgmt_info_obj.pcie_link_width; + info->mPCIeLinkSpeed = mgmt_info_obj.pcie_link_speed; + for (int i = 0; i < 
AWSMGMT_NUM_SUPPORTED_CLOCKS; ++i) { + info->mOCLFrequency[i] = mgmt_info_obj.ocl_frequency[i]; + } + info->mMigCalib = true; + for (int i = 0; i < 4; i++) { + info->mMigCalib = info->mMigCalib && mgmt_info_obj.mig_calibration[i]; + } #else - struct fpga_slot_spec slot_info; - fpga_pci_get_slot_spec(mBoardNumber, &slot_info); - info->mVendorId = slot_info.map[FPGA_APP_PF].vendor_id; - info->mDeviceId = slot_info.map[FPGA_APP_PF].device_id; -// FIXME - update next 3 variables -// info->mSubsystemId = 0; - info->mSubsystemVendorId = 0; - info->mDeviceVersion = 0; - - for (int i = 0; i < 4 ; ++i) { - info->mOCLFrequency[i] = 0; - } - info->mPCIeLinkWidth = 16;// PCIe Gen3 x16 bus - info->mPCIeLinkSpeed = 8; // 8Gbps Gen3 in AWS F1 + struct fpga_slot_spec slot_info; + //fpga_pci_get_slot_spec(mBoardNumber,FPGA_APP_PF, &slot_info); + fpga_pci_get_slot_spec(mBoardNumber, &slot_info); + info->mVendorId = slot_info.map[0].vendor_id; + info->mDeviceId = slot_info.map[0].device_id; + // FIXME - update next 3 variables + info->mSubsystemId = slot_info.map[0].subsystem_device_id; + info->mSubsystemVendorId = slot_info.map[0].subsystem_vendor_id; + info->mDeviceVersion = 0; + info->mPCIeLinkWidth = 16; + info->mPCIeLinkSpeed = 8000; + fpga_mgmt_image_info imageInfo; + fpga_mgmt_describe_local_image( mBoardNumber, &imageInfo, 0 ); + for (int i = 0; i < AWSMGMT_NUM_SUPPORTED_CLOCKS; ++i) { + info->mOCLFrequency[i] = imageInfo.metrics.clocks[i].frequency[0] / 1000000; + } + info->mMigCalib = true; #endif - - // F1 has 16 GiB per channel - info->mDDRSize = 0x400000000 * 4; - info->mDataAlignment = DDR_BUFFER_ALIGNMENT; - info->mNumClocks = 4; - // Number of available channels - // TODO: add support for other FPGA configurations with less - // than 4 DRAM channels - info->mDDRBankCount = 4; - - for (auto i : mDDRMemoryManager) { - info->mDDRFreeSize += i->freeSize(); - } - - const std::string deviceName = getDSAName(info->mDeviceId, info->mSubsystemId); - if (mLogStream.is_open()) + 
// F1 has 16 GiB per channel + info->mDDRSize = 0x400000000 * 4; + info->mDataAlignment = DDR_BUFFER_ALIGNMENT; + info->mNumClocks = AWSMGMT_NUM_ACTUAL_CLOCKS; + // Number of available channels + // TODO: add support for other FPGA configurations with less + // than 4 DRAM channels + info->mDDRBankCount = 4; + + const std::string deviceName = getDSAName(info->mDeviceId, info->mSubsystemId); + if (mLogStream.is_open()) mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << deviceName << std::endl; - std::size_t length = deviceName.copy(info->mName, deviceName.length(),0); - info->mName[length] = '\0'; + std::size_t length = deviceName.copy(info->mName, deviceName.length(),0); + info->mName[length] = '\0'; - if (mLogStream.is_open()) { - mLogStream << __func__ << ": name=" << deviceName << ", version=0x" << std::hex << info->mDeviceVersion - << ", clock freq=" << std::dec << info->mOCLFrequency[0] - << ", clock freq 2=" << std::dec << info->mOCLFrequency[1] << std::endl; - } - - info->mMigCalib = true; - for (int i = 0; i < 4; i++) { -#ifdef INTERNAL_TEST - info->mMigCalib = info->mMigCalib && mgmt_info_obj.mig_calibration[i]; -#else - info->mMigCalib = 1; -#endif - } - //TODO: Umang - info->mOnChipTemp = 25; - info->mFanTemp = 0; - info->mVInt = 0.9; - info->mVAux = 0.9; - info->mVBram = 0.9; - return 0; + if (mLogStream.is_open()) { + mLogStream << __func__ << ": name=" << deviceName << ", version=0x" << std::hex << info->mDeviceVersion + << ", clock freq=" << std::dec << info->mOCLFrequency[0] + << ", clock freq 2=" << std::dec << info->mOCLFrequency[1] << std::endl; } - int AwsXcl::resetDevice(xclResetKind kind) { - for (auto i : mDDRMemoryManager) { - i->reset(); - } + info->mOnChipTemp = 25; + info->mFanTemp = 0; + info->mVInt = 0.9; + info->mVAux = 0.9; + info->mVBram = 0.9; + return 0; + } + + int AwsXcl::resetDevice(xclResetKind kind) { - // Call a new IOCTL to just reset the OCL region - // TODO : umang + // Call a new IOCTL to just reset 
the OCL region // if (kind == XCL_RESET_FULL) { // xdma_ioc_base obj = {0X586C0C6C, XDMA_IOCHOTRESET}; // return ioctl(mUserHandle, XDMA_IOCHOTRESET, &obj); @@ -948,230 +919,624 @@ namespace awsbwhal { // } // return -EINVAL; - // AWS FIXME - add reset - return 0; - } + // AWS FIXME - add reset + return 0; + } - int AwsXcl::xclReClock2(unsigned short region, const unsigned short *targetFreqMHz) - { -#ifdef INTERNAL_TESTING - awsmgmt_ioc_freqscaling obj = {0, targetFreqMHz[0], targetFreqMHz[1], 0, 0}; + int AwsXcl::xclReClock2(unsigned short region, const unsigned short *targetFreqMHz) + { + #ifdef INTERNAL_TESTING + awsmgmt_ioc_freqscaling obj = {0, targetFreqMHz[0], targetFreqMHz[1], targetFreqMHz[2], 0}; return ioctl(mMgtHandle, AWSMGMT_IOCFREQSCALING, &obj); -#else + #else // # error "INTERNAL_TESTING macro disabled. AMZN code goes here. " // # This API is not supported in AWS, the frequencies are set per AFI - return 0; -#endif - } + return 0; + #endif } - - xclDeviceHandle xclOpen(unsigned index, const char *logfileName, xclVerbosityLevel level) + ssize_t AwsXcl::xclUnmgdPwrite(unsigned flags, const void *buf, size_t count, uint64_t offset) { - if (index >= awsbwhal::deviceList.size()) - return 0; - awsbwhal::AwsXcl *handle = new awsbwhal::AwsXcl(index, logfileName, level); - if (!awsbwhal::AwsXcl::handleCheck(handle)) { - delete handle; - handle = 0; - } - return (xclDeviceHandle *)handle; + if (flags) + return -EINVAL; + drm_xocl_pwrite_unmgd unmgd = {0, 0, offset, count, reinterpret_cast(buf)}; + return ioctl(mUserHandle, DRM_IOCTL_XOCL_PWRITE_UNMGD, &unmgd); } - void xclClose(xclDeviceHandle handle) + ssize_t AwsXcl::xclUnmgdPread(unsigned flags, void *buf, size_t count, uint64_t offset) { - if (awsbwhal::AwsXcl::handleCheck(handle)) { - delete ((awsbwhal::AwsXcl *)handle); - } + if (flags) + return -EINVAL; + drm_xocl_pread_unmgd unmgd = {0, 0, offset, count, reinterpret_cast(buf)}; + return ioctl(mUserHandle, DRM_IOCTL_XOCL_PREAD_UNMGD, &unmgd); } - int 
xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info) + int AwsXcl::xclExportBO(unsigned int boHandle) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclGetDeviceInfo2(info); + drm_prime_handle info = {boHandle, 0, -1}; + int result = ioctl(mUserHandle, DRM_IOCTL_PRIME_HANDLE_TO_FD, &info); + return !result ? info.fd : result; } - int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer) + unsigned int AwsXcl::xclImportBO(int fd, unsigned flags) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclLoadXclBin(buffer); + + /*Sarab + drm_xocl_userptr_bo user = {reinterpret_cast(userptr), size, mNullBO, flags}; + int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_USERPTR_BO, &user); + + */ + + + drm_prime_handle info = {mNullBO, flags, fd}; + int result = ioctl(mUserHandle, DRM_IOCTL_PRIME_FD_TO_HANDLE, &info); + if (result) { + std::cout << __func__ << " ERROR: FD to handle IOCTL failed" << std::endl; + } + return !result ? info.handle : mNullBO; } - size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size) + int AwsXcl::xclGetBOProperties(unsigned int boHandle, xclBOProperties *properties) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclWrite(space, offset, hostBuf, size); + drm_xocl_info_bo info = {boHandle, 0, 0, 0}; + int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &info); + properties->handle = info.handle; + properties->flags = info.flags; + properties->size = info.size; + properties->paddr = info.paddr; + properties->domain = XCL_BO_DEVICE_RAM; // currently all BO domains are XCL_BO_DEVICE_RAM + return result ? 
mNullBO : 0; } - size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size) + bool AwsXcl::xclUnlockDevice() { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclRead(space, offset, hostBuf, size); + flock(mUserHandle, LOCK_UN); + mLocked = false; + return true; } - - uint64_t xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size) + // Assume that the memory is always + // created for the device ddr for now. Ignoring the flags as well. + unsigned int AwsXcl::xclAllocBO(size_t size, xclBOKind domain, unsigned flags) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclAllocDeviceBuffer(size); + drm_xocl_create_bo info = {size, mNullBO, flags}; + int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_CREATE_BO, &info); + if (result) { + std::cout << __func__ << " ERROR: AllocBO IOCTL failed" << std::endl; + } + return result ? mNullBO : info.handle; } - - uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, xclMemoryDomains domain, - unsigned flags) + unsigned int AwsXcl::xclAllocUserPtrBO(void *userptr, size_t size, unsigned flags) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclAllocDeviceBuffer2(size, domain, flags); + drm_xocl_userptr_bo user = {reinterpret_cast(userptr), size, mNullBO, flags}; + int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_USERPTR_BO, &user); + return result ? 
mNullBO : user.handle; } - - void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf) + void AwsXcl::xclFreeBO(unsigned int boHandle) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return; - return drv->xclFreeDeviceBuffer(buf); + drm_gem_close closeInfo = {boHandle, 0}; + ioctl(mUserHandle, DRM_IOCTL_GEM_CLOSE, &closeInfo); } - - size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, const void *src, size_t size, size_t seek) + int AwsXcl::xclWriteBO(unsigned int boHandle, const void *src, size_t size, size_t seek) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclCopyBufferHost2Device(dest, src, size, seek); + drm_xocl_pwrite_bo pwriteInfo = { boHandle, 0, seek, size, reinterpret_cast(src) }; + return ioctl(mUserHandle, DRM_IOCTL_XOCL_PWRITE_BO, &pwriteInfo); } - - size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, uint64_t src, size_t size, size_t skip) + int AwsXcl::xclReadBO(unsigned int boHandle, void *dst, size_t size, size_t skip) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclCopyBufferDevice2Host(dest, src, size, skip); + drm_xocl_pread_bo preadInfo = { boHandle, 0, skip, size, reinterpret_cast(dst) }; + return ioctl(mUserHandle, DRM_IOCTL_XOCL_PREAD_BO, &preadInfo); } - -//This will be deprecated. 
- int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName) + void *AwsXcl::xclMapBO(unsigned int boHandle, bool write) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return xclUpgradeFirmware2(handle, fileName, 0); + drm_xocl_info_bo info = { boHandle, 0, 0 }; + int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &info); + if (result) + return nullptr; + + drm_xocl_map_bo mapInfo = { boHandle, 0, 0 }; + result = ioctl(mUserHandle, DRM_IOCTL_XOCL_MAP_BO, &mapInfo); + if (result) + return nullptr; + + return mmap(0, info.size, (write ? (PROT_READ|PROT_WRITE) : PROT_READ), + MAP_SHARED, mUserHandle, mapInfo.offset); } - int xclUpgradeFirmware2(xclDeviceHandle handle, const char *fileName1, const char* fileName2) + int AwsXcl::xclSyncBO(unsigned int boHandle, xclBOSyncDirection dir, + size_t size, size_t offset) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return -ENOSYS; + drm_xocl_sync_bo_dir drm_dir = (dir == XCL_BO_SYNC_BO_TO_DEVICE) ? + DRM_XOCL_SYNC_BO_TO_DEVICE : + DRM_XOCL_SYNC_BO_FROM_DEVICE; + drm_xocl_sync_bo syncInfo = {boHandle, 0, size, offset, drm_dir}; + return ioctl(mUserHandle, DRM_IOCTL_XOCL_SYNC_BO, &syncInfo); } - int xclBootFPGA(xclDeviceHandle handle) +#ifndef INTERNAL_TESTING + int AwsXcl::loadDefaultAfiIfCleared( void ) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return -ENOSYS; + int array_len = 16; + fpga_slot_spec spec_array[ array_len ]; + std::memset( spec_array, mBoardNumber, sizeof(fpga_slot_spec) * array_len ); + fpga_pci_get_all_slot_specs( spec_array, array_len ); + if( spec_array[mBoardNumber].map[FPGA_APP_PF].device_id == AWS_UserPF_DEVICE_ID ) { + std::string agfi = DEFAULT_GLOBAL_AFI; + fpga_mgmt_load_local_image( mBoardNumber, const_cast(agfi.c_str()) ); + if( sleepUntilLoaded( agfi ) ) { + std::cout << "ERROR: Sleep until load failed." 
<< std::endl; + return -1; + } + fpga_pci_rescan_slot_app_pfs( mBoardNumber ); + } + return 0; } - unsigned xclProbe() + int AwsXcl::sleepUntilLoaded( const std::string afi ) { - std::lock_guard lock(awsbwhal::deviceListMutex); - if (awsbwhal::deviceList.size()) - return awsbwhal::deviceList.size(); + fpga_mgmt_image_info info; + int max_retries = 20; + int seconds_to_wait = 5; - unsigned i = 0; -#ifndef INTERNAL_TESTING - if (fpga_mgmt_init() || fpga_pci_init() ) { - std::cout << "xclProbe failed to initialized fpga libraries" << std::endl; - return 0; - } - fpga_slot_spec spec_array[16]; - std::memset(spec_array, 0, sizeof(fpga_slot_spec) * 16); - if (fpga_pci_get_all_slot_specs(spec_array, 16)) - return 0; + for( int i = 0; i < max_retries; i++ ) { + // Wait for 10 seconds before checking status + std::this_thread::sleep_for( std::chrono::seconds( seconds_to_wait ) ); + + std::memset( &info, 0, sizeof(struct fpga_mgmt_image_info) ); - unsigned short domain = 0; - unsigned char bus = 0, dev = 0, func = 0; - for (i = 0; i < 16; i++) { - if (spec_array[i].map[FPGA_APP_PF].vendor_id == 0) + // Get describe result in the info object + int result = fpga_mgmt_describe_local_image( mBoardNumber, &info, 0 ); + if( result ) { + std::cout << "ERROR: Load image failed." << std::endl; + return 1; + } + if( (info.status == FPGA_STATUS_LOADED) && !std::strcmp(info.ids.afi_id, const_cast(afi.c_str())) ) { break; + } - domain = spec_array[i].map[FPGA_APP_PF].domain; - bus = spec_array[i].map[FPGA_APP_PF].bus; - dev = spec_array[i].map[FPGA_APP_PF].dev; - func = spec_array[i].map[FPGA_APP_PF].func; + // Increment wait time + seconds_to_wait++; + } - int dmaIndex = awsbwhal::findDMADevice(domain, bus, dev, func); - if (dmaIndex < 0) - break; - awsbwhal::deviceList.push_back(dmaIndex); + // If after the timeout we check once more if our status is LOADED + if( info.status != FPGA_STATUS_LOADED ) { + std::cout << "ERROR: Load image failed after waiting till timeout." 
<< std::endl; + return 1; + } - std::cout << "Device/Slot[" << i << "] (/dev/xdma" << dmaIndex << ", " << std::hex << domain << ":" << (unsigned)bus << ":" << (unsigned)dev << "." << (unsigned)func << std::dec << ')' << std::endl; + // After the AFI is loaded, we check again if the AFI ID is the correct one + if( std::strcmp(info.ids.afi_id, const_cast(afi.c_str())) ) { + std::cout << "ERROR: AFI loaded is not the one we are waiting on." << std::endl; + return 1; } -#else - char file_name_buf[128]; - for (i = 0; i < 16; i++) { - std::sprintf((char *)&file_name_buf, "/dev/awsmgmt%d", i); - int fd = open(file_name_buf, O_RDWR); - if (fd < 0) - break; - awsmgmt_ioc_info info; - if (ioctl(fd, AWSMGMT_IOCINFO, &info)) - break; - close(fd); - int dmaIndex = awsbwhal::findDMADevice(info.domain, info.bus, info.dev, 0); - if (dmaIndex < 0) - break; - awsbwhal::deviceList.push_back(dmaIndex); - std::cout << "Device[" << i << "] (/dev/xdma" << dmaIndex << ", " << std::hex << info.domain << ":" << (unsigned)info.bus << ":" << (unsigned)info.dev << ".0" << std::dec << ')' << std::endl; + + // If we have reached here, things look good + return 0; + } + + int AwsXcl::checkAndSkipReload( char *afi_id, fpga_mgmt_image_info *orig_info ) + { + if( (orig_info->status == FPGA_STATUS_LOADED) && !std::strcmp(orig_info->ids.afi_id, afi_id) ) { + std::cout << "This AFI already loaded. Skip reload!" << std::endl; + int result = 0; + //existing afi matched. + uint16_t status = 0; + result = fpga_mgmt_get_vDIP_status(mBoardNumber, &status); + if(result) { + printf("Error: can not get virtual DIP Switch state\n"); + return result; + } + //Set bit 0 to 1 + status |= (1 << 0); + result = fpga_mgmt_set_vDIP(mBoardNumber, status); + if(result) { + printf("Error trying to set virtual DIP Switch \n"); + return result; + } + std::this_thread::sleep_for(std::chrono::microseconds(250)); + //pulse the changes in. 
+ result = fpga_mgmt_get_vDIP_status(mBoardNumber, &status); + if(result) { + printf("Error: can not get virtual DIP Switch state\n"); + return result; + } + //Set bit 0 to 0 + status &= ~(1 << 0); + result = fpga_mgmt_set_vDIP(mBoardNumber, status); + if(result) { + printf("Error trying to set virtual DIP Switch \n"); + return result; + } + std::this_thread::sleep_for(std::chrono::microseconds(250)); + + printf("Successfully skipped reloading of local image.\n"); + return result; + } else { + std::cout << "AFI not yet loaded, proceed to download." << std::endl; + return 1; } + } #endif +} /* end namespace awsbmhal */ + +xclDeviceHandle xclOpen(unsigned deviceIndex, const char *logFileName, xclVerbosityLevel level) +{ + if(xcldev::pci_device_scanner::device_list.size() <= deviceIndex) { + printf("Cannot find index %d \n", deviceIndex); + return nullptr; + } + + awsbwhal::AwsXcl *handle = new awsbwhal::AwsXcl(deviceIndex, logFileName, level); + if (!awsbwhal::AwsXcl::handleCheck(handle)) { + printf("WARNING: xclOpen Handle check failed\n"); + delete handle; + handle = nullptr; #ifndef INTERNAL_TESTING - std::cout << "xclProbe found " << i << " FPGA slots with XDMA driver running" << std::endl; -#else - std::cout << "xclProbe found " << i << " FPGA slots with baremetal driver running" << std::endl; + /* workaround necessary to load a default afi and program with xclbin when device is in a cleared state */ + xcldev::pci_device_scanner rescan; + rescan.clear_device_list(); + rescan.scan( true ); + for (unsigned int i=0; i(handle); +} + +void xclClose(xclDeviceHandle handle) { + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (drv) + delete drv; +} - int xclResetDevice(xclDeviceHandle handle, xclResetKind kind) +int xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclGetDeviceInfo2(info); +} + +int xclLoadBitstream(xclDeviceHandle 
handle, const char *xclBinFileName) +{ + return -ENOSYS; +} + +int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclLoadXclBin(buffer); +} + +size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclWrite(space, offset, hostBuf, size); +} + +size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclRead(space, offset, hostBuf, size); +} + + +uint64_t xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclAllocDeviceBuffer(size); +} + + +uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, xclMemoryDomains domain, + unsigned flags) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclAllocDeviceBuffer2(size, domain, flags); +} + + +void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return; + return drv->xclFreeDeviceBuffer(buf); +} + + +size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, const void *src, size_t size, size_t seek) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclCopyBufferHost2Device(dest, src, size, seek); +} + + +size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, uint64_t src, size_t size, size_t skip) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclCopyBufferDevice2Host(dest, 
src, size, skip); +} + + +//This will be deprecated. +int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return xclUpgradeFirmware2(handle, fileName, 0); +} + +int xclUpgradeFirmware2(xclDeviceHandle handle, const char *fileName1, const char* fileName2) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return -ENOSYS; +} + +/* + * xclBootFPGA + * + * Sequence: + * 1) call boot ioctl + * 2) close the device, unload the driver + * 3) remove and scan + * 4) rescan pci devices + * 5) reload the driver (done by the calling function xcldev::boot()) + * + * Return 0 on success, -1 on failure. + */ +int xclBootFPGA(xclDeviceHandle handle) +{ +// awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); +// if (!drv) +// return -1; +// return -ENOSYS; + int retVal = -1; + + //awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); +// retVal = drv->xclBootFPGA(); // boot ioctl + retVal = 0; // skip boot ioctl since this may not be possible for AWS + + if( retVal == 0 ) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return -ENOSYS; + xclClose(handle); // close the device, unload the driver + retVal = xclRemoveAndScanFPGA(); // remove and scan } - int xclReClock2(xclDeviceHandle handle, unsigned short region, const unsigned short *targetFreqMHz) + if( retVal == 0 ) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclReClock2(region, targetFreqMHz); + xcldev::pci_device_scanner devScanner; + devScanner.scan( true ); // rescan pci devices } + return retVal; +} + +int xclRemoveAndScanFPGA( void ) +{ + const std::string devPath = "/devices/"; + const std::string removePath = "/remove"; + const std::string pciPath = "/sys/bus/pci"; + const std::string rescanPath = "/rescan"; + const char *input = "1\n"; - int 
xclLockDevice(xclDeviceHandle handle) + // remove devices "echo 1 > /sys/bus/pci/devices//remove" + for (unsigned int i = 0; i < xcldev::pci_device_scanner::device_list.size(); i++) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclLockDevice() ? 0 : -1; + std::string dev_name_pf_user = pciPath + devPath + xcldev::pci_device_scanner::device_list[i].user_name + removePath; + std::string dev_name_pf_mgmt = pciPath + devPath + xcldev::pci_device_scanner::device_list[i].mgmt_name + removePath; + + std::ofstream userFile( dev_name_pf_user ); + if( !userFile.is_open() ) { + perror( dev_name_pf_user.c_str() ); + return errno; + } + userFile << input; + + std::ofstream mgmtFile( dev_name_pf_mgmt ); + if( !mgmtFile.is_open() ) { + perror( dev_name_pf_mgmt.c_str() ); + return errno; + } + mgmtFile << input; + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + // initiate rescan "echo 1 > /sys/bus/pci/rescan" + std::ofstream rescanFile( pciPath + rescanPath ); + if( !rescanFile.is_open() ) { + perror( std::string( pciPath + rescanPath ).c_str() ); + return errno; } + rescanFile << input; + + return 0; +} + +unsigned xclProbe() +{ + return awsbwhal::AwsXcl::xclProbe(); +} + +int xclResetDevice(xclDeviceHandle handle, xclResetKind kind) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return -ENOSYS; +} + +int xclReClock2(xclDeviceHandle handle, unsigned short region, const unsigned short *targetFreqMHz) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclReClock2(region, targetFreqMHz); +} + + +int xclLockDevice(xclDeviceHandle handle) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return drv->xclLockDevice() ? 
0 : -1; +} + +//Sarab: Added for HAL2 support with XOCL GEM Driver + +int xclExportBO(xclDeviceHandle handle, unsigned int boHandle) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclExportBO(boHandle) : -ENODEV; +} + + +unsigned int xclImportBO(xclDeviceHandle handle, int fd, unsigned flags) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) { + std::cout << __func__ << ", " << std::this_thread::get_id() << ", handle & XOCL Device are bad" << std::endl; + } + return drv ? drv->xclImportBO(fd, flags) : -ENODEV; +} + +ssize_t xclUnmgdPwrite(xclDeviceHandle handle, unsigned flags, const void *buf, + size_t count, uint64_t offset) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclUnmgdPwrite(flags, buf, count, offset) : -ENODEV; +} + +int xclGetBOProperties(xclDeviceHandle handle, unsigned int boHandle, xclBOProperties *properties) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclGetBOProperties(boHandle, properties) : -ENODEV; +} + +ssize_t xclUnmgdPread(xclDeviceHandle handle, unsigned flags, void *buf, + size_t count, uint64_t offset) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclUnmgdPread(flags, buf, count, offset) : -ENODEV; +} + +int xclUpgradeFirmwareXSpi(xclDeviceHandle handle, const char *fileName, int index) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return -ENOSYS; + //return drv->xclUpgradeFirmwareXSpi(fileName, index); Not supported by AWS +} + +int xclUnlockDevice(xclDeviceHandle handle) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) { + std::cout << "xclUnlockDevice returning -ENODEV\n"; + return -ENODEV; + } else { + return drv->xclUnlockDevice() ? 
0 : 1; + } +} + +unsigned int xclAllocBO(xclDeviceHandle handle, size_t size, xclBOKind domain, unsigned flags) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclAllocBO(size, domain, flags) : -ENODEV; +} + +unsigned int xclAllocUserPtrBO(xclDeviceHandle handle, void *userptr, size_t size, unsigned flags) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclAllocUserPtrBO(userptr, size, flags) : -ENODEV; +} + +void xclFreeBO(xclDeviceHandle handle, unsigned int boHandle) { + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return; + drv->xclFreeBO(boHandle); +} + +size_t xclWriteBO(xclDeviceHandle handle, unsigned int boHandle, const void *src, size_t size, + size_t seek) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclWriteBO(boHandle, src, size, seek) : -ENODEV; +} + +size_t xclReadBO(xclDeviceHandle handle, unsigned int boHandle, void *dst, size_t size, + size_t skip) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclReadBO(boHandle, dst, size, skip) : -ENODEV; +} + +void *xclMapBO(xclDeviceHandle handle, unsigned int boHandle, bool write) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? drv->xclMapBO(boHandle, write) : nullptr; +} + + +int xclSyncBO(xclDeviceHandle handle, unsigned int boHandle, xclBOSyncDirection dir, + size_t size, size_t offset) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + return drv ? 
drv->xclSyncBO(boHandle, dir, size, offset) : -ENODEV; +} + +unsigned int xclVersion () { + return 2; +} + +int xclGetErrorStatus(xclDeviceHandle handle, xclErrorStatus *info) +{ + awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); + if (!drv) + return -1; + return -ENOSYS; + //return drv->xclGetErrorStatus(info); Not supported for AWS +} + +int xclXbsak(int argc, char *argv[]) +{ + return xcldev::xclXbsak(argc, argv); +} + diff --git a/SDAccel/userspace/src/shim.h b/SDAccel/userspace/src/shim.h index b52d548a..039909a3 100644 --- a/SDAccel/userspace/src/shim.h +++ b/SDAccel/userspace/src/shim.h @@ -17,20 +17,23 @@ * License for the specific language governing permissions and limitations * under the License. */ - #ifndef _XDMA_SHIM_H_ #define _XDMA_SHIM_H_ #include "xclhal.h" #include "xclperf.h" +#include "drm.h" #include #include +#include #include #include #include +#include #ifndef INTERNAL_TESTING #include "fpga_pci.h" +#include "fpga_mgmt.h" #endif // Work around GCC 4.8 + XDMA BAR implementation bugs @@ -43,6 +46,81 @@ #endif namespace awsbwhal { + + +struct AddresRange; + +std::ostream& operator<< (std::ostream &strm, const AddresRange &rng); + +/** + * Simple tuple struct to store non overlapping address ranges: address and size + */ +struct AddresRange : public std::pair { + // size will be zero when we are looking up an address that was passed by the user + AddresRange(uint64_t addr, size_t size = 0) : std::pair(std::make_pair(addr, size)) { + //std::cout << "CTOR(" << addr << ',' << size << ")\n"; + } + AddresRange(AddresRange && rhs) : std::pair(std::move(rhs)) { + //std::cout << "MOVE CTOR(" << rhs.first << ',' << rhs.second << ")\n"; + } + + AddresRange(const AddresRange &rhs) = delete; + AddresRange& operator=(const AddresRange &rhs) = delete; + + // Comparison operator is useful when using AddressRange as a key in std::map + // Note one operand in the comparator may have only the address without the size + // However both operands in 
the comparator will not have zero size + bool operator < (const AddresRange& other) const { + //std::cout << *this << " < " << other << "\n"; + if ((this->second != 0) && (other.second != 0)) + // regular ranges + return (this->first < other.first); + if (other.second == 0) + // second range just has an address + // (1000, 100) < (1200, 0) + // (1000, 100) < (1100, 0) first range ends at 1099 + return ((this->first + this->second) <= other.first); + assert(this->second == 0); + // this range just has an address + // (1100, 0) < (1200, 100) + return (this->first < other.first); + } +}; + +/** + * Simple map of address range to its bo handle and mapped virtual address + */ +static const std::pair mNullValue = std::make_pair(0xffffffff, nullptr); +class RangeTable { + std::map> mTable; + mutable std::mutex mMutex; +public: + void insert(uint64_t addr, size_t size, std::pair bo) { + // assert(find(addr) == 0xffffffff); + std::lock_guard lock(mMutex); + mTable[AddresRange(addr, size)] = bo; + } + + std::pair erase(uint64_t addr) { + std::lock_guard lock(mMutex); + std::map>::const_iterator i = mTable.find(AddresRange(addr)); + if (i == mTable.end()) + return mNullValue; + std::pair result = i->second; + mTable.erase(i); + return result; + } + + std::pair find(uint64_t addr) const { + std::lock_guard lock(mMutex); + std::map>::const_iterator i = mTable.find(AddresRange(addr)); + if (i == mTable.end()) + return mNullValue; + return i->second; + } +}; + + // Memory alignment for DDR and AXI-MM trace access template class AlignedAllocator { void *mBuffer; @@ -67,8 +145,9 @@ namespace awsbwhal { } }; - class MemoryManager; - class DataMover; + const uint64_t mNullAddr = 0xffffffffffffffffull; + const uint64_t mNullBO = 0xffffffff; + // XDMA Shim class AwsXcl{ @@ -87,14 +166,38 @@ namespace awsbwhal { typedef std::list > PairList; public: + //Sarab: Added for HAL2 XOCL Driver support + //int xclGetErrorStatus(xclErrorStatus *info); Not supported for AWS + bool 
xclUnlockDevice(); + unsigned int xclAllocBO(size_t size, xclBOKind domain, unsigned flags); + unsigned int xclAllocUserPtrBO(void *userptr, size_t size, unsigned flags); + void xclFreeBO(unsigned int boHandle); + int xclWriteBO(unsigned int boHandle, + const void *src, size_t size, size_t seek); + int xclReadBO(unsigned int boHandle, + void *dst, size_t size, size_t skip); + void *xclMapBO(unsigned int boHandle, bool write); + int xclSyncBO(unsigned int boHandle, xclBOSyncDirection dir, + size_t size, size_t offset); + int xclExportBO(unsigned int boHandle); + unsigned int xclImportBO(int fd, unsigned flags); + int xclGetBOProperties(unsigned int boHandle, xclBOProperties *properties); + ssize_t xclUnmgdPread(unsigned flags, void *buf, + size_t count, uint64_t offset); + ssize_t xclUnmgdPwrite(unsigned flags, const void *buf, + size_t count, uint64_t offset); + // Bitstreams + int xclGetXclBinIdFromSysfs(uint64_t &xclbinid); int xclLoadXclBin(const xclBin *buffer); + int xclLoadAxlf(const axlf *buffer); int xclUpgradeFirmware(const char *fileName); int xclUpgradeFirmware2(const char *file1, const char* file2); - int xclUpgradeFirmwareXSpi(const char *fileName, int device_index=0); + //int xclUpgradeFirmwareXSpi(const char *fileName, int device_index=0); Not supported by AWS int xclTestXSpi(int device_index); int xclBootFPGA(); + int xclRemoveAndScanFPGA(); int resetDevice(xclResetKind kind); int xclReClock2(unsigned short region, const unsigned short *targetFreqMHz); @@ -114,18 +217,19 @@ namespace awsbwhal { double xclGetDeviceClockFreqMHz(); double xclGetReadMaxBandwidthMBps(); double xclGetWriteMaxBandwidthMBps(); - void xclSetOclRegionProfilingNumberSlots(uint32_t numSlots); + //void xclSetOclRegionProfilingNumberSlots(uint32_t numSlots); + void xclSetProfilingNumberSlots(xclPerfMonType type, uint32_t numSlots); size_t xclPerfMonClockTraining(xclPerfMonType type); // Counters size_t xclPerfMonStartCounters(xclPerfMonType type); size_t 
xclPerfMonStopCounters(xclPerfMonType type); size_t xclPerfMonReadCounters(xclPerfMonType type, xclCounterResults& counterResults); - //debug related - uint64_t getProtocolCheckerBaseAddress(int type); uint32_t getCheckerNumberSlots(int type); + uint32_t getIPCountAddrNames(int type, uint64_t *baseAddress, std::string * portNames); size_t xclDebugReadCounters(xclDebugCountersResults* debugResult); size_t xclDebugReadCheckers(xclDebugCheckersResults* checkerResult); + void readDebugIpLayout(); // Trace size_t xclPerfMonStartTrace(xclPerfMonType type, uint32_t startTrigger); @@ -142,18 +246,18 @@ namespace awsbwhal { return mTag; } bool isGood() const; - bool is4DDR() {return m4DDR;}; ~AwsXcl(); AwsXcl(unsigned index, const char *logfileName, xclVerbosityLevel verbosity); private: - int xclLoadAxlf(const axlf *buffer); + size_t xclReadModifyWrite(uint64_t offset, const void *hostBuf, size_t size); + size_t xclReadSkipCopy(uint64_t offset, void *hostBuf, size_t size); bool zeroOutDDR(); bool isXPR() const { - return true; + return ((mDeviceInfo.mSubsystemId >> 12) == 4); } bool isMultipleOCLClockSupported() { @@ -165,14 +269,10 @@ namespace awsbwhal { bool isUltraScale() const { return (mDeviceInfo.mDeviceId & 0x8000); } - void initMemoryManager(); // Core DMA code - // Upper two bytes denote PF, lower two bytes denote BAR - // USERPF == 0x0 - // MGTPF == 0x10000 - SHIM_O2 int pcieBarRead(unsigned int pf_bar, unsigned long long offset, void* buffer, unsigned long long length); - SHIM_O2 int pcieBarWrite(unsigned int pf_bar, unsigned long long offset, const void* buffer, unsigned long long length); + SHIM_O2 int pcieBarRead(int bar_num, unsigned long long offset, void* buffer, unsigned long long length); + SHIM_O2 int pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length); int freezeAXIGate(); int freeAXIGate(); @@ -193,7 +293,6 @@ namespace awsbwhal { bool bulkErase(); bool sectorErase(unsigned Addr); bool writeEnable(); - 
int setDDRCount(const axlf* buffer); #if 0 bool dataTransfer(bool read); #endif @@ -214,7 +313,7 @@ namespace awsbwhal { bool isDSAVersion(unsigned majorVersion, unsigned minorVersion, bool onlyThisVersion); unsigned getBankCount(); uint64_t getHostTraceTimeNsec(); - uint64_t getPerfMonBaseAddress(xclPerfMonType type); + uint64_t getPerfMonBaseAddress(xclPerfMonType type, uint32_t slotNum); uint64_t getPerfMonFifoBaseAddress(xclPerfMonType type, uint32_t fifonum); uint64_t getPerfMonFifoReadBaseAddress(xclPerfMonType type, uint32_t fifonum); uint32_t getPerfMonNumberSlots(xclPerfMonType type); @@ -230,37 +329,49 @@ namespace awsbwhal { uint32_t bin2dec(const char * str, int start, int number); std::string dec2bin(uint32_t n); std::string dec2bin(uint32_t n, unsigned bits); - static const std::string getDSAName(unsigned short deviceId, unsigned short subsystemId); + static std::string getDSAName(unsigned short deviceId, unsigned short subsystemId); private: // This is a hidden signature of this class and helps in preventing // user errors when incorrect pointers are passed in as handles. 
const unsigned mTag; const int mBoardNumber; - const int mDMADeviceNodeNumber; const size_t maxDMASize; bool mLocked; const uint64_t mOffsets[XCL_ADDR_SPACE_MAX]; - DataMover *mDataMover; -#ifdef INTERNAL_TESTING int mUserHandle; +#ifdef INTERNAL_TESTING int mMgtHandle; #else - pci_bar_handle_t ocl_kernel_bar; // AppPF BAR0 for OpenCL kernels - pci_bar_handle_t sda_mgmt_bar;; // MgmtPF BAR4, for SDAccel Perf mon etc - pci_bar_handle_t ocl_global_mem_bar; // AppPF BAR4 + pci_bar_handle_t ocl_kernel_bar; // AppPF BAR0 for OpenCL kernels + pci_bar_handle_t sda_mgmt_bar; // MgmtPF BAR4, for SDAccel Perf mon etc + pci_bar_handle_t ocl_global_mem_bar; // AppPF BAR4 #endif + uint32_t mMemoryProfilingNumberSlots; uint32_t mOclRegionProfilingNumberSlots; + std::string mDevUserName; + + // Information extracted from platform linker + bool mIsDebugIpLayoutRead = false; + bool mIsDeviceProfiling = false; + uint64_t mPerfMonFifoCtrlBaseAddress; + uint64_t mPerfMonFifoReadBaseAddress; + uint64_t mPerfMonBaseAddress[XSPM_MAX_NUMBER_SLOTS]; + std::string mPerfMonSlotName[XSPM_MAX_NUMBER_SLOTS]; char *mUserMap; std::ofstream mLogStream; xclVerbosityLevel mVerbosity; std::string mBinfile; ELARecordList mRecordList; - std::vector mDDRMemoryManager; xclDeviceInfo2 mDeviceInfo; - bool m4DDR; + RangeTable mLegacyAddressTable; +#ifndef INTERNAL_TESTING + int sleepUntilLoaded( std::string afi ); + int checkAndSkipReload( char *afi_id, fpga_mgmt_image_info *info ); + int loadDefaultAfiIfCleared( void ); +#endif public: static const unsigned TAG; }; diff --git a/SDAccel/userspace/src/test b/SDAccel/userspace/src/test deleted file mode 100644 index 3bf09b28..00000000 --- a/SDAccel/userspace/src/test +++ /dev/null @@ -1,1862 +0,0 @@ -diff --git a/sdk/userspace/fpga_image_tools/src/Makefile b/sdk/userspace/fpga_image_tools/src/Makefile -index 522c4a5..ab475a8 100644 ---- a/sdk/userspace/fpga_image_tools/src/Makefile -+++ b/sdk/userspace/fpga_image_tools/src/Makefile -@@ -21,7 +21,7 @@ LIB_PATH 
= $(TOP)/lib - INCLUDES = -I$(FPGAHALINC_PATH) -I$(TOP)/include -I../. -I. - - #OPT=-O2 --CFLAGS=$(OPT) -g -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) -+CFLAGS=$(OPT) -g -Wall -Werror -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) - - LDFLAGS = -L$(LIB_PATH)/so - LDLIBS = -lfpga_mgmt -diff --git a/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.c b/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.c -index aa3a3b1..7ab30fd 100644 ---- a/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.c -+++ b/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.c -@@ -91,7 +91,7 @@ cli_show_slot_app_pfs(int slot_id, struct fpga_slot_spec *spec) - - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - /** -@@ -120,7 +120,7 @@ cli_attach(void) - out: - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - /** -@@ -236,17 +236,17 @@ static int - command_metrics(void) - { - int ret; -- uint32_t i; -+ uint32_t i, flags; - struct fpga_mgmt_image_info info; -+ struct fpga_slot_spec slot_spec; - - memset(&info, 0, sizeof(struct fpga_mgmt_image_info)); - -- // todo: -- // req->fpga_cmd_flags |= (f1.get_hw_metrics) ? FPGA_CMD_GET_HW_METRICS : 0; -- // req->fpga_cmd_flags |= (f1.clear_hw_metrics) ? -- // FPGA_CMD_CLEAR_HW_METRICS : 0; -+ flags = 0; -+ flags |= (f1.get_hw_metrics) ? FPGA_CMD_GET_HW_METRICS : 0; -+ flags |= (f1.clear_hw_metrics) ? 
FPGA_CMD_CLEAR_HW_METRICS : 0; - -- ret = fpga_mgmt_describe_local_image(f1.afi_slot, &info); -+ ret = fpga_mgmt_describe_local_image(f1.afi_slot, &info, flags); - fail_on(ret, err, "Unable to describe local image"); - - if (f1.show_headers) { -@@ -261,13 +261,15 @@ command_metrics(void) - - if (f1.rescan) { - /** Rescan the application PFs for this slot */ -- ret = fpga_pci_rescan_slot_app_pfs(f1.afi_slot); // todo: implement this in the library -+ ret = fpga_pci_rescan_slot_app_pfs(); - fail_on_quiet(ret != 0, err, "cli_rescan_slot_app_pfs failed"); - } - - /** Display the application PFs for this slot */ -- // ret = cli_show_slot_app_pfs(f1.afi_slot); // todo -- //fail_on_quiet(ret != 0, err, "cli_show_slot_app_pfs failed"); -+ ret = fpga_pci_get_slot_spec(f1.afi_slot, &slot_spec); -+ fail_on_quiet(ret != 0, err, "fpga_pci_get_slot_spec failed"); -+ ret = cli_show_slot_app_pfs(f1.afi_slot, &slot_spec); -+ fail_on_quiet(ret != 0, err, "cli_show_slot_app_pfs failed"); - - if (f1.get_hw_metrics) { - if (f1.show_headers) { -@@ -375,7 +377,7 @@ command_metrics(void) - - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - /** -@@ -400,7 +402,7 @@ command_describe_slots(void) - - ret = fpga_pci_get_all_slot_specs(spec_array, sizeof_array(spec_array)); - -- for (i = 0; i < sizeof_array(spec_array); ++i) { -+ for (i = 0; i < (int) sizeof_array(spec_array); ++i) { - if (spec_array[i].map[FPGA_APP_PF].vendor_id == 0) - continue; - -@@ -411,7 +413,7 @@ command_describe_slots(void) - } - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - typedef int (*command_func_t)(void); -@@ -443,7 +445,7 @@ cli_main(void) - - return command_table[f1.opcode](); - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - /** -@@ -525,6 +527,9 @@ main(int argc, char *argv[]) - ret = cli_main(); - fail_on_quiet(ret != 0, err, "cli_main failed"); - err: -+ if (ret) { -+ printf("Error: (%d) %s\n", ret, fpga_mgmt_strerror(ret)); -+ } - cli_detach(); - cli_destroy(); - return 
ret; -diff --git a/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.h b/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.h -index df222c6..bf2e06d 100644 ---- a/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.h -+++ b/sdk/userspace/fpga_image_tools/src/fpga_local_cmd.h -@@ -44,30 +44,16 @@ enum { - AFI_EXT_END - }; - --/** F1 Mailbox PF defines */ -+/** F1 Mailbox Device defines */ - #define F1_MBOX_VENDOR_ID 0x1d0f - #define F1_MBOX_DEVICE_ID 0x1041 - #define F1_MBOX_RESOURCE_NUM 0 - --/** F1 Application PF defines */ --#define F1_APP_PF_START 0 --#define F1_APP_PF_END 15 -- --/** -- * Generally, we allow a sanitized first level error to be displayed -- * for the user. We do not want low-level mailbox related errors -- * to be displayed (since we are abstracting the mailbox interface). -- * The fail_on_quiet define allows the multi-level trace debug info -- * to still be displayed for development if needed, by re-defining -- * fail_on_quiet as fail_on. -- */ --#define fail_on_quiet fail_on_user --// #define fail_on_quiet(CONDITION, LABEL, ...) \ --// do { \ --// if (CONDITION) { \ --// goto LABEL; \ --// } \ --// } while (0) -+/** F1 Application Device defines */ -+#define F1_MBOX_DEV2APP_DEV(dev) ((dev) - 1) -+#define F1_APP_PF 0 -+#define F1_REMOVE_APP_DEV_DELAY_MSEC 1000 -+#define F1_REMOVE_APP_DEV_MAX_RETRIES 3 - - /** - * This should be used for the sanitized first level errors to be -@@ -115,13 +101,11 @@ enum { - */ - struct ec2_fpga_cmd { - uint32_t slot_dev_index; -- struct fpga_slot_spec mbox_slot_devs[FPGA_SLOT_MAX]; /* todo: do we need this still? */ - uint32_t opcode; - uint32_t afi_slot; - char afi_id[AFI_ID_STR_MAX]; - uint32_t mbox_timeout; - uint32_t mbox_delay_msec; -- bool plat_attached; - bool show_headers; - bool get_hw_metrics; - bool clear_hw_metrics; -@@ -141,54 +125,3 @@ extern struct ec2_fpga_cmd f1; - */ - int - parse_args(int argc, char *argv[]); -- --/** -- * Initialize the AFI slot devices from the PCI/sysfs layer. 
-- * -- * @returns -- * 0 on success -- * -1 on failure -- */ --int cli_pci_init(void); -- --/** -- * De-initialize the PCI/sysfs layer. -- */ --void cli_pci_free(void); -- --/** -- * Retrieve the application PF map for the given mbox slot. -- * -- * @param[in] slot the fpga slot -- * @param[in] app_pf_num the application PF number to check -- * @param[out] map the application PF resource map to return -- * -- * @returns -- * 0 on success -- * -1 on failure -- */ --int cli_get_app_pf_map(uint32_t slot, uint32_t app_pf_num, -- struct fpga_pci_resource_map *map); -- --/** -- * Remove the application PF for the given mbox slot. -- * -- * @param[in] slot the fpga slot -- * @param[in] app_pf_num the application PF number to check -- * -- * @returns -- * 0 on success -- * -1 on failure -- */ --int --cli_remove_app_pf(uint32_t slot, uint32_t app_pf_num); -- --/** -- * PCI rescan. -- * -- * @returns -- * 0 on success -- * -1 on failure -- */ --int --cli_pci_rescan(void); -diff --git a/sdk/userspace/fpga_image_tools/src/fpga_local_cmd_parse.c b/sdk/userspace/fpga_image_tools/src/fpga_local_cmd_parse.c -index ab4f371..d07630a 100644 ---- a/sdk/userspace/fpga_image_tools/src/fpga_local_cmd_parse.c -+++ b/sdk/userspace/fpga_image_tools/src/fpga_local_cmd_parse.c -@@ -328,7 +328,7 @@ config_request_timeout(uint32_t timeout) - timeout, f1.mbox_timeout, f1.mbox_delay_msec); - return 0; - err: -- return -1; -+ return -EINVAL; - } - - /** -@@ -400,7 +400,7 @@ parse_args_load_afi(int argc, char *argv[]) - err: - print_usage(argv[0], load_afi_usage, sizeof_array(load_afi_usage)); - out_ver: -- return -1; -+ return -EINVAL; - } - - /** -@@ -462,7 +462,7 @@ parse_args_clear_afi(int argc, char *argv[]) - err: - print_usage(argv[0], clear_afi_usage, sizeof_array(clear_afi_usage)); - out_ver: -- return -1; -+ return -EINVAL; - } - - /** -@@ -538,7 +538,7 @@ parse_args_describe_afi(int argc, char *argv[]) - err: - print_usage(argv[0], describe_afi_usage, sizeof_array(describe_afi_usage)); 
- out_ver: -- return -1; -+ return -EINVAL; - } - - -@@ -596,7 +596,7 @@ err: - print_usage(argv[0], describe_afi_slots_usage, - sizeof_array(describe_afi_slots_usage)); - out_ver: -- return -1; -+ return -EINVAL; - } - - -@@ -669,7 +669,7 @@ parse_args_start_virtual_jtag(int argc, char *argv[]) - err: - print_usage(argv[0], start_virtual_jtag_usage, sizeof_array(start_virtual_jtag_usage)); - out_ver: -- return -1; -+ return -EINVAL; - } - - /** -@@ -724,7 +724,7 @@ parse_args_get_virtual_led(int argc, char *argv[]) - err: - print_usage(argv[0], get_virtual_led_usage, sizeof_array(get_virtual_led_usage)); - out_ver: -- return -1; -+ return -EINVAL; - } - - /** -@@ -780,7 +780,7 @@ parse_args_get_virtual_dip(int argc, char *argv[]) - err: - print_usage(argv[0], get_virtual_dip_usage, sizeof_array(get_virtual_dip_usage)); - out_ver: -- return -1; -+ return -EINVAL; - } - - /** -@@ -863,7 +863,7 @@ parse_args_set_virtual_dip(int argc, char *argv[]) - err: - print_usage(argv[0], set_virtual_dip_usage, sizeof_array(set_virtual_dip_usage)); - out_ver: -- return -1; -+ return -EINVAL; - } - - typedef int (*parse_args_func_t)(int argc, char *argv[]); -@@ -901,7 +901,7 @@ parse_args(int argc, char *argv[]) - - char *opcode_str = argv[1]; - size_t i; -- int ret = -1; -+ int ret = -EINVAL; - for (i = 0; i < sizeof_array(str2func); i++) { - struct parse_args_str2func *entry = &str2func[i]; - -@@ -919,5 +919,5 @@ parse_args(int argc, char *argv[]) - return ret; - err: - print_usage(argv[0], opcode_str_usage, sizeof_array(opcode_str_usage)); -- return -1; -+ return -EINVAL; - } -diff --git a/sdk/userspace/fpga_image_tools/src/virtual_jtag_pcie.c b/sdk/userspace/fpga_image_tools/src/virtual_jtag_pcie.c -index 260ec6c..b6f1689 100644 ---- a/sdk/userspace/fpga_image_tools/src/virtual_jtag_pcie.c -+++ b/sdk/userspace/fpga_image_tools/src/virtual_jtag_pcie.c -@@ -40,7 +40,7 @@ int open_port(uint32_t slot_id, pci_bar_handle_t* jtag_pci_bar) { - - void close_port(pci_bar_handle_t 
jtag_pci_bar) { - if (jtag_pci_bar >=0) -- fpga_pci_detatch(jtag_pci_bar); -+ fpga_pci_detach(jtag_pci_bar); - - } - -diff --git a/sdk/userspace/fpga_image_tools/src/virtual_jtag_server.c b/sdk/userspace/fpga_image_tools/src/virtual_jtag_server.c -index 4a1d4f6..684a2b8 100644 ---- a/sdk/userspace/fpga_image_tools/src/virtual_jtag_server.c -+++ b/sdk/userspace/fpga_image_tools/src/virtual_jtag_server.c -@@ -67,7 +67,7 @@ static int open_server(const char* tcp_port) { - if (err) { - fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(err)); - errno = EINVAL; -- return -1; -+ return FPGA_ERR_FAIL; - } - - for (res = reslist; res != NULL; res = res->ai_next) { -diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/Makefile b/sdk/userspace/fpga_libs/fpga_mgmt/Makefile -index e4c697b..69483c8 100644 ---- a/sdk/userspace/fpga_libs/fpga_mgmt/Makefile -+++ b/sdk/userspace/fpga_libs/fpga_mgmt/Makefile -@@ -22,7 +22,7 @@ LIB_SO_PATH = $(LIB_PATH)/so - INCLUDES = -I$(TOPINC_PATH) -I/usr/include - - #OPT=-O2 --CFLAGS=$(OPT) -g -std=gnu99 -fPIC -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) -+CFLAGS=$(OPT) -g -std=gnu99 -fPIC -Wall -Werror -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) - - SRC = $(wildcard *.c) - OBJ = $(SRC:.c=.o) -diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c -index e3c128c..e9d51b3 100644 ---- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c -+++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c -@@ -54,7 +54,7 @@ void fpag_mgmt_set_cmd_delay_msec(uint32_t value) - } - - int fpga_mgmt_describe_local_image(int slot_id, -- struct fpga_mgmt_image_info *info) -+ struct fpga_mgmt_image_info *info, uint32_t flags) - { - int ret; - uint32_t len; -@@ -71,9 +71,7 @@ int fpga_mgmt_describe_local_image(int slot_id, - memset(&rsp, 0, sizeof(union afi_cmd)); - - /* initialize the command structure */ -- fpga_mgmt_cmd_init_metrics(&cmd, &len, -- /*bool 
get_hw_metrics*/ false, -- /*bool clear_hw_metrics*/ false); -+ fpga_mgmt_cmd_init_metrics(&cmd, &len, flags); - - /* send the command and wait for the response */ - ret = fpga_mgmt_process_cmd(slot_id, &cmd, &rsp, &len); -@@ -107,14 +105,6 @@ out: - return ret; - } - --/** -- * Gets the status of an FPGA. Status values are definted in enum fpga_status. -- * If you need the AFI id at the same time, use fpga_mgmt_describe_local_image. -- * -- * @param[in] slot_id the logical slot index -- * @param[out] status populated with status value -- * @returns 0 on success, non-zero on error -- */ - int fpga_mgmt_get_status(int slot_id, int *status) - { - int ret; -@@ -128,7 +118,7 @@ int fpga_mgmt_get_status(int slot_id, int *status) - - memset(&info, 0, sizeof(struct fpga_mgmt_image_info)); - -- ret = fpga_mgmt_describe_local_image(slot_id, &info); -+ ret = fpga_mgmt_describe_local_image(slot_id, &info, 0); - fail_on(ret, out, "fpga_mgmt_describe_local_image failed"); - - *status = info.status; -@@ -136,10 +126,18 @@ out: - return ret; - } - --const char *fpga_mgmt_get_status_name(int status) { -+const char *fpga_mgmt_get_status_name(int status) -+{ - return FPGA_STATUS2STR(status); - } - -+const char *fpga_mgmt_strerror(int err) { -+ if (err < 0) { -+ return strerror(-err); -+ } -+ return FPGA_ERR2STR(err); -+} -+ - int fpga_mgmt_clear_local_image(int slot_id) { - int ret; - uint32_t len; -@@ -193,14 +191,14 @@ int fpga_mgmt_get_vLED_status(int slot_id, uint16_t *status) { - - ret=fpga_pci_attach(slot_id, FPGA_MGMT_PF, MGMT_PF_BAR0, 0, &led_pci_bar); - if (ret) -- return -1; -+ return FPGA_ERR_FAIL; - - ret = fpga_pci_peek(led_pci_bar,F1_VIRTUAL_LED_REG_OFFSET,&read_data); - /* All this code assumes little endian, it would need rework for supporting non x86/arm platforms */ - *(status) = (uint16_t)( read_data & 0x0000FFFF); - - -- fpga_pci_detatch(led_pci_bar); -+ fpga_pci_detach(led_pci_bar); - return ret; - } - -@@ -211,7 +209,7 @@ int fpga_mgmt_set_vDIP(int slot_id, 
uint16_t value) { - - ret=fpga_pci_attach(slot_id, FPGA_MGMT_PF, MGMT_PF_BAR0, 0, &dip_pci_bar); - if (ret) -- return -1; -+ return FPGA_ERR_FAIL; - - - write_data = (uint32_t) value; -@@ -219,7 +217,7 @@ int fpga_mgmt_set_vDIP(int slot_id, uint16_t value) { - ret = fpga_pci_poke(dip_pci_bar,F1_VIRTUAL_DIP_REG_OFFSET,write_data); - - -- fpga_pci_detatch(dip_pci_bar); -+ fpga_pci_detach(dip_pci_bar); - return ret; - } - -@@ -231,13 +229,13 @@ int fpga_mgmt_get_vDIP_status(int slot_id, uint16_t *value) { - - ret=fpga_pci_attach(slot_id, FPGA_MGMT_PF, MGMT_PF_BAR0, 0, &dip_pci_bar); - if (ret) -- return -1; -+ return FPGA_ERR_FAIL; - - ret = fpga_pci_peek(dip_pci_bar,F1_VIRTUAL_DIP_REG_OFFSET,&read_data); - /* All this code assumes little endian, it would need rework for supporting non x86/arm platforms */ - *(value) = (uint16_t)read_data; - -- fpga_pci_detatch(dip_pci_bar); -+ fpga_pci_detach(dip_pci_bar); - return ret; - - } -diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c -index 78bd0d6..ecbb8cc 100644 ---- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c -+++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c -@@ -25,14 +25,10 @@ - #include - #include - #include -+#include - - #include "fpga_mgmt_internal.h" - --#include // todo: get rid of this? --#define fail_on_quiet fail_on --#define fail_on_internal fail_on --#define fail_on_user fail_on -- - /** - * AFI command get payload length utility. - * -@@ -76,7 +72,7 @@ afi_cmd_hdr_set_len(union afi_cmd *cmd, size_t len) - { - /* Null pointer or overflow? */ - if (!cmd || (len & ~AFI_CMD_HDR_LEN_MASK)) { -- return -1; -+ return FPGA_ERR_FAIL; - } - - cmd->hdr.len_flags &= ~AFI_CMD_HDR_LEN_MASK; -@@ -99,7 +95,7 @@ afi_cmd_hdr_set_flags(union afi_cmd *cmd, unsigned int flags) - { - /* Null pointer or overflow? 
*/ - if (!cmd || (flags & ~AFI_CMD_HDR_ALL_FLAGS)) { -- return -1; -+ return FPGA_ERR_FAIL; - } - - cmd->hdr.len_flags &= AFI_CMD_HDR_LEN_MASK; -@@ -164,8 +160,7 @@ fpga_mgmt_cmd_init_load(union afi_cmd *cmd, uint32_t *len, const char *afi_id) - * @param[in,out] len cmd len - */ - void --fpga_mgmt_cmd_init_metrics(union afi_cmd *cmd, uint32_t *len, -- bool get_hw_metrics, bool clear_hw_metrics) -+fpga_mgmt_cmd_init_metrics(union afi_cmd *cmd, uint32_t *len, uint32_t flags) - { - assert(cmd); - assert(len); -@@ -180,10 +175,9 @@ fpga_mgmt_cmd_init_metrics(union afi_cmd *cmd, uint32_t *len, - afi_cmd_hdr_set_len(cmd, payload_len); - afi_cmd_hdr_set_flags(cmd, 0); - -- /** Fill in cmd body */ -- req->fpga_cmd_flags = 0; -- req->fpga_cmd_flags |= (get_hw_metrics) ? FPGA_CMD_GET_HW_METRICS : 0; -- req->fpga_cmd_flags |= (clear_hw_metrics) ? FPGA_CMD_CLEAR_HW_METRICS : 0; -+ /** Fill in cmd body; only allow specific flags to be set */ -+ req->fpga_cmd_flags = flags & -+ (FPGA_CMD_GET_HW_METRICS | FPGA_CMD_CLEAR_HW_METRICS); - - *len = sizeof(struct afi_cmd_hdr) + payload_len; - } -@@ -243,7 +237,7 @@ fpga_mgmt_cmd_handle_metrics(const union afi_cmd *rsp, uint32_t len, - - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - -@@ -270,14 +264,14 @@ fpga_mgmt_mbox_attach(int slot_id) - }; - - ret = fpga_hal_mbox_init(&mbox); -- fail_on_internal(ret != 0, err, CLI_INTERNAL_ERR_STR); -+ fail_on(ret != 0, err, CLI_INTERNAL_ERR_STR); - - ret = fpga_hal_mbox_attach(true); /**< clear_state=true */ -- fail_on_internal(ret != 0, err, CLI_INTERNAL_ERR_STR); -+ fail_on(ret != 0, err, CLI_INTERNAL_ERR_STR); - - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - static int -@@ -290,7 +284,7 @@ fpga_mgmt_mbox_detach(int slot_id) - /** Continue with plat detach */ - } - -- ret = fpga_pci_detatch(fpga_mgmt_state.slots[slot_id].handle); -+ ret = fpga_pci_detach(fpga_mgmt_state.slots[slot_id].handle); - if (ret != 0) { - log_error("%s (line %u)", 
CLI_INTERNAL_ERR_STR, __LINE__); - /* Continue with detach */ -@@ -311,18 +305,54 @@ int fpga_mgmt_detach_all(void) - } - - /** -+ * Handle AFI error response -+ * -+ * @param[in] rsp the response that was received. -+ * @param[in] len the expected response payload len. -+ * -+ * @returns -+ * zero on success, non-zero on failure -+ */ -+static int -+fpga_mgmt_handle_afi_cmd_error_rsp(const union afi_cmd *rsp, uint32_t len) -+{ -+ struct afi_cmd_err_rsp *err_rsp = (void *)rsp->body; -+ -+ uint32_t tmp_len = -+ sizeof(struct afi_cmd_hdr) + sizeof(struct afi_cmd_err_rsp); -+ -+ fail_on_quiet(len < tmp_len, err, "total_rsp_len(%u) < calculated_len(%u)", -+ len, tmp_len); -+ -+ /** Handle invalid API version error */ -+ if (err_rsp->error == FPGA_ERR_AFI_CMD_API_VERSION_INVALID) { -+ union afi_err_info *err_info = (void *)err_rsp->error_info; -+ -+ tmp_len += sizeof(err_info->afi_cmd_version); -+ fail_on_quiet(len < tmp_len, err, "total_rsp_len(%u) < calculated_len(%u)", -+ len, tmp_len); -+ -+ log_error("Error: Please upgrade from aws-fpga github to AFI CMD API Version: v%u\n", -+ err_info->afi_cmd_version); -+ } -+ -+ return err_rsp->error; -+err: -+ return FPGA_ERR_FAIL; -+} -+ -+/** - * Validate the AFI response header, using the command header. - * -- * @param[in] cmd the command that was sent. -- * @param[in] rsp the response that was received. -- * @param[in] len the expected response payload len. -+ * @param[in] cmd the command that was sent. -+ * @param[in] rsp the response that was received. -+ * @param[in] len the expected response payload len. 
- * - * @returns -- * 0 on success -- * -1 on failure -+ * zero on success, non-zero on failure - */ --static int --fpga_mgmt_afi_validate_header(const union afi_cmd *cmd, -+static int -+fpga_mgmt_afi_validate_header(const union afi_cmd *cmd, - const union afi_cmd *rsp, uint32_t len) - { - uint32_t stored_flags = afi_cmd_hdr_get_flags(rsp); -@@ -333,8 +363,8 @@ fpga_mgmt_afi_validate_header(const union afi_cmd *cmd, - fail_on_quiet(!rsp, err, "rsp == NULL"); - - /** Version */ -- fail_on_quiet(cmd->hdr.version != rsp->hdr.version, err, -- "cmd_ver(%u) != rsp_ver(%u)", -+ fail_on_quiet(cmd->hdr.version != rsp->hdr.version, err, -+ "cmd_ver(%u) != rsp_ver(%u)", - cmd->hdr.version, rsp->hdr.version); - - /** Opcode */ -@@ -346,11 +376,11 @@ fpga_mgmt_afi_validate_header(const union afi_cmd *cmd, - cmd->hdr.id, rsp->hdr.id); - - /** Received len too small */ -- fail_on_quiet(len < sizeof(struct afi_cmd_hdr), err, -+ fail_on_quiet(len < sizeof(struct afi_cmd_hdr), err, - "Received length %u too small", len); - - /** Payload len too big */ -- fail_on_quiet(payload_len + sizeof(struct afi_cmd_hdr) > AFI_CMD_DATA_LEN, -+ fail_on_quiet(payload_len + sizeof(struct afi_cmd_hdr) > AFI_CMD_DATA_LEN, - err, "Payload length %u too big", payload_len); - - /** Not a response */ -@@ -361,10 +391,10 @@ id_err: - return -EAGAIN; - op_err: - if (rsp->hdr.op == AFI_CMD_ERROR) { -- //return (cmd, rsp, len); // TODO -+ return fpga_mgmt_handle_afi_cmd_error_rsp(rsp, len); - } - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - static int -@@ -375,30 +405,33 @@ fpga_mgmt_send_cmd( - - /** Write the AFI cmd to the mailbox */ - ret = fpga_hal_mbox_write((void *)cmd, *len); -- fail_on_internal(ret != 0, err, CLI_INTERNAL_ERR_STR); -+ fail_on(ret != 0, err, CLI_INTERNAL_ERR_STR); - -- /** -+ /** - * Read the AFI rsp from the mailbox. -- * -also make a minimal attempt to drain stale responses -+ * -also make a minimal attempt to drain stale responses - * (if any). 
- */ - uint32_t id_retries = 0; - ret = -EAGAIN; - while (ret == -EAGAIN) { - ret = fpga_hal_mbox_read((void *)rsp, len); -- fail_on_user(ret != 0, err, "Error: operation timed out"); -+ fail_on(ret = (ret) ? ETIMEDOUT : 0, err_code, "Error: operation timed out"); - - ret = fpga_mgmt_afi_validate_header(cmd, rsp, *len); -+ fail_on(ret, err_code, CLI_INTERNAL_ERR_STR); - -- fail_on_internal(id_retries >= AFI_MAX_ID_RETRIES, err, -+ fail_on(id_retries >= AFI_MAX_ID_RETRIES, err, - CLI_INTERNAL_ERR_STR); - id_retries++; - } -- fail_on_internal(ret != 0, err, CLI_INTERNAL_ERR_STR); -- -+ fail_on(ret != 0, err, CLI_INTERNAL_ERR_STR); -+ - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; -+err_code: -+ return ret; - } - - int -diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h -index c06f6cd..bb6ddb1 100644 ---- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h -+++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h -@@ -52,7 +52,7 @@ extern struct fgpa_mgmt_state_s { - int fpga_mgmt_process_cmd(int slot_id, - const union afi_cmd *cmd, union afi_cmd *rsp, uint32_t *len); - void fpga_mgmt_cmd_init_metrics(union afi_cmd *cmd, uint32_t *len, -- bool get_hw_metrics, bool clear_hw_metrics); -+ uint32_t flags); - void fpga_mgmt_cmd_init_load(union afi_cmd *cmd, uint32_t *len, - const char *afi_id); - void fpga_mgmt_cmd_init_clear(union afi_cmd *cmd, uint32_t *len); -diff --git a/sdk/userspace/fpga_libs/fpga_pci/Makefile b/sdk/userspace/fpga_libs/fpga_pci/Makefile -index 2a90df5..7e81c26 100644 ---- a/sdk/userspace/fpga_libs/fpga_pci/Makefile -+++ b/sdk/userspace/fpga_libs/fpga_pci/Makefile -@@ -21,7 +21,7 @@ LIB_PATH = $(TOP)/lib - INCLUDES = -I$(TOPINC_PATH) -I/usr/include - - #OPT=-O2 --CFLAGS=$(OPT) -g -std=gnu99 -fPIC -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) -+CFLAGS=$(OPT) -g -std=gnu99 -fPIC -Wall -Werror -W -Wno-parentheses 
-Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) - - SRC = $(wildcard *.c) - OBJ = $(SRC:.c=.o) -diff --git a/sdk/userspace/fpga_libs/fpga_pci/fpga_pci.c b/sdk/userspace/fpga_libs/fpga_pci/fpga_pci.c -index 0289f97..fd03c35 100644 ---- a/sdk/userspace/fpga_libs/fpga_pci/fpga_pci.c -+++ b/sdk/userspace/fpga_libs/fpga_pci/fpga_pci.c -@@ -13,7 +13,7 @@ - * permissions and limitations under the License. - */ - --#include "fpga_pci_interal.h" -+#include "fpga_pci_internal.h" - - #include - -@@ -23,11 +23,17 @@ fpga_pci_init() { - } - - int --fpga_pci_attach(int slot_id, int pf_id, int bar_id, uint32_t flags, pci_bar_handle_t *handle) { -+fpga_pci_attach(int slot_id, int pf_id, int bar_id, uint32_t flags, -+ pci_bar_handle_t *handle) -+{ - int rc; -+ bool write_combining; - struct fpga_slot_spec spec; -+ -+ (void) flags; - -- if (!handle || pf_id < 0 || pf_id >= FPGA_MAX_PF) { -+ if (!handle || pf_id < 0 || pf_id >= FPGA_MAX_PF || -+ bar_id < 0 || bar_id >= FPGA_BAR_PER_PF_MAX) { - return -EINVAL; - } - -@@ -36,13 +42,21 @@ fpga_pci_attach(int slot_id, int pf_id, int bar_id, uint32_t flags, pci_bar_hand - rc = fpga_pci_get_slot_spec(slot_id, &spec); - fail_on(rc, out, "Unable to prefill the slot spec\n"); - -- return fpga_plat_dev_attach(&spec, pf_id, bar_id, handle); -+ write_combining = false; -+ if (flags & BURST_CAPABLE) { -+ rc = (spec.map[pf_id].resource_burstable[bar_id]) ? 
0 : FPGA_ERR_FAIL; -+ fail_on(rc, out, "bar is not BURST_CAPABLE (does not support write " -+ "combining.)"); -+ write_combining = true; -+ } -+ -+ return fpga_plat_dev_attach(&spec, pf_id, bar_id, write_combining, handle); - out: -- return 1; -+ return rc; - } - - int --fpga_pci_detatch(pci_bar_handle_t handle) { -+fpga_pci_detach(pci_bar_handle_t handle) { - return fpga_plat_dev_detach(handle); - } - -@@ -53,11 +67,7 @@ fpga_pci_poke(pci_bar_handle_t handle, uint64_t offset, uint32_t value) { - - int - fpga_pci_poke64(pci_bar_handle_t handle, uint64_t offset, uint64_t value) { -- (void) handle; -- (void) offset; -- (void) value; -- /* not implemened */ -- return 1; -+ return fpga_hal_dev_reg_write64(handle, offset, value); - } - - int -@@ -67,18 +77,10 @@ fpga_pci_peek(pci_bar_handle_t handle, uint64_t offset, uint32_t *value) { - - int - fpga_pci_peek64(pci_bar_handle_t handle, uint64_t offset, uint64_t *value) { -- (void) handle; -- (void) offset; -- (void) value; -- /* not implemented */ -- return 1; -+ return fpga_hal_dev_reg_read64(handle, offset, value); - } - - int fpga_pci_write_burst(pci_bar_handle_t handle, uint64_t offset, uint32_t* datap, uint32_t dword_len) { -- (void) handle; -- (void) offset; -- (void) datap; -- (void) dword_len; -- /* not implemented */ -- return 1; -+ int ret = fpga_plat_dev_reg_write_burst(handle, offset, datap, dword_len); -+ return ret ? FPGA_ERR_FAIL : 0; - } -diff --git a/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c b/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c -index 4233896..b0abfa6 100644 ---- a/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c -+++ b/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c -@@ -13,7 +13,7 @@ - * permissions and limitations under the License. - */ - --#include "fpga_pci_interal.h" -+#include "fpga_pci_internal.h" - - #include - #include -@@ -31,18 +31,18 @@ - /** - * Return the ID from the given sysfs file (e.g. Vendor ID, Device ID). 
- * -- * @param[in] path the sysfs file path -+ * @param[in] path the sysfs file path - * @param[in,out] id the returned id - * - * @returns -- * 0 on success -+ * 0 on success - * -1 on failure - */ - static int - fpga_pci_get_id(char *path, uint16_t *id) - { -- fail_on_internal(!path, err, CLI_INTERNAL_ERR_STR); -- fail_on_internal(!id, err, CLI_INTERNAL_ERR_STR); -+ fail_on(!path, err, CLI_INTERNAL_ERR_STR); -+ fail_on(!id, err, CLI_INTERNAL_ERR_STR); - - int ret = 0; - FILE *fp = fopen(path, "r"); -@@ -61,32 +61,59 @@ err_close: - fclose(fp); - err: - errno = 0; -- return -1; -+ return FPGA_ERR_FAIL; - } - - /** -- * Fill in the DBDF within the PCI resource map using the given PCI device -+ * Write a '1' to the given sysfs file. -+ * -+ * @param[in] path the sysfs file path -+ * -+ * @returns -+ * 0 on success -+ * -1 on failure -+ */ -+static int -+fpga_pci_write_one2file(char *path) -+{ -+ int ret = -1; -+ -+ int fd = open(path, O_WRONLY); -+ fail_on_quiet(fd == -1, err, "opening %s", path); -+ -+ char buf[] = { '1', 0 }; -+ ret = -!!write_loop(fd, buf, sizeof(buf)); -+ fail_on_quiet(ret != 0, err_close, "error writing %s", path); -+ -+err_close: -+ close(fd); -+err: -+ return ret; -+} -+ -+/** -+ * Fill in the DBDF within the PCI resource map using the given PCI device - * directory name. 
- * -- * @param[in] dir_name the PCI device directory name -- * @param[in,out] map the PCI resource map to fill in -+ * @param[in] dir_name the PCI device directory name -+ * @param[in,out] map the PCI resource map to fill in - * - * @returns -- * 0 on success -+ * 0 on success - * -1 on failure - */ - static int - fpga_pci_get_dbdf(char *dir_name, struct fpga_pci_resource_map *map) - { -- fail_on_internal(!dir_name, err, CLI_INTERNAL_ERR_STR); -- fail_on_internal(!map, err, CLI_INTERNAL_ERR_STR); -+ fail_on(!dir_name, err, CLI_INTERNAL_ERR_STR); -+ fail_on(!map, err, CLI_INTERNAL_ERR_STR); - - uint32_t domain; - uint32_t bus; - uint32_t dev; - int func; - int ret = sscanf(dir_name, PCI_DEV_FMT, &domain, &bus, &dev, &func); -- fail_on_internal(ret != 4, err, CLI_INTERNAL_ERR_STR); -+ fail_on(ret != 4, err, CLI_INTERNAL_ERR_STR); - - map->domain = domain; - map->bus = bus; -@@ -94,19 +121,19 @@ fpga_pci_get_dbdf(char *dir_name, struct fpga_pci_resource_map *map) - map->func = func; - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - /** -- * Return the PCI resource size using the PCI directory name and resource -+ * Return the PCI resource size using the PCI directory name and resource - * number. 
- * -- * @param[in] dir_name the PCI device directory name -- * @param[in] resource_num the resource number -+ * @param[in] dir_name the PCI device directory name -+ * @param[in] resource_num the resource number - * @param[in,out] resource_size the returned resource size - * - * @returns -- * 0 on success -+ * 0 on success - * -1 on failure - */ - static int -@@ -115,17 +142,17 @@ fpga_pci_get_pci_resource_info(char *dir_name, - { - int ret; - -- fail_on_internal(!dir_name, err, CLI_INTERNAL_ERR_STR); -- fail_on_internal(!resource_size, err, CLI_INTERNAL_ERR_STR); -+ fail_on(!dir_name, err, CLI_INTERNAL_ERR_STR); -+ fail_on(!resource_size, err, CLI_INTERNAL_ERR_STR); - - char sysfs_name[NAME_MAX + 1]; -- ret = snprintf(sysfs_name, sizeof(sysfs_name), -- "/sys/bus/pci/devices/%s/resource%u", dir_name, -+ ret = snprintf(sysfs_name, sizeof(sysfs_name), -+ "/sys/bus/pci/devices/%s/resource%u", dir_name, - resource_num); - - fail_on_quiet(ret < 0, err, "Error building the sysfs path for resource%u", - resource_num); -- fail_on_quiet((size_t) ret >= sizeof(sysfs_name), err, -+ fail_on_quiet((size_t) ret >= sizeof(sysfs_name), err, - "sysfs path too long for resource%u", resource_num); - - /** Check for file existence, obtain the file size */ -@@ -135,13 +162,13 @@ fpga_pci_get_pci_resource_info(char *dir_name, - - *resource_size = file_stat.st_size; - -- ret = snprintf(sysfs_name, sizeof(sysfs_name), -- "/sys/bus/pci/devices/%s/resource%u_wc", dir_name, -+ ret = snprintf(sysfs_name, sizeof(sysfs_name), -+ "/sys/bus/pci/devices/%s/resource%u_wc", dir_name, - resource_num); - - fail_on_quiet(ret < 0, err, "Error building the sysfs path for resource%u", - resource_num); -- fail_on_quiet((size_t) ret >= sizeof(sysfs_name), err, -+ fail_on_quiet((size_t) ret >= sizeof(sysfs_name), err, - "sysfs path too long for resource%u", resource_num); - - memset(&file_stat, 0, sizeof(struct stat)); -@@ -150,7 +177,7 @@ fpga_pci_get_pci_resource_info(char *dir_name, - - return 0; - err: 
-- return -1; -+ return FPGA_ERR_FAIL; - } - - static int -@@ -176,20 +203,18 @@ fpga_pci_handle_resources(char *dir_name, struct fpga_pci_resource_map *map) - map->resource_burstable[resource_num] = burstable; - } - return 0; --err: -- return -1; - } - - - /** -- * Handle one PCI device directory with the given directory name, and see if -- * it is an AFI mbox slot. If so, initialize a slot device structure for it -+ * Handle one PCI device directory with the given directory name, and see if -+ * it is an AFI mbox slot. If so, initialize a slot device structure for it - * and its associated slot device (if any). - * -- * @param[in] dir_name the PCI device directory name -+ * @param[in] dir_name the PCI device directory name - * - * @returns -- * 0 on success -+ * 0 on success - * -1 on failure - */ - static int -@@ -199,12 +224,10 @@ fpga_pci_handle_pci_dir_name(char *dir_name, struct fpga_pci_resource_map *map) - uint16_t device_id = 0; - - fail_on_quiet(!dir_name, err, CLI_INTERNAL_ERR_STR); -- // fail_on_quiet(f1.slot_dev_index >= FPGA_SLOT_MAX, err, -- // CLI_INTERNAL_ERR_STR); - - /** Setup and read the PCI Vendor ID */ - char sysfs_name[NAME_MAX + 1]; -- int ret = snprintf(sysfs_name, sizeof(sysfs_name), -+ int ret = snprintf(sysfs_name, sizeof(sysfs_name), - "/sys/bus/pci/devices/%s/vendor", dir_name); - - fail_on_quiet(ret < 0, err, "Error building the sysfs path for vendor"); -@@ -214,7 +237,7 @@ fpga_pci_handle_pci_dir_name(char *dir_name, struct fpga_pci_resource_map *map) - fail_on_quiet(ret != 0, err, "Error retrieving vendor_id"); - - /** Setup and read the PCI Device ID */ -- ret = snprintf(sysfs_name, sizeof(sysfs_name), -+ ret = snprintf(sysfs_name, sizeof(sysfs_name), - "/sys/bus/pci/devices/%s/device", dir_name); - - fail_on_quiet(ret < 0, err, "Error building the sysfs path for device"); -@@ -223,27 +246,17 @@ fpga_pci_handle_pci_dir_name(char *dir_name, struct fpga_pci_resource_map *map) - ret = fpga_pci_get_id(sysfs_name, &device_id); - 
fail_on_quiet(ret != 0, err, "Error retrieving device_id"); - -- // /** Check for a match to the FPGA Mbox Vendor ID and Device ID */ -- // if ((vendor_id != F1_MBOX_VENDOR_ID) || (device_id != F1_MBOX_DEVICE_ID)) { -- // /* the device did not match */ -- // return 1; -- // } -- - /** Fill in the DBDF */ - ret = fpga_pci_get_dbdf(dir_name, map); - fail_on_quiet(ret != 0, err, "Error retrieving DBDF from dir_name=%s", - dir_name); - -- /** Retrieve the PCI resource size for plat attach */ -- ret = fpga_pci_handle_resources(dir_name, map); -- fail_on_quiet(ret != 0, err, "Error retrieving resource information"); -- - map->vendor_id = vendor_id; - map->device_id = device_id; - - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - int -@@ -252,59 +265,70 @@ fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size) - bool found_afi_slot = false; - char *path = "/sys/bus/pci/devices"; - DIR *dirp = opendir(path); -- fail_on_internal(!dirp, err, CLI_INTERNAL_ERR_STR); -+ fail_on(!dirp, err, CLI_INTERNAL_ERR_STR); -+ -+ struct dirent entry_a, entry_b, *entry, *previous_entry, *result; - int slot_dev_index = 0; - struct fpga_slot_spec search_spec; -- struct fpga_pci_resource_map search_map, previous_map; -+ struct fpga_pci_resource_map a, b, *search_map, *previous_map; - - memset(&search_spec, 0, sizeof(struct fpga_slot_spec)); -- memset(&previous_map, 0, sizeof(struct fpga_pci_resource_map)); -+ memset(&a, 0, sizeof(struct fpga_pci_resource_map)); -+ memset(&b, 0, sizeof(struct fpga_pci_resource_map)); -+ search_map = &a; -+ previous_map = &b; - -- /** Loop through the sysfs device directories */ -- for (;;) { -- struct dirent entry; -- struct dirent *result; -- memset(&entry, 0, sizeof(entry)); -+ entry = &entry_a; -+ previous_entry = &entry_b; - -- readdir_r(dirp, &entry, &result); -+ /** Loop through the sysfs device directories */ -+ while (true) { -+ memset(entry, 0, sizeof(entry)); -+ readdir_r(dirp, entry, &result); - if (result == NULL) 
{ - /** No more directories */ - break; - } - - /** Handle the current directory entry */ -- memset(&search_map, 0, sizeof(struct fpga_pci_resource_map)); -- int ret = fpga_pci_handle_pci_dir_name(entry.d_name, &search_map); -+ memset(search_map, 0, sizeof(struct fpga_pci_resource_map)); -+ int ret = fpga_pci_handle_pci_dir_name(entry->d_name, search_map); - if (ret != 0) { -+ previous_map->device_id = 0; - continue; - } -- found_afi_slot = true; -- if (search_map.domain != previous_map.domain || -- search_map.bus != previous_map.bus || -- search_map.dev != previous_map.dev) { -- -- -- /* domain, bus, device do not match: this is the next slot */ -- if (search_spec.map[FPGA_MGMT_PF].vendor_id == F1_MBOX_VENDOR_ID && -- search_spec.map[FPGA_MGMT_PF].device_id == F1_MBOX_DEVICE_ID) { - -- spec_array[slot_dev_index] = search_spec; -- ++slot_dev_index; -- if (slot_dev_index >= size) { -- break; -- } -+ if (search_map->vendor_id == F1_MBOX_VENDOR_ID && -+ search_map->device_id == F1_MBOX_DEVICE_ID && -+ previous_map->device_id != 0) { -+ -+ /* Retrieve the PCI resource size for plat attach after confirming -+ * these devices are FPGAs. 
*/ -+ /* mbox resources */ -+ ret = fpga_pci_handle_resources(entry->d_name, search_map); -+ fail_on_quiet(ret != 0, err, "Error retrieving resource information"); -+ /* app resources */ -+ ret = fpga_pci_handle_resources(previous_entry->d_name, previous_map); -+ fail_on_quiet(ret != 0, err, "Error retrieving resource information"); -+ -+ /* copy the results into the spec_array */ -+ spec_array[slot_dev_index].map[FPGA_APP_PF] = *previous_map; -+ spec_array[slot_dev_index].map[FPGA_MGMT_PF] = *search_map; -+ -+ found_afi_slot = true; -+ slot_dev_index += 1; -+ if (slot_dev_index >= size) { -+ break; - } -- } -- if (search_map.func >= FPGA_MAX_PF) { -- /* unexpected pf */ -+ -+ /* invalidate the previous_map and do not swap */ -+ previous_map->device_id = 0; - continue; - } -- /* copy the map into the spec array */ -- search_spec.map[search_map.func] = search_map; -- previous_map = search_map; -+ -+ swap(previous_map, search_map); -+ swap(previous_entry, entry); - } -- /* TODO: this has a bug in it: if there are no PCI devices after the last -- * FPGA, it will fail to find that FPGA. 
*/ - - closedir(dirp); - -@@ -312,13 +336,14 @@ fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size) - - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - int - fpga_pci_get_slot_spec(int slot_id, struct fpga_slot_spec *spec) - { - int ret; -+ unsigned int size; - struct fpga_slot_spec spec_array[FPGA_SLOT_MAX]; - - if (slot_id < 0 || slot_id >= FPGA_SLOT_MAX || !spec) { -@@ -327,7 +352,9 @@ fpga_pci_get_slot_spec(int slot_id, struct fpga_slot_spec *spec) - - memset(spec_array, 0, sizeof(spec_array)); - -- ret = fpga_pci_get_all_slot_specs(spec_array, sizeof_array(spec_array)); -+ /* tell fpga_pci_get_all_slot_specs not to search past the slot number */ -+ size = min(sizeof_array(spec_array), (unsigned) slot_id); -+ ret = fpga_pci_get_all_slot_specs(spec_array, size); - fail_on_quiet(ret, err, "Unable to read PCI device information."); - - if (spec_array[slot_id].map[FPGA_APP_PF].vendor_id == 0) { -@@ -338,21 +365,49 @@ fpga_pci_get_slot_spec(int slot_id, struct fpga_slot_spec *spec) - *spec = spec_array[slot_id]; - return 0; - err: -- return -1; -+ return FPGA_ERR_FAIL; - } - - int --fpga_pci_get_resource_map(int slot_id, int pf_id, struct fpga_pci_resource_map *map) -+fpga_pci_get_resource_map(int slot_id, int pf_id, -+ struct fpga_pci_resource_map *map) - { -- (void) slot_id; -- (void) pf_id; -- (void) map; -- return -ENOSYS; -+ int ret; -+ -+ if (slot_id < 0 || slot_id >= FPGA_SLOT_MAX || -+ pf_id < 0 || pf_id >= FPGA_MAX_PF || -+ !map) { -+ return -EINVAL; -+ } -+ -+ struct fpga_slot_spec slot_spec; -+ memset(&slot_spec, 0, sizeof(struct fpga_slot_spec)); -+ -+ ret = fpga_pci_get_slot_spec(slot_id, &slot_spec); -+ fail_on_quiet(ret, out, "fpga_pci_get_slot_spec failed"); -+ -+ *map = slot_spec.map[pf_id]; -+out: -+ return ret; - } - - int --fpga_pci_rescan_slot_app_pfs(int slot_id) -+fpga_pci_rescan_slot_app_pfs(void) - { -- (void) slot_id; -- return -ENOSYS; -+ /** Setup and write '1' to the PCI rescan file */ -+ char 
sysfs_name[NAME_MAX + 1]; -+ int ret = snprintf(sysfs_name, sizeof(sysfs_name), "/sys/bus/pci/rescan"); -+ -+ fail_on_quiet(ret < 0, err, -+ "Error building the sysfs path for PCI rescan file"); -+ fail_on_quiet((size_t) ret >= sizeof(sysfs_name), err, -+ "sysfs path too long for PCI rescan file"); -+ -+ /** Write a "1" to the PCI rescan file */ -+ ret = fpga_pci_write_one2file(sysfs_name); -+ fail_on_quiet(ret != 0, err, "fpga_pci_write_one2file failed"); -+ -+ return 0; -+err: -+ return FPGA_ERR_FAIL; - } -diff --git a/sdk/userspace/hal/src/api/mbox/hw/Makefile b/sdk/userspace/hal/src/api/mbox/hw/Makefile -index d7e68e5..edb667a 100644 ---- a/sdk/userspace/hal/src/api/mbox/hw/Makefile -+++ b/sdk/userspace/hal/src/api/mbox/hw/Makefile -@@ -24,7 +24,7 @@ HALLIB_PATH = $(TOP)/lib - INCLUDES = -I. -I$(HALINC_PATH) -I$(TOPINC_PATH) -I$(FPGADINC_PATH) -I$(UTILINC_PATH) -I/usr/include - - #OPT=-O2 --CFLAGS=$(OPT) -g -fPIC -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) -+CFLAGS=$(OPT) -g -fPIC -Wall -Werror -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) - - SRC = fpga_hal_mbox.c - OBJ = $(SRC:.c=.o) -diff --git a/sdk/userspace/hal/src/api/reg/Makefile b/sdk/userspace/hal/src/api/reg/Makefile -index ead0ee4..52c9afe 100644 ---- a/sdk/userspace/hal/src/api/reg/Makefile -+++ b/sdk/userspace/hal/src/api/reg/Makefile -@@ -24,7 +24,7 @@ HALLIB_PATH = $(TOP)/lib - INCLUDES = -I. 
-I$(HALINC_PATH) -I$(TOPINC_PATH) -I$(FPGADINC_PATH) -I$(UTILINC_PATH) -I/usr/include - - #OPT=-O2 --CFLAGS=$(OPT) -g -fPIC -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) -+CFLAGS=$(OPT) -g -fPIC -Wall -Werror -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) - - SRC = $(wildcard *.c) - OBJ = $(SRC:.c=.o) -diff --git a/sdk/userspace/hal/src/api/reg/fpga_hal_reg.c b/sdk/userspace/hal/src/api/reg/fpga_hal_reg.c -index 44b61b5..83e4997 100644 ---- a/sdk/userspace/hal/src/api/reg/fpga_hal_reg.c -+++ b/sdk/userspace/hal/src/api/reg/fpga_hal_reg.c -@@ -41,6 +41,20 @@ fpga_hal_dev_reg_write(int dev_index, uint64_t offset, uint32_t value) - return fpga_plat_dev_reg_write(dev_index, offset, value); - } - -+int -+fpga_hal_dev_reg_read64(int dev_index, uint64_t offset, uint64_t *value) -+{ -+ log_debug("enter"); -+ return fpga_plat_dev_reg_read64(dev_index, offset, value); -+} -+ -+int -+fpga_hal_dev_reg_write64(int dev_index, uint64_t offset, uint64_t value) -+{ -+ log_debug("enter"); -+ return fpga_plat_dev_reg_write64(dev_index, offset, value); -+} -+ - /************************************************************************ - * Single device attachment and use. - * e.g. for applications that only attach to one FPGA at a time, -diff --git a/sdk/userspace/hal/src/platform/hw/Makefile b/sdk/userspace/hal/src/platform/hw/Makefile -index 7d6f203..05a20cf 100644 ---- a/sdk/userspace/hal/src/platform/hw/Makefile -+++ b/sdk/userspace/hal/src/platform/hw/Makefile -@@ -24,7 +24,7 @@ HALLIB_PATH = $(TOP)/lib - INCLUDES = -I. 
-I$(HALINC_PATH) -I$(TOPINC_PATH) -I$(FPGADINC_PATH) -I$(UTILINC_PATH) -I/usr/include - - #OPT=-O2 --CFLAGS=$(OPT) -g -fPIC -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) -+CFLAGS=$(OPT) -g -fPIC -Wall -Werror -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) - - SRC = $(wildcard *.c) - OBJ = $(SRC:.c=.o) -diff --git a/sdk/userspace/hal/src/platform/hw/fpga_hal_plat.c b/sdk/userspace/hal/src/platform/hw/fpga_hal_plat.c -index 0dbe576..bc17b82 100644 ---- a/sdk/userspace/hal/src/platform/hw/fpga_hal_plat.c -+++ b/sdk/userspace/hal/src/platform/hw/fpga_hal_plat.c -@@ -198,7 +198,8 @@ err: - ************************************************************************/ - - int --fpga_plat_dev_attach(struct fpga_slot_spec *spec, int pf_id, int bar_id, int *dev_index) -+fpga_plat_dev_attach(struct fpga_slot_spec *spec, int pf_id, int bar_id, -+ bool write_combining, int *dev_index) - { - log_debug("enter"); - -@@ -247,7 +248,7 @@ fpga_plat_dev_attach(struct fpga_slot_spec *spec, int pf_id, int bar_id, int *de - sysfs_name, map->device_id); - - char wc_suffix[3] = "\0"; -- if (map->resource_burstable[bar_id]) { -+ if (map->resource_burstable[bar_id] && write_combining) { - strncpy(wc_suffix, "_wc", sizeof(wc_suffix)); - } - -@@ -353,6 +354,84 @@ err: - return -1; - } - -+int -+fpga_plat_dev_reg_read64(int dev_index, uint64_t offset, uint64_t *value) -+{ -+ log_debug("dev_index=%d", dev_index); -+ fail_on(!value, err, "value is NULL"); -+ -+ int ret = fpga_plat_dev_check_mem_offset(dev_index, offset); -+ fail_on(ret != 0, err, "Invalid offset 0x%" PRIx64 ", or not attached", offset); -+ -+ uint64_t *reg_ptr = (uint64_t *)fpga_plat_dev_get_mem_at_offset(dev_index, -+ offset); -+ fail_on(!reg_ptr, err, "fpga_plat_get_mem_at_offset failed"); -+ -+ *value = *reg_ptr; -+ -+ log_debug("offset=0x%" PRIx64 ", value=0x%" PRIx64, offset, *value); -+ return 0; -+err: -+ return -1; -+} -+ -+int -+fpga_plat_dev_reg_write64(int 
dev_index, uint64_t offset, uint64_t value) -+{ -+ log_debug("dev_index=%d", dev_index); -+ -+ int ret = fpga_plat_dev_check_mem_offset(dev_index, offset); -+ fail_on(ret != 0, err, "Invalid offset=0x%" PRIx64 ", or not attached", -+ offset); -+ -+ log_debug("offset=0x%" PRIx64 ", value=0x%" PRIx64, offset, value); -+ uint64_t *reg_ptr = (uint64_t *)fpga_plat_dev_get_mem_at_offset(dev_index, -+ offset); -+ fail_on(!reg_ptr, err, "fpga_plat_get_mem_at_offset failed"); -+ -+ *reg_ptr = value; -+ -+ return 0; -+err: -+ return -1; -+} -+ -+int -+fpga_plat_dev_reg_write_burst(int dev_index, uint64_t offset, uint32_t* datap, -+ uint32_t dword_len) -+{ -+ int ret; -+ uint32_t i; -+ static const uint32_t dword_mult = 4; -+ log_debug("dev_index=%d", dev_index); -+ -+ /* validate the beginning of the data range */ -+ ret = fpga_plat_dev_check_mem_offset(dev_index, offset); -+ fail_on(ret != 0, err, "Invalid offset=0x%" PRIx64 ", or not attached", -+ offset); -+ -+ /* validate the end of the data range */ -+ ret = fpga_plat_dev_check_mem_offset(dev_index, -+ offset + dword_len * dword_mult - 1); -+ fail_on(ret != 0, err, "Invalid offset=0x%" PRIx64 " (out of range)", -+ offset); -+ -+ /* get the pointer to the beginning of the range */ -+ log_debug("offset=0x%" PRIx64, offset); -+ uint32_t *reg_ptr = (uint32_t *)fpga_plat_dev_get_mem_at_offset(dev_index, -+ offset); -+ fail_on(!reg_ptr, err, "fpga_plat_get_mem_at_offset failed"); -+ -+ /* memcpy */ -+ for (i = 0; i < dword_len; ++i) { -+ reg_ptr[i] = datap[i]; -+ } -+ -+ return 0; -+err: -+ return -1; -+} -+ - /************************************************************************ - * Single device attachment and use. - * e.g. 
for applications that only attach to one FPGA at a time, -@@ -366,7 +445,7 @@ fpga_plat_attach(struct fpga_slot_spec *spec, int pf_id, int bar_id) - log_debug("enter"); - - int dev_index = -1; -- int ret = fpga_plat_dev_attach(spec, pf_id, bar_id, &dev_index); -+ int ret = fpga_plat_dev_attach(spec, pf_id, bar_id, false, &dev_index); - fail_on(ret != 0, err, "fpga_plat_dev_attach failed"); - - if (dev_index != 0) { -diff --git a/sdk/userspace/include/fpga_common.h b/sdk/userspace/include/fpga_common.h -index 8921fb6..46786a7 100644 ---- a/sdk/userspace/include/fpga_common.h -+++ b/sdk/userspace/include/fpga_common.h -@@ -79,10 +79,8 @@ enum { - FPGA_ERR_CL_ID_MISMATCH = 12, - /** CL DDR calibration failed */ - FPGA_ERR_CL_DDR_CALIB_FAILED = 13, -- /** fpga_clk_recipe is invalid */ -- FPGA_ERR_CLK_RECIPE_INVALID = 14, -- /** fpga_clk_recipe programming failed */ -- FPGA_ERR_CLK_RECIPE_FAILED = 15, -+ /** generic/unspecified error */ -+ FPGA_ERR_FAIL = 14, - - FPGA_ERR_END - }; -@@ -95,8 +93,7 @@ enum { - ((error) == FPGA_ERR_AFI_CMD_API_VERSION_INVALID) ? "invalid-afi-cmd-api-version" : \ - ((error) == FPGA_ERR_CL_ID_MISMATCH) ? "cl-id-mismatch" : \ - ((error) == FPGA_ERR_CL_DDR_CALIB_FAILED) ? "cl-ddr-calib-failed" : \ -- ((error) == FPGA_ERR_CLK_RECIPE_INVALID) ? "invalid-clk-recipe" : \ -- ((error) == FPGA_ERR_CLK_RECIPE_FAILED) ? "clk-recipe-failed" : \ -+ ((error) == FPGA_ERR_FAIL) ? "unspecified-error" : \ - "internal-error" - - -diff --git a/sdk/userspace/include/fpga_mgmt.h b/sdk/userspace/include/fpga_mgmt.h -index 047010c..221508e 100644 ---- a/sdk/userspace/include/fpga_mgmt.h -+++ b/sdk/userspace/include/fpga_mgmt.h -@@ -22,28 +22,42 @@ - /** - * Initialize the fpga_mgmt library. - * Calls fpga_pci_init. -- * @returns -- * 0 on success -- * -1 on failure -+ * -+ * @returns 0 on success, non-zero on error - */ - int fpga_mgmt_init(void); - -+/** -+ * Closes the fpga_mgmt library and its dependencies and releases any acquired -+ * resources. 
-+ * -+ * @returns 0 on success, non-zero on error -+ */ - int fpga_mgmt_close(void); - - /** -+ * Get an error code string. -+ * -+ * @param[in] err The error code to decode -+ * @returns a string corresponding to the provided error code. -+ */ -+const char *fpga_mgmt_strerror(int err); -+ -+/** - * Sets the command timeout value in multiples of the delay_msec value. - * -- * @param[in] value timeout, n * delay_msec -+ * @param[in] value timeout, n * delay_msec - */ - void fpag_mgmt_set_cmd_timeout(uint32_t value); - - /** -+ * Sets the value of the delay_msec. The value is used as the basic unit of time -+ * used to calculate timeouts for communicating with the mailbox pf. - * -+ * @param[in] value number of ms used as base time unit - */ - void fpag_mgmt_set_cmd_delay_msec(uint32_t value); - --/* fpga-describe-local-image */ -- - /** - * This structure provides all of the information for - * fpga_mgmt_describe_local_image. -@@ -63,12 +77,14 @@ struct fpga_mgmt_image_info { - * - * @param[in] slot_id the logical slot index - * @param[out] info struct to populate with the slot description -+ * @param[in] flags set flags for for metrics retrieval options - * @returns 0 on success, non-zero on error - */ --int fpga_mgmt_describe_local_image(int slot_id, struct fpga_mgmt_image_info *info); -+int fpga_mgmt_describe_local_image(int slot_id, -+ struct fpga_mgmt_image_info *info, uint32_t flags); - - /** -- * Gets the status of an FPGA. Status values are definted in enum fpga_status. -+ * Gets the status of an FPGA. Status values are defined in enum fpga_status. - * If you need the AFI id at the same time, use fpga_mgmt_describe_local_image. 
- * - * @param[in] slot_id the logical slot index -@@ -86,7 +102,6 @@ int fpga_mgmt_get_status(int slot_id, int *status); - */ - const char *fpga_mgmt_get_status_name(int status); - --/* fpga-clear-local-image */ - /** - * Clears the specified FPGA image slot, including FPGA internal and external - * memories that are used by the slot. -@@ -96,7 +111,6 @@ const char *fpga_mgmt_get_status_name(int status); - */ - int fpga_mgmt_clear_local_image(int slot_id); - --/* fpga-load-local-image */ - /** - * Loads the specified FPGA image to the specified slot number. - * -@@ -107,17 +121,32 @@ int fpga_mgmt_clear_local_image(int slot_id); - int fpga_mgmt_load_local_image(int slot_id, char *afi_id); - - /** -- * getting the status of the 16 virtual LED -+ * Gets the status of the 16 virtual LEDs. Their statuses are returned as a -+ * 16-bit value with each bit corresponding to the on/off state of the LEDs. -+ * -+ * @param[in] slot_id the logical slot index -+ * @param[out] status 16 bits describing the LED states -+ * @returns 0 on success, non-zero on error - */ - int fpga_mgmt_get_vLED_status(int slot_id, uint16_t *status); - - /** -- * set the value for the 16 virtual DIP switchs -+ * Sets the status of the 16 virtual dip switches. Their statuses are set as a -+ * 16-bit value with each bit corresponding to the on/off state of the switches. -+ * -+ * @param[in] slot_id the logical slot index -+ * @param[in] value 16 bits describing the switch states -+ * @returns 0 on success, non-zero on error - */ - int fpga_mgmt_set_vDIP(int slot_id, uint16_t value); - - /** -- * get the value for the 16 virtual DIP switchs -+ * Gets the status of the 16 virtual dip switches. Their statuses are returned -+ * as a 16-bit value with each bit corresponding to the on/off state of the -+ * switches. 
-+ * -+ * @param[in] slot_id the logical slot index -+ * @param[out] value 16 bits describing the switch states -+ * @returns 0 on success, non-zero on error - */ --int fpga_mgmt_get_vDIP_status(int slot_id, uint16_t *); -- -+int fpga_mgmt_get_vDIP_status(int slot_id, uint16_t *value); -diff --git a/sdk/userspace/include/fpga_pci.h b/sdk/userspace/include/fpga_pci.h -index dda8865..20f219a 100644 ---- a/sdk/userspace/include/fpga_pci.h -+++ b/sdk/userspace/include/fpga_pci.h -@@ -33,15 +33,16 @@ enum { - MGMT_PF_BAR_MAX - }; - -+/** -+ * Type definition for a descriptor/handle used to specify a BAR. Initialize -+ * with PCI_BAR_HANDLE_INIT. -+ */ - typedef int pci_bar_handle_t; - #define PCI_BAR_HANDLE_INIT (-1) - - /** - * Initialize the pci library. -- * Calls fpga_hal_plat_init. -- * @returns -- * 0 on success -- * -1 on failure -+ * @returns 0 on success, non-zero on error - */ - int fpga_pci_init(void); - -@@ -57,11 +58,8 @@ int fpga_pci_init(void); - * - * @returns 0 on success, non-zero on error - */ --int fpga_pci_attach(int slot_id, int pf_id, int bar_id, uint32_t flags, pci_bar_handle_t *handle); -- --/** -- */ --int fpga_pci_attach_2(struct fpga_pci_resource_map *map, int bar_id, uint32_t flags, pci_bar_handle_t *handle); -+int fpga_pci_attach(int slot_id, int pf_id, int bar_id, uint32_t flags, -+ pci_bar_handle_t *handle); - - /** - * Flags used to specify options for fpga_pci_attach. -@@ -72,13 +70,13 @@ enum { - }; - - /** -- * Detatch from an FPGA memory space. -+ * Detach from an FPGA memory space. - * - * @param[in] handle the value provided by fpga_pci_attach corresponding to - * the memory space to detach - * @returns 0 on success, non-zero on error - */ --int fpga_pci_detatch(pci_bar_handle_t handle); -+int fpga_pci_detach(pci_bar_handle_t handle); - - /** - * Write a value to a register. 
-@@ -110,7 +108,8 @@ int fpga_pci_poke64(pci_bar_handle_t handle, uint64_t offset, uint64_t value); - * - * @returns 0 on success, non-zero on error - */ --int fpga_pci_write_burst(pci_bar_handle_t handle, uint64_t offset, uint32_t* datap, uint32_t dword_len); -+int fpga_pci_write_burst(pci_bar_handle_t handle, uint64_t offset, -+ uint32_t* datap, uint32_t dword_len); - - /** - * Read a value from a register. -@@ -144,13 +143,30 @@ int fpga_pci_peek64(pci_bar_handle_t handle, uint64_t offset, uint64_t *value); - int fpga_pci_get_slot_spec(int slot_id, struct fpga_slot_spec *spec); - - /** -+ * Populate slot specs for all FPGAs on the system. It is recommended to use -+ * FPGA_SLOT_MAX as the size of the spec_array; -+ * -+ * @param[out] spec_array array to populate -+ * @param[in] size allocated size of the provided array - */ - int fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size); - - /** -+ * Get resource map information for a single slot and physical function. This -+ * information is provided in the slot_spec, but occasionally only the resource -+ * map is needed. -+ * -+ * @param[in] slot_id The logical slot id of the FPGA of interest -+ * @param[in] pf_id physical function id (e.g. FPGA_APP_PF) -+ * @param[out] map resource map to populate -+ * @returns 0 on success, non-zero on error - */ --int fpga_pci_get_resource_map(int slot_id, int pf_id, struct fpga_pci_resource_map *map); -+int fpga_pci_get_resource_map(int slot_id, int pf_id, -+ struct fpga_pci_resource_map *map); - - /** -+ * PCI rescan. 
-+ * -+ * @returns 0 on success, non-zero on error - */ --int fpga_pci_rescan_slot_app_pfs(int slot_id); -+int fpga_pci_rescan_slot_app_pfs(void); -diff --git a/sdk/userspace/include/hal/fpga_hal_plat.h b/sdk/userspace/include/hal/fpga_hal_plat.h -index a7e8088..15afbfc 100644 ---- a/sdk/userspace/include/hal/fpga_hal_plat.h -+++ b/sdk/userspace/include/hal/fpga_hal_plat.h -@@ -75,7 +75,8 @@ int fpga_plat_init(void); - * 0 on success - * -1 on failure - */ --int fpga_plat_dev_attach(struct fpga_slot_spec *spec, int pf_id, int bar_id, int *dev_index); -+int fpga_plat_dev_attach(struct fpga_slot_spec *spec, int pf_id, int bar_id, -+ bool write_combining, int *dev_index); - - /** - * Platform layer detach using the given slot specification. -@@ -114,6 +115,33 @@ int fpga_plat_dev_reg_read(int dev_index, uint64_t offset, uint32_t *value); - */ - int fpga_plat_dev_reg_write(int dev_index, uint64_t offset, uint32_t value); - -+ -+/** -+ * Platform layer register read. -+ * -+ * @param[in] dev_index the attached fpga device index. -+ * @param[in] offset the register offset -+ * @param[in,out] value the register value to return -+ * -+ * @returns -+ * 0 on success -+ * -1 on failure -+ */ -+int fpga_plat_dev_reg_read64(int dev_index, uint64_t offset, uint64_t *value); -+ -+/** -+ * Platform layer register write. -+ * -+ * @param[in] dev_index the attached fpga device index. -+ * @param[in] offset the register offset -+ * @param[in] value the register value to write -+ * -+ * @returns -+ * 0 on success -+ * -1 on failure -+ */ -+int fpga_plat_dev_reg_write64(int dev_index, uint64_t offset, uint64_t value); -+ - /** - * Platform layer get mem base. - * -@@ -125,6 +153,17 @@ int fpga_plat_dev_reg_write(int dev_index, uint64_t offset, uint32_t value); - */ - void *fpga_plat_dev_get_mem_base(int dev_index); - -+/** -+ * Platform layer write burst. -+ * -+ * @param[in] dev_index the attached fpga device index. 
-+ * @param[in] offset the register offset -+ * @param[in] datap source pointer -+ * @param[in] dword_len number of 4 byte words to copy -+ */ -+int fpga_plat_dev_reg_write_burst(int dev_index, uint64_t offset, -+ uint32_t* datap, uint32_t dword_len); -+ - /************************************************************************ - * Single device attachment and use. - * e.g. for applications that only attach to one FPGA at a time, -diff --git a/sdk/userspace/include/hal/fpga_hal_reg.h b/sdk/userspace/include/hal/fpga_hal_reg.h -index fa7bcad..e63cb31 100644 ---- a/sdk/userspace/include/hal/fpga_hal_reg.h -+++ b/sdk/userspace/include/hal/fpga_hal_reg.h -@@ -51,6 +51,32 @@ int fpga_hal_dev_reg_read(int dev_index, uint64_t offset, uint32_t *value); - */ - int fpga_hal_dev_reg_write(int dev_index, uint64_t offset, uint32_t value); - -+/** -+ * FPGA HAL layer register read. -+ * -+ * @param[in] dev_index the attached fpga device index. -+ * @param[in] offset the register offset -+ * @param[in,out] value the register value to return -+ * -+ * @returns -+ * 0 on success -+ * -1 on failure -+ */ -+int fpga_hal_dev_reg_read64(int dev_index, uint64_t offset, uint64_t *value); -+ -+/** -+ * FPGA HAL layer register write. -+ * -+ * @param[in] dev_index the attached fpga device index. -+ * @param[in] offset the register offset -+ * @param[in] value the register value to write -+ * -+ * @returns -+ * 0 on success -+ * -1 on failure -+ */ -+int fpga_hal_dev_reg_write64(int dev_index, uint64_t offset, uint64_t value); -+ - /************************************************************************ - * Single device attachment and use. - * e.g. for applications that only attach to one FPGA at a time, -diff --git a/sdk/userspace/include/utils/log.h b/sdk/userspace/include/utils/log.h -index d3cd7b4..60cec19 100644 ---- a/sdk/userspace/include/utils/log.h -+++ b/sdk/userspace/include/utils/log.h -@@ -163,6 +163,14 @@ static inline __printf(1, 2) void log_dummy(const char *fmt, ...) 
- } \ - } while (0) - -+#define fail_on_quiet(CONDITION, LABEL, ...) \ -+ do { \ -+ if (CONDITION) { \ -+ log_debug(__VA_ARGS__); \ -+ goto LABEL; \ -+ } \ -+ } while (0) -+ - extern const struct logger logger_stdout; - extern const struct logger logger_kmsg; - extern const struct logger *logger_default; -diff --git a/sdk/userspace/utils/Makefile b/sdk/userspace/utils/Makefile -index 90b7306..af10ad3 100644 ---- a/sdk/userspace/utils/Makefile -+++ b/sdk/userspace/utils/Makefile -@@ -18,7 +18,7 @@ INCLUDES = -I. -I../include -I/usr/include - LIB_PATH = $(TOP)/lib - - #OPT=-O2 --CFLAGS=$(OPT) -g -std=gnu99 -fPIC -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) -+CFLAGS=$(OPT) -g -std=gnu99 -fPIC -Wall -Werror -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes $(INCLUDES) - - UTILLIB = $(LIB_PATH)/libutils.a - diff --git a/SDAccel/userspace/src/xclbin.cpp b/SDAccel/userspace/src/xclbin.cpp index 18bb8475..7ce369e5 100644 --- a/SDAccel/userspace/src/xclbin.cpp +++ b/SDAccel/userspace/src/xclbin.cpp @@ -17,6 +17,7 @@ * License for the specific language governing permissions and limitations * under the License. */ +//#define INTERNAL_TESTING 1 #include diff --git a/SDAccel/userspace/src2/LICENSE-2.0.txt b/SDAccel/userspace/src2/LICENSE-2.0.txt deleted file mode 100644 index d6456956..00000000 --- a/SDAccel/userspace/src2/LICENSE-2.0.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/SDAccel/userspace/src2/Makefile b/SDAccel/userspace/src2/Makefile deleted file mode 100755 index f95abfd6..00000000 --- a/SDAccel/userspace/src2/Makefile +++ /dev/null @@ -1,82 +0,0 @@ -# Amazon FPGA Hardware Development Kit -# -# Copyright 2016-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Amazon Software License (the "License"). You may not use -# this file except in compliance with the License. A copy of the License is -# located at -# -# http://aws.amazon.com/asl/ -# -# or in the "license" file accompanying this file. This file is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or -# implied. See the License for the specific language governing permissions and -# limitations under the License. 
- - -# AWS Bare-metal HAL Driver Makefile -# set ec2=1 to compile for F1 instance - -CXX := g++ -CXX_EXT := cpp -AR := ar -ARFLAGS := rcv - -ifeq ($(ec2),1) - CXXFLAGS := -Werror -std=c++11 - STLIB = libxrt-aws.a - SHLIB = libxrt-aws.so -else -# For bare metal testing, i.e. in non EC2 environment - CXXFLAGS := -Werror -std=c++11 -DINTERNAL_TESTING - STLIB = libxrtbm-aws.a - SHLIB = libxrtbm-aws.so -endif - -LIBS := $(STLIB) $(SHLIB) - -XCLHAL_VER = -DXCLHAL_MAJOR_VER=2 -DXCLHAL_MINOR_VER=1 - -# Include XCLHAL includes, AWS fpga_pci/mgmt and AWS kernel drivers -SHIM_INC := -I../include -I$(SDK_DIR)/userspace/include -I$(SDK_DIR)/linux_kernel_drivers - -CXXFLAGS += $(CXXFLAGS) $(XCLHAL_VER) $(SHIM_INC) -fpic -fvisibility=hidden -lrt -Wall - -ifeq ($(debug),1) - CXXFLAGS += -g -DDEBUG -else - CXXFLAGS += -O2 -DNDEBUG -endif - -SRCS := $(wildcard *.$(CXX_EXT)) -OBJS := $(patsubst %.$(CXX_EXT), %.o, $(SRCS)) - --include $(OBJS:.o=.d) - -# the name that will be included as libXXXXX.so -AWS_FPGA_MGMTLIB := fpga_mgmt -AWS_FPGA_MGMTLIB_DIR := $(SDK_DIR)/userspace/lib - -ifeq ($(ec2),1) -LDFLAGS += -L$(AWS_FPGA_MGMTLIB_DIR) -LDLIBS += -l$(AWS_FPGA_MGMTLIB) -endif - -all: $(LIBS) - -clean: - rm -rf *.o *.d lib*drv.* - -%.o: %.$(CXX_EXT) - $(CXX) $(CXXFLAGS) $(MYCFLAGS) $(MYCXXFLAGS) -c $< -o $@ - $(CXX) $(CXXFLAGS) $(MYCFLAGS) $(MYCXXFLAGS) -c -MM $< -o $(patsubst %.o, %.d, $@) - -$(STLIB) : $(OBJS) - $(AR) $(ARFLAGS) -o $@ $(OBJS) - -$(SHLIB) : $(OBJS) - $(CXX) -shared -o $@ $(OBJS) $(LDFLAGS) $(LDLIBS) - -.PHONY: all clean - -.DEFAULT_GOAL := all diff --git a/SDAccel/userspace/src2/debug.cpp b/SDAccel/userspace/src2/debug.cpp deleted file mode 100644 index 7cbee184..00000000 --- a/SDAccel/userspace/src2/debug.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * Debug functionality to AWS hal driver - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. 
A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - - -#include "shim.h" -#include "perfmon_parameters.h" -#include "xclbin2.h" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef _WINDOWS -// TODO: Windows build support -// unistd.h is linux only header file -// it is included for read, write, close, lseek64 -#include -#endif - -#ifdef _WINDOWS -#define __func__ __FUNCTION__ -#endif - -namespace awsbwhal { - // **************** - // Helper functions - // **************** - - void AwsXcl::readDebugIpLayout() - { - if (mIsDebugIpLayoutRead) - return; - - // - // Profiling - addresses and names - // Parsed from debug_ip_layout.rtd contained in xclbin - if (mLogStream.is_open()) { - mLogStream << "debug_ip_layout: reading profile addresses and names..." 
<< std::endl; - } - mMemoryProfilingNumberSlots = getIPCountAddrNames(AXI_MM_MONITOR, mPerfMonBaseAddress, mPerfMonSlotName); - mIsDeviceProfiling = (mMemoryProfilingNumberSlots > 0); - - std::string fifoName; - uint64_t fifoCtrlBaseAddr = mOffsets[XCL_ADDR_SPACE_DEVICE_PERFMON]; - getIPCountAddrNames(AXI_MONITOR_FIFO_LITE, &fifoCtrlBaseAddr, &fifoName); - mPerfMonFifoCtrlBaseAddress = fifoCtrlBaseAddr; - - uint64_t fifoReadBaseAddr = XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL2; - getIPCountAddrNames(AXI_MONITOR_FIFO_FULL, &fifoReadBaseAddr, &fifoName); - mPerfMonFifoReadBaseAddress = fifoReadBaseAddr; - - if (mLogStream.is_open()) { - for (unsigned int i = 0; i < mMemoryProfilingNumberSlots; ++i) { - mLogStream << "debug_ip_layout: AXI_MM_MONITOR slot " << i << ": " - << "base address = 0x" << std::hex << mPerfMonBaseAddress[i] - << ", name = " << mPerfMonSlotName[i] << std::endl; - } - mLogStream << "debug_ip_layout: AXI_MONITOR_FIFO_LITE: " - << "base address = 0x" << std::hex << fifoCtrlBaseAddr << std::endl; - mLogStream << "debug_ip_layout: AXI_MONITOR_FIFO_FULL: " - << "base address = 0x" << std::hex << fifoReadBaseAddr << std::endl; - } - - // Only need to read it once - mIsDebugIpLayoutRead = true; - } - - // Gets the information about the specified IP from the sysfs debug_ip_table. 
- // The IP types are defined in xclbin.h - uint32_t AwsXcl::getIPCountAddrNames(int type, uint64_t *baseAddress, std::string * portNames) { - debug_ip_layout *map; - std::string path = "/sys/bus/pci/devices/" + mDevUserName + "/debug_ip_layout"; - std::ifstream ifs(path.c_str(), std::ifstream::binary); - uint32_t count = 0; - char buffer[4096]; - if( ifs ) { - //sysfs max file size is 4096 - ifs.read(buffer, 4096); - if (ifs.gcount() > 0) { - map = (debug_ip_layout*)(buffer); - for( unsigned int i = 0; i < map->m_count; i++ ) { - if (map->m_debug_ip_data[i].m_type == type) { - if(baseAddress)baseAddress[count] = map->m_debug_ip_data[i].m_base_address; - if(portNames) portNames[count] = (char*)map->m_debug_ip_data[i].m_name; - ++count; - } - } - } - ifs.close(); - } - return count; - } - - // Read APM performance counters - size_t AwsXcl::xclDebugReadCheckers(xclDebugCheckersResults* aCheckerResults) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << aCheckerResults - << ", Read protocl checker status..." 
<< std::endl; - } - - size_t size = 0; - - uint64_t statusRegisters[] = { - LAPC_OVERALL_STATUS_OFFSET, - - LAPC_CUMULATIVE_STATUS_0_OFFSET, LAPC_CUMULATIVE_STATUS_1_OFFSET, - LAPC_CUMULATIVE_STATUS_2_OFFSET, LAPC_CUMULATIVE_STATUS_3_OFFSET, - - LAPC_SNAPSHOT_STATUS_0_OFFSET, LAPC_SNAPSHOT_STATUS_1_OFFSET, - LAPC_SNAPSHOT_STATUS_2_OFFSET, LAPC_SNAPSHOT_STATUS_3_OFFSET - }; - - uint64_t baseAddress[XLAPC_MAX_NUMBER_SLOTS]; - uint32_t numSlots = getIPCountAddrNames(LAPC, baseAddress, nullptr); - uint32_t temp[XLAPC_STATUS_PER_SLOT]; - aCheckerResults->NumSlots = numSlots; - snprintf(aCheckerResults->DevUserName, 256, "%s", mDevUserName.c_str()); - for (uint32_t s = 0; s < numSlots; ++s) { - for (int c=0; c < XLAPC_STATUS_PER_SLOT; c++) - size += xclRead(XCL_ADDR_SPACE_DEVICE_CHECKER, baseAddress[s]+statusRegisters[c], &temp[c], 4); - - aCheckerResults->OverallStatus[s] = temp[XLAPC_OVERALL_STATUS]; - std::copy(temp+XLAPC_CUMULATIVE_STATUS_0, temp+XLAPC_SNAPSHOT_STATUS_0, aCheckerResults->CumulativeStatus[s]); - std::copy(temp+XLAPC_SNAPSHOT_STATUS_0, temp+XLAPC_STATUS_PER_SLOT, aCheckerResults->SnapshotStatus[s]); - } - - return size; - } - - // Read APM performance counters - - size_t AwsXcl::xclDebugReadCounters(xclDebugCountersResults* aCounterResults) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << XCL_PERF_MON_MEMORY << ", " << aCounterResults - << ", Read device counters..." 
<< std::endl; - } - - size_t size = 0; - - uint64_t spm_offsets[] = { - XSPM_SAMPLE_WRITE_BYTES_OFFSET, - XSPM_SAMPLE_WRITE_TRANX_OFFSET, - XSPM_SAMPLE_READ_BYTES_OFFSET, - XSPM_SAMPLE_READ_TRANX_OFFSET, - XSPM_SAMPLE_OUTSTANDING_COUNTS_OFFSET, - XSPM_SAMPLE_LAST_WRITE_ADDRESS_OFFSET, - XSPM_SAMPLE_LAST_WRITE_DATA_OFFSET, - XSPM_SAMPLE_LAST_READ_ADDRESS_OFFSET, - XSPM_SAMPLE_LAST_READ_DATA_OFFSET - }; - - // Read all metric counters - uint64_t baseAddress[XSPM_MAX_NUMBER_SLOTS]; - uint32_t numSlots = getIPCountAddrNames(AXI_MM_MONITOR, baseAddress, nullptr); - - uint32_t temp[XSPM_DEBUG_SAMPLE_COUNTERS_PER_SLOT]; - - aCounterResults->NumSlots = numSlots; - snprintf(aCounterResults->DevUserName, 256, "%s", mDevUserName.c_str()); - for (uint32_t s=0; s < numSlots; s++) { - uint32_t sampleInterval; - // Read sample interval register to latch the sampled metric counters - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress[s] + XSPM_SAMPLE_OFFSET, - &sampleInterval, 4); - - for (int c=0; c < XSPM_DEBUG_SAMPLE_COUNTERS_PER_SLOT; c++) - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress[s]+spm_offsets[c], &temp[c], 4); - - aCounterResults->WriteBytes[s] = temp[0]; - aCounterResults->WriteTranx[s] = temp[1]; - - aCounterResults->ReadBytes[s] = temp[2]; - aCounterResults->ReadTranx[s] = temp[3]; - aCounterResults->OutStandCnts[s] = temp[4]; - aCounterResults->LastWriteAddr[s] = temp[5]; - aCounterResults->LastWriteData[s] = temp[6]; - aCounterResults->LastReadAddr[s] = temp[7]; - aCounterResults->LastReadData[s] = temp[8]; - } - return size; - } -} // namespace awsbwhal - -size_t xclDebugReadIPStatus(xclDeviceHandle handle, xclDebugReadType type, void* debugResults) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - switch (type) { - case XCL_DEBUG_READ_TYPE_LAPC : - return drv->xclDebugReadCheckers(reinterpret_cast(debugResults)); - case XCL_DEBUG_READ_TYPE_SPM : - return 
drv->xclDebugReadCounters(reinterpret_cast(debugResults)); - default : - break; - }; - return -1; -} diff --git a/SDAccel/userspace/src2/perf.cpp b/SDAccel/userspace/src2/perf.cpp deleted file mode 100755 index 74a0d994..00000000 --- a/SDAccel/userspace/src2/perf.cpp +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * Performance Monitoring using PCIe for AWS HAL Driver - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -#include "shim.h" -#include "perfmon_parameters.h" -#include "xocl/xocl_ioctl.h" - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef _WINDOWS -#define __func__ __FUNCTION__ -#endif - -namespace awsbwhal { - - static int unmgdPread(int fd, void *buffer, size_t size, uint64_t addr) - { - drm_xocl_pread_unmgd unmgd = { 0, 0, addr, size, reinterpret_cast(buffer) }; - return ioctl(fd, DRM_IOCTL_XOCL_PREAD_UNMGD, &unmgd); - } - - // **************** - // Helper functions - // **************** - - unsigned AwsXcl::getBankCount() { - return mDeviceInfo.mDDRBankCount; - } - - void AwsXcl::xclSetProfilingNumberSlots(xclPerfMonType type, uint32_t numSlots) { - if (type == XCL_PERF_MON_OCL_REGION) - mOclRegionProfilingNumberSlots = numSlots; - } - - // Get host timestamp to write to APM - // IMPORTANT NOTE: this *must* be compatible with the method of generating - // timestamps as defined in RTProfile::getTraceTime() - uint64_t 
AwsXcl::getHostTraceTimeNsec() { - struct timespec now; - int err; - if ((err = clock_gettime(CLOCK_MONOTONIC, &now)) < 0) - return 0; - - return (uint64_t) now.tv_sec * 1000000000UL + (uint64_t) now.tv_nsec; - } - - uint64_t AwsXcl::getPerfMonBaseAddress(xclPerfMonType type, uint32_t slotNum) { - if (type == XCL_PERF_MON_MEMORY) return mPerfMonBaseAddress[slotNum]; - return 0; - } - - uint64_t AwsXcl::getPerfMonFifoBaseAddress(xclPerfMonType type, uint32_t fifonum) { - if (type == XCL_PERF_MON_MEMORY) return mPerfMonFifoCtrlBaseAddress; - return 0; - } - - uint64_t AwsXcl::getPerfMonFifoReadBaseAddress(xclPerfMonType type, uint32_t fifonum) { - if (type == XCL_PERF_MON_MEMORY) return mPerfMonFifoReadBaseAddress; - return 0; - } - - uint32_t AwsXcl::getPerfMonNumberFifos(xclPerfMonType type) { - if (type == XCL_PERF_MON_MEMORY) - return XPAR_AXI_PERF_MON_0_TRACE_NUMBER_FIFO; - if (type == XCL_PERF_MON_HOST_INTERFACE) - return XPAR_AXI_PERF_MON_1_TRACE_NUMBER_FIFO; - if (type == XCL_PERF_MON_OCL_REGION) { - if (mOclRegionProfilingNumberSlots > 4) - return 3; - else - return 2; - } - return 0; - } - - uint32_t AwsXcl::getPerfMonNumberSlots(xclPerfMonType type) { - if (type == XCL_PERF_MON_MEMORY) { - return (getBankCount() + 1); - } - if (type == XCL_PERF_MON_HOST_INTERFACE) { - return XPAR_AXI_PERF_MON_1_NUMBER_SLOTS; - } - if (type == XCL_PERF_MON_OCL_REGION) { - return mOclRegionProfilingNumberSlots; - } - return 1; - } - - uint32_t AwsXcl::getPerfMonNumberSamples(xclPerfMonType type) { - if (type == XCL_PERF_MON_MEMORY) return XPAR_AXI_PERF_MON_0_TRACE_NUMBER_SAMPLES; - if (type == XCL_PERF_MON_HOST_INTERFACE) return XPAR_AXI_PERF_MON_1_TRACE_NUMBER_SAMPLES; - if (type == XCL_PERF_MON_OCL_REGION) return XPAR_AXI_PERF_MON_2_TRACE_NUMBER_SAMPLES; - return 0; - } - - uint32_t AwsXcl::getPerfMonByteScaleFactor(xclPerfMonType type) { - return 1; - } - - uint8_t AwsXcl::getPerfMonShowIDS(xclPerfMonType type) { - if (type == XCL_PERF_MON_MEMORY) { - if (getBankCount() > 
1) - return XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS_2DDR; - return XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS; - } - if (type == XCL_PERF_MON_HOST_INTERFACE) { - return XPAR_AXI_PERF_MON_1_SHOW_AXI_IDS; - } - if (type == XCL_PERF_MON_OCL_REGION) { - return XPAR_AXI_PERF_MON_2_SHOW_AXI_IDS; - } - return 0; - } - - uint8_t AwsXcl::getPerfMonShowLEN(xclPerfMonType type) { - if (type == XCL_PERF_MON_MEMORY) { - if (getBankCount() > 1) - return XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN_2DDR; - return XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN; - } - if (type == XCL_PERF_MON_HOST_INTERFACE) { - return XPAR_AXI_PERF_MON_1_SHOW_AXI_LEN; - } - if (type == XCL_PERF_MON_OCL_REGION) { - return XPAR_AXI_PERF_MON_2_SHOW_AXI_LEN; - } - return 0; - } - - uint32_t AwsXcl::getPerfMonSlotStartBit(xclPerfMonType type, uint32_t slotnum) { - // NOTE: ID widths also set to 5 in HEAD/data/sdaccel/board_support/alpha_data/common/xclplat/xclplat_ip.tcl - uint32_t bitsPerID = 5; - uint8_t showIDs = getPerfMonShowIDS(type); - uint8_t showLen = getPerfMonShowLEN(type); - uint32_t bitsPerSlot = 10 + (bitsPerID * 4 * showIDs) + (16 * showLen); - return (18 + (bitsPerSlot * slotnum)); - } - - uint32_t AwsXcl::getPerfMonSlotDataWidth(xclPerfMonType type, uint32_t slotnum) { - if (slotnum == 0) return XPAR_AXI_PERF_MON_0_SLOT0_DATA_WIDTH; - if (slotnum == 1) return XPAR_AXI_PERF_MON_0_SLOT1_DATA_WIDTH; - if (slotnum == 2) return XPAR_AXI_PERF_MON_0_SLOT2_DATA_WIDTH; - if (slotnum == 3) return XPAR_AXI_PERF_MON_0_SLOT3_DATA_WIDTH; - if (slotnum == 4) return XPAR_AXI_PERF_MON_0_SLOT4_DATA_WIDTH; - if (slotnum == 5) return XPAR_AXI_PERF_MON_0_SLOT5_DATA_WIDTH; - if (slotnum == 6) return XPAR_AXI_PERF_MON_0_SLOT6_DATA_WIDTH; - if (slotnum == 7) return XPAR_AXI_PERF_MON_0_SLOT7_DATA_WIDTH; - return XPAR_AXI_PERF_MON_0_SLOT0_DATA_WIDTH; - } - - // Get the device clock frequency (in MHz) - double AwsXcl::xclGetDeviceClockFreqMHz() { - unsigned clockFreq = mDeviceInfo.mOCLFrequency[0]; - if (clockFreq == 0) - clockFreq = 200; - - //if 
(mLogStream.is_open()) - // mLogStream << __func__ << ": clock freq = " << clockFreq << std::endl; - return ((double)clockFreq); - } - - // Get the maximum bandwidth for host reads from the device (in MB/sec) - // NOTE: for now, set to: (256/8 bytes) * 300 MHz = 9600 MBps - double AwsXcl::xclGetReadMaxBandwidthMBps() { - return 9600.0; - } - - // Get the maximum bandwidth for host writes to the device (in MB/sec) - // NOTE: for now, set to: (256/8 bytes) * 300 MHz = 9600 MBps - double AwsXcl::xclGetWriteMaxBandwidthMBps() { - return 9600.0; - } - - // Convert binary string to decimal - uint32_t AwsXcl::bin2dec(std::string str, int start, int number) { - return bin2dec(str.c_str(), start, number); - } - - // Convert binary char * to decimal - uint32_t AwsXcl::bin2dec(const char* ptr, int start, int number) { - const char* temp_ptr = ptr + start; - uint32_t value = 0; - int i = 0; - - do { - if (*temp_ptr != '0' && *temp_ptr!= '1') - return value; - value <<= 1; - if(*temp_ptr=='1') - value += 1; - i++; - temp_ptr++; - } while (i < number); - - return value; - } - - // Convert decimal to binary string - // NOTE: length of string is always sizeof(uint32_t) * 8 - std::string AwsXcl::dec2bin(uint32_t n) { - char result[(sizeof(uint32_t) * 8) + 1]; - unsigned index = sizeof(uint32_t) * 8; - result[index] = '\0'; - - do { - result[ --index ] = '0' + (n & 1); - } while (n >>= 1); - - for (int i=index-1; i >= 0; --i) - result[i] = '0'; - - return std::string( result ); - } - - // Convert decimal to binary string of length bits - std::string AwsXcl::dec2bin(uint32_t n, unsigned bits) { - char result[bits + 1]; - unsigned index = bits; - result[index] = '\0'; - - do result[ --index ] = '0' + (n & 1); - while (n >>= 1); - - for (int i=index-1; i >= 0; --i) - result[i] = '0'; - - return std::string( result ); - } - - // Reset all APM trace AXI stream FIFOs - size_t AwsXcl::resetFifos(xclPerfMonType type) { - uint64_t resetCoreAddress[] = { - getPerfMonFifoBaseAddress(type, 0) + 
AXI_FIFO_SRR, - getPerfMonFifoBaseAddress(type, 1) + AXI_FIFO_SRR, - getPerfMonFifoBaseAddress(type, 2) + AXI_FIFO_SRR - }; - - uint64_t resetFifoAddress[] = { - getPerfMonFifoBaseAddress(type, 0) + AXI_FIFO_RDFR, - getPerfMonFifoBaseAddress(type, 1) + AXI_FIFO_RDFR, - getPerfMonFifoBaseAddress(type, 2) + AXI_FIFO_RDFR - }; - - size_t size = 0; - uint32_t regValue = AXI_FIFO_RESET_VALUE; - - for (int f=0; f < XPAR_AXI_PERF_MON_0_TRACE_NUMBER_FIFO; f++) { - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, resetCoreAddress[f], ®Value, 4); - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, resetFifoAddress[f], ®Value, 4); - } - - return size; - } - - // ******** - // Counters - // ******** - - // Start device counters performance monitoring - size_t AwsXcl::xclPerfMonStartCounters(xclPerfMonType type) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Start device counters..." << std::endl; - } - - // Update addresses for debug/profile IP - readDebugIpLayout(); - - if (!mIsDeviceProfiling) - return 0; - - size_t size = 0; - uint32_t regValue; - uint64_t baseAddress; - uint32_t numSlots = getPerfMonNumberSlots(type); - - for (uint32_t i = 0; i < numSlots; i++) { - baseAddress = getPerfMonBaseAddress(type, i); - - // 1. Reset AXI - MM monitor metric counters - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - - regValue = regValue | XSPM_CR_COUNTER_RESET_MASK; - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - - regValue = regValue & ~(XSPM_CR_COUNTER_RESET_MASK); - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - - // 2. Start AXI-MM monitor metric counters - regValue = regValue | XSPM_CR_COUNTER_ENABLE_MASK; - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - - // 3. 
Read from sample register to ensure total time is read again at end - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_SAMPLE_OFFSET, ®Value, 4); - } - return size; - } - - // Stop both profile and trace performance monitoring - size_t AwsXcl::xclPerfMonStopCounters(xclPerfMonType type) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Stop and reset device counters..." << std::endl; - } - - if (!mIsDeviceProfiling) - return 0; - - size_t size = 0; - uint32_t regValue; - uint64_t baseAddress; - uint32_t numSlots = getPerfMonNumberSlots(type); - - for (uint32_t i = 0; i < numSlots; i++) { - baseAddress = getPerfMonBaseAddress(type, i); - - // 1. Stop SPM metric counters - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - - regValue = regValue & ~(XSPM_CR_COUNTER_ENABLE_MASK); - size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XSPM_CONTROL_OFFSET, ®Value, 4); - } - return size; - } - - // Read APM performance counters - size_t AwsXcl::xclPerfMonReadCounters(xclPerfMonType type, xclCounterResults& counterResults) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << ", " << &counterResults - << ", Read device counters..." 
<< std::endl; - } - - // Initialize all values in struct to 0 - memset(&counterResults, 0, sizeof(xclCounterResults)); - - if (!mIsDeviceProfiling) - return 0; - - size_t size = 0; - uint64_t baseAddress; - uint32_t sampleInterval; - uint32_t numSlots = getPerfMonNumberSlots(type); - - for (uint32_t s = 0; s < numSlots; s++) { - baseAddress = getPerfMonBaseAddress(type, s); - - // Read sample interval register - // NOTE: this also latches the sampled metric counters - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress + XSPM_SAMPLE_OFFSET, - &sampleInterval, 4); - // Need to do this for every xilmon - if (s == 0) { - counterResults.SampleIntervalUsec = sampleInterval / xclGetDeviceClockFreqMHz(); - } - - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress + XSPM_SAMPLE_WRITE_BYTES_OFFSET, - &counterResults.WriteBytes[s], 4); - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress + XSPM_SAMPLE_WRITE_TRANX_OFFSET, - &counterResults.WriteTranx[s], 4); - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress + XSPM_SAMPLE_WRITE_LATENCY_OFFSET, - &counterResults.WriteLatency[s], 4); - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress + XSPM_SAMPLE_READ_BYTES_OFFSET, - &counterResults.ReadBytes[s], 4); - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress + XSPM_SAMPLE_READ_TRANX_OFFSET, - &counterResults.ReadTranx[s], 4); - size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, - baseAddress + XSPM_SAMPLE_READ_LATENCY_OFFSET, - &counterResults.ReadLatency[s], 4); - - if (mLogStream.is_open()) { - mLogStream << "Reading ...SlotNum : " << s << std::endl; - mLogStream << "Reading ...WriteBytes : " << counterResults.WriteBytes[s] << std::endl; - mLogStream << "Reading ...WriteTranx : " << counterResults.WriteTranx[s] << std::endl; - mLogStream << "Reading ...WriteLatency : " << counterResults.WriteLatency[s] << std::endl; - mLogStream << "Reading ...ReadBytes : " << counterResults.ReadBytes[s] << std::endl; - mLogStream << 
"Reading ...ReadTranx : " << counterResults.ReadTranx[s] << std::endl; - mLogStream << "Reading ...ReadLatency : " << counterResults.ReadLatency[s] << std::endl; - } - } - return size; - } - - // ***** - // Trace - // ***** - - // Clock training used in converting device trace timestamps to host domain - size_t AwsXcl::xclPerfMonClockTraining(xclPerfMonType type) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Send clock training..." << std::endl; - } - // We're snapping first event to start of cu. - return 1; - } - - // Start trace performance monitoring - size_t AwsXcl::xclPerfMonStartTrace(xclPerfMonType type, uint32_t startTrigger) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << ", " << startTrigger - << ", Start device tracing..." << std::endl; - } - - // Update addresses for debug/profile IP - readDebugIpLayout(); - - if (!mIsDeviceProfiling) - return 0; - - size_t size = 0; - xclPerfMonGetTraceCount(type); - size += resetFifos(type); - xclPerfMonGetTraceCount(type); - return size; - } - - // Stop trace performance monitoring - size_t AwsXcl::xclPerfMonStopTrace(xclPerfMonType type) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << type << ", Stop and reset device tracing..." << std::endl; - } - - if (!mIsDeviceProfiling) - return 0; - - size_t size = 0; - xclPerfMonGetTraceCount(type); - size += resetFifos(type); - return size; - } - - // Get trace word count - uint32_t AwsXcl::xclPerfMonGetTraceCount(xclPerfMonType type) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << std::endl; - } - - if (!mIsDeviceProfiling) - return 0; - - xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? 
- XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; - - uint32_t fifoCount = 0; - uint32_t numSamples = 0; - uint32_t numBytes = 0; - xclRead(addressSpace, getPerfMonFifoBaseAddress(type, 0) + AXI_FIFO_RLR, &fifoCount, 4); - // Read bits 22:0 per AXI-Stream FIFO product guide (PG080, 10/1/14) - numBytes = fifoCount & 0x7FFFFF; - numSamples = numBytes / (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 8); - - if (mLogStream.is_open()) { - mLogStream << " No. of trace samples = " << std::dec << numSamples - << " (fifoCount = 0x" << std::hex << fifoCount << ")" << std::dec << std::endl; - } - - return numSamples; - } - - // Read all values from APM trace AXI stream FIFOs - size_t AwsXcl::xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() - << ", " << type << ", " << &traceVector - << ", Reading device trace stream..." << std::endl; - } - - traceVector.mLength = 0; - if (!mIsDeviceProfiling) - return 0; - - uint32_t numSamples = xclPerfMonGetTraceCount(type); - if (numSamples == 0) - return 0; - - uint64_t fifoReadAddress[] = { 0, 0, 0 }; - if (type == XCL_PERF_MON_MEMORY) { - fifoReadAddress[0] = getPerfMonFifoReadBaseAddress(type, 0) + AXI_FIFO_RDFD_AXI_FULL; - } - else { - for (int i = 0; i < 3; i++) - fifoReadAddress[i] = getPerfMonFifoReadBaseAddress(type, i) + AXI_FIFO_RDFD; - } - - size_t size = 0; - - // Limit to max number of samples so we don't overrun trace buffer on host - uint32_t maxSamples = getPerfMonNumberSamples(type); - numSamples = (numSamples > maxSamples) ? 
maxSamples : numSamples; - traceVector.mLength = numSamples; - - const uint32_t bytesPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 8); - const uint32_t wordsPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 32); - //uint32_t numBytes = numSamples * bytesPerSample; - uint32_t numWords = numSamples * wordsPerSample; - - // Create trace buffer on host (requires alignment) - const int BUFFER_BYTES = MAX_TRACE_NUMBER_SAMPLES * bytesPerSample; - const int BUFFER_WORDS = MAX_TRACE_NUMBER_SAMPLES * wordsPerSample; -#if GCC_VERSION >= 40800 - alignas(AXI_FIFO_RDFD_AXI_FULL)uint32_t hostbuf[BUFFER_WORDS]; -#else - AlignedAllocator alignedBuffer(AXI_FIFO_RDFD_AXI_FULL, BUFFER_WORDS); - uint32_t* hostbuf = alignedBuffer.getBuffer(); -#endif - memset((void *)hostbuf, 0, BUFFER_BYTES); - - // Iterate over chunks - // NOTE: AXI limits this to 4K bytes per transfer - uint32_t chunkSizeWords = 256 * wordsPerSample; - if (chunkSizeWords > 1024) chunkSizeWords = 1024; - uint32_t chunkSizeBytes = 4 * chunkSizeWords; - uint32_t words = 0; - - // Read trace a chunk of bytes at a time - if (numWords > chunkSizeWords) { - for (; words < (numWords - chunkSizeWords); words += chunkSizeWords) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x" - << std::hex << fifoReadAddress[0] << " and writing it to 0x" - << (void *)(hostbuf + words) << std::dec << std::endl; - } - - if (awsbwhal::unmgdPread(mUserHandle, (void *)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) - return 0; - - size += chunkSizeBytes; - } - } - - // Read remainder of trace not divisible by chunk size - if (words < numWords) { - chunkSizeBytes = 4 * (numWords - words); - - if (mLogStream.is_open()) { - mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x" - << std::hex << fifoReadAddress[0] << " and writing it to 0x" - << (void *)(hostbuf + words) << std::dec << std::endl; - } - - if (awsbwhal::unmgdPread(mUserHandle, (void 
*)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) - return 0; - - size += chunkSizeBytes; - } - - if (mLogStream.is_open()) { - mLogStream << __func__ << ": done reading " << size << " bytes " << std::endl; - } - - // ****************************** - // Read & process all trace FIFOs - // ****************************** - for (uint32_t wordnum = 0; wordnum < numSamples; wordnum++) { - uint32_t index = wordsPerSample * wordnum; - xclTraceResults results; - uint64_t temp = 0; - - temp = *(hostbuf + index) | (uint64_t)*(hostbuf + index + 1) << 32; - if (!temp) - continue; - - // Initialize result to 0 - memset(&results, 0, sizeof(xclTraceResults)); - // SDSoC Packet Format - results.Timestamp = temp & 0x1FFFFFFFFFFF; - results.EventType = ((temp >> 45) & 0xF) ? XCL_PERF_MON_END_EVENT : - XCL_PERF_MON_START_EVENT; - results.TraceID = (temp >> 49) & 0xFFF; - results.Reserved = (temp >> 61) & 0x1; - results.Overflow = (temp >> 62) & 0x1; - results.Error = (temp >> 63) & 0x1; - results.EventID = XCL_PERF_MON_HW_EVENT; - traceVector.mArray[wordnum] = results; - - if (mLogStream.is_open()) { - mLogStream << " Trace sample " << std::dec << wordnum << ": "; - mLogStream << dec2bin(uint32_t(temp >> 32)) << " " << dec2bin(uint32_t(temp & 0xFFFFFFFF)); - mLogStream << std::endl; - mLogStream << " Timestamp : " << results.Timestamp << " "; - mLogStream << "Event Type : " << results.EventType << " "; - mLogStream << "slotID : " << results.TraceID << " "; - mLogStream << "Start, Stop : " << static_cast(results.Reserved) << " "; - mLogStream << "Overflow : " << static_cast(results.Overflow) << " "; - mLogStream << "Error : " << static_cast(results.Error) << " "; - mLogStream << std::endl; - } - } - - return size; - } // end xclPerfMonReadTrace - -} // namespace awsbwhal - - -size_t xclPerfMonStartCounters(xclDeviceHandle handle, xclPerfMonType type) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return 
drv->xclPerfMonStartCounters(type); -} - - -size_t xclPerfMonStopCounters(xclDeviceHandle handle, xclPerfMonType type) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclPerfMonStopCounters(type); -} - - -size_t xclPerfMonReadCounters(xclDeviceHandle handle, xclPerfMonType type, xclCounterResults& counterResults) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclPerfMonReadCounters(type, counterResults); -} - - -size_t xclPerfMonClockTraining(xclDeviceHandle handle, xclPerfMonType type) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclPerfMonClockTraining(type); -} - - -size_t xclPerfMonStartTrace(xclDeviceHandle handle, xclPerfMonType type, uint32_t startTrigger) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclPerfMonStartTrace(type, startTrigger); -} - - -size_t xclPerfMonStopTrace(xclDeviceHandle handle, xclPerfMonType type) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclPerfMonStopTrace(type); -} - - -uint32_t xclPerfMonGetTraceCount(xclDeviceHandle handle, xclPerfMonType type) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclPerfMonGetTraceCount(type); -} - - -size_t xclPerfMonReadTrace(xclDeviceHandle handle, xclPerfMonType type, xclTraceResultsVector& traceVector) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclPerfMonReadTrace(type, traceVector); -} - - -double xclGetDeviceClockFreqMHz(xclDeviceHandle handle) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return 0.0; - return drv->xclGetDeviceClockFreqMHz(); -} - - -double xclGetReadMaxBandwidthMBps(xclDeviceHandle handle) -{ - awsbwhal::AwsXcl *drv = 
awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return 0.0; - return drv->xclGetReadMaxBandwidthMBps(); -} - - -double xclGetWriteMaxBandwidthMBps(xclDeviceHandle handle) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return 0.0; - return drv->xclGetWriteMaxBandwidthMBps(); -} - - -size_t xclGetDeviceTimestamp(xclDeviceHandle handle) -{ - return 0; -} - - -void xclSetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type, uint32_t numSlots) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return; - return drv->xclSetProfilingNumberSlots(type, numSlots); -} - - -uint32_t xclGetProfilingNumberSlots(xclDeviceHandle handle, xclPerfMonType type) -{ - return 2; -} - - -void xclGetProfilingSlotName(xclDeviceHandle handle, xclPerfMonType type, uint32_t slotnum, - char* slotName, uint32_t length) -{ - const char* name = (slotnum == XPAR_SPM0_HOST_SLOT) ? "Host" : "Kernels"; - strncpy(slotName, name, length); -} - - -void xclWriteHostEvent(xclDeviceHandle handle, xclPerfMonEventType type, - xclPerfMonEventID id) -{ - // don't do anything -} - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/SDAccel/userspace/src2/perfmon_parameters.h b/SDAccel/userspace/src2/perfmon_parameters.h deleted file mode 100755 index 4a2f417b..00000000 --- a/SDAccel/userspace/src2/perfmon_parameters.h +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright (C) 2018 Xilinx, Inc - * Performance Monitoring Internal Parameters using PCIe for AWS HAL Driver. - * NOTE: partially taken from file xaxipmon_hw.h in v5.0 of APM driver - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. 
A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -#ifndef _PERFMON_PARAMETERS_H -#define _PERFMON_PARAMETERS_H - -#define PERFMON0_OFFSET 0x100000 -#define PERFMON1_OFFSET 0x120000 -#define PERFMON2_OFFSET 0x010000 - -/************************ AXI Stream FIFOs ************************************/ - -/* Address offsets in core */ -#define AXI_FIFO_RDFR 0x18 -#define AXI_FIFO_RDFO 0x1c -#define AXI_FIFO_RDFD 0x20 -#define AXI_FIFO_RDFD_AXI_FULL 0x1000 -#define AXI_FIFO_TDFD 0x10 -#define AXI_FIFO_RLR 0x24 -#define AXI_FIFO_SRR 0x28 -#define AXI_FIFO_RESET_VALUE 0xA5 - -/************************ SDx Performance Monitor(SPM) ************************/ - -/* Address offsets in core */ -#define XSPM_CONTROL_OFFSET 0x08 -#define XSPM_TRACE_CTRL_OFFSET 0x10 -#define XSPM_EVENT_OFFSET 0x18 -#define XSPM_SAMPLE_OFFSET 0x20 -#define XSPM_FIFO_COUNTS_OFFSET 0x28 -#define XSPM_FIFO_READ_COUNTS_OFFSET 0x30 -#define XSPM_WRITE_BYTES_OFFSET 0x40 -#define XSPM_WRITE_TRANX_OFFSET 0x44 -#define XSPM_WRITE_LATENCY_OFFSET 0x48 -#define XSPM_READ_BYTES_OFFSET 0x4C -#define XSPM_READ_TRANX_OFFSET 0x50 -#define XSPM_READ_LATENCY_OFFSET 0x54 -//#define XSPM_MIN_MAX_WRITE_LATENCY_OFFSET 0x58 -//#define XSPM_MIN_MAX_READ_LATENCY_OFFSET 0x5C -#define XSPM_OUTSTANDING_COUNTS_OFFSET 0x58 -#define XSPM_LAST_WRITE_ADDRESS_OFFSET 0x5C -#define XSPM_LAST_WRITE_DATA_OFFSET 0x60 -#define XSPM_LAST_READ_ADDRESS_OFFSET 0x64 -#define XSPM_LAST_READ_DATA_OFFSET 0x68 -#define XSPM_SAMPLE_WRITE_BYTES_OFFSET 0x80 -#define XSPM_SAMPLE_WRITE_TRANX_OFFSET 0x84 -#define XSPM_SAMPLE_WRITE_LATENCY_OFFSET 0x88 -#define 
XSPM_SAMPLE_READ_BYTES_OFFSET 0x8C -#define XSPM_SAMPLE_READ_TRANX_OFFSET 0x90 -#define XSPM_SAMPLE_READ_LATENCY_OFFSET 0x94 -//#define XSPM_SAMPLE_MIN_MAX_WRITE_LATENCY_OFFSET 0x98 -//#define XSPM_SAMPLE_MIN_MAX_READ_LATENCY_OFFSET 0x9C -#define XSPM_SAMPLE_OUTSTANDING_COUNTS_OFFSET 0x98 -#define XSPM_SAMPLE_LAST_WRITE_ADDRESS_OFFSET 0x9C -#define XSPM_SAMPLE_LAST_WRITE_DATA_OFFSET 0xA0 -#define XSPM_SAMPLE_LAST_READ_ADDRESS_OFFSET 0xA4 -#define XSPM_SAMPLE_LAST_READ_DATA_OFFSET 0xA8 - -/* SPM Control Register masks */ -#define XSPM_CR_RESET_ON_SAMPLE_MASK 0x00000010 -#define XSPM_CR_FIFO_RESET_MASK 0x00000008 -#define XSPM_CR_TRACE_ENABLE_MASK 0x00000004 -#define XSPM_CR_COUNTER_RESET_MASK 0x00000002 -#define XSPM_CR_COUNTER_ENABLE_MASK 0x00000001 - -/************************ APM Constant Definitions ****************************/ - -/* Register offsets of AXIMONITOR in the Device Config */ - -#define XAPM_GCC_HIGH_OFFSET 0x0000 /**< Global Clock Counter 32 to 63 bits */ -#define XAPM_GCC_LOW_OFFSET 0x0004 /**< Global Clock Counter Lower 0-31 bits */ -#define XAPM_SI_HIGH_OFFSET 0x0020 /**< Sample Interval MSB */ -#define XAPM_SI_LOW_OFFSET 0x0024 /**< Sample Interval LSB */ -#define XAPM_SICR_OFFSET 0x0028 /**< Sample Interval Control Register */ -#define XAPM_SR_OFFSET 0x002C /**< Sample Register */ -#define XAPM_GIE_OFFSET 0x0030 /**< Global Interrupt Enable Register */ -#define XAPM_IE_OFFSET 0x0034 /**< Interrupt Enable Register */ -#define XAPM_IS_OFFSET 0x0038 /**< Interrupt Status Register */ - -#define XAPM_MSR0_OFFSET 0x0044 /**< Metric Selector 0 Register */ -#define XAPM_MSR1_OFFSET 0x0048 /**< Metric Selector 1 Register */ -#define XAPM_MSR2_OFFSET 0x004C /**< Metric Selector 2 Register */ - -#define XAPM_MC0_OFFSET 0x0100 /**< Metric Counter 0 Register */ -#define XAPM_INC0_OFFSET 0x0104 /**< Incrementer 0 Register */ -#define XAPM_RANGE0_OFFSET 0x0108 /**< Range 0 Register */ -#define XAPM_MC0LOGEN_OFFSET 0x010C /**< Metric Counter 0 Log Enable 
Register */ -#define XAPM_MC1_OFFSET 0x0110 /**< Metric Counter 1 Register */ -#define XAPM_INC1_OFFSET 0x0114 /**< Incrementer 1 Register */ -#define XAPM_RANGE1_OFFSET 0x0118 /**< Range 1 Register */ -#define XAPM_MC1LOGEN_OFFSET 0x011C /**< Metric Counter 1 Log Enable Register */ -#define XAPM_MC2_OFFSET 0x0120 /**< Metric Counter 2 Register */ -#define XAPM_INC2_OFFSET 0x0124 /**< Incrementer 2 Register */ -#define XAPM_RANGE2_OFFSET 0x0128 /**< Range 2 Register */ -#define XAPM_MC2LOGEN_OFFSET 0x012C /**< Metric Counter 2 Log Enable Register */ -#define XAPM_MC3_OFFSET 0x0130 /**< Metric Counter 3 Register */ -#define XAPM_INC3_OFFSET 0x0134 /**< Incrementer 3 Register */ -#define XAPM_RANGE3_OFFSET 0x0138 /**< Range 3 Register */ -#define XAPM_MC3LOGEN_OFFSET 0x013C /**< Metric Counter 3 Log Enable Register */ -#define XAPM_MC4_OFFSET 0x0140 /**< Metric Counter 4 Register */ -#define XAPM_INC4_OFFSET 0x0144 /**< Incrementer 4 Register */ -#define XAPM_RANGE4_OFFSET 0x0148 /**< Range 4 Register */ -#define XAPM_MC4LOGEN_OFFSET 0x014C /**< Metric Counter 4 Log Enable Register */ -#define XAPM_MC5_OFFSET 0x0150 /**< Metric Counter 5 Register */ -#define XAPM_INC5_OFFSET 0x0154 /**< Incrementer 5 Register */ -#define XAPM_RANGE5_OFFSET 0x0158 /**< Range 5 Register */ -#define XAPM_MC5LOGEN_OFFSET 0x015C /**< Metric Counter 5 Log Enable Register */ -#define XAPM_MC6_OFFSET 0x0160 /**< Metric Counter 6 Register */ -#define XAPM_INC6_OFFSET 0x0164 /**< Incrementer 6 Register */ -#define XAPM_RANGE6_OFFSET 0x0168 /**< Range 6 Register */ -#define XAPM_MC6LOGEN_OFFSET 0x016C /**< Metric Counter 6 Log Enable Register */ -#define XAPM_MC7_OFFSET 0x0170 /**< Metric Counter 7 Register */ -#define XAPM_INC7_OFFSET 0x0174 /**< Incrementer 7 Register */ -#define XAPM_RANGE7_OFFSET 0x0178 /**< Range 7 Register */ -#define XAPM_MC7LOGEN_OFFSET 0x017C /**< Metric Counter 7 Log Enable Register */ -#define XAPM_MC8_OFFSET 0x0180 /**< Metric Counter 8 Register */ -#define 
XAPM_INC8_OFFSET 0x0184 /**< Incrementer 8 Register */ -#define XAPM_RANGE8_OFFSET 0x0188 /**< Range 8 Register */ -#define XAPM_MC8LOGEN_OFFSET 0x018C /**< Metric Counter 8 Log Enable Register */ -#define XAPM_MC9_OFFSET 0x0190 /**< Metric Counter 9 Register */ -#define XAPM_INC9_OFFSET 0x0194 /**< Incrementer 9 Register */ -#define XAPM_RANGE9_OFFSET 0x0198 /**< Range 9 Register */ -#define XAPM_MC9LOGEN_OFFSET 0x019C /**< Metric Counter 9 Log Enable Register */ - -#define XAPM_SMC0_OFFSET 0x0200 /**< Sampled Metric Counter 0 Register */ -#define XAPM_SINC0_OFFSET 0x0204 /**< Sampled Incrementer 0 Register */ -#define XAPM_SMC1_OFFSET 0x0210 /**< Sampled Metric Counter 1 Register */ -#define XAPM_SINC1_OFFSET 0x0214 /**< Sampled Incrementer 1 Register */ -#define XAPM_SMC2_OFFSET 0x0220 /**< Sampled Metric Counter 2 Register */ -#define XAPM_SINC2_OFFSET 0x0224 /**< Sampled Incrementer 2 Register */ -#define XAPM_SMC3_OFFSET 0x0230 /**< Sampled Metric Counter 3 Register */ -#define XAPM_SINC3_OFFSET 0x0234 /**< Sampled Incrementer 3 Register */ -#define XAPM_SMC4_OFFSET 0x0240 /**< Sampled Metric Counter 4 Register */ -#define XAPM_SINC4_OFFSET 0x0244 /**< Sampled Incrementer 4 Register */ -#define XAPM_SMC5_OFFSET 0x0250 /**< Sampled Metric Counter 5 Register */ -#define XAPM_SINC5_OFFSET 0x0254 /**< Sampled Incrementer 5 Register */ -#define XAPM_SMC6_OFFSET 0x0260 /**< Sampled Metric Counter 6 Register */ -#define XAPM_SINC6_OFFSET 0x0264 /**< Sampled Incrementer 6 Register */ -#define XAPM_SMC7_OFFSET 0x0270 /**< Sampled Metric Counter 7 Register */ -#define XAPM_SINC7_OFFSET 0x0274 /**< Sampled Incrementer 7 Register */ -#define XAPM_SMC8_OFFSET 0x0280 /**< Sampled Metric Counter 8 Register */ -#define XAPM_SINC8_OFFSET 0x0284 /**< Sampled Incrementer 8 Register */ -#define XAPM_SMC9_OFFSET 0x0290 /**< Sampled Metric Counter 9 Register */ -#define XAPM_SINC9_OFFSET 0x0294 /**< Sampled Incrementer 9 Register */ - -#define XAPM_MC10_OFFSET 0x01A0 /**< Metric 
Counter 10 Register */ -#define XAPM_MC11_OFFSET 0x01B0 /**< Metric Counter 11 Register */ -#define XAPM_MC12_OFFSET 0x0500 /**< Metric Counter 12 Register */ -#define XAPM_MC13_OFFSET 0x0510 /**< Metric Counter 13 Register */ -#define XAPM_MC14_OFFSET 0x0520 /**< Metric Counter 14Register */ -#define XAPM_MC15_OFFSET 0x0530 /**< Metric Counter 15 Register */ -#define XAPM_MC16_OFFSET 0x0540 /**< Metric Counter 16 Register */ -#define XAPM_MC17_OFFSET 0x0550 /**< Metric Counter 17 Register */ -#define XAPM_MC18_OFFSET 0x0560 /**< Metric Counter 18 Register */ -#define XAPM_MC19_OFFSET 0x0570 /**< Metric Counter 19 Register */ -#define XAPM_MC20_OFFSET 0x0580 /**< Metric Counter 20 Register */ -#define XAPM_MC21_OFFSET 0x0590 /**< Metric Counter 21 Register */ -#define XAPM_MC22_OFFSET 0x05A0 /**< Metric Counter 22 Register */ -#define XAPM_MC23_OFFSET 0x05B0 /**< Metric Counter 23 Register */ -#define XAPM_MC24_OFFSET 0x0700 /**< Metric Counter 24 Register */ -#define XAPM_MC25_OFFSET 0x0710 /**< Metric Counter 25 Register */ -#define XAPM_MC26_OFFSET 0x0720 /**< Metric Counter 26 Register */ -#define XAPM_MC27_OFFSET 0x0730 /**< Metric Counter 27 Register */ -#define XAPM_MC28_OFFSET 0x0740 /**< Metric Counter 28 Register */ -#define XAPM_MC29_OFFSET 0x0750 /**< Metric Counter 29 Register */ -#define XAPM_MC30_OFFSET 0x0760 /**< Metric Counter 30 Register */ -#define XAPM_MC31_OFFSET 0x0770 /**< Metric Counter 31 Register */ -#define XAPM_MC32_OFFSET 0x0780 /**< Metric Counter 32 Register */ -#define XAPM_MC33_OFFSET 0x0790 /**< Metric Counter 33 Register */ -#define XAPM_MC34_OFFSET 0x07A0 /**< Metric Counter 34 Register */ -#define XAPM_MC35_OFFSET 0x07B0 /**< Metric Counter 35 Register */ -#define XAPM_MC36_OFFSET 0x0900 /**< Metric Counter 36 Register */ -#define XAPM_MC37_OFFSET 0x0910 /**< Metric Counter 37 Register */ -#define XAPM_MC38_OFFSET 0x0920 /**< Metric Counter 38 Register */ -#define XAPM_MC39_OFFSET 0x0930 /**< Metric Counter 39 Register */ 
-#define XAPM_MC40_OFFSET 0x0940 /**< Metric Counter 40 Register */ -#define XAPM_MC41_OFFSET 0x0950 /**< Metric Counter 41 Register */ -#define XAPM_MC42_OFFSET 0x0960 /**< Metric Counter 42 Register */ -#define XAPM_MC43_OFFSET 0x0970 /**< Metric Counter 43 Register */ -#define XAPM_MC44_OFFSET 0x0980 /**< Metric Counter 44 Register */ -#define XAPM_MC45_OFFSET 0x0990 /**< Metric Counter 45 Register */ -#define XAPM_MC46_OFFSET 0x09A0 /**< Metric Counter 46 Register */ -#define XAPM_MC47_OFFSET 0x09B0 /**< Metric Counter 47 Register */ - -#define XAPM_SMC10_OFFSET 0x02A0 /**< Sampled Metric Counter 10 Register */ -#define XAPM_SMC11_OFFSET 0x02B0 /**< Sampled Metric Counter 11 Register */ -#define XAPM_SMC12_OFFSET 0x0600 /**< Sampled Metric Counter 12 Register */ -#define XAPM_SMC13_OFFSET 0x0610 /**< Sampled Metric Counter 13 Register */ -#define XAPM_SMC14_OFFSET 0x0620 /**< Sampled Metric Counter 14 Register */ -#define XAPM_SMC15_OFFSET 0x0630 /**< Sampled Metric Counter 15 Register */ -#define XAPM_SMC16_OFFSET 0x0640 /**< Sampled Metric Counter 16 Register */ -#define XAPM_SMC17_OFFSET 0x0650 /**< Sampled Metric Counter 17 Register */ -#define XAPM_SMC18_OFFSET 0x0660 /**< Sampled Metric Counter 18 Register */ -#define XAPM_SMC19_OFFSET 0x0670 /**< Sampled Metric Counter 19 Register */ -#define XAPM_SMC20_OFFSET 0x0680 /**< Sampled Metric Counter 20 Register */ -#define XAPM_SMC21_OFFSET 0x0690 /**< Sampled Metric Counter 21 Register */ -#define XAPM_SMC22_OFFSET 0x06A0 /**< Sampled Metric Counter 22 Register */ -#define XAPM_SMC23_OFFSET 0x06B0 /**< Sampled Metric Counter 23 Register */ -#define XAPM_SMC24_OFFSET 0x0800 /**< Sampled Metric Counter 24 Register */ -#define XAPM_SMC25_OFFSET 0x0810 /**< Sampled Metric Counter 25 Register */ -#define XAPM_SMC26_OFFSET 0x0820 /**< Sampled Metric Counter 26 Register */ -#define XAPM_SMC27_OFFSET 0x0830 /**< Sampled Metric Counter 27 Register */ -#define XAPM_SMC28_OFFSET 0x0840 /**< Sampled Metric Counter 28 
Register */ -#define XAPM_SMC29_OFFSET 0x0850 /**< Sampled Metric Counter 29 Register */ -#define XAPM_SMC30_OFFSET 0x0860 /**< Sampled Metric Counter 30 Register */ -#define XAPM_SMC31_OFFSET 0x0870 /**< Sampled Metric Counter 31 Register */ -#define XAPM_SMC32_OFFSET 0x0880 /**< Sampled Metric Counter 32 Register */ -#define XAPM_SMC33_OFFSET 0x0890 /**< Sampled Metric Counter 33 Register */ -#define XAPM_SMC34_OFFSET 0x08A0 /**< Sampled Metric Counter 34 Register */ -#define XAPM_SMC35_OFFSET 0x08B0 /**< Sampled Metric Counter 35 Register */ -#define XAPM_SMC36_OFFSET 0x0A00 /**< Sampled Metric Counter 36 Register */ -#define XAPM_SMC37_OFFSET 0x0A10 /**< Sampled Metric Counter 37 Register */ -#define XAPM_SMC38_OFFSET 0x0A20 /**< Sampled Metric Counter 38 Register */ -#define XAPM_SMC39_OFFSET 0x0A30 /**< Sampled Metric Counter 39 Register */ -#define XAPM_SMC40_OFFSET 0x0A40 /**< Sampled Metric Counter 40 Register */ -#define XAPM_SMC41_OFFSET 0x0A50 /**< Sampled Metric Counter 41 Register */ -#define XAPM_SMC42_OFFSET 0x0A60 /**< Sampled Metric Counter 42 Register */ -#define XAPM_SMC43_OFFSET 0x0A70 /**< Sampled Metric Counter 43 Register */ -#define XAPM_SMC44_OFFSET 0x0A80 /**< Sampled Metric Counter 44 Register */ -#define XAPM_SMC45_OFFSET 0x0A90 /**< Sampled Metric Counter 45 Register */ -#define XAPM_SMC46_OFFSET 0x0AA0 /**< Sampled Metric Counter 46 Register */ -#define XAPM_SMC47_OFFSET 0x0AB0 /**< Sampled Metric Counter 47 Register */ -/* Sampled metric counters 48-63: In Profile mode, this are min/max latency registers */ -#define XAPM_SMC48_OFFSET 0x0254 /**< Sampled Metric Counter 48 Register */ -#define XAPM_SMC49_OFFSET 0x0258 /**< Sampled Metric Counter 49 Register */ -#define XAPM_SMC50_OFFSET 0x02B4 /**< Sampled Metric Counter 50 Register */ -#define XAPM_SMC51_OFFSET 0x02B8 /**< Sampled Metric Counter 51 Register */ -#define XAPM_SMC52_OFFSET 0x0654 /**< Sampled Metric Counter 52 Register */ -#define XAPM_SMC53_OFFSET 0x0658 /**< Sampled 
Metric Counter 53 Register */ -#define XAPM_SMC54_OFFSET 0x06B4 /**< Sampled Metric Counter 54 Register */ -#define XAPM_SMC55_OFFSET 0x06B8 /**< Sampled Metric Counter 55 Register */ -#define XAPM_SMC56_OFFSET 0x0854 /**< Sampled Metric Counter 56 Register */ -#define XAPM_SMC57_OFFSET 0x0858 /**< Sampled Metric Counter 57 Register */ -#define XAPM_SMC58_OFFSET 0x08B4 /**< Sampled Metric Counter 58 Register */ -#define XAPM_SMC59_OFFSET 0x08B8 /**< Sampled Metric Counter 59 Register */ -#define XAPM_SMC60_OFFSET 0x0A54 /**< Sampled Metric Counter 60 Register */ -#define XAPM_SMC61_OFFSET 0x0A58 /**< Sampled Metric Counter 61 Register */ -#define XAPM_SMC62_OFFSET 0x0AB4 /**< Sampled Metric Counter 62 Register */ -#define XAPM_SMC63_OFFSET 0x0AB8 /**< Sampled Metric Counter 63 Register */ - -#define XAPM_CTL_OFFSET 0x0300 /**< Control Register */ -#define XAPM_ID_OFFSET 0x0304 /**< Latency ID Register */ -#define XAPM_IDMASK_OFFSET 0x0308 /**< ID Mask Register */ -#define XAPM_FEC_OFFSET 0x0400 /**< Flag Enable Control Register */ -#define XAPM_SWD_OFFSET 0x0404 /**< Software-written Data Register */ -#define XAPM_ENT_OFFSET 0x0408 /**< Enable Trace Register */ - -/* AXI Monitor Sample Interval Control Register mask(s) */ - -#define XAPM_SICR_MCNTR_RST_MASK 0x00000100 /**< Enable the Metric Counter Reset */ -#define XAPM_SICR_LOAD_MASK 0x00000002 /**< Load the Sample Interval Register Value into the counter */ -#define XAPM_SICR_ENABLE_MASK 0x00000001 /**< Enable the downcounter */ - -/* Interrupt Status/Enable Register Bit Definitions and Masks */ - -#define XAPM_IXR_MC9_OVERFLOW_MASK 0x00001000 /**< Metric Counter 9 Overflow> */ -#define XAPM_IXR_MC8_OVERFLOW_MASK 0x00000800 /**< Metric Counter 8 Overflow> */ -#define XAPM_IXR_MC7_OVERFLOW_MASK 0x00000400 /**< Metric Counter 7 Overflow> */ -#define XAPM_IXR_MC6_OVERFLOW_MASK 0x00000200 /**< Metric Counter 6 Overflow> */ -#define XAPM_IXR_MC5_OVERFLOW_MASK 0x00000100 /**< Metric Counter 5 Overflow> */ -#define 
XAPM_IXR_MC4_OVERFLOW_MASK 0x00000080 /**< Metric Counter 4 Overflow> */ -#define XAPM_IXR_MC3_OVERFLOW_MASK 0x00000040 /**< Metric Counter 3 Overflow> */ -#define XAPM_IXR_MC2_OVERFLOW_MASK 0x00000020 /**< Metric Counter 2 Overflow> */ -#define XAPM_IXR_MC1_OVERFLOW_MASK 0x00000010 /**< Metric Counter 1 Overflow> */ -#define XAPM_IXR_MC0_OVERFLOW_MASK 0x00000008 /**< Metric Counter 0 Overflow> */ -#define XAPM_IXR_FIFO_FULL_MASK 0x00000004 /**< Event Log FIFO full> */ -#define XAPM_IXR_SIC_OVERFLOW_MASK 0x00000002 /**< Sample Interval Counter Overflow> */ -#define XAPM_IXR_GCC_OVERFLOW_MASK 0x00000001 /**< Global Clock Counter Overflow> */ -#define XAPM_IXR_ALL_MASK (XAPM_IXR_SIC_OVERFLOW_MASK | \ - XAPM_IXR_GCC_OVERFLOW_MASK | \ - XAPM_IXR_FIFO_FULL_MASK | \ - XAPM_IXR_MC0_OVERFLOW_MASK | \ - XAPM_IXR_MC1_OVERFLOW_MASK | \ - XAPM_IXR_MC2_OVERFLOW_MASK | \ - XAPM_IXR_MC3_OVERFLOW_MASK | \ - XAPM_IXR_MC4_OVERFLOW_MASK | \ - XAPM_IXR_MC5_OVERFLOW_MASK | \ - XAPM_IXR_MC6_OVERFLOW_MASK | \ - XAPM_IXR_MC7_OVERFLOW_MASK | \ - XAPM_IXR_MC8_OVERFLOW_MASK | \ - XAPM_IXR_MC9_OVERFLOW_MASK) - -/* AXI Monitor Control Register mask(s) */ - -#define XAPM_CR_FIFO_RESET_MASK 0x02000000 /**< FIFO Reset */ -#define XAPM_CR_GCC_RESET_MASK 0x00020000 /**< Global Clk Counter Reset */ -#define XAPM_CR_GCC_ENABLE_MASK 0x00010000 /**< Global Clk Counter Enable */ -#define XAPM_CR_EVTLOG_EXTTRIGGER_MASK 0x00000200 /**< Enable External trigger to start event Log */ -#define XAPM_CR_EVENTLOG_ENABLE_MASK 0x00000100 /**< Event Log Enable */ -#define XAPM_CR_RDLATENCY_END_MASK 0x00000080 /**< Write Latency End point */ -#define XAPM_CR_RDLATENCY_START_MASK 0x00000040 /**< Read Latency Start point */ -#define XAPM_CR_WRLATENCY_END_MASK 0x00000020 /**< Write Latency End point */ -#define XAPM_CR_WRLATENCY_START_MASK 0x00000010 /**< Write Latency Start point */ -#define XAPM_CR_IDFILTER_ENABLE_MASK 0x00000008 /**< ID Filter Enable */ -#define XAPM_CR_MCNTR_EXTTRIGGER_MASK 0x00000004 /**< Enable 
External trigger to start Metric Counters */ -#define XAPM_CR_MCNTR_RESET_MASK 0x00000002 /**< Metrics Counter Reset */ -#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001 /**< Metrics Counter Enable */ - -/* AXI Monitor ID Register mask(s) */ - -#define XAPM_ID_RID_MASK 0xFFFF0000 /**< Read ID */ -#define XAPM_ID_WID_MASK 0x0000FFFF /**< Write ID */ - -/* AXI Monitor ID Mask Register mask(s) */ - -#define XAPM_MASKID_RID_MASK 0xFFFF0000 /**< Read ID Mask */ -#define XAPM_MASKID_WID_MASK 0x0000FFFF /**< Write ID Mask*/ - -/* AXI Monitor Min/Max Register masks and shifts */ - -#define XAPM_MAX_LATENCY_MASK 0xFFFF0000 /**< Max Latency Mask */ -#define XAPM_MIN_LATENCY_MASK 0x0000FFFF /**< Min Latency Mask */ -#define XAPM_MAX_LATENCY_SHIFT 16 /**< Max Latency Shift */ -#define XAPM_MIN_LATENCY_SHIFT 0 /**< Min Latency Shift */ - -/* LAPC Base address */ -#define LAPC0_BASE 0x00120000 //ocl master00 -#define LAPC1_BASE 0x00121000 //ocl master01 -#define LAPC2_BASE 0x00122000 //ocl master02 -#define LAPC3_BASE 0x00123000 //ocl master03 - -//Following status registers are available at each base -#define LAPC_OVERALL_STATUS_OFFSET 0x0 -#define LAPC_CUMULATIVE_STATUS_0_OFFSET 0x100 -#define LAPC_CUMULATIVE_STATUS_1_OFFSET 0x104 -#define LAPC_CUMULATIVE_STATUS_2_OFFSET 0x108 -#define LAPC_CUMULATIVE_STATUS_3_OFFSET 0x10c - -#define LAPC_SNAPSHOT_STATUS_0_OFFSET 0x200 -#define LAPC_SNAPSHOT_STATUS_1_OFFSET 0x204 -#define LAPC_SNAPSHOT_STATUS_2_OFFSET 0x208 -#define LAPC_SNAPSHOT_STATUS_3_OFFSET 0x20c -#endif - diff --git a/SDAccel/userspace/src2/shim.cpp b/SDAccel/userspace/src2/shim.cpp deleted file mode 100755 index 095ce5d0..00000000 --- a/SDAccel/userspace/src2/shim.cpp +++ /dev/null @@ -1,1514 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * Author: Sonal Santan - * AWS HAL Driver for SDAccel/OpenCL runtime evnrionemnt, for AWS EC2 F1 - * - * Code copied from SDAccel XDMA based HAL driver - * - * Licensed under the Apache License, Version 2.0 (the "License"). 
You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -#include "shim.h" -#include -/* - * Define GCC version macro so we can use newer C++11 features - * if possible - */ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) - -#ifdef INTERNAL_TESTING -#define ACCELERATOR_BAR 0 -#define MMAP_SIZE_USER 0x400000 -#endif - -/* Aligning access to FPGA DRAM space to 4096 Byte */ -#define DDR_BUFFER_ALIGNMENT 0x1000 - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xclbin2.h" -#include "xocl/xocl_ioctl.h" -#include "scan.h" -#include "awssak.h" - -#ifdef INTERNAL_TESTING -#include "driver/aws/kernel/include/mgmt-ioctl.h" -#else -#define AWSMGMT_NUM_SUPPORTED_CLOCKS 4 -#define AWSMGMT_NUM_ACTUAL_CLOCKS 3 -// TODO - define this in a header file -extern char* get_afi_from_xclBin(const xclBin *); -extern char* get_afi_from_axlf(const axlf *); -// define DEFAULT_GLOBAL_AFI "agfi-069ddd533a748059b" // 1.4 shell -#define DEFAULT_GLOBAL_AFI "agfi-0cc0ac6a40aa73ce8" // 1.4 shell 4-ddr data retention enabled -#endif - -namespace awsbwhal { - // This list will get populated in xclProbe - // 0 -> /dev/dri/renderD129 - // 1 -> /dev/dri/renderD130 - static std::mutex deviceListMutex; - // static std::vector> deviceList; - - const unsigned AwsXcl::TAG = 0X586C0C6C; // XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); - -#ifdef INTERNAL_TESTING - int 
AwsXcl::xclLoadAxlf(const axlf *buffer) - { - if ( mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buffer << std::endl; - } - - if ( !mLocked) - return -EPERM; - - std::cout << "Downloading xclbin ...\n" << std::endl; - const unsigned cmd = AWSMGMT_IOCICAPDOWNLOAD_AXLF; - awsmgmt_ioc_bitstream_axlf obj = { const_cast(buffer) }; - int ret = ioctl(mMgtHandle, cmd, &obj); - if ( 0 != ret) - return ret; - - // If it is an XPR DSA, zero out the DDR again as downloading the XCLBIN - // reinitializes the DDR and results in ECC error. - if ( isXPR()) { - if ( mLogStream.is_open()) { - mLogStream << __func__ << "XPR Device found, zeroing out DDR again.." << std::endl; - } - - if ( zeroOutDDR() == false) { - if ( mLogStream.is_open()) { - mLogStream << __func__ << "zeroing out DDR failed" << std::endl; - } - return -EIO; - } - } - - drm_xocl_axlf axlf_obj = {const_cast(buffer)}; - ret = ioctl(mUserHandle, DRM_IOCTL_XOCL_READ_AXLF, &axlf_obj); - return ret; - } -#endif - - int AwsXcl::xclGetXclBinIdFromSysfs(uint64_t &xclbin_id_from_sysfs) - { - const std::string devPath = "/sys/bus/pci/devices/" + xcldev::pci_device_scanner::device_list[ mBoardNumber ].user_name; - std::string binid_path = devPath + "/xclbinid"; - struct stat sb; - if( stat( binid_path.c_str(), &sb ) < 0 ) { - std::cout << "ERROR: failed to stat " << binid_path << std::endl; - return errno; - } - std::ifstream ifs( binid_path.c_str(), std::ifstream::binary ); - if( !ifs.good() ) { - return errno; - } - char* fileReadBuf = new char[sb.st_size]; - memset(fileReadBuf, 0, sb.st_size); - ifs.read( fileReadBuf, sb.st_size ); - if( ifs.gcount() > 0 ) { - std::string tmp_hex_string = fileReadBuf; - xclbin_id_from_sysfs = std::stoi(std::string(fileReadBuf),nullptr,16); - } else { // xclbinid exists, but no data read or reported - std::cout << "WARNING: 'xclbinid' invalid, unable to report xclbinid. Has the bitstream been loaded? 
See 'xbsak program'.\n"; - } - delete [] fileReadBuf; - ifs.close(); - return 0; - } - - int AwsXcl::xclLoadXclBin(const xclBin *buffer) - { - char *xclbininmemory = reinterpret_cast (const_cast (buffer)); -#ifdef INTERNAL_TESTING - if (!memcmp(xclbininmemory, "xclbin2", 8)) { - return xclLoadAxlf(reinterpret_cast(xclbininmemory)); - } - - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buffer << std::endl; - } - - if (!mLocked) - return -EPERM; - - const unsigned cmd = AWSMGMT_IOCICAPDOWNLOAD; - awsmgmt_ioc_bitstream obj = {const_cast(buffer)}; - return ioctl(mMgtHandle, cmd, &obj); -#else - if (!memcmp(xclbininmemory, "xclbin2", 8)) { - int retVal = 0; - axlf *axlfbuffer = reinterpret_cast(const_cast (buffer)); - fpga_mgmt_image_info orig_info; - char* afi_id = get_afi_from_axlf(axlfbuffer); - std::memset(&orig_info, 0, sizeof(struct fpga_mgmt_image_info)); - fpga_mgmt_describe_local_image(mBoardNumber, &orig_info, 0); - - uint64_t xclbin_id_from_sysfs; - if( int retVal = xclGetXclBinIdFromSysfs( xclbin_id_from_sysfs ) != 0 ) - return retVal; - - if ( (xclbin_id_from_sysfs == 0) || (axlfbuffer->m_uniqueId != xclbin_id_from_sysfs) || checkAndSkipReload(afi_id, &orig_info) ) { - // force data retention option - union fpga_mgmt_load_local_image_options opt; - fpga_mgmt_init_load_local_image_options(&opt); - opt.flags = FPGA_CMD_DRAM_DATA_RETENTION; - opt.afi_id = afi_id; - opt.slot_id = mBoardNumber; - retVal = fpga_mgmt_load_local_image_with_options(&opt); - if (retVal == FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE || - retVal == FPGA_ERR_DRAM_DATA_RETENTION_FAILED || - retVal == FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED) { - std::cout << "INFO: Could not load AFI for data retention, code: " << retVal - << " - Loading in classic mode." 
<< std::endl; - retVal = fpga_mgmt_load_local_image(mBoardNumber, afi_id); - } - // check retVal from image load - if (retVal) { - std::cout << "Failed to load AFI, error: " << retVal << std::endl; - return -retVal; - } - retVal = sleepUntilLoaded( std::string(afi_id) ); - if (!retVal) { - drm_xocl_axlf axlf_obj = { reinterpret_cast(const_cast(buffer)) }; - retVal = ioctl(mUserHandle, DRM_IOCTL_XOCL_READ_AXLF, &axlf_obj); - if (retVal) { - std::cout << "IOCTL DRM_IOCTL_XOCL_READ_AXLF Failed: " << retVal << std::endl; - } else { - std::cout << "AFI load complete." << std::endl; - } - } - } - return retVal; - } else { - char* afi_id = get_afi_from_xclBin(buffer); - return fpga_mgmt_load_local_image(mBoardNumber, afi_id); - } -#endif - } - - /* Accessing F1 FPGA memory space (i.e. OpenCL Global Memory) is mapped through AppPF BAR4 - * all offsets are relative to the base address available in AppPF BAR4 - * SDAcell XCL_ADDR_SPACE_DEVICE_RAM enum maps to AwsXcl::ocl_global_mem_bar, which is the - * handle for AppPF BAR4 - */ - size_t AwsXcl::xclReadModifyWrite(uint64_t offset, const void *hostBuf, size_t size) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " - << offset << ", " << hostBuf << ", " << size << std::endl; - } -#if GCC_VERSION >= 40800 - alignas(DDR_BUFFER_ALIGNMENT) char buffer[DDR_BUFFER_ALIGNMENT]; -#else - AlignedAllocator alignedBuffer(DDR_BUFFER_ALIGNMENT, DDR_BUFFER_ALIGNMENT); - char* buffer = alignedBuffer.getBuffer(); -#endif - - const size_t mod_size = offset % DDR_BUFFER_ALIGNMENT; - // Read back one full aligned block starting from preceding aligned address - const uint64_t mod_offset = offset - mod_size; - if (xclRead(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset, buffer, DDR_BUFFER_ALIGNMENT) != DDR_BUFFER_ALIGNMENT) - return -1; - - // Update the local copy of buffer with user requested data - const size_t copy_size = (size + mod_size > DDR_BUFFER_ALIGNMENT) ? 
DDR_BUFFER_ALIGNMENT - mod_size : size; - std::memcpy(buffer + mod_size, hostBuf, copy_size); - - // Write back the updated aligned block - if (xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset, buffer, DDR_BUFFER_ALIGNMENT) != DDR_BUFFER_ALIGNMENT) - return -1; - - // Write any remaining blocks over DDR_BUFFER_ALIGNMENT size - if (size + mod_size > DDR_BUFFER_ALIGNMENT) { - size_t write_size = xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset + DDR_BUFFER_ALIGNMENT, - (const char *)hostBuf + copy_size, size - copy_size); - if (write_size != (size - copy_size)) - return -1; - } - return size; - } - - /* Accessing F1 FPGA memory space mapped through AppPF PCIe BARs - * space = XCL_ADDR_SPACE_DEVICE_RAM maps to AppPF PCIe BAR4, (sh_cl_dma_pcis_ bus), with AwsXcl::ocl_global_mem_bar as handle - * space = XCL_ADDR_KERNEL_CTRL maps to AppPF PCIe BAR0 (sh_cl_ocl bus), with AwsXcl::ocl_kernel_bar as handle - * all offsets are relative to the base address available in AppPF - */ - size_t AwsXcl::xclWrite(xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", " - << offset << ", " << hostBuf << ", " << size << std::endl; - } - - if (!mLocked) - return -1; - - switch (space) { - - /* Current release now includes performance monitors */ - case XCL_ADDR_SPACE_DEVICE_PERFMON: - { -#ifdef INTERNAL_TESTING - if (pcieBarWrite(ACCELERATOR_BAR, offset, hostBuf, size) == 0) { - return size; - } -#else - if (pcieBarWrite(APP_PF_BAR0, offset, hostBuf, size) == 0) { - return size; - } -#endif - return -1; - } - case XCL_ADDR_KERNEL_CTRL: - { - if (mLogStream.is_open()) { - const unsigned *reg = static_cast(hostBuf); - size_t regSize = size / 4; - if (regSize > 32) - regSize = 32; - for (unsigned i = 0; i < regSize; i++) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", 0x" - << std::hex << offset + i << std::dec << ", 
0x" << std::hex << reg[i] << std::dec << std::endl; - - } - } -#ifdef INTERNAL_TESTING - if (pcieBarWrite(ACCELERATOR_BAR, offset, hostBuf, size) == 0) { -#else - if (pcieBarWrite(APP_PF_BAR0, offset, hostBuf, size) == 0) { - -#endif - return size; - } - return -1; - } - default: - { - return -1; - } - } - } - - - size_t AwsXcl::xclRead(xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size) { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", " - << offset << ", " << hostBuf << ", " << size << std::endl; - } - - switch (space) { - case XCL_ADDR_SPACE_DEVICE_PERFMON: - { -#ifdef INTERNAL_TESTING - if (pcieBarRead(ACCELERATOR_BAR, offset, hostBuf, size) == 0) { - return size; - } -#else - if (pcieBarRead(APP_PF_BAR0, offset, hostBuf, size) == 0) { - return size; - } -#endif - return -1; - } - case XCL_ADDR_KERNEL_CTRL: - { -#ifdef INTERNAL_TESTING - int result = pcieBarRead(ACCELERATOR_BAR, offset, hostBuf, size); -#else - int result = pcieBarRead(APP_PF_BAR0, offset, hostBuf, size); -#endif - if (mLogStream.is_open()) { - const unsigned *reg = static_cast(hostBuf); - size_t regSize = size / 4; - if (regSize > 4) - regSize = 4; - for (unsigned i = 0; i < regSize; i++) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", 0x" - << std::hex << offset + i << std::dec << ", 0x" << std::hex << reg[i] << std::dec << std::endl; - } - } - return !result ? 
size : 0; - } - default: - { - return -1; - } - } - } - - uint64_t AwsXcl::xclAllocDeviceBuffer(size_t size) - { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << size << std::endl; - } - - uint64_t result = mNullAddr; - unsigned boHandle = xclAllocBO(size, XCL_BO_DEVICE_RAM, 0x0); - if (boHandle == mNullBO) - return result; - - drm_xocl_info_bo boInfo = {boHandle, 0, 0, 0}; - if (ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &boInfo)) - return result; - - void *hbuf = xclMapBO(boHandle, true); - if (hbuf == MAP_FAILED) { - xclFreeBO(boHandle); - return mNullAddr; - } - mLegacyAddressTable.insert(boInfo.paddr, size, std::make_pair(boHandle, (char *)hbuf)); - return boInfo.paddr; - } - - uint64_t AwsXcl::xclAllocDeviceBuffer2(size_t size, xclMemoryDomains domain, unsigned flags) - { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << size << ", " - << domain << ", " << flags << std::endl; - } - - uint64_t result = mNullAddr; - if (domain != XCL_MEM_DEVICE_RAM) - return result; - - unsigned ddr = 1; - ddr <<= flags; - unsigned boHandle = xclAllocBO(size, XCL_BO_DEVICE_RAM, ddr); - if (boHandle == mNullBO) - return result; - - drm_xocl_info_bo boInfo = {boHandle, 0, 0, 0}; - if (ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &boInfo)) - return result; - - void *hbuf = xclMapBO(boHandle, true); - if (hbuf == MAP_FAILED) { - xclFreeBO(boHandle); - return mNullAddr; - } - mLegacyAddressTable.insert(boInfo.paddr, size, std::make_pair(boHandle, (char *)hbuf)); - return boInfo.paddr; - } - - void AwsXcl::xclFreeDeviceBuffer(uint64_t buf) - { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buf << std::endl; - } - - std::pair bo = mLegacyAddressTable.erase(buf); - drm_xocl_info_bo boInfo = {bo.first, 0, 0, 0}; - if (!ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &boInfo)) { - munmap(bo.second, boInfo.size); - } - 
xclFreeBO(bo.first); - } - - - size_t AwsXcl::xclCopyBufferHost2Device(uint64_t dest, const void *src, size_t size, size_t seek) - { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " - << src << ", " << size << ", " << seek << std::endl; - } - - std::pair bo = mLegacyAddressTable.find(dest); - std::memcpy(bo.second + seek, src, size); - int result = xclSyncBO(bo.first, XCL_BO_SYNC_BO_TO_DEVICE, size, seek); - if (result) - return result; - return size; - } - - - size_t AwsXcl::xclCopyBufferDevice2Host(void *dest, uint64_t src, size_t size, size_t skip) - { - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " - << src << ", " << size << ", " << skip << std::endl; - } - - std::pair bo = mLegacyAddressTable.find(src); - int result = xclSyncBO(bo.first, XCL_BO_SYNC_BO_FROM_DEVICE, size, skip); - if (result) - return result; - std::memcpy(dest, bo.second + skip, size); - return size; - } - - - AwsXcl *AwsXcl::handleCheck(void *handle) { - // Sanity checks - if (!handle) - return 0; - if (*(unsigned *)handle != TAG) - return 0; - if (!((AwsXcl *)handle)->isGood()) { - return 0; - } - - return (AwsXcl *)handle; - } - - unsigned AwsXcl::xclProbe() { - std::lock_guard lock(awsbwhal::deviceListMutex); - if(xcldev::pci_device_scanner::device_list.size() == 0) { - xcldev::pci_device_scanner devices; - devices.scan(true); - } - - unsigned i = 0; -#ifdef INTERNAL_TESTING - char file_name_buf[128]; - for (i = 0; i < 16; i++) { - std::sprintf((char *)&file_name_buf, "/dev/awsmgmt%d", i); - int fd = open(file_name_buf, O_RDWR); - if (fd < 0) { - return i; - } - close(fd); - } - if (i != xcldev::pci_device_scanner::device_list.size()) { - std::cout << "ERROR xclProbe: Num of FPGA userPF device do not match num of mgmtPF devices" << std::endl; - std::cout << "ERROR xclProbe: Num of userPF, mgmtPF devices = " << std::dec << 
xcldev::pci_device_scanner::device_list.size() << std::dec << i << std::endl; - return 0; - } -#endif - i = xcldev::pci_device_scanner::device_list.size(); - -#ifndef INTERNAL_TESTING - std::cout << "xclProbe found " << std::dec << i << " FPGA slots with xocl driver running" << std::endl; -#else - std::cout << "xclProbe found " << std::dec << i << " FPGA slots with awsmgmt & xocl driver running" << std::endl; -#endif - return i; - } - - AwsXcl::~AwsXcl() - { -#ifdef INTERNAL_TESTING - if (mUserMap != MAP_FAILED) { - munmap(mUserMap, MMAP_SIZE_USER); - } - if (mUserHandle > 0) { - close(mUserHandle); - } - if (mMgtHandle > 0) - close(mMgtHandle); -#else -//# error "INTERNAL_TESTING macro disabled. AMZN code goes here. " - if (ocl_kernel_bar >=0) - fpga_pci_detach(ocl_kernel_bar); - if (ocl_global_mem_bar>=0) - fpga_pci_detach(ocl_global_mem_bar); - if (sda_mgmt_bar>=0) - fpga_pci_detach(sda_mgmt_bar); - - ocl_kernel_bar = -1; - ocl_global_mem_bar = -1; - sda_mgmt_bar = -1; - -#endif - - if (mLogStream.is_open()) { - mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; - mLogStream.close(); - } - } - - AwsXcl::AwsXcl(unsigned index, const char *logfileName, - xclVerbosityLevel verbosity) : mTag(TAG), mBoardNumber(index), - maxDMASize(0xfa0000), - mLocked(false), - mOffsets{0x0, 0x0, 0x0, 0x0}, - mOclRegionProfilingNumberSlots(XPAR_AXI_PERF_MON_2_NUMBER_SLOTS) - { -#ifndef INTERNAL_TESTING - loadDefaultAfiIfCleared(); -#endif - const std::string devName = "/dev/dri/renderD" + std::to_string(xcldev::pci_device_scanner::device_list[mBoardNumber].user_instance); -#ifndef INTERNAL_TESTING - mUserHandle = open(devName.c_str(), O_RDWR); - if(mUserHandle <= 0) { - std::cout << "WARNING: AwsXcl - Cannot open userPF: " << devName << std::endl; - } -#endif - -#ifdef INTERNAL_TESTING - if(mUserHandle > 0) { - mUserMap = (char *)mmap(0, MMAP_SIZE_USER, PROT_READ | PROT_WRITE, MAP_SHARED, mUserHandle, 0); - if (mUserMap == MAP_FAILED) { - std::cout << "Map 
failed: " << devName << std::endl; - close(mUserHandle); - mUserHandle = -1; - } - } - - char file_name_buf[128]; - std::fill(&file_name_buf[0], &file_name_buf[0] + 128, 0); - std::sprintf((char *)&file_name_buf, "/dev/awsmgmt%d", mBoardNumber); - mMgtHandle = open(file_name_buf, O_RDWR | O_SYNC); - if(mMgtHandle > 0) { - if (xclGetDeviceInfo2(&mDeviceInfo)) { - close(mMgtHandle); - mMgtHandle = -1; - } - } else { - std::cout << "Cannot open mgmtPF: " << devName << std::endl; - } -#else - int slot_id = mBoardNumber; - ocl_kernel_bar = -1; - ocl_global_mem_bar = -1; - sda_mgmt_bar = -1; - - if (xclGetDeviceInfo2(&mDeviceInfo)) { - std::cout << "ERROR AwsXcl: DeviceInfo failed for slot# " << std::dec << slot_id << std::endl; - } else if (fpga_pci_attach(slot_id, FPGA_APP_PF, APP_PF_BAR0, 0, &ocl_kernel_bar) ) { - ocl_kernel_bar = -1; - std::cout << "ERROR AwsXcl: PCI kernel bar attach failed for slot# " << std::dec << slot_id << std::endl; - } else if (fpga_pci_attach(slot_id, FPGA_APP_PF, APP_PF_BAR4, 0, &ocl_global_mem_bar) ) { - fpga_pci_detach(ocl_kernel_bar); - ocl_kernel_bar = -1; - ocl_global_mem_bar = -1; - sda_mgmt_bar = -1; - std::cout << "ERROR AwsXcl: PCI global bar attach failed for slot# " << std::dec << slot_id << std::endl; - } else if (fpga_pci_attach(slot_id, FPGA_MGMT_PF, MGMT_PF_BAR4, 0, &sda_mgmt_bar) ) { - fpga_pci_detach(ocl_kernel_bar); - fpga_pci_detach(ocl_global_mem_bar); - ocl_kernel_bar = -1; - ocl_global_mem_bar = -1; - sda_mgmt_bar = -1; - std::cout << "ERROR AwsXcl: PCI mgmt bar attach failed for slot# " << std::dec << slot_id << std::endl; - } -#endif - - // - // Profiling - defaults - // Class-level defaults: mIsDebugIpLayoutRead = mIsDeviceProfiling = false - mDevUserName = xcldev::pci_device_scanner::device_list[mBoardNumber].user_name; - mMemoryProfilingNumberSlots = 0; - mPerfMonFifoCtrlBaseAddress = 0x00; - mPerfMonFifoReadBaseAddress = 0x00; - // - // Profiling - defaults - // Class-level defaults: mIsDebugIpLayoutRead = 
mIsDeviceProfiling = false - mDevUserName = xcldev::pci_device_scanner::device_list[mBoardNumber].user_name; - mMemoryProfilingNumberSlots = 0; - mPerfMonFifoCtrlBaseAddress = 0x00; - mPerfMonFifoReadBaseAddress = 0x00; - - // - // Profiling - defaults - // Class-level defaults: mIsDebugIpLayoutRead = mIsDeviceProfiling = false - mDevUserName = xcldev::pci_device_scanner::device_list[mBoardNumber].user_name; - mMemoryProfilingNumberSlots = 0; - mPerfMonFifoCtrlBaseAddress = 0x00; - mPerfMonFifoReadBaseAddress = 0x00; - } - - bool AwsXcl::isGood() const { -#ifdef INTERNAL_TESTING - if (mUserHandle < 0) { - std::cout << "AwsXcl: Bad handle. No userPF Handle" << std::endl; - return false; - } - if (mMgtHandle < 0) { - std::cout << "AwsXcl: Bad handle. No mgmtPF Handle" << std::endl; - return false; - } -#else - if (ocl_kernel_bar < 0) { - std::cout << "WARNING: AwsXcl isGood: kernel, global & mgmt bar are: " << std::hex << ocl_kernel_bar << ", " << std::hex << ocl_global_mem_bar << ", " << sda_mgmt_bar << std::endl; - return false; - } - if (ocl_global_mem_bar < 0) { - std::cout << "WARNING: AwsXcl isGood: kernel, global & mgmt bar are: " << std::hex << ocl_kernel_bar << ", " << std::hex << ocl_global_mem_bar << ", " << sda_mgmt_bar << std::endl; - return false; - } - if (sda_mgmt_bar < 0) { - std::cout << "WARNING: AwsXcl isGood: kernel, global & mgmt bar are: " << std::hex << ocl_kernel_bar << ", " << std::hex << ocl_global_mem_bar << ", " << sda_mgmt_bar << std::endl; - return false; - } - if (mUserHandle <= 0) { - std::cout << "WARNING: AwsXcl isGood: invalid user handle." 
<< std::endl; - return false; - } -#endif - return true; - } - - - int AwsXcl::pcieBarRead(int bar_num, unsigned long long offset, void* buffer, unsigned long long length) { - char *qBuf = (char *)buffer; - switch (bar_num) { -#ifdef INTERNAL_TESTING - const char *mem = 0; - case 0: - { - if ((length + offset) > MMAP_SIZE_USER) { - return -1; - } - mem = mUserMap; -#else - case APP_PF_BAR0: - { -#endif - break; - } - default: - { - return -1; - } - } - - while (length >= 4) { -#ifdef INTERNAL_TESTING - *(unsigned *)qBuf = *(unsigned *)(mem + offset); -#else - fpga_pci_peek(ocl_kernel_bar, (uint64_t)offset,(uint32_t*)qBuf); -#endif - offset += 4; - qBuf += 4; - length -= 4; - } - while (length) { -#ifdef INTERNAL_TESTING - *qBuf = *(mem + offset); -#else - - // TODO - add support for sub 4-byte read in AWS platform -#endif - offset++; - qBuf++; - length--; - } - -// std::memcpy(buffer, mem + offset, length); - return 0; - } - - int AwsXcl::pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length) { - char *qBuf = (char *)buffer; - switch (bar_num) { -#ifdef INTERNAL_TESTING - char *mem = 0; - case 0: - { - if ((length + offset) > MMAP_SIZE_USER) { - return -1; - } - mem = mUserMap; -#else - case APP_PF_BAR0: - { -#endif - break; - } - default: - { - return -1; - } - } - - while (length >= 4) { -#ifdef INTERNAL_TESTING - *(unsigned *)(mem + offset) = *(unsigned *)qBuf; -#else - fpga_pci_poke(ocl_kernel_bar, uint64_t (offset), *((uint32_t*) qBuf)); -#endif - offset += 4; - qBuf += 4; - length -= 4; - } - while (length) { -#ifdef INTERNEL_TESTING - *(mem + offset) = *qBuf; -#else - std::cout << "xclWrite - unsupported write with length not multiple of 4-bytes" << std::endl; - -#endif - offset++; - qBuf++; - length--; - } - -// std::memcpy(mem + offset, buffer, length); - return 0; - } - - bool AwsXcl::zeroOutDDR() - { - // Zero out the FPGA external DRAM Content so memory controller - // will not complain about ECC error from 
memory regions not - // initialized before - // In AWS F1 FPGA, the DRAM is clear before loading new AFI - // hence this API is redundant and will return false to - // make sure developers dont assume it works - - // static const unsigned long long BLOCK_SIZE = 0x4000000; -// void *buf = 0; -// if (posix_memalign(&buf, DDR_BUFFER_ALIGNMENT, BLOCK_SIZE)) -// return false; -// memset(buf, 0, BLOCK_SIZE); -// mDataMover->pset64(buf, BLOCK_SIZE, 0, mDeviceInfo.mDDRSize/BLOCK_SIZE); -// free(buf); - return false; - } - - /* Locks a given FPGA Slot - * By levering the available lock infrastrucutre for the DMA - * Driver associated with the given FPGA slot - */ - bool AwsXcl::xclLockDevice() - { -#ifdef INTERNAL_TESTING -#else -// FIXME: do we need to flock the ocl_kernel interface as well ? -// -#endif - mLocked = true; - -// return zeroOutDDR(); - return true; - } - - std::string AwsXcl::getDSAName(unsigned short deviceId, unsigned short subsystemId) - { - std::string dsa("xilinx_aws-vu9p-f1-04261818_dynamic_5_0"); - return dsa; - } - - int AwsXcl::xclGetDeviceInfo2(xclDeviceInfo2 *info) - { - std::memset(info, 0, sizeof(xclDeviceInfo2)); - info->mMagic = 0X586C0C6C; - info->mHALMajorVersion = XCLHAL_MAJOR_VER; - info->mHALMajorVersion = XCLHAL_MINOR_VER; - info->mMinTransferSize = DDR_BUFFER_ALIGNMENT; - info->mDMAThreads = 4;//AWS has four threads. 
Others have only two threads - -#ifdef INTERNAL_TESTING - /* Sarab disabling xdma ioctl - xdma_ioc_info obj = {{0X586C0C6C, XDMA_IOCINFO}}; - /--* Calling the underlying DMA driver to extract - * DMA specific configuration - * A non-zero value reprent at error - *--/ - int ret = ioctl(mUserHandle, XDMA_IOCINFO, &obj); - // Log the return value for further debug - if (ret) - return ret; - info->mVendorId = obj.vendor; - info->mDeviceId = obj.device; - info->mSubsystemId = obj.subsystem_device; - info->mSubsystemVendorId = obj.subsystem_vendor; - info->mDeviceVersion = obj.subsystem_device & 0x00ff; - */ - awsmgmt_ioc_info mgmt_info_obj; - int ret = ioctl(mMgtHandle, AWSMGMT_IOCINFO, &mgmt_info_obj); - if (ret) - return ret; - - info->mVendorId = mgmt_info_obj.vendor; - info->mDeviceId = mgmt_info_obj.device; - info->mSubsystemId = mgmt_info_obj.subsystem_device; - info->mSubsystemVendorId = mgmt_info_obj.subsystem_vendor; - info->mDeviceVersion = mgmt_info_obj.subsystem_device & 0x00ff; - info->mPCIeLinkWidth = mgmt_info_obj.pcie_link_width; - info->mPCIeLinkSpeed = mgmt_info_obj.pcie_link_speed; - for (int i = 0; i < AWSMGMT_NUM_SUPPORTED_CLOCKS; ++i) { - info->mOCLFrequency[i] = mgmt_info_obj.ocl_frequency[i]; - } - info->mMigCalib = true; - for (int i = 0; i < 4; i++) { - info->mMigCalib = info->mMigCalib && mgmt_info_obj.mig_calibration[i]; - } -#else - struct fpga_slot_spec slot_info; - //fpga_pci_get_slot_spec(mBoardNumber,FPGA_APP_PF, &slot_info); - fpga_pci_get_slot_spec(mBoardNumber, &slot_info); - info->mVendorId = slot_info.map[0].vendor_id; - info->mDeviceId = slot_info.map[0].device_id; - // FIXME - update next 3 variables - info->mSubsystemId = slot_info.map[0].subsystem_device_id; - info->mSubsystemVendorId = slot_info.map[0].subsystem_vendor_id; - info->mDeviceVersion = 0; - info->mPCIeLinkWidth = 16; - info->mPCIeLinkSpeed = 8000; - fpga_mgmt_image_info imageInfo; - fpga_mgmt_describe_local_image( mBoardNumber, &imageInfo, 0 ); - for (int i = 0; i < 
AWSMGMT_NUM_SUPPORTED_CLOCKS; ++i) { - info->mOCLFrequency[i] = imageInfo.metrics.clocks[i].frequency[0] / 1000000; - } - info->mMigCalib = true; -#endif - - // F1 has 16 GiB per channel - info->mDDRSize = 0x400000000 * 4; - info->mDataAlignment = DDR_BUFFER_ALIGNMENT; - info->mNumClocks = AWSMGMT_NUM_ACTUAL_CLOCKS; - // Number of available channels - // TODO: add support for other FPGA configurations with less - // than 4 DRAM channels - info->mDDRBankCount = 4; - - const std::string deviceName = getDSAName(info->mDeviceId, info->mSubsystemId); - if (mLogStream.is_open()) - mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << deviceName << std::endl; - - std::size_t length = deviceName.copy(info->mName, deviceName.length(),0); - info->mName[length] = '\0'; - - if (mLogStream.is_open()) { - mLogStream << __func__ << ": name=" << deviceName << ", version=0x" << std::hex << info->mDeviceVersion - << ", clock freq=" << std::dec << info->mOCLFrequency[0] - << ", clock freq 2=" << std::dec << info->mOCLFrequency[1] << std::endl; - } - - info->mOnChipTemp = 25; - info->mFanTemp = 0; - info->mVInt = 0.9; - info->mVAux = 0.9; - info->mVBram = 0.9; - return 0; - } - - int AwsXcl::resetDevice(xclResetKind kind) { - - // Call a new IOCTL to just reset the OCL region -// if (kind == XCL_RESET_FULL) { -// xdma_ioc_base obj = {0X586C0C6C, XDMA_IOCHOTRESET}; -// return ioctl(mUserHandle, XDMA_IOCHOTRESET, &obj); -// } -// else if (kind == XCL_RESET_KERNEL) { -// xdma_ioc_base obj = {0X586C0C6C, XDMA_IOCOCLRESET}; -// return ioctl(mUserHandle, XDMA_IOCOCLRESET, &obj); -// } -// return -EINVAL; - - // AWS FIXME - add reset - return 0; - } - - int AwsXcl::xclReClock2(unsigned short region, const unsigned short *targetFreqMHz) - { - #ifdef INTERNAL_TESTING - awsmgmt_ioc_freqscaling obj = {0, targetFreqMHz[0], targetFreqMHz[1], targetFreqMHz[2], 0}; - return ioctl(mMgtHandle, AWSMGMT_IOCFREQSCALING, &obj); - #else -// # error "INTERNAL_TESTING macro disabled. 
AMZN code goes here. " -// # This API is not supported in AWS, the frequencies are set per AFI - return 0; - #endif - } - - ssize_t AwsXcl::xclUnmgdPwrite(unsigned flags, const void *buf, size_t count, uint64_t offset) - { - if (flags) - return -EINVAL; - drm_xocl_pwrite_unmgd unmgd = {0, 0, offset, count, reinterpret_cast(buf)}; - return ioctl(mUserHandle, DRM_IOCTL_XOCL_PWRITE_UNMGD, &unmgd); - } - - ssize_t AwsXcl::xclUnmgdPread(unsigned flags, void *buf, size_t count, uint64_t offset) - { - if (flags) - return -EINVAL; - drm_xocl_pread_unmgd unmgd = {0, 0, offset, count, reinterpret_cast(buf)}; - return ioctl(mUserHandle, DRM_IOCTL_XOCL_PREAD_UNMGD, &unmgd); - } - - int AwsXcl::xclExportBO(unsigned int boHandle) - { - drm_prime_handle info = {boHandle, 0, -1}; - int result = ioctl(mUserHandle, DRM_IOCTL_PRIME_HANDLE_TO_FD, &info); - return !result ? info.fd : result; - } - - unsigned int AwsXcl::xclImportBO(int fd, unsigned flags) - { - - /*Sarab - drm_xocl_userptr_bo user = {reinterpret_cast(userptr), size, mNullBO, flags}; - int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_USERPTR_BO, &user); - - */ - - - drm_prime_handle info = {mNullBO, flags, fd}; - int result = ioctl(mUserHandle, DRM_IOCTL_PRIME_FD_TO_HANDLE, &info); - if (result) { - std::cout << __func__ << " ERROR: FD to handle IOCTL failed" << std::endl; - } - return !result ? info.handle : mNullBO; - } - - int AwsXcl::xclGetBOProperties(unsigned int boHandle, xclBOProperties *properties) - { - drm_xocl_info_bo info = {boHandle, 0, 0, 0}; - int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &info); - properties->handle = info.handle; - properties->flags = info.flags; - properties->size = info.size; - properties->paddr = info.paddr; - properties->domain = XCL_BO_DEVICE_RAM; // currently all BO domains are XCL_BO_DEVICE_RAM - return result ? 
mNullBO : 0; - } - - bool AwsXcl::xclUnlockDevice() - { - flock(mUserHandle, LOCK_UN); - mLocked = false; - return true; - } - - // Assume that the memory is always - // created for the device ddr for now. Ignoring the flags as well. - unsigned int AwsXcl::xclAllocBO(size_t size, xclBOKind domain, unsigned flags) - { - drm_xocl_create_bo info = {size, mNullBO, flags}; - int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_CREATE_BO, &info); - if (result) { - std::cout << __func__ << " ERROR: AllocBO IOCTL failed" << std::endl; - } - return result ? mNullBO : info.handle; - } - - unsigned int AwsXcl::xclAllocUserPtrBO(void *userptr, size_t size, unsigned flags) - { - drm_xocl_userptr_bo user = {reinterpret_cast(userptr), size, mNullBO, flags}; - int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_USERPTR_BO, &user); - return result ? mNullBO : user.handle; - } - - void AwsXcl::xclFreeBO(unsigned int boHandle) - { - drm_gem_close closeInfo = {boHandle, 0}; - ioctl(mUserHandle, DRM_IOCTL_GEM_CLOSE, &closeInfo); - } - - int AwsXcl::xclWriteBO(unsigned int boHandle, const void *src, size_t size, size_t seek) - { - drm_xocl_pwrite_bo pwriteInfo = { boHandle, 0, seek, size, reinterpret_cast(src) }; - return ioctl(mUserHandle, DRM_IOCTL_XOCL_PWRITE_BO, &pwriteInfo); - } - - int AwsXcl::xclReadBO(unsigned int boHandle, void *dst, size_t size, size_t skip) - { - drm_xocl_pread_bo preadInfo = { boHandle, 0, skip, size, reinterpret_cast(dst) }; - return ioctl(mUserHandle, DRM_IOCTL_XOCL_PREAD_BO, &preadInfo); - } - - void *AwsXcl::xclMapBO(unsigned int boHandle, bool write) - { - drm_xocl_info_bo info = { boHandle, 0, 0 }; - int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &info); - if (result) - return nullptr; - - drm_xocl_map_bo mapInfo = { boHandle, 0, 0 }; - result = ioctl(mUserHandle, DRM_IOCTL_XOCL_MAP_BO, &mapInfo); - if (result) - return nullptr; - - return mmap(0, info.size, (write ? 
(PROT_READ|PROT_WRITE) : PROT_READ), - MAP_SHARED, mUserHandle, mapInfo.offset); - } - - int AwsXcl::xclSyncBO(unsigned int boHandle, xclBOSyncDirection dir, - size_t size, size_t offset) - { - drm_xocl_sync_bo_dir drm_dir = (dir == XCL_BO_SYNC_BO_TO_DEVICE) ? - DRM_XOCL_SYNC_BO_TO_DEVICE : - DRM_XOCL_SYNC_BO_FROM_DEVICE; - drm_xocl_sync_bo syncInfo = {boHandle, 0, size, offset, drm_dir}; - return ioctl(mUserHandle, DRM_IOCTL_XOCL_SYNC_BO, &syncInfo); - } - -#ifndef INTERNAL_TESTING - int AwsXcl::loadDefaultAfiIfCleared( void ) - { - int array_len = 16; - fpga_slot_spec spec_array[ array_len ]; - std::memset( spec_array, mBoardNumber, sizeof(fpga_slot_spec) * array_len ); - fpga_pci_get_all_slot_specs( spec_array, array_len ); - if( spec_array[mBoardNumber].map[FPGA_APP_PF].device_id == AWS_UserPF_DEVICE_ID ) { - std::string agfi = DEFAULT_GLOBAL_AFI; - fpga_mgmt_load_local_image( mBoardNumber, const_cast(agfi.c_str()) ); - if( sleepUntilLoaded( agfi ) ) { - std::cout << "ERROR: Sleep until load failed." << std::endl; - return -1; - } - fpga_pci_rescan_slot_app_pfs( mBoardNumber ); - } - return 0; - } - - int AwsXcl::sleepUntilLoaded( const std::string afi ) - { - for( int i = 0; i < 20; i++ ) { - std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) ); - fpga_mgmt_image_info info; - std::memset( &info, 0, sizeof(struct fpga_mgmt_image_info) ); - int result = fpga_mgmt_describe_local_image( mBoardNumber, &info, 0 ); - if( result ) { - std::cout << "ERROR: Load image failed." << std::endl; - return 1; - } - if( (info.status == FPGA_STATUS_LOADED) && !std::strcmp(info.ids.afi_id, const_cast(afi.c_str())) ) { - break; - } - } - return 0; - } - - int AwsXcl::checkAndSkipReload( char *afi_id, fpga_mgmt_image_info *orig_info ) - { - if( (orig_info->status == FPGA_STATUS_LOADED) && !std::strcmp(orig_info->ids.afi_id, afi_id) ) { - std::cout << "This AFI already loaded. Skip reload!" << std::endl; - int result = 0; - //existing afi matched. 
- uint16_t status = 0; - result = fpga_mgmt_get_vDIP_status(mBoardNumber, &status); - if(result) { - printf("Error: can not get virtual DIP Switch state\n"); - return result; - } - //Set bit 0 to 1 - status |= (1 << 0); - result = fpga_mgmt_set_vDIP(mBoardNumber, status); - if(result) { - printf("Error trying to set virtual DIP Switch \n"); - return result; - } - std::this_thread::sleep_for(std::chrono::microseconds(250)); - //pulse the changes in. - result = fpga_mgmt_get_vDIP_status(mBoardNumber, &status); - if(result) { - printf("Error: can not get virtual DIP Switch state\n"); - return result; - } - //Set bit 0 to 0 - status &= ~(1 << 0); - result = fpga_mgmt_set_vDIP(mBoardNumber, status); - if(result) { - printf("Error trying to set virtual DIP Switch \n"); - return result; - } - std::this_thread::sleep_for(std::chrono::microseconds(250)); - - printf("Successfully skipped reloading of local image.\n"); - return result; - } else { - std::cout << "AFI not yet loaded, proceed to download." 
<< std::endl; - return 1; - } - } -#endif -} /* end namespace awsbmhal */ - -xclDeviceHandle xclOpen(unsigned deviceIndex, const char *logFileName, xclVerbosityLevel level) -{ - if(xcldev::pci_device_scanner::device_list.size() <= deviceIndex) { - printf("Cannot find index %d \n", deviceIndex); - return nullptr; - } - - awsbwhal::AwsXcl *handle = new awsbwhal::AwsXcl(deviceIndex, logFileName, level); - if (!awsbwhal::AwsXcl::handleCheck(handle)) { - printf("WARNING: xclOpen Handle check failed\n"); - delete handle; - handle = nullptr; -#ifndef INTERNAL_TESTING - /* workaround necessary to load a default afi and program with xclbin when device is in a cleared state */ - xcldev::pci_device_scanner rescan; - rescan.clear_device_list(); - rescan.scan( true ); - for (unsigned int i=0; i(handle); -} - -void xclClose(xclDeviceHandle handle) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (drv) - delete drv; -} - - -int xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclGetDeviceInfo2(info); -} - -int xclLoadBitstream(xclDeviceHandle handle, const char *xclBinFileName) -{ - return -ENOSYS; -} - -int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclLoadXclBin(buffer); -} - -size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclWrite(space, offset, hostBuf, size); -} - -size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclRead(space, offset, hostBuf, size); -} - - -uint64_t 
xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclAllocDeviceBuffer(size); -} - - -uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, xclMemoryDomains domain, - unsigned flags) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclAllocDeviceBuffer2(size, domain, flags); -} - - -void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return; - return drv->xclFreeDeviceBuffer(buf); -} - - -size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, const void *src, size_t size, size_t seek) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclCopyBufferHost2Device(dest, src, size, seek); -} - - -size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, uint64_t src, size_t size, size_t skip) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclCopyBufferDevice2Host(dest, src, size, skip); -} - - -//This will be deprecated. -int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return xclUpgradeFirmware2(handle, fileName, 0); -} - -int xclUpgradeFirmware2(xclDeviceHandle handle, const char *fileName1, const char* fileName2) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return -ENOSYS; -} - -/* - * xclBootFPGA - * - * Sequence: - * 1) call boot ioctl - * 2) close the device, unload the driver - * 3) remove and scan - * 4) rescan pci devices - * 5) reload the driver (done by the calling function xcldev::boot()) - * - * Return 0 on success, -1 on failure. 
- */ -int xclBootFPGA(xclDeviceHandle handle) -{ -// awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); -// if (!drv) -// return -1; -// return -ENOSYS; - int retVal = -1; - - //awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); -// retVal = drv->xclBootFPGA(); // boot ioctl - retVal = 0; // skip boot ioctl since this may not be possible for AWS - - if( retVal == 0 ) - { - xclClose(handle); // close the device, unload the driver - retVal = xclRemoveAndScanFPGA(); // remove and scan - } - - if( retVal == 0 ) - { - xcldev::pci_device_scanner devScanner; - devScanner.scan( true ); // rescan pci devices - } - - return retVal; -} - -int xclRemoveAndScanFPGA( void ) -{ - const std::string devPath = "/devices/"; - const std::string removePath = "/remove"; - const std::string pciPath = "/sys/bus/pci"; - const std::string rescanPath = "/rescan"; - const char *input = "1\n"; - - // remove devices "echo 1 > /sys/bus/pci/devices//remove" - for (unsigned int i = 0; i < xcldev::pci_device_scanner::device_list.size(); i++) - { - std::string dev_name_pf_user = pciPath + devPath + xcldev::pci_device_scanner::device_list[i].user_name + removePath; - std::string dev_name_pf_mgmt = pciPath + devPath + xcldev::pci_device_scanner::device_list[i].mgmt_name + removePath; - - std::ofstream userFile( dev_name_pf_user ); - if( !userFile.is_open() ) { - perror( dev_name_pf_user.c_str() ); - return errno; - } - userFile << input; - - std::ofstream mgmtFile( dev_name_pf_mgmt ); - if( !mgmtFile.is_open() ) { - perror( dev_name_pf_mgmt.c_str() ); - return errno; - } - mgmtFile << input; - } - - std::this_thread::sleep_for(std::chrono::seconds(1)); - // initiate rescan "echo 1 > /sys/bus/pci/rescan" - std::ofstream rescanFile( pciPath + rescanPath ); - if( !rescanFile.is_open() ) { - perror( std::string( pciPath + rescanPath ).c_str() ); - return errno; - } - rescanFile << input; - - return 0; -} - -unsigned xclProbe() -{ - return awsbwhal::AwsXcl::xclProbe(); -} - -int 
xclResetDevice(xclDeviceHandle handle, xclResetKind kind) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return -ENOSYS; -} - -int xclReClock2(xclDeviceHandle handle, unsigned short region, const unsigned short *targetFreqMHz) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclReClock2(region, targetFreqMHz); -} - - -int xclLockDevice(xclDeviceHandle handle) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return drv->xclLockDevice() ? 0 : -1; -} - -//Sarab: Added for HAL2 support with XOCL GEM Driver - -int xclExportBO(xclDeviceHandle handle, unsigned int boHandle) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclExportBO(boHandle) : -ENODEV; -} - - -unsigned int xclImportBO(xclDeviceHandle handle, int fd, unsigned flags) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) { - std::cout << __func__ << ", " << std::this_thread::get_id() << ", handle & XOCL Device are bad" << std::endl; - } - return drv ? drv->xclImportBO(fd, flags) : -ENODEV; -} - -ssize_t xclUnmgdPwrite(xclDeviceHandle handle, unsigned flags, const void *buf, - size_t count, uint64_t offset) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclUnmgdPwrite(flags, buf, count, offset) : -ENODEV; -} - -int xclGetBOProperties(xclDeviceHandle handle, unsigned int boHandle, xclBOProperties *properties) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclGetBOProperties(boHandle, properties) : -ENODEV; -} - -ssize_t xclUnmgdPread(xclDeviceHandle handle, unsigned flags, void *buf, - size_t count, uint64_t offset) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? 
drv->xclUnmgdPread(flags, buf, count, offset) : -ENODEV; -} - -int xclUpgradeFirmwareXSpi(xclDeviceHandle handle, const char *fileName, int index) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return -ENOSYS; - //return drv->xclUpgradeFirmwareXSpi(fileName, index); Not supported by AWS -} - -int xclUnlockDevice(xclDeviceHandle handle) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) { - std::cout << "xclUnlockDevice returning -ENODEV\n"; - return -ENODEV; - } else { - return drv->xclUnlockDevice() ? 0 : 1; - } -} - -unsigned int xclAllocBO(xclDeviceHandle handle, size_t size, xclBOKind domain, unsigned flags) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclAllocBO(size, domain, flags) : -ENODEV; -} - -unsigned int xclAllocUserPtrBO(xclDeviceHandle handle, void *userptr, size_t size, unsigned flags) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclAllocUserPtrBO(userptr, size, flags) : -ENODEV; -} - -void xclFreeBO(xclDeviceHandle handle, unsigned int boHandle) { - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return; - drv->xclFreeBO(boHandle); -} - -size_t xclWriteBO(xclDeviceHandle handle, unsigned int boHandle, const void *src, size_t size, - size_t seek) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclWriteBO(boHandle, src, size, seek) : -ENODEV; -} - -size_t xclReadBO(xclDeviceHandle handle, unsigned int boHandle, void *dst, size_t size, - size_t skip) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclReadBO(boHandle, dst, size, skip) : -ENODEV; -} - -void *xclMapBO(xclDeviceHandle handle, unsigned int boHandle, bool write) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? 
drv->xclMapBO(boHandle, write) : nullptr; -} - - -int xclSyncBO(xclDeviceHandle handle, unsigned int boHandle, xclBOSyncDirection dir, - size_t size, size_t offset) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - return drv ? drv->xclSyncBO(boHandle, dir, size, offset) : -ENODEV; -} - -unsigned int xclVersion () { - return 2; -} - -int xclGetErrorStatus(xclDeviceHandle handle, xclErrorStatus *info) -{ - awsbwhal::AwsXcl *drv = awsbwhal::AwsXcl::handleCheck(handle); - if (!drv) - return -1; - return -ENOSYS; - //return drv->xclGetErrorStatus(info); Not supported for AWS -} - -int xclXbsak(int argc, char *argv[]) -{ - return xcldev::xclXbsak(argc, argv); -} - diff --git a/SDAccel/userspace/src2/shim.h b/SDAccel/userspace/src2/shim.h deleted file mode 100755 index 753182fb..00000000 --- a/SDAccel/userspace/src2/shim.h +++ /dev/null @@ -1,380 +0,0 @@ -/** - * Copyright (C) 2017-2018 Xilinx, Inc - * Author: Sonal Santan - * AWS HAL Driver layered on top of kernel drivers - * - * Code copied from SDAccel XDMA based HAL driver - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ -#ifndef _XDMA_SHIM_H_ -#define _XDMA_SHIM_H_ - -#include "xclhal2.h" -#include "xclperf2.h" -#include "drm.h" -#include -#include -#include -#include -#include -#include -#include - -#ifndef INTERNAL_TESTING -#include "fpga_pci.h" -#include "fpga_mgmt.h" -#endif - -// Work around GCC 4.8 + XDMA BAR implementation bugs -// With -O3 PCIe BAR read/write are not reliable hence force -O2 as max -// optimization level for pcieBarRead() and pcieBarWrite() -#if defined(__GNUC__) && defined(NDEBUG) -#define SHIM_O2 __attribute__ ((optimize("-O2"))) -#else -#define SHIM_O2 -#endif - -namespace awsbwhal { - - -struct AddresRange; - -std::ostream& operator<< (std::ostream &strm, const AddresRange &rng); - -/** - * Simple tuple struct to store non overlapping address ranges: address and size - */ -struct AddresRange : public std::pair { - // size will be zero when we are looking up an address that was passed by the user - AddresRange(uint64_t addr, size_t size = 0) : std::pair(std::make_pair(addr, size)) { - //std::cout << "CTOR(" << addr << ',' << size << ")\n"; - } - AddresRange(AddresRange && rhs) : std::pair(std::move(rhs)) { - //std::cout << "MOVE CTOR(" << rhs.first << ',' << rhs.second << ")\n"; - } - - AddresRange(const AddresRange &rhs) = delete; - AddresRange& operator=(const AddresRange &rhs) = delete; - - // Comparison operator is useful when using AddressRange as a key in std::map - // Note one operand in the comparator may have only the address without the size - // However both operands in the comparator will not have zero size - bool operator < (const AddresRange& other) const { - //std::cout << *this << " < " << other << "\n"; - if ((this->second != 0) && (other.second != 0)) - // regular ranges - return (this->first < other.first); - if (other.second == 0) - // second range just has an address - // (1000, 100) < (1200, 0) - // (1000, 100) < (1100, 0) first range ends at 1099 - return ((this->first + this->second) <= other.first); - assert(this->second == 
0); - // this range just has an address - // (1100, 0) < (1200, 100) - return (this->first < other.first); - } -}; - -/** - * Simple map of address range to its bo handle and mapped virtual address - */ -static const std::pair mNullValue = std::make_pair(0xffffffff, nullptr); -class RangeTable { - std::map> mTable; - mutable std::mutex mMutex; -public: - void insert(uint64_t addr, size_t size, std::pair bo) { - // assert(find(addr) == 0xffffffff); - std::lock_guard lock(mMutex); - mTable[AddresRange(addr, size)] = bo; - } - - std::pair erase(uint64_t addr) { - std::lock_guard lock(mMutex); - std::map>::const_iterator i = mTable.find(AddresRange(addr)); - if (i == mTable.end()) - return mNullValue; - std::pair result = i->second; - mTable.erase(i); - return result; - } - - std::pair find(uint64_t addr) const { - std::lock_guard lock(mMutex); - std::map>::const_iterator i = mTable.find(AddresRange(addr)); - if (i == mTable.end()) - return mNullValue; - return i->second; - } -}; - - - // Memory alignment for DDR and AXI-MM trace access - template class AlignedAllocator { - void *mBuffer; - size_t mCount; - public: - T *getBuffer() { - return (T *)mBuffer; - } - - size_t size() const { - return mCount * sizeof(T); - } - - AlignedAllocator(size_t alignment, size_t count) : mBuffer(0), mCount(count) { - if (posix_memalign(&mBuffer, alignment, count * sizeof(T))) { - mBuffer = 0; - } - } - ~AlignedAllocator() { - if (mBuffer) - free(mBuffer); - } - }; - - const uint64_t mNullAddr = 0xffffffffffffffffull; - const uint64_t mNullBO = 0xffffffff; - - // XDMA Shim - class AwsXcl{ - - struct ELARecord { - unsigned mStartAddress; - unsigned mEndAddress; - unsigned mDataCount; - - std::streampos mDataPos; - ELARecord() : mStartAddress(0), mEndAddress(0), - mDataCount(0), mDataPos(0) {} - }; - - typedef std::list ELARecordList; - - typedef std::list > PairList; - - public: - //Sarab: Added for HAL2 XOCL Driver support - //int xclGetErrorStatus(xclErrorStatus *info); Not supported 
for AWS - bool xclUnlockDevice(); - unsigned int xclAllocBO(size_t size, xclBOKind domain, unsigned flags); - unsigned int xclAllocUserPtrBO(void *userptr, size_t size, unsigned flags); - void xclFreeBO(unsigned int boHandle); - int xclWriteBO(unsigned int boHandle, - const void *src, size_t size, size_t seek); - int xclReadBO(unsigned int boHandle, - void *dst, size_t size, size_t skip); - void *xclMapBO(unsigned int boHandle, bool write); - int xclSyncBO(unsigned int boHandle, xclBOSyncDirection dir, - size_t size, size_t offset); - int xclExportBO(unsigned int boHandle); - unsigned int xclImportBO(int fd, unsigned flags); - int xclGetBOProperties(unsigned int boHandle, xclBOProperties *properties); - ssize_t xclUnmgdPread(unsigned flags, void *buf, - size_t count, uint64_t offset); - ssize_t xclUnmgdPwrite(unsigned flags, const void *buf, - size_t count, uint64_t offset); - - - // Bitstreams - int xclGetXclBinIdFromSysfs(uint64_t &xclbinid); - int xclLoadXclBin(const xclBin *buffer); - int xclLoadAxlf(const axlf *buffer); - int xclUpgradeFirmware(const char *fileName); - int xclUpgradeFirmware2(const char *file1, const char* file2); - //int xclUpgradeFirmwareXSpi(const char *fileName, int device_index=0); Not supported by AWS - int xclTestXSpi(int device_index); - int xclBootFPGA(); - int xclRemoveAndScanFPGA(); - int resetDevice(xclResetKind kind); - int xclReClock2(unsigned short region, const unsigned short *targetFreqMHz); - - // Raw read/write - size_t xclWrite(xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size); - size_t xclRead(xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size); - - // Buffer management - uint64_t xclAllocDeviceBuffer(size_t size); - uint64_t xclAllocDeviceBuffer2(size_t size, xclMemoryDomains domain, unsigned flags); - void xclFreeDeviceBuffer(uint64_t buf); - size_t xclCopyBufferHost2Device(uint64_t dest, const void *src, size_t size, size_t seek); - size_t xclCopyBufferDevice2Host(void *dest, 
uint64_t src, size_t size, size_t skip); - - // Performance monitoring - // Control - double xclGetDeviceClockFreqMHz(); - double xclGetReadMaxBandwidthMBps(); - double xclGetWriteMaxBandwidthMBps(); - //void xclSetOclRegionProfilingNumberSlots(uint32_t numSlots); - void xclSetProfilingNumberSlots(xclPerfMonType type, uint32_t numSlots); - size_t xclPerfMonClockTraining(xclPerfMonType type); - // Counters - size_t xclPerfMonStartCounters(xclPerfMonType type); - size_t xclPerfMonStopCounters(xclPerfMonType type); - size_t xclPerfMonReadCounters(xclPerfMonType type, xclCounterResults& counterResults); - //debug related - uint32_t getCheckerNumberSlots(int type); - uint32_t getIPCountAddrNames(int type, uint64_t *baseAddress, std::string * portNames); - size_t xclDebugReadCounters(xclDebugCountersResults* debugResult); - size_t xclDebugReadCheckers(xclDebugCheckersResults* checkerResult); - void readDebugIpLayout(); - - // Trace - size_t xclPerfMonStartTrace(xclPerfMonType type, uint32_t startTrigger); - size_t xclPerfMonStopTrace(xclPerfMonType type); - uint32_t xclPerfMonGetTraceCount(xclPerfMonType type); - size_t xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector); - - // Sanity checks - int xclGetDeviceInfo2(xclDeviceInfo2 *info); - static AwsXcl *handleCheck(void *handle); - static unsigned xclProbe(); - bool xclLockDevice(); - unsigned getTAG() const { - return mTag; - } - bool isGood() const; - - ~AwsXcl(); - AwsXcl(unsigned index, const char *logfileName, xclVerbosityLevel verbosity); - - private: - - size_t xclReadModifyWrite(uint64_t offset, const void *hostBuf, size_t size); - size_t xclReadSkipCopy(uint64_t offset, void *hostBuf, size_t size); - bool zeroOutDDR(); - - bool isXPR() const { - return ((mDeviceInfo.mSubsystemId >> 12) == 4); - } - - bool isMultipleOCLClockSupported() { - unsigned dsaNum = ((mDeviceInfo.mDeviceId << 16) | mDeviceInfo.mSubsystemId); - // 0x82384431 : TUL KU115 4ddr 3.1 DSA - return ((dsaNum == 
0x82384431) || (dsaNum == 0x82384432))? true : false; - } - - bool isUltraScale() const { - return (mDeviceInfo.mDeviceId & 0x8000); - } - - // Core DMA code - SHIM_O2 int pcieBarRead(int bar_num, unsigned long long offset, void* buffer, unsigned long long length); - SHIM_O2 int pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length); - int freezeAXIGate(); - int freeAXIGate(); - - // PROM flashing - int prepare(unsigned startAddress, unsigned endAddress); - int program(std::ifstream& mcsStream, const ELARecord& record); - int program(std::ifstream& mcsStream); - int waitForReady(unsigned code, bool verbose = true); - int waitAndFinish(unsigned code, unsigned data, bool verbose = true); - - //XSpi flashing. - bool prepareXSpi(); - int programXSpi(std::ifstream& mcsStream, const ELARecord& record); - int programXSpi(std::ifstream& mcsStream); - bool waitTxEmpty(); - bool isFlashReady(); - //bool windDownWrites(); - bool bulkErase(); - bool sectorErase(unsigned Addr); - bool writeEnable(); -#if 0 - bool dataTransfer(bool read); -#endif - bool readPage(unsigned addr, uint8_t readCmd = 0xff); - bool writePage(unsigned addr, uint8_t writeCmd = 0xff); - unsigned readReg(unsigned offset); - int writeReg(unsigned regOffset, unsigned value); - bool finalTransfer(uint8_t *sendBufPtr, uint8_t *recvBufPtr, int byteCount); - bool getFlashId(); - //All remaining read /write register commands can be issued through this function. 
- bool readRegister(unsigned commandCode, unsigned bytes); - bool writeRegister(unsigned commandCode, unsigned value, unsigned bytes); - bool select4ByteAddressMode(); - bool deSelect4ByteAddressMode(); - - - // Performance monitoring helper functions - bool isDSAVersion(unsigned majorVersion, unsigned minorVersion, bool onlyThisVersion); - unsigned getBankCount(); - uint64_t getHostTraceTimeNsec(); - uint64_t getPerfMonBaseAddress(xclPerfMonType type, uint32_t slotNum); - uint64_t getPerfMonFifoBaseAddress(xclPerfMonType type, uint32_t fifonum); - uint64_t getPerfMonFifoReadBaseAddress(xclPerfMonType type, uint32_t fifonum); - uint32_t getPerfMonNumberSlots(xclPerfMonType type); - uint32_t getPerfMonNumberSamples(xclPerfMonType type); - uint32_t getPerfMonNumberFifos(xclPerfMonType type); - uint32_t getPerfMonByteScaleFactor(xclPerfMonType type); - uint8_t getPerfMonShowIDS(xclPerfMonType type); - uint8_t getPerfMonShowLEN(xclPerfMonType type); - uint32_t getPerfMonSlotStartBit(xclPerfMonType type, uint32_t slotnum); - uint32_t getPerfMonSlotDataWidth(xclPerfMonType type, uint32_t slotnum); - size_t resetFifos(xclPerfMonType type); - uint32_t bin2dec(std::string str, int start, int number); - uint32_t bin2dec(const char * str, int start, int number); - std::string dec2bin(uint32_t n); - std::string dec2bin(uint32_t n, unsigned bits); - static std::string getDSAName(unsigned short deviceId, unsigned short subsystemId); - - private: - // This is a hidden signature of this class and helps in preventing - // user errors when incorrect pointers are passed in as handles. 
- const unsigned mTag; - const int mBoardNumber; - const size_t maxDMASize; - bool mLocked; - const uint64_t mOffsets[XCL_ADDR_SPACE_MAX]; - int mUserHandle; -#ifdef INTERNAL_TESTING - int mMgtHandle; -#else - pci_bar_handle_t ocl_kernel_bar; // AppPF BAR0 for OpenCL kernels - pci_bar_handle_t sda_mgmt_bar; // MgmtPF BAR4, for SDAccel Perf mon etc - pci_bar_handle_t ocl_global_mem_bar; // AppPF BAR4 -#endif - uint32_t mMemoryProfilingNumberSlots; - uint32_t mOclRegionProfilingNumberSlots; - std::string mDevUserName; - - // Information extracted from platform linker - bool mIsDebugIpLayoutRead = false; - bool mIsDeviceProfiling = false; - uint64_t mPerfMonFifoCtrlBaseAddress; - uint64_t mPerfMonFifoReadBaseAddress; - uint64_t mPerfMonBaseAddress[XSPM_MAX_NUMBER_SLOTS]; - std::string mPerfMonSlotName[XSPM_MAX_NUMBER_SLOTS]; - - char *mUserMap; - std::ofstream mLogStream; - xclVerbosityLevel mVerbosity; - std::string mBinfile; - ELARecordList mRecordList; - xclDeviceInfo2 mDeviceInfo; - RangeTable mLegacyAddressTable; - -#ifndef INTERNAL_TESTING - int sleepUntilLoaded( std::string afi ); - int checkAndSkipReload( char *afi_id, fpga_mgmt_image_info *info ); - int loadDefaultAfiIfCleared( void ); -#endif - public: - static const unsigned TAG; - }; -} - -#endif diff --git a/SDAccel/userspace/src2/xclbin.cpp b/SDAccel/userspace/src2/xclbin.cpp deleted file mode 100755 index bf1c0184..00000000 --- a/SDAccel/userspace/src2/xclbin.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * Author: Sonal Santan - * AWS HAL Driver for SDAccel/OpenCL runtime evnrionemnt, for AWS EC2 F1 - * - * Code copied from SDAccel XDMA based HAL driver - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may - * not use this file except in compliance with the License. 
A copy of the - * License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -//#define INTERNAL_TESTING 1 - -#include - -#include "xclbin2.h" - -#ifdef INTERNAL_TESTING -#define AFI_ID_STR_MAX 64 -#else -#include "hal/fpga_common.h" -#endif - -const char *get_afi_from_xclBin(const xclBin *buffer) -{ - const char *afid = reinterpret_cast(buffer); - afid += buffer->m_primaryFirmwareOffset; - if (buffer->m_primaryFirmwareLength > AFI_ID_STR_MAX) - return nullptr; - if (std::memcmp(afid, "afi-", 4) && std::memcmp(afid, "agfi-", 5)) - return nullptr; - return afid; -} - -const char *get_afi_from_axlf(const axlf *buffer) -{ - const axlf_section_header *bit_header = xclbin::get_axlf_section(buffer, BITSTREAM); - const char *afid = reinterpret_cast(buffer); - afid += bit_header->m_sectionOffset; - if (bit_header->m_sectionSize > AFI_ID_STR_MAX) - return nullptr; - if (std::memcmp(afid, "afi-", 4) && std::memcmp(afid, "agfi-", 5)) - return nullptr; - return afid; -} diff --git a/Vitis/README.md b/Vitis/README.md new file mode 100644 index 00000000..5e34083e --- /dev/null +++ b/Vitis/README.md @@ -0,0 +1,216 @@ +# Quick Start Guide to Accelerating your C/C++ application on an AWS F1 FPGA Instance with Vitis + +There are three steps for accelerating your application on an Amazon EC2 FPGA instance using the software-defined development flow: +1. Build the host application, and the Xilinx FPGA binary +2. Create an AFI +3. Run the FPGA accelerated application on AWS FPGA instances + +This quick start guide will utilize a simple "Hello World" Vitis example to get you started. 
+ +It is highly recommended you read the documentation and utilize software and hardware emulation prior to running on F1. +The F1 HW Target compile time is ~50 minutes; therefore, software and hardware emulation should be used during development. + + +# Table of Contents + +1. [Overview](#overview) +2. [Prerequisites](#prerequisites) + * [AWS Account, F1/EC2 Instances, On-Premises, AWS IAM Permissions, AWS CLI and S3 Setup](#iss) + * [Github and Environment Setup](#gitsetenv) +3. [Build the host application, Xilinx FPGA binary and verify you are ready for FPGA acceleration](#createapp) + * [Emulate the code](#emu) + * [Software Emulation](#swemu) + * [Hardware Emulation](#hwemu) + * [Build the host application and Xilinx FPGA Binary](#hw) +4. [Create an Amazon FPGA Image (AFI)](#createafi) +5. [Run the FPGA accelerated application on F1](#runonf1) +6. [Additional Vitis Information](#read) + + + +# Overview +* Vitis is a complete development environment for applications accelerated using Xilinx FPGAs +* It leverages the OpenCL heterogeneous computing framework to offload compute-intensive workloads to the FPGA +* The accelerated application is written in C/C++, OpenCL or RTL with OpenCL APIs + + +# Prerequisites + +## AWS Account, F1/EC2 Instances, On-Premises, AWS IAM Permissions, AWS CLI and S3 Setup (One-time Setup) +* [Setup an AWS Account](https://aws.amazon.com/free/) +* Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with Vitis and required licenses. + * You may use this F1 instance to [build your host application and Xilinx FPGA binary](#createapp); however, it is more cost-efficient to either: + * Launch the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) on a compute EC2 instance (with a minimum of 30GiB RAM), **OR** + * Follow the [On-Premises Instructions](../docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx.
+* Setup AWS IAM permissions for creating FPGA Images (CreateFpgaImage and DescribeFpgaImages). [EC2 API Permissions are described in more detail](http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ec2-api-permissions.html). It is highly recommended that you validate your AWS IAM permissions prior to proceeding with this quick start. By calling the [DescribeFpgaImages API](../hdk/docs/describe_fpga_images.md) you can check that your IAM permissions are correct. +* [Setup AWS CLI and S3 Bucket](docs/Setup_AWS_CLI_and_S3_Bucket.md) to enable AFI creation. +* Install optional [packages](packages.txt) required to run all examples. If you do not install these packages, some examples may not work properly. The setup scripts will warn you of any missing packages. +* Additional dependencies may get flagged during the AWS Vitis scripts as warnings or errors. + + +## Github and Environment Setup +* Clone this github repository and source the *vitis_setup.sh* script: +``` + $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR + $ cd $AWS_FPGA_REPO_DIR + $ source vitis_setup.sh +``` + +* Sourcing the *vitis_setup.sh* script: + * Downloads and sets the correct AWS Platform: + * [AWS Vitis Platform](./aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2) that contains the dynamic hardware that enables Vitis kernels to run on AWS F1 instances. + * Valid platforms for shell_v04261818: `AWS_PLATFORM_201920_2` (Default) AWS F1 Vitis platform. + * Sets up the Xilinx Vitis example submodules. + * Installs the required libraries and package dependencies. + * Run environment checks to verify supported tool/lib versions. + + +# 1. 
Build the host application, Xilinx FPGA binary and verify you are ready for FPGA acceleration + +This section will walk you through creating, emulating and compiling your host application and FPGA Binary + + +# Emulate your Code + +The main goal of emulation is to ensure functional correctness and to determine how to partition the application between the host CPU and the FPGA. +HW/SW Emulation does not require use of actual FPGA's and can be run on any compute instances. Using non-F1 EC2 compute instances for initial development will help reduce costs. + + +## Software (SW) Emulation + +For CPU-based (SW) emulation, both the host code and the FPGA binary code are compiled to run on an x86 processor. +SW Emulation enables developers to iterate and refine the algorithms through fast compilation. +The iteration time is similar to software compile and run cycles on a CPU. + +The instructions below describe how to run the Vitis SW Emulation flow using the Makefile provided with a simple "hello world" example + +``` + $ cd $VITIS_DIR/examples/xilinx/hello_world + $ make clean + $ make check TARGET=sw_emu DEVICE=$AWS_PLATFORM all +``` + +For more information on how to debug your application in a SW Emulation environment. + + +## Hardware (HW) Emulation + +The Vitis hardware emulation flow enables the developer to check the correctness of the logic generated for the FPGA binary. This emulation flow invokes the hardware simulator in the Vitis environment to test the functionality of the code that will be executed on the FPGA Custom Logic. + +The instructions below describe how to run the HW Emulation flow using the Makefile provided with a simple "hello world" example: + +``` + $ cd $VITIS_DIR/examples/xilinx/hello_world + $ make clean + $ make check TARGET=hw_emu DEVICE=$AWS_PLATFORM all +``` +For more information on how to debug your application in a HW Emulation environment. 
+ + +# Build the Host Application and Xilinx FPGA Binary + +The Vitis system build flow enables the developer to build their host application as well as their Xilinx FPGA Binary. + +The instructions below describe how to build the Xilinx FPGA Binary and host application using the Makefile provided with a simple "hello world" example: + +``` + $ cd $VITIS_DIR/examples/xilinx/hello_world + $ make clean + $ make TARGET=hw DEVICE=$AWS_PLATFORM all +``` + +NOTE: If you encounter an error with `No current synthesis run set`, you may have previously run the [HDK IPI examples](../hdk/docs/IPI_GUI_Vivado_Setup.md) and created a `Vivado_init.tcl` file in `~/.Xilinx/Vivado`. This will cause [problems](https://forums.aws.amazon.com/thread.jspa?threadID=268202&tstart=25) with the build process, so it is recommended to remove it before starting a hardware system build. + + +# 2. Create an Amazon FPGA Image (AFI) + +*The Vitis Flow only supports AFIs created with Device ID 0xF010 and Vendor ID 0x1D0F.* + +The runtime drivers are designed to only bind to 0xF010 and 0x1042 (Cleared AFI); loading AFIs from your application that provide other Device/Vendor IDs will require restarting the Xilinx MPD. + +This assumes you have: +* [Compiled your host application and Xilinx FPGA Binary](#hw) +* Validated your code using [SW/HW Emulation](#emu) and you are ready to create an AFI and test on F1. +* [Setup AWS CLI and S3 bucket](docs/Setup_AWS_CLI_and_S3_Bucket.md) for AFI creation + +The [create_vitis_afi.sh](./tools/create_vitis_afi.sh) script is provided to facilitate AFI creation from a Xilinx FPGA Binary. It: +* Takes in your Xilinx FPGA Binary \*.xclbin file +* Calls *aws ec2 create-fpga-image* to generate an AFI under the hood +* Generates a \*_afi_id.txt file which contains the identifiers for your AFI +* Creates an AWS FPGA Binary file with an \*.awsxclbin extension that is composed of: Metadata and AGFI-ID.
+ * **This \*.awsxclbin is the AWS FPGA Binary file that will need to be loaded by your host application to the FPGA** + +``` + $ $VITIS_DIR/tools/create_vitis_afi.sh -xclbin=<input-xclbin-file> \ + -o=<output-awsxclbin-file-root> \ + -s3_bucket=<bucket-name> -s3_dcp_key=<dcp-folder-name> -s3_logs_key=<logs-folder-name> +``` + +**Save the \*.awsxclbin file; you will need to copy it to your F1 instance along with your executable host application.** + +**NOTE**: *Attempting to load your AFI immediately on an F1 instance will result in an 'Invalid AFI ID' error. +Please wait until you confirm the AFI has been created successfully.* + +## Tracking the status of your registered AFI + +The \*_afi_id.txt file generated by create_vitis_afi.sh includes the two identifiers for your AFI: +- **FPGA Image Identifier** or **AFI ID**: this is the main ID used to manage your AFI through the AWS EC2 CLI commands and AWS SDK APIs. + This ID is regional, i.e., if an AFI is copied across multiple regions, it will have a different unique AFI ID in each region. + An example AFI ID is **`afi-06d0ffc989feeea2a`**. +- **Global FPGA Image Identifier** or **AGFI ID**: this is a global ID that is used to refer to an AFI from within an F1 instance. + For example, to load or clear an AFI from an FPGA slot, you use the AGFI ID. + **This is embedded into the AWS FPGA Binary \*.awsxclbin file generated by create_vitis_afi.sh.** + Since the AGFI ID is global (by design), it allows you to copy a combination of AFI/AMI to multiple regions, and they will work without requiring any extra setup. + An example AGFI ID is **`agfi-0f0e045f919413242`**. + + +Use the [describe-fpga-images](../hdk/docs/describe_fpga_images.md) API to check the AFI state during the background AFI generation process. + +``` + $ aws ec2 describe-fpga-images --fpga-image-ids <AFI ID> +``` + +When AFI creation completes successfully, the output should contain: +``` + ... + "State": { + "Code": "available" + }, + ...
+``` + +If the “State” code indicates the AFI generation has "failed", the AFI creation logs can be found in the bucket location (```s3:///```) provided to create_vitis_afi.sh above. These will detail the errors encountered during the AFI creation process. + +For help with AFI creation issues, see [create-fpga-image error codes](../hdk/docs/create_fpga_image_error_codes.md) + + + +# 3. Run the FPGA accelerated application on Amazon FPGA instances + +* Start an FPGA instance using [FPGA Developer AMI on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) and check the AMI [compatibility table](../README.md#fpga-developer-ami) and [runtime compatibility table](./docs/Create_Runtime_AMI.md#runtime-ami-compatibility-table). Alternatively, you can [create your own Runtime AMI](docs/Create_Runtime_AMI.md) for running your Vitis applications on Amazon FPGA instances. + * *Assuming the developer flow (compilation) was done on a separate build instance you will need to:* + * Copy the compiled host executable (exe) to the new F1 instance + * Copy the \*.awsxclbin AWS FPGA binary file to the new instance + * Copy any data files required for execution to the new instance + * [Clone the github repository to the new F1 instance and install runtime drivers](#gitsetenv) + +* To setup tools, runtime environment & execute your Host Application: + ``` + $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR + $ cd $AWS_FPGA_REPO_DIR + $ source vitis_runtime_setup.sh # Other runtime env settings needed by the host app should be setup after this step + # Wait till the MPD service has initialized. Check systemctl status mpd + $ ./host ./vadd.awsxclbin + ``` +* The runtime setup script also starts the Xilinx XRT Message Proxy Daemon(MPD) service. 
To learn more about the XRT implementation, check the [XRT Instructions](./docs/XRT_installation_instructions.md#mpd) + + +# Additional Vitis Information + +* [Vitis User Guide](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug1393-vitis-application-acceleration.pdf) + +* [Vitis Product Info](https://www.xilinx.com/products/design-tools/vitis.html) + +* [XRT Documentation](https://xilinx.github.io/XRT/master/html/) + +* [XRT MPD Documentation](https://xilinx.github.io/XRT/master/html/cloud_vendor_support.html) diff --git a/Vitis/Runtime/xrt_common_functions.sh b/Vitis/Runtime/xrt_common_functions.sh new file mode 100644 index 00000000..b6b7c02b --- /dev/null +++ b/Vitis/Runtime/xrt_common_functions.sh @@ -0,0 +1,117 @@ +# +# Copyright (C) 2018 Xilinx, Inc +# Xilinx XRT setup functions +# +# Author: ryan.radjabi@xilinx.com +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may +# not use this file except in compliance with the License. A copy of the +# License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +XRT_PATH="${VITIS_DIR}/Runtime/XRT_${RELEASE_VER}" + +function get_install_cmd { + xrt_inst_cmd="install" + aws_inst_cmd="install" + if [[ $(lsb_release -si) == "Ubuntu" ]]; then + dpkg -s xrt + ret=$? + if [[ $ret == "0" ]]; then + xrt_inst_cmd="install --reinstall" + fi + dpkg -s xrt-aws + ret=$? + if [[ $ret == "0" ]]; then + aws_inst_cmd="install --reinstall" + fi + elif [[ $(lsb_release -si) == "CentOS" ]]; then + rpm -q xrt + ret=$? + if [[ $ret == "0" ]]; then + xrt_inst_cmd="reinstall" + fi + rpm -q xrt-aws + ret=$? 
+ if [[ $ret == "0" ]]; then + aws_inst_cmd="reinstall" + fi + fi +} +# Initializes the XRT submodule at $XRT_PATH, installs build dependencies via xrtdeps.sh, then runs build.sh; returns the saved exit status of the failing step (0 on success). +function build_xrt { + info_msg "xrt-path: $XRT_PATH" + if [ -z "$(ls -A $XRT_PATH)" ]; then + # XRT_PATH is empty, this is the first run, so call submodule update + git submodule update --init -- $XRT_PATH + else + # XRT_PATH is not empty, only call init, this allows local changes to exist in XRT + git submodule init -- $XRT_PATH + fi + info_msg "XRT Developer Flow: Building Xilinx runtime XRT..." + sudo sh -c "cd $XRT_PATH;./src/runtime_src/tools/scripts/xrtdeps.sh;" + ret=$? + if [[ $ret != 0 ]]; then + err_msg "XRT Developer Flow: Failed to install dependencies: xrtdeps.sh: {$ret}" + return $ret + fi + if [[ $(lsb_release -si) == "CentOS" ]]; then + scl enable devtoolset-6 "cd ${XRT_PATH}/build/; ./build.sh;" + elif [[ $(lsb_release -si) == "Ubuntu" ]]; then + sudo sh -c "cd ${XRT_PATH}/build/; ./build.sh;" + fi + ret=$? + if [[ $ret != 0 ]]; then + err_msg "XRT Developer Flow: Failed to build XRT: {$ret}" + fi + + return $ret +} + +# takes the path to RPM/DEB package as argument; returns the saved exit status of the install command (use $ret, not $?, which is overwritten by the test and the log call) +function install_xrt_package { + get_install_cmd + if [[ $(lsb_release -si) == "CentOS" ]]; then + sudo sh -c "cd $1; yum ${xrt_inst_cmd} -y xrt_*-xrt.rpm; yum ${aws_inst_cmd} -y xrt_*-aws.rpm;" + elif [[ $(lsb_release -si) == "Ubuntu" ]]; then + sudo sh -c "cd $1; apt ${xrt_inst_cmd} ./xrt_*-xrt.deb; apt ${aws_inst_cmd} ./xrt_*-aws.deb;" + fi + ret=$? + if [[ $ret != 0 ]]; then + err_msg "XRT Developer Flow: Failed to install XRT: {$ret}" + else + info_msg "Xilinx runtime installed" + fi + return $ret
+} + +function setup_runtime { + if [ -e /opt/xilinx/xrt ]; then # Check if XRT is installed + info_msg "XRT Install, non-dev" + export XILINX_XRT=/opt/xilinx/xrt + export PATH=$PATH:/opt/xilinx/xrt/bin + + export LD_LIBRARY_PATH=$XILINX_XRT/lib:$LD_LIBRARY_PATH + # copy libstdc++ from $XILINX_VITIS/lib + if [[ $(lsb_release -si) == "Ubuntu" ]]; then + sudo cp $XILINX_VITIS/lib/lnx64.o/Ubuntu/libstdc++.so* /opt/xilinx/xrt/lib/ + elif [[ $(lsb_release -si) == "CentOS" ]]; then + sudo cp $XILINX_VITIS/lib/lnx64.o/Default/libstdc++.so* /opt/xilinx/xrt/lib/ + else + info_msg "Unsupported OS." + return 1 + fi + else # No XRT available + err_msg "Xilinx XRT runtime not installed - This is required if you are running on an F1 instance." + # Placeholder for code to download pre-compiled RPM/DEB package and remove above message + # install_xrt_package + fi +} diff --git a/Vitis/aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2/sw/xilinx_aws-vu9p-f1_shell-v04261818_201920_2.spfm b/Vitis/aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2/sw/xilinx_aws-vu9p-f1_shell-v04261818_201920_2.spfm new file mode 100644 index 00000000..8cf04dce --- /dev/null +++ b/Vitis/aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2/sw/xilinx_aws-vu9p-f1_shell-v04261818_201920_2.spfm @@ -0,0 +1,26 @@ + + + + {No description given} + + + + + config0_0 Linux OS on x86_0 + + + + + + diff --git a/Vitis/aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2/xilinx_aws-vu9p-f1_shell-v04261818_201920_2.xpfm b/Vitis/aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2/xilinx_aws-vu9p-f1_shell-v04261818_201920_2.xpfm new file mode 100644 index 00000000..b0f9bee9 --- /dev/null +++ b/Vitis/aws_platform/xilinx_aws-vu9p-f1_shell-v04261818_201920_2/xilinx_aws-vu9p-f1_shell-v04261818_201920_2.xpfm @@ -0,0 +1,20 @@ + + + + {No description given} + + + + + + + + + + + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration.md b/Vitis/docs/Alveo_to_AWS_F1_Migration.md new file mode 100644 index 
00000000..01513f8a --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration.md @@ -0,0 +1,181 @@ +# Application Migration between Alveo U200 platform & Amazon EC2 F1 instances + +The Vitis development environment provides a unified environment to develop FPGA accelerated applications across Alveo™ products and Amazon EC2 F1 instances. +The Vitis® flow is based on standard programming languages for both software and hardware components, along with an open-source runtime library and optimizing compilation technology. +This approach enables seamless application migration across acceleration platforms. + +Using the Vitis tool flow, Xilinx was able to seamlessly port over 40+ designs from the Alveo U200 platform to F1 instances without touching the kernel source code and making only minor cosmetic changes to application source code. +One example is Xilinx Real-Time Anti Money Laundering Watch List Management Compute Solution that was developed with Vitis and can be deployed to Alveo U200 and F1 instances. + +## Introduction to Vitis + +FPGA-applications built with the Vitis flow rely on a stack of standardized software and hardware components that insulate the application from platform-specific details, as seen in the figure below. + + +![img](./Alveo_to_AWS_F1_Migration/img/image01.png) + +In the Vitis flow, user applications are developed in C or C++ and use standard user-space APIs to interact with accelerated functions (also known as kernels) implemented in the FPGA device. +These APIs are implemented by the Xilinx Runtime library (XRT) and are built on top drivers that manage communication to and from the FPGA device. +On the hardware side, a platform-specific shell is responsible for essential services such as managing the PCIe link, DMA transfers (to and from the host), and interfacing with off-chip DDR memory. +The shell also exposes standard AXI interfaces to which the user kernels can be connected. 
+ +With this architecture, the user’s source code (host application and acceleration kernel) remains mostly agnostic of platform-specific details. +The application sees the standardized XRT APIs and AXI interfaces which are common to all Vitis acceleration platforms. +This aspect is key to enabling application portability across similar FPGA platforms. For most designs, porting from an Alveo U200 platform to F1 instances **can be as simple as changing the --platform option when building the design with Vitis.** + +More details about the Vitis programming and execution model can be found in the [Introduction](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/chunkbreaker1.html#ctb1559866511109) chapter of the Vitis documentation. + + +## Comparison of the Alveo U200 platform and AWS EC2 F1 instances + +FPGA accelerated applications developed with Vitis are highly portable across similar acceleration platforms. +While Vitis greatly facilitates the migration process, it is important to recognize that the features and characteristics of the acceleration platform or instance targeted will have an impact on functionality and achievable performance. + +The following table contrasts the key characteristics of the Alveo U200 and AWS EC2 F1 platforms. 
+ +| | **Feature** | **AWS f1.2xlarge instance** | **Alveo U200 platform** | +| ----------------------- | -------------------- | ------------------------------------------------- | --------------------------- | +| **Available resources** | SLRs | 3 | 3 | +| | LUTs | 895k | 983k | +| | Registers | 1790k | 1966k | +| | DSP Slices | 5640 | 5856 | +| | URAM | 800*288kb = 225Mb | 800*288kb = 225Mb | +| | BRAM | 1680*36kb = 59Mb | 1848*36kb = 64.9Mb | +| **Off-chip memory** | DDR total capacity | 64GB (4x16GB) | 64GB (4x16GB) | +| | DDR Total BW | 68GB/s | 77GB/s | +| **Interfaces** | PCI Express | Gen3x16 | Gen3x16 | +| **Floorplan** | Shell Occupancy | SLR0 and SLR1 | SLR1 | +| | SLR0 | DDR3 | DDR0 | +| | SLR1 | DDR0 (in shell)
DDR2 | DDR1 (in shell)
DDR2 | +| | SLR2 | DDR1 | DDR3 | +| **Tool support** | Vitis | Yes | Yes | +| | ERT | Disabled | Available | +| | XRT | Full Access | Full Access | + + + +### FPGA Resources + +FPGA resources are the key building blocks for any FPGA design. +Resources are physically distributed across 3 different logic regions (SLRs) on both platforms, due to the nature of the FPGA architecture. +The FPGA devices on the Alveo U200 platform and AWS F1 instance have very similar numbers of available resources and performance is expected to be comparable across both platforms. + +The slight variance in resources is due to inherent differences between the shells for each platform. As shown in the figure below, the size and layout of the shell vary between the Alveo U200 platform and the AWS F1 instance. +Physical shell differences may impact the layout of available resources across the devices. +Developers should keep this in mind when migrating large and complex designs between the Alveo U200 platform and the F1 instances. +Advanced design considerations such as timing closure techniques may need to be considered. + +![img](./Alveo_to_AWS_F1_Migration/img/image02.png) + +For FPGA designs that utilize more than 70% of the FPGA resources, portability between platforms may require additional optimizations. +Please refer to the [UltraFast Design Methodology Timing Closure Quick Reference Guide (UG1292)](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug1292-ultrafast-timing-closure-quick-reference.pdf) for recommendations on timing closure. + +### Clock Frequency + +Both the Alveo U200 platform and F1 instances will support data clock rates up to 250MHz. +Applications running at 250MHz and below will seamlessly port between both platforms. +In addition, the Alveo U200 platform can support data clock rates between 250Mhz and 300MHz. +The data clock is used to transfer data between kernels and DDR, and deltas in clock rate may impact performance. 
+ + +### Off-Chip DDR Memory + +Both the Alveo U200 platform and F1 instances provide identical off-chip DDR memory: 4 banks of 16GBytes each for a total of 64GBytes. +It is important to note that the placement and identification of DDR banks vary across platforms. +On the Alveo U200 platform, the DDR interface placed in the shell is DDR1. +On F1 instances the equivalent DDR interface placed in the shell is DDR0. +If the application only needs a single DDR interface, it is recommended to use the dedicated controller located in the shell. + +The following table details the naming and location of DDR interfaces on the F1 instances and Alveo U200 platforms: + +| **AWS name** | **Vitis tag** | **Location** | **U200 Equivalent** | +| --------------- | ---------------| ----------------------------- | --------------------- | +| DDR A | DDR[1] | SLR2 (top SLR) | DDR[3] | +| DDR B | DDR[2] | SLR1 (mid SLR) | DDR[2] | +| DDR C | DDR[0] | SLR1 (mid SLR, shell region) | DDR[1] | +| DDR D | DDR[3] | SLR0 (bottom SLR) | DDR[0] | + +The same information can be extracted from the platform file using the `platforminfo` Vitis utility: + +```bash +# Run this after you have sourced vitis_setup.sh +platforminfo $AWS_PLATFORM +``` + +When building the FPGA design, the Vitis linker takes care of connecting kernel ports to DDR interfaces. +For both the Alveo U200 and F1 instances, Vitis will default to use the DDR interface placed in the shell. +This default behavior can be modified by using command line options to specify which DDR interfaces should be used for each connection. +This is especially useful when the FPGA design needs to access multiple DDR banks. +When migrating applications between the Alveo U200 platform and F1 instances, updates may be required to the Vitis compilation script to achieve the desired DDR mapping. +The example provided at the end of this document explains how to update the Vitis scripts for DDR mapping. 
+ + +## Migration Results using Vitis 2020.1 + +To demonstrate the seamless migration path offered by Vitis as well as the potential impact on the performance of the differences between the Alveo U200 platform and F1 instances, we ran more than 40 full system level applications across both platforms. + +For the vast majority of these designs, migrating between the Alveo U200 platform and the F1 instances required zero code changes (to either the host application or the kernel code). Porting the design was as simple as changing a few command line options in the Vitis compilation scripts such as the --platform and --sp switches. See [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#qcm1528577331870__section_N10049_N10019_N10001) and [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#clt1568640709907__section_tfc_zxm_1jb) for more details about these options. + +In a few cases, the host application relied on the XCL_MEM_TOPOLOGY flag, and this flag had to be modified to port between the Alveo U200 platform and F1 instances. This optional flag can be used to explicitly specify in which DDR bank a given buffer needs to be allocated. See [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/optimizingperformance.html#utc1504034308941) for more details about this flag. + +The following table shows results for a subset of these applications and compares the performance of each kernel (CU), looking at both the duration and primary kernel clock frequency. All these applications could be easily ported between the Alveo U200 platform and F1 instances maintaining application performance. 
+ +| **TEST CASE** | **KERNEL NAMEs** | **CU Time AWS F1 (ms)** | **CU Time Alveo U200 (ms)** | **CU Clock AWS F1 (MHz)** | **CU Clock Alveo U200 (MHz)** | +| ----------------------------------------------- | ---------------------- | ----------------------- | --------------------------- | ------------------------- | ----------------------------- | +| Data Analytics (Bayes classification training) | naiveBayesTrain_kernel | 0.5491 | 0.5114 | 250 | 279 | +| Compression (gzip) | xilDecompressFull | 0.0335 | 0.0334 | 250 | 231 | +| | xilHuffmanKernel | 0.0435 | 0.0549 | 250 | 231 | +| | xilLz77Compress | 0.0203 | 0.0324 | 250 | 231 | +| Compression (zlib) | xilDecompressFull | 0.0254 | 0.0440 | 245 | 243 | +| | xilHuffmanKernel | 0.0440 | 0.0535 | 245 | 243 | +| | xilLz77Compress | 0.0211 | 0.0358 | 245 | 243 | +| Database (Compound Sort) | SortKernel | 1.1088 | 1.2326 | 250 | 234 | +| Quantitative Finance (BlackScholes) | bs_kernel | 0.0566 | 0.0541 | 250 | 300 | +| Quantitative Finance (BlackScholesMerton) | bsm_kernel | 0.2469 | 0.1984 | 250 | 280 | +| Quantitative Finance (HestonFD) | fd_kernel | 744.6795 | 704.1600 | 156 | 169 | +| Quantitative Finance (MonteCarlo) | mc_euro_k | 0.1351 | 0.1198 | 250 | 300 | +| Quantitative Finance (MonteCarloDJE) | kernel_mc_0 | 0.5365 | 0.4587 | 250 | 300 | +| Quantitative Finance (PortfolioOptimisation) | po_kernel | 0.1678 | 0.2154 | 138 | 115 | +| Quantitative Finance (b76) | b76_kernel | 0.5407 | 0.4251 | 250 | 300 | +| Quantitative Finance (cds) | CDS_kernel | 0.0489 | 0.0459 | 250 | 300 | +| Quantitative Finance (fdbslv) | fd_bs_lv_kernel | 2.3244 | 1.8575 | 250 | 300 | +| Quantitative Finance (hcf) | hcf_kernel | 0.2393 | 0.2144 | 250 | 300 | +| Matrix Solver (gesvdj) | kernel_gesvdj_0 | 0.2579 | 0.2595 | 250 | 273 | +| Matrix Solver (gesvj) | kernel_gesvj_0 | 0.0201 | 0.0377 | 250 | 300 | +| Computer Vision (Color detection) | color_detect | 0.0785 | 0.0755 | 250 | 300 | +| Computer Vision (Pixel pipeline) | 
pp_pipeline_accel | 0.1603 | 0.1459 | 250 | 300 | +| Computer Vision (Gaussian difference) | gaussiandiference | 33.5225 | 28.0049 | 250 | 300 | +| Computer Vision (Letterbox) | letterbox_accel | 0.0344 | 0.0394 | 250 | 300 | +| Computer Vision (Stereo vision pipeline) | stereopipeline_accel | 11.6349 | 9.7058 | 250 | 300 | +| Computer Vision (Corner Tracker) | cornerTracker | 0.2409 | 0.2160 | 250 | 300 | + +It should also be noted that these results only look at kernel performance, some of which are able to run the full clock rate available, for the F1 instances this is up to 250MHz and for the Alveo U200 platform, this is 300MHz. +Algorithms such as Compression libraries implemented in hardware can be seen to have nearly identical performance as the maximum clock rates are not quite achievable for these libraries. +Additional system level application advantages of either the Alveo U200 platform or F1 instances are not captured with this benchmark. + + +## Migration Example + +A detailed working example walking through all the steps required to migrate an application from U200 to F1 instances can be found [here](./Alveo_to_AWS_F1_Migration/example/README.md). + +In this example, the source code for the software program and the FPGA kernels remains identical whether targeting U200 or F1 instances. +Only command line changes are necessary to port the application. + +## Summary – Migration Checklist + +Because Vitis provides platform-independent APIs and interfaces to the developer, the process of migrating applications across similar FPGA acceleration cards is greatly facilitated. + +The following summarizes the main requirements and techniques involved in migrating from Alveo U200 to F1 instances and can be used as a checklist to help along the process. 
+ +#### Mandatory changes +* Update the --platform option in the Vitis compilation script – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#qcm1528577331870__section_N10049_N10019_N10001) +* Create an Amazon FPGA Image (AFI) from the FPGA binary (.xclbin) generated by Vitis – more details [here](https://github.com/aws/aws-fpga/blob/master/Vitis/README.md#2-create-an-amazon-fpga-image-afi) + +#### Design specific changes related to DDR mapping +* Use the --sp option to specify the assignment of kernel interfaces to DDR banks – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#clt1568640709907__section_tfc_zxm_1jb) +* Use the XCL_MEM_TOPOLOGY flag in the host source code – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/optimizingperformance.html#utc1504034308941) + +#### Design specific changes related to timing closure +* Use the --frequency option to override the default clock frequency defined on the hardware platform– more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#qcm1528577331870__section_frk_xtr_t3b) +* Use the –slr option to map kernels to specific SLRs in the device in order to help with timing closure – more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/vitiscommandcompiler.html#clt1568640709907__section_m3v_qxm_1jb) +* Apply advanced Vivado options to optimize implementation results - more details [here](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/buildingdevicebinary.html#hnw1523048617934) and [here](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug1292-ultrafast-timing-closure-quick-reference.pdf). 
diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/README.md b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/README.md new file mode 100644 index 00000000..e35d4d71 --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/README.md @@ -0,0 +1,149 @@ +# Alveo U200 to AWS F1 Migration Example + +This example illustrates how to port a Vitis application developed for an Alveo U200 card to an AWS EC2 F1 instance. + +The Vitis development flow provides platform independent APIs and interfaces to the developer. This greatly facilitates the process of migrating applications across similar FPGA acceleration cards. In this example, the source code for the software program and for the FPGA kernels remains unchanged. Only command line changes are necessary to port the application from Alveo U200 to AWS F1. + + +## Example Overview + +The accelerator used in this example is a simple vector-add kernel. The [`src`](./src) directory contains the source code for the project: + +- [`vadd.cpp`](./src/vadd.cpp) contains the C++ source code of the accelerator which adds 2 arbitrarily sized input vectors. +- [`host.cpp`](./src/host.cpp) contains the main function running on the host CPU. The host application is written in C++ and uses OpenCL™ APIs to interact with the FPGA accelerator. + +The [`u200`](./u200) and [`f1`](./f1) directories contain the Makefiles and scripts for building for Alveo U200 and AWS F1 respectively. + + + +## Building for Alveo U200 + +*Note: The instructions below assume that the required tools and platforms are installed and that the environment is properly set up to run Vitis. It is also a good idea to complete the Vitis example flow end-to-end before running this example.* + +1. Go to the `u200` directory + +2. 
The example is built with the following commands: + + ```bash + g++ -D__USE_XOPEN2K8 -I/$XILINX_XRT/include/ -I./src -O3 -Wall -fmessage-length=0 -std=c++11 ../src/host.cpp -L/$XILINX_XRT/lib/ -lxilinxopencl -lpthread -lrt -o host + + v++ -c -g -t hw -R 1 -k vadd --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --save-temps --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src ../src/vadd.cpp -o ./vadd.hw.xo + + v++ -l -g -t hw -R 1 --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src vadd.hw.xo -o add.hw.xclbin + ``` + + * The `g++` command compiles the host program and links it with the Xilinx Runtime (XRT) libraries. XRT provides platform-independent APIs to interact with the FPGA, allowing the source code to remain the same for U200 and F1. + * The `v++ -c` command compiles the source code for vector-add kernel (vadd.cpp) and generates the compiled kernel object (.xo). + * The `v++ -l` command links the compiled kernel to the shell and produces the FPGA binary (.xclbin file) which can then be loaded on the U200 acceleration card. + +3. For both `v++`commands, the `--config` option is used to pass the name of a file called `options.cfg` containing additional options specific to building for U200. The `options.cfg` file contains the following options: + + ``` + platform=xilinx_u200_xdma_201830_2 + [connectivity] + sp=vadd_1.in1:DDR[1] + sp=vadd_1.in2:DDR[1] + sp=vadd_1.out:DDR[1] + ``` + + * The `platform` option specifies which acceleration platform is targeted for the build. Here we are using the U200 shell. + * The `sp` options are used to specify the assignment of kernel arguments to DDR banks. In this case, we are mapping all three kernel arguments to DDR[1], which is the DDR interface located in the shell on Alveo U200. 
+ + > Putting all the platform-specific options in one file is not mandatory but it is very convenient and facilitates the porting process. With this approach, the main command line can be reused as is for all platforms. Refer to the [Vitis Documentation](https://www.xilinx.com/html_docs/xilinx2020_1/vitis_doc/kme1569523964461.html) for more information on v++ related commands and options. + +4. The Makefile provided in the directory can be used to build the project for U200. Running `make build` will build the host application, compile the kernel and finally create the FPGA binary for U200. + + +## Building for AWS F1 + +In order to port the vector-add example from Alveo U200 to AWS F1, the only change required is in the `options.cfg` file. The source code remains unchanged and the g++ and v++ commands remain identical. + +1. Go to the `f1` directory + +2. The `options.cfg` file for AWS F1 contains the following options: + + ``` + platform=xilinx_aws-vu9p-f1_shell-v04261818_201920_2 + [connectivity] + sp=vadd_1.in1:DDR[0] + sp=vadd_1.in2:DDR[0] + sp=vadd_1.out:DDR[0] + ``` + + * The `platform` option is set to target the AWS F1 shell. The string used corresponds to the name of the latest shell which can be found [here](https://github.com/aws/aws-fpga/tree/master/Vitis/aws_platform) on the aws-fpga repo. + * The `sp` options are set to connect the kernel arguments to DDR[0], which is the DDR interface located in the AWS F1 shell. Keeping the same settings as the U200 would produce a working design on AWS F1. But in order to produce exactly the same configuration and target the DDR interface located in the AWS F1 shell, the sp options are modified to use DDR[0]. + + These changes are the only ones needed to port this project from U200 to F1. + +3. 
You can build the project for AWS F1 using the exact same commands that were used for U200: + + ```bash + export PLATFORM_REPO_PATHS=/home/centos/src/project_data/aws-fpga/Vitis/aws_platform + + g++ -D__USE_XOPEN2K8 -I/$XILINX_XRT/include/ -I./src -O3 -Wall -fmessage-length=0 -std=c++11 ../src/host.cpp -L/$XILINX_XRT/lib/ -lxilinxopencl -lpthread -lrt -o host + + v++ -c -g -t hw -R 1 -k vadd --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --save-temps --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src ../src/vadd.cpp -o ./vadd.hw.xo + + v++ -l -g -t hw -R 1 --config ./options.cfg --profile_kernel data:all:all:all --profile_kernel stall:all:all:all --temp_dir ./temp_dir --report_dir ./report_dir --log_dir ./log_dir -I../src vadd.hw.xo -o add.hw.xclbin + ``` + + *NOTE: The PLATFORM_REPO_PATHS environment variable is used to specify the directory where the AWS platform (xilinx_aws-vu9p-f1_shell-v04261818_201920_2) is installed.* + +4. When targeting AWS F1, you need to go through the additional step of creating an Amazon FPGA Image (AFI). This is done with the `create_vitis_afi.sh` command provided by AWS. More information about this command is available on the [AWS documentation](https://github.com/aws/aws-fpga/blob/master/Vitis/README.md#2-create-an-amazon-fpga-image-afi). + + Use the command below to generate the AFI and the .awsxclbin file: + + ```bash + $AWS_FPGA_REPO_DIR/Vitis/tools/create_vitis_afi.sh -xclbin=./vadd.xclbin -o=./vadd -s3_bucket= -s3_dcp_key=f1-dcp-folder -s3_logs_key=f1-logs + ``` + + *NOTE: Make sure to use your S3 bucket information when running the create_vitis_afi command.* + + Check the status of the AFI creation process by using the AFI ID with the following command: + + ```bash + aws ec2 describe-fpga-images --fpga-image-ids + ``` + + The AFI is ready to use when the state is reported as 'available'. 
+ + ```json + "State": { + "Code": "available" + }, + ``` + + *NOTE: The AFI ID can be found in the _afi_id.txt file created by the create_vitis_afi command.* + + + +## Running the Application on AWS F1 + +1. Execute the following command to source the Vitis runtime environment + + ```bash + source $AWS_FPGA_REPO_DIR/vitis_runtime_setup.sh + ``` + +2. Execute the host application with the .awsxclbin FPGA binary + + ```bash + ./host vadd.awsxclbin + ``` + +3. The messages below will indicate that the program ran successfully. + + ```bash + Found Platform + Platform Name: Xilinx + INFO: Reading ./vadd.awsxclbin + Loading: './vadd.awsxclbin' + TEST PASSED + ``` + + +## Summary + +In the Vitis flow, the user can develop source code which remains mostly agnostic of platform-specific details. This greatly facilitates the process of migrating applications across similar FPGA acceleration cards. In this example, the same source code could be ported from Alveo U200 to AWS F1 without any changes at all. Only a couple of changes to the v++ compilation options were required. + +For a complete application migration checklist, refer to the [Migration between Alveo U200 platform & Amazon EC2 F1 instances](../../Alveo_to_AWS_F1_Migration.md) application note. 
diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/Makefile b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/Makefile new file mode 100644 index 00000000..28023dbd --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/Makefile @@ -0,0 +1,48 @@ +TARGET := hw + +build: xclbin host + +run: build + ./host ./vadd.xclbin + +vadd.xo: ../src/vadd.cpp + v++ -c -g -t $(TARGET) -R 1 -k vadd \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --save-temps \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + ../src/vadd.cpp \ + -o ./vadd.xo + +vadd.xclbin: vadd.xo + v++ -l -g -t $(TARGET) -R 1 \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + vadd.xo \ + -o vadd.xclbin + +host: ../src/host.cpp ../src/host.hpp + g++ -D__USE_XOPEN2K8 \ + -I$(XILINX_XRT)/include/ \ + -I./src \ + -O3 -Wall -fmessage-length=0 -std=c++11\ + ../src/host.cpp \ + -L$(XILINX_XRT)/lib/ \ + -lxilinxopencl -lpthread -lrt \ + -o ./host + +xclbin: vadd.xclbin + +xo: vadd.xo + +clean: + rm -rf temp_dir log_dir report_dir *log host vadd.* *.csv *summary .run .Xil vitis* xclbin *.protoinst *.wdb *.wcfg diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/options.cfg b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/options.cfg new file mode 100644 index 00000000..a5eca86f --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/f1/options.cfg @@ -0,0 +1,6 @@ +platform=xilinx_aws-vu9p-f1_shell-v04261818_201920_2 +[connectivity] +sp=vadd_1.in1:DDR[0] +sp=vadd_1.in2:DDR[0] +sp=vadd_1.out:DDR[0] + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.cpp b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.cpp new file mode 100644 index 00000000..1b426733 --- /dev/null +++ 
b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.cpp @@ -0,0 +1,183 @@ +/********** +Copyright (c) 2018, Xilinx, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********/ + +#include "host.hpp" + +int main(int argc, char** argv) +{ + if (argc != 2) { + std::cout << "Usage: " << argv[0] << " " << std::endl; + return EXIT_FAILURE; + } + + std::string binaryFile = argv[1]; + size_t vector_size_bytes = sizeof(int) * DATA_SIZE; + cl_int err; + unsigned fileBufSize; + // Allocate Memory in Host Memory + std::vector> source_in1(DATA_SIZE); + std::vector> source_in2(DATA_SIZE); + std::vector> source_hw_results(DATA_SIZE); + std::vector> source_sw_results(DATA_SIZE); + + // Create the test data + for(int i = 0 ; i < DATA_SIZE ; i++){ + source_in1[i] = rand() % DATA_SIZE; + source_in2[i] = rand() % DATA_SIZE; + source_sw_results[i] = source_in1[i] + source_in2[i]; + source_hw_results[i] = 0; + } + +// OPENCL HOST CODE AREA START + +// ------------------------------------------------------------------------------------ +// Step 1: Get All PLATFORMS, then search for Target_Platform_Vendor (CL_PLATFORM_VENDOR) +// Search for Platform: Xilinx +// Check if the current platform matches Target_Platform_Vendor +// ------------------------------------------------------------------------------------ + std::vector devices = get_devices("Xilinx"); + devices.resize(1); + cl::Device device = devices[0]; + +// ------------------------------------------------------------------------------------ +// Step 1: Create Context +// ------------------------------------------------------------------------------------ + OCL_CHECK(err, cl::Context context(device, NULL, NULL, NULL, &err)); + +// ------------------------------------------------------------------------------------ +// Step 1: Create Command Queue +// ------------------------------------------------------------------------------------ + OCL_CHECK(err, cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE, &err)); + +// ------------------------------------------------------------------ +// Step 1: Load Binary File from disk +// 
------------------------------------------------------------------ + char* fileBuf = read_binary_file(binaryFile, fileBufSize); + cl::Program::Binaries bins{{fileBuf, fileBufSize}}; + +// ------------------------------------------------------------- +// Step 1: Create the program object from the binary and program the FPGA device with it +// ------------------------------------------------------------- + OCL_CHECK(err, cl::Program program(context, devices, bins, NULL, &err)); + +// ------------------------------------------------------------- +// Step 1: Create Kernels +// ------------------------------------------------------------- + OCL_CHECK(err, cl::Kernel krnl_vector_add(program,"vadd", &err)); + +// ================================================================ +// Step 2: Setup Buffers and run Kernels +// ================================================================ +// o) Allocate Memory to store the results +// o) Create Buffers in Global Memory to store data +// ================================================================ + +// ------------------------------------------------------------------ +// Step 2: Create Buffers in Global Memory to store data +// o) buffer_in1 - stores source_in1 +// o) buffer_in2 - stores source_in2 +// o) buffer_ouput - stores Results +// ------------------------------------------------------------------ + +// ....................................................... +// Allocate Global Memory for source_in1 +// ....................................................... + OCL_CHECK(err, cl::Buffer buffer_in1 (context,CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + vector_size_bytes, source_in1.data(), &err)); +// ....................................................... +// Allocate Global Memory for source_in2 +// ....................................................... 
+ OCL_CHECK(err, cl::Buffer buffer_in2 (context,CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + vector_size_bytes, source_in2.data(), &err)); +// ....................................................... +// Allocate Global Memory for sourcce_hw_results +// ....................................................... + OCL_CHECK(err, cl::Buffer buffer_output(context,CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + vector_size_bytes, source_hw_results.data(), &err)); + +// ============================================================================ +// Step 2: Set Kernel Arguments and Run the Application +// o) Set Kernel Arguments +// ---------------------------------------------------- +// Kernel Argument Description +// ---------------------------------------------------- +// in1 (input) --> Input Vector1 +// in2 (input) --> Input Vector2 +// out (output) --> Output Vector +// size (input) --> Size of Vector in Integer +// o) Copy Input Data from Host to Global Memory on the device +// o) Submit Kernels for Execution +// o) Copy Results from Global Memory, device to Host +// ============================================================================ + int size = DATA_SIZE; + OCL_CHECK(err, err = krnl_vector_add.setArg(0, buffer_in1)); + OCL_CHECK(err, err = krnl_vector_add.setArg(1, buffer_in2)); + OCL_CHECK(err, err = krnl_vector_add.setArg(2, buffer_output)); + OCL_CHECK(err, err = krnl_vector_add.setArg(3, size)); + +// ------------------------------------------------------ +// Step 2: Copy Input data from Host to Global Memory on the device +// ------------------------------------------------------ + OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_in1, buffer_in2},0/* 0 means from host*/)); + +// ---------------------------------------- +// Step 2: Submit Kernels for Execution +// ---------------------------------------- + OCL_CHECK(err, err = q.enqueueTask(krnl_vector_add)); + +// -------------------------------------------------- +// Step 2: Copy Results from Device 
Global Memory to Host +// -------------------------------------------------- + OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_output},CL_MIGRATE_MEM_OBJECT_HOST)); + + q.finish(); + +// OPENCL HOST CODE AREA END + + // Compare the results of the Device to the simulation + bool match = true; + for (int i = 0 ; i < DATA_SIZE ; i++){ + if (source_hw_results[i] != source_sw_results[i]){ + std::cout << "Error: Result mismatch" << std::endl; + std::cout << "i = " << i << " CPU result = " << source_sw_results[i] + << " Device result = " << source_hw_results[i] << std::endl; + match = false; + break; + } + } + +// ============================================================================ +// Step 3: Release Allocated Resources +// ============================================================================ + delete[] fileBuf; + + std::cout << "TEST " << (match ? "PASSED" : "FAILED") << std::endl; + return (match ? EXIT_SUCCESS : EXIT_FAILURE); +} + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.hpp b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.hpp new file mode 100644 index 00000000..2f294f4d --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/host.hpp @@ -0,0 +1,85 @@ +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS + +//OCL_CHECK doesn't work if call has templatized function call +#define OCL_CHECK(error,call) \ + call; \ + if (error != CL_SUCCESS) { \ + printf("%s:%d Error calling " #call ", error code is: %d\n", \ + __FILE__,__LINE__, error); \ + exit(EXIT_FAILURE); \ + } +#define DATA_SIZE 4096 + +#include +#include +#include +#include +#include + +template +struct aligned_allocator +{ + using value_type = T; + T* allocate(std::size_t num) + { + void* ptr = nullptr; + if (posix_memalign(&ptr,4096,num*sizeof(T))) + throw 
std::bad_alloc(); + return reinterpret_cast(ptr); + } + void deallocate(T* p, std::size_t num) + { + free(p); + } +}; + +std::vector get_devices(const std::string& vendor_name) { + + size_t i; + cl_int err; + std::vector platforms; + OCL_CHECK(err, err = cl::Platform::get(&platforms)); + cl::Platform platform; + for (i = 0 ; i < platforms.size(); i++){ + platform = platforms[i]; + OCL_CHECK(err, std::string platformName = platform.getInfo(&err)); + if (platformName == vendor_name){ + std::cout << "Found Platform" << std::endl; + std::cout << "Platform Name: " << platformName.c_str() << std::endl; + break; + } + } + if (i == platforms.size()) { + std::cout << "Error: Failed to find Xilinx platform" << std::endl; + exit(EXIT_FAILURE); + } + + //Getting ACCELERATOR Devices and selecting 1st such device + std::vector devices; + OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices)); + return devices; +} + +char* read_binary_file(const std::string &xclbin_file_name, unsigned &nb) +{ + std::cout << "INFO: Reading " << xclbin_file_name << std::endl; + + if(access(xclbin_file_name.c_str(), R_OK) != 0) { + printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str()); + exit(EXIT_FAILURE); + } + //Loading XCL Bin into char buffer + std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n"; + std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary); + bin_file.seekg (0, bin_file.end); + nb = bin_file.tellg(); + bin_file.seekg (0, bin_file.beg); + char *buf = new char [nb]; + bin_file.read(buf, nb); + return buf; +} + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/vadd.cpp b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/vadd.cpp new file mode 100644 index 00000000..805daffd --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/src/vadd.cpp @@ -0,0 +1,111 @@ +/********** +Copyright (c) 2018, Xilinx, Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********/ + +/******************************************************************************* +Description: + HLS pragmas can be used to optimize the design : improve throughput, reduce latency and + device resource utilization of the resulting RTL code + This is vector addition example to demonstrate how HLS optimizations are used in kernel. 
+*******************************************************************************/ + + +#define BUFFER_SIZE 1024 + +/* + Vector Addition Kernel Implementation + Arguments: + in1 (input) --> Input Vector1 + in2 (input) --> Input Vector2 + out (output) --> Output Vector + size (input) --> Size of Vector in Integer + */ +extern "C" { +void vadd( + const unsigned int *in1, // Read-Only Vector 1 + const unsigned int *in2, // Read-Only Vector 2 + unsigned int *out, // Output Result + int size // Size in integer + ) +{ +// SDAccel kernel must have one and only one s_axilite interface which will be used by host application to configure the kernel. +// Here bundle control is defined which is s_axilite interface and associated with all the arguments (in1, in2, out and size), +// control interface must also be associated with "return". +// All the global memory access arguments must be associated to one m_axi(AXI Master Interface). Here all three arguments(in1, in2, out) are +// associated to bundle gmem which means that a AXI master interface named "gmem" will be created in Kernel and all these variables will be +// accessing global memory through this interface. +// Multiple interfaces can also be created based on the requirements. For example when multiple memory accessing arguments need access to +// global memory simultaneously, user can create multiple master interfaces and can connect to different arguments. 
+#pragma HLS INTERFACE m_axi port=in1 offset=slave bundle=gmem +#pragma HLS INTERFACE m_axi port=in2 offset=slave bundle=gmem +#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem +#pragma HLS INTERFACE s_axilite port=in1 bundle=control +#pragma HLS INTERFACE s_axilite port=in2 bundle=control +#pragma HLS INTERFACE s_axilite port=out bundle=control +#pragma HLS INTERFACE s_axilite port=size bundle=control +#pragma HLS INTERFACE s_axilite port=return bundle=control + + unsigned int v1_buffer[BUFFER_SIZE]; // Local memory to store vector1 + unsigned int v2_buffer[BUFFER_SIZE]; // Local memory to store vector2 + unsigned int vout_buffer[BUFFER_SIZE]; // Local Memory to store result + + + //Per iteration of this loop perform BUFFER_SIZE vector addition + for(int i = 0; i < size; i += BUFFER_SIZE) + { + int chunk_size = BUFFER_SIZE; + //boundary checks + if ((i + BUFFER_SIZE) > size) + chunk_size = size - i; + + // Transferring data in bursts hides the memory access latency as well as improves bandwidth utilization and efficiency of the memory controller. + // It is recommended to infer burst transfers from successive requests of data from consecutive address locations. + // A local memory vl_local is used for buffering the data from a single burst. The entire input vector is read in multiple bursts. + // The choice of LOCAL_MEM_SIZE depends on the specific applications and available on-chip memory on target FPGA. 
+ // burst read of v1 and v2 vector from global memory + read1: for (int j = 0 ; j < chunk_size ; j++){ + v1_buffer[j] = in1[i + j]; + } + read2: for (int j = 0 ; j < chunk_size ; j++){ + v2_buffer[j] = in2[i + j]; + } + + // PIPELINE pragma reduces the initiation interval for loop by allowing the + // concurrent executions of operations + vadd: for (int j = 0 ; j < chunk_size; j ++){ + #pragma HLS PIPELINE II=1 + //perform vector addition + vout_buffer[j] = v1_buffer[j] + v2_buffer[j]; + } + //burst write the result + write: for (int j = 0 ; j < chunk_size ; j++){ + out[i + j] = vout_buffer[j]; + } + } +} +} diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/Makefile b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/Makefile new file mode 100644 index 00000000..28023dbd --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/Makefile @@ -0,0 +1,48 @@ +TARGET := hw + +build: xclbin host + +run: build + ./host ./vadd.xclbin + +vadd.xo: ../src/vadd.cpp + v++ -c -g -t $(TARGET) -R 1 -k vadd \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --save-temps \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + ../src/vadd.cpp \ + -o ./vadd.xo + +vadd.xclbin: vadd.xo + v++ -l -g -t $(TARGET) -R 1 \ + --profile_kernel data:all:all:all \ + --profile_kernel stall:all:all:all \ + --temp_dir ./temp_dir \ + --report_dir ./report_dir \ + --log_dir ./log_dir \ + --config ./options.cfg \ + -I../src \ + vadd.xo \ + -o vadd.xclbin + +host: ../src/host.cpp ../src/host.hpp + g++ -D__USE_XOPEN2K8 \ + -I$(XILINX_XRT)/include/ \ + -I./src \ + -O3 -Wall -fmessage-length=0 -std=c++11\ + ../src/host.cpp \ + -L$(XILINX_XRT)/lib/ \ + -lxilinxopencl -lpthread -lrt \ + -o ./host + +xclbin: vadd.xclbin + +xo: vadd.xo + +clean: + rm -rf temp_dir log_dir report_dir *log host vadd.* *.csv *summary .run .Xil vitis* xclbin *.protoinst *.wdb *.wcfg diff --git 
a/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/options.cfg b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/options.cfg new file mode 100644 index 00000000..c48bf33d --- /dev/null +++ b/Vitis/docs/Alveo_to_AWS_F1_Migration/example/u200/options.cfg @@ -0,0 +1,6 @@ +platform=xilinx_u200_xdma_201830_2 +[connectivity] +sp=vadd_1.in1:DDR[1] +sp=vadd_1.in2:DDR[1] +sp=vadd_1.out:DDR[1] + diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image01.png b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image01.png new file mode 100644 index 00000000..b760d4c0 Binary files /dev/null and b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image01.png differ diff --git a/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image02.png b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image02.png new file mode 100644 index 00000000..d0fc2bd2 Binary files /dev/null and b/Vitis/docs/Alveo_to_AWS_F1_Migration/img/image02.png differ diff --git a/Vitis/docs/Create_Runtime_AMI.md b/Vitis/docs/Create_Runtime_AMI.md new file mode 100644 index 00000000..8cc3bc62 --- /dev/null +++ b/Vitis/docs/Create_Runtime_AMI.md @@ -0,0 +1,36 @@ +# Create a Runtime AMI Starting with Amazon Linux 2, Centos or Ubuntu + +## Runtime AMI Compatibility Table + +| Vitis Version used for AFI Development | Compatible Xilinx Runtime | +|--------------------------------------|-----------------------------| +| 2020.2 | AWS FPGA Developer AMI 1.10.x (XRT is pre-installed) or [XRT](https://xilinx.github.io/XRT/2020.2/html/build.html) | +| 2020.1 | AWS FPGA Developer AMI 1.9.x (XRT is pre-installed) or [XRT](https://xilinx.github.io/XRT/2020.1/html/build.html) | +| 2019.2 | AWS FPGA Developer AMI 1.8.x (XRT is pre-installed) or [XRT](https://xilinx.github.io/XRT/2019.2/html/build.html) | + +## 1. Launch a Runtime Instance & Install Required Packages + +* Launch an F1 instance using [Centos 7](https://aws.amazon.com/marketplace/pp/B00O7WM7QW), Ubuntu or Amazon Linux 2 AMI's. + +## 2. 
Install Runtime Drivers +* Build XRT on either your runtime or a similar instance using the [XRT build steps](https://xilinx.github.io/XRT/2019.2/html/build.html). +* Install the XRT package on your runtime instance + +## 3. Run your FPGA accelerated application on your Runtime Instance. +* Source the runtime setup script: +``` +$ source /opt/xilinx/xrt/setup.sh +``` +* Run application to verify that it works: +```bash +$ ./helloworld ./vector_addition.awsxclbin +``` +* You might want to add a link to the setup command: `/opt/xilinx/xrt/setup.sh` in the `/etc/profile.d` path to be able to setup on start. + +## 4. Create your Runtime AMI based on your Instance. + +* Once you have your application running you should be able to create a Runtime AMI based on your Runtime Instance as specified [here](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/creating-an-ami-ebs.html). + +## 5. Make Runtime AMI available on the AWS Marketplace + +* Please see [Section 5 of the AWS Marketplace Seller's Guide](https://awsmp-loadforms.s3.amazonaws.com/AWS_Marketplace_-_Seller_Guide.pdf#page=19) for more details. diff --git a/Vitis/docs/Debug_Vitis_Kernel.md b/Vitis/docs/Debug_Vitis_Kernel.md new file mode 100644 index 00000000..4ae82c45 --- /dev/null +++ b/Vitis/docs/Debug_Vitis_Kernel.md @@ -0,0 +1,151 @@ +Hardware Debug of Vitis Kernel +====================== + +This file contains the following sections: + +1. Overview +2. Enabling ChipScope Debug +3. Host code changes to support debugging +4. Building the executable, creating the AFI, and executing the host code +5. Start debug servers + + +## 1. Overview +The sections below give you a brief explanation of the steps required to debug your Vitis kernel. They include enabling ChipScope debug, pausing the execution of the host code at the appropriate stage to ensure the setup of ILA triggers, building and running the host code and starting the debug servers to debug the design in hardware. + +## 2.
Enabling ChipScope Debug + +Debug cores can be added to the AXI interfaces on the kernel itself to monitor AXI transaction level activity (part of the ChipScope Debug feature of Vitis). + +Adding debug cores to the AXI interfaces on the kernel can be done using the v++ --dk chipscope option with the compute unit name and optional interface name. + +This can be enabled by adding a v++ option to the CLFLAGS in the makefile. The general usage of the --dk option is shown below: + +``` +--dk chipscope:<compute_unit_name>:<interface_name> +``` + +For example, to add ChipScope debugging to the helloworld_ocl OpenCL example, add the following v++ option to the CLFLAGS in the makefile: + +``` +--dk chipscope:krnl_vadd_1 +``` + +For detailed usage and more examples, refer to the Debugging section of Vitis Application Acceleration (UG1393). + + +### Adding debug cores to the RTL kernel code + +To debug signals internal to an RTL Kernel you need to instantiate debug cores like the Integrated Logic Analyzer (ILA), Virtual Input/Output (VIO), etc. in your application RTL kernel code. + +The ILA Debug IP can be created and added to the RTL Kernel in a couple of ways. + + +1. Open the ILA IP customization wizard in the Vivado GUI and customize the ILA and instantiate it in the RTL code – similar to any other IP in Vivado. + + +2. Create the ILA IP on the fly using TCL. A snippet of the create_ip TCL command is shown below. The example below creates the ILA IP with 7 probes and associates properties with the IP.
+ +``` +create_ip -name ila -vendor xilinx.com -library ip -module_name ila_0 +set_property -dict [list CONFIG.C_PROBE6_WIDTH {32} CONFIG.C_PROBE3_WIDTH {64} +CONFIG.C_NUM_OF_PROBES {7} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_INPUT_PIPE_STAGES {2} CONFIG.C_ADV_TRIGGER {true} CONFIG.ALL_PROBE_SAME_MU_CNT {4} CONFIG.C_PROBE6_MU_CNT {4} CONFIG.C_PROBE5_MU_CNT {4} CONFIG.C_PROBE4_MU_CNT {4} CONFIG.C_PROBE3_MU_CNT {4} CONFIG.C_PROBE2_MU_CNT {4} CONFIG.C_PROBE1_MU_CNT {4} CONFIG.C_PROBE0_MU_CNT {4}] [get_ips ila_0] +``` + +This TCL file should be added as an RTL Kernel source in the Makefile of your design + + +Now you are ready to instantiate the ILA Debug core in your RTL Kernel. The RTL code snippet below is an ILA that monitors the output of a combinatorial adder. + + // ILA monitoring combinatorial adder + ila_0 i_ila_0 ( + .clk(ap_clk), // input wire clk + .probe0(areset), // input wire [0:0] probe0 + .probe1(rd_fifo_tvalid_n), // input wire [0:0] probe1 + .probe2(rd_fifo_tready), // input wire [0:0] probe2 + .probe3(rd_fifo_tdata), // input wire [63:0] probe3 + .probe4(adder_tvalid), // input wire [0:0] probe4 + .probe5(adder_tready_n), // input wire [0:0] probe5 + .probe6(adder_tdata) // input wire [31:0] probe6 + ); + + +## 3. Host code changes to support debugging + +The application host code needs to be modified to ensure you can set up the ILA trigger conditions **prior** to running the kernel. + + +The host code shown below introduces the wait for the setup of ILA Trigger conditions and the arming of the ILA. + +src/host.cpp + + void wait_for_enter(const std::string& msg) + { + std::cout << msg << std::endl; + std::cin.ignore(std::numeric_limits::max(), '\n'); + } + + ... 
+ + cl::Program::Binaries bins = xcl::import_binary_file(binaryFile); + devices.resize(1); + cl::Program program(context, devices, bins); + cl::Kernel krnl_vadd(program,"krnl_vadd_rtl"); + + wait_for_enter("\nPress ENTER to continue after setting up ILA trigger..."); + + //Allocate Buffer in Global Memory + + ... + + //Launch the Kernel + q.enqueueTask(krnl_vadd); + + + +## 4. Building the executable, creating the AFI and executing the host code + +- **Build the executable** in your design directory (`your_design_directory`) by running the steps below: + +``` + cd your_design_directory + + make all DEVICES=$AWS_PLATFORM +``` + +- **Creating and registering the AFI** + +Please note, the angle bracket directories need to be replaced according to the user setup. + +``` + $VITIS_DIR/tools/create_vitis_afi.sh -xclbin=your_design.xclbin -o=your_design.awsxclbin -s3_bucket= +``` + +- **Setup and Execute** + +``` + $ cd $AWS_FPGA_REPO_DIR + $ source vitis_runtime_setup.sh + $ ./host +``` +This produces the following output: +``` + + platform Name: Xilinx + Vendor Name : Xilinx + Found Platform + XCLBIN File Name: vadd + INFO: Importing ./binary_container_1.awsxclbin + Loading: './binary_container_1.awsxclbin' + Successfully skipped reloading of local image. + + Press ENTER to continue after setting up ILA trigger... +``` + + +## 5. Start Debug Servers + +#### Starting Debug Servers on Amazon F1 instance +Instructions to start the debug servers on an Amazon F1 instance can be found [here](../../hdk/docs/Virtual_JTAG_XVC.md). +Once you have setup your ILA triggers and armed the ILA core, you can now Press Enter on your host to continue execution of the application and RTL Kernel. + diff --git a/Vitis/docs/FAQ.md b/Vitis/docs/FAQ.md new file mode 100644 index 00000000..2da973b4 --- /dev/null +++ b/Vitis/docs/FAQ.md @@ -0,0 +1,5 @@ +# Frequently Asked Questions (FAQ) + +## Q: What is the lowest frequency Vitis design supported on the AWS F1 Platform? 
+A: We support creating AFI's from CL's that have been built to work at frequencies no lower than 80MHz. + Re-clocking/Loading a dynamic clock frequency lower than 80MHz will also result in an error. diff --git a/Vitis/docs/Setup_AWS_CLI_and_S3_Bucket.md b/Vitis/docs/Setup_AWS_CLI_and_S3_Bucket.md new file mode 100644 index 00000000..dae67296 --- /dev/null +++ b/Vitis/docs/Setup_AWS_CLI_and_S3_Bucket.md @@ -0,0 +1,26 @@ +## Setup CLI and Create S3 Bucket +The developer is required to create an S3 bucket for the AFI generation. The bucket will contain a tar file and logs which are generated from the AFI creation service. + +To install the AWS CLI, please follow the [instructions here](http://docs.aws.amazon.com/cli/latest/userguide/installing.html). + +The AWS Vitis scripts require JSON output format and the scripts will not work properly if you use any other output format types (ex: text, table). JSON is the default output format of the AWS CLI. + +``` + $ aws configure # to set your credentials (found in your console.aws.amazon.com page), region (us-east-1) and output (json) +``` +This S3 bucket will be used by the AWS Vitis scripts to upload your DCP, packaged into a tar file, to AWS for AFI generation. +Start by creating a bucket and a folder within your new bucket: +``` + $ aws s3 mb s3://<bucket-name> --region us-east-1 # Create an S3 bucket (choose a unique bucket name) + $ aws s3 mb s3://<bucket-name>/<dcp-folder-name> # Create folder for your tarball files + $ touch FILES_GO_HERE.txt # Create a temp file + $ aws s3 cp FILES_GO_HERE.txt s3://<bucket-name>/<dcp-folder-name>/ # Which creates the folder on S3 +``` +The AFI creation process will generate logs, which will be placed in your S3 bucket. These logs can be used for debugging if the AFI generation fails.
+Next, create a folder for your log files: +``` + $ aws s3 mb s3://<bucket-name>/<logs-folder-name> # Create a folder to keep your logs + $ touch LOGS_FILES_GO_HERE.txt # Create a temp file + $ aws s3 cp LOGS_FILES_GO_HERE.txt s3://<bucket-name>/<logs-folder-name>/ # Which creates the folder on S3 +``` +Once your AFI has been created successfully, you are free to delete the tar file and logs as needed. Deleting these files will not delete or modify your AFI. diff --git a/Vitis/docs/XRT_installation_instructions.md b/Vitis/docs/XRT_installation_instructions.md new file mode 100644 index 00000000..8a1afb9b --- /dev/null +++ b/Vitis/docs/XRT_installation_instructions.md @@ -0,0 +1,173 @@ +# Xilinx Runtime (XRT) and Vitis Tool versions + +* Xilinx Runtime versions must match the version of the tool that you used to create your Vitis AFI. +* We provide pre-built RPM's for Centos/RHEL/AL2 and instructions for building XRT +* Use the table below as a reference to install and use the correct XRT version for your applications. + +| Xilinx Vitis Tool Version | XRT Release Tag | SHA | `xrt` or `xrt-aws` RPM's (Centos/RHEL) |`xrt` or `xrt-aws` RPM's (AL2) | +|---|---|---|---|---| +|2020.2| [202020.2.8.743](https://github.com/Xilinx/XRT/releases/tag/202020.2.8.743) | 77d5484b5c4daa691a7f78235053fb036829b1e9 | [xrt_202020.2.8.0_7.9.2009-x86_64-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.10.0/Patches/XRT_2020_2/xrt_202020.2.8.0_7.9.2009-x86_64-xrt.rpm) [xrt_202020.2.8.0_7.9.2009-x86_64-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.10.0/Patches/XRT_2020_2/xrt_202020.2.8.0_7.9.2009-x86_64-aws.rpm) | [xrt_202020.2.8.0_2-x86_64-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.10.0/Patches/XRT_2020_2/xrt_202020.2.8.0_2-x86_64-xrt.rpm) [xrt_202020.2.8.0_2-x86_64-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.10.0/Patches/XRT_2020_2/xrt_202020.2.8.0_2-x86_64-aws.rpm)| +|2020.1| [202010.2.6.AWS](https://github.com/Xilinx/XRT/releases/tag/202010.2.6.AWS) | d09c4a458c16e8d843b3165dcf929c38f7a32b6f |
[xrt_202010.2.6.0_7.7.1908-x86_64-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.9.0/Patches/XRT_2020_1/xrt_202010.2.6.0_7.7.1908-x86_64-xrt.rpm) [xrt_202010.2.6.0_7.7.1908-x86_64-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.9.0/Patches/XRT_2020_1/xrt_202010.2.6.0_7.7.1908-x86_64-aws.rpm) | [xrt_202010.2.6.0_2-x86_64-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.9.0/Patches/XRT_2020_1/xrt_202010.2.6.0_2-x86_64-xrt.rpm) [xrt_202010.2.6.0_2-x86_64-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.9.0/Patches/XRT_2020_1/xrt_202010.2.6.0_2-x86_64-aws.rpm)| +|2019.2| [2019.2.0.3](https://github.com/Xilinx/XRT/releases/tag/2019.2.0.3) | 9e13d57c4563e2c19bf5f518993f6e5a8dadc18a | [xrt_201920.2.3.0_7.7.1908-xrt.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.8.0/Patches/XRT_2019_2/xrt_201920.2.3.0_7.7.1908-xrt.rpm) [xrt_201920.2.3.0_7.7.1908-aws.rpm](https://aws-fpga-developer-ami.s3.amazonaws.com/1.8.0/Patches/XRT_2019_2/xrt_201920.2.3.0_7.7.1908-aws.rpm) | N/A | + + +# MPD +From 2019.2 toolset onwards, [Xilinx XRT architecture has been made more modular](https://xilinx.github.io/XRT/master/html/cloud_vendor_support.html). +To be able to do so, the new architecture implements a Message Proxy Daemon in user space that interacts with the management library. +This also allows us to make calls to the management library without requiring privileged access to the user on the host. + +## FPGA Developer AMI usecase +Since this Daemon is only required for the Vitis flow, it is disabled by default on the FPGA Developer AMI as we support both the Vitis and The Vivado flows. +The `vitis_runtime_setup.sh` script when called automatically checks for and starts the MPD daemon. +Once MPD Daemon starts up, it loads a 'Default AFI' on all the slots that lets the XOCL driver bind to the device. +Since we only support Device ID 0xF010 for the Vitis workflow, any subsequent loads of AFI's would work seamlessly. 
+ +However, after MPD has started if you clear the slot, the cleared slot will show a Device ID 0x1042 and XOCL will not bind. +Therefore, to be able to run your host application again after clearing a slot manually, you will need to restart the MPD service: + ```sudo systemctl restart mpd``` + +**Note that MPD service starts asynchronously, so you might have to wait till all the slots are loaded with the Default AFI before your application can run.** + +## Custom Runtime AMI usecase +On your custom Runtime AMI, MPD will be enabled by default once you install Xilinx XRT. +On startup, MPD will check if the instance has FPGA's and will load the Default AFI's. +After MPD has started if you clear the slot, the cleared slot will show a Device ID 0x1042 and XOCL will not bind. +Therefore, to be able to run your host application again after clearing a slot manually, you will need to restart the MPD service: + ```sudo systemctl restart mpd``` +**Note that MPD service starts asynchronously, so you might have to wait till all the slots are loaded with the Default AFI before your application can run.** + +## Default AFI details +The Default AFI loaded is a regular `Hello World` AFI that provides the Device ID 0xF010. 
+ +# Centos/RHEL build and install steps + +```bash +XRT_RELEASE_TAG=2019.2.0.3 # Substitute XRT_RELEASE_TAG= + +git clone https://github.com/aws/aws-fpga.git + +cd aws-fpga +source vitis_setup.sh +cd $VITIS_DIR/Runtime +export XRT_PATH="${VITIS_DIR}/Runtime/${XRT_RELEASE_TAG}" +git clone http://www.github.com/Xilinx/XRT.git -b ${XRT_RELEASE_TAG} ${XRT_PATH} + +cd ${XRT_PATH} +sudo ./src/runtime_src/tools/scripts/xrtdeps.sh + +cd build +scl enable devtoolset-6 bash +./build.sh + +cd Release +sudo yum reinstall xrt_*.rpm -y +``` + +# AL2 build and install steps + +```bash +XRT_RELEASE_TAG=202010.2.6.AWS # Substitute XRT_RELEASE_TAG= + +git clone https://github.com/aws/aws-fpga.git + +cd aws-fpga +source vitis_setup.sh +cd $VITIS_DIR/Runtime +export XRT_PATH="${VITIS_DIR}/Runtime/${XRT_RELEASE_TAG}" +git clone http://www.github.com/Xilinx/XRT.git -b ${XRT_RELEASE_TAG} ${XRT_PATH} + +cd ${XRT_PATH} +sudo ./src/runtime_src/tools/scripts/xrtdeps.sh + +cd build +./build.sh + +cd Release +sudo yum reinstall xrt_*.rpm -y +``` + +# Centos/RHEL/AL2 pre-built RPM install steps + + +```bash +curl -s -o xrt.rpm +curl -s -o xrt-aws.rpm +sudo yum reinstall xrt*.rpm -y +``` + +# FAQ + +*Q:* What should I do if I see this message when I run the host application: ```xclProbe found 1 FPGA slots with xocl driver running +WARNING: AwsXcl - Cannot open userPF: /dev/dri/renderD0 +WARNING: AwsXcl isGood: invalid user handle. +WARNING: xclOpen Handle check failed +device[0].user_instance : 0 +WARNING: AwsXcl - Cannot open userPF: /dev/dri/renderD0 +WARNING: AwsXcl isGood: invalid user handle. +ERROR: xclOpen Handle check failed``` + +This means that the XOCL driver hasn't been able to bind to the User PF. Please try to restart MPD: `sudo systemctl restart mpd` + +*Q:* How do I verify that my device is usable?: +Use the Xilinx `xbutil` utility. If you sourced the `vitis_runtime_setup.sh` script, it should be available in your path. 
+ +``` +xbutil scan +INFO: Found total 1 card(s), 1 are usable +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +System Configuration +OS name: Linux +Release: 3.10.0-1062.4.1.el7.x86_64 +Version: #1 SMP Fri Oct 18 17:15:30 UTC 2019 +Machine: x86_64 +Model: HVM domU +CPU cores: 8 +Memory: 122724 MB +Glibc: 2.17 +Distribution: CentOS Linux 7 (Core) +Now: Thu Jan 30 03:29:45 2020 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +XRT Information +Version: 2.3.0 +Git Hash: 42da4cceb02e0386e0daeaea230bdc86ea40d19a +Git Branch: 2019.2 +Build Date: 2020-01-30 02:56:41 +XOCL: 2.3.0,42da4cceb02e0386e0daeaea230bdc86ea40d19a +XCLMGMT: unknown +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + [0] 0000:00:1d.0 xilinx_aws-vu9p-f1_dynamic_5_0(ts=0xabcd) user(inst=128) +``` + +An unusable device will show up like this: +``` +xbutil scan +INFO: Found total 1 card(s), 1 are usable +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +System Configuration +OS name: Linux +Release: 3.10.0-1062.4.1.el7.x86_64 +Version: #1 SMP Fri Oct 18 17:15:30 UTC 2019 +Machine: x86_64 +Model: HVM domU +CPU cores: 8 +Memory: 122724 MB +Glibc: 2.17 +Distribution: CentOS Linux 7 (Core) +Now: Thu Jan 30 03:29:45 2020 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +XRT Information +Version: 2.3.0 +Git Hash: 42da4cceb02e0386e0daeaea230bdc86ea40d19a +Git Branch: 2019.2 +Build Date: 2020-01-30 02:56:41 +XOCL: 2.3.0,42da4cceb02e0386e0daeaea230bdc86ea40d19a +XCLMGMT: unknown +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*[0] 0000:00:1d.0 xilinx_aws-vu9p-f1_dynamic_5_0(ts=0xabcd) user(inst=128) +WARNING: card(s) marked by '*' are not ready, is MPD runing? 
run 'systemctl status mpd' to check MPD details.``` +``` + +# Additional Documentation +* [XRT Documentation](https://xilinx.github.io/XRT/master/html/) + +* [XRT MPD Documentation](https://xilinx.github.io/XRT/master/html/cloud_vendor_support.html) diff --git a/Vitis/examples/xilinx_2019.2 b/Vitis/examples/xilinx_2019.2 new file mode 160000 index 00000000..bb80c8ec --- /dev/null +++ b/Vitis/examples/xilinx_2019.2 @@ -0,0 +1 @@ +Subproject commit bb80c8ec699c3131e8874735bd99475ac6fe2ec7 diff --git a/Vitis/examples/xilinx_2020.1 b/Vitis/examples/xilinx_2020.1 new file mode 160000 index 00000000..6dc51743 --- /dev/null +++ b/Vitis/examples/xilinx_2020.1 @@ -0,0 +1 @@ +Subproject commit 6dc5174366f13e541af446213db0c98f401ba1e6 diff --git a/Vitis/examples/xilinx_2020.2 b/Vitis/examples/xilinx_2020.2 new file mode 160000 index 00000000..f72dff9e --- /dev/null +++ b/Vitis/examples/xilinx_2020.2 @@ -0,0 +1 @@ +Subproject commit f72dff9eea45a76e9ee0713774589624e2b52c9f diff --git a/Vitis/kernel_version.txt b/Vitis/kernel_version.txt new file mode 100644 index 00000000..18fe1c12 --- /dev/null +++ b/Vitis/kernel_version.txt @@ -0,0 +1,9 @@ +3.10.0-862.11.6.el7.x86_64 +3.10.0-693.21.1.el7.x86_64 +3.10.0-957.1.3.el7.x86_64 +3.10.0-957.5.1.el7.x86_64 +3.10.0-957.27.2.el7.x86_64 +3.10.0-1062.4.1.el7.x86_64 +3.10.0-1062.9.1.el7.x86_64 +3.10.0-1127.10.1.el7.x86_64 +4.14.209-160.339.amzn2.x86_64 diff --git a/Vitis/packages.txt b/Vitis/packages.txt new file mode 100644 index 00000000..c682af2f --- /dev/null +++ b/Vitis/packages.txt @@ -0,0 +1,18 @@ +ocl-icd +ocl-icd-devel +opencl-headers +libstdc++-static +kernel-headers +kernel-devel +gcc-c++ +gcc +gdb +libstdc++-static +make +opencv +python +git +libjpeg-turbo-devel +libpng12-devel +libtiff-devel +compat-libtiff3 diff --git a/Vitis/tests/test_build_vitis_example.py b/Vitis/tests/test_build_vitis_example.py new file mode 100644 index 00000000..5da6e81a --- /dev/null +++ b/Vitis/tests/test_build_vitis_example.py @@ -0,0 +1,117 @@ 
+#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. +''' +Pytest module: + +Call using ```pytest test_build_vitis_example.py``` + +See TESTING.md for details. +''' + +from __future__ import print_function +import os +from os.path import dirname, realpath, basename +import json +try: + import aws_fpga_utils + import aws_fpga_test_utils + from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print("error: {}\nMake sure to source shared/bin/setup_test_env.sh".format(sys.exc_info()[1])) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__name__) + +class TestBuildVitisExample(AwsFpgaTestBase): + ''' + Pytest test class. + + NOTE: Cannot have an __init__ method. + + ''' + ADD_EXAMPLEPATH = True + ADD_RTENAME = True + ADD_XILINX_VERSION = True + + @classmethod + def setup_class(cls): + ''' + Do any setup required for tests. 
+ ''' + + AwsFpgaTestBase.setup_class(cls, __file__) + + AwsFpgaTestBase.assert_sdk_setup() + AwsFpgaTestBase.assert_vitis_setup() + + return + + def test_sw_emu(self, examplePath, rteName, xilinxVersion): + target = "sw_emu" + self.base_test(examplePath=examplePath, target=target, rteName=rteName, xilinxVersion=xilinxVersion, check=True) + + def test_hw_emu(self, examplePath, rteName, xilinxVersion): + target = "hw_emu" + self.base_test(examplePath=examplePath, target=target, rteName=rteName, xilinxVersion=xilinxVersion, check=True) + + def test_hw_build(self, examplePath, rteName, xilinxVersion): + target = "hw" + self.base_test(examplePath=examplePath, target=target, rteName=rteName, xilinxVersion=xilinxVersion, check=False) + + def check_build(self, examplePath, target): + + xclbin_path = self.get_vitis_xclbin_dir(examplePath, target) + + logger.info("Checking if Vitis Example xclbin path={} exists".format(xclbin_path)) + assert os.path.exists(xclbin_path), "Vitis Example xclbinpath={} does not exist".format(xclbin_path) + + logger.info("Checking that a non zero size xclbin file exists in {}".format(xclbin_path)) + xclbin = self.assert_non_zero_file(os.path.join(xclbin_path, "*.xclbin")) + logger.info("xclbin: {}".format(xclbin)) + + return xclbin + + def base_test(self, examplePath, target, rteName, xilinxVersion, clean=True, check=True): + + full_example_path = self.get_vitis_example_fullpath(examplePath=examplePath) + logger.info("Vitis Example path={}".format(full_example_path)) + + assert os.path.exists(full_example_path), "Vitis Example path={} does not exist".format(full_example_path) + + os.chdir(full_example_path) + + if clean: + (rc, stdout_lines, stderr_lines) = self.run_cmd("make clean") + assert rc == 0, "Vitis build failed while cleaning with rc={}".format(rc) + + check_string = "" + if check: + check_string = "check" + + (rc, stdout_lines, stderr_lines) = self.run_cmd("make {0} TARGET={1} DEVICE={2} all PROFILE=yes".format(check_string, target, 
os.environ['AWS_PLATFORM'])) + assert rc == 0, "Vitis build failed with rc={}".format(rc) + + # Check for non zero xclbin + xclbin = self.check_build(examplePath=examplePath, target=target) + + xclbin_key = os.path.join(self.get_vitis_example_s3_xclbin_tag(examplePath=examplePath, target=target, rteName=rteName, xilinxVersion=xilinxVersion), basename(xclbin)) + + logger.info("Uploading xclbin to {}".format(os.path.join(self.s3_bucket, xclbin_key))) + self.s3_client().upload_file(xclbin, self.s3_bucket, xclbin_key) + + return diff --git a/Vitis/tests/test_create_vitis_afi.py b/Vitis/tests/test_create_vitis_afi.py new file mode 100644 index 00000000..05f51b23 --- /dev/null +++ b/Vitis/tests/test_create_vitis_afi.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +''' +Pytest module: + +Call using ```pytest test_create_vitis_afi.py``` + +See TESTING.md for details. 
+''' + +from __future__ import print_function +from __builtin__ import str +import boto3 +import os +from os.path import basename, dirname, realpath +import pytest +import re +import sys +import traceback +import json + +try: + import aws_fpga_test_utils + from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase + import aws_fpga_utils +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print("error: {}\nMake sure to source shared/bin/setup_test_env.sh".format(sys.exc_info()[1])) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__name__) + +class TestCreateVitisAfi(AwsFpgaTestBase): + ''' + Pytest test class. + + NOTE: Cannot have an __init__ method. + + Create AFI from xclbin. + ''' + + ADD_EXAMPLEPATH = True + ADD_RTENAME = True + ADD_XILINX_VERSION = True + + @classmethod + def setup_class(cls): + ''' + Do any setup required for tests. + ''' + AwsFpgaTestBase.setup_class(cls, __file__) + + AwsFpgaTestBase.assert_sdk_setup() + AwsFpgaTestBase.assert_vitis_setup() + + return + + def call_create_afi_script(self, examplePath, xclbin, target, rteName, xilinxVersion): + + full_example_path = self.get_vitis_example_fullpath(examplePath=examplePath) + logger.info("Vitis Example path={}".format(full_example_path)) + + assert os.path.exists(full_example_path), "Vitis Example path={} does not exist".format(full_example_path) + + os.chdir(full_example_path) + + xclbin_basename = os.path.basename(xclbin) + xclbin_filename = os.path.splitext(xclbin_basename)[0] + aws_xclbin_filename_rte = xclbin_filename + + aws_xclbin_path = AwsFpgaTestBase.get_vitis_xclbin_dir(examplePath) + aws_xclbin_basename = os.path.join(aws_xclbin_path, aws_xclbin_filename_rte) + + cmd = "{}/Vitis/tools/create_vitis_afi.sh -s3_bucket={} -s3_dcp_key={} -xclbin={} -o={}".format( + self.WORKSPACE, + self.s3_bucket, + self.get_vitis_example_s3_dcp_tag(examplePath=examplePath, target=target, rteName=rteName, xilinxVersion=xilinxVersion), + xclbin, + aws_xclbin_basename + ) + + 
logger.info(cmd) + rc = os.system(cmd) + assert rc == 0, "Error encountered while running the create_vitis_afi.sh script" + + logger.info("Checking that a non zero size aws_xclbin file exists in {}".format(aws_xclbin_path)) + aws_xclbin = self.assert_non_zero_file(os.path.join(aws_xclbin_path, "*.awsxclbin")) + logger.info("Uploading aws_xclbin file: {}".format(aws_xclbin)) + + aws_xclbin_key = os.path.join(self.get_vitis_example_s3_xclbin_tag(examplePath=examplePath, target=target, rteName=rteName, xilinxVersion=xilinxVersion), basename(aws_xclbin)) + self.s3_client().upload_file(aws_xclbin, self.s3_bucket, aws_xclbin_key) + + create_afi_response_file = self.assert_non_zero_file(os.path.join(full_example_path, "*afi_id.txt")) + + create_afi_response_file_key = self.get_vitis_example_s3_afi_tag(examplePath=examplePath, target=target, rteName=rteName, xilinxVersion=xilinxVersion) + + logger.info("Uploading create_afi output file: {}".format(create_afi_response_file)) + self.s3_client().upload_file(create_afi_response_file, self.s3_bucket, create_afi_response_file_key) + + create_afi_response = json.load(open(create_afi_response_file)) + + return create_afi_response + + + def test_create_vitis_afi(self, examplePath, rteName, xilinxVersion, target="hw"): + + xclbin = self.get_vitis_xclbin_file(examplePath, rteName, xilinxVersion) + create_afi_response = self.call_create_afi_script(examplePath, xclbin, target, rteName, xilinxVersion) + + afi = create_afi_response.get("FpgaImageId", None) + + assert afi is not None, "AFI ID not available in create_afi response:{}".format(str(create_afi_response)) + + # Wait for the AFI to complete + rc = os.system(self.WORKSPACE + "/shared/bin/scripts/wait_for_afi.py --afi {}".format(afi)) + assert rc == 0, "Error while waiting for afi={}".format(afi) + + self.assert_afi_available(afi) diff --git a/Vitis/tests/test_find_vitis_examples.py b/Vitis/tests/test_find_vitis_examples.py new file mode 100644 index 00000000..9d54393c --- 
/dev/null +++ b/Vitis/tests/test_find_vitis_examples.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. +''' +Pytest module: + +Call using ```pytest test_find_vitis_examples.py``` + +See TESTING.md for details. +''' + +from __future__ import print_function +import os +from os.path import dirname, realpath +import json +try: + import aws_fpga_utils + import aws_fpga_test_utils + from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print("error: {}\nMake sure to source shared/bin/setup_test_env.sh".format(sys.exc_info()[1])) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__name__) + +class TestFindVitisExamples(AwsFpgaTestBase): + ''' + Pytest test class. + + NOTE: Cannot have an __init__ method. + + ''' + ADD_XILINX_VERSION = True + + @classmethod + def setup_class(cls): + ''' + Do any setup required for tests. 
+ ''' + AwsFpgaTestBase.setup_class(cls, __file__) + return + + def test_find_example_makefiles(self, xilinxVersion): + + assert os.path.exists(self.xilinx_vitis_examples_dir), "The Xilinx Vitis example dir does not exist: {}".format(self.xilinx_vitis_examples_dir) + assert os.listdir(self.xilinx_vitis_examples_dir) != [], "Xilinx Vitis example submodule not cloned or does not exist" + + xilinx_examples_makefiles = [] + xilinx_vitis_example_map = {} + + for root, dirs, files in os.walk(self.xilinx_vitis_examples_dir): + ignore = False + + if os.path.exists(root + "/description.json") and os.path.exists(root + "/Makefile"): + with open(root + "/description.json", "r") as description_file: + description = json.load(description_file) + + if "containers" in description: + if len(description["containers"]) > 1: + ignore = True + logger.info("Ignoring {} as >1 containers found in description.json.".format(root)) + else: + ignore = True + logger.info("Ignoring {} as no containers found in description.json.".format(root)) + continue + + if "ndevice" in description: + if "aws" in description["ndevice"]: + ignore = True + logger.info("Ignoring {} as F1 device found in ndevice.".format(root)) + continue + else: + ignore = True + logger.warn("Ignoring: {} as no Makefile/description.json exist".format(root)) + + if not ignore: + xilinx_examples_makefiles.append(root) + logger.info("Adding: " + root) + + assert len(xilinx_examples_makefiles) != 0, "Could not find any Xilinx Vitis example in %s" % self.xilinx_vitis_examples_dir + + # Remove the workspace path so that the next node can reference this path directly + # So we don't face cases like /workspace@3 .. 
+ xilinx_examples_makefiles = [os.path.relpath(full_path, self.WORKSPACE) for full_path in xilinx_examples_makefiles] + + for example_path in xilinx_examples_makefiles: + + example_test_class = example_path.replace('/', '__').capitalize() + + xilinx_vitis_example_map[example_test_class] = example_path + + with open(self.xilinx_vitis_examples_list_file, 'w') as outfile: + json.dump(xilinx_vitis_example_map, outfile) + + # Also write the archive file + with open(self.xilinx_vitis_examples_list_file + "." + xilinxVersion, 'w') as archive_file: + json.dump(xilinx_vitis_example_map, archive_file) + + assert os.path.getsize(self.xilinx_vitis_examples_list_file) > 0, "%s is a non zero file. We need to have some data in the file" % self.xilinx_vitis_examples_list_file diff --git a/Vitis/tests/test_run_vitis_example.py b/Vitis/tests/test_run_vitis_example.py new file mode 100644 index 00000000..ec24b6dc --- /dev/null +++ b/Vitis/tests/test_run_vitis_example.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +''' +Pytest module: + +Call using ```pytest test_create_afi.py``` + +See TESTING.md for details. 
+''' + +from __future__ import print_function +import boto3 +import os +from os.path import basename, dirname, realpath +import pytest +import re +import sys +import traceback +import json +try: + import aws_fpga_test_utils + from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase + import aws_fpga_utils +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print("error: {}\nMake sure to source shared/bin/setup_test_env.sh".format(sys.exc_info()[1])) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__name__) + +class TestRunVitisExample(AwsFpgaTestBase): + ''' + Pytest test class. + + NOTE: Cannot have an __init__ method. + + Run the Vitis example + ''' + + ADD_EXAMPLEPATH = True + ADD_RTENAME = True + ADD_XILINX_VERSION = True + + @classmethod + def setup_class(cls): + ''' + Do any setup required for tests. + ''' + AwsFpgaTestBase.setup_class(cls, __file__) + + AwsFpgaTestBase.assert_sdk_setup() + AwsFpgaTestBase.assert_vitis_setup() + + return + + @pytest.mark.flaky(reruns=2, reruns_delay=2) + def test_run_vitis_example(self, examplePath, rteName, xilinxVersion): + os.chdir(self.get_vitis_example_fullpath(examplePath)) + + (rc, stdout_lines, stderr_lines) = self.run_cmd("make exe") + assert rc == 0 + + em_run_cmd = self.get_vitis_example_run_cmd(examplePath, xilinxVersion) + check_runtime_script = os.path.join(AwsFpgaTestBase.WORKSPACE,'vitis_runtime_setup.sh') + + self.get_vitis_aws_xclbin_file(examplePath, rteName, xilinxVersion) + + # run_cmd = "sudo -E /bin/bash -l -c \"source {} && {} \"".format(check_runtime_script, em_run_cmd) + run_cmd = "source {} && sleep 1m && {}".format(check_runtime_script, em_run_cmd) + logger.info("Running cmd={}".format(run_cmd)) + (rc, stdout_lines, stderr_lines) = self.run_cmd(run_cmd) + assert rc == 0 + + diff --git a/Vitis/tests/test_vitis_scripts.py b/Vitis/tests/test_vitis_scripts.py new file mode 100644 index 00000000..e106c1b6 --- /dev/null +++ b/Vitis/tests/test_vitis_scripts.py @@ -0,0 +1,56 @@ 
+#!/usr/bin/env python2.7 + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import logging +import os +from os.path import dirname, realpath +import pytest +import subprocess +import sys +import traceback +try: + import aws_fpga_utils + import aws_fpga_test_utils + from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase +except ImportError as e: + traceback.print_tb(sys.exc_info()[2]) + print("error: {}\nMake sure to source hdk_setup.sh".format(sys.exc_info()[1])) + sys.exit(1) + +logger = aws_fpga_utils.get_logger(__name__) + +class TestVitisScripts(AwsFpgaTestBase): + ''' + Pytest test class. + + NOTE: Cannot have an __init__ method. + ''' + + @classmethod + def setup_class(cls): + ''' + Do any setup required for tests. + ''' + AwsFpgaTestBase.setup_class(cls, __file__) + + AwsFpgaTestBase.assert_sdaccel_setup() + return + + @pytest.mark.skip(reason="Not implemented") + def test_vitis_setup(self): + assert False diff --git a/Vitis/tools/create_vitis_afi.sh b/Vitis/tools/create_vitis_afi.sh new file mode 100755 index 00000000..5f19c848 --- /dev/null +++ b/Vitis/tools/create_vitis_afi.sh @@ -0,0 +1,288 @@ +#!/bin/bash + +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. 
A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +script=${BASH_SOURCE[0]} +full_script=$(readlink -f $script) +script_name=$(basename $full_script) + +source $AWS_FPGA_REPO_DIR/shared/bin/set_common_functions.sh +source $AWS_FPGA_REPO_DIR/shared/bin/set_common_env_vars.sh +script_dir=$(dirname $full_script) + + +debug=0 + +function usage { + info_msg "USAGE: $script_name [-h|-help] [-s3_bucket=] [-s3_dcp_key=] [-s3_logs_key=] [-xclbin=] [-o=] [-awsprofile=]" +} + +function help { + info_msg "$script_name" + info_msg " " + info_msg "Vitis AFI Creation" + info_msg " " + info_msg "create_vitis_afi.sh assumes you have:" + info_msg " (*) Read the README on Github and understand the Vitis workflow (Vitis/README.md)" + info_msg " (*) Generated an XCLBIN using the Vitis Build flow" + info_msg " (*) Ready to create an AFI and test on F1. Your kernel has been validated using SW/HW Emulation." 
+ info_msg "create_vitis_afi.sh will:" + info_msg " (1) Extract three parts from your XCLBIN: DCP (.dcp) , clocks data(.json) and build metadata (.json)" + info_msg " (2) Generates a Manifest file for AFI creation that sets the clocks based on your achieved target freq from your build" + info_msg " (3) Prepares tar file for AFI creation process" + info_msg " (4) Calls aws ec2 create-fpga-image" + info_msg " (5) Creates new XCLBIN (called AWSXCLBIN) that is composed of: Metadata and AGFI-ID" + echo " " + usage +} + +if [ "$1" == "" ] +then + err_msg "Invalid usage" + usage + exit 1 +fi + +while [ "$1" != "" ]; do + PARAM=`echo $1 | awk -F= '{print $1}'` + VALUE=`echo $1 | awk -F= '{print $2}'` + case $PARAM in + -h | --help) + help + exit + ;; + -xclbin) + xclbin=$VALUE + ;; + -o) + awsxclbin=$VALUE + ;; + -aws_profile_name) + aws_profile_name=$VALUE + ;; + -s3_bucket) + s3_bucket=$VALUE + ;; + -s3_logs_key) + s3_logs=$VALUE + ;; + -s3_dcp_key) + s3_dcps=$VALUE + ;; + *) + err_msg "Unknown parameter \"$PARAM\"" + usage + exit 1 + ;; + esac + shift +done + +if [ "$RELEASE_VER" == "" ] +then + err_msg "Env variable RELEASE_VER not set, did you source sdaccel_setup.sh?" 
+ exit 1 +fi + +if [[ -e "$xclbin" ]] +then + info_msg "Found xclbin '$xclbin'" +else + err_msg "File '$xclbin' not found" + exit 1 +fi + +stripped_xclbin=$(basename $xclbin) +ext_xclbin=${stripped_xclbin##*.} +stripped_xclbin=${stripped_xclbin%.*} + +info_msg "$stripped_xclbin" + +if [ "$awsxclbin" == "" ] +then + awsxclbin=$stripped_xclbin +fi + + +if [ "$awsxclbin" != "$stripped_xclbin" ] +then + warn_msg "$awsxclbin does not match $stripped_xclbin" + warn_msg "For github examples, -o must be equal to $stripped_xclbin" +fi + +if [[ -e "$awsxclbin" ]] +then + err_msg "File '$awsxclbin' already exists" + exit 1 +fi + +if [ ":$s3_bucket" == ":" ] +then + err_msg "Invalid s3_bucket" + usage + exit 1 +fi + +# s3 logs is not required +# s3 dcp key is required +if [ "$s3_dcps" == "" ] +then + err_msg "Invalid s3_dcps key" + usage + exit 1 +fi + +timestamp=$(date +"%y_%m_%d-%H%M%S") + +#Steps +#1. Strip XCLBIN to get DCP for ingestion +#2. Create Manifest file +#3. Prepare ingestion tar file +#4. Call create-fpga-image +#5. Manipulate the AFI ID +#6. 
Create awsxclbin + +#STEP 1 +#Strip XCLBIN to get DCP for ingestion +#/opt/xilinx/xrt/bin/xclbinsplit -o ${timestamp} $xclbin +/opt/xilinx/xrt/bin/xclbinutil --dump-section BUILD_METADATA:JSON:${timestamp}_build.json --dump-section CLOCK_FREQ_TOPOLOGY:JSON:${timestamp}_clocks.json --dump-section BITSTREAM:RAW:${timestamp}_SH_CL_routed.dcp -i $xclbin +if [[ -e "${timestamp}_SH_CL_routed.dcp" ]] +then + info_msg "Split DCP from xclbin: ${timestamp}_SH_CL_routed.dcp" +else + err_msg "File ${timestamp}_SH_CL_routed.dcp not found" + exit 1 +fi +if [[ -e "${timestamp}_build.json" ]] +then + info_msg "The build Metadata from xclbin: ${timestamp}_build.json" +else + err_msg "File ${timestamp}_build.json not found" + exit 1 +fi +if [[ -e "${timestamp}_clocks.json" ]] +then + info_msg "The clocks Metadata from xclbin: ${timestamp}_clocks.json" +else + err_msg "File ${timestamp}_clocks.json not found" + exit 1 +fi + +if [[ -d "to_aws" ]] +then + err_msg "Directory to_aws already exists" + exit 1 +fi + +mkdir to_aws +cp ${timestamp}_SH_CL_routed.dcp ./to_aws/ + +#STEP 2 +#Create Manifest file +strategy=DEFAULT +hdk_version=$(grep 'HDK_VERSION' $script_dir/../../hdk/hdk_version.txt | sed 's/=/ /g' | awk '{print $2}') +shell_version=0x04261818 +tool_version=v$RELEASE_VER +device_id=0xF010 +vendor_id=0x1D0F +subsystem_id=0x1D51 +subsystem_vendor_id=0xFEDD +# Get XSA info from build metadata +vendor=$(echo `grep -A 6 dsa ./${timestamp}_build.json | grep vendor | sed 's/.*: "//g' | sed 's/",//'`) +board_id=$(echo `grep -A 6 dsa ./${timestamp}_build.json | grep board_id | sed 's/.*: "//g' | sed 's/",//'`) +plat_name=$(echo `grep -A 6 dsa ./${timestamp}_build.json | grep name | sed 's/.*: "//g' | sed 's/",//'`) +major=$(echo `grep -A 6 dsa ./${timestamp}_build.json | grep major | sed 's/.*: "//g' | sed 's/",//'`) +minor=$(echo `grep -A 6 dsa ./${timestamp}_build.json | grep minor | sed 's/.*: "//g' | sed 's/",//'`) +# Get clock info from clock metadata +clock_main_a0=$(echo `grep -B 
1 SYSTEM ${timestamp}_clocks.json | grep -o -e '[0-9]*'`) +clock_extra_b0=$(echo `grep -B 2 DATA_CLK ${timestamp}_clocks.json | grep freq | grep -o -e '[0-9]*'`) +clock_extra_c0=$(echo `grep -B 2 KERNEL_CLK ${timestamp}_clocks.json | grep -o -e '[0-9]*'`) + +if [[ "$vendor" != "xilinx" && "$board_id" != "aws-vu9p-f1" && "$plat_name" != "shell-v04261818" && "$major" != "201920" && "$minor" != "2" ]] +then + err_msg "Platform ${vendor}_${board_id}_${plat_name}_${major}_${minor} used to create xclbin is not correct, you should be using xilinx_aws-vu9p-f1_shell-v04261818_201920_2" + exit +fi + +#Write Manifest File here +hash=$( sha256sum to_aws/${timestamp}_SH_CL_routed.dcp | awk '{ print $1 }' ) +manifest_file="${timestamp}_manifest.txt" +exec 3<>$manifest_file +echo "manifest_format_version=2" >&3 +echo "pci_vendor_id=$vendor_id" >&3 +echo "pci_device_id=$device_id" >&3 +echo "pci_subsystem_id=$subsystem_id" >&3 +echo "pci_subsystem_vendor_id=$subsystem_vendor_id" >&3 +echo "dcp_hash=$hash" >&3 +echo "shell_version=$shell_version" >&3 +echo "dcp_file_name=${timestamp}_SH_CL_routed.dcp" >&3 +echo "hdk_version=$hdk_version" >&3 +echo "tool_version=$tool_version" >&3 +echo "date=$timestamp" >&3 +echo "clock_main_a0=$clock_main_a0" >&3 +echo "clock_extra_b0=$clock_extra_b0" >&3 +echo "clock_extra_c0=$clock_extra_c0" >&3 +exec 3>&- +exec 3>&- + +if [[ -e "$manifest_file" ]] +then + info_msg "Generated manifest file '$manifest_file'" +else + err_msg "File '$manifest_file' not found" + exit 1 +fi +cp $manifest_file to_aws/$manifest_file + + +#STEP 3 +#Prepare ingestion +tar -cf ${timestamp}_Developer_Vitis_Kernel.tar to_aws/${timestamp}_SH_CL_routed.dcp to_aws/${timestamp}_manifest.txt +#STEP 4 +#Call create-fpga-image +profile_text="" +if [ "$aws_profile_name" != "" ] +then + profile_text="--profile ${aws_profile_name}" +fi + +log_storage_text="" +if [ "${s3_logs}" != "" ] +then + log_storage_text="--logs-storage-location Bucket=${s3_bucket},Key=${s3_logs}" +fi + +aws s3 
${profile_text} cp ${timestamp}_Developer_Vitis_Kernel.tar s3://${s3_bucket}/${s3_dcps}/ +aws ec2 ${profile_text} create-fpga-image --name ${stripped_xclbin} --description ${stripped_xclbin} --input-storage-location Bucket=${s3_bucket},Key=${s3_dcps}/${timestamp}_Developer_Vitis_Kernel.tar ${log_storage_text} > ${timestamp}_afi_id.txt + + +#STEP 5 +#Manipulate the AFI ID +test=`grep agfi ${timestamp}_afi_id.txt | awk -F: '{print $2}' | sed 's/ \"//g' | sed 's/\".*//g' | sed ':a;N;$!ba;s/\n/ /g'` +echo -n $test > ${timestamp}_agfi_id.txt +echo ${timestamp}_agfi_id.txt + +#STEP 6 +#Create .awsxclbin + +if [ "$RELEASE_VER" == "2020.2" ] +then + /opt/xilinx/xrt/bin/xclbinutil -i $xclbin --remove-section PARTITION_METADATA --replace-section BITSTREAM:RAW:${timestamp}_agfi_id.txt -o ${awsxclbin}.awsxclbin +else + /opt/xilinx/xrt/bin/xclbinutil -i $xclbin --remove-section PARTITION_METADATA --remove-section SYSTEM_METADATA --replace-section BITSTREAM:RAW:${timestamp}_agfi_id.txt -o ${awsxclbin}.awsxclbin +fi diff --git a/Vitis/vitis_xrt_version.txt b/Vitis/vitis_xrt_version.txt new file mode 100644 index 00000000..df3ef470 --- /dev/null +++ b/Vitis/vitis_xrt_version.txt @@ -0,0 +1,4 @@ +2019.2:9e13d57c4563e2c19bf5f518993f6e5a8dadc18a +2020.1:12115fd4054cb46a5ade62fafa74c523f59116e6 +2020.1:d09c4a458c16e8d843b3165dcf929c38f7a32b6f +2020.2:77d5484b5c4daa691a7f78235053fb036829b1e9 diff --git a/conftest.py b/conftest.py index 697ba1dc..47b71fa1 100644 --- a/conftest.py +++ b/conftest.py @@ -23,6 +23,8 @@ def pytest_addoption(parser): + parser.addoption("--batch", action="store", required=False, type=str, + help="batch option for internal simulations", default=0) parser.addoption("--simulator", action="store", required=False, type=str, help="Simulator tool requested for this test", default="vivado") parser.addoption("--examplePath", action="store", required=False, type=str, @@ -34,7 +36,12 @@ def pytest_addoption(parser): def pytest_generate_tests(metafunc): - + + if 
metafunc.cls.ADD_BATCH: + print("Configuring parameters of {}::{}".format(metafunc.module.__name__, metafunc.function.__name__)) + print("Batch = " + metafunc.config.getoption('batch')) + metafunc.parametrize("batch", [metafunc.config.getoption('batch')]) + if metafunc.cls.ADD_SIMULATOR: print("Configuring parameters of {}::{}".format(metafunc.module.__name__, metafunc.function.__name__)) print("Simulator = " + metafunc.config.getoption('simulator')) diff --git a/developer_resources/DCV.md b/developer_resources/DCV.md new file mode 100644 index 00000000..3d28203f --- /dev/null +++ b/developer_resources/DCV.md @@ -0,0 +1,129 @@ +# GUI FPGA Development Environment with NICE DCV +This guide shows steps to setup a GUI FPGA Development Environment using the FPGA Developer AMI using NICE DCV + +## Overview + +[NICE DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) can be used create a virtual desktop on your FPGA Developer AMI instance. + +[NICE DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) is a high-performance remote +display protocol that provides customers with a secure way to deliver remote desktops and application streaming +from any cloud or data center to any device, over varying network conditions. + +With NICE DCV and Amazon EC2, customers can run graphics-intensive applications remotely on EC2 instances +and stream the results to simpler client machines, eliminating the need for expensive dedicated workstations. +Customers across a broad range of HPC workloads use NICE DCV for their remote visualization requirements. +The NICE DCV streaming protocol is also utilized by popular services like Amazon AppStream 2.0 and AWS RoboMaker. + +The [DCV Administrator guide](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) +and the [User guide](https://docs.aws.amazon.com/dcv/latest/userguide/getting-started.html) +are the official resources on how to configure and use DCV. 
+ +The installation process is summarized below for your convenience. + +**NOTE**: +These steps may change when new versions of the DCV Server and Clients are released. +If you experience issues please refer to the [Official DCV documentation](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html). + +## Installation Process + +1. [Setup your FPGA Developer AMI Instance with an IAM Role](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-license.html#setting-up-license-ec2) that grants your instance access to NICE DCV endpoints. + + NICE DCV is available for free to use on EC2. + + The NICE DCV server automatically detects that it is running on an Amazon EC2 instance and periodically connects to an Amazon S3 bucket to determine whether a valid license is available. The IAM role enables this functionality. + + Please follow the steps mentioned in the above guide to attach an IAM role to your instance with the following policy: + ``` + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::dcv-license.region/*" + } + ] + } + ``` + **NOTE:** Without access to the DCV bucket mentioned in the [NICE DCV licensing setup guide](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-license.html#setting-up-license-ec2), your server license is only valid for 15 days. + +1. On your FPGA Developer AMI Instance [update the Instance Security Group](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-network-security.html#adding-security-group-rule) to allow TCP Port **8443** Ingress + +1. [Install NICE DCV pre-requisites](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-installing-linux-prereq.html) + + ``` + sudo yum -y install kernel-devel + sudo yum -y groupinstall "GNOME Desktop" + sudo yum -y install glx-utils + ``` + +1. 
[Install NICE DCV Server](https://docs.aws.amazon.com/dcv/latest/adminguide/setting-up-installing-linux-server.html) + + ``` + sudo rpm --import https://s3-eu-west-1.amazonaws.com/nice-dcv-publish/NICE-GPG-KEY + wget https://d1uj6qtbmh3dt5.cloudfront.net/2019.0/Servers/nice-dcv-2019.0-7318-el7.tgz + tar xvf nice-dcv-2019.0-7318-el7.tgz + cd nice-dcv-2019.0-7318-el7 + sudo yum -y install nice-dcv-server-2019.0.7318-1.el7.x86_64.rpm + sudo yum -y install nice-xdcv-2019.0.224-1.el7.x86_64.rpm + + sudo systemctl enable dcvserver + sudo systemctl start dcvserver + ``` + +1. Setup Password + + ``` + sudo passwd centos + ``` + +1. Change firewall settings + + Options: + + * Disable firewalld to allow all connections + ``` + sudo systemctl stop firewalld + sudo systemctl disable firewalld + ``` + + * Open up the firewall only for tcp port 8443 + + ``` + sudo systemctl start firewalld + sudo systemctl enable firewalld + sudo firewall-cmd --zone=public --add-port=8443/tcp --permanent + sudo firewall-cmd --reload + ``` + +1. Create a virtual session to connect to + + **NOTE: You will have to create a new session if you restart your instance.** + + ``` + dcv create-session --type virtual --user centos centos + ``` + +1. Connect to the DCV Remote Desktop session + + 1. **Using a web browser** + + * Make sure that you are using a [supported web browser](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html#what-is-dcv-requirements). + + * Use the secure URL, Public IP address, and correct port (8443) to connect. For example: `https://111.222.333.444:8443` + + **NOTE:** When you connect make sure you use the `https` protocol to ensure a secure connection. + + 1. 
**Using the NICE DCV Client** + + * Download and install the [DCV Client](https://download.nice-dcv.com/) + + * Use the Public IP address, and correct port (8443) to connect + + An example login screen (for the DCV Client you will need to connect first using the IP:Port, for example `111.222.333.444:8443`): + + ![DCV Login](images/dcv_login.png) + +1. Logging in should show you your new GUI Desktop: + + ![DCV Desktop](images/dcv_desktop.png) \ No newline at end of file diff --git a/developer_resources/DCV_with_ParallelCluster.md b/developer_resources/DCV_with_ParallelCluster.md new file mode 100644 index 00000000..e5132719 --- /dev/null +++ b/developer_resources/DCV_with_ParallelCluster.md @@ -0,0 +1,457 @@ + + +# GUI FPGA Development Environment with NICE DCV and ParallelCluster + +Deploy a CloudFormation template to Launch an EC2 instance with the FPGA Developer AMI that has DCV Remote Desktop and ParallelCluster. + +## Table of Contents + + * [Overview](#overview) + * [Requirements](#requirements) + * [Architecture](#architecture) + * [Cost](#cost) + * [Duration](#duration) + * [Step-by-step Guide](#step-by-step-guide) + * [Subscribe to AWS FPGA Developer AMI](#subscribe-to-aws-fpga-developer-ami) + * [Launch with CloudFormation](#launch-with-cloudformation) + * [Connect to the DCV Remote Desktop session](#connect-to-the-dcv-remote-desktop-session) + * [Launch Vivado](#launch-vivado) + * [ParallelCluster Configuration](#pcluster-config) + * [Building a DCP On ParallelCluster Using SGE](#building-a-dcp-on-parallelcluster-using-sge) + * [Building a DCP On ParallelCluster Using Slurm](#building-a-dcp-on-parallelcluster-using-slurm) + * [Building a DCP On ParallelCluster Using Torque](#building-a-dcp-on-parallelcluster-using-torque) + * [FAQ](#faq) + * [References](#references) + + +## Overview + +This tutorial shows how to launch an EC2 instance using the FPGA Developer AMI that has +[NICE DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) and 
+[AWS ParallelCluster](https://docs.aws.amazon.com/parallelcluster/latest/ug/what-is-aws-parallelcluster.html) +installed and configured to enable FPGA development in a GUI environment that is high performance +and cost effective. + +[NICE DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) is a high-performance remote +display protocol that provides customers with a secure way to deliver remote desktops and application streaming +from any cloud or data center to any device, over varying network conditions. + +With NICE DCV and Amazon EC2, customers can run graphics-intensive applications remotely on EC2 instances +and stream the results to simpler client machines, eliminating the need for expensive dedicated workstations. +Customers across a broad range of HPC workloads use NICE DCV for their remote visualization requirements. +The NICE DCV streaming protocol is also utilized by popular services like Amazon AppStream 2.0 and AWS RoboMaker. + +[AWS ParallelCluster](https://docs.aws.amazon.com/parallelcluster/latest/ug/what-is-aws-parallelcluster.html) +provides a scalable compute environment for running compute or resource intensive jobs such as DCP generation or +F1 runtime applications. +ParallelCluster can help manage costs by automatically starting and terminating instances as needed by jobs. + + + +## Requirements +- You will need to subscribe to the [AWS FPGA Developer AMI on the AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) +- You will need a VPC that has access to the internet, either using a public subnet or NAT gateway. + - This is required to download all of the packages (for both DCV and OS packages) and to be able to connect to the instances. + - ParallelCluster instances can run in either private or public subnets that have access to the internet. 
+ + +## Architecture + +![Architecture](images/vivado_dcv_diagram.png) + + +## Cost + +There is no additional charge to use NICE DCV or ParallelCluster on Amazon EC2. + +You only pay for the EC2 resources you use to run and store your workloads. + + +## Duration + +The following table shows the estimated time for the different steps in this tutorial. +The time it takes to complete each step will vary based on the instance types that you use. + +| **Step** | **t3.2xlarge Duration** | **c5.4xlarge Duration** | **z1d.xlarge Duration** | **m5.2xlarge Duration** | **r5.xlarge Duration** | +|-------------------------------------------------------------|-------------------------|-------------------------|-------------------------|-------------------------|------------------------| +| [Subscribe to AWS FPGA Developer AMI](#subscribe) | 1 min | 1 min | 1 min | 1 min | 1 min | +| [Launch with CloudFormation](#launch) | 23 min | 18 min | 17 min | 18 min | 20 min | +| [Connect to the DCV Remote Desktop session](#connect) | 1 min | 1 min | 1 min | 1 min | 1 min | +| cl_hello_world DCP on Desktop | 91m40s | 75m44s | 77m9s | 83m42s | 83m20s | + +It will take ~20 minutes for CloudFormation to automatically create your GUI Desktop environment. + + +## Step-by-step Guide + + +### Subscribe to AWS FPGA Developer AMI + +Before you can launch the CloudFormation stack, you will need to subscribe to the AWS FPGA Developer AMI. +There is no charge to subscribe to the AWS FPGA Developer AMI; you will only be charged for the underlying resources. + +* Sign into your AWS account +* Go to the [AWS FPGA Developer AMI on the AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) +* Click on the **Continue to Subscribe** button on the upper right + +![Continue to Subscribe](images/Continue_to_Subscribe.png) + + +### Launch with CloudFormation + +The resources used in this workshop will be launched with AWS CloudFormation. 
For additional information about CloudFormation please visit +[AWS CloudFormation](https://aws.amazon.com/cloudformation/). + +IMPORTANT: Read through all steps below before *_clicking_* the *Launch on AWS* button. + +1. Click on the *Launch on AWS* button and follow the CloudFormation prompts to begin. + + Currently available in these regions. + + TIP *_Context-click (right-click)_* the *Launch on AWS* button and open the link in a new tab or +window to make it easy to navigate between this guide and the AWS Console. + + | *Region* | *Launch template* | + |----------|-------------------| + | *N. Virginia* (us-east-1) | | + | *Ohio* (us-east-2) | | + | *N. California* (us-west-1) | | + | *Oregon* (us-west-2) | | + | *Ireland* (eu-west-1) | | + | *Sydney* (ap-southeast-2) | | + | *Hong Kong\** (ap-east-1) | | + + \**May require additional request for access* +1. Accept the defaults on the *Prerequisite - Prepare template* page and *_click_* *Next*. +1. You should see the *Stack Details* page: + ![Stack Details](images/stack_details.png) +1. *_Enter_* values for parameters. + + | *Parameter* | *Variable Name* | *Description* + |-------------|-----------------|--------------| + | *VPC ID* | VPCId | VPC ID for where the remote desktop instance should be launched. + | *VPC CIDR Block* | VPCCidrBlock | We use this to create a security group that allows NFS access to and from the remote desktop instance. Pick the CIDR from the VPC ID Parameter above (e.g. `vpc-123abc(10.0.0.0/16)`). + | *FPGA Developer AMI Version* | FpgaDevAmiVersion | Select the FPGA Developer AMI Version you want to launch your instances with. Picks the latest version by default. + | *User name for DCV login* | UserName | User name for DCV remote desktop login, default is *_simuser_* + | *Password for DCV login* | UserPass | Password for DCV remote desktop login. + | *Subnet ID* | Subnet | Select a Subnet ID in the Availability Zone where you want the instance launched. 
Pick a subnet from the VPC selected above. + | *EC2 Key Name* | EC2KeyName | Name of an existing EC2 KeyPair to enable SSH access to the instance. + | *Remote Desktop Instance Type* | remoteDesktopInstanceType | Select an instance type for your remote desktop. + | *CIDR block for remote access (ports 22 and 8443)* | AccessCidr | Put the IP ranges from where you allow remote access to your remote desktop. This opens up ports 22 and 8443 for the CIDR range. We recommend setting it to the output of [Check My IP](http://checkip.amazonaws.com/). For example: `12.34.56.78/32` so that only you can access the instance. + | *Project Data Size* | ProjectDataSize | Enter the size in GB for your project_data EBS volume. You can always [increase your volume size later](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/requesting-ebs-volume-modifications.html). Default is 5GB. + | _OPTIONAL_: *Existing Security Group (e.g. sg-abcd1234efgh)* | ExistingSecurityGroup | *OPTIONAL:* Needs to be a SG ID, for example sg-abcd1234efgh. This is an [already existing Security Group ID that is in the same VPC](https://console.aws.amazon.com/vpc/home?#SecurityGroups); it is used in addition to the security groups that are automatically created to enable access to the remote desktop. Leave as NO_VALUE if you choose not to use this. 
+ | _OPTIONAL_: *Static Private IP Address* | StaticPrivateIpAddress | *OPTIONAL:* If you already have a private VPC address range, you can specify the private IP address to use, leave as *NO_VALUE* if you choose not to use this + | *Assign a Public IP address* | UsePublicIp | Should a public IP address be given to the instance, this is overridden by `*CreateElasticIP = True*` + | *Create an Elastic IP address* | CreateElasticIP | Should an Elastic IP address be created and assigned, this allows for persistent IP address assignment + | _OPTIONAL_: *S3 bucket for read access* | S3BucketName | *OPTIONAL:* S3 bucket to allow this instance read access (List and Get), leave as *NO_VALUE* if you choose not to use this + | _OPTIONAL_: *ParallelCluster Scheduler* | Scheduler | *OPTIONAL:* Select a scheduler to setup with ParallelCluster. Only necessary if you want to deploy a compute cluster. + | _OPTIONAL_: *ParallelCluster Subnet ID* | PclusterSubnet | *OPTIONAL:* Select a Subnet ID in the Availability Zone where you want the cluster instances launched. Pick a subnet from the VPC selected above. + | _OPTIONAL_: *Scheduler instance type* | MasterInstanceType | *OPTIONAL:* Select an instance type you want the scheduler master to run on. This can be a small/free tier instance. + | _OPTIONAL_: *DCP Build instance type* | DcpInstanceType | *OPTIONAL:* Select an instance type for building DCPs. z1d.xlarge, c5.4xlarge, m5.2xlarge, r5.xlarge, t3.2xlarge, t2.2xlarge are recommended. + | _OPTIONAL_: *F1 instance type* | F1InstanceType | *OPTIONAL:* Select a runtime instance type for your Runtime queue. + +1. After you have entered values for the parameters, *_click_* *Next*. + +1. *_Accept_* the default values of the *Configure stack options* and *Advanced options* sections and *_click_* *Next*. + +1. *_Review_* the CloudFormation stack settings. + +1. *_Click_* all checkboxes in the blue *Capabilities* box at the bottom of the page. 
+ ![Capabilities](images/capabilities_checkbox.png) + +1. *_Click_* *Create stack*. + + This will start the deployment process. + AWS CloudFormation will create all of the resources specified in the template and set them up. + +1. Verify stack was created successfully + + In the *Events* tab, you should see `*CREATE_COMPLETE*` for the `AWS::CloudFormation::Stack` event Type. + In the *Stack Info* tab, you should see `*CREATE_COMPLETE*` in the Status field. + It will take ~20 minutes for the stack creation to complete. This is due to the large number of packages that need to be installed. Upon completion you should see the connection information (IP address) in the *Outputs* section of the stack. + + +### Connect to the DCV Remote Desktop session + +You can either use your web browser to connect to the DCV Remote Desktop session or you can use the DCV Client. + +1. **Using a web browser** + i. Make sure that you are using a [supported web browser](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html#what-is-dcv-requirements). + i. Use the secure URL, Public IP address, and correct port (8443) to connect + + When you connect make sure you use the https protocol, to ensure you are using a secure connection. + + For example: `https://111.222.333.444:8443` + +1. **Use the NICE DCV Client** + + * Download and install the [DCV Client](https://download.nice-dcv.com/) + + * Use the Public IP address, and correct port (8443) to connect + + For example: `111.222.333.444:8443` + + An example login screen (for the DCV Client you will need to connect first using the IP:Port, for example 111.222.333.444:8443): + + ![DCV Login](images/dcv_login.png) + + * After you log in with the credentials you specified when creating the stack, you will see the Desktop: + + ![DCV Desktop](images/dcv_desktop.png) + + + +### Launch Vivado + +Now that your remote desktop is set up, you can launch the Vivado Design Suite (included in the AWS FPGA Developer AMI). + + i. 
Start a terminal session, go to *_Applications -> Favorites -> Terminal_*. + + i. Type `vivado` at the command prompt and hit enter: + + ![Vivado Launch](images/vivado_launch.png) + + Vivado will launch in a GUI session: + + ![Vivado Startup](images/vivado_startup.png) + + + +### ParallelCluster Configuration + +The template creates a ParallelCluster configuration and an AMI for the cluster instances. +If you selected a scheduler, then it will also create two ParallelCluster clusters. +If you didn't select a scheduler in the template you can still manually start a cluster. + +The configuration file for ParallelCluster is found in `~/.parallelcluster/config` and the +configuration parameters are documented [here](https://docs.aws.amazon.com/parallelcluster/latest/ug/configuration.html). +It supports the following schedulers: +* sge +* slurm +* torque + +The template creates a custom ParallelCluster AMI based on the FPGA Developer AMI so that they have +the Xilinx tools installed. +They also mount `~/src/project_data` from your DCV instance so that your project data is accessible +on the ParallelCluster compute nodes. + +If you selected a scheduler then the template will create two ParallelCluster clusters, where +${Scheduler} is the scheduler you selected when you launched the template. + +* The fpgadev-${Scheduler} cluster is for running compute intense jobs such as DCP generation. +* The fpgarun-${Scheduler} cluster is for testing your AFI on F1 instances. + +If you didn't select a scheduler then you can start the clusters manually using the following commands +replacing ${Scheduler} with the scheduler you want to use. + +``` +pcluster create -t fpgadev-${Scheduler} fpgadev-${Scheduler} +pcluster create -t fpgarun-${Scheduler} fpgarun-${Scheduler} +``` + +All the clusters are configured to terminate the compute nodes if they are idle for more than one minute. +When jobs are queued the cluster will automatically launch enough compute nodes to run the jobs. 
+The configuration file limits the max number of compute nodes in the cluster to two nodes. +You can modify the `max_queue_size` parameter in the configuration file if you need to increase that limit. + +You can check the status of the clusters using the `pcluster list` command. + +``` +$ pcluster list +fpgadev-sge CREATE_IN_PROGRESS 2.4.1 +fpgarun-sge CREATE_IN_PROGRESS 2.4.1 +``` + +If no clusters are listed then it is possible that the custom AMI isn't complete. +You can check the status of the custom AMI generation by looking in the log file +at `~/.parallelcluster/create-ami.log`. + +Wait until the cluster status is *CREATE_COMPLETE*. + +``` +$ pcluster list +fpgadev-sge CREATE_COMPLETE 2.4.1 +fpgarun-sge CREATE_COMPLETE 2.4.1 +``` + +You can get information about the cluster by running the `pcluster status` command. + +``` +$ pcluster status fpgadev-sge +Status: CREATE_COMPLETE +MasterServer: RUNNING +MasterPublicIP: 3.95.42.219 +ClusterUser: centos +MasterPrivateIP: 172.31.15.131 +``` + +**NOTE:** All of the scheduler commands have to be executed on the scheduler's master instance. +You will use ssh to connect to the instance. + +The CloudFormation template created an EC2 KeyPair, saved the private key at `~/pcluster.pem`, and added it to your ssh agent. +ParallelCluster is configured to use this key. +You can connect to the master using the following command. + +``` +pcluster ssh fpgadev-sge +``` + +Any additional arguments are passed to the ssh command. +This allows you to run commands on the master from your desktop. +For example you can check that your project data is mounted +in the cluster. + +``` +$ pcluster ssh fpgadev-sge ls ~/src/project_data +aws-fpga +build_cl_hello_world.sh +``` + +Note that the master in this tutorial is configured as a t3.micro instance so it lacks the +compute resources required for running jobs. +Its role is to manage jobs running in the cluster. 
+ +The following sections show how to run the cl_hello_world example's DCP generation job +on ParallelCluster using the different schedulers. +The script to do the DCP generation is at `~/src/project_data/build_cl_hello_world.sh`. + + + +### Building a DCP On ParallelCluster Using SGE + +Use the `qsub` command to submit the job on an SGE cluster. + +``` +$ pcluster ssh fpgadev-sge qsub ~/src/project_data/build_cl_hello_world.sh +Unable to run job: warning: ${UserName}'s job is not allowed to run in any queue +Your job 1 ("build_cl_hello_world.sh") has been submitted +Exiting. +``` + +The warning is because a compute node isn't available to run the job. +You can verify that the job was actually submitted using the `qstat` command. + +``` +$ qstat +job-ID prior name user state submit/start at queue slots ja-task-ID +----------------------------------------------------------------------------------------------------------------- + 1 0.55500 build_cl_h ${UserName} qw 09/17/2019 18:06:38 1 +``` + +ParallelCluster will detect that the job is queued and start a new compute node to run it. +You can verify this by going to the EC2 console. + +When the compute node starts, the job will transition to running state. + +``` +$ pcluster ssh fpgadev-sge qstat +job-ID prior name user state submit/start at queue slots ja-task-ID +----------------------------------------------------------------------------------------------------------------- + 1 0.55500 build_cl_h ${UserName} r 09/17/2019 18:38:15 all.q@ip-172-31-12-135.ec2.int 1 +``` + +The output of the job is written to your home directory on the master. + +``` +$ pcluster ssh fpgadev-sge ls +build_cl_hello_world.sh.e1 +build_cl_hello_world.sh.o1 +src +``` + + + +### Building a DCP On ParallelCluster Using Slurm + +The process for using Slurm is similar, except the scheduler commands are different. +Use the `sbatch` command to submit a job. 
+ +``` +$ pcluster ssh fpgadev-slurm sbatch src/project_data/build_cl_hello_world.sh +Submitted batch job 1 +``` + +Use the `squeue` command to check the status. + +``` +$ pcluster ssh fpgadev-slurm squeue + JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1 compute build_cl cartalla R 0:06 1 ip-172-31-13-182 +``` + + + +### Building a DCP On ParallelCluster Using Torque + +The process for using Torque is the same as sge except the output is different. +Use the `qsub` command to submit a job. + +``` +$ pcluster ssh fpgadev-torque qsub src/project_data/build_cl_hello_world.sh +1.ip-172-31-5-142.ec2.internal +``` + +Use the `qstat` command to check the status. + +``` +$ pcluster ssh fpgadev-torque qstat +Job ID Name User Time Use S Queue +------------------------- ---------------- --------------- -------- - ----- +1.ip-172-31-5-142.ec2.interna ...ello_world.sh cartalla 0 Q batch +``` + + + +## FAQ + +* How do I find out if my template deployment completed? + + * In the *Events* tab, you should see `*CREATE_COMPLETE*` for the `AWS::CloudFormation::Stack` event Type. + * In the *Stack Info* tab, you should see `*CREATE_COMPLETE*` in the Status field. + +* How do I update to a new template? + + You can update your deployed stack by going to the [CloudFormation console](https://console.aws.amazon.com/cloudformation) -> Stacks -> Your Stack and selecting the `Update` button at the top. + + You have three ways of updating: + + 1. Use current template + + This option lets you update parameters in the currently deployed stack. + Click next after selecting this option to see the parameters, change them and go through the deployment steps as before. + + 1. Replace current template + + This option lets you select an updated template. 
+ + If you want to update your stack with a new template that we have released, select this option and point to our template URL: + `https://aws-fpga-hdk-resources.s3.amazonaws.com/developer_resources/cfn_templates/dcv_with_pcluster.yaml` + + This will let you get any fixes and updates that we publish to the template. + + 1. Modify the template in the CloudFormation Designer + + This option lets you graphically edit the template and add parts depending on your need. + Check the [Official CloudFormation Designer documentation](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/working-with-templates-cfn-designer.html) for more details on how to get started! + +* How do I terminate my instance? + + To clean up resources created by a CloudFormation stack, we strongly suggest deleting the stack instead of deleting resources individually. + + CloudFormation will handle the instance termination for you. + + To delete a stack, please follow the [CloudFormation User Guide](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-console-delete-stack.html) + +* How do I troubleshoot CloudFormation stack deployment issues? + + To start off, please check the [CloudFormation Troubleshooting Guide](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/troubleshooting.html) + + Next, post a question on the [FPGA Development forum](https://forums.aws.amazon.com/forum.jspa?forumID=243&start=0) **OR** file a support ticket from the [Support Center](https://console.aws.amazon.com/support) and someone should be able to help you out! 
+ + + +## References + +- [NICE DCV](https://docs.aws.amazon.com/dcv/latest/adminguide/what-is-dcv.html) +- [Xilinx Vivado Design Suite](https://www.xilinx.com/products/design-tools/vivado.html) +- [AWS ParallelCluster](https://docs.aws.amazon.com/parallelcluster/latest/ug/what-is-aws-parallelcluster.html) diff --git a/developer_resources/README.md b/developer_resources/README.md new file mode 100644 index 00000000..91f1165e --- /dev/null +++ b/developer_resources/README.md @@ -0,0 +1,15 @@ +# Developer Resources + +We provide the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) with Xilinx tools pre-installed and setup for development right away. + +However, setting up an instance for development might require a few extra steps like +* Setting up a GUI Desktop. +* Setting up a Runtime instance. +* Setting up a development environment that includes an elastic compute cluster. + +This document guides developers to setting up their development environment for development, +building/running/debugging designs and managing costs while doing so. 
+ +We have provided the following two ways of setting up your development environment: +* [Guide for creating a GUI Desktop for FPGA Development using NICE DCV and the FPGA Developer AMI](DCV.md) +* [Automated Deployment of a GUI Desktop and HPC cluster for FPGA Development based on NICE DCV and AWS ParallelCluster](DCV_with_ParallelCluster.md) \ No newline at end of file diff --git a/developer_resources/images/Continue_to_Subscribe.png b/developer_resources/images/Continue_to_Subscribe.png new file mode 100644 index 00000000..dd13b148 Binary files /dev/null and b/developer_resources/images/Continue_to_Subscribe.png differ diff --git a/developer_resources/images/Launch_on_AWS.png b/developer_resources/images/Launch_on_AWS.png new file mode 100644 index 00000000..89fc87e0 Binary files /dev/null and b/developer_resources/images/Launch_on_AWS.png differ diff --git a/developer_resources/images/capabilities_checkbox.png b/developer_resources/images/capabilities_checkbox.png new file mode 100644 index 00000000..bb536a1f Binary files /dev/null and b/developer_resources/images/capabilities_checkbox.png differ diff --git a/developer_resources/images/dcv_desktop.png b/developer_resources/images/dcv_desktop.png new file mode 100644 index 00000000..3c62f20b Binary files /dev/null and b/developer_resources/images/dcv_desktop.png differ diff --git a/developer_resources/images/dcv_login.png b/developer_resources/images/dcv_login.png new file mode 100644 index 00000000..9ca3208f Binary files /dev/null and b/developer_resources/images/dcv_login.png differ diff --git a/developer_resources/images/stack_details.png b/developer_resources/images/stack_details.png new file mode 100644 index 00000000..cb6f94d8 Binary files /dev/null and b/developer_resources/images/stack_details.png differ diff --git a/developer_resources/images/vivado_dcv_diagram.png b/developer_resources/images/vivado_dcv_diagram.png new file mode 100644 index 00000000..e29b28e2 Binary files /dev/null and 
b/developer_resources/images/vivado_dcv_diagram.png differ diff --git a/developer_resources/images/vivado_launch.png b/developer_resources/images/vivado_launch.png new file mode 100644 index 00000000..43166db2 Binary files /dev/null and b/developer_resources/images/vivado_launch.png differ diff --git a/developer_resources/images/vivado_startup.png b/developer_resources/images/vivado_startup.png new file mode 100644 index 00000000..848bd687 Binary files /dev/null and b/developer_resources/images/vivado_startup.png differ diff --git a/docs/examples/example_list.md b/docs/examples/example_list.md new file mode 100644 index 00000000..5cff1e6d --- /dev/null +++ b/docs/examples/example_list.md @@ -0,0 +1,28 @@ +## Example Applications List + +| Accelerator Application | Example | Development Environment | Description | +| --------|---------|---------|-------| +| Custom hardware | [cl\_hello\_world](../../hdk/cl/examples/cl_hello_world) | HDK - RTL (Verilog) | Simple [getting started example](../../hdk/README.md) with minimal hardware | +| Custom hardware | [cl\_dram\_dma](../../hdk/cl/examples/cl_dram_dma) | HDK - RTL (Verilog) | Demonstrates CL connectivity to the F1 shell and connectivity to/from all DDRs | +| Custom hardware | [IP integration example using a GUI - cl\_dram\_dma\_hlx](../../hdk/cl/examples/cl_dram_dma_hlx) | HLx - Verilog | Demonstrates CL connectivity to the F1 shell and connectivity to/from DRAM using the Vivado IP Integrator GUI | +| Custom hardware | [Virtual Ethernet Application](../../sdk/apps/virtual-ethernet) | [Streaming Data Engine](../../hdk/cl/examples/cl_sde) | The Virtual Ethernet framework facilitates streaming Ethernet frames from a network interface (or any source) into the FPGA for processing and back out to some destination. Possible use cases for this include deep packet inspection, software defined networking, stream encryption or compression, and more. 
| +| Custom hardware | [Pipelined Workload Applications - cl\_dram\_dma\_data\_retention](../../hdk/docs/data_retention.md)| [HDK](../../hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_retention.c) [SDAccel](../../SDAccel/examples/aws/data_retention) | Demonstrates how to preserve data in DRAMs while swapping out accelerators. Applications that use a temporal accelerator pipeline can take advantage of this feature to reduce latency between FPGA image swaps | +| High Level Synthesis | [Digital Up-Converter - cl\_hls\_dds\_hlx](../../hdk/cl/examples/cl_hls_dds_hlx) | HLx - C-to-RTL | Demonstrates an example application written in C that is synthesized to RTL (Verilog) | +| Custom Hardware with Software Defined Acceleration | [RTL Kernels](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/rtl_kernels) | Vitis - RTL (Verilog) + C/C++/OpenCL | These examples demonstrate developing new hardware designs (RTL) in a Software Defined workflow| +| Vitis Compression Libraries | [File Compression using GZip](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/gzip_app) | Vitis - C/C++/OpenCL | This example demonstrates how to use Vitis Libraries to speed up GZIP compression on an FPGA | +| Vitis BLAS libraries | [Matrix Transposer using BLAS](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/transp) | Vitis - C/C++/OpenCL | This example shows how to use Vitis BLAS Libraries to create a Matrix Transposer on an FPGA | +| Vitis Financial libraries | [Monte Carlo European Engine](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/MCEuropeanEngine) | Vitis - C/C++/OpenCL | This example shows how to use Vitis Financial Libraries to accelerate MCEuropean Engine on an FPGA| + +## Application Notes + +| App Note | Description | +|---------|---------| +| [Using PCIe Peer-2-Peer connectivity](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIe-Peer2Peer) | This app note shows how
to use PCIe P2P connectivity on F1.16XL instances | +| [Using PCIM Port](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIM-Port) | This app note shows how to use the PCIM AXI port to transfer data between card and host memory | +| [Using PCIe User Interrupts](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIe-Interrupts) | This app note describes the basic kernel calls needed for a developer to write a custom interrupt service routine (ISR) and provides an example that demonstrates those calls | +| [Using PCIe Write Combining](https://github.com/awslabs/aws-fpga-app-notes/tree/master/Using-PCIe-Write-Combining) | This app note describes when to use write combining and how to take advantage of write combining in software for a F1 accelerator | + +## Workshops + +* [ReInvent:19 Workshop](https://github.com/awslabs/aws-fpga-app-notes/tree/master/reInvent19_Developer_Workshop) +* [ReInvent:18 Workshop](https://github.com/awslabs/aws-fpga-app-notes/tree/master/reInvent18_Developer_Workshop) diff --git a/hdk/docs/on_premise_licensing_help.md b/docs/on_premise_licensing_help.md similarity index 64% rename from hdk/docs/on_premise_licensing_help.md rename to docs/on_premise_licensing_help.md index 06059c9b..8b8f2af6 100644 --- a/hdk/docs/on_premise_licensing_help.md +++ b/docs/on_premise_licensing_help.md @@ -1,31 +1,47 @@ - # Enabling on-premises development with Xilinx tools **NOTE: If you are developing on the AWS cloud and using AWS FPGA Developer AMI provided on AWS Marketplace, you can skip this document.** This document helps developers who choose to develop on-premises with specifying and licensing AWS-compatible Xilinx tools for use with the AWS FPGA HDK. 
+## Requirements for AWS HDK 1.4.18+ (2020.2) + * Xilinx Vivado or Vitis v2020.2 + * License: EF-VIVADO-SDX-VU9P-OP + * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_Unified_2020.2_1118_1232.tar.gz + * MD5 SUM Value: 523e8596f114ab5e389c14df50ecb1d8 +## Requirements for AWS HDK 1.4.16+ (2020.1) + * Xilinx Vivado or Vitis v2020.1 + * License: EF-VIVADO-SDX-VU9P-OP + * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_Unified_2020.1_0602_1208.tar.gz + * MD5 SUM Value: b018f7b331ab0446137756156ff944d9 + +## Requirements for AWS HDK 1.4.13+ (2019.2) + * Xilinx Vivado or Vitis v2019.2 + * License: EF-VIVADO-SDX-VU9P-OP + * URL: https://www.xilinx.com/member/forms/download/xef-vitis.html?filename=Xilinx_Vitis_2019.2_1106_2127.tar.gz + * MD5 SUM Value: d63bae9cad9bcaa4b2c7f6df9480eaa6 + +## Requirements for AWS HDK 1.4.11+ (2019.1) + * Xilinx Vivado v2019.1 or v2019.1.op (64-bit) + * License: EF-VIVADO-SDX-VU9P-OP + * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDAccel_2019.1_0524_1430_Lin64.bin + * MD5 SUM Value: aa20eba36ebe480ec7ae59a4a8c85896 + ## Requirements for AWS HDK 1.4.8+ (2018.3) * Xilinx Vivado v2018.3 or v2018.3.op (64-bit) * License: EF-VIVADO-SDX-VU9P-OP - * SW Build 2405991 on Thu Dec 6 23:36:41 MST 2018 - * IP Build 2404404 on Fri Dec 7 01:43:56 MST 2018 * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDx_op_Lin_2018.3_1207_2324_Lin64.bin&akdm=0 * MD5 SUM Value: aa20eba36ebe480ec7ae59a4a8c85896 ## Requirements for AWS HDK 1.4.4+ (2018.2) * Xilinx Vivado v2018.2 or v2018.2.op (64-bit) * License: EF-VIVADO-SDX-VU9P-OP - * SW Build 2258646 on Thu Jun 14 20:02:38 MDT 2018 - * IP Build 2256618 on Thu Jun 14 22:10:49 MDT 2018 * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDx_op_Lin_2018.2_0614_1954_Lin64.bin&akdm=0 * MD5 SUM Value: 6b6939e70d4fa90677d2c54a37ec25c7 ## Requirements for AWS HDK 1.3.7+ (2017.4) * Xilinx 
Vivado v2017.4.op (64-bit) * License: EF-VIVADO-SDX-VU9P-OP - * SW Build 2193838 on Tue Apr 10 18:06:59 MDT 2018 - * IP Build 2189296 on Tue Apr 10 19:39:46 MDT 2018 * URL: https://www.xilinx.com/member/forms/download/xef.html?filename=Xilinx_SDx_op_2017.4_0411_1_Lin64.bin&akdm=0 * MD5 SUM Value: e0b59c86d5ddee601ab17a069d231207 diff --git a/hdk/README.md b/hdk/README.md index bd6a9686..6e3e3954 100644 --- a/hdk/README.md +++ b/hdk/README.md @@ -27,9 +27,9 @@ * Familiarity with concepts related to designing for FPGAs, DMA, DDR, AXI protocol and linux drivers * RTL simulation * Experience with simulation debug or FPGA runtime waveform viewer debug methods -* Developers not familiar with these areas should start with [software defined acceleration](../SDAccel/README.md) -* Developers with existing RTL IP that are not familiar with the areas listed above should start with RTL Kernel development using [software defined acceleration](../SDAccel/README.md). -* Developers looking for a faster HDK development path, should start with RTL Kernel development using [software defined acceleration](../SDAccel/README.md) +* Developers not familiar with these areas should start with [software defined acceleration](../Vitis/README.md) +* Developers with existing RTL IP that are not familiar with the areas listed above should start with RTL Kernel development using [software defined acceleration](../Vitis/README.md). +* Developers looking for a faster HDK development path, should start with RTL Kernel development using [software defined acceleration](../Vitis/README.md) * The [documents directory](./docs) provides the specification for the AWS Shell (SH) to Custom Logic (CL) interface: * [Shell Interface](./docs/AWS_Shell_Interface_Specification.md) @@ -44,7 +44,8 @@ * Developers should not need to change any file under the `/common` directory * `shell_stable` directory contains the files needed by developers to build a CL using a current production shell. 
-* The [Custom Logic (cl) directory](./cl) is where the Custom Logic is expected to be developed (For RTL-based development using Verilog or VHDL). It includes a number of examples under the [examples directory](./cl/examples), as well as a placeholder for the developer's own Custom Logic under [developer_designs directory](./cl/developer_designs). For more details on the examples, see the [examples table](./cl/examples/cl_examples_list.md). +* The [Custom Logic (cl) directory](./cl) is where the Custom Logic is expected to be developed (For RTL-based development using Verilog or VHDL). It includes a number of examples under the [examples directory](./cl/examples), as well as a placeholder for the developer's own Custom Logic under [developer_designs directory](./cl/developer_designs). +For more details on the examples, see the [examples table](./cl/examples/cl_examples_list.md). ## Getting Started @@ -52,19 +53,23 @@ #### AWS Account, F1/EC2 Instances, On-Premises, AWS IAM Permissions, AWS CLI and S3 Setup (One-time Setup) * [Setup an AWS Account](https://aws.amazon.com/free/) -* Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with Vivado and required licenses. Given the large size of the FPGA used inside the AWS FPGA instances, the implementation tools require 32GiB Memory (ex: c4.4xlarge, m4.2xlarge, r4.xlarge, t2.2xlarge). c4.4xlarge and c4.8xlarge would provide the fastest execution time with 30 and 60GiB of memory respectively. Developers who want to save on cost, would start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. Follow the [On-Premises Instructions](docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. - * This release supports Xilinx SDx 2017.4 only. 
The compatibility table describes the mapping of developer kit version to [FPGA developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) version: +* Launch an instance using the [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes pre-installed with Vivado and required licenses. Given the large size of the FPGA used inside the AWS FPGA instances, the implementation tools require 32GiB Memory (ex: c4.4xlarge, m4.2xlarge, r4.xlarge, t2.2xlarge). c4.4xlarge and c4.8xlarge would provide the fastest execution time with 30 and 60GiB of memory respectively. Developers who want to save on cost, would start coding and run simulations on low-cost instances, like t2.2xlarge, and move to the aforementioned larger instances to run the synthesis of their acceleration code. Follow the [On-Premises Instructions](../docs/on_premise_licensing_help.md) to purchase and install a license from Xilinx. +* The compatibility table describes the mapping of developer kit version to [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) version: -| Developer Kit Version | Tool Version Supported | Compatible FPGA developer AMI Version | +| Developer Kit Version | Tool Version Supported | Compatible FPGA Developer AMI Version | |-----------|-----------|------| | 1.3.0-1.3.6 | 2017.1(Deprecated) | v1.3.5(Deprecated) | | 1.3.7-1.3.X | 2017.1(Deprecated) | v1.3.5-v1.3.X(Deprecated) | -| 1.3.7-1.3.X | 2017.4 | v1.4.0-v1.4.X (Xilinx SDx 2017.4) | -| 1.4.0-1.4.X | 2017.4 | v1.4.0-v1.4.X (Xilinx SDx 2017.4) | -| 1.4.3-1.4.X | 2018.2 | v1.5.0 (Xilinx SDx 2018.2) | -| 1.4.8-1.4.X | 2018.3 | v1.6.0 (Xilinx SDx 2018.3) | +| 1.3.7-1.4.15a | 2017.4 | v1.4.0-v1.4.X (Xilinx Vivado 2017.4) | +| 1.4.3-1.4.15a | 2018.2 | v1.5.0 (Xilinx Vivado 2018.2) | +| 1.4.8-1.4.15a | 2018.3 | v1.6.0 (Xilinx Vivado 2018.3) | +| 1.4.11-1.4.x | 2019.1 | v1.7.0 (Xilinx Vivado 2019.1) | +| 1.4.11-1.4.x | 2019.2 | v1.8.x (Xilinx Vivado 2019.2) | +| 1.4.16-1.4.x | 2020.1 | 
v1.9.x (Xilinx Vivado 2020.1) | +| 1.4.18-1.4.x | 2020.2 | v1.10.x (Xilinx Vivado 2020.2) | -* FPGA developer kit version is listed in [hdk_version.txt](./hdk_version.txt) + +* The FPGA Developer Kit version is listed in [hdk_version.txt](./hdk_version.txt) * FPGA developer kit supported tool versions are listed in [supported\_vivado\_versions](../supported_vivado_versions.txt) @@ -72,23 +77,31 @@ * [Setup AWS CLI and S3 Bucket](../SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md) to enable AFI creation. * To install the AWS CLI, please follow the [AWS CLI Installation guide](http://docs.aws.amazon.com/cli/latest/userguide/installing.html). - + ``` $ aws configure # to set your credentials (found in your console.aws.amazon.com page) and default region + ``` Use the aws-cli [region](http://docs.aws.amazon.com/cli/latest/userguide/cli-command-line.html) command line argument to override the profile default region. Supported regions include: us-east-1, us-west-2, eu-west-1 and us-gov-west-1. #### Install the HDK and setup environment -The AWS FPGA HDK can be cloned to your EC2 instance or server by executing: +The AWS FPGA HDK can be cloned to your instance by executing: -When using the developer AMI: ```AWS_FPGA_REPO_DIR=/home/centos/src/project_data/aws-fpga``` +> When using the FPGA Developer AMI, add: +> `AWS_FPGA_REPO_DIR=/home/centos/src/project_data/aws-fpga` +```bash $ git clone https://github.com/aws/aws-fpga.git $AWS_FPGA_REPO_DIR $ cd $AWS_FPGA_REPO_DIR $ source hdk_setup.sh +``` + +Sourcing `hdk_setup.sh` does the following: +* It sets required environment variables that are used throughout the examples in the HDK. +* Downloads DDR simulation models and DCP(s) from S3. -Note that sourcing `hdk_setup.sh` will set required environment variables that are used throughout the examples in the HDK. DDR simulation models and DCP(s) are downloaded from S3 during hdk setup. New terminal or xterm requires `hdk_setup.sh` to be rerun. 
+New terminal or xterm sessions require `hdk_setup.sh` to be sourced again so that the correct environment variables are set. #### Review examples @@ -101,15 +114,18 @@ The [Examples readme](./cl/examples/cl_examples_list.md) provides an overview of #### Fast path to running CL Examples on FPGA Instance -For developers that want to skip the development flow and start running the examples on the FPGA instance. You can skip steps 1 through 3 if you are not interested in the development process. Step 4 through 6 will show you how to use one of the predesigned AFI examples. By using the public AFIs, developers can skip the build flow steps and jump to step 4. [Public AFIs are available for each example and can be found in the example/README](cl/examples/cl_hello_world/README.md#metadata). +Developers who want to skip the development flow and start running the examples on the FPGA instance can skip steps 1 through 3. Steps 4 through 6 show how to use one of the pre-designed AFI examples. +By using the public AFIs, developers can skip the build flow steps and jump to step 4. [Public AFIs are available for each example and can be found in the example/README](cl/examples/cl_hello_world/README.md#metadata). #### Step 1. Pick one of the examples and start in the example directory It is recommended that you complete this step-by-step guide using HDK hello world example. Next use this same guide to develop using the [cl\_dram\_dma](cl/examples/cl_dram_dma). When your ready, copy one of the examples provided and modify the design files, scripts and constraints directory. +``` $ cd $HDK_DIR/cl/examples/cl_hello_world # you can change cl_hello_world to cl_dram_dma, cl_uram_example or cl_hello_world_vhdl $ export CL_DIR=$(pwd) +``` Setting up the CL_DIR environment variable is crucial as the build scripts rely on that value.
Each example follows the recommended directory structure to match the expected structure for HDK simulation and build scripts. @@ -120,20 +136,25 @@ Each example follows the recommended directory structure to match the expected s This [checklist](./cl/CHECKLIST_BEFORE_BUILDING_CL.md) should be consulted before you start the build process. **NOTE** *This step requires you to have Xilinx Vivado Tools and Licenses installed* - +``` $ vivado -mode batch # Verify Vivado is installed. +``` Executing the `aws_build_dcp_from_cl.sh` script will perform the entire implementation process converting the CL design into a completed Design Checkpoint that meets timing and placement constrains of the target FPGA. The output is a tarball file comprising the DCP file, and other log/manifest files, formatted as `YY_MM_DD-hhmm.Developer_CL.tar`. This file would be submitted to AWS to create an AFI. By default the build script will use Clock Group A Recipe A0 which uses a main clock of 125 MHz. +``` $ cd $CL_DIR/build/scripts $ ./aws_build_dcp_from_cl.sh +``` In order to use a 250 MHz main clock the developer can specify the A1 Clock Group A Recipe as in the following example: +``` $ cd $CL_DIR/build/scripts $ ./aws_build_dcp_from_cl.sh -clock_recipe_a A1 +``` Other clock recipes can be specified as well. More details on the [Clock Group Recipes Table](./docs/clock_recipes.csv) and how to specify different recipes can be found in the following [README](./common/shell_v04261818/new_cl_template/build/README.md). @@ -144,6 +165,7 @@ To be notified via e-mail when the build completes: 1. 
Set up notification via SNS: ``` + $ pip install --user --upgrade boto3 # boto3 package is required by the notify_via_sns script $ export EMAIL=your.email@example.com $ $AWS_FPGA_REPO_DIR/shared/bin/scripts/notify_via_sns.py @@ -294,7 +316,7 @@ If fpga-describe-local-image API call returns a status 'Busy', the FPGA is still Now, let us try loading your AFI to FPGA `slot 0`: ``` - $ sudo fpga-load-local-image -S 0 -I agfi-0f0e045f919413242 + $ sudo fpga-load-local-image -S 0 -I agfi-0fcf87119b8e97bf3 ``` @@ -303,11 +325,10 @@ Now, let us try loading your AFI to FPGA `slot 0`: Now, you can verify that the AFI was loaded properly. The output shows the FPGA in the “loaded” state after the FPGA image "load" operation. The "-R" option performs a PCI device remove and recan in order to expose the unique AFI Vendor and Device Id. ``` $ sudo fpga-describe-local-image -S 0 -R -H - Type FpgaImageSlot FpgaImageId StatusName StatusCode ErrorName ErrorCode ShVersion - AFI 0 agfi-0f0e045f919413242 loaded 0 ok 0 + AFI 0 agfi-0fcf87119b8e97bf3 loaded 0 ok 0 0x04261818 Type FpgaImageSlot VendorId DeviceId DBDF - AFIDEVICE 0 0x6789 0x1d50 0000:00:0f.0 + AFIDEVICE 0 0x1d0f 0xf000 0000:00:1d.0 ``` @@ -336,7 +357,7 @@ Follow the [RTL simulation environment setup](./docs/RTL_Simulating_CL_Designs.m * Before starting your new design review the specification for the AWS Shell (SH) to Custom Logic (CL) [interface](./docs/AWS_Shell_Interface_Specification.md). * Try the [debug flow](docs/Virtual_JTAG_XVC.md) and understand the [shell timeout behavior](docs/HOWTO_detect_shell_timeout.md). -* When your ready, copy an example to [start your own CL design](./cl/developer_designs/Starting_Your_Own_CL.md) and make a simple modification to get familiar with customizing the hardware developer kit for your development needs. 
+* When you are ready, copy an example to [start your own CL design](./cl/developer_designs/Starting_Your_Own_CL.md) and make a simple modification to get familiar with customizing the hardware developer kit for your development needs. diff --git a/hdk/cl/CHECKLIST_BEFORE_BUILDING_CL.md b/hdk/cl/CHECKLIST_BEFORE_BUILDING_CL.md index 0d8b0127..6913b9d1 100644 --- a/hdk/cl/CHECKLIST_BEFORE_BUILDING_CL.md +++ b/hdk/cl/CHECKLIST_BEFORE_BUILDING_CL.md @@ -11,3 +11,5 @@ This checklist includes important items that the developer should check before c 5. Update the timing and placement constraints under `$CL_DIR/build/constraints` for your design specific changes. 6. Update `$CL_DIR/build/scripts/create_dcp_from_cl.tcl` for your design specific changes, specifically around IP sources and xdc files, and your specific design xdc files. + +7. If you ran the HLx flow before, make sure you [follow the steps to switch between HLx and HDK flows](../docs/IPI_GUI_Vivado_Setup.md#hlxhdk_switch) \ No newline at end of file diff --git a/hdk/cl/developer_designs/prepare_new_cl.sh b/hdk/cl/developer_designs/prepare_new_cl.sh index 80d3e3f6..b84ebd6f 100755 --- a/hdk/cl/developer_designs/prepare_new_cl.sh +++ b/hdk/cl/developer_designs/prepare_new_cl.sh @@ -19,5 +19,5 @@ # Check if /build and /design directories exist, abort # Check if $HDK_COMMON_DIR exist -cp -r $HDK_SHELL_DIR/new_cl_template/build . +cp -rl $HDK_SHELL_DIR/new_cl_template/build . cp -r $HDK_SHELL_DIR/new_cl_template/design . diff --git a/hdk/cl/examples/cl_dram_dma/README.md b/hdk/cl/examples/cl_dram_dma/README.md index 26497be8..1f5713a4 100644 --- a/hdk/cl/examples/cl_dram_dma/README.md +++ b/hdk/cl/examples/cl_dram_dma/README.md @@ -188,6 +188,6 @@ Alternatively, you can directly use a pre-generated AFI for this CL. 
| PCI Vendor ID | 0x1D0F (Amazon) | | PCI Subsystem ID | 0x1D51 | | PCI Subsystem Vendor ID | 0xFEDC | -| Pre-generated AFI ID | afi-0583e8d7a84ac7ce2 | -| Pre-generated AGFI ID | agfi-0d132ece5c8010bf7 | +| Pre-generated AFI ID | afi-063e6afe717a22158 | +| Pre-generated AGFI ID | agfi-0b5c35827af676702 | diff --git a/hdk/cl/examples/cl_dram_dma/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_dram_dma/build/scripts/create_dcp_from_cl.tcl index f044ed7b..fdae02f6 100644 --- a/hdk/cl/examples/cl_dram_dma/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_dram_dma/build/scripts/create_dcp_from_cl.tcl @@ -19,7 +19,7 @@ package require tar set TOP top_sp ## Replace with the name of your module -set CL_MODULE cl_dram_dma +set CL_MODULE cl_dram_dma ################################################# ## Command-line Arguments @@ -39,7 +39,7 @@ set uram_option [lindex $argv 11] set notify_via_sns [lindex $argv 12] set VDEFINES [lindex $argv 13] ################################################## -## Flow control variables +## Flow control variables ################################################## set cl.synth 1 set implement 1 @@ -147,6 +147,9 @@ set_msg_config -id {DRC CKLD-2} -suppress set_msg_config -id {DRC REQP-1853} -suppress set_msg_config -id {Timing 38-436} -suppress +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + # Check that an email address has been set, else unset notify_via_sns if {[string compare $notify_via_sns "1"] == 0} { @@ -159,7 +162,7 @@ if {[string compare $notify_via_sns "1"] == 0} { } ################################################## -### Strategy options +### Strategy options ################################################## switch $strategy { "BASIC" { @@ -200,7 +203,7 @@ source $HDK_SHELL_DIR/build/scripts/device_type.tcl source $HDK_SHELL_DIR/build/scripts/step_user.tcl -notrace ######################################## 
-## Generate clocks based on Recipe +## Generate clocks based on Recipe ######################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling aws_gen_clk_constraints.tcl to generate clock constraints from developer's specified recipe."; @@ -254,7 +257,7 @@ if {$implement} { # Apply Clock Properties for Clock Table Recipes ################################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Sourcing aws_clock_properties.tcl to apply properties to clocks. "; - + # Apply properties to clocks source $HDK_SHELL_DIR/build/scripts/aws_clock_properties.tcl @@ -385,5 +388,3 @@ if {[string compare $notify_via_sns "1"] == 0} { } puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Build complete."; - - diff --git a/hdk/cl/examples/cl_dram_dma/design/cl_tst.sv b/hdk/cl/examples/cl_dram_dma/design/cl_tst.sv index 93f8d490..e8d60b7f 100644 --- a/hdk/cl/examples/cl_dram_dma/design/cl_tst.sv +++ b/hdk/cl/examples/cl_dram_dma/design/cl_tst.sv @@ -157,7 +157,7 @@ always_ff @(negedge rst_n or posedge clk) // configuration //------------------------------------------- -//Offset 0x00: +//Offset 0x00: // 0 - Continuous mode - Keep looping through all the isntructions. // 1 - Incrementing loop data (every time through loop increment the start data) // 2 - PRBS mode (else incremeting). Data will be generated with PRBS. If not enabled, data will be incrementing per DW @@ -176,7 +176,7 @@ always_ff @(negedge rst_n or posedge clk) // 15:0 - Read Start -- This is not implemented (not sure we need this) // 31:0 - Max Write ahead -- This is not implemented (not sure we need this) //Offset 0x08: -// 0 - Write Go (read back write in progress) - Write this bit to start executing the write instructions. Reads back '1' while write instructions are in progress. +// 0 - Write Go (read back write in progress) - Write this bit to start executing the write instructions. 
Reads back '1' while write instructions are in progress. // 1 - Read Go (read back write in progress) - Write this bit to start executing the read instructions. Reads back '1' while read instructions are in progress. // 2 - Read response pending (read only). REad only, reads back '1' while read responses are pending. //Offset 0x0c: @@ -188,7 +188,7 @@ always_ff @(negedge rst_n or posedge clk) //Offset 0x14: // 3:0 - Max Read outstanding - Max number of read requests to issue (how many simultaneous read requests) // -// Offset 0x1c: Write Index - Write instruction Index +// Offset 0x1c: Write Index - Write instruction Index // Offset 0x20: Write address low - Write instruction address // Offset 0x24: Write address high - Write instruction address // Offset 0x28: Write data - Write instruction start data. All other data will be incrementing or PRBS @@ -351,7 +351,7 @@ always @(posedge clk) cfg_wr_stretch <= 0; cfg_rd_stretch <= 0; end - else + else begin cfg_wr_stretch <= cfg_wr || (cfg_wr_stretch && !tst_cfg_ack); cfg_rd_stretch <= cfg_rd || (cfg_rd_stretch && !tst_cfg_ack); @@ -455,28 +455,28 @@ always @(posedge clk) always @(posedge clk) begin case (cfg_addr_q) - 8'h0: tst_cfg_rdata <= {6'h0, cfg_const_data_mode, cfg_inc_id_mode, - 2'h0, cfg_rd_loop_addr_shift[5:0], - 2'h0, cfg_wr_loop_addr_shift[5:0], + 8'h0: tst_cfg_rdata <= {6'h0, cfg_const_data_mode, cfg_inc_id_mode, + 2'h0, cfg_rd_loop_addr_shift[5:0], + 2'h0, cfg_wr_loop_addr_shift[5:0], cfg_user_mode, cfg_loop_addr_mode, cfg_iter_mode, cfg_sync_mode, cfg_rd_compare_en, cfg_prbs_mode, cfg_inc_data_loop_mode, cfg_cont_mode}; 8'h4: tst_cfg_rdata <= {cfg_max_write, cfg_read_start}; 8'h8: tst_cfg_rdata <= {rd_resp_pend, rd_inp, wr_inp}; 8'hc: tst_cfg_rdata <= {wr_state[1:0], rd_tag_avail[15:0]}; 8'h10: tst_cfg_rdata <= {cfg_rd_num_inst, cfg_wr_num_inst}; 8'h14: tst_cfg_rdata <= {cfg_max_read_req}; - - 8'h1c: tst_cfg_rdata <= cfg_wr_inst_index; + + 8'h1c: tst_cfg_rdata <= cfg_wr_inst_index; 8'h20: tst_cfg_rdata 
<= wr_cfg_inst_rdata_q; - 8'h24: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 32; - 8'h28: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 64; + 8'h24: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 32; + 8'h28: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 64; 8'h2c: tst_cfg_rdata <= {wr_cfg_inst_rdata_q[127:96]}; 8'h30: tst_cfg_rdata <= {31'b0, cfg_atg_enable}; 8'h3c: tst_cfg_rdata <= cfg_rd_inst_index; 8'h40: tst_cfg_rdata <= rd_cfg_inst_rdata_q; - 8'h44: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 32; - 8'h48: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 64; + 8'h44: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 32; + 8'h48: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 64; 8'h4c: tst_cfg_rdata <= {rd_cfg_inst_rdata_q[127:96]}; 8'h60: tst_cfg_rdata <= cfg_rd_data_index; @@ -535,7 +535,7 @@ always_ff @(posedge clk) tst_cfg_ack <= 0; else tst_cfg_ack <= ((cfg_wr_stretch||cfg_rd_stretch) && !cfg_ram_access && !tst_cfg_ack) || - ((cfg_wr_stretch||cfg_rd_stretch) && cfg_ram_access && rd_cfg_read_ram_ack && !tst_cfg_ack); + ((cfg_wr_stretch||cfg_rd_stretch) && cfg_ram_access && rd_cfg_read_ram_ack && !tst_cfg_ack); //--------------------------------------- // Inst RAMs @@ -578,7 +578,7 @@ always @(posedge clk) //-------------------------------- -// Write state machine +// Write state machine //-------------------------------- logic[7:0] wr_running_length = 0; @@ -609,7 +609,7 @@ begin begin if (awready) wr_state_nxt = WR_DAT; - else + else wr_state_nxt = WR_ADDR; end @@ -658,11 +658,11 @@ always @(posedge clk) wr_loop_count <= wr_loop_count + 1; end -//Increment wr_cyc_count after the Write data for the read/write holdoff +//Increment wr_cyc_count after the Write data bresp for the read/write holdoff always @(posedge clk) if (cfg_wr_go) wr_cyc_count <= 0; - else if ((wr_state==WR_DAT) && (wr_state_nxt!=WR_DAT)) + else if (bvalid && bready) wr_cyc_count <= wr_cyc_count + 1; //Timer @@ -724,7 +724,7 @@ always_ff @( posedge clk) awlen <= inst_wr_rdata[103:96]; awuser <= (cfg_user_mode)? 
inst_wr_rdata[127:112]: ((inst_wr_rdata[103:96]+1) * user_length_mult) - wr_first_adj - inst_wr_rdata[104+:ADJ_DW_WIDTH]; end - else + else begin awid <= 0; awaddr <=0 ; @@ -757,7 +757,7 @@ logic[DATA_WIDTH-1:0] first_wdata = 0; //Pre-compute this for timing always @(posedge clk) begin for (int i=0; i= wr_cyc_count); -wire rd_wr_holdoff = cfg_sync_mode && wr_inp && rd_cyc_holdoff; +wire rd_wr_holdoff = cfg_sync_mode && rd_cyc_holdoff; -//Increment the read instruction +//Increment the read instruction assign rd_tag_pop = rd_inp && rd_tag_some_avail && !rd_fifo_full && !rd_wr_holdoff; always @(posedge clk) @@ -1085,7 +1084,7 @@ always_ff @(posedge clk) // rd_trk[rid_q].running_length <= rd_trk[rid].running_length + 1; // rd_trk[rid_q].req_data <= rd_data_nxt; // end -// end +// end //rd_trk_wr.req_data[32*i+:32] = inst_rd_rdata_q[95:64] + (rd_loop_count[7:0] & {32{cfg_inc_data_loop_mode}}) + i; always_comb @@ -1116,8 +1115,8 @@ always @(posedge clk) rd_md_ram_wr_addr <= rd_md_ram_wr_addr_pre; rd_md_ram_wr <= rd_md_ram_wr_pre; rd_md_ram_wr_data <= rd_md_ram_wr_data_pre; - end - + end + always @(posedge clk) begin @@ -1162,7 +1161,7 @@ begin rd_data_mask = ((rd_trk_rd.running_length==0) && rlast_q)? ({DATA_WIDTH{1'b1}} << (rd_trk_first_adj*32)) & (~({DATA_WIDTH{1'b1}} << (({ADJ_DW_WIDTH+5{1'b1}} + 1) - (rd_trk_rd.last_adj[0+:ADJ_DW_WIDTH] * 32)) )): (rd_trk_rd.running_length==0)? ({DATA_WIDTH{1'b1}} << (rd_trk_first_adj*32)): (rlast_q)? 
~({DATA_WIDTH{1'b1}} << (({ADJ_DW_WIDTH+5{1'b1}} + 1) - (rd_trk_rd.last_adj[0+:ADJ_DW_WIDTH] * 32)) ): - {DATA_WIDTH{1'b1}}; + {DATA_WIDTH{1'b1}}; //for (int i=1; i> 2); @@ -1242,7 +1241,7 @@ flop_fifo #(.DEPTH(4), .WIDTH(9+11+8+64)) RD_REQ_FIFO ( .push(rd_tag_pop_qq), .push_data({rd_cur_req_tag, rd_push_user, inst_rd_rdata_q[103:96], rd_push_addr}), .pop(arvalid & arready), - + .pop_data({arid[8:0], aruser, arlen, araddr}), .half_full(), .watermark(rd_fifo_full), @@ -1250,7 +1249,7 @@ flop_fifo #(.DEPTH(4), .WIDTH(9+11+8+64)) RD_REQ_FIFO ( ); //------------------------------ -// Read track RAM +// Read track RAM bram_2rw #(.WIDTH(`RD_TRK_RAM_WIDTH), .ADDR_WIDTH(9), .DEPTH(512)) RD_TRK_RAM ( .clk(clk), @@ -1293,7 +1292,7 @@ always @(posedge clk) end assign rd_md_ram_rd_data = (rd_md_ram_col_q_pre)? rd_md_ram_wr_data_q_pre: - (rd_md_ram_col_q)? rd_md_ram_wr_data_q: + (rd_md_ram_col_q)? rd_md_ram_wr_data_q: rd_md_ram_rd_data_ram; @@ -1412,11 +1411,11 @@ always @(posedge clk) rresp_error_first; end - + ////Write addres recording //always_ff @(posedge clk) // if (cfg_wr_stretch && tst_cfg_ack && (cfg_addr_q==8'he0) && (cfg_wdata_q[31])) -// begin +// begin // for (int i=0; i<32; i++) // wr_addr_rec[i] <= {64{1'b1}}; // wr_addr_rec_ptr <= 0; @@ -1430,7 +1429,7 @@ always @(posedge clk) ////Read address recording //always_ff @(posedge clk) // if (cfg_wr_stretch && tst_cfg_ack && (cfg_addr_q==8'he0) && (cfg_wdata_q[31])) -// begin +// begin // for (int i=0; i<32; i++) // rd_addr_rec[i] <= {64{1'b1}}; // rd_addr_rec_ptr <= 0; @@ -1474,5 +1473,5 @@ begin end endfunction - + endmodule diff --git a/hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_hwsw_cosim.c b/hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_hwsw_cosim.c index 91c9e661..5444622a 100644 --- a/hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_hwsw_cosim.c +++ b/hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma_hwsw_cosim.c @@ -167,6 +167,7 @@ int dma_example_hwsw_cosim(int 
slot_id, size_t buffer_size) setup_send_rdbuf_to_c(read_buffer, buffer_size); printf("Starting DDR init...\n"); init_ddr(); + deselect_atg_hw(); printf("Done DDR init...\n"); #endif printf("filling buffer with random data...\n") ; diff --git a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile index 496e44ea..8671cd45 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile +++ b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile @@ -22,7 +22,8 @@ endif export TEST ?= test_null export C_TEST ?= test_null -export CL_ROOT = $(PWD)/../.. +export SCRIPTS_DIR = $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +export CL_ROOT = $(realpath $(SCRIPTS_DIR)/../..) export SDK_DIR = $(AWS_FPGA_REPO_DIR)/sdk export C_COMMON_DIR = $(HDK_COMMON_DIR)/software @@ -45,29 +46,29 @@ ifeq ($(C_TEST),test_null) ifeq ($(AXI_MEMORY_MODEL), 1) ifeq ($(ECC_DIRECT), 1) export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv_fast_ecc_direct - else + else ifeq ($(ECC_RAND), 1) export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv_fast_ecc_rnd else ifeq ($(DDR_BKDR), 1) export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv_fast_bkdr - else + else export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv_fast - endif + endif endif endif else ifeq ($(ECC_DIRECT), 1) export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv_ecc_direct - else + else ifeq ($(ECC_RAND), 1) export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv_ecc_rnd else ifeq ($(DDR_BKDR), 1) export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv_bkdr - else + else export SIM_DIR = $(SIM_ROOT)/$(TEST)_sv - endif + endif endif endif endif @@ -75,7 +76,6 @@ else export SIM_DIR = $(SIM_ROOT)/$(C_TEST)_c endif -export SCRIPTS_DIR = $(PWD) export XILINX_IP = $(HDK_SHELL_DESIGN_DIR)/ip export SH_LIB_DIR = $(HDK_SHELL_DESIGN_DIR)/lib export SH_INF_DIR = $(HDK_SHELL_DESIGN_DIR)/interfaces diff --git a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.ies b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.ies index 59da02e6..65df9e03 100644 --- 
a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.ies +++ b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.ies @@ -39,5 +39,5 @@ endif $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.questa b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.questa index bf14b3d3..f0fbe206 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.questa +++ b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.questa @@ -22,31 +22,21 @@ ## 2. make all QUESTA=1 -> Runs the test ################################################################## +LIBLISTS = $(COMMON_LIBLISTS) +LIBLISTS_ARGS = $(shell echo " $(strip $(LIBLISTS))" | sed 's|\ | -L |g') + compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) cd ${SIM_DIR} && ln -s -f ../questa_complib/modelsim.ini cd $(SIM_DIR) && vlog $(C_FILES) -ccflags "-I$(C_SDK_USR_INC_DIR)" -ccflags "-I$(C_SDK_USR_UTILS_DIR)" -ccflags "-I$(C_COMMON_DIR)/include" -ccflags "-I$(C_COMMON_DIR)/src" -ccflags "-DSV_TEST" -ccflags "-DSCOPE" -ccflags "-DQUESTA_SIM" -ccflags "-DINT_MAIN" -ccflags "-I$(C_INC_DIR)" - cd $(SIM_DIR) && vlog -suppress 2732 +define+DMA_TEST $(DEFAULT_DEFINES) -mfcu -sv -64 -timescale 1ps/1ps -93 -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/secureip -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f + cd $(SIM_DIR) && vlog +define+DMA_TEST $(DEFAULT_DEFINES) -mfcu -sv -64 -timescale 1ps/1ps -93 -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/secureip -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f + run: -ifeq ($(VIVADO_TOOL_VERSION), v2017.4) -ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L 
$(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_2_1 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 $(PLUSARGS) -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_2_1 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 $(PLUSARGS) -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else ifeq ($(VIVADO_TOOL_VERSION), v2018.3) ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_2_3 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L 
$(COMPLIB_DIR)/generic_baseblocks_v2_1_0 $(PLUSARGS) -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) $(PLUSARGS) -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_2_3 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 $(PLUSARGS) -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else -ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_2_2 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 $(PLUSARGS) -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L 
$(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_2_2 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 $(PLUSARGS) -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) $(PLUSARGS) -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) endif $(COMPLIB_DIR): diff --git a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.vcs b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.vcs index 3321775c..61cb8232 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.vcs +++ b/hdk/cl/examples/cl_dram_dma/verif/scripts/Makefile.vcs @@ -27,7 +27,7 @@ compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) cd ${SIM_DIR} && ln -s -f ../vcs_complib/synopsys_sim.setup cd $(SIM_DIR) && vlogan -g -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog +systemverilogext+.sv +libext+.sv +libext+.v -full64 -lca -v2005 +v2k -l compile.vlogan.log -debug_all -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f +define+VCS +define+DMA_TEST $(DEFAULT_DEFINES) +lint=TFIPC-L - cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) $(VCS_OPT) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR) -std=gnu99" -CFLAGS "-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" -CFLAGS "-I$(C_INC_DIR)" "-I$(C_SRC_DIR)" -debug_all -M -I +lint=TFIPC-L -debug_all -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log + cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) $(VCS_OPT) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR) -std=gnu99" -CFLAGS "-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" 
-CFLAGS "-I$(C_INC_DIR)" "-I$(C_SRC_DIR)" -debug_all -M +lint=TFIPC-L -debug_all -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log run: @@ -39,5 +39,5 @@ endif $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_axi_mstr_multi_rw.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_axi_mstr_multi_rw.sv index 717b1a3a..7bc3c9c5 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_axi_mstr_multi_rw.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_axi_mstr_multi_rw.sv @@ -134,7 +134,7 @@ for (int i = 0; i <= 12; i=i+4) begin //{ (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + $error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -200,7 +200,7 @@ for (int i = 0; i <= 12; i=i+4) begin //{ (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + 
$error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -266,7 +266,7 @@ for (int i = 0; i <= 12; i=i+4) begin //{ (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + $error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -333,7 +333,7 @@ for (int i = 0; i <= 12; i=i+4) begin //{ (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + $error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -356,7 +356,7 @@ end //} $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : TEST_FAILED", $realtime); + $error("[%t] : TEST_FAILED", $realtime); end else begin $display("[%t] : TEST_PASSED", $realtime); end diff --git 
a/hdk/cl/examples/cl_dram_dma/verif/tests/test_bar1.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_bar1.sv index 5fcc8731..4a6ce482 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_bar1.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_bar1.sv @@ -63,7 +63,7 @@ module test_bar1(); end while ((read_data[31:0] !== bar1_data[31:0]) && (timeout_count < 1000)); // UNMATCHED !! if ((timeout_count == 1000) || (read_data[31:0] !== bar1_data[31:0])) begin - $display("[%t] : *** ERROR *** Read data mismatch for bar1 exp_data %h act_data %h.", $realtime, bar1_data, read_data); + $error("[%t] : *** ERROR *** Read data mismatch for bar1 exp_data %h act_data %h.", $realtime, bar1_data, read_data); error_count++; end @@ -91,7 +91,7 @@ module test_bar1(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : TEST_FAILED", $realtime); + $error("[%t] : TEST_FAILED", $realtime); end else begin $display("[%t] : TEST_PASSED", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_clk_recipe.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_clk_recipe.sv index d8c76ace..fe21b1c3 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_clk_recipe.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_clk_recipe.sv @@ -215,7 +215,7 @@ module test_clk_recipe(); tb.power_down(); if (tb.chk_clk_err_cnt()) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr.sv index 642deeed..dfcf96ad 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr.sv @@ -82,7 +82,7 @@ module test_ddr(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail) 
begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end @@ -163,7 +163,7 @@ module test_ddr(); end while ((read_data[2:0] !== 3'b000) && (timeout_count < 100)); if ((timeout_count == 100) && (read_data[2:0] !== 3'b000)) begin - $display("[%t] : *** ERROR *** Timeout waiting for writes and reads to complete.", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for writes and reads to complete.", $realtime); error_count++; end else begin // Stop reads and writes ([1] for reads, [0] for writes) @@ -178,7 +178,7 @@ module test_ddr(); tb.peek_ocl(.addr(base_addr + 64'h0f4), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -189,7 +189,7 @@ module test_ddr(); tb.peek_ocl(.addr(base_addr + 64'h0fc), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -204,7 +204,7 @@ module test_ddr(); error_addr[63:32] = read_data; tb.peek_ocl(.addr(base_addr + 64'h0bc), .data(read_data)); error_index = read_data[3:0]; - $display("[%t] : *** ERROR *** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); + $error("[%t] : *** ERROR *** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); error_count++; end end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_bdr_walking_ones.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_bdr_walking_ones.sv index d339866a..d0a20a24 100644 --- 
a/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_bdr_walking_ones.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_bdr_walking_ones.sv @@ -162,7 +162,7 @@ module test_ddr_peek_bdr_walking_ones(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : TEST_FAILED", $realtime); + $error("[%t] : TEST_FAILED", $realtime); end else begin $display("[%t] : TEST_PASSED", $realtime); end @@ -180,7 +180,7 @@ module test_ddr_peek_bdr_walking_ones(); $display("Read Data for Addr %h: Act %h", bdr_addr, read_data); if (read_data != data) begin - $display("Read Data mismatch for Addr %h: Exp %h, Act %h", bdr_addr, data, read_data); + $error("Read Data mismatch for Addr %h: Exp %h, Act %h", bdr_addr, data, read_data); error_count++; end end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_poke.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_poke.sv index ea420f44..6ad3b844 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_poke.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_ddr_peek_poke.sv @@ -61,7 +61,7 @@ module test_ddr_peek_poke(); $display("Read to Addr %b", addr); tb.peek(.addr(addr), .data(wide_read_data), .size(DataSize::UINT512)); if (wide_read_data != {512{1'b1}}) begin - $display("Read Data mismatch for Addr %h: Exp %h, Act %h", addr, {512{1'b1}}, wide_read_data); + $error("Read Data mismatch for Addr %h: Exp %h, Act %h", addr, {512{1'b1}}, wide_read_data); error_count++; end //Walk through DDR address range to check if two adjacent bits are wrongly wired. 
@@ -75,7 +75,7 @@ module test_ddr_peek_poke(); $display("Read to Addr %b", addr); tb.peek(.addr(addr), .data(wide_read_data), .size(DataSize::UINT512)); if (wide_read_data != {512{1'b1}}) begin - $display("Read Data mismatch for Addr %h: Exp %h, Act %h", addr, {512{1'b1}}, wide_read_data); + $error("Read Data mismatch for Addr %h: Exp %h, Act %h", addr, {512{1'b1}}, wide_read_data); error_count++; end end @@ -95,7 +95,7 @@ module test_ddr_peek_poke(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dma_pcim_concurrent.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dma_pcim_concurrent.sv index da1193ed..e2c42543 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dma_pcim_concurrent.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dma_pcim_concurrent.sv @@ -139,9 +139,14 @@ module test_dma_pcim_concurrent(); end while ((status[0] !== 'h1) && (timeout_count < 4000)); if (timeout_count > 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end + + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. + // We need to wait for writes to finish to memory before issuing reads. 
+ $display("[%t] : Waiting for DMA write activity to complete", $realtime); + #500ns; $display("[%t] : starting C2H DMA channels ", $realtime); @@ -161,7 +166,7 @@ module test_dma_pcim_concurrent(); end while ((status[0] !== 'h1) && (timeout_count < 4000)); if (timeout_count > 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -174,7 +179,7 @@ module test_dma_pcim_concurrent(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. + // We need to wait for writes to finish to memory before issuing reads. + $display("[%t] : Waiting for DMA write activity to complete", $realtime); + #500ns; + $display("[%t] : starting C2H DMA channels ", $realtime); // read the data from cl and put it in the host memory @@ -114,7 +119,7 @@ module test_dma_pcis_concurrent(); end while ((status[0] !== 'h1) && (timeout_count < 4000)); if (timeout_count > 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -127,22 +132,24 @@ module test_dma_pcis_concurrent(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. 
+ // We need to wait for writes to finish to memory before issuing reads. + $display("[%t] : Waiting for DMA write activity to complete", $realtime); + #500ns; + $display("[%t] : starting C2H DMA channels ", $realtime); // read the data from cl and put it in the host memory @@ -166,7 +171,7 @@ module test_dma_sda_concurrent(); end while ((status[0] !== 'h1) && (timeout_count < 4000)); if (timeout_count > 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -179,7 +184,7 @@ module test_dma_sda_concurrent(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i= 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end - $display("[%t] : starting C2H DMA channels ", $realtime); + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. + // We need to wait for writes to finish to memory before issuing reads. 
+ $display("[%t] : Waiting for DMA write transfers to complete", $realtime); + #2us; + $display("[%t] : starting C2H DMA channels ", $realtime); + // read the data from cl and put it in the host memory host_memory_buffer_address = 64'h0_0001_0800; tb.que_cl_to_buffer(.chan(0), .dst_addr(host_memory_buffer_address), .cl_addr(64'h0000_0000_1f00), .len(len0) ); // move DDR0 to buffer @@ -157,7 +162,7 @@ module test_dram_dma(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -170,7 +175,7 @@ module test_dram_dma(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i= 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. + // We need to wait for writes to finish to memory before issuing reads. 
+ $display("[%t] : Waiting for DMA write activity to complete", $realtime); + #500ns; + $display("[%t] : starting C2H DMA channels ", $realtime); // read the data from cl and put it in the host memory @@ -154,7 +159,7 @@ module test_dram_dma_4k_crossing(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -167,7 +172,7 @@ module test_dram_dma_4k_crossing(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i= 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. + // We need to wait for writes to finish to memory before issuing reads. 
+ $display("[%t] : Waiting for DMA write activity to complete", $realtime); + #500ns; + $display("[%t] : starting C2H DMA channels ", $realtime); // read the data from cl and put it in the host memory @@ -152,7 +157,7 @@ module test_dram_dma_allgn_addr_4k(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -165,7 +170,7 @@ module test_dram_dma_allgn_addr_4k(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -129,7 +129,7 @@ module test_dram_dma_axi_mstr(); (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + $error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -175,7 +175,7 @@ module test_dram_dma_axi_mstr(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -188,7 +188,7 @@ module test_dram_dma_axi_mstr(); 
(tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + $error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -234,7 +234,7 @@ module test_dram_dma_axi_mstr(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -247,7 +247,7 @@ module test_dram_dma_axi_mstr(); (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + $error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -293,7 +293,7 @@ module test_dram_dma_axi_mstr(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -306,7 
+306,7 @@ module test_dram_dma_axi_mstr(); (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q), (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q) ); if (tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0] !== tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_rd_data_q[31:0]) begin - $display("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", + $error("[%t] : *** ERROR *** Data mismatch, addr:0x%0h_%0h write data is: 0x%h read data is: 0x%h", $realtime, tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_hi_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_addr_lo_q[31:0], tb.card.fpga.CL.CL_DRAM_DMA_AXI_MSTR.cmd_wr_data_q[31:0], @@ -328,7 +328,7 @@ module test_dram_dma_axi_mstr(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_bdr_common.svh b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_bdr_common.svh index a4a84954..394b4aed 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_bdr_common.svh +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_bdr_common.svh @@ -28,11 +28,6 @@ //Front Door read data for ( int i=0; i= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -64,11 +59,11 @@ for (int i = 0 ; i> 8; end end // for ( int i=0; i<6; i++) - endtask// dma_c2h_transfers \ No newline at end of file + endtask// dma_c2h_transfers diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_row_col_combo.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_row_col_combo.sv index 340c0041..f5cbddbe 
100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_row_col_combo.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_row_col_combo.sv @@ -117,7 +117,7 @@ module test_dram_dma_dram_bdr_row_col_combo(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : TEST_FAILED", $realtime); + $error("[%t] : TEST_FAILED", $realtime); end else begin $display("[%t] : TEST_PASSED", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_wr.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_wr.sv index 7b6b50da..802c7a0d 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_wr.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_dram_bdr_wr.sv @@ -94,7 +94,7 @@ module test_dram_dma_dram_bdr_wr(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_rd.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_rd.sv index 459c1146..24e12978 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_rd.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_rd.sv @@ -117,7 +117,7 @@ module test_dram_dma_mem_model_bdr_rd(); end while ((status != 4'hf) && (timeout_count < 4000)); if (timeout_count >= 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -130,7 +130,7 @@ module test_dram_dma_mem_model_bdr_rd(); // Put test pattern in host 
memory for (int i = 0 ; i < len0 ; i++) begin if (tb.card.fpga.CL.SH_DDR.u_mem_model.bfm_inst[0].u_bfm.axi_mem_bdr_read(.addr(ddr_A_addr)) !== 8'hAA) begin - $display("[%t] : *** ERROR *** DDR0 Data mismatch, addr:%0x read data is: %0x", + $error("[%t] : *** ERROR *** DDR0 Data mismatch, addr:%0x read data is: %0x", $realtime, ddr_A_addr, (tb.card.fpga.CL.SH_DDR.u_mem_model.bfm_inst[0].u_bfm.axi_mem_bdr_read(.addr(ddr_A_addr)))); error_count++; end @@ -145,7 +145,7 @@ module test_dram_dma_mem_model_bdr_rd(); // Put test pattern in host memory for (int i = 0 ; i < len1 ; i++) begin if (tb.card.fpga.CL.SH_DDR.u_mem_model.bfm_inst[1].u_bfm.axi_mem_bdr_read(.addr(ddr_B_addr)) !== 8'hBB) begin - $display("[%t] : *** ERROR *** DDR1 Data mismatch, addr:%0x read data is: %0x", + $error("[%t] : *** ERROR *** DDR1 Data mismatch, addr:%0x read data is: %0x", $realtime, ddr_B_addr, (tb.card.fpga.CL.SH_DDR.u_mem_model.bfm_inst[1].u_bfm.axi_mem_bdr_read(.addr(ddr_B_addr)))); error_count++; end @@ -159,7 +159,7 @@ module test_dram_dma_mem_model_bdr_rd(); // Put test pattern in hst memory for (int i = 0 ; i < len2 ; i++) begin if (tb.card.fpga.sh.u_mem_model.axi_mem_bdr_read(.addr(ddr_C_addr)) !== 8'hCC) begin - $display("[%t] : *** ERROR *** DDR2 Data mismatch, addr:%0x read data is: %0x", + $error("[%t] : *** ERROR *** DDR2 Data mismatch, addr:%0x read data is: %0x", $realtime, ddr_C_addr, (tb.card.fpga.sh.u_mem_model.axi_mem_bdr_read(.addr(ddr_C_addr)))); error_count++; end @@ -173,7 +173,7 @@ module test_dram_dma_mem_model_bdr_rd(); // Put test pattern in host memory for (int i = 0 ; i < len3 ; i++) begin if (tb.card.fpga.CL.SH_DDR.u_mem_model.bfm_inst[2].u_bfm.axi_mem_bdr_read(.addr(ddr_D_addr)) !== 8'hDD) begin - $display("[%t] : *** ERROR *** DDR3 Data mismatch, addr:%0x read data is: %0x", + $error("[%t] : *** ERROR *** DDR3 Data mismatch, addr:%0x read data is: %0x", $realtime, ddr_D_addr, 
(tb.card.fpga.CL.SH_DDR.u_mem_model.bfm_inst[2].u_bfm.axi_mem_bdr_read(.addr(ddr_D_addr)))); error_count++; end @@ -194,7 +194,7 @@ module test_dram_dma_mem_model_bdr_rd(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : TEST_FAILED", $realtime); + $error("[%t] : TEST_FAILED", $realtime); end else begin $display("[%t] : TEST_PASSED", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_wr.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_wr.sv index dfa3fce8..a5220eae 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_wr.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_dram_dma_mem_model_bdr_wr.sv @@ -130,7 +130,7 @@ module test_dram_dma_mem_model_bdr_wr(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -143,7 +143,7 @@ module test_dram_dma_mem_model_bdr_wr(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i= 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. + // We need to wait for writes to finish to memory before issuing reads. 
+ $display("[%t] : Waiting for DMA write activity to complete", $realtime); + #500ns; + $display("[%t] : starting C2H DMA channels ", $realtime); // read the data from cl and put it in the host memory @@ -172,7 +177,7 @@ module test_dram_dma_multi_ddr(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -186,7 +191,7 @@ module test_dram_dma_multi_ddr(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i= 10000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -179,7 +179,7 @@ module test_dram_dma_rnd(); end while ((status != 4'hf) && (timeout_count < 10000)); if (timeout_count >= 10000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -192,7 +192,7 @@ module test_dram_dma_rnd(); host_memory_buffer_address = 64'h0_000A_0000; for (int i = 0 ; i= 4000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end + // DMA transfers are posted writes. The above code checks only if the dma transfer is setup and done. + // We need to wait for writes to finish to memory before issuing reads. 
+ $display("[%t] : Waiting for DMA write activity to complete", $realtime); + #500ns; + $display("[%t] : starting C2H DMA channels ", $realtime); // read the data from cl and put it in the host memory @@ -152,7 +157,7 @@ module test_dram_dma_single_beat_4k(); end while ((status != 4'hf) && (timeout_count < 1000)); if (timeout_count >= 1000) begin - $display("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for dma transfers from cl", $realtime); error_count++; end @@ -165,7 +170,7 @@ module test_dram_dma_single_beat_4k(); host_memory_buffer_address = 64'h0_0001_0800; for (int i = 0 ; i 0) begin + fail = 1; + end + $display("[%t] : Detected %3d errors during this test", $realtime, error_count); + + if (fail || (tb.chk_prot_err_stat())) begin + $error("[%t] : *** TEST FAILED ***", $realtime); + end else begin + $display("[%t] : *** TEST PASSED ***", $realtime); + end + + $finish; + end // initial begin + +endmodule // test_host_pcim diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_int.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_int.sv index 3bf5d826..2daad804 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_int.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_int.sv @@ -66,9 +66,9 @@ module test_int(); timeout_count++; if (timeout_count == 100) begin if (vector_num !== vector_num2) begin - $display("[%t] : *** ERROR *** Timeout waiting for cl_int_tst Done bits to be set (vectors %2d, %2d).", $realtime, vector_num, vector_num2); + $error("[%t] : *** ERROR *** Timeout waiting for cl_int_tst Done bits to be set (vectors %2d, %2d).", $realtime, vector_num, vector_num2); end else begin - $display("[%t] : *** ERROR *** Timeout waiting for cl_int_tst Done bit to be set (vector %2d).", $realtime, vector_num); + $error("[%t] : *** ERROR *** Timeout waiting for cl_int_tst Done bit to be set (vector %2d).", $realtime, vector_num); end error_count++; end @@ -77,9 +77,9 @@ 
module test_int(); tb.peek_ocl(.addr(base_addr + 64'h000), .data(read_data)); if (read_data !== 32'h0000_0000) begin if (vector_num !== vector_num2) begin - $display("[%t] : *** ERROR *** Done bits were not cleared for vectors %2d, %2d.", $realtime, vector_num, vector_num2); + $error("[%t] : *** ERROR *** Done bits were not cleared for vectors %2d, %2d.", $realtime, vector_num, vector_num2); end else begin - $display("[%t] : *** ERROR *** Done bit was not cleared for vector %2d.", $realtime, vector_num); + $error("[%t] : *** ERROR *** Done bit was not cleared for vector %2d.", $realtime, vector_num); end error_count++; end @@ -100,7 +100,7 @@ module test_int(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke.sv index a53a653b..bfcee265 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke.sv @@ -106,11 +106,28 @@ module test_peek_poke(); tb.poke_ocl(.addr(`NUM_INST), .data(32'h0000_0000)); // Start writes and reads - tb.poke_ocl(.addr(`CNTL_REG), .data(`WR_START_BIT | `RD_START_BIT)); + tb.poke_ocl(.addr(`CNTL_REG), .data(`WR_START_BIT)); - $display("[%t] : Waiting for PCIe write and read activity to complete", $realtime); + $display("[%t] : Waiting for PCIe write activity to complete", $realtime); #500ns; - + timeout_count = 0; + //Even in SYNC mode ATG doesn't wait for write response before issuing read transactions. + //adding 500ns wait to account for random back pressure from sh_bfm on write address & write data channels. 
+ do begin + tb.peek_ocl(.addr(`CNTL_REG), .data(read_data)); + timeout_count++; + end while ((read_data[2:0] !== 3'b000) && (timeout_count < 100)); + + if ((timeout_count == 100) && (read_data[2:0] !== 3'b000)) begin + $error("[%t] : *** ERROR *** Timeout waiting for writes to complete.", $realtime); + error_count++; + end + + tb.poke_ocl(.addr(`CNTL_REG), .data(`RD_START_BIT)); + // adding 500ns wait to account for random back pressure from sh_bfm on read request channel. + $display("[%t] : Waiting for PCIe read activity to complete", $realtime); + #500ns; + timeout_count = 0; do begin tb.peek_ocl(.addr(`CNTL_REG), .data(read_data)); @@ -118,7 +135,7 @@ module test_peek_poke(); end while ((read_data[2:0] !== 3'b000) && (timeout_count < 100)); if ((timeout_count == 100) && (read_data[2:0] !== 3'b000)) begin - $display("[%t] : *** ERROR *** Timeout waiting for writes and reads to complete.", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for reads to complete.", $realtime); error_count++; end else begin // Stop reads and writes ([1] for reads, [0] for writes) @@ -133,7 +150,7 @@ module test_peek_poke(); tb.peek_ocl(.addr(`WR_CYCLE_CNT_HIGH), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -144,7 +161,7 @@ module test_peek_poke(); tb.peek_ocl(.addr(`RD_CYCLE_CNT_HIGH), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -159,7 +176,7 @@ module test_peek_poke(); error_addr[63:32] = read_data; tb.peek_ocl(.addr(`RD_ERR_INDEX), .data(read_data)); error_index = read_data[3:0]; - $display("[%t] : *** ERROR 
*** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); + $error("[%t] : *** ERROR *** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); error_count++; end end @@ -177,7 +194,7 @@ module test_peek_poke(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_len.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_len.sv index f50794a3..8c6168a7 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_len.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_len.sv @@ -114,11 +114,28 @@ module test_peek_poke_len(); tb.poke_ocl(.addr(`NUM_INST), .data(32'h0000_0000)); // Start writes and reads - tb.poke_ocl(.addr(`CNTL_REG), .data(`WR_START_BIT | `RD_START_BIT)); - - $display("[%t] : Waiting for PCIe write and read activity to complete", $realtime); + tb.poke_ocl(.addr(`CNTL_REG), .data(`WR_START_BIT)); + //Even in SYNC mode ATG doesn't wait for write response before issuing read transactions. + // adding 500ns wait to account for random back pressure from sh_bfm on write address & write data channels. 
+ $display("[%t] : Waiting for PCIe write activity to complete", $realtime); #500ns; + timeout_count = 0; + do begin + tb.peek_ocl(.addr(`CNTL_REG), .data(read_data)); + timeout_count++; + end while ((read_data[2:0] !== 3'b000) && (timeout_count < 100)); + + if ((timeout_count == 100) && (read_data[2:0] !== 3'b000)) begin + $error("[%t] : *** ERROR *** Timeout waiting for writes to complete.", $realtime); + error_count++; + end + + tb.poke_ocl(.addr(`CNTL_REG), .data(`RD_START_BIT)); + // adding 500ns wait to account for random back pressure from sh_bfm on read request channel. + $display("[%t] : Waiting for PCIe read activity to complete", $realtime); + #500ns; + timeout_count = 0; do begin tb.peek_ocl(.addr(`CNTL_REG), .data(read_data)); @@ -126,7 +143,7 @@ module test_peek_poke_len(); end while ((read_data[2:0] !== 3'b000) && (timeout_count < 100)); if ((timeout_count == 100) && (read_data[2:0] !== 3'b000)) begin - $display("[%t] : *** ERROR *** Timeout waiting for writes and reads to complete.", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for reads to complete.", $realtime); error_count++; end else begin // Stop reads and writes ([1] for reads, [0] for writes) @@ -141,7 +158,7 @@ module test_peek_poke_len(); tb.peek_ocl(.addr(`WR_CYCLE_CNT_HIGH), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -152,7 +169,7 @@ module test_peek_poke_len(); tb.peek_ocl(.addr(`RD_CYCLE_CNT_HIGH), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -167,7 +184,7 @@ module test_peek_poke_len(); error_addr[63:32] = 
read_data; tb.peek_ocl(.addr(`RD_ERR_INDEX), .data(read_data)); error_index = read_data[3:0]; - $display("[%t] : *** ERROR *** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); + $error("[%t] : *** ERROR *** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); error_count++; end end @@ -185,7 +202,7 @@ module test_peek_poke_len(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_pcis_axsize.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_pcis_axsize.sv index a2d6a296..a17182b7 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_pcis_axsize.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_pcis_axsize.sv @@ -85,7 +85,7 @@ module test_peek_poke_pcis_axsize(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end @@ -95,7 +95,7 @@ module test_peek_poke_pcis_axsize(); task compare_data(logic [511:0] act_data, exp_data); if(act_data !== exp_data) begin - $display($time,,,"***ERROR*** : Data Mismatch. Actual Data:%0h <==> Expected Data: %0h", + $error($time,,,"***ERROR*** : Data Mismatch. 
Actual Data:%0h <==> Expected Data: %0h", act_data, exp_data); error_count ++; end @@ -105,7 +105,7 @@ module test_peek_poke_pcis_axsize(); endtask task disp_err (input string s); - $display($time,,,"***ERROR*** : %s", s); + $error($time,,,"***ERROR*** : %s", s); error_count ++; endtask endmodule // test_peek_poke_pcis_axsize diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_rnd_lengths.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_rnd_lengths.sv index 78b07f2c..36ec6918 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_rnd_lengths.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_rnd_lengths.sv @@ -126,7 +126,7 @@ module test_peek_poke_rnd_lengths(); end while ((read_data[2:0] !== 3'b000) && (timeout_count < 100)); if ((timeout_count == 100) && (read_data[2:0] !== 3'b000)) begin - $display("[%t] : *** ERROR *** Timeout waiting for writes and reads to complete.", $realtime); + $error("[%t] : *** ERROR *** Timeout waiting for writes and reads to complete.", $realtime); error_count++; end else begin // Stop reads and writes ([1] for reads, [0] for writes) @@ -141,7 +141,7 @@ module test_peek_poke_rnd_lengths(); tb.peek_ocl(.addr(`WR_CYCLE_CNT_HIGH), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Write Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -152,7 +152,7 @@ module test_peek_poke_rnd_lengths(); tb.peek_ocl(.addr(`RD_CYCLE_CNT_HIGH), .data(read_data)); cycle_count[63:32] = read_data; if (cycle_count == 64'h0) begin - $display("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); + $error("[%t] : *** ERROR *** Read Timer value was 0x0 at end of test.", $realtime); error_count++; end @@ -167,7 +167,7 @@ module test_peek_poke_rnd_lengths(); error_addr[63:32] = read_data; 
tb.peek_ocl(.addr(`RD_ERR_INDEX), .data(read_data)); error_index = read_data[3:0]; - $display("[%t] : *** ERROR *** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); + $error("[%t] : *** ERROR *** Read compare error from address 0x%016x, index 0x%1x", $realtime, error_addr, error_index); error_count++; end end @@ -185,7 +185,7 @@ module test_peek_poke_rnd_lengths(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_wc.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_wc.sv index a9d61b38..99fc0f2c 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_wc.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_peek_poke_wc.sv @@ -123,7 +123,7 @@ module test_peek_poke_wc(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end @@ -133,7 +133,7 @@ module test_peek_poke_wc(); task compare_data(logic [511:0] act_data, exp_data); if(act_data !== exp_data) begin - $display($time,,,"***ERROR*** : Data Mismatch. Actual Data:%0h <==> Expected Data: %0h", + $error($time,,,"***ERROR*** : Data Mismatch. 
Actual Data:%0h <==> Expected Data: %0h", act_data, exp_data); error_count ++; end @@ -143,7 +143,7 @@ module test_peek_poke_wc(); endtask // compare_data task disp_err (input string s); - $display($time,,,"***ERROR*** : %s", s); + $error($time,,,"***ERROR*** : %s", s); error_count ++; endtask // disp_err diff --git a/hdk/cl/examples/cl_dram_dma/verif/tests/test_sda.sv b/hdk/cl/examples/cl_dram_dma/verif/tests/test_sda.sv index c208a7db..4ab86cd9 100644 --- a/hdk/cl/examples/cl_dram_dma/verif/tests/test_sda.sv +++ b/hdk/cl/examples/cl_dram_dma/verif/tests/test_sda.sv @@ -58,7 +58,7 @@ module test_sda(); end while ((read_data[31:0] !== sda_data[31:0]) && (timeout_count < 1000)); // UNMATCHED !! if ((timeout_count == 1000) || (read_data[31:0] !== sda_data[31:0])) begin - $display("[%t] : *** ERROR *** Read data mismatch for sda exp_data %h act_data %h.", $realtime, sda_data, read_data); + $error("[%t] : *** ERROR *** Read data mismatch for sda exp_data %h act_data %h.", $realtime, sda_data, read_data); error_count++; end @@ -84,7 +84,7 @@ module test_sda(); $display("[%t] : Detected %3d errors during this test", $realtime, error_count); if (fail || (tb.chk_prot_err_stat())) begin - $display("[%t] : *** TEST FAILED ***", $realtime); + $error("[%t] : *** TEST FAILED ***", $realtime); end else begin $display("[%t] : *** TEST PASSED ***", $realtime); end diff --git a/hdk/cl/examples/cl_dram_dma_hlx/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_dram_dma_hlx/build/scripts/create_dcp_from_cl.tcl index 90553f00..63445b80 100755 --- a/hdk/cl/examples/cl_dram_dma_hlx/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_dram_dma_hlx/build/scripts/create_dcp_from_cl.tcl @@ -37,6 +37,10 @@ if {[string compare $notify_via_sns "1"] == 0} { } } +# suppress warnings coming from Shell +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + 
################################################# ## Create BD (Block Design) of example Hello World design ################################################# diff --git a/hdk/cl/examples/cl_hello_world/README.md b/hdk/cl/examples/cl_hello_world/README.md index fdf690af..213086ef 100644 --- a/hdk/cl/examples/cl_hello_world/README.md +++ b/hdk/cl/examples/cl_hello_world/README.md @@ -1,7 +1,7 @@ # Hello World CL Example -## :exclamation: NOTE: If this is your first time using F1, you should read [How To Create an Amazon FPGA Image (AFI) From One of The CL Examples: Step-by-Step Guide](./../../../README.md) first!! +## **⚠️ NOTE:** If this is your first time using F1, you should read [How To Create an Amazon FPGA Image (AFI) From One of The CL Examples: Step-by-Step Guide](./../../../README.md) first!! ## Table of Contents @@ -13,29 +13,24 @@ ## Overview -This simple *hello_world* example builds a Custom Logic (CL) that will enable the instance to "peek" and "poke" registers in the Custom Logic (CL). -These registers will be in the memory space behind AppPF BAR0, which is the ocl\_cl\_ AXI-lite bus on the Shell to CL interface. - +This *hello_world* example builds a Custom Logic (CL) that will enable the instance to "peek" and "poke" registers in the Custom Logic (CL). This example demonstrate a basic use-case of the Virtual LED and Virtual DIP switches. -All of the unused interfaces between AWS Shell and the CL are tied to fixed values, and it is recommended that the developer use similar values for every unused interface in the developer's CL. - - ## Functional Description -The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. The cl_hello_world example implements two registers in the FPGA AppPF BAR0 memory space connected to the OCL AXI-L interface. 
The two registers are: +The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. +The cl_hello_world example implements two registers in the [FPGA AppPF BAR0 memory space](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) connected to the OCL AXI-L interface. +The two registers are: 1. Hello World Register (offset 0x500) 2. Virtual LED Register (offset 0x504) -Please refer to the [FPGA PCIe memory space overview](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) - The Hello World Register is a 32-bit read/write register. However, in order to demonstrate that the register is being accessed correctly, the read data returned for the register will be byte swapped. The Virtual LED register is a 16-bit read-only register that shadows the lower 16 bits of the Hello World Register such that it will hold the same value as bits 15:0 of the Hello World Register. -The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consistes of two signals described in the [cl_ports.vh] (./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: +The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consists of two signals described in the [cl_ports.vh](./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: ``` @@ -43,9 +38,12 @@ The cl_hello_world design utilizes the Virtual LED and DIP switch interface whic output logic[15:0] cl_sh_status_vled, //Virtual LEDs, monitored through FPGA management PF and tools ``` -In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. 
+In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. +In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. +So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. -While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. +While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. +While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. ### Unused interfaces diff --git a/hdk/cl/examples/cl_hello_world/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_hello_world/build/scripts/create_dcp_from_cl.tcl index 8f7b975c..d36310e6 100644 --- a/hdk/cl/examples/cl_hello_world/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_hello_world/build/scripts/create_dcp_from_cl.tcl @@ -39,7 +39,7 @@ set uram_option [lindex $argv 11] set notify_via_sns [lindex $argv 12] set VDEFINES [lindex $argv 13] ################################################## -## Flow control variables +## Flow control variables ################################################## set cl.synth 1 set implement 1 @@ -134,6 +134,9 @@ set_msg_config -id {Synth 8-3848} -suppress set_msg_config -id {Synth 8-3917} -suppress set_msg_config -id {Opt 31-430} -suppress +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling the encrypt.tcl."; # Check that an email address has been set, else 
unset notify_via_sns @@ -148,7 +151,7 @@ if {[string compare $notify_via_sns "1"] == 0} { } ################################################## -### Strategy options +### Strategy options ################################################## switch $strategy { "BASIC" { @@ -187,14 +190,14 @@ source $HDK_SHELL_DIR/build/scripts/device_type.tcl source $HDK_SHELL_DIR/build/scripts/step_user.tcl -notrace ######################################## -## Generate clocks based on Recipe +## Generate clocks based on Recipe ######################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling aws_gen_clk_constraints.tcl to generate clock constraints from developer's specified recipe."; source $HDK_SHELL_DIR/build/scripts/aws_gen_clk_constraints.tcl ################################################################# -#### Do not remove this setting. Need to workaround bug +#### Do not remove this setting. Need to workaround bug ################################################################## set_param hd.clockRoutingWireReduction false ################################################## @@ -236,7 +239,7 @@ if {$implement} { # Apply Clock Properties for Clock Table Recipes ################################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Sourcing aws_clock_properties.tcl to apply properties to clocks. 
"; - + # Apply properties to clocks source $HDK_SHELL_DIR/build/scripts/aws_clock_properties.tcl @@ -365,5 +368,3 @@ if {[string compare $notify_via_sns "1"] == 0} { } puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Build complete."; - - diff --git a/hdk/cl/examples/cl_hello_world/build/scripts/synth_cl_hello_world.tcl b/hdk/cl/examples/cl_hello_world/build/scripts/synth_cl_hello_world.tcl index f5819636..8d1d5c04 100644 --- a/hdk/cl/examples/cl_hello_world/build/scripts/synth_cl_hello_world.tcl +++ b/hdk/cl/examples/cl_hello_world/build/scripts/synth_cl_hello_world.tcl @@ -21,7 +21,7 @@ set VDEFINES $VDEFINES create_project -in_memory -part [DEVICE_TYPE] -force ######################################## -## Generate clocks based on Recipe +## Generate clocks based on Recipe ######################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling aws_gen_clk_constraints.tcl to generate clock constraints from developer's specified recipe."; diff --git a/hdk/cl/examples/cl_hello_world/software/runtime/test_hello_world.c b/hdk/cl/examples/cl_hello_world/software/runtime/test_hello_world.c index 1471055b..6bd8e358 100644 --- a/hdk/cl/examples/cl_hello_world/software/runtime/test_hello_world.c +++ b/hdk/cl/examples/cl_hello_world/software/runtime/test_hello_world.c @@ -52,10 +52,6 @@ static uint16_t pci_device_id = 0xF000; /* PCI Device ID preassigned by Amazon f * check if the corresponding AFI for hello_world is loaded */ int check_afi_ready(int slot_id); -/* - * An example to attach to an arbitrary slot, pf, and bar with register access. - */ -int peek_poke_example(uint32_t value, int slot_id, int pf_id, int bar_id); void usage(char* program_name) { printf("usage: %s [--slot ][]\n", program_name); @@ -65,6 +61,11 @@ uint32_t byte_swap(uint32_t value); #endif +/* + * An example to attach to an arbitrary slot, pf, and bar with register access. 
+ */ +int peek_poke_example(uint32_t value, int slot_id, int pf_id, int bar_id); + uint32_t byte_swap(uint32_t value) { uint32_t swapped_value = 0; int b; @@ -76,14 +77,15 @@ uint32_t byte_swap(uint32_t value) { #ifdef SV_TEST //For cadence and questa simulators the main has to return some value - #ifdef INT_MAIN - int test_main(uint32_t *exit_code) { - #else - void test_main(uint32_t *exit_code) { - #endif +# ifdef INT_MAIN +int test_main(uint32_t *exit_code) +# else +void test_main(uint32_t *exit_code) +# endif #else - int main(int argc, char **argv) { +int main(int argc, char **argv) #endif +{ //The statements within SCOPE ifdef below are needed for HW/SW co-simulation with VCS #ifdef SCOPE svScope scope; @@ -121,9 +123,9 @@ uint32_t byte_swap(uint32_t value) { } #endif - /* initialize the fpga_pci library so we could have access to FPGA PCIe from this applications */ - rc = fpga_pci_init(); - fail_on(rc, out, "Unable to initialize the fpga_pci library"); + /* initialize the fpga_mgmt library */ + rc = fpga_mgmt_init(); + fail_on(rc, out, "Unable to initialize the fpga_mgmt library"); #ifndef SV_TEST rc = check_afi_ready(slot_id); @@ -172,7 +174,7 @@ uint32_t byte_swap(uint32_t value) { /* As HW simulation test is not run on a AFI, the below function is not valid */ #ifndef SV_TEST - int check_afi_ready(int slot_id) { +int check_afi_ready(int slot_id) { struct fpga_mgmt_image_info info = {0}; int rc; @@ -218,7 +220,7 @@ uint32_t byte_swap(uint32_t value) { return rc; out: return 1; - } +} #endif diff --git a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile index 0531c878..6347104a 100644 --- a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile +++ b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile @@ -22,7 +22,8 @@ endif export TEST ?= test_null export C_TEST ?= test_null -export CL_ROOT = $(PWD)/../.. 
+export SCRIPTS_DIR = $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +export CL_ROOT = $(realpath $(SCRIPTS_DIR)/../..) export SDK_DIR = $(AWS_FPGA_REPO_DIR)/sdk export C_COMMON_DIR = $(HDK_COMMON_DIR)/software @@ -47,15 +48,13 @@ else export SIM_DIR = $(SIM_ROOT)/$(C_TEST)_c endif - -export SCRIPTS_DIR = $(PWD) export XILINX_IP = $(HDK_SHELL_DESIGN_DIR)/ip export SH_LIB_DIR = $(HDK_SHELL_DESIGN_DIR)/lib export SH_INF_DIR = $(HDK_SHELL_DESIGN_DIR)/interfaces export SH_SH_DIR = $(HDK_SHELL_DESIGN_DIR)/sh_ddr/sim SV_TEST_LIST = test_hello_world -C_FILES = $(C_TEST_NAME) $(C_SDK_USR_UTILS_DIR)/sh_dpi_tasks.c $(C_COMMON_DIR)/src/fpga_pci_sv.c +C_FILES = $(C_TEST_NAME) $(C_SDK_USR_UTILS_DIR)/sh_dpi_tasks.c $(C_COMMON_DIR)/src/fpga_pci_sv.c ifeq ($(XCHK), 1) all: make_sim_dir compile_chk run diff --git a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.ies b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.ies index dec3281b..12842ce3 100644 --- a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.ies +++ b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.ies @@ -39,5 +39,5 @@ endif $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.questa b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.questa index 206aa5f0..188155df 100644 --- a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.questa +++ b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.questa @@ -17,41 +17,30 @@ ################################################################## ## Makefile For Questa compiles and simulations ## Step to run : -## 1. make create_libs QUESTA=1 -> To generate xilinx compile +## 1. 
make create_libs QUESTA=1 -> To generate xilinx compile ## libraries. This is a one time step ## 2. make all QUESTA=1 -> Runs the test ################################################################## +LIBLISTS = $(COMMON_LIBLISTS) +LIBLISTS_ARGS = $(shell echo " $(strip $(LIBLISTS))" | sed 's|\ | -L |g') + compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) - cd ${SIM_DIR} && ln -s -f ../questa_complib/modelsim.ini - cd $(SIM_DIR) && vlog $(C_FILES) -ccflags "-I$(C_SDK_USR_INC_DIR)" -ccflags "-I$(C_SDK_USR_UTILS_DIR)" -ccflags "-I$(C_COMMON_DIR)/include" -ccflags "-I$(C_COMMON_DIR)/src" -ccflags "-DSV_TEST" -ccflags "-DSCOPE" -ccflags "-DQUESTA_SIM" -ccflags "-DINT_MAIN" -ccflags "-I$(C_INC_DIR)" + cd ${SIM_DIR} && ln -s -f ../questa_complib/modelsim.ini + cd $(SIM_DIR) && vlog $(C_FILES) -ccflags "-I$(C_SDK_USR_INC_DIR)" -ccflags "-I$(C_SDK_USR_UTILS_DIR)" -ccflags "-I$(C_COMMON_DIR)/include" -ccflags "-I$(C_COMMON_DIR)/src" -ccflags "-DSV_TEST" -ccflags "-DSCOPE" -ccflags "-DQUESTA_SIM" -ccflags "-DINT_MAIN" -ccflags "-I$(C_INC_DIR)" cd $(SIM_DIR) && vlog -mfcu -sv -64 -timescale 1ps/1ps -93 -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/secureip -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f run: -ifeq ($(VIVADO_TOOL_VERSION), v2017.4) ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_2_1 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) + cd 
$(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_2_1 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else ifeq ($(VIVADO_TOOL_VERSION), v2018.3) -ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_2_3 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L 
$(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_2_3 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else -ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_2_2 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_2_2 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) endif $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > 
create_libs.tcl cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl - cd $(SIM_ROOT) && rm -rf create_libs.tcl + cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.vcs b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.vcs index ed3f6487..9f3fbcd4 100644 --- a/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.vcs +++ b/hdk/cl/examples/cl_hello_world/verif/scripts/Makefile.vcs @@ -26,7 +26,7 @@ compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) cd ${SIM_DIR} && ln -s -f ../vcs_complib/synopsys_sim.setup cd $(SIM_DIR) && vlogan -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog +systemverilogext+.sv +libext+.sv +libext+.v -full64 -lca -v2005 +v2k -l compile.vlogan.log -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f +define+VCS $(DEFINES) +lint=TFIPC-L - cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR)" -CFLAGS "-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" -CFLAGS "-I$(C_INC_DIR)" -debug_all -M -I +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log + cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR)" -CFLAGS "-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" -CFLAGS "-I$(C_INC_DIR)" -debug_all -M +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log run: @@ -38,5 +38,5 @@ endif $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl \ No 
newline at end of file diff --git a/hdk/cl/examples/cl_hello_world/verif/tests/test_gl_cntr.sv b/hdk/cl/examples/cl_hello_world/verif/tests/test_gl_cntr.sv index 2f218188..0c7ee941 100644 --- a/hdk/cl/examples/cl_hello_world/verif/tests/test_gl_cntr.sv +++ b/hdk/cl/examples/cl_hello_world/verif/tests/test_gl_cntr.sv @@ -13,7 +13,10 @@ // implied. See the License for the specific language governing permissions and // limitations under the License. - +//------------------------------------------------------------------------------- +// Description: This test is a heartbeat check and checks the global counters values +// before and after a poke & peek operation on CL register. +//-------------------------------------------------------------------------------- module test_gl_cntr(); import tb_type_defines_pkg::*; @@ -37,6 +40,7 @@ import tb_type_defines_pkg::*; $display ("Global counter 0 value before poke is 0x%x \n", glcntr0); $display ("Global counter 1 value before poke is 0x%x \n", glcntr1); +// write to cl register. when read back we should see byte swap on this register. 
tb.poke(.addr(`HELLO_WORLD_REG_ADDR), .data(32'hDEAD_BEEF), .id(AXI_ID), .size(DataSize::UINT16), .intf(AxiPort::PORT_OCL)); // write register glcntr0 = tb.get_global_counter_0(); @@ -54,13 +58,17 @@ import tb_type_defines_pkg::*; tb.peek(.addr(`HELLO_WORLD_REG_ADDR), .data(rdata), .id(AXI_ID), .size(DataSize::UINT16), .intf(AxiPort::PORT_OCL)); // start read & write $display ("Reading 0x%x from address 0x%x", rdata, `HELLO_WORLD_REG_ADDR); + if (rdata == 32'hEFBE_ADDE) // Check for byte swap in register read + $display ("TEST PASSED"); + else + $error ("TEST FAILED"); + glcntr0 = tb.get_global_counter_0(); glcntr1 = tb.get_global_counter_1(); $display ("Global counter 0 value after peek is 0x%x \n", glcntr0); $display ("Global counter 1 value after peek is 0x%x \n", glcntr1); - $display ("*** TEST PASSED ***"); $finish; end // initial begin diff --git a/hdk/cl/examples/cl_hello_world/verif/tests/test_hello_world.sv b/hdk/cl/examples/cl_hello_world/verif/tests/test_hello_world.sv index 6401b30b..dd0f749b 100644 --- a/hdk/cl/examples/cl_hello_world/verif/tests/test_hello_world.sv +++ b/hdk/cl/examples/cl_hello_world/verif/tests/test_hello_world.sv @@ -13,6 +13,10 @@ // implied. See the License for the specific language governing permissions and // limitations under the License. +//------------------------------------------------------------------------------ +// Description: This test checks the byte swap feature of the hello_world CL. 
It also checks +// if the upper word of the CL register is written to Vdip +//------------------------------------------------------------------------------- module test_hello_world(); @@ -46,7 +50,7 @@ logic [15:0] vled_value; if (rdata == 32'hEFBE_ADDE) // Check for byte swap in register read $display ("TEST PASSED"); else - $display ("TEST FAILED"); + $error ("TEST FAILED"); tb.peek_ocl(.addr(`VLED_REG_ADDR), .data(rdata)); // start read $display ("Reading 0x%x from address 0x%x", rdata, `VLED_REG_ADDR); @@ -54,7 +58,7 @@ logic [15:0] vled_value; if (rdata == 32'h0000_BEEF) // Check for LED register read $display ("*** TEST PASSED ***"); else - $display ("*** TEST FAILED ***"); + $error ("*** TEST FAILED ***"); vled_value = tb.get_virtual_led(); diff --git a/hdk/cl/examples/cl_hello_world_hlx/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_hello_world_hlx/build/scripts/create_dcp_from_cl.tcl index af16971c..06938eee 100755 --- a/hdk/cl/examples/cl_hello_world_hlx/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_hello_world_hlx/build/scripts/create_dcp_from_cl.tcl @@ -37,6 +37,10 @@ if {[string compare $notify_via_sns "1"] == 0} { } } +# suppress warnings coming from Shell +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + ################################################# ## Create BD (Block Design) of example Hello World design ################################################# diff --git a/hdk/cl/examples/cl_hello_world_ref_hlx/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_hello_world_ref_hlx/build/scripts/create_dcp_from_cl.tcl index 385782dc..99ff2a68 100755 --- a/hdk/cl/examples/cl_hello_world_ref_hlx/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_hello_world_ref_hlx/build/scripts/create_dcp_from_cl.tcl @@ -29,6 +29,10 @@ if {[string compare $notify_via_sns "1"] == 0} { } } +# suppress warnings coming from 
Shell +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + ################################################# ## Create BD (Block Design) of example Hello World design ################################################# diff --git a/hdk/cl/examples/cl_hello_world_vhdl/README.md b/hdk/cl/examples/cl_hello_world_vhdl/README.md index 1722a67c..76e8158b 100644 --- a/hdk/cl/examples/cl_hello_world_vhdl/README.md +++ b/hdk/cl/examples/cl_hello_world_vhdl/README.md @@ -10,14 +10,9 @@ ## Overview -The purpose of this example is to provide an environment for VHDL users which uses the hello_world example. -This hello_world_vhdl example is based upon the main hello_world example except for a VHDL wrapper is provided for VHDL users. -This design can be modified to include or exclude certain interfaces for VHDL logic and mean't to be modified for VHDL designs/users. -Unused interfaces interfaces between AWS Shell and the CL are automatically tied off based upon `define in cl_hello_world_defines.vh. - -This simple *hello_world* example builds a Custom Logic (CL) that will enable the instance to "peek" and "poke" registers in the Custom Logic (C). These registers will be in the memory space behind AppPF BAR0, which is the ocl\_cl\_ AXI-lite bus on the Shell to CL interface. - -This example demonstrate a basic use-case of the Virtual LED and Virtual DIP switches. +The purpose of this example is to provide an environment for VHDL users which uses the `hello_world` example. +This `hello_world_vhdl` example is based upon the main `hello_world` example except for a VHDL wrapper is provided for VHDL users. +This design can be modified to include or exclude certain interfaces for VHDL logic. Please read here for [general instructions to build the CL, register an AFI, and start using it on an F1 instance](./../../../README.md). 
@@ -25,20 +20,20 @@ Please read here for [general instructions to build the CL, register an AFI, and ## Functional Description -The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. The cl_hello_world example implements two registers in the FPGA AppPF BAR0 memory space connected to the OCL AXI-L interface. The two registers are: +The cl_hello_world example demonstrates basic Shell-to-CL connectivity, memory-mapped register instantiations and the use of the Virtual LED and DIP switches. +The cl_hello_world example implements two registers in the [FPGA AppPF BAR0 memory space](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) connected to the OCL AXI-L interface. +The two registers are: 1. Hello World Register (offset 0x500) 2. Virtual LED Register (offset 0x504) -Please refer to the [FPGA PCIe memory space overview](../../../docs/AWS_Fpga_Pcie_Memory_Map.md) - -The Hello World logic is incorporated into a verilog module and called out in the VHDL wrapper. However, the debug logic is written in the VHDL wrapper. +The Hello World logic is incorporated into a verilog module and called out in the VHDL wrapper. However, the debug logic is written in the VHDL wrapper. The Hello World Register is a 32-bit read/write register. However, in order to demonstrate that the register is being accessed correctly, the read data returned for the register will be byte swapped. The Virtual LED register is a 16-bit read-only register that shadows the lower 16 bits of the Hello World Register such that it will hold the same value as bits 15:0 of the Hello World Register. 
-The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consistes of two signals described in the [cl_ports.vh] (./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: +The cl_hello_world design utilizes the Virtual LED and DIP switch interface which consists of two signals described in the [cl_ports.vh](./../../../common/shell_stable/design/interfaces/cl_ports.vh) file: ``` @@ -46,11 +41,17 @@ The cl_hello_world design utilizes the Virtual LED and DIP switch interface whic output logic[15:0] cl_sh_status_vled, //Virtual LEDs, monitored through FPGA management PF and tools ``` -In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. +In this example the Virtual LED Register is used to drive the Virtual LED signal, cl_sh_status_vled. +In addition, the Virtual DIP switch, sh_cl_status_vdip, is used to gate the Virtual LED Register value sent to the Virtual LEDs. +So, for example, if the sh_cl_status_vdip is set to 16'h00FF, then only the lower 8 bits of the Virtual LED Register will be signaled on the Virtual LED signal cl_sh_status_vled. + +While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. + +### Unused interfaces -While running on F1, the developer can use the FPGA tools `fpga-get-virtual-led` to read the LED values on the CL-to-Shell interface. While `fpga-set-virtual-dip-switch` tool is used to set the DIP switch values on the Shell-to-CL interface. 
+The Hello World example does not use most of AWS Shell interface, hence the unused signals are tied off. +At the end of `cl_hello_world.sv` file, there is a specific `include` command for an interface-specific `.inc` file, to handle the tie-off\'s for every unused interface. - ### VHDL Wrapper Information Clock/Reset/General Information @@ -61,9 +62,9 @@ MISC Interfaces are not added in wrappers (Interrupts). PCIM hasn't been fully tested in the VHDL flow. Use at your own risk but provide feedback if used. -Below is the hiearchy of the design. +Below is the hierarchy of the design. -cl_hello_world.sv - This module uses `define that are configured in cl_hello_world_defines.sv and ensure to tie off signals to the SH when necessary for seamless usage of the different flows (VHDL Flow this file shouldn't be modified) +cl_hello_world.sv - This module uses `define that are configured in cl_hello_world_defines.sv and ensure to tie off signals to the SH when necessary for seamless usage of the different flows (VHDL Flow this file shouldn't be modified) cl_hello_world_defines.sv - Comment out AXI Interfaces that are not used (AXIL_OCL, AXIL_USR, AXIL_SDA, DMA_PCIS, DDR4_SH, DDR4_CL, PCIM). -cl_vhdl_wrapper.vhd - VHDL users are encouraged to modify this wrapper based upon design requirements. VHDL Wrapper flow Can use generate statements to connect signals from top level ports when AXI Interfaces are used. Not required to use these generates statements but makes code more cleaner. This file currently connects the hello_world module for OCL AXI interface and VLED and VDIP logic and contains debug logic. @@ -79,11 +80,11 @@ Alternatively, you can directly use a pre-generated AFI for this CL. 
| Key | Value | |-----------|------| -| Shell Version | 0x04151701 | +| Shell Version | 0x04261818 | | PCI Device ID | 0xF000 | | PCI Vendor ID | 0x1D0F (Amazon) | | PCI Subsystem ID | 0x1D51 | | PCI Subsystem Vendor ID | 0xFEDD | -| Pre-generated AFI ID | afi-0f0927bc2649e6259 | -| Pre-generated AGFI ID | agfi-0f0e045f919413242 | +| Pre-generated AFI ID | afi-03d11a4ea66e883ef | +| Pre-generated AGFI ID | agfi-0fcf87119b8e97bf3 | diff --git a/hdk/cl/examples/cl_hello_world_vhdl/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_hello_world_vhdl/build/scripts/create_dcp_from_cl.tcl index ad7a216c..75a28a7b 100644 --- a/hdk/cl/examples/cl_hello_world_vhdl/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_hello_world_vhdl/build/scripts/create_dcp_from_cl.tcl @@ -39,7 +39,7 @@ set uram_option [lindex $argv 11] set notify_via_sns [lindex $argv 12] set VDEFINES [lindex $argv 13] ################################################## -## Flow control variables +## Flow control variables ################################################## set cl.synth 1 set implement 1 @@ -133,6 +133,10 @@ set_msg_config -id {Synth 8-350} -suppress set_msg_config -id {Synth 8-3848} -suppress set_msg_config -id {Synth 8-3917} -suppress +# suppress warnings coming from Shell +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling the encrypt.tcl."; # Check that an email address has been set, else unset notify_via_sns @@ -147,7 +151,7 @@ if {[string compare $notify_via_sns "1"] == 0} { } ################################################## -### Strategy options +### Strategy options ################################################## switch $strategy { "BASIC" { @@ -186,14 +190,14 @@ source $HDK_SHELL_DIR/build/scripts/device_type.tcl source $HDK_SHELL_DIR/build/scripts/step_user.tcl -notrace 
######################################## -## Generate clocks based on Recipe +## Generate clocks based on Recipe ######################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling aws_gen_clk_constraints.tcl to generate clock constraints from developer's specified recipe."; source $HDK_SHELL_DIR/build/scripts/aws_gen_clk_constraints.tcl ################################################################# -#### Do not remove this setting. Need to workaround bug +#### Do not remove this setting. Need to workaround bug ################################################################# set_param hd.clockRoutingWireReduction false @@ -235,7 +239,7 @@ if {$implement} { # Apply Clock Properties for Clock Table Recipes ################################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Sourcing aws_clock_properties.tcl to apply properties to clocks. "; - + # Apply properties to clocks source $HDK_SHELL_DIR/build/scripts/aws_clock_properties.tcl @@ -359,5 +363,3 @@ if {[string compare $notify_via_sns "1"] == 0} { } puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Build complete."; - - diff --git a/hdk/cl/examples/cl_hello_world_vhdl/software/verif_rtl/src/test_hello_world.c b/hdk/cl/examples/cl_hello_world_vhdl/software/verif_rtl/src/test_hello_world.c index 3969f969..d612bc8f 100644 --- a/hdk/cl/examples/cl_hello_world_vhdl/software/verif_rtl/src/test_hello_world.c +++ b/hdk/cl/examples/cl_hello_world_vhdl/software/verif_rtl/src/test_hello_world.c @@ -26,7 +26,7 @@ #include "sh_dpi_tasks.h" -#define HELLO_WORLD_REG_ADDR UINT64_C(0x00) +#define HELLO_WORLD_REG_ADDR UINT64_C(0x500) void test_main(uint32_t *exit_code) { diff --git a/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile b/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile index 76a16792..adb03a8e 100644 --- a/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile +++ 
b/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile @@ -22,7 +22,8 @@ endif export TEST ?= test_null export C_TEST ?= test_null -export CL_ROOT = $(PWD)/../.. +export SCRIPTS_DIR = $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +export CL_ROOT = $(realpath $(SCRIPTS_DIR)/../..) export C_INC_DIR = $(CL_ROOT)/software/verif_rtl/include export C_SRC_DIR = $(CL_ROOT)/software/verif_rtl/src @@ -40,7 +41,6 @@ else endif -export SCRIPTS_DIR = $(PWD) export XILINX_IP = $(HDK_SHELL_DESIGN_DIR)/ip export SH_LIB_DIR = $(HDK_SHELL_DESIGN_DIR)/lib export SH_INF_DIR = $(HDK_SHELL_DESIGN_DIR)/interfaces diff --git a/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.questa b/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.questa index 1307e4e4..19c18b94 100644 --- a/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.questa +++ b/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.questa @@ -22,22 +22,18 @@ ## 2. make all QUESTA=1 -> Runs the test ################################################################## +LIBLISTS = $(COMMON_LIBLISTS) +LIBLISTS_ARGS = $(shell echo " $(strip $(LIBLISTS))" | sed 's|\ | -L |g') + compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) cd ${SIM_DIR} && ln -s -f ../questa_complib/modelsim.ini cd $(SIM_DIR) && vlog $(C_TEST_NAME) -ccflags "-I$(C_INC_DIR)" - cd $(SIM_DIR) && vcom -work work -64 -93 -f $(SCRIPTS_DIR)/top_vhdl.$(SIMULATOR).f + cd $(SIM_DIR) && vcom -work work -64 -93 -f $(SCRIPTS_DIR)/top_vhdl.$(SIMULATOR).f cd $(SIM_DIR) && vlog -mfcu -sv -64 -timescale 1ps/1ps -93 -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/secureip -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f run: -ifeq ($(VIVADO_TOOL_VERSION), v2017.4) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L 
$(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_2_1 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -do "run -all; quit -f" tb glbl $(TEST) -else ifeq ($(VIVADO_TOOL_VERSION), v2018.3) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_2_3 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_2_2 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -do "run -all; quit -f" tb glbl $(TEST) -endif - + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) -do "run -all; quit -f" tb glbl $(TEST) $(COMPLIB_DIR): cd $(SIM_ROOT) && echo 
"compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl diff --git a/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.vcs b/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.vcs index 1e0d2200..b3c250dd 100644 --- a/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.vcs +++ b/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts/Makefile.vcs @@ -27,12 +27,12 @@ compile: $(COMPLIB_DIR) cd ${SIM_DIR} && ln -s -f ../vcs_complib/synopsys_sim.setup cd $(SIM_DIR) && vlogan -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog +systemverilogext+.sv +libext+.sv +libext+.v -full64 -lca -v2005 +v2k -l compile.vlogan.log -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f +define+VCS $(DEFINES) +lint=TFIPC-L cd $(SIM_DIR) && vhdlan -full64 -l compile.vhdlan.log -f $(SCRIPTS_DIR)/top_vhdl.$(SIMULATOR).f - cd $(SIM_DIR) && vcs tb $(TEST) $(C_TEST_NAME) -CFLAGS "-I$(C_INC_DIR)" -debug_all -M -I +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log + cd $(SIM_DIR) && vcs tb $(TEST) $(C_TEST_NAME) -CFLAGS "-I$(C_INC_DIR)" -debug_all -M +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log run: cd $(SIM_DIR) && ./simv -l $(TEST).log $(PLUSARGS) +ntb_random_seed_automatic +vpdfile+$(TEST).vpd $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_hls_dds_hlx/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_hls_dds_hlx/build/scripts/create_dcp_from_cl.tcl index 43d7d6d4..7e942750 100755 --- 
a/hdk/cl/examples/cl_hls_dds_hlx/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_hls_dds_hlx/build/scripts/create_dcp_from_cl.tcl @@ -29,6 +29,10 @@ if {[string compare $notify_via_sns "1"] == 0} { } } +# suppress warnings coming from Shell +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + ################################################# ## Create BD (Block Design) of example Hello World design ################################################# diff --git a/hdk/cl/examples/cl_ipi_cdma_test_hlx/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_ipi_cdma_test_hlx/build/scripts/create_dcp_from_cl.tcl index e146e23c..911f471d 100755 --- a/hdk/cl/examples/cl_ipi_cdma_test_hlx/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_ipi_cdma_test_hlx/build/scripts/create_dcp_from_cl.tcl @@ -29,6 +29,10 @@ if {[string compare $notify_via_sns "1"] == 0} { } } +# suppress warnings coming from Shell +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + ################################################# ## Create BD (Block Design) of example Hello World design ################################################# diff --git a/hdk/cl/examples/cl_sde/README.md b/hdk/cl/examples/cl_sde/README.md index db6f7525..bb9682ab 100644 --- a/hdk/cl/examples/cl_sde/README.md +++ b/hdk/cl/examples/cl_sde/README.md @@ -35,7 +35,7 @@ See [SDE HW Guide](../../../../sdk/apps/virtual-ethernet/doc/SDE_HW_Guide.md) fo ## Interfaces and Address Range ### Interfaces -CL_SDE uses two interfaces from the Shell. +CL_SDE uses three interfaces from the Shell. The PCIS interface is used to provide connectivity between the [Virtual Ethernet Application](../../../../sdk/apps/virtual-ethernet/doc/Virtual_Ethernet_Application_Guide.md) and the SDE. 
The OCL interface is used to provide connectivity between the host and all the test/control/utility blocks (except the SDE). The PCIM interfaces is used by the SDE to read and write to host memory. @@ -74,5 +74,5 @@ The following table displays information about the CL that is required to regist | PCI Vendor ID | 0x1D0F (Amazon) | | PCI Subsystem ID | 0x1D51 | | PCI Subsystem Vendor ID | 0xFEDC | -| Pre-generated AFI ID (N.Virginia:us-east-1) | afi-08fca33060fff4a62 | -| Pre-generated AGFI ID | agfi-0f4eca32dc6100729 | +| Pre-generated AFI ID (N.Virginia:us-east-1) | afi-030f5efdbdb03e186 | +| Pre-generated AGFI ID | agfi-0e6adf0cd6932d880 | diff --git a/hdk/cl/examples/cl_sde/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_sde/build/scripts/create_dcp_from_cl.tcl index 1f16f71b..699332f1 100644 --- a/hdk/cl/examples/cl_sde/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_sde/build/scripts/create_dcp_from_cl.tcl @@ -19,7 +19,7 @@ package require tar set TOP top_sp ## Replace with the name of your module -set CL_MODULE cl_sde +set CL_MODULE cl_sde ################################################# ## Command-line Arguments @@ -39,7 +39,7 @@ set uram_option [lindex $argv 11] set notify_via_sns [lindex $argv 12] ################################################## -## Flow control variables +## Flow control variables ################################################## set cl.synth 1 set implement 1 @@ -142,6 +142,9 @@ set_msg_config -id {DRC CKLD-2} -suppress set_msg_config -id {DRC REQP-1853} -suppress set_msg_config -id {Timing 38-436} -suppress +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling the encrypt.tcl."; # Check that an email address has been set, else unset notify_via_sns @@ -156,7 +159,7 @@ if {[string compare $notify_via_sns "1"] == 0} { } 
################################################## -### Strategy options +### Strategy options ################################################## switch $strategy { "BASIC" { @@ -195,7 +198,7 @@ source $HDK_SHELL_DIR/build/scripts/device_type.tcl source $HDK_SHELL_DIR/build/scripts/step_user.tcl -notrace ######################################## -## Generate clocks based on Recipe +## Generate clocks based on Recipe ######################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling aws_gen_clk_constraints.tcl to generate clock constraints from developer's specified recipe."; @@ -245,7 +248,7 @@ if {$implement} { # Apply Clock Properties for Clock Table Recipes ################################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Sourcing aws_clock_properties.tcl to apply properties to clocks. "; - + # Apply properties to clocks source $HDK_SHELL_DIR/build/scripts/aws_clock_properties.tcl @@ -369,5 +372,3 @@ if {[string compare $notify_via_sns "1"] == 0} { } puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Build complete."; - - diff --git a/hdk/cl/examples/cl_sde/build/scripts/encrypt.tcl b/hdk/cl/examples/cl_sde/build/scripts/encrypt.tcl index 6e662da4..651018b1 100644 --- a/hdk/cl/examples/cl_sde/build/scripts/encrypt.tcl +++ b/hdk/cl/examples/cl_sde/build/scripts/encrypt.tcl @@ -68,7 +68,7 @@ file copy -force $CL_DIR/design/sde_desc.sv $TARGET_DIR file copy -force $CL_DIR/design/sde_pm.sv $TARGET_DIR file copy -force $CL_DIR/design/sde_ps_acc.sv $TARGET_DIR file copy -force $CL_DIR/design/sde_ps.sv $TARGET_DIR - +file copy -force $CL_DIR/design/cl_sde_defines.vh $TARGET_DIR file copy -force $CL_DIR/design/cl_id_defines.vh $TARGET_DIR file copy -force $CL_DIR/design/cl_pkt_tst.sv $TARGET_DIR file copy -force $CL_DIR/design/cl_tst.sv $TARGET_DIR diff --git a/hdk/cl/examples/cl_sde/design/cl_pkt_tst.sv b/hdk/cl/examples/cl_sde/design/cl_pkt_tst.sv index 
923f9c12..1b8da105 100644 --- a/hdk/cl/examples/cl_sde/design/cl_pkt_tst.sv +++ b/hdk/cl/examples/cl_sde/design/cl_pkt_tst.sv @@ -79,18 +79,18 @@ module cl_pkt_tst #(parameter DATA_WIDTH = 512, // Should be atleast 32 ); - parameter DATA_DW = DATA_WIDTH / 32; + localparam DATA_DW = DATA_WIDTH / 32; `ifdef SIM // For simulation - parameter PREAMBLE_PKT_CNT = 32'hF; - parameter PREAMBLE_TKEEP = {{(TKEEP_WIDTH-4){1'b0}}, {4{1'b1}}}; + localparam PREAMBLE_PKT_CNT = 32'hF; + localparam PREAMBLE_TKEEP = {{(TKEEP_WIDTH-4){1'b0}}, {4{1'b1}}}; `else - parameter PREAMBLE_PKT_CNT = 32'hFF; - parameter PREAMBLE_TKEEP = {{(TKEEP_WIDTH-8){1'b0}}, {8{1'b1}}}; + localparam PREAMBLE_PKT_CNT = 32'hFF; + localparam PREAMBLE_TKEEP = {{(TKEEP_WIDTH-8){1'b0}}, {8{1'b1}}}; `endif - parameter TX_WAIT_CNT = 32'hF; - parameter RX_LOCK_CNT_MINUS1 = 32'h4; + localparam TX_WAIT_CNT = 32'hF; + localparam RX_LOCK_CNT_MINUS1 = 32'h4; typedef enum logic [2:0] {IDLE = 0, PREAMBLE = 1, diff --git a/hdk/cl/examples/cl_sde/design/cl_sde.sv b/hdk/cl/examples/cl_sde/design/cl_sde.sv index 2438d2ba..e5ebf42e 100644 --- a/hdk/cl/examples/cl_sde/design/cl_sde.sv +++ b/hdk/cl/examples/cl_sde/design/cl_sde.sv @@ -16,6 +16,7 @@ // CL Streaming +`include "cl_sde_defines.vh" module cl_sde @@ -210,6 +211,9 @@ module cl_sde logic [2:0] pre_sde_arid_q; logic [2:0] pre_sde_rid_q; + logic [9:0] float_bid ; + logic [9:0] float_rid ; + `include "unused_flr_template.inc" `include "unused_ddr_a_b_d_template.inc" `include "unused_ddr_c_template.inc" @@ -393,7 +397,7 @@ always @(posedge clk_main_a0) .aclk (clk_main_a0), .aresetn (rst_main_n_sync_bot_slr), - .s_axi_awid (sh_cl_dma_pcis_awid ), + .s_axi_awid ({10'b0, sh_cl_dma_pcis_awid} ), .s_axi_awaddr (sh_cl_dma_pcis_awaddr ), .s_axi_awlen (sh_cl_dma_pcis_awlen ), .s_axi_awsize (sh_cl_dma_pcis_awsize ), @@ -404,17 +408,17 @@ always @(posedge clk_main_a0) .s_axi_wlast (sh_cl_dma_pcis_wlast ), .s_axi_wvalid (sh_cl_dma_pcis_wvalid ), .s_axi_wready (cl_sh_dma_pcis_wready ), 
- .s_axi_bid (cl_sh_dma_pcis_bid ), + .s_axi_bid ({float_bid, cl_sh_dma_pcis_bid} ), .s_axi_bresp (cl_sh_dma_pcis_bresp ), .s_axi_bvalid (cl_sh_dma_pcis_bvalid ), .s_axi_bready (sh_cl_dma_pcis_bready ), - .s_axi_arid (sh_cl_dma_pcis_arid ), + .s_axi_arid ({10'b0, sh_cl_dma_pcis_arid} ), .s_axi_araddr (sh_cl_dma_pcis_araddr ), .s_axi_arlen (sh_cl_dma_pcis_arlen ), .s_axi_arsize (sh_cl_dma_pcis_arsize ), .s_axi_arvalid (sh_cl_dma_pcis_arvalid ), .s_axi_arready (cl_sh_dma_pcis_arready ), - .s_axi_rid (cl_sh_dma_pcis_rid ), + .s_axi_rid ({float_rid, cl_sh_dma_pcis_rid} ), .s_axi_rdata (cl_sh_dma_pcis_rdata ), .s_axi_rresp (cl_sh_dma_pcis_rresp ), .s_axi_rlast (cl_sh_dma_pcis_rlast ), @@ -653,6 +657,7 @@ sde #(.C2H_BUF_DEPTH(`C2H_BUF_DEPTH), //If simulation instantiate the Stream BFM `ifdef SIMULATION + stream_bfm STREAM_BFM (.clk (clk_main_a0), .rst_n (rst_main_n_sync), @@ -673,13 +678,14 @@ sde #(.C2H_BUF_DEPTH(`C2H_BUF_DEPTH), ); `else - logic bfm_h2c_axis_ready = 0; - logic bfm_c2h_axis_valid = 0; - logic [511:0] bfm_c2h_axis_data = 0; - logic [63:0] bfm_c2h_axis_keep = 0; - logic bfm_c2h_axis_last = 0; - logic [63:0] bfm_c2h_axis_user = 0; + assign bfm_h2c_axis_ready = 0; + + assign bfm_c2h_axis_valid = 0; + assign bfm_c2h_axis_data = 0; + assign bfm_c2h_axis_keep = 0; + assign bfm_c2h_axis_last = 0; + assign bfm_c2h_axis_user = 0; `endif //Instantiate the RTL Stream block diff --git a/hdk/cl/examples/cl_sde/design/cl_sde_defines.vh b/hdk/cl/examples/cl_sde/design/cl_sde_defines.vh index 46228c4f..923b784a 100644 --- a/hdk/cl/examples/cl_sde/design/cl_sde_defines.vh +++ b/hdk/cl/examples/cl_sde/design/cl_sde_defines.vh @@ -25,5 +25,6 @@ // Uncomment to disable Virtual JTAG //`define DISABLE_VJTAG_DEBUG +`define NO_SDE_DEBUG_ILA `endif diff --git a/hdk/cl/examples/cl_sde/design/cl_tst.sv b/hdk/cl/examples/cl_sde/design/cl_tst.sv index 3f23e563..dbc10afa 100644 --- a/hdk/cl/examples/cl_sde/design/cl_tst.sv +++ b/hdk/cl/examples/cl_sde/design/cl_tst.sv @@ -63,7 +63,7 
@@ module cl_tst #(parameter DATA_WIDTH=512, parameter NUM_RD_TAG=512) ( output logic rready ); -parameter DATA_DW = DATA_WIDTH / 32; + localparam DATA_DW = DATA_WIDTH / 32; //-------------------------- // Internal signals @@ -157,7 +157,7 @@ always_ff @(negedge rst_n or posedge clk) // configuration //------------------------------------------- -//Offset 0x00: +//Offset 0x00: // 0 - Continuous mode - Keep looping through all the isntructions. // 1 - Incrementing loop data (every time through loop increment the start data) // 2 - PRBS mode (else incremeting). Data will be generated with PRBS. If not enabled, data will be incrementing per DW @@ -177,7 +177,7 @@ always_ff @(negedge rst_n or posedge clk) // 15:0 - Read Start -- This is not implemented (not sure we need this) // 31:0 - Max Write ahead -- This is not implemented (not sure we need this) //Offset 0x08: -// 0 - Write Go (read back write in progress) - Write this bit to start executing the write instructions. Reads back '1' while write instructions are in progress. +// 0 - Write Go (read back write in progress) - Write this bit to start executing the write instructions. Reads back '1' while write instructions are in progress. // 1 - Read Go (read back write in progress) - Write this bit to start executing the read instructions. Reads back '1' while read instructions are in progress. // 2 - Read response pending (read only). REad only, reads back '1' while read responses are pending. //Offset 0x0c: @@ -189,7 +189,7 @@ always_ff @(negedge rst_n or posedge clk) //Offset 0x14: // 3:0 - Max Read outstanding - Max number of read requests to issue (how many simultaneous read requests) // -// Offset 0x1c: Write Index - Write instruction Index +// Offset 0x1c: Write Index - Write instruction Index // Offset 0x20: Write address low - Write instruction address // Offset 0x24: Write address high - Write instruction address // Offset 0x28: Write data - Write instruction start data. 
All other data will be incrementing or PRBS @@ -353,7 +353,7 @@ always @(posedge clk) cfg_wr_stretch <= 0; cfg_rd_stretch <= 0; end - else + else begin cfg_wr_stretch <= cfg_wr || (cfg_wr_stretch && !tst_cfg_ack); cfg_rd_stretch <= cfg_rd || (cfg_rd_stretch && !tst_cfg_ack); @@ -458,28 +458,28 @@ always @(posedge clk) always @(posedge clk) begin case (cfg_addr_q) - 8'h0: tst_cfg_rdata <= {5'h0, cfg_inc_awid, cfg_const_data_mode, cfg_inc_id_mode, - 2'h0, cfg_rd_loop_addr_shift[5:0], - 2'h0, cfg_wr_loop_addr_shift[5:0], + 8'h0: tst_cfg_rdata <= {5'h0, cfg_inc_awid, cfg_const_data_mode, cfg_inc_id_mode, + 2'h0, cfg_rd_loop_addr_shift[5:0], + 2'h0, cfg_wr_loop_addr_shift[5:0], cfg_user_mode, cfg_loop_addr_mode, cfg_iter_mode, cfg_sync_mode, cfg_rd_compare_en, cfg_prbs_mode, cfg_inc_data_loop_mode, cfg_cont_mode}; 8'h4: tst_cfg_rdata <= {cfg_max_write, cfg_read_start}; 8'h8: tst_cfg_rdata <= {rd_resp_pend, rd_inp, wr_inp}; 8'hc: tst_cfg_rdata <= {wr_state[1:0], rd_tag_avail[15:0]}; 8'h10: tst_cfg_rdata <= {cfg_rd_num_inst, cfg_wr_num_inst}; 8'h14: tst_cfg_rdata <= {cfg_max_read_req}; - - 8'h1c: tst_cfg_rdata <= cfg_wr_inst_index; + + 8'h1c: tst_cfg_rdata <= cfg_wr_inst_index; 8'h20: tst_cfg_rdata <= wr_cfg_inst_rdata_q; - 8'h24: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 32; - 8'h28: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 64; + 8'h24: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 32; + 8'h28: tst_cfg_rdata <= wr_cfg_inst_rdata_q >> 64; 8'h2c: tst_cfg_rdata <= {wr_cfg_inst_rdata_q[127:96]}; 8'h30: tst_cfg_rdata <= {31'b0, cfg_atg_enable}; 8'h3c: tst_cfg_rdata <= cfg_rd_inst_index; 8'h40: tst_cfg_rdata <= rd_cfg_inst_rdata_q; - 8'h44: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 32; - 8'h48: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 64; + 8'h44: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 32; + 8'h48: tst_cfg_rdata <= rd_cfg_inst_rdata_q >> 64; 8'h4c: tst_cfg_rdata <= {rd_cfg_inst_rdata_q[127:96]}; 8'h60: tst_cfg_rdata <= cfg_rd_data_index; @@ -538,7 +538,7 @@ always_ff @(posedge clk) 
tst_cfg_ack <= 0; else tst_cfg_ack <= ((cfg_wr_stretch||cfg_rd_stretch) && !cfg_ram_access && !tst_cfg_ack) || - ((cfg_wr_stretch||cfg_rd_stretch) && cfg_ram_access && rd_cfg_read_ram_ack && !tst_cfg_ack); + ((cfg_wr_stretch||cfg_rd_stretch) && cfg_ram_access && rd_cfg_read_ram_ack && !tst_cfg_ack); //--------------------------------------- // Inst RAMs @@ -581,7 +581,7 @@ always @(posedge clk) //-------------------------------- -// Write state machine +// Write state machine //-------------------------------- logic[7:0] wr_running_length = 0; @@ -612,7 +612,7 @@ begin begin if (awready) wr_state_nxt = WR_DAT; - else + else wr_state_nxt = WR_ADDR; end @@ -665,7 +665,7 @@ always @(posedge clk) always @(posedge clk) if (cfg_wr_go) wr_cyc_count <= 0; - else if ((wr_state==WR_DAT) && (wr_state_nxt!=WR_DAT)) + else if (bvalid && bready) wr_cyc_count <= wr_cyc_count + 1; //Timer @@ -705,7 +705,7 @@ assign wr_loop_addr_adj = (cfg_loop_addr_mode)? wr_loop_count << cfg_wr_loop_add //assign awuser = (cfg_user_mode)? inst_wr_rdata[127:112]: (inst_wr_rdata[103:96]+1) * user_length_mult; //This is the number of DW to adjust -parameter ADJ_DW_WIDTH = (DATA_WIDTH==512)? 4: +localparam ADJ_DW_WIDTH = (DATA_WIDTH==512)? 4: (DATA_WIDTH==256)? 3: (DATA_WIDTH==128)? 2: 1; @@ -727,7 +727,7 @@ always_ff @( posedge clk) awlen <= inst_wr_rdata[103:96]; awuser <= (cfg_user_mode)? inst_wr_rdata[127:112]: ((inst_wr_rdata[103:96]+1) * user_length_mult) - wr_first_adj - inst_wr_rdata[104+:ADJ_DW_WIDTH]; end - else + else begin awid <= (cfg_inc_awid)? 
awid + 1: 0; awaddr <=0 ; @@ -760,7 +760,7 @@ logic[DATA_WIDTH-1:0] first_wdata = 0; //Pre-compute this for timing always @(posedge clk) begin for (int i=0; i= wr_cyc_count); -wire rd_wr_holdoff = cfg_sync_mode && wr_inp && rd_cyc_holdoff; +wire rd_wr_holdoff = cfg_sync_mode && rd_cyc_holdoff; -//Increment the read instruction +//Increment the read instruction assign rd_tag_pop = rd_inp && rd_tag_some_avail && !rd_fifo_full && !rd_wr_holdoff; always @(posedge clk) @@ -1088,7 +1087,7 @@ always_ff @(posedge clk) // rd_trk[rid_q].running_length <= rd_trk[rid].running_length + 1; // rd_trk[rid_q].req_data <= rd_data_nxt; // end -// end +// end //rd_trk_wr.req_data[32*i+:32] = inst_rd_rdata_q[95:64] + (rd_loop_count[7:0] & {32{cfg_inc_data_loop_mode}}) + i; always_comb @@ -1119,8 +1118,8 @@ always @(posedge clk) rd_md_ram_wr_addr <= rd_md_ram_wr_addr_pre; rd_md_ram_wr <= rd_md_ram_wr_pre; rd_md_ram_wr_data <= rd_md_ram_wr_data_pre; - end - + end + always @(posedge clk) begin @@ -1165,7 +1164,7 @@ begin rd_data_mask = ((rd_trk_rd.running_length==0) && rlast_q)? ({DATA_WIDTH{1'b1}} << (rd_trk_first_adj*32)) & (~({DATA_WIDTH{1'b1}} << (({ADJ_DW_WIDTH+5{1'b1}} + 1) - (rd_trk_rd.last_adj[0+:ADJ_DW_WIDTH] * 32)) )): (rd_trk_rd.running_length==0)? ({DATA_WIDTH{1'b1}} << (rd_trk_first_adj*32)): (rlast_q)? 
~({DATA_WIDTH{1'b1}} << (({ADJ_DW_WIDTH+5{1'b1}} + 1) - (rd_trk_rd.last_adj[0+:ADJ_DW_WIDTH] * 32)) ): - {DATA_WIDTH{1'b1}}; + {DATA_WIDTH{1'b1}}; //for (int i=1; i> 2); @@ -1245,7 +1244,7 @@ flop_fifo #(.DEPTH(4), .WIDTH(9+11+8+64)) RD_REQ_FIFO ( .push(rd_tag_pop_qq), .push_data({rd_cur_req_tag, rd_push_user, inst_rd_rdata_q[103:96], rd_push_addr}), .pop(arvalid & arready), - + .pop_data({arid[8:0], aruser, arlen, araddr}), .half_full(), .watermark(rd_fifo_full), @@ -1253,7 +1252,7 @@ flop_fifo #(.DEPTH(4), .WIDTH(9+11+8+64)) RD_REQ_FIFO ( ); //------------------------------ -// Read track RAM +// Read track RAM bram_1w1r #(.WIDTH(`RD_TRK_RAM_WIDTH), .ADDR_WIDTH(9), .DEPTH(512)) RD_TRK_RAM ( .clk(clk), @@ -1290,7 +1289,7 @@ always @(posedge clk) end assign rd_md_ram_rd_data = (rd_md_ram_col_q_pre)? rd_md_ram_wr_data_q_pre: - (rd_md_ram_col_q)? rd_md_ram_wr_data_q: + (rd_md_ram_col_q)? rd_md_ram_wr_data_q: rd_md_ram_rd_data_ram; @@ -1400,11 +1399,11 @@ always @(posedge clk) rresp_error_first; end - + ////Write addres recording //always_ff @(posedge clk) // if (cfg_wr_stretch && tst_cfg_ack && (cfg_addr_q==8'he0) && (cfg_wdata_q[31])) -// begin +// begin // for (int i=0; i<32; i++) // wr_addr_rec[i] <= {64{1'b1}}; // wr_addr_rec_ptr <= 0; @@ -1418,7 +1417,7 @@ always @(posedge clk) ////Read address recording //always_ff @(posedge clk) // if (cfg_wr_stretch && tst_cfg_ack && (cfg_addr_q==8'he0) && (cfg_wdata_q[31])) -// begin +// begin // for (int i=0; i<32; i++) // rd_addr_rec[i] <= {64{1'b1}}; // rd_addr_rec_ptr <= 0; @@ -1462,5 +1461,5 @@ begin end endfunction - + endmodule diff --git a/hdk/cl/examples/cl_sde/design/sde_c2h_data.sv b/hdk/cl/examples/cl_sde/design/sde_c2h_data.sv index 9b4edfcf..7c61f0d1 100644 --- a/hdk/cl/examples/cl_sde/design/sde_c2h_data.sv +++ b/hdk/cl/examples/cl_sde/design/sde_c2h_data.sv @@ -202,44 +202,44 @@ module sde_c2h_data #(parameter bit DESC_TYPE = 0, // 0 - Regular, 1 - Compact always_comb begin - req_state_next <= req_state; + 
req_state_next = req_state; case (req_state) REQ_IDLE : if (desc_dm_desc_valid) - req_state_next <= REQ_WAIT_DATA; // REQ_GET_DESC; + req_state_next = REQ_WAIT_DATA; // REQ_GET_DESC; else - req_state_next <= REQ_IDLE; + req_state_next = REQ_IDLE; // REQ_GET_DESC: -// req_state_next <= REQ_WAIT_DATA; +// req_state_next = REQ_WAIT_DATA; REQ_WAIT_DATA: if (curr_txn_data_avail & ~dp_wb_ff_full & bresp_prealloc_avail) - req_state_next <= REQ_ADDR; + req_state_next = REQ_ADDR; else - req_state_next <= REQ_WAIT_DATA; + req_state_next = REQ_WAIT_DATA; REQ_ADDR: if (dm_pm_awvalid & pm_dm_awready) - req_state_next <= REQ_DATA; + req_state_next = REQ_DATA; else - req_state_next <= REQ_ADDR; + req_state_next = REQ_ADDR; REQ_DATA: if (data_tx_done && data_desc_done) - req_state_next <= REQ_IDLE; + req_state_next = REQ_IDLE; else if (data_tx_done) - req_state_next <= REQ_WAIT_CALC; + req_state_next = REQ_WAIT_CALC; else - req_state_next <= REQ_DATA; + req_state_next = REQ_DATA; REQ_WAIT_CALC: // Only required to be in this state when servicing multiple packets per descriptor // Need to wait 1 clock for the buf_dm_num_bytes to get updated after the end of REQ_DATA phase - req_state_next <= REQ_WAIT_DATA; + req_state_next = REQ_WAIT_DATA; default: - req_state_next <= req_state; + req_state_next = req_state; endcase // case (req_state) end // always_comb diff --git a/hdk/cl/examples/cl_sde/design/sde_h2c_data.sv b/hdk/cl/examples/cl_sde/design/sde_h2c_data.sv index 67efcb4d..70e4f5ea 100644 --- a/hdk/cl/examples/cl_sde/design/sde_h2c_data.sv +++ b/hdk/cl/examples/cl_sde/design/sde_h2c_data.sv @@ -183,33 +183,33 @@ module sde_h2c_data #(parameter bit DESC_TYPE = 0, // 0 - Regular, 1 - Compact always_comb begin - req_state_next <= req_state; + req_state_next = req_state; case (req_state) REQ_IDLE : if (desc_dm_desc_valid) - req_state_next <= REQ_WAIT_DATA; // REQ_GET_DESC; + req_state_next = REQ_WAIT_DATA; // REQ_GET_DESC; else - req_state_next <= REQ_IDLE; + req_state_next = 
REQ_IDLE; // REQ_GET_DESC: -// req_state_next <= REQ_WAIT_DATA; +// req_state_next = REQ_WAIT_DATA; REQ_WAIT_DATA: if (curr_txn_space_avail && ~rd_txn_trk_ff_full) - req_state_next <= REQ_ADDR; + req_state_next = REQ_ADDR; else - req_state_next <= REQ_WAIT_DATA; + req_state_next = REQ_WAIT_DATA; REQ_ADDR: if (desc_req_done && desc_done) - req_state_next <= REQ_IDLE; + req_state_next = REQ_IDLE; else if (desc_req_done) - req_state_next <= REQ_WAIT_DATA; + req_state_next = REQ_WAIT_DATA; else - req_state_next <= REQ_ADDR; + req_state_next = REQ_ADDR; default: - req_state_next <= req_state; + req_state_next = req_state; endcase // case (req_state) end // always_comb diff --git a/hdk/cl/examples/cl_sde/design/sde_ps_acc.sv b/hdk/cl/examples/cl_sde/design/sde_ps_acc.sv index e0628bdd..b4e8b6a1 100644 --- a/hdk/cl/examples/cl_sde/design/sde_ps_acc.sv +++ b/hdk/cl/examples/cl_sde/design/sde_ps_acc.sv @@ -71,10 +71,12 @@ if (LIMITED_SUPPORT == 0) begin logic [ACC_WIDTH-1:0] acc_in_wdata_d; logic [ACC_DW_IDX_WIDTH:0] acc_wr_num_dw; - always_comb + always_comb begin + pcis_wr_num_dw_d = '0; for (int dw_idx = 0; dw_idx < (PCIS_DATA_WIDTH>>5); dw_idx++) if (pcis_wstrb[dw_idx*4]) - pcis_wr_num_dw_d <= dw_idx + 1; + pcis_wr_num_dw_d = dw_idx + 1; + end always @(posedge clk) if (!rst_n) begin @@ -190,10 +192,12 @@ else begin logic [ACC_WIDTH-1:0] acc_in_wdata_d; logic [ACC_DW_IDX_WIDTH:0] acc_wr_num_dw; - always_comb + always_comb begin + pcis_wr_num_dw_d = '0; for (int dw_idx = 0; dw_idx < (PCIS_DATA_WIDTH>>5)/2; dw_idx++) //(512/32)/2=8 if ((pcis_wstrb[dw_idx*4]) && (dw_idx == 0 || dw_idx == 3 || dw_idx == 7)) - pcis_wr_num_dw_d <= dw_idx + 1; //Supported DW= 1DW, 4DW and 8DW + pcis_wr_num_dw_d = dw_idx + 1; //Supported DW= 1DW, 4DW and 8DW + end always @(posedge clk) if (!rst_n) begin diff --git a/hdk/cl/examples/cl_sde/design/sde_wb.sv b/hdk/cl/examples/cl_sde/design/sde_wb.sv index 3d37da34..12eb10cb 100644 --- a/hdk/cl/examples/cl_sde/design/sde_wb.sv +++ 
b/hdk/cl/examples/cl_sde/design/sde_wb.sv @@ -742,7 +742,7 @@ end // if (~H2C_N_C2H) if (rst_n) begin wr_done_q <= wr_done; - if (cfg_desc_cdt_wc_en & (desc_wb_limit_q != 32'h0) & ~wr_done_q & ~desc_cdt_req_pend) + if (cfg_wb_desc_cnt_en & cfg_desc_cdt_wc_en & (desc_wb_limit_q != 32'h0) & ~wr_done_q & ~desc_cdt_req_pend) assert (desc_wb_limit - desc_wb_limit_q <= (cfg_wc_cnt * 2)) else begin $display("%m: *** ERROR ***: Desc Limit Write Coalesce Error. desc_wb_limit = 0x%x, desc_wb_limit_q = 0x%x, cfg_wc_cnt = 0x%x. @ %0t", desc_wb_limit, desc_wb_limit_q, cfg_wc_cnt, $time); $finish; @@ -760,7 +760,7 @@ end // if (~H2C_N_C2H) $finish; end - if (cfg_md_wr_ptr_wc_en && (md_wr_ptr >= md_wr_ptr_q) & ~wr_done_q & ~md_wr_ptr_req_pend) + if (cfg_wb_md_ptr_en & cfg_md_wr_ptr_wc_en && (md_wr_ptr >= md_wr_ptr_q) & ~wr_done_q & ~md_wr_ptr_req_pend) assert (md_wr_ptr - md_wr_ptr_q <= (cfg_wc_cnt * 2)) else begin $display("%m: *** ERROR ***: Desc Limit Write Coalesce Error. md_wr_ptr = 0x%x, md_wr_ptr_q = 0x%x, cfg_wc_cnt = 0x%x. 
@ %0t", md_wr_ptr, md_wr_ptr_q, cfg_wc_cnt, $time); $finish; diff --git a/hdk/cl/examples/cl_sde/lib/ram_fifo_ft.sv b/hdk/cl/examples/cl_sde/lib/ram_fifo_ft.sv index fc51ce3b..f221580f 100644 --- a/hdk/cl/examples/cl_sde/lib/ram_fifo_ft.sv +++ b/hdk/cl/examples/cl_sde/lib/ram_fifo_ft.sv @@ -37,7 +37,7 @@ module ram_fifo_ft #(parameter WIDTH=32, parameter PTR_WIDTH=7, parameter WATERM // to see if FIFO is not ); -parameter[31:0] NUM_LOC = 1'b1 << PTR_WIDTH; +localparam[31:0] NUM_LOC = 1'b1 << PTR_WIDTH; logic ram_pop; logic[WIDTH-1:0] ram_rdata; diff --git a/hdk/cl/examples/cl_sde/software/runtime/Makefile b/hdk/cl/examples/cl_sde/software/runtime/Makefile index ae9df9a4..5b9e1609 100644 --- a/hdk/cl/examples/cl_sde/software/runtime/Makefile +++ b/hdk/cl/examples/cl_sde/software/runtime/Makefile @@ -15,26 +15,29 @@ export TEST ?= test_sde_loopback -INCLUDES = -I$(SDK_DIR)/userspace/include +SLOT_NUM = 0 +APP_SCRIPTS_DIR = $(SDK_DIR)/apps/virtual-ethernet/scripts +APP_INSTALL_DIR = veth_app +DPDK_DIR = $(APP_INSTALL_DIR)/dpdk +RMDIR = sudo rm -rf +STATS_PERIOD = 0 -CC = gcc -CFLAGS = -DCONFIG_LOGLEVEL=4 -std=gnu99 -g -Wall -Werror $(INCLUDES) +.PHONY: all clean -LDLIBS = -lfpga_mgmt -lrt -lpthread +all: check_env install run -SRC = common_dma.c $(TEST).c -OBJ = $(SRC:.c=.o) -BIN = $(TEST) +install: check_env + sudo $(APP_SCRIPTS_DIR)/virtual_ethernet_install.py $(APP_INSTALL_DIR) + sudo $(APP_SCRIPTS_DIR)/virtual_ethernet_setup.py $(DPDK_DIR) $(SLOT_NUM) -all: check_env - -#$(BIN): $(OBJ) -# $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS) +run: check_env + sudo $(DPDK_DIR)/x86_64-native-linuxapp-gcc/app/testpmd -l 0-1 -- --port-topology=loop --auto-start --tx-first --stats-period=$(STATS_PERIOD) & \ clean: - rm -f *.o $(BIN) + $(RMDIR) $(APP_INSTALL_DIR) check_env: ifndef SDK_DIR $(error SDK_DIR is undefined. 
Try "source sdk_setup.sh" to set the software environment) endif + diff --git a/hdk/cl/examples/cl_sde/verif/scripts/Makefile b/hdk/cl/examples/cl_sde/verif/scripts/Makefile index 793c0d20..746af12b 100644 --- a/hdk/cl/examples/cl_sde/verif/scripts/Makefile +++ b/hdk/cl/examples/cl_sde/verif/scripts/Makefile @@ -21,8 +21,9 @@ endif export TEST ?= test_null export C_TEST ?= test_null - -export CL_ROOT = $(PWD)/../.. + +export SCRIPTS_DIR = $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +export CL_ROOT = $(realpath $(SCRIPTS_DIR)/../..) export SDK_DIR = $(AWS_FPGA_REPO_DIR)/sdk export C_COMMON_DIR = $(HDK_COMMON_DIR)/software @@ -30,7 +31,7 @@ export C_SDK_USR_INC_DIR = $(SDK_DIR)/userspace/include export C_SDK_USR_UTILS_DIR = $(SDK_DIR)/userspace/utils export C_INC_DIR = $(CL_ROOT)/software/runtime export C_SRC_DIR = $(CL_ROOT)/software/src - + export TEST_NAME = $(CL_ROOT)/verif/tests/$(TEST).sv ifeq ($(C_TEST),test_null) @@ -47,13 +48,11 @@ else export SIM_DIR = $(SIM_ROOT)/$(C_TEST) endif - -export SCRIPTS_DIR = $(PWD) export XILINX_IP = $(HDK_SHELL_DESIGN_DIR)/ip export SH_LIB_DIR = $(HDK_SHELL_DESIGN_DIR)/lib export SH_INF_DIR = $(HDK_SHELL_DESIGN_DIR)/interfaces export SH_SH_DIR = $(HDK_SHELL_DESIGN_DIR)/sh_ddr/sim - + SV_TEST_LIST = test_null.sv C_FILES = $(C_TEST_NAME) $(C_SDK_USR_UTILS_DIR)/sh_dpi_tasks.c $(C_COMMON_DIR)/src/fpga_pci_sv.c @@ -62,6 +61,6 @@ ifeq ($(XCHK), 1) else all: make_sim_dir compile run endif - + include $(HDK_COMMON_DIR)/verif/tb/scripts/Makefile.common.inc diff --git a/hdk/cl/examples/cl_sde/verif/scripts/Makefile.ies b/hdk/cl/examples/cl_sde/verif/scripts/Makefile.ies new file mode 100644 index 00000000..65df9e03 --- /dev/null +++ b/hdk/cl/examples/cl_sde/verif/scripts/Makefile.ies @@ -0,0 +1,43 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). 
You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + + +################################################################## +## Makefile For IES compiles and simulations +## Step to run : +## 1. make create_libs IES=1 -> To generate xilinx compile +## libraries. This is a one time step +## 2. make all IES=1 -> Runs the test +################################################################## + +compile: $(COMPLIB_DIR) + mkdir -p $(SIM_DIR) + cd $(SIM_DIR) && ln -s -f ../ies_complib/cds.lib + cd $(SIM_DIR) && ln -s -f ../ies_complib/hdl.var + cd $(SIM_DIR) && gcc -fPIC -g -shared -I$(C_SDK_USR_INC_DIR) -I$(C_SDK_USR_UTILS_DIR) -I$(C_COMMON_DIR)/include -I$(C_COMMON_DIR)/src -I$(C_INC_DIR) -DSV_TEST -DSCOPE -DIES_SIM -DINT_MAIN -o libdpi.so $(C_FILES) -I/`ncroot`/tools/include + cd $(SIM_DIR) && irun -64bit -elaborate +libext+.v+.sv -disable_sem2009 -l compile.ies.log -I$(C_SDK_USR_INC_DIR) -I$(C_SDK_USR_UTILS_DIR) -I$(C_COMMON_DIR) -define SV_TEST -define DMA_TEST -define SCOPE -define IES_SIM $(DEFAULT_DEFINES) -I$(C_INC_DIR)/include -I$(C_INC_DIR)/src -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f -top tb -top glbl -top $(TEST) $(DEFINES) $(TEST_NAME) -timescale 1ps/1ps + +run: + +ifeq ($(TEST),test_null) + cd $(SIM_DIR) && irun -R -access +rwc -timescale 1ps/1ps -l $(C_TEST).log $(PLUSARGS) +vpdfile+$(TEST).vpd +else + cd $(SIM_DIR) && irun -R -access +rwc -timescale 1ps/1ps -l $(TEST).log $(PLUSARGS) +vpdfile+$(TEST).vpd +endif + +$(COMPLIB_DIR): + cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl + -cd $(SIM_ROOT) && vivado 
-mode batch -source create_libs.tcl + cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_sde/verif/scripts/Makefile.questa b/hdk/cl/examples/cl_sde/verif/scripts/Makefile.questa index b4858172..16058390 100644 --- a/hdk/cl/examples/cl_sde/verif/scripts/Makefile.questa +++ b/hdk/cl/examples/cl_sde/verif/scripts/Makefile.questa @@ -22,6 +22,9 @@ ## 2. make all QUESTA=1 -> Runs the test ################################################################## +LIBLISTS = $(COMMON_LIBLISTS) +LIBLISTS_ARGS = $(shell echo " $(strip $(LIBLISTS))" | sed 's|\ | -L |g') + compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) cd ${SIM_DIR} && ln -s -f ../questa_complib/modelsim.ini @@ -29,24 +32,10 @@ compile: $(COMPLIB_DIR) cd $(SIM_DIR) && vlog -mfcu -sv -64 -timescale 1ps/1ps -93 -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/secureip -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f run: -ifeq ($(VIVADO_TOOL_VERSION), v2017.4) ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_2_1 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast 
-L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_2_1 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else ifeq ($(VIVADO_TOOL_VERSION), v2018.3) -ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_2_3 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_2_3 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else -ifeq ($(TEST),test_null) - cd 
$(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_2_2 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_2_2 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) endif $(COMPLIB_DIR): diff --git a/hdk/cl/examples/cl_sde/verif/scripts/Makefile.vcs b/hdk/cl/examples/cl_sde/verif/scripts/Makefile.vcs index 61f241f9..67b92dd4 100644 --- a/hdk/cl/examples/cl_sde/verif/scripts/Makefile.vcs +++ b/hdk/cl/examples/cl_sde/verif/scripts/Makefile.vcs @@ -26,12 +26,12 @@ compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) cd $(SIM_DIR) && ln -s -f ../vcs_complib/synopsys_sim.setup cd $(SIM_DIR) && vlogan -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps 
-sverilog +systemverilogext+.sv +libext+.sv +libext+.v -full64 -lca -v2005 +v2k -l compile.vlogan.log -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f +define+VCS $(DEFINES) +lint=TFIPC-L - cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR)" -CFLAGS "-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" -CFLAGS "-I$(C_INC_DIR)" -debug_all -M -I +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log + cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR)" -CFLAGS "-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" -CFLAGS "-I$(C_INC_DIR)" -debug_all -M +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log run: - cd $(SIM_DIR) && ./simv -l -l $(TEST).log $(PLUSARGS) +ntb_random_seed_automatic +vpdfile+$(TEST).vpd + cd $(SIM_DIR) && ./simv -l $(TEST).log $(PLUSARGS) +ntb_random_seed_automatic +vpdfile+$(TEST).vpd $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_sde/verif/scripts/top.ies.f b/hdk/cl/examples/cl_sde/verif/scripts/top.ies.f new file mode 100644 index 00000000..3071ce0b --- /dev/null +++ b/hdk/cl/examples/cl_sde/verif/scripts/top.ies.f @@ -0,0 +1,113 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). 
You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + ++define+IES_SIM ++define+CARD_1=card ++define+CL_NAME=cl_sde ++define+SIMULATION ++define+NO_SDE_DEBUG_ILA ++define+DISABLE_VJTAG_DEBUG + ++libext+.v ++libext+.sv ++libext+.svh + +-y ${CL_ROOT}/../common/design +-y ${CL_ROOT}/design +-y ${SH_LIB_DIR} +-y ${SH_INF_DIR} +-y ${HDK_SHELL_DESIGN_DIR}/sh_ddr/sim +-y ${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/hdl +-y ${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/sim + ++incdir+${CL_ROOT}/../common/design ++incdir+${CL_ROOT}/design ++incdir+${CL_ROOT}/verif/tests ++incdir+${SH_LIB_DIR} ++incdir+${SH_INF_DIR} ++incdir+${SH_SH_DIR} ++incdir+${HDK_COMMON_DIR}/verif/include ++incdir+${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/ip/ip_0/hdl/verilog ++incdir+${HDK_SHELL_DESIGN_DIR}/ip/axi_register_slice_light/hdl ++incdir+${HDK_SHELL_DESIGN_DIR}/sh_ddr/sim ++incdir+${HDK_SHELL_DESIGN_DIR}/interfaces + +${CL_ROOT}/../common/design/cl_common_defines.vh +${CL_ROOT}/design/cl_sde_defines.vh +${HDK_SHELL_DESIGN_DIR}/ip/ila_vio_counter/sim/ila_vio_counter.v +${HDK_SHELL_DESIGN_DIR}/ip/ila_0/sim/ila_0.v +${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/sim/bd_a493.v +${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/ip/ip_0/sim/bd_a493_xsdbm_0.v +${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/ip/ip_0/hdl/xsdbm_v3_0_vl_rfs.v +${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/ip/ip_0/hdl/ltlib_v1_0_vl_rfs.v +${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/ip/ip_1/sim/bd_a493_lut_buffer_0.v +${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/ip/ip_1/hdl/lut_buffer_v2_0_vl_rfs.v 
+${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/bd_0/hdl/bd_a493_wrapper.v +${HDK_SHELL_DESIGN_DIR}/ip/cl_debug_bridge/sim/cl_debug_bridge.v +${HDK_SHELL_DESIGN_DIR}/ip/vio_0/sim/vio_0.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_register_slice_light/sim/axi_register_slice_light.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_register_slice/sim/axi_register_slice.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_register_slice_light/hdl/axi_register_slice_v2_1_vl_rfs.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_register_slice_light/hdl/axi_infrastructure_v1_1_vl_rfs.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_clock_converter_0/simulation/fifo_generator_vlog_beh.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_clock_converter_0/hdl/fifo_generator_v13_2_rfs.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_clock_converter_0/hdl/axi_clock_converter_v2_1_vl_rfs.v +${HDK_SHELL_DESIGN_DIR}/ip/axi_clock_converter_0/sim/axi_clock_converter_0.v +${CL_ROOT}/ip/ila_axi4/sim/ila_axi4.v +${CL_ROOT}/ip/ila_axi4_512/sim/ila_axi4_512.v +${CL_ROOT}/ip/ila_axis/sim/ila_axis.v +${CL_ROOT}/ip/ila_sde_c2h_buf/sim/ila_sde_c2h_buf.v +${CL_ROOT}/ip/ila_sde_c2h_dm/sim/ila_sde_c2h_dm.v +${CL_ROOT}/ip/ila_sde_h2c_buf/sim/ila_sde_h2c_buf.v +${CL_ROOT}/ip/ila_sde_h2c_dm/sim/ila_sde_h2c_dm.v +${CL_ROOT}/ip/ila_sde_ps/sim/ila_sde_ps.v +${CL_ROOT}/ip/ila_sde_wb/sim/ila_sde_wb.v +${CL_ROOT}/lib/axis_flop_fifo.sv +${CL_ROOT}/lib/bram_1w1r.sv +${CL_ROOT}/lib/flop_fifo_in.sv +${CL_ROOT}/lib/ft_fifo_p.v +${CL_ROOT}/lib/ft_fifo.v +${CL_ROOT}/lib/ram_fifo_ft.sv +${CL_ROOT}/lib/rr_arb.sv +${CL_ROOT}/design/cl_id_defines.vh +${CL_ROOT}/design/sde_pkg.sv +${CL_ROOT}/design/cl_pkt_tst.sv +${CL_ROOT}/design/ila_axi4_wrapper.sv +${CL_ROOT}/design/axi_prot_chk.sv +${CL_ROOT}/design/cl_tst.sv +${CL_ROOT}/design/cl_sde_srm.sv +${CL_ROOT}/design/sde_c2h_axis.sv +${CL_ROOT}/design/sde_c2h_buf.sv +${CL_ROOT}/design/sde_c2h_cfg.sv +${CL_ROOT}/design/sde_c2h_data.sv +${CL_ROOT}/design/sde_c2h.sv +${CL_ROOT}/design/sde_h2c_axis.sv +${CL_ROOT}/design/sde_h2c_buf.sv +${CL_ROOT}/design/sde_h2c_cfg.sv 
+${CL_ROOT}/design/sde_h2c_data.sv +${CL_ROOT}/design/sde_h2c.sv +${CL_ROOT}/design/sde_pm.sv +${CL_ROOT}/design/sde_ps_acc.sv +${CL_ROOT}/design/sde_ps.sv +${CL_ROOT}/design/sde_wb.sv +${CL_ROOT}/design/sde_desc.sv +${CL_ROOT}/design/sde.sv +${HDK_COMMON_DIR}/verif/models/base/gen_buf_t.sv +${HDK_COMMON_DIR}/verif/models/stream_bfm/stream_bfm.sv +${CL_ROOT}/design/cl_sde.sv + +-f ${HDK_COMMON_DIR}/verif/tb/filelists/tb.${SIMULATOR}.f +${HDK_COMMON_DIR}/verif/tb/sv/dma_classes.sv +${TEST_NAME} diff --git a/hdk/cl/examples/cl_uram_example/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/cl_uram_example/build/scripts/create_dcp_from_cl.tcl index 0b21940d..2c1a0888 100644 --- a/hdk/cl/examples/cl_uram_example/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/cl_uram_example/build/scripts/create_dcp_from_cl.tcl @@ -39,7 +39,7 @@ set uram_option [lindex $argv 11] set notify_via_sns [lindex $argv 12] set VDEFINES [lindex $argv 13] ################################################## -## Flow control variables +## Flow control variables ################################################## set cl.synth 1 set implement 1 @@ -133,6 +133,9 @@ set_msg_config -id {Synth 8-350} -suppress set_msg_config -id {Synth 8-3848} -suppress set_msg_config -id {Synth 8-3917} -suppress +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling the encrypt.tcl."; # Check that an email address has been set, else unset notify_via_sns @@ -147,7 +150,7 @@ if {[string compare $notify_via_sns "1"] == 0} { } ################################################## -### Strategy options +### Strategy options ################################################## switch $strategy { "BASIC" { @@ -186,7 +189,7 @@ source $HDK_SHELL_DIR/build/scripts/device_type.tcl source $HDK_SHELL_DIR/build/scripts/step_user.tcl -notrace 
######################################## -## Generate clocks based on Recipe +## Generate clocks based on Recipe ######################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling aws_gen_clk_constraints.tcl to generate clock constraints from developer's specified recipe."; @@ -235,7 +238,7 @@ if {$implement} { # Apply Clock Properties for Clock Table Recipes ################################################## puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Sourcing aws_clock_properties.tcl to apply properties to clocks. "; - + # Apply properties to clocks source $HDK_SHELL_DIR/build/scripts/aws_clock_properties.tcl @@ -300,7 +303,7 @@ if {$implement} { # This is what will deliver to AWS puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Writing final DCP to to_aws directory."; - + #writing unencrypted dcp for analysis to checkpoints dir. write_checkpoint -force $CL_DIR/build/checkpoints/${timestamp}.SH_CL_routed.dcp @@ -363,5 +366,3 @@ if {[string compare $notify_via_sns "1"] == 0} { } puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Build complete."; - - diff --git a/hdk/cl/examples/cl_uram_example/software/runtime/test_uram_example.c b/hdk/cl/examples/cl_uram_example/software/runtime/test_uram_example.c index 81a3a0f9..c056057f 100644 --- a/hdk/cl/examples/cl_uram_example/software/runtime/test_uram_example.c +++ b/hdk/cl/examples/cl_uram_example/software/runtime/test_uram_example.c @@ -142,7 +142,7 @@ uint32_t glb_value; printf("Enter your command followed by your 32 bits hexadecimal value (without 0x)\n"); printf("Note that only the [28:0] bits will be stored in the URAM\n"); -#ifndef VIVADO_SIM +#ifndef SV_TEST printf("Example: find CAFE4B1D\n"); printf("Example: del CAFE4B1D\n"); scanf("%s %x", command, &value); @@ -162,7 +162,7 @@ uint32_t glb_value; printf("After setting command value \n"); -#ifndef VIVADO_SIM +#ifndef SV_TEST // The 3 MSB are used to encode {find, add, del} when 
writing to the CL // On a read they indicate {find_ok, del_ok, busy} @@ -187,10 +187,10 @@ uint32_t glb_value; /* initialize the fpga_pci library so we could have access to FPGA PCIe from this applications */ - printf("Starting to initialize the fpga_pci library \n"); - rc = fpga_pci_init(); - fail_on(rc, out, "Unable to initialize the fpga_pci library\n TEST FAILED\n"); - printf("Done initializing the fpga_pci library \n"); + printf("Starting to initialize the fpga_mgmt library\n"); + rc = fpga_mgmt_init(); + fail_on(rc, out, "Unable to initialize the fpga_mgmt library\n TEST FAILED\n"); + printf("Done initializing the fpga_mgmt library\n"); #ifndef SV_TEST rc = check_afi_ready(slot_id); @@ -203,14 +203,18 @@ uint32_t glb_value; rc = uram_example(slot_id, FPGA_APP_PF, APP_PF_BAR0, value); fail_on(rc, out, "peek-poke example failed\n TEST FAILED\n"); + fpga_mgmt_close(); + #ifndef SV_TEST return rc; out: + fpga_mgmt_close(); return 1; #else out: + fpga_mgmt_close(); if (rc != 0) { printf("TEST_FAILED \n"); } diff --git a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile index 0531c878..6347104a 100644 --- a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile +++ b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile @@ -22,7 +22,8 @@ endif export TEST ?= test_null export C_TEST ?= test_null -export CL_ROOT = $(PWD)/../.. +export SCRIPTS_DIR = $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +export CL_ROOT = $(realpath $(SCRIPTS_DIR)/../..) 
export SDK_DIR = $(AWS_FPGA_REPO_DIR)/sdk export C_COMMON_DIR = $(HDK_COMMON_DIR)/software @@ -47,15 +48,13 @@ else export SIM_DIR = $(SIM_ROOT)/$(C_TEST)_c endif - -export SCRIPTS_DIR = $(PWD) export XILINX_IP = $(HDK_SHELL_DESIGN_DIR)/ip export SH_LIB_DIR = $(HDK_SHELL_DESIGN_DIR)/lib export SH_INF_DIR = $(HDK_SHELL_DESIGN_DIR)/interfaces export SH_SH_DIR = $(HDK_SHELL_DESIGN_DIR)/sh_ddr/sim SV_TEST_LIST = test_hello_world -C_FILES = $(C_TEST_NAME) $(C_SDK_USR_UTILS_DIR)/sh_dpi_tasks.c $(C_COMMON_DIR)/src/fpga_pci_sv.c +C_FILES = $(C_TEST_NAME) $(C_SDK_USR_UTILS_DIR)/sh_dpi_tasks.c $(C_COMMON_DIR)/src/fpga_pci_sv.c ifeq ($(XCHK), 1) all: make_sim_dir compile_chk run diff --git a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.ies b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.ies index 4e46036b..d623c649 100644 --- a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.ies +++ b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.ies @@ -40,5 +40,5 @@ endif $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl diff --git a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.questa b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.questa index 2a86de74..f82c7de3 100644 --- a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.questa +++ b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.questa @@ -22,6 +22,9 @@ ## 2. 
make all QUESTA=1 -> Runs the test ################################################################## +LIBLISTS = $(COMMON_LIBLISTS) +LIBLISTS_ARGS = $(shell echo " $(strip $(LIBLISTS))" | sed 's|\ | -L |g') + compile: $(COMPLIB_DIR) mkdir -p $(SIM_DIR) cd ${SIM_DIR} && ln -s -f ../questa_complib/modelsim.ini @@ -30,28 +33,14 @@ compile: $(COMPLIB_DIR) cd $(SIM_DIR) && vlog -mfcu -sv -64 -timescale 1ps/1ps -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/secureip -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f run: -ifeq ($(VIVADO_TOOL_VERSION),v2017.4) -ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_15 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_16 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_14 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_14 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else ifeq($(VIVADO_TOOL_VERSION),v2018.3) ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c 
-voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_18 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_19 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_17 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_17 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif -else -ifeq ($(TEST),test_null) - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L 
$(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(C_TEST).log -do "run -all; quit -f" tb glbl $(TEST) -else - cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random -L $(COMPLIB_DIR)/unisims_ver -L $(COMPLIB_DIR)/unisim -L $(COMPLIB_DIR)/unifast_ver -L $(COMPLIB_DIR)/unifast -L $(COMPLIB_DIR)/unimacro_ver -L $(COMPLIB_DIR)/unimacro -L $(COMPLIB_DIR)/secureip -L $(COMPLIB_DIR)/axi_register_slice_v2_1_17 -L $(COMPLIB_DIR)/axi_infrastructure_v1_1_0 -L $(COMPLIB_DIR)/axi_crossbar_v2_1_18 -L $(COMPLIB_DIR)/xpm -L $(COMPLIB_DIR)/axi_clock_converter_v2_1_16 -L $(COMPLIB_DIR)/fifo_generator_v13_1_4 -L $(COMPLIB_DIR)/axi_data_fifo_v2_1_16 -L $(COMPLIB_DIR)/generic_baseblocks_v2_1_0 -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) -endif + cd $(SIM_DIR) && vsim -c -voptargs="+acc" -64 -t ps -sv_seed random $(LIBLISTS_ARGS) -l $(TEST).log -do "run -all; quit -f" tb glbl $(TEST) endif + $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl - diff --git a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.vcs b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.vcs index efc2fd92..367b53fe 100644 --- a/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.vcs +++ b/hdk/cl/examples/cl_uram_example/verif/scripts/Makefile.vcs @@ -27,7 +27,7 @@ compile: $(COMPLIB_DIR) cd ${SIM_DIR} && ln -s -f ../vcs_complib/synopsys_sim.setup cd $(SIM_DIR) && vhdlan -full64 ${CL_ROOT}/design/ctrl_uram.vhd cd $(SIM_DIR) && vlogan -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog +systemverilogext+.sv +libext+.sv +libext+.v -full64 -lca -v2005 +v2k -l compile.vlogan.log -f $(SCRIPTS_DIR)/top.$(SIMULATOR).f +define+VCS $(DEFINES) +lint=TFIPC-L - cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR)" -CFLAGS 
"-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" -CFLAGS "-I$(C_INC_DIR)" -debug_all -M -I +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log + cd $(SIM_DIR) && vcs tb $(TEST) $(C_FILES) -CFLAGS "-I$(C_SDK_USR_INC_DIR)" -CFLAGS "-I$(C_SDK_USR_UTILS_DIR)" -CFLAGS "-I$(C_COMMON_DIR)/include" -CFLAGS "-I$(C_COMMON_DIR)/src" -CFLAGS "-DSV_TEST" -CFLAGS "-DSCOPE" -CFLAGS "-I$(C_INC_DIR)" -debug_all -M +lint=TFIPC-L -debug_pp glbl -ntb_opts tb_timescale=1ps/1ps -timescale=1ps/1ps -sverilog -full64 +memcbk -licqueue -lca -v2005 -l compile.vcs.log run: @@ -39,5 +39,5 @@ endif $(COMPLIB_DIR): cd $(SIM_ROOT) && echo "compile_simlib -language all -dir $(COMPLIB_DIR) -simulator $(SIMULATOR) -library all -family all" > create_libs.tcl - cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl + -cd $(SIM_ROOT) && vivado -mode batch -source create_libs.tcl cd $(SIM_ROOT) && rm -rf create_libs.tcl \ No newline at end of file diff --git a/hdk/cl/examples/cl_uram_example/verif/tests/test_uram_example.sv b/hdk/cl/examples/cl_uram_example/verif/tests/test_uram_example.sv index 9731efe5..21220a73 100644 --- a/hdk/cl/examples/cl_uram_example/verif/tests/test_uram_example.sv +++ b/hdk/cl/examples/cl_uram_example/verif/tests/test_uram_example.sv @@ -14,6 +14,10 @@ // limitations under the License. // This test tests add, delete and find operations of a URAM. +//------------------------------------------------------------------------------------------ +// Description: This test checks if Find, Add & Del commands implemented by the uram_example CL work accurately. 
+//------------------------------------------------------------------------------------------- + module test_uram_example(); import tb_type_defines_pkg::*; @@ -41,22 +45,23 @@ logic [31:0] glb_value; tb.poke(.addr(64'h500), .data(value), .intf(AxiPort::PORT_OCL)); - // Wait for the busy status to be cleared + // Wait for the busy status to be cleared. busy = 1; do begin if (timeout == 10) begin - $display("Timeout - Something went wrong with the HW. Please do\n"); + $display("Timeout - Command not finished after 100ns."); + $error("TEST FAILED") ; $finish; end if (timeout) begin - $display("Please wait, it may take time ...\n"); + $display("Please wait, Command in execution ...\n"); end // Wait for the HW to process tb.nsec_delay(10000); timeout++; - // Read + // Read CL register to determine if the command is done. bit 29 indicates command done. tb.peek(.addr(64'h500), .data(value), .intf(AxiPort::PORT_OCL)); find_ok = value[31]; @@ -80,6 +85,7 @@ logic [31:0] glb_value; end else begin $display("The value 0x%x has been added to the URAM successfully\n", value); + $display("TEST PASSED"); end end // if (find_ok == 1) endtask // uram_task diff --git a/hdk/cl/examples/hello_world_hlx/README.md b/hdk/cl/examples/hello_world_hlx/README.md index d6d9371f..47e35ebf 100755 --- a/hdk/cl/examples/hello_world_hlx/README.md +++ b/hdk/cl/examples/hello_world_hlx/README.md @@ -101,6 +101,7 @@ The runtime software must be compiled for the AFI to run on F1. 
Copy the software directory to any directory and compile with the following commands: ``` +$ source $AWS_FPGA_REPO_DIR/sdk_setup.sh $ cp -r $HDK_COMMON_DIR/shell_stable/hlx/hlx_examples/build/IPI/hello_world/software $ cd software $ make all diff --git a/hdk/cl/examples/hello_world_hlx/build/scripts/create_dcp_from_cl.tcl b/hdk/cl/examples/hello_world_hlx/build/scripts/create_dcp_from_cl.tcl index 59246c89..5a5d24f5 100755 --- a/hdk/cl/examples/hello_world_hlx/build/scripts/create_dcp_from_cl.tcl +++ b/hdk/cl/examples/hello_world_hlx/build/scripts/create_dcp_from_cl.tcl @@ -29,6 +29,10 @@ if {[string compare $notify_via_sns "1"] == 0} { } } +# suppress warnings coming from Shell +set_msg_config -severity "CRITICAL WARNING" -string "WRAPPER_INST/SH" -suppress +set_msg_config -severity "WARNING" -string "WRAPPER_INST/SH" -suppress + ################################################# ## Create BD (Block Design) of example Hello World design ################################################# diff --git a/hdk/common/shell_v04261818/build/scripts/params.tcl b/hdk/common/shell_v04261818/build/scripts/params.tcl index 96103532..1ad16bb1 100755 --- a/hdk/common/shell_v04261818/build/scripts/params.tcl +++ b/hdk/common/shell_v04261818/build/scripts/params.tcl @@ -25,3 +25,6 @@ if {$uram_option != 2} { ####Enable support of clocking from one RP to another (SH-->CL) set_param hd.supportClockNetCrossDiffReconfigurablePartitions 1 +# Maintain DONT TOUCH functionality for 2020.2 onwards +if {[string match *2020.2* [version -short]]} {set_param project.replaceDontTouchWithKeepHierarchySoft false} + diff --git a/hdk/common/shell_v04261818/design/sh_ddr/sim/axi4_slave_bfm.sv b/hdk/common/shell_v04261818/design/sh_ddr/sim/axi4_slave_bfm.sv index 39de4053..8ff2f925 100644 --- a/hdk/common/shell_v04261818/design/sh_ddr/sim/axi4_slave_bfm.sv +++ b/hdk/common/shell_v04261818/design/sh_ddr/sim/axi4_slave_bfm.sv @@ -13,7 +13,7 @@ // implied. 
See the License for the specific language governing permissions and // limitations under the License. -module axi4_slave_bfm +module axi4_slave_bfm #( parameter ECC_EN = 0, parameter ECC_ADDR_HI = 'h410, parameter ECC_ADDR_LO = 'h400, parameter RND_ECC_EN = 0, parameter RND_ECC_WEIGHT = 100) ( input clk_core, @@ -29,19 +29,19 @@ module axi4_slave_bfm input[1:0] cl_sh_ddr_awburst, //Note only INCR/WRAP supported. If un-supported mode on this signal, will default to INCR //input[10:0] cl_sh_ddr_awuser, input cl_sh_ddr_awvalid, - output logic[2:0] sh_cl_ddr_awready, + output logic sh_cl_ddr_awready, input[15:0] cl_sh_ddr_wid, input[511:0] cl_sh_ddr_wdata, input[63:0] cl_sh_ddr_wstrb, - input[2:0] cl_sh_ddr_wlast, - input[2:0] cl_sh_ddr_wvalid, - output logic[2:0] sh_cl_ddr_wready, + input cl_sh_ddr_wlast, + input cl_sh_ddr_wvalid, + output logic sh_cl_ddr_wready, output logic[15:0] sh_cl_ddr_bid, output logic[1:0] sh_cl_ddr_bresp, - output logic[2:0] sh_cl_ddr_bvalid, - input[2:0] cl_sh_ddr_bready, + output logic sh_cl_ddr_bvalid, + input cl_sh_ddr_bready, input[15:0] cl_sh_ddr_arid, input[63:0] cl_sh_ddr_araddr, @@ -49,15 +49,15 @@ module axi4_slave_bfm input[2:0] cl_sh_ddr_arsize, //input[10:0] cl_sh_ddr_aruser, input[1:0] cl_sh_ddr_arburst, //Note only INCR/WRAP supported. 
If un-supported mode on this signal, will default to INCR - input[2:0] cl_sh_ddr_arvalid, - output logic[2:0] sh_cl_ddr_arready, + input cl_sh_ddr_arvalid, + output logic sh_cl_ddr_arready, output logic[15:0] sh_cl_ddr_rid, output logic[511:0] sh_cl_ddr_rdata, output logic[1:0] sh_cl_ddr_rresp, - output logic[2:0] sh_cl_ddr_rlast, - output logic[2:0] sh_cl_ddr_rvalid, - input[2:0] cl_sh_ddr_rready + output logic sh_cl_ddr_rlast, + output logic sh_cl_ddr_rvalid, + input cl_sh_ddr_rready ); `include "axi_bfm_defines.svh" diff --git a/hdk/common/shell_v04261818/design/sh_ddr/sim/axi_bfm_defines.svh b/hdk/common/shell_v04261818/design/sh_ddr/sim/axi_bfm_defines.svh index 4c15afe5..ec92932a 100644 --- a/hdk/common/shell_v04261818/design/sh_ddr/sim/axi_bfm_defines.svh +++ b/hdk/common/shell_v04261818/design/sh_ddr/sim/axi_bfm_defines.svh @@ -19,7 +19,7 @@ typedef struct { logic [63:0] addr; logic [7:0] len; logic [2:0] size; - logic [5:0] id; + logic [15:0] id; logic [1:0] resp; logic last; } AXI_Command; @@ -27,7 +27,7 @@ typedef struct { typedef struct { logic [511:0] data; logic [63:0] strb; - logic [5:0] id; + logic [15:0] id; logic last; } AXI_Data; diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/component.xml b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/component.xml old mode 100755 new mode 100644 index 1a4dff5b..de54734c --- a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/component.xml +++ b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/component.xml @@ -4407,7 +4407,7 @@ viewChecksum - 01e58ca0 + ebc2f515 @@ -4429,7 +4429,7 @@ viewChecksum - 7a659885 + ffa0fbf3 @@ -15920,7 +15920,61 @@ aws_v1_0_2 - hdl/aws_v1_0_vl_rfs.sv + hdl/sim/axi4_slave_bfm.sv + systemVerilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/sim/axi_bfm_defines.svh + systemVerilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/sim/axi_mem_model.sv + systemVerilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/lib_pipe.sv + systemVerilogSource + USED_IN_ipstatic 
+ aws_v1_0_2 + + + hdl/sim/ccf_ctl.v + verilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/sim/sync.v + verilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/sim/flop_ccf.sv + systemVerilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/sim/mgt_acc_axl.sv + systemVerilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/sim/mgt_gen_axl.sv + systemVerilogSource + USED_IN_ipstatic + aws_v1_0_2 + + + hdl/sim/sh_ddr.sv systemVerilogSource USED_IN_ipstatic aws_v1_0_2 @@ -15938,6 +15992,12 @@ true aws_v1_0_2 + + hdl/aws_v1_0_top.sv + systemVerilogSource + USED_IN_ipstatic + aws_v1_0_2 + xilinx_verilogbehavioralsimulation_xilinx_com_ip_ddr4_2_2__ref_view_fileset @@ -15979,7 +16039,37 @@ aws_v1_0_2 - hdl/aws_v1_0_vlsyn_rfs.sv + hdl/lib_pipe.sv + systemVerilogSource + aws_v1_0_2 + + + hdl/synth/ccf_ctl.v + verilogSource + aws_v1_0_2 + + + hdl/synth/sync.v + verilogSource + aws_v1_0_2 + + + hdl/synth/flop_ccf.sv + systemVerilogSource + aws_v1_0_2 + + + hdl/synth/mgt_acc_axl.sv + systemVerilogSource + aws_v1_0_2 + + + hdl/synth/mgt_gen_axl.sv + systemVerilogSource + aws_v1_0_2 + + + hdl/synth/sh_ddr.sv systemVerilogSource aws_v1_0_2 @@ -15995,6 +16085,12 @@ true aws_v1_0_2 + + hdl/aws_v1_0_top.sv + systemVerilogSource + CHECKSUM_c59ef6a9 + aws_v1_0_2 + xilinx_verilogsynthesis_xilinx_com_ip_ddr4_2_2__ref_view_fileset @@ -16429,14 +16525,14 @@ AWS http://www.xilinx.com 2 - 2018-06-05T19:20:33Z + 2019-08-08T23:47:40Z 2017.4 - + diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/doc/aws_v1_0_changelog.txt b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/doc/aws_v1_0_changelog.txt old mode 100755 new mode 100644 index eaf4576a..ab0095d6 --- a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/doc/aws_v1_0_changelog.txt +++ b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/doc/aws_v1_0_changelog.txt @@ -4,6 +4,7 @@ * Bug Fix: Changed default Clock Group A recipe from A0 to A1. * New Feature: Support new Amazon shell v1.4 (SHELL_VERSION = 0x04261818). 
* New Feature: Changed default DEVICE_ID from 0xF000 to 0xF010. + * New Feature: Added AXI slave BFM simulation models. * Revision change in one or more subcores 2017.3: @@ -20,7 +21,7 @@ * New Feature: Native Vivado Release * New Feature: Initial release. -(c) Copyright 2017 - 2018 Xilinx, Inc. All rights reserved. +(c) Copyright 2017 - 2019 Xilinx, Inc. All rights reserved. This file contains confidential and proprietary information of Xilinx, Inc. and is protected under U.S. and diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_top.sv b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_top.sv new file mode 100644 index 00000000..df3ce581 --- /dev/null +++ b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_top.sv @@ -0,0 +1,1006 @@ +// (c) Copyright 2017 Xilinx, Inc. All rights reserved. +// +// This file contains confidential and proprietary information +// of Xilinx, Inc. and is protected under U.S. and +// international copyright and other intellectual property +// laws. +// +// DISCLAIMER +// This disclaimer is not a license and does not grant any +// rights to the materials distributed herewith. 
Except as +// otherwise provided in a valid license issued to you by +// Xilinx, and to the maximum extent permitted by applicable +// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +// (2) Xilinx shall not be liable (whether in contract or tort, +// including negligence, or under any other theory of +// liability) for any loss or damage of any kind or nature +// related to, arising under or in connection with these +// materials, including for any direct, or any indirect, +// special, incidental, or consequential loss or damage +// (including loss of data, profits, goodwill, or any type of +// loss or damage suffered as a result of any action brought +// by a third party) even if such damage or loss was +// reasonably foreseeable or Xilinx had been advised of the +// possibility of the same. +// +// CRITICAL APPLICATIONS +// Xilinx products are not designed or intended to be fail- +// safe, or for use in any application requiring fail-safe +// performance, such as life-support or safety devices or +// systems, Class III medical devices, nuclear facilities, +// applications related to the deployment of airbags, or any +// other applications that could lead to death, personal +// injury, or severe property or environmental damage +// (individually and collectively, "Critical +// Applications"). Customer assumes the sole risk and +// liability of any use of Xilinx products in Critical +// Applications, subject only to applicable laws and +// regulations governing limitations on product liability. +// +// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +// PART OF THIS FILE AT ALL TIMES. +// +// DO NOT MODIFY THIS FILE. 
+ +(* DowngradeIPIdentifiedWarnings="yes" *) +module aws_v1_0_2_top # + ( + parameter integer C_MODE = 0, + // 0 = AWS HLS (IPI) flow: All interfaces are available. + // 1 = SDx Unified flow Memory-only mode: Only DDR interfaces and related ports are available. + // 2 = SDx Unified flow Non-memory mode: All interfaces except DDR-related are available. + parameter integer C_DDR_A_PRESENT = 0, + parameter integer C_DDR_B_PRESENT = 0, + parameter integer C_DDR_D_PRESENT = 0, + parameter integer C_NUM_A_CLOCKS = 1, + parameter integer C_NUM_B_CLOCKS = 0, + parameter integer C_NUM_C_CLOCKS = 0, + parameter [15:0] C_VENDOR_ID = 16'h1D0F, + parameter [15:0] C_DEVICE_ID = 16'hF010, + parameter [15:0] C_SUBSYSTEM_VENDOR_ID = 16'hFEDD, + parameter [15:0] C_SUBSYSTEM_ID = 16'h1D51, + parameter C_CLOCK_A0_PERIOD = "4.0", + parameter C_CLOCK_A1_PERIOD = "8.0", + parameter C_CLOCK_B0_PERIOD = "4.0", + parameter C_CLOCK_C0_PERIOD = "3.333333", + parameter integer C_CLOCK_A_RECIPE = 1, + parameter integer C_CLOCK_B_RECIPE = 0, + parameter integer C_CLOCK_C_RECIPE = 0, + parameter integer C_NUM_STAGES_STATS = 1, + parameter integer C_PCIM_PRESENT = 0 + ) + ( + //-------------------------------- + // S_SH bus-interface ports + //-------------------------------- + `include "aws_v1_0_2_ports.vh" // Subset of Amazon-provided port definitions (without Debug Bridge) + , + + //-------------------------------- + // Globals + //-------------------------------- + output wire clk_main_a0_out, //Main clock. 
This is the clock for all of the interfaces of AWS + output wire clk_extra_a1_out, //Extra clock A1 (phase aligned to "A" clock group) + output wire clk_extra_a2_out, //Extra clock A2 (phase aligned to "A" clock group) + output wire clk_extra_a3_out, //Extra clock A3 (phase aligned to "A" clock group) + output wire clk_extra_b0_out, //Extra clock B0 (phase aligned to "B" clock group) + output wire clk_extra_b1_out, //Extra clock B1 (phase aligned to "B" clock group) + output wire clk_extra_c0_out, //Extra clock C0 (phase aligned to "C" clock group) + output wire clk_extra_c1_out, //Extra clock C1 (phase aligned to "C" clock group) + + output wire rst_main_n_out, //Reset sync to main clock. + output wire kernel_rst_n_out, //Kernel_reset. + + output wire flr_assert, //Function level reset assertion. + input wire flr_done, //Function level reset done acknowledge + + output wire [15:0] status_vdip, //Virtual DIP switches. + input wire [15:0] status_vled, //Virtual LEDs + + input wire [15:0] irq_req, // User-defined interrupt request + output wire [15:0] irq_ack, // User-defined interrupt acknowledge + + output wire [63:0] glcount0, //Global counter 0 + output wire [63:0] glcount1, //Global counter 1 + + //-------------------------------- + // S_AXI_DDRA bus-interface ports + //-------------------------------- + input wire [15:0] s_axi_ddra_awid, + input wire [63:0] s_axi_ddra_awaddr, + input wire [7:0] s_axi_ddra_awlen, + input wire [2:0] s_axi_ddra_awsize, + input wire s_axi_ddra_awvalid, + output wire s_axi_ddra_awready, + input wire [511:0] s_axi_ddra_wdata, + input wire [63:0] s_axi_ddra_wstrb, + input wire s_axi_ddra_wlast, + input wire s_axi_ddra_wvalid, + output wire s_axi_ddra_wready, + output wire [15:0] s_axi_ddra_bid, + output wire [1:0] s_axi_ddra_bresp, + output wire s_axi_ddra_bvalid, + input wire s_axi_ddra_bready, + input wire [15:0] s_axi_ddra_arid, + input wire [63:0] s_axi_ddra_araddr, + input wire [7:0] s_axi_ddra_arlen, + input wire [2:0] 
s_axi_ddra_arsize, + input wire s_axi_ddra_arvalid, + output wire s_axi_ddra_arready, + output wire [15:0] s_axi_ddra_rid, + output wire [511:0] s_axi_ddra_rdata, + output wire [1:0] s_axi_ddra_rresp, + output wire s_axi_ddra_rlast, + output wire s_axi_ddra_rvalid, + input wire s_axi_ddra_rready, + + output wire ddra_is_ready, + + //-------------------------------- + // S_AXI_DDRB bus-interface ports + //-------------------------------- + input wire [15:0] s_axi_ddrb_awid, + input wire [63:0] s_axi_ddrb_awaddr, + input wire [7:0] s_axi_ddrb_awlen, + input wire [2:0] s_axi_ddrb_awsize, + input wire s_axi_ddrb_awvalid, + output wire s_axi_ddrb_awready, + input wire [511:0] s_axi_ddrb_wdata, + input wire [63:0] s_axi_ddrb_wstrb, + input wire s_axi_ddrb_wlast, + input wire s_axi_ddrb_wvalid, + output wire s_axi_ddrb_wready, + output wire [15:0] s_axi_ddrb_bid, + output wire [1:0] s_axi_ddrb_bresp, + output wire s_axi_ddrb_bvalid, + input wire s_axi_ddrb_bready, + input wire [15:0] s_axi_ddrb_arid, + input wire [63:0] s_axi_ddrb_araddr, + input wire [7:0] s_axi_ddrb_arlen, + input wire [2:0] s_axi_ddrb_arsize, + input wire s_axi_ddrb_arvalid, + output wire s_axi_ddrb_arready, + output wire [15:0] s_axi_ddrb_rid, + output wire [511:0] s_axi_ddrb_rdata, + output wire [1:0] s_axi_ddrb_rresp, + output wire s_axi_ddrb_rlast, + output wire s_axi_ddrb_rvalid, + input wire s_axi_ddrb_rready, + + output wire ddrb_is_ready, + + //-------------------------------- + // S_AXI_DDRC bus-interface ports + //-------------------------------- + input wire [15:0] s_axi_ddrc_awid, + input wire [63:0] s_axi_ddrc_awaddr, + input wire [7:0] s_axi_ddrc_awlen, + input wire [2:0] s_axi_ddrc_awsize, + input wire s_axi_ddrc_awvalid, + output wire s_axi_ddrc_awready, + input wire [511:0] s_axi_ddrc_wdata, + input wire [63:0] s_axi_ddrc_wstrb, + input wire s_axi_ddrc_wlast, + input wire s_axi_ddrc_wvalid, + output wire s_axi_ddrc_wready, + output wire [15:0] s_axi_ddrc_bid, + output wire [1:0] 
s_axi_ddrc_bresp, + output wire s_axi_ddrc_bvalid, + input wire s_axi_ddrc_bready, + input wire [15:0] s_axi_ddrc_arid, + input wire [63:0] s_axi_ddrc_araddr, + input wire [7:0] s_axi_ddrc_arlen, + input wire [2:0] s_axi_ddrc_arsize, + input wire s_axi_ddrc_arvalid, + output wire s_axi_ddrc_arready, + output wire [15:0] s_axi_ddrc_rid, + output wire [511:0] s_axi_ddrc_rdata, + output wire [1:0] s_axi_ddrc_rresp, + output wire s_axi_ddrc_rlast, + output wire s_axi_ddrc_rvalid, + input wire s_axi_ddrc_rready, + + output wire ddrc_is_ready, + + //-------------------------------- + // S_AXI_DDRD bus-interface ports + //-------------------------------- + input wire [15:0] s_axi_ddrd_awid, + input wire [63:0] s_axi_ddrd_awaddr, + input wire [7:0] s_axi_ddrd_awlen, + input wire [2:0] s_axi_ddrd_awsize, + input wire s_axi_ddrd_awvalid, + output wire s_axi_ddrd_awready, + input wire [511:0] s_axi_ddrd_wdata, + input wire [63:0] s_axi_ddrd_wstrb, + input wire s_axi_ddrd_wlast, + input wire s_axi_ddrd_wvalid, + output wire s_axi_ddrd_wready, + output wire [15:0] s_axi_ddrd_bid, + output wire [1:0] s_axi_ddrd_bresp, + output wire s_axi_ddrd_bvalid, + input wire s_axi_ddrd_bready, + input wire [15:0] s_axi_ddrd_arid, + input wire [63:0] s_axi_ddrd_araddr, + input wire [7:0] s_axi_ddrd_arlen, + input wire [2:0] s_axi_ddrd_arsize, + input wire s_axi_ddrd_arvalid, + output wire s_axi_ddrd_arready, + output wire [15:0] s_axi_ddrd_rid, + output wire [511:0] s_axi_ddrd_rdata, + output wire [1:0] s_axi_ddrd_rresp, + output wire s_axi_ddrd_rlast, + output wire s_axi_ddrd_rvalid, + input wire s_axi_ddrd_rready, + + output wire ddrd_is_ready, + + //-------------------------------- + // M_AXI_SDA bus-interface ports + //-------------------------------- + output wire [31:0] m_axi_sda_awaddr, + output wire m_axi_sda_awvalid, + input wire m_axi_sda_awready, + output wire [31:0] m_axi_sda_wdata, + output wire [3:0] m_axi_sda_wstrb, + output wire m_axi_sda_wvalid, + input wire 
m_axi_sda_wready, + input wire [1:0] m_axi_sda_bresp, + input wire m_axi_sda_bvalid, + output wire m_axi_sda_bready, + output wire [31:0] m_axi_sda_araddr, + output wire m_axi_sda_arvalid, + input wire m_axi_sda_arready, + input wire [31:0] m_axi_sda_rdata, + input wire [1:0] m_axi_sda_rresp, + input wire m_axi_sda_rvalid, + output wire m_axi_sda_rready, + + //-------------------------------- + // M_AXI_OCL bus-interface ports + //-------------------------------- + output wire [31:0] m_axi_ocl_awaddr, + output wire m_axi_ocl_awvalid, + input wire m_axi_ocl_awready, + output wire [31:0] m_axi_ocl_wdata, + output wire [3:0] m_axi_ocl_wstrb, + output wire m_axi_ocl_wvalid, + input wire m_axi_ocl_wready, + input wire [1:0] m_axi_ocl_bresp, + input wire m_axi_ocl_bvalid, + output wire m_axi_ocl_bready, + output wire [31:0] m_axi_ocl_araddr, + output wire m_axi_ocl_arvalid, + input wire m_axi_ocl_arready, + input wire [31:0] m_axi_ocl_rdata, + input wire [1:0] m_axi_ocl_rresp, + input wire m_axi_ocl_rvalid, + output wire m_axi_ocl_rready, + + //-------------------------------- + // M_AXI_BAR1 bus-interface ports + //-------------------------------- + output wire [31:0] m_axi_bar1_awaddr, + output wire m_axi_bar1_awvalid, + input wire m_axi_bar1_awready, + output wire [31:0] m_axi_bar1_wdata, + output wire [3:0] m_axi_bar1_wstrb, + output wire m_axi_bar1_wvalid, + input wire m_axi_bar1_wready, + input wire [1:0] m_axi_bar1_bresp, + input wire m_axi_bar1_bvalid, + output wire m_axi_bar1_bready, + output wire [31:0] m_axi_bar1_araddr, + output wire m_axi_bar1_arvalid, + input wire m_axi_bar1_arready, + input wire [31:0] m_axi_bar1_rdata, + input wire [1:0] m_axi_bar1_rresp, + input wire m_axi_bar1_rvalid, + output wire m_axi_bar1_rready, + + //-------------------------------- + // M_AXI_PCIS bus-interface ports (SH transactions to CL) + //-------------------------------- + output wire [5:0] m_axi_pcis_awid, + output wire [63:0] m_axi_pcis_awaddr, + output wire [7:0] 
m_axi_pcis_awlen, + output wire [2:0] m_axi_pcis_awsize, + output wire m_axi_pcis_awvalid, + input wire m_axi_pcis_awready, + output wire [511:0] m_axi_pcis_wdata, + output wire [63:0] m_axi_pcis_wstrb, + output wire m_axi_pcis_wlast, + output wire m_axi_pcis_wvalid, + input wire m_axi_pcis_wready, + input wire [5:0] m_axi_pcis_bid, + input wire [1:0] m_axi_pcis_bresp, + input wire m_axi_pcis_bvalid, + output wire m_axi_pcis_bready, + output wire [5:0] m_axi_pcis_arid, + output wire [63:0] m_axi_pcis_araddr, + output wire [7:0] m_axi_pcis_arlen, + output wire [2:0] m_axi_pcis_arsize, + output wire m_axi_pcis_arvalid, + input wire m_axi_pcis_arready, + input wire [5:0] m_axi_pcis_rid, + input wire [511:0] m_axi_pcis_rdata, + input wire [1:0] m_axi_pcis_rresp, + input wire m_axi_pcis_rlast, + input wire m_axi_pcis_rvalid, + output wire m_axi_pcis_rready, + output wire [1:0] m_axi_pcis_awburst, + output wire [1:0] m_axi_pcis_arburst, + + //-------------------------------- + // S_AXI_PCIM bus-interface ports (CL transactions to SH) + //-------------------------------- + input wire [15:0] s_axi_pcim_awid, + input wire [63:0] s_axi_pcim_awaddr, + input wire [7:0] s_axi_pcim_awlen, + input wire [2:0] s_axi_pcim_awsize, + input wire [18:0] s_axi_pcim_awuser, + input wire s_axi_pcim_awvalid, + output wire s_axi_pcim_awready, + input wire [511:0] s_axi_pcim_wdata, + input wire [63:0] s_axi_pcim_wstrb, + input wire s_axi_pcim_wlast, + input wire s_axi_pcim_wvalid, + output wire s_axi_pcim_wready, + output wire [15:0] s_axi_pcim_bid, + output wire [1:0] s_axi_pcim_bresp, + output wire s_axi_pcim_bvalid, + input wire s_axi_pcim_bready, + input wire [15:0] s_axi_pcim_arid, + input wire [63:0] s_axi_pcim_araddr, + input wire [7:0] s_axi_pcim_arlen, + input wire [2:0] s_axi_pcim_arsize, + input wire [18:0] s_axi_pcim_aruser, + input wire s_axi_pcim_arvalid, + output wire s_axi_pcim_arready, + output wire [15:0] s_axi_pcim_rid, + output wire [511:0] s_axi_pcim_rdata, + output wire 
[1:0] s_axi_pcim_rresp, + output wire s_axi_pcim_rlast, + output wire s_axi_pcim_rvalid, + input wire s_axi_pcim_rready, + + output wire [1:0] cfg_max_payload_out, //Max payload size - 00:128B, 01:256B, 10:512B + output wire [2:0] cfg_max_read_req_out //Max read requst size - 000b:128B, 001b:256B, 010b:512B, 011b:1024B + ); + + generate + + assign clk_main_a0_out = clk_main_a0 ; + assign clk_extra_a1_out = C_NUM_A_CLOCKS>1 ? clk_extra_a1 : 1'b0 ; + assign clk_extra_a2_out = C_NUM_A_CLOCKS>2 ? clk_extra_a2 : 1'b0 ; + assign clk_extra_a3_out = C_NUM_A_CLOCKS>3 ? clk_extra_a3 : 1'b0 ; + assign clk_extra_b0_out = C_NUM_B_CLOCKS>0 ? clk_extra_b0 : 1'b0 ; + assign clk_extra_b1_out = C_NUM_B_CLOCKS>1 ? clk_extra_b1 : 1'b0 ; + assign clk_extra_c0_out = C_NUM_C_CLOCKS>0 ? clk_extra_c0 : 1'b0 ; + assign clk_extra_c1_out = C_NUM_C_CLOCKS>1 ? clk_extra_c1 : 1'b0 ; + assign rst_main_n_out = rst_main_n ; + assign kernel_rst_n_out = kernel_rst_n ; + assign flr_assert = sh_cl_flr_assert ; + assign status_vdip = sh_cl_status_vdip ; + assign irq_ack = sh_cl_apppf_irq_ack ; + assign glcount0 = sh_cl_glcount0 ; + assign glcount1 = sh_cl_glcount1 ; + + assign cl_sh_flr_done = flr_done ; + assign cl_sh_status_vled = status_vled ; + assign cl_sh_apppf_irq_req = irq_req ; + + assign cl_sh_status0 = 0 ; + assign cl_sh_status1 = 0 ; + assign cl_sh_id0 = {C_DEVICE_ID, C_VENDOR_ID} ; + assign cl_sh_id1 = {C_SUBSYSTEM_ID, C_SUBSYSTEM_VENDOR_ID} ; + + assign cl_sh_dma_wr_full = 1'b0; + assign cl_sh_dma_rd_full = 1'b0; + + assign cl_sh_ddr_awid = s_axi_ddrc_awid ; + assign cl_sh_ddr_awaddr = s_axi_ddrc_awaddr ; + assign cl_sh_ddr_awlen = s_axi_ddrc_awlen ; + assign cl_sh_ddr_awsize = s_axi_ddrc_awsize ; + assign cl_sh_ddr_awburst = 2'b01 ; + assign cl_sh_ddr_awvalid = s_axi_ddrc_awvalid ; + assign cl_sh_ddr_wdata = s_axi_ddrc_wdata ; + assign cl_sh_ddr_wstrb = s_axi_ddrc_wstrb ; + assign cl_sh_ddr_wlast = s_axi_ddrc_wlast ; + assign cl_sh_ddr_wvalid = s_axi_ddrc_wvalid ; + assign 
cl_sh_ddr_bready = s_axi_ddrc_bready ; + assign cl_sh_ddr_arid = s_axi_ddrc_arid ; + assign cl_sh_ddr_araddr = s_axi_ddrc_araddr ; + assign cl_sh_ddr_arlen = s_axi_ddrc_arlen ; + assign cl_sh_ddr_arsize = s_axi_ddrc_arsize ; + assign cl_sh_ddr_arburst = 2'b01 ; + assign cl_sh_ddr_arvalid = s_axi_ddrc_arvalid ; + assign cl_sh_ddr_rready = s_axi_ddrc_rready ; + + assign s_axi_ddrc_awready = sh_cl_ddr_awready ; + assign s_axi_ddrc_wready = sh_cl_ddr_wready ; + assign s_axi_ddrc_bid = sh_cl_ddr_bid ; + assign s_axi_ddrc_bresp = sh_cl_ddr_bresp ; + assign s_axi_ddrc_bvalid = sh_cl_ddr_bvalid ; + assign s_axi_ddrc_arready = sh_cl_ddr_arready ; + assign s_axi_ddrc_rid = sh_cl_ddr_rid ; + assign s_axi_ddrc_rdata = sh_cl_ddr_rdata ; + assign s_axi_ddrc_rresp = sh_cl_ddr_rresp ; + assign s_axi_ddrc_rlast = sh_cl_ddr_rlast ; + assign s_axi_ddrc_rvalid = sh_cl_ddr_rvalid ; + assign ddrc_is_ready = sh_cl_ddr_is_ready ; + + assign cl_sh_ddr_wid = 0 ; + + assign cl_sda_awready = m_axi_sda_awready ; + assign cl_sda_wready = m_axi_sda_wready ; + assign cl_sda_bresp = m_axi_sda_bresp ; + assign cl_sda_bvalid = m_axi_sda_bvalid ; + assign cl_sda_arready = m_axi_sda_arready ; + assign cl_sda_rdata = m_axi_sda_rdata ; + assign cl_sda_rresp = m_axi_sda_rresp ; + assign cl_sda_rvalid = m_axi_sda_rvalid ; + + assign m_axi_sda_awaddr = sda_cl_awaddr ; + assign m_axi_sda_awvalid = sda_cl_awvalid ; + assign m_axi_sda_wdata = sda_cl_wdata ; + assign m_axi_sda_wstrb = sda_cl_wstrb ; + assign m_axi_sda_wvalid = sda_cl_wvalid ; + assign m_axi_sda_bready = sda_cl_bready ; + assign m_axi_sda_araddr = sda_cl_araddr ; + assign m_axi_sda_arvalid = sda_cl_arvalid ; + assign m_axi_sda_rready = sda_cl_rready ; + + assign ocl_sh_awready = m_axi_ocl_awready ; + assign ocl_sh_wready = m_axi_ocl_wready ; + assign ocl_sh_bresp = m_axi_ocl_bresp ; + assign ocl_sh_bvalid = m_axi_ocl_bvalid ; + assign ocl_sh_arready = m_axi_ocl_arready ; + assign ocl_sh_rdata = m_axi_ocl_rdata ; + assign ocl_sh_rresp = 
m_axi_ocl_rresp ; + assign ocl_sh_rvalid = m_axi_ocl_rvalid ; + + assign m_axi_ocl_awaddr = sh_ocl_awaddr ; + assign m_axi_ocl_awvalid = sh_ocl_awvalid ; + assign m_axi_ocl_wdata = sh_ocl_wdata ; + assign m_axi_ocl_wstrb = sh_ocl_wstrb ; + assign m_axi_ocl_wvalid = sh_ocl_wvalid ; + assign m_axi_ocl_bready = sh_ocl_bready ; + assign m_axi_ocl_araddr = sh_ocl_araddr ; + assign m_axi_ocl_arvalid = sh_ocl_arvalid ; + assign m_axi_ocl_rready = sh_ocl_rready ; + + assign bar1_sh_awready = m_axi_bar1_awready ; + assign bar1_sh_wready = m_axi_bar1_wready ; + assign bar1_sh_bresp = m_axi_bar1_bresp ; + assign bar1_sh_bvalid = m_axi_bar1_bvalid ; + assign bar1_sh_arready = m_axi_bar1_arready ; + assign bar1_sh_rdata = m_axi_bar1_rdata ; + assign bar1_sh_rresp = m_axi_bar1_rresp ; + assign bar1_sh_rvalid = m_axi_bar1_rvalid ; + + assign m_axi_bar1_awaddr = sh_bar1_awaddr ; + assign m_axi_bar1_awvalid = sh_bar1_awvalid ; + assign m_axi_bar1_wdata = sh_bar1_wdata ; + assign m_axi_bar1_wstrb = sh_bar1_wstrb ; + assign m_axi_bar1_wvalid = sh_bar1_wvalid ; + assign m_axi_bar1_bready = sh_bar1_bready ; + assign m_axi_bar1_araddr = sh_bar1_araddr ; + assign m_axi_bar1_arvalid = sh_bar1_arvalid ; + assign m_axi_bar1_rready = sh_bar1_rready ; + + assign cl_sh_dma_pcis_awready = m_axi_pcis_awready ; + assign cl_sh_dma_pcis_wready = m_axi_pcis_wready ; + assign cl_sh_dma_pcis_bid = m_axi_pcis_bid ; + assign cl_sh_dma_pcis_bresp = m_axi_pcis_bresp ; + assign cl_sh_dma_pcis_bvalid = m_axi_pcis_bvalid ; + assign cl_sh_dma_pcis_arready = m_axi_pcis_arready ; + assign cl_sh_dma_pcis_rid = m_axi_pcis_rid ; + assign cl_sh_dma_pcis_rdata = m_axi_pcis_rdata ; + assign cl_sh_dma_pcis_rresp = m_axi_pcis_rresp ; + assign cl_sh_dma_pcis_rlast = m_axi_pcis_rlast ; + assign cl_sh_dma_pcis_rvalid = m_axi_pcis_rvalid ; + + assign m_axi_pcis_awid = sh_cl_dma_pcis_awid ; + assign m_axi_pcis_awaddr = sh_cl_dma_pcis_awaddr ; + assign m_axi_pcis_awlen = sh_cl_dma_pcis_awlen ; + assign m_axi_pcis_awsize = 
sh_cl_dma_pcis_awsize ; + assign m_axi_pcis_awvalid = sh_cl_dma_pcis_awvalid ; + assign m_axi_pcis_wdata = sh_cl_dma_pcis_wdata ; + assign m_axi_pcis_wstrb = sh_cl_dma_pcis_wstrb ; + assign m_axi_pcis_wlast = sh_cl_dma_pcis_wlast ; + assign m_axi_pcis_wvalid = sh_cl_dma_pcis_wvalid ; + assign m_axi_pcis_bready = sh_cl_dma_pcis_bready ; + assign m_axi_pcis_arid = sh_cl_dma_pcis_arid ; + assign m_axi_pcis_araddr = sh_cl_dma_pcis_araddr ; + assign m_axi_pcis_arlen = sh_cl_dma_pcis_arlen ; + assign m_axi_pcis_arsize = sh_cl_dma_pcis_arsize ; + assign m_axi_pcis_arvalid = sh_cl_dma_pcis_arvalid ; + assign m_axi_pcis_rready = sh_cl_dma_pcis_rready ; + assign m_axi_pcis_awburst = 2'b01 ; + assign m_axi_pcis_arburst = 2'b01 ; + + assign cl_sh_pcim_awid = s_axi_pcim_awid ; + assign cl_sh_pcim_awaddr = s_axi_pcim_awaddr ; + assign cl_sh_pcim_awlen = s_axi_pcim_awlen ; + assign cl_sh_pcim_awsize = s_axi_pcim_awsize ; + assign cl_sh_pcim_awuser = s_axi_pcim_awuser ; + assign cl_sh_pcim_awvalid = s_axi_pcim_awvalid ; + assign cl_sh_pcim_wdata = s_axi_pcim_wdata ; + assign cl_sh_pcim_wstrb = s_axi_pcim_wstrb ; + assign cl_sh_pcim_wlast = s_axi_pcim_wlast ; + assign cl_sh_pcim_wvalid = s_axi_pcim_wvalid ; + assign cl_sh_pcim_bready = s_axi_pcim_bready ; + assign cl_sh_pcim_arid = s_axi_pcim_arid ; + assign cl_sh_pcim_araddr = s_axi_pcim_araddr ; + assign cl_sh_pcim_arlen = s_axi_pcim_arlen ; + assign cl_sh_pcim_arsize = s_axi_pcim_arsize ; + assign cl_sh_pcim_aruser = s_axi_pcim_aruser ; + assign cl_sh_pcim_arvalid = s_axi_pcim_arvalid ; + assign cl_sh_pcim_rready = s_axi_pcim_rready ; + + assign s_axi_pcim_awready = sh_cl_pcim_awready ; + assign s_axi_pcim_wready = sh_cl_pcim_wready ; + assign s_axi_pcim_bid = sh_cl_pcim_bid ; + assign s_axi_pcim_bresp = sh_cl_pcim_bresp ; + assign s_axi_pcim_bvalid = sh_cl_pcim_bvalid ; + assign s_axi_pcim_arready = sh_cl_pcim_arready ; + assign s_axi_pcim_rid = sh_cl_pcim_rid ; + assign s_axi_pcim_rdata = sh_cl_pcim_rdata ; + assign 
s_axi_pcim_rresp = sh_cl_pcim_rresp ; + assign s_axi_pcim_rlast = sh_cl_pcim_rlast ; + assign s_axi_pcim_rvalid = sh_cl_pcim_rvalid ; + assign cfg_max_payload_out = cfg_max_payload ; + assign cfg_max_read_req_out = cfg_max_read_req ; + + if ((C_MODE == 0) || (C_MODE == 1)) begin : gen_mem + + logic [15:0] cl_sh_ddr_awid_2d[2:0]; + logic [63:0] cl_sh_ddr_awaddr_2d[2:0]; + logic [7:0] cl_sh_ddr_awlen_2d[2:0]; + logic [2:0] cl_sh_ddr_awsize_2d[2:0]; + logic [1:0] cl_sh_ddr_awburst_2d[2:0]; + logic cl_sh_ddr_awvalid_2d[2:0]; + logic [2:0] sh_cl_ddr_awready_2d; + logic [15:0] cl_sh_ddr_wid_2d[2:0]; + logic [511:0] cl_sh_ddr_wdata_2d[2:0]; + logic [63:0] cl_sh_ddr_wstrb_2d[2:0]; + logic [2:0] cl_sh_ddr_wlast_2d; + logic [2:0] cl_sh_ddr_wvalid_2d; + logic [2:0] sh_cl_ddr_wready_2d; + logic [15:0] sh_cl_ddr_bid_2d[2:0]; + logic [1:0] sh_cl_ddr_bresp_2d[2:0]; + logic [2:0] sh_cl_ddr_bvalid_2d; + logic [2:0] cl_sh_ddr_bready_2d; + logic [15:0] cl_sh_ddr_arid_2d[2:0]; + logic [63:0] cl_sh_ddr_araddr_2d[2:0]; + logic [7:0] cl_sh_ddr_arlen_2d[2:0]; + logic [2:0] cl_sh_ddr_arsize_2d[2:0]; + logic [1:0] cl_sh_ddr_arburst_2d[2:0]; + logic [2:0] cl_sh_ddr_arvalid_2d; + logic [2:0] sh_cl_ddr_arready_2d; + logic [15:0] sh_cl_ddr_rid_2d[2:0]; + logic [511:0] sh_cl_ddr_rdata_2d[2:0]; + logic [1:0] sh_cl_ddr_rresp_2d[2:0]; + logic [2:0] sh_cl_ddr_rlast_2d; + logic [2:0] sh_cl_ddr_rvalid_2d; + logic [2:0] cl_sh_ddr_rready_2d; + logic [2:0] sh_cl_ddr_is_ready_2d; + + assign cl_sh_ddr_awid_2d[0] = s_axi_ddra_awid ; + assign cl_sh_ddr_awaddr_2d[0] = s_axi_ddra_awaddr ; + assign cl_sh_ddr_awlen_2d[0] = s_axi_ddra_awlen ; + assign cl_sh_ddr_awsize_2d[0] = s_axi_ddra_awsize ; + assign cl_sh_ddr_awburst_2d[0] = 2'b01 ; + assign cl_sh_ddr_awvalid_2d[0] = s_axi_ddra_awvalid ; + assign cl_sh_ddr_wid_2d[0] = 0 ; + assign cl_sh_ddr_wdata_2d[0] = s_axi_ddra_wdata ; + assign cl_sh_ddr_wstrb_2d[0] = s_axi_ddra_wstrb ; + assign cl_sh_ddr_wlast_2d[0] = s_axi_ddra_wlast ; + assign cl_sh_ddr_wvalid_2d[0] = 
s_axi_ddra_wvalid ; + assign cl_sh_ddr_bready_2d[0] = s_axi_ddra_bready ; + assign cl_sh_ddr_arid_2d[0] = s_axi_ddra_arid ; + assign cl_sh_ddr_araddr_2d[0] = s_axi_ddra_araddr ; + assign cl_sh_ddr_arlen_2d[0] = s_axi_ddra_arlen ; + assign cl_sh_ddr_arsize_2d[0] = s_axi_ddra_arsize ; + assign cl_sh_ddr_arburst_2d[0] = 2'b01 ; + assign cl_sh_ddr_arvalid_2d[0] = s_axi_ddra_arvalid ; + assign cl_sh_ddr_rready_2d[0] = s_axi_ddra_rready ; + + assign s_axi_ddra_awready = sh_cl_ddr_awready_2d[0] ; + assign s_axi_ddra_wready = sh_cl_ddr_wready_2d[0] ; + assign s_axi_ddra_bid = sh_cl_ddr_bid_2d[0] ; + assign s_axi_ddra_bresp = sh_cl_ddr_bresp_2d[0] ; + assign s_axi_ddra_bvalid = sh_cl_ddr_bvalid_2d[0] ; + assign s_axi_ddra_arready = sh_cl_ddr_arready_2d[0] ; + assign s_axi_ddra_rid = sh_cl_ddr_rid_2d[0] ; + assign s_axi_ddra_rdata = sh_cl_ddr_rdata_2d[0] ; + assign s_axi_ddra_rresp = sh_cl_ddr_rresp_2d[0] ; + assign s_axi_ddra_rlast = sh_cl_ddr_rlast_2d[0] ; + assign s_axi_ddra_rvalid = sh_cl_ddr_rvalid_2d[0] ; + assign ddra_is_ready = sh_cl_ddr_is_ready_2d[0]; + + assign cl_sh_ddr_awid_2d[1] = s_axi_ddrb_awid ; + assign cl_sh_ddr_awaddr_2d[1] = s_axi_ddrb_awaddr ; + assign cl_sh_ddr_awlen_2d[1] = s_axi_ddrb_awlen ; + assign cl_sh_ddr_awsize_2d[1] = s_axi_ddrb_awsize ; + assign cl_sh_ddr_awburst_2d[1] = 2'b01 ; + assign cl_sh_ddr_awvalid_2d[1] = s_axi_ddrb_awvalid ; + assign cl_sh_ddr_wid_2d[1] = 0 ; + assign cl_sh_ddr_wdata_2d[1] = s_axi_ddrb_wdata ; + assign cl_sh_ddr_wstrb_2d[1] = s_axi_ddrb_wstrb ; + assign cl_sh_ddr_wlast_2d[1] = s_axi_ddrb_wlast ; + assign cl_sh_ddr_wvalid_2d[1] = s_axi_ddrb_wvalid ; + assign cl_sh_ddr_bready_2d[1] = s_axi_ddrb_bready ; + assign cl_sh_ddr_arid_2d[1] = s_axi_ddrb_arid ; + assign cl_sh_ddr_araddr_2d[1] = s_axi_ddrb_araddr ; + assign cl_sh_ddr_arlen_2d[1] = s_axi_ddrb_arlen ; + assign cl_sh_ddr_arsize_2d[1] = s_axi_ddrb_arsize ; + assign cl_sh_ddr_arburst_2d[1] = 2'b01 ; + assign cl_sh_ddr_arvalid_2d[1] = s_axi_ddrb_arvalid ; + assign 
cl_sh_ddr_rready_2d[1] = s_axi_ddrb_rready ; + + assign s_axi_ddrb_awready = sh_cl_ddr_awready_2d[1] ; + assign s_axi_ddrb_wready = sh_cl_ddr_wready_2d[1] ; + assign s_axi_ddrb_bid = sh_cl_ddr_bid_2d[1] ; + assign s_axi_ddrb_bresp = sh_cl_ddr_bresp_2d[1] ; + assign s_axi_ddrb_bvalid = sh_cl_ddr_bvalid_2d[1] ; + assign s_axi_ddrb_arready = sh_cl_ddr_arready_2d[1] ; + assign s_axi_ddrb_rid = sh_cl_ddr_rid_2d[1] ; + assign s_axi_ddrb_rdata = sh_cl_ddr_rdata_2d[1] ; + assign s_axi_ddrb_rresp = sh_cl_ddr_rresp_2d[1] ; + assign s_axi_ddrb_rlast = sh_cl_ddr_rlast_2d[1] ; + assign s_axi_ddrb_rvalid = sh_cl_ddr_rvalid_2d[1] ; + assign ddrb_is_ready = sh_cl_ddr_is_ready_2d[1]; + + assign cl_sh_ddr_awid_2d[2] = s_axi_ddrd_awid ; + assign cl_sh_ddr_awaddr_2d[2] = s_axi_ddrd_awaddr ; + assign cl_sh_ddr_awlen_2d[2] = s_axi_ddrd_awlen ; + assign cl_sh_ddr_awsize_2d[2] = s_axi_ddrd_awsize ; + assign cl_sh_ddr_awburst_2d[2] = 2'b01 ; + assign cl_sh_ddr_awvalid_2d[2] = s_axi_ddrd_awvalid ; + assign cl_sh_ddr_wid_2d[2] = 0 ; + assign cl_sh_ddr_wdata_2d[2] = s_axi_ddrd_wdata ; + assign cl_sh_ddr_wstrb_2d[2] = s_axi_ddrd_wstrb ; + assign cl_sh_ddr_wlast_2d[2] = s_axi_ddrd_wlast ; + assign cl_sh_ddr_wvalid_2d[2] = s_axi_ddrd_wvalid ; + assign cl_sh_ddr_bready_2d[2] = s_axi_ddrd_bready ; + assign cl_sh_ddr_arid_2d[2] = s_axi_ddrd_arid ; + assign cl_sh_ddr_araddr_2d[2] = s_axi_ddrd_araddr ; + assign cl_sh_ddr_arlen_2d[2] = s_axi_ddrd_arlen ; + assign cl_sh_ddr_arsize_2d[2] = s_axi_ddrd_arsize ; + assign cl_sh_ddr_arburst_2d[2] = 2'b01 ; + assign cl_sh_ddr_arvalid_2d[2] = s_axi_ddrd_arvalid ; + assign cl_sh_ddr_rready_2d[2] = s_axi_ddrd_rready ; + + assign s_axi_ddrd_awready = sh_cl_ddr_awready_2d[2] ; + assign s_axi_ddrd_wready = sh_cl_ddr_wready_2d[2] ; + assign s_axi_ddrd_bid = sh_cl_ddr_bid_2d[2] ; + assign s_axi_ddrd_bresp = sh_cl_ddr_bresp_2d[2] ; + assign s_axi_ddrd_bvalid = sh_cl_ddr_bvalid_2d[2] ; + assign s_axi_ddrd_arready = sh_cl_ddr_arready_2d[2] ; + assign s_axi_ddrd_rid = 
sh_cl_ddr_rid_2d[2] ; + assign s_axi_ddrd_rdata = sh_cl_ddr_rdata_2d[2] ; + assign s_axi_ddrd_rresp = sh_cl_ddr_rresp_2d[2] ; + assign s_axi_ddrd_rlast = sh_cl_ddr_rlast_2d[2] ; + assign s_axi_ddrd_rvalid = sh_cl_ddr_rvalid_2d[2] ; + assign ddrd_is_ready = sh_cl_ddr_is_ready_2d[2]; + + logic ddr_aws_stat_ack0; + logic [31:0] ddr_aws_stat_rdata0; + logic [7:0] ddr_aws_stat_int0; + logic ddr_aws_stat_ack1; + logic [31:0] ddr_aws_stat_rdata1; + logic [7:0] ddr_aws_stat_int1; + logic ddr_aws_stat_ack2; + logic [31:0] ddr_aws_stat_rdata2; + logic [7:0] ddr_aws_stat_int2; + + logic [7:0] pipe_ddr_stat_addr0; + logic pipe_ddr_stat_wr0; + logic pipe_ddr_stat_rd0; + logic [31:0] pipe_ddr_stat_wdata0; + logic ddr_pipe_stat_ack0; + logic [31:0] ddr_pipe_stat_rdata0; + logic [7:0] ddr_pipe_stat_int0; + + logic [7:0] pipe_ddr_stat_addr1; + logic pipe_ddr_stat_wr1; + logic pipe_ddr_stat_rd1; + logic [31:0] pipe_ddr_stat_wdata1; + logic ddr_pipe_stat_ack1; + logic [31:0] ddr_pipe_stat_rdata1; + logic [7:0] ddr_pipe_stat_int1; + + logic [7:0] pipe_ddr_stat_addr2; + logic pipe_ddr_stat_wr2; + logic pipe_ddr_stat_rd2; + logic [31:0] pipe_ddr_stat_wdata2; + logic ddr_pipe_stat_ack2; + logic [31:0] ddr_pipe_stat_rdata2; + logic [7:0] ddr_pipe_stat_int2; + +//------------------------------------------------- +// Tie-offs when DDRs are disabled +//------------------------------------------------- + assign ddr_sh_stat_ack0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_ack0 : 1'b1; + assign ddr_sh_stat_rdata0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_rdata0 : 0; + assign ddr_sh_stat_int0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_int0 : 8'b0; + assign ddr_sh_stat_ack1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_ack1 : 1'b1; + assign ddr_sh_stat_rdata1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_rdata1 : 0; + assign ddr_sh_stat_int1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_int1 : 8'b0; + assign ddr_sh_stat_ack2 = (C_DDR_D_PRESENT!=0) ? ddr_aws_stat_ack2 : 1'b1; + assign ddr_sh_stat_rdata2 = (C_DDR_D_PRESENT!=0) ? 
ddr_aws_stat_rdata2 : 0; + assign ddr_sh_stat_int2 = (C_DDR_D_PRESENT!=0) ? ddr_aws_stat_int2 : 8'b0; + +//------------------------------------------------- +// Reset Synchronization +//------------------------------------------------- + logic pre_sync_rst_n; + logic sync_rst_n; + + always @(negedge rst_main_n or posedge clk_main_a0) begin + if (!rst_main_n) begin + pre_sync_rst_n <= 1'b0; + sync_rst_n <= 1'b0; + end else begin + pre_sync_rst_n <= 1'b1; + sync_rst_n <= pre_sync_rst_n; + end + end + + `ifdef FPGA_LESS_RST + `undef FPGA_LESS_RST + `endif + + lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata0 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata0), .out_bus(pipe_ddr_stat_wdata0)); + lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr0 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr0), .out_bus(pipe_ddr_stat_addr0)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr0), .out_bus(pipe_ddr_stat_wr0)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd0), .out_bus(pipe_ddr_stat_rd0)); + lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata0 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata0), .in_bus(ddr_pipe_stat_rdata0)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack0), .in_bus(ddr_pipe_stat_ack0)); + lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int0), .in_bus(ddr_pipe_stat_int0)); + + lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata1 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata1), .out_bus(pipe_ddr_stat_wdata1)); + lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr1 (.clk(clk_main_a0), .rst_n(1'b1), 
.in_bus(sh_ddr_stat_addr1), .out_bus(pipe_ddr_stat_addr1)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr1), .out_bus(pipe_ddr_stat_wr1)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd1), .out_bus(pipe_ddr_stat_rd1)); + lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata1 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata1), .in_bus(ddr_pipe_stat_rdata1)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack1), .in_bus(ddr_pipe_stat_ack1)); + lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int1), .in_bus(ddr_pipe_stat_int1)); + + lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata2 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata2), .out_bus(pipe_ddr_stat_wdata2)); + lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr2 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr2), .out_bus(pipe_ddr_stat_addr2)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr2), .out_bus(pipe_ddr_stat_wr2)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd2), .out_bus(pipe_ddr_stat_rd2)); + lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata2 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata2), .in_bus(ddr_pipe_stat_rdata2)); + lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack2), .in_bus(ddr_pipe_stat_ack2)); + lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int2 (.clk(clk_main_a0), .rst_n(sync_rst_n), 
.out_bus(ddr_aws_stat_int2), .in_bus(ddr_pipe_stat_int2)); + + sh_ddr #( + .DDR_A_PRESENT(C_DDR_A_PRESENT), + .DDR_B_PRESENT(C_DDR_B_PRESENT), + .DDR_D_PRESENT(C_DDR_D_PRESENT) + ) sh_ddr_0 + ( + .clk(clk_main_a0), + .rst_n(sync_rst_n), + + .stat_clk(clk_main_a0), + .stat_rst_n(sync_rst_n), + + .CLK_300M_DIMM0_DP(CLK_300M_DIMM0_DP), + .CLK_300M_DIMM0_DN(CLK_300M_DIMM0_DN), + .M_A_ACT_N(M_A_ACT_N), + .M_A_MA(M_A_MA), + .M_A_BA(M_A_BA), + .M_A_BG(M_A_BG), + .M_A_CKE(M_A_CKE), + .M_A_ODT(M_A_ODT), + .M_A_CS_N(M_A_CS_N), + .M_A_CLK_DN(M_A_CLK_DN), + .M_A_CLK_DP(M_A_CLK_DP), + .M_A_PAR(M_A_PAR), + .M_A_DQ(M_A_DQ), + .M_A_ECC(M_A_ECC), + .M_A_DQS_DP(M_A_DQS_DP), + .M_A_DQS_DN(M_A_DQS_DN), + .cl_RST_DIMM_A_N(cl_RST_DIMM_A_N), + + .CLK_300M_DIMM1_DP(CLK_300M_DIMM1_DP), + .CLK_300M_DIMM1_DN(CLK_300M_DIMM1_DN), + .M_B_ACT_N(M_B_ACT_N), + .M_B_MA(M_B_MA), + .M_B_BA(M_B_BA), + .M_B_BG(M_B_BG), + .M_B_CKE(M_B_CKE), + .M_B_ODT(M_B_ODT), + .M_B_CS_N(M_B_CS_N), + .M_B_CLK_DN(M_B_CLK_DN), + .M_B_CLK_DP(M_B_CLK_DP), + .M_B_PAR(M_B_PAR), + .M_B_DQ(M_B_DQ), + .M_B_ECC(M_B_ECC), + .M_B_DQS_DP(M_B_DQS_DP), + .M_B_DQS_DN(M_B_DQS_DN), + .cl_RST_DIMM_B_N(cl_RST_DIMM_B_N), + + .CLK_300M_DIMM3_DP(CLK_300M_DIMM3_DP), + .CLK_300M_DIMM3_DN(CLK_300M_DIMM3_DN), + .M_D_ACT_N(M_D_ACT_N), + .M_D_MA(M_D_MA), + .M_D_BA(M_D_BA), + .M_D_BG(M_D_BG), + .M_D_CKE(M_D_CKE), + .M_D_ODT(M_D_ODT), + .M_D_CS_N(M_D_CS_N), + .M_D_CLK_DN(M_D_CLK_DN), + .M_D_CLK_DP(M_D_CLK_DP), + .M_D_PAR(M_D_PAR), + .M_D_DQ(M_D_DQ), + .M_D_ECC(M_D_ECC), + .M_D_DQS_DP(M_D_DQS_DP), + .M_D_DQS_DN(M_D_DQS_DN), + .cl_RST_DIMM_D_N(cl_RST_DIMM_D_N), + + //------------------------------------------------------ + // AXI Slave Interfaces + //------------------------------------------------------ + .cl_sh_ddr_awid(cl_sh_ddr_awid_2d), + .cl_sh_ddr_awaddr(cl_sh_ddr_awaddr_2d), + .cl_sh_ddr_awlen(cl_sh_ddr_awlen_2d), + .cl_sh_ddr_awsize(cl_sh_ddr_awsize_2d), + .cl_sh_ddr_awburst(cl_sh_ddr_awburst_2d), + .cl_sh_ddr_awvalid(cl_sh_ddr_awvalid_2d), 
+ .sh_cl_ddr_awready(sh_cl_ddr_awready_2d), + + .cl_sh_ddr_wid(cl_sh_ddr_wid_2d), + .cl_sh_ddr_wdata(cl_sh_ddr_wdata_2d), + .cl_sh_ddr_wstrb(cl_sh_ddr_wstrb_2d), + .cl_sh_ddr_wlast(cl_sh_ddr_wlast_2d), + .cl_sh_ddr_wvalid(cl_sh_ddr_wvalid_2d), + .sh_cl_ddr_wready(sh_cl_ddr_wready_2d), + + .sh_cl_ddr_bid(sh_cl_ddr_bid_2d), + .sh_cl_ddr_bresp(sh_cl_ddr_bresp_2d), + .sh_cl_ddr_bvalid(sh_cl_ddr_bvalid_2d), + .cl_sh_ddr_bready(cl_sh_ddr_bready_2d), + + .cl_sh_ddr_arid(cl_sh_ddr_arid_2d), + .cl_sh_ddr_araddr(cl_sh_ddr_araddr_2d), + .cl_sh_ddr_arlen(cl_sh_ddr_arlen_2d), + .cl_sh_ddr_arsize(cl_sh_ddr_arsize_2d), + .cl_sh_ddr_arburst(cl_sh_ddr_arburst_2d), + .cl_sh_ddr_arvalid(cl_sh_ddr_arvalid_2d), + .sh_cl_ddr_arready(sh_cl_ddr_arready_2d), + + .sh_cl_ddr_rid(sh_cl_ddr_rid_2d), + .sh_cl_ddr_rdata(sh_cl_ddr_rdata_2d), + .sh_cl_ddr_rresp(sh_cl_ddr_rresp_2d), + .sh_cl_ddr_rlast(sh_cl_ddr_rlast_2d), + .sh_cl_ddr_rvalid(sh_cl_ddr_rvalid_2d), + .cl_sh_ddr_rready(cl_sh_ddr_rready_2d), + + .sh_cl_ddr_is_ready(sh_cl_ddr_is_ready_2d), + + .sh_ddr_stat_addr0 (pipe_ddr_stat_addr0 ), + .sh_ddr_stat_wr0 (pipe_ddr_stat_wr0 ), + .sh_ddr_stat_rd0 (pipe_ddr_stat_rd0 ), + .sh_ddr_stat_wdata0 (pipe_ddr_stat_wdata0), + .ddr_sh_stat_ack0 (ddr_pipe_stat_ack0 ), + .ddr_sh_stat_rdata0 (ddr_pipe_stat_rdata0), + .ddr_sh_stat_int0 (ddr_pipe_stat_int0 ), + + .sh_ddr_stat_addr1 (pipe_ddr_stat_addr1 ), + .sh_ddr_stat_wr1 (pipe_ddr_stat_wr1 ), + .sh_ddr_stat_rd1 (pipe_ddr_stat_rd1 ), + .sh_ddr_stat_wdata1 (pipe_ddr_stat_wdata1), + .ddr_sh_stat_ack1 (ddr_pipe_stat_ack1 ), + .ddr_sh_stat_rdata1 (ddr_pipe_stat_rdata1), + .ddr_sh_stat_int1 (ddr_pipe_stat_int1 ), + + .sh_ddr_stat_addr2 (pipe_ddr_stat_addr2 ), + .sh_ddr_stat_wr2 (pipe_ddr_stat_wr2 ), + .sh_ddr_stat_rd2 (pipe_ddr_stat_rd2 ), + .sh_ddr_stat_wdata2 (pipe_ddr_stat_wdata2), + .ddr_sh_stat_ack2 (ddr_pipe_stat_ack2 ), + .ddr_sh_stat_rdata2 (ddr_pipe_stat_rdata2), + .ddr_sh_stat_int2 (ddr_pipe_stat_int2 ) + + ); + + end else begin : gen_non_mem + + 
assign s_axi_ddra_awready = 0; + assign s_axi_ddra_wready = 0; + assign s_axi_ddra_bid = 0; + assign s_axi_ddra_bresp = 0; + assign s_axi_ddra_bvalid = 0; + assign s_axi_ddra_arready = 0; + assign s_axi_ddra_rid = 0; + assign s_axi_ddra_rdata = 0; + assign s_axi_ddra_rresp = 0; + assign s_axi_ddra_rlast = 1'b1; + assign s_axi_ddra_rvalid = 0; + assign ddra_is_ready = 0; + + assign s_axi_ddrb_awready = 0; + assign s_axi_ddrb_wready = 0; + assign s_axi_ddrb_bid = 0; + assign s_axi_ddrb_bresp = 0; + assign s_axi_ddrb_bvalid = 0; + assign s_axi_ddrb_arready = 0; + assign s_axi_ddrb_rid = 0; + assign s_axi_ddrb_rdata = 0; + assign s_axi_ddrb_rresp = 0; + assign s_axi_ddrb_rlast = 1'b1; + assign s_axi_ddrb_rvalid = 0; + assign ddrb_is_ready = 0; + + assign s_axi_ddrd_awready = 0; + assign s_axi_ddrd_wready = 0; + assign s_axi_ddrd_bid = 0; + assign s_axi_ddrd_bresp = 0; + assign s_axi_ddrd_bvalid = 0; + assign s_axi_ddrd_arready = 0; + assign s_axi_ddrd_rid = 0; + assign s_axi_ddrd_rdata = 0; + assign s_axi_ddrd_rresp = 0; + assign s_axi_ddrd_rlast = 1'b1; + assign s_axi_ddrd_rvalid = 0; + assign ddrd_is_ready = 0; + + assign ddr_sh_stat_ack0 = 1'b1; + assign ddr_sh_stat_rdata0 = 0; + assign ddr_sh_stat_int0 = 8'b0; + assign ddr_sh_stat_ack1 = 1'b1; + assign ddr_sh_stat_rdata1 = 0; + assign ddr_sh_stat_int1 = 8'b0; + assign ddr_sh_stat_ack2 = 1'b1; + assign ddr_sh_stat_rdata2 = 0; + assign ddr_sh_stat_int2 = 8'b0; + + assign M_A_ACT_N = 0; + assign M_A_MA = 0; + assign M_A_BA = 0; + assign M_A_BG = 0; + assign M_A_CKE = 0; + assign M_A_ODT = 0; + assign M_A_CS_N = 0; + assign M_A_CLK_DN = 0; + assign M_A_CLK_DP = 0; + assign M_A_PAR = 0; + assign cl_RST_DIMM_A_N = 0; + + assign M_B_ACT_N = 0; + assign M_B_MA = 0; + assign M_B_BA = 0; + assign M_B_BG = 0; + assign M_B_CKE = 0; + assign M_B_ODT = 0; + assign M_B_CS_N = 0; + assign M_B_CLK_DN = 0; + assign M_B_CLK_DP = 0; + assign M_B_PAR = 0; + assign cl_RST_DIMM_B_N = 0; + + assign M_D_ACT_N = 0; + assign M_D_MA = 0; + 
assign M_D_BA = 0; + assign M_D_BG = 0; + assign M_D_CKE = 0; + assign M_D_ODT = 0; + assign M_D_CS_N = 0; + assign M_D_CLK_DN = 0; + assign M_D_CLK_DP = 0; + assign M_D_PAR = 0; + assign cl_RST_DIMM_D_N = 0; + + end // gen_mem + endgenerate + +endmodule diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_vl_rfs.sv b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_vl_rfs.sv deleted file mode 100755 index edf00b8c..00000000 --- a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_vl_rfs.sv +++ /dev/null @@ -1,3076 +0,0 @@ -//---------------------------------------------------------------------------------- -//Copyright (c) 2014 -// -//Permission is hereby granted, free of charge, to any person obtaining a copy -//of this software and associated documentation files (the "Software"), to deal -//in the Software without restriction, including without limitation the rights -//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -//copies of the Software, and to permit persons to whom the Software is -//furnished to do so, subject to the following conditions: -// -//The above copyright notice and this permission notice shall be included in -//all copies or substantial portions of the Software. -// -//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -//THE SOFTWARE. 
-//---------------------------------------------------------------------------------- - -//simple pipeline - -//WIDTH is the width of the DATA -//STAGES is the number of stages (flops in the pipeline) -module lib_pipe #(parameter WIDTH=8, parameter STAGES=1) ( - input clk, - input rst_n, - - input[WIDTH-1:0] in_bus, - - output [WIDTH-1:0] out_bus - ); - -//Note the shreg_extract=no directs Xilinx to not infer shift registers which -// defeats using this as a pipeline - - -`ifdef FPGA_LESS_RST - (*shreg_extract="no"*) logic [WIDTH-1:0] pipe[STAGES-1:0] = '{default:'0}; -`else - (*shreg_extract="no"*) logic [WIDTH-1:0] pipe[STAGES-1:0]; -`endif - -//(*srl_style="register"*) logic [WIDTH-1:0] pipe [STAGES-1:0]; -// logic [WIDTH-1:0] pipe [STAGES-1:0]; - - integer i; - -`ifdef FPGA_LESS_RST - always @(posedge clk) -`else - always @(negedge rst_n or posedge clk) - if (!rst_n) - begin - for (i=0; i1) - begin - for (i=1; i1 ? clk_extra_a1 : 1'b0 ; - assign clk_extra_a2_out = C_NUM_A_CLOCKS>2 ? clk_extra_a2 : 1'b0 ; - assign clk_extra_a3_out = C_NUM_A_CLOCKS>3 ? clk_extra_a3 : 1'b0 ; - assign clk_extra_b0_out = C_NUM_B_CLOCKS>0 ? clk_extra_b0 : 1'b0 ; - assign clk_extra_b1_out = C_NUM_B_CLOCKS>1 ? clk_extra_b1 : 1'b0 ; - assign clk_extra_c0_out = C_NUM_C_CLOCKS>0 ? clk_extra_c0 : 1'b0 ; - assign clk_extra_c1_out = C_NUM_C_CLOCKS>1 ? 
clk_extra_c1 : 1'b0 ; - assign rst_main_n_out = rst_main_n ; - assign kernel_rst_n_out = kernel_rst_n ; - assign flr_assert = sh_cl_flr_assert ; - assign status_vdip = sh_cl_status_vdip ; - assign irq_ack = sh_cl_apppf_irq_ack ; - assign glcount0 = sh_cl_glcount0 ; - assign glcount1 = sh_cl_glcount1 ; - - assign cl_sh_flr_done = flr_done ; - assign cl_sh_status_vled = status_vled ; - assign cl_sh_apppf_irq_req = irq_req ; - - assign cl_sh_status0 = 0 ; - assign cl_sh_status1 = 0 ; - assign cl_sh_id0 = {C_DEVICE_ID, C_VENDOR_ID} ; - assign cl_sh_id1 = {C_SUBSYSTEM_ID, C_SUBSYSTEM_VENDOR_ID} ; - - assign cl_sh_dma_wr_full = 1'b0; - assign cl_sh_dma_rd_full = 1'b0; - - assign cl_sh_ddr_awid = s_axi_ddrc_awid ; - assign cl_sh_ddr_awaddr = s_axi_ddrc_awaddr ; - assign cl_sh_ddr_awlen = s_axi_ddrc_awlen ; - assign cl_sh_ddr_awsize = s_axi_ddrc_awsize ; - assign cl_sh_ddr_awburst = 2'b01 ; - assign cl_sh_ddr_awvalid = s_axi_ddrc_awvalid ; - assign cl_sh_ddr_wdata = s_axi_ddrc_wdata ; - assign cl_sh_ddr_wstrb = s_axi_ddrc_wstrb ; - assign cl_sh_ddr_wlast = s_axi_ddrc_wlast ; - assign cl_sh_ddr_wvalid = s_axi_ddrc_wvalid ; - assign cl_sh_ddr_bready = s_axi_ddrc_bready ; - assign cl_sh_ddr_arid = s_axi_ddrc_arid ; - assign cl_sh_ddr_araddr = s_axi_ddrc_araddr ; - assign cl_sh_ddr_arlen = s_axi_ddrc_arlen ; - assign cl_sh_ddr_arsize = s_axi_ddrc_arsize ; - assign cl_sh_ddr_arburst = 2'b01 ; - assign cl_sh_ddr_arvalid = s_axi_ddrc_arvalid ; - assign cl_sh_ddr_rready = s_axi_ddrc_rready ; - - assign s_axi_ddrc_awready = sh_cl_ddr_awready ; - assign s_axi_ddrc_wready = sh_cl_ddr_wready ; - assign s_axi_ddrc_bid = sh_cl_ddr_bid ; - assign s_axi_ddrc_bresp = sh_cl_ddr_bresp ; - assign s_axi_ddrc_bvalid = sh_cl_ddr_bvalid ; - assign s_axi_ddrc_arready = sh_cl_ddr_arready ; - assign s_axi_ddrc_rid = sh_cl_ddr_rid ; - assign s_axi_ddrc_rdata = sh_cl_ddr_rdata ; - assign s_axi_ddrc_rresp = sh_cl_ddr_rresp ; - assign s_axi_ddrc_rlast = sh_cl_ddr_rlast ; - assign s_axi_ddrc_rvalid = 
sh_cl_ddr_rvalid ; - assign ddrc_is_ready = sh_cl_ddr_is_ready ; - - assign cl_sh_ddr_wid = 0 ; - - assign cl_sda_awready = m_axi_sda_awready ; - assign cl_sda_wready = m_axi_sda_wready ; - assign cl_sda_bresp = m_axi_sda_bresp ; - assign cl_sda_bvalid = m_axi_sda_bvalid ; - assign cl_sda_arready = m_axi_sda_arready ; - assign cl_sda_rdata = m_axi_sda_rdata ; - assign cl_sda_rresp = m_axi_sda_rresp ; - assign cl_sda_rvalid = m_axi_sda_rvalid ; - - assign m_axi_sda_awaddr = sda_cl_awaddr ; - assign m_axi_sda_awvalid = sda_cl_awvalid ; - assign m_axi_sda_wdata = sda_cl_wdata ; - assign m_axi_sda_wstrb = sda_cl_wstrb ; - assign m_axi_sda_wvalid = sda_cl_wvalid ; - assign m_axi_sda_bready = sda_cl_bready ; - assign m_axi_sda_araddr = sda_cl_araddr ; - assign m_axi_sda_arvalid = sda_cl_arvalid ; - assign m_axi_sda_rready = sda_cl_rready ; - - assign ocl_sh_awready = m_axi_ocl_awready ; - assign ocl_sh_wready = m_axi_ocl_wready ; - assign ocl_sh_bresp = m_axi_ocl_bresp ; - assign ocl_sh_bvalid = m_axi_ocl_bvalid ; - assign ocl_sh_arready = m_axi_ocl_arready ; - assign ocl_sh_rdata = m_axi_ocl_rdata ; - assign ocl_sh_rresp = m_axi_ocl_rresp ; - assign ocl_sh_rvalid = m_axi_ocl_rvalid ; - - assign m_axi_ocl_awaddr = sh_ocl_awaddr ; - assign m_axi_ocl_awvalid = sh_ocl_awvalid ; - assign m_axi_ocl_wdata = sh_ocl_wdata ; - assign m_axi_ocl_wstrb = sh_ocl_wstrb ; - assign m_axi_ocl_wvalid = sh_ocl_wvalid ; - assign m_axi_ocl_bready = sh_ocl_bready ; - assign m_axi_ocl_araddr = sh_ocl_araddr ; - assign m_axi_ocl_arvalid = sh_ocl_arvalid ; - assign m_axi_ocl_rready = sh_ocl_rready ; - - assign bar1_sh_awready = m_axi_bar1_awready ; - assign bar1_sh_wready = m_axi_bar1_wready ; - assign bar1_sh_bresp = m_axi_bar1_bresp ; - assign bar1_sh_bvalid = m_axi_bar1_bvalid ; - assign bar1_sh_arready = m_axi_bar1_arready ; - assign bar1_sh_rdata = m_axi_bar1_rdata ; - assign bar1_sh_rresp = m_axi_bar1_rresp ; - assign bar1_sh_rvalid = m_axi_bar1_rvalid ; - - assign m_axi_bar1_awaddr = 
sh_bar1_awaddr ; - assign m_axi_bar1_awvalid = sh_bar1_awvalid ; - assign m_axi_bar1_wdata = sh_bar1_wdata ; - assign m_axi_bar1_wstrb = sh_bar1_wstrb ; - assign m_axi_bar1_wvalid = sh_bar1_wvalid ; - assign m_axi_bar1_bready = sh_bar1_bready ; - assign m_axi_bar1_araddr = sh_bar1_araddr ; - assign m_axi_bar1_arvalid = sh_bar1_arvalid ; - assign m_axi_bar1_rready = sh_bar1_rready ; - - assign cl_sh_dma_pcis_awready = m_axi_pcis_awready ; - assign cl_sh_dma_pcis_wready = m_axi_pcis_wready ; - assign cl_sh_dma_pcis_bid = m_axi_pcis_bid ; - assign cl_sh_dma_pcis_bresp = m_axi_pcis_bresp ; - assign cl_sh_dma_pcis_bvalid = m_axi_pcis_bvalid ; - assign cl_sh_dma_pcis_arready = m_axi_pcis_arready ; - assign cl_sh_dma_pcis_rid = m_axi_pcis_rid ; - assign cl_sh_dma_pcis_rdata = m_axi_pcis_rdata ; - assign cl_sh_dma_pcis_rresp = m_axi_pcis_rresp ; - assign cl_sh_dma_pcis_rlast = m_axi_pcis_rlast ; - assign cl_sh_dma_pcis_rvalid = m_axi_pcis_rvalid ; - - assign m_axi_pcis_awid = sh_cl_dma_pcis_awid ; - assign m_axi_pcis_awaddr = sh_cl_dma_pcis_awaddr ; - assign m_axi_pcis_awlen = sh_cl_dma_pcis_awlen ; - assign m_axi_pcis_awsize = sh_cl_dma_pcis_awsize ; - assign m_axi_pcis_awvalid = sh_cl_dma_pcis_awvalid ; - assign m_axi_pcis_wdata = sh_cl_dma_pcis_wdata ; - assign m_axi_pcis_wstrb = sh_cl_dma_pcis_wstrb ; - assign m_axi_pcis_wlast = sh_cl_dma_pcis_wlast ; - assign m_axi_pcis_wvalid = sh_cl_dma_pcis_wvalid ; - assign m_axi_pcis_bready = sh_cl_dma_pcis_bready ; - assign m_axi_pcis_arid = sh_cl_dma_pcis_arid ; - assign m_axi_pcis_araddr = sh_cl_dma_pcis_araddr ; - assign m_axi_pcis_arlen = sh_cl_dma_pcis_arlen ; - assign m_axi_pcis_arsize = sh_cl_dma_pcis_arsize ; - assign m_axi_pcis_arvalid = sh_cl_dma_pcis_arvalid ; - assign m_axi_pcis_rready = sh_cl_dma_pcis_rready ; - assign m_axi_pcis_awburst = 2'b01 ; - assign m_axi_pcis_arburst = 2'b01 ; - - assign cl_sh_pcim_awid = s_axi_pcim_awid ; - assign cl_sh_pcim_awaddr = s_axi_pcim_awaddr ; - assign cl_sh_pcim_awlen = 
s_axi_pcim_awlen ; - assign cl_sh_pcim_awsize = s_axi_pcim_awsize ; - assign cl_sh_pcim_awuser = s_axi_pcim_awuser ; - assign cl_sh_pcim_awvalid = s_axi_pcim_awvalid ; - assign cl_sh_pcim_wdata = s_axi_pcim_wdata ; - assign cl_sh_pcim_wstrb = s_axi_pcim_wstrb ; - assign cl_sh_pcim_wlast = s_axi_pcim_wlast ; - assign cl_sh_pcim_wvalid = s_axi_pcim_wvalid ; - assign cl_sh_pcim_bready = s_axi_pcim_bready ; - assign cl_sh_pcim_arid = s_axi_pcim_arid ; - assign cl_sh_pcim_araddr = s_axi_pcim_araddr ; - assign cl_sh_pcim_arlen = s_axi_pcim_arlen ; - assign cl_sh_pcim_arsize = s_axi_pcim_arsize ; - assign cl_sh_pcim_aruser = s_axi_pcim_aruser ; - assign cl_sh_pcim_arvalid = s_axi_pcim_arvalid ; - assign cl_sh_pcim_rready = s_axi_pcim_rready ; - - assign s_axi_pcim_awready = sh_cl_pcim_awready ; - assign s_axi_pcim_wready = sh_cl_pcim_wready ; - assign s_axi_pcim_bid = sh_cl_pcim_bid ; - assign s_axi_pcim_bresp = sh_cl_pcim_bresp ; - assign s_axi_pcim_bvalid = sh_cl_pcim_bvalid ; - assign s_axi_pcim_arready = sh_cl_pcim_arready ; - assign s_axi_pcim_rid = sh_cl_pcim_rid ; - assign s_axi_pcim_rdata = sh_cl_pcim_rdata ; - assign s_axi_pcim_rresp = sh_cl_pcim_rresp ; - assign s_axi_pcim_rlast = sh_cl_pcim_rlast ; - assign s_axi_pcim_rvalid = sh_cl_pcim_rvalid ; - assign cfg_max_payload_out = cfg_max_payload ; - assign cfg_max_read_req_out = cfg_max_read_req ; - - if ((C_MODE == 0) || (C_MODE == 1)) begin : gen_mem - - logic [15:0] cl_sh_ddr_awid_2d[2:0]; - logic [63:0] cl_sh_ddr_awaddr_2d[2:0]; - logic [7:0] cl_sh_ddr_awlen_2d[2:0]; - logic [2:0] cl_sh_ddr_awsize_2d[2:0]; - logic [1:0] cl_sh_ddr_awburst_2d[2:0]; - logic cl_sh_ddr_awvalid_2d[2:0]; - logic [2:0] sh_cl_ddr_awready_2d; - logic [15:0] cl_sh_ddr_wid_2d[2:0]; - logic [511:0] cl_sh_ddr_wdata_2d[2:0]; - logic [63:0] cl_sh_ddr_wstrb_2d[2:0]; - logic [2:0] cl_sh_ddr_wlast_2d; - logic [2:0] cl_sh_ddr_wvalid_2d; - logic [2:0] sh_cl_ddr_wready_2d; - logic [15:0] sh_cl_ddr_bid_2d[2:0]; - logic [1:0] sh_cl_ddr_bresp_2d[2:0]; 
- logic [2:0] sh_cl_ddr_bvalid_2d; - logic [2:0] cl_sh_ddr_bready_2d; - logic [15:0] cl_sh_ddr_arid_2d[2:0]; - logic [63:0] cl_sh_ddr_araddr_2d[2:0]; - logic [7:0] cl_sh_ddr_arlen_2d[2:0]; - logic [2:0] cl_sh_ddr_arsize_2d[2:0]; - logic [1:0] cl_sh_ddr_arburst_2d[2:0]; - logic [2:0] cl_sh_ddr_arvalid_2d; - logic [2:0] sh_cl_ddr_arready_2d; - logic [15:0] sh_cl_ddr_rid_2d[2:0]; - logic [511:0] sh_cl_ddr_rdata_2d[2:0]; - logic [1:0] sh_cl_ddr_rresp_2d[2:0]; - logic [2:0] sh_cl_ddr_rlast_2d; - logic [2:0] sh_cl_ddr_rvalid_2d; - logic [2:0] cl_sh_ddr_rready_2d; - logic [2:0] sh_cl_ddr_is_ready_2d; - - assign cl_sh_ddr_awid_2d[0] = s_axi_ddra_awid ; - assign cl_sh_ddr_awaddr_2d[0] = s_axi_ddra_awaddr ; - assign cl_sh_ddr_awlen_2d[0] = s_axi_ddra_awlen ; - assign cl_sh_ddr_awsize_2d[0] = s_axi_ddra_awsize ; - assign cl_sh_ddr_awburst_2d[0] = 2'b01 ; - assign cl_sh_ddr_awvalid_2d[0] = s_axi_ddra_awvalid ; - assign cl_sh_ddr_wid_2d[0] = 0 ; - assign cl_sh_ddr_wdata_2d[0] = s_axi_ddra_wdata ; - assign cl_sh_ddr_wstrb_2d[0] = s_axi_ddra_wstrb ; - assign cl_sh_ddr_wlast_2d[0] = s_axi_ddra_wlast ; - assign cl_sh_ddr_wvalid_2d[0] = s_axi_ddra_wvalid ; - assign cl_sh_ddr_bready_2d[0] = s_axi_ddra_bready ; - assign cl_sh_ddr_arid_2d[0] = s_axi_ddra_arid ; - assign cl_sh_ddr_araddr_2d[0] = s_axi_ddra_araddr ; - assign cl_sh_ddr_arlen_2d[0] = s_axi_ddra_arlen ; - assign cl_sh_ddr_arsize_2d[0] = s_axi_ddra_arsize ; - assign cl_sh_ddr_arburst_2d[0] = 2'b01 ; - assign cl_sh_ddr_arvalid_2d[0] = s_axi_ddra_arvalid ; - assign cl_sh_ddr_rready_2d[0] = s_axi_ddra_rready ; - - assign s_axi_ddra_awready = sh_cl_ddr_awready_2d[0] ; - assign s_axi_ddra_wready = sh_cl_ddr_wready_2d[0] ; - assign s_axi_ddra_bid = sh_cl_ddr_bid_2d[0] ; - assign s_axi_ddra_bresp = sh_cl_ddr_bresp_2d[0] ; - assign s_axi_ddra_bvalid = sh_cl_ddr_bvalid_2d[0] ; - assign s_axi_ddra_arready = sh_cl_ddr_arready_2d[0] ; - assign s_axi_ddra_rid = sh_cl_ddr_rid_2d[0] ; - assign s_axi_ddra_rdata = sh_cl_ddr_rdata_2d[0] ; - 
assign s_axi_ddra_rresp = sh_cl_ddr_rresp_2d[0] ; - assign s_axi_ddra_rlast = sh_cl_ddr_rlast_2d[0] ; - assign s_axi_ddra_rvalid = sh_cl_ddr_rvalid_2d[0] ; - assign ddra_is_ready = sh_cl_ddr_is_ready_2d[0]; - - assign cl_sh_ddr_awid_2d[1] = s_axi_ddrb_awid ; - assign cl_sh_ddr_awaddr_2d[1] = s_axi_ddrb_awaddr ; - assign cl_sh_ddr_awlen_2d[1] = s_axi_ddrb_awlen ; - assign cl_sh_ddr_awsize_2d[1] = s_axi_ddrb_awsize ; - assign cl_sh_ddr_awburst_2d[1] = 2'b01 ; - assign cl_sh_ddr_awvalid_2d[1] = s_axi_ddrb_awvalid ; - assign cl_sh_ddr_wid_2d[1] = 0 ; - assign cl_sh_ddr_wdata_2d[1] = s_axi_ddrb_wdata ; - assign cl_sh_ddr_wstrb_2d[1] = s_axi_ddrb_wstrb ; - assign cl_sh_ddr_wlast_2d[1] = s_axi_ddrb_wlast ; - assign cl_sh_ddr_wvalid_2d[1] = s_axi_ddrb_wvalid ; - assign cl_sh_ddr_bready_2d[1] = s_axi_ddrb_bready ; - assign cl_sh_ddr_arid_2d[1] = s_axi_ddrb_arid ; - assign cl_sh_ddr_araddr_2d[1] = s_axi_ddrb_araddr ; - assign cl_sh_ddr_arlen_2d[1] = s_axi_ddrb_arlen ; - assign cl_sh_ddr_arsize_2d[1] = s_axi_ddrb_arsize ; - assign cl_sh_ddr_arburst_2d[1] = 2'b01 ; - assign cl_sh_ddr_arvalid_2d[1] = s_axi_ddrb_arvalid ; - assign cl_sh_ddr_rready_2d[1] = s_axi_ddrb_rready ; - - assign s_axi_ddrb_awready = sh_cl_ddr_awready_2d[1] ; - assign s_axi_ddrb_wready = sh_cl_ddr_wready_2d[1] ; - assign s_axi_ddrb_bid = sh_cl_ddr_bid_2d[1] ; - assign s_axi_ddrb_bresp = sh_cl_ddr_bresp_2d[1] ; - assign s_axi_ddrb_bvalid = sh_cl_ddr_bvalid_2d[1] ; - assign s_axi_ddrb_arready = sh_cl_ddr_arready_2d[1] ; - assign s_axi_ddrb_rid = sh_cl_ddr_rid_2d[1] ; - assign s_axi_ddrb_rdata = sh_cl_ddr_rdata_2d[1] ; - assign s_axi_ddrb_rresp = sh_cl_ddr_rresp_2d[1] ; - assign s_axi_ddrb_rlast = sh_cl_ddr_rlast_2d[1] ; - assign s_axi_ddrb_rvalid = sh_cl_ddr_rvalid_2d[1] ; - assign ddrb_is_ready = sh_cl_ddr_is_ready_2d[1]; - - assign cl_sh_ddr_awid_2d[2] = s_axi_ddrd_awid ; - assign cl_sh_ddr_awaddr_2d[2] = s_axi_ddrd_awaddr ; - assign cl_sh_ddr_awlen_2d[2] = s_axi_ddrd_awlen ; - assign 
cl_sh_ddr_awsize_2d[2] = s_axi_ddrd_awsize ; - assign cl_sh_ddr_awburst_2d[2] = 2'b01 ; - assign cl_sh_ddr_awvalid_2d[2] = s_axi_ddrd_awvalid ; - assign cl_sh_ddr_wid_2d[2] = 0 ; - assign cl_sh_ddr_wdata_2d[2] = s_axi_ddrd_wdata ; - assign cl_sh_ddr_wstrb_2d[2] = s_axi_ddrd_wstrb ; - assign cl_sh_ddr_wlast_2d[2] = s_axi_ddrd_wlast ; - assign cl_sh_ddr_wvalid_2d[2] = s_axi_ddrd_wvalid ; - assign cl_sh_ddr_bready_2d[2] = s_axi_ddrd_bready ; - assign cl_sh_ddr_arid_2d[2] = s_axi_ddrd_arid ; - assign cl_sh_ddr_araddr_2d[2] = s_axi_ddrd_araddr ; - assign cl_sh_ddr_arlen_2d[2] = s_axi_ddrd_arlen ; - assign cl_sh_ddr_arsize_2d[2] = s_axi_ddrd_arsize ; - assign cl_sh_ddr_arburst_2d[2] = 2'b01 ; - assign cl_sh_ddr_arvalid_2d[2] = s_axi_ddrd_arvalid ; - assign cl_sh_ddr_rready_2d[2] = s_axi_ddrd_rready ; - - assign s_axi_ddrd_awready = sh_cl_ddr_awready_2d[2] ; - assign s_axi_ddrd_wready = sh_cl_ddr_wready_2d[2] ; - assign s_axi_ddrd_bid = sh_cl_ddr_bid_2d[2] ; - assign s_axi_ddrd_bresp = sh_cl_ddr_bresp_2d[2] ; - assign s_axi_ddrd_bvalid = sh_cl_ddr_bvalid_2d[2] ; - assign s_axi_ddrd_arready = sh_cl_ddr_arready_2d[2] ; - assign s_axi_ddrd_rid = sh_cl_ddr_rid_2d[2] ; - assign s_axi_ddrd_rdata = sh_cl_ddr_rdata_2d[2] ; - assign s_axi_ddrd_rresp = sh_cl_ddr_rresp_2d[2] ; - assign s_axi_ddrd_rlast = sh_cl_ddr_rlast_2d[2] ; - assign s_axi_ddrd_rvalid = sh_cl_ddr_rvalid_2d[2] ; - assign ddrd_is_ready = sh_cl_ddr_is_ready_2d[2]; - - logic ddr_aws_stat_ack0; - logic [31:0] ddr_aws_stat_rdata0; - logic [7:0] ddr_aws_stat_int0; - logic ddr_aws_stat_ack1; - logic [31:0] ddr_aws_stat_rdata1; - logic [7:0] ddr_aws_stat_int1; - logic ddr_aws_stat_ack2; - logic [31:0] ddr_aws_stat_rdata2; - logic [7:0] ddr_aws_stat_int2; - - logic [7:0] pipe_ddr_stat_addr0; - logic pipe_ddr_stat_wr0; - logic pipe_ddr_stat_rd0; - logic [31:0] pipe_ddr_stat_wdata0; - logic ddr_pipe_stat_ack0; - logic [31:0] ddr_pipe_stat_rdata0; - logic [7:0] ddr_pipe_stat_int0; - - logic [7:0] pipe_ddr_stat_addr1; - logic 
pipe_ddr_stat_wr1; - logic pipe_ddr_stat_rd1; - logic [31:0] pipe_ddr_stat_wdata1; - logic ddr_pipe_stat_ack1; - logic [31:0] ddr_pipe_stat_rdata1; - logic [7:0] ddr_pipe_stat_int1; - - logic [7:0] pipe_ddr_stat_addr2; - logic pipe_ddr_stat_wr2; - logic pipe_ddr_stat_rd2; - logic [31:0] pipe_ddr_stat_wdata2; - logic ddr_pipe_stat_ack2; - logic [31:0] ddr_pipe_stat_rdata2; - logic [7:0] ddr_pipe_stat_int2; - -//------------------------------------------------- -// Tie-offs when DDRs are disabled -//------------------------------------------------- - assign ddr_sh_stat_ack0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_ack0 : 1'b1; - assign ddr_sh_stat_rdata0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_rdata0 : 0; - assign ddr_sh_stat_int0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_int0 : 8'b0; - assign ddr_sh_stat_ack1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_ack1 : 1'b1; - assign ddr_sh_stat_rdata1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_rdata1 : 0; - assign ddr_sh_stat_int1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_int1 : 8'b0; - assign ddr_sh_stat_ack2 = (C_DDR_D_PRESENT!=0) ? ddr_aws_stat_ack2 : 1'b1; - assign ddr_sh_stat_rdata2 = (C_DDR_D_PRESENT!=0) ? ddr_aws_stat_rdata2 : 0; - assign ddr_sh_stat_int2 = (C_DDR_D_PRESENT!=0) ? 
ddr_aws_stat_int2 : 8'b0; - -//------------------------------------------------- -// Reset Synchronization -//------------------------------------------------- - logic pre_sync_rst_n; - logic sync_rst_n; - - always @(negedge rst_main_n or posedge clk_main_a0) begin - if (!rst_main_n) begin - pre_sync_rst_n <= 1'b0; - sync_rst_n <= 1'b0; - end else begin - pre_sync_rst_n <= 1'b1; - sync_rst_n <= pre_sync_rst_n; - end - end - - `ifdef FPGA_LESS_RST - `undef FPGA_LESS_RST - `endif - - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata0 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata0), .out_bus(pipe_ddr_stat_wdata0)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr0 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr0), .out_bus(pipe_ddr_stat_addr0)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr0), .out_bus(pipe_ddr_stat_wr0)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd0), .out_bus(pipe_ddr_stat_rd0)); - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata0 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata0), .in_bus(ddr_pipe_stat_rdata0)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack0), .in_bus(ddr_pipe_stat_ack0)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int0), .in_bus(ddr_pipe_stat_int0)); - - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata1 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata1), .out_bus(pipe_ddr_stat_wdata1)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr1 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr1), .out_bus(pipe_ddr_stat_addr1)); - lib_pipe #(.WIDTH(1), 
.STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr1), .out_bus(pipe_ddr_stat_wr1)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd1), .out_bus(pipe_ddr_stat_rd1)); - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata1 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata1), .in_bus(ddr_pipe_stat_rdata1)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack1), .in_bus(ddr_pipe_stat_ack1)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int1), .in_bus(ddr_pipe_stat_int1)); - - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata2 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata2), .out_bus(pipe_ddr_stat_wdata2)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr2 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr2), .out_bus(pipe_ddr_stat_addr2)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr2), .out_bus(pipe_ddr_stat_wr2)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd2), .out_bus(pipe_ddr_stat_rd2)); - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata2 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata2), .in_bus(ddr_pipe_stat_rdata2)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack2), .in_bus(ddr_pipe_stat_ack2)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int2), .in_bus(ddr_pipe_stat_int2)); - - sh_ddr #( - 
.DDR_A_PRESENT(C_DDR_A_PRESENT), - .DDR_B_PRESENT(C_DDR_B_PRESENT), - .DDR_D_PRESENT(C_DDR_D_PRESENT) - ) sh_ddr_0 - ( - .clk(clk_main_a0), - .rst_n(sync_rst_n), - - .stat_clk(clk_main_a0), - .stat_rst_n(sync_rst_n), - - .CLK_300M_DIMM0_DP(CLK_300M_DIMM0_DP), - .CLK_300M_DIMM0_DN(CLK_300M_DIMM0_DN), - .M_A_ACT_N(M_A_ACT_N), - .M_A_MA(M_A_MA), - .M_A_BA(M_A_BA), - .M_A_BG(M_A_BG), - .M_A_CKE(M_A_CKE), - .M_A_ODT(M_A_ODT), - .M_A_CS_N(M_A_CS_N), - .M_A_CLK_DN(M_A_CLK_DN), - .M_A_CLK_DP(M_A_CLK_DP), - .M_A_PAR(M_A_PAR), - .M_A_DQ(M_A_DQ), - .M_A_ECC(M_A_ECC), - .M_A_DQS_DP(M_A_DQS_DP), - .M_A_DQS_DN(M_A_DQS_DN), - .cl_RST_DIMM_A_N(cl_RST_DIMM_A_N), - - .CLK_300M_DIMM1_DP(CLK_300M_DIMM1_DP), - .CLK_300M_DIMM1_DN(CLK_300M_DIMM1_DN), - .M_B_ACT_N(M_B_ACT_N), - .M_B_MA(M_B_MA), - .M_B_BA(M_B_BA), - .M_B_BG(M_B_BG), - .M_B_CKE(M_B_CKE), - .M_B_ODT(M_B_ODT), - .M_B_CS_N(M_B_CS_N), - .M_B_CLK_DN(M_B_CLK_DN), - .M_B_CLK_DP(M_B_CLK_DP), - .M_B_PAR(M_B_PAR), - .M_B_DQ(M_B_DQ), - .M_B_ECC(M_B_ECC), - .M_B_DQS_DP(M_B_DQS_DP), - .M_B_DQS_DN(M_B_DQS_DN), - .cl_RST_DIMM_B_N(cl_RST_DIMM_B_N), - - .CLK_300M_DIMM3_DP(CLK_300M_DIMM3_DP), - .CLK_300M_DIMM3_DN(CLK_300M_DIMM3_DN), - .M_D_ACT_N(M_D_ACT_N), - .M_D_MA(M_D_MA), - .M_D_BA(M_D_BA), - .M_D_BG(M_D_BG), - .M_D_CKE(M_D_CKE), - .M_D_ODT(M_D_ODT), - .M_D_CS_N(M_D_CS_N), - .M_D_CLK_DN(M_D_CLK_DN), - .M_D_CLK_DP(M_D_CLK_DP), - .M_D_PAR(M_D_PAR), - .M_D_DQ(M_D_DQ), - .M_D_ECC(M_D_ECC), - .M_D_DQS_DP(M_D_DQS_DP), - .M_D_DQS_DN(M_D_DQS_DN), - .cl_RST_DIMM_D_N(cl_RST_DIMM_D_N), - - //------------------------------------------------------ - // AXI Slave Interfaces - //------------------------------------------------------ - .cl_sh_ddr_awid(cl_sh_ddr_awid_2d), - .cl_sh_ddr_awaddr(cl_sh_ddr_awaddr_2d), - .cl_sh_ddr_awlen(cl_sh_ddr_awlen_2d), - .cl_sh_ddr_awsize(cl_sh_ddr_awsize_2d), - .cl_sh_ddr_awburst(cl_sh_ddr_awburst_2d), - .cl_sh_ddr_awvalid(cl_sh_ddr_awvalid_2d), - .sh_cl_ddr_awready(sh_cl_ddr_awready_2d), - - 
.cl_sh_ddr_wid(cl_sh_ddr_wid_2d), - .cl_sh_ddr_wdata(cl_sh_ddr_wdata_2d), - .cl_sh_ddr_wstrb(cl_sh_ddr_wstrb_2d), - .cl_sh_ddr_wlast(cl_sh_ddr_wlast_2d), - .cl_sh_ddr_wvalid(cl_sh_ddr_wvalid_2d), - .sh_cl_ddr_wready(sh_cl_ddr_wready_2d), - - .sh_cl_ddr_bid(sh_cl_ddr_bid_2d), - .sh_cl_ddr_bresp(sh_cl_ddr_bresp_2d), - .sh_cl_ddr_bvalid(sh_cl_ddr_bvalid_2d), - .cl_sh_ddr_bready(cl_sh_ddr_bready_2d), - - .cl_sh_ddr_arid(cl_sh_ddr_arid_2d), - .cl_sh_ddr_araddr(cl_sh_ddr_araddr_2d), - .cl_sh_ddr_arlen(cl_sh_ddr_arlen_2d), - .cl_sh_ddr_arsize(cl_sh_ddr_arsize_2d), - .cl_sh_ddr_arburst(cl_sh_ddr_arburst_2d), - .cl_sh_ddr_arvalid(cl_sh_ddr_arvalid_2d), - .sh_cl_ddr_arready(sh_cl_ddr_arready_2d), - - .sh_cl_ddr_rid(sh_cl_ddr_rid_2d), - .sh_cl_ddr_rdata(sh_cl_ddr_rdata_2d), - .sh_cl_ddr_rresp(sh_cl_ddr_rresp_2d), - .sh_cl_ddr_rlast(sh_cl_ddr_rlast_2d), - .sh_cl_ddr_rvalid(sh_cl_ddr_rvalid_2d), - .cl_sh_ddr_rready(cl_sh_ddr_rready_2d), - - .sh_cl_ddr_is_ready(sh_cl_ddr_is_ready_2d), - - .sh_ddr_stat_addr0 (pipe_ddr_stat_addr0 ), - .sh_ddr_stat_wr0 (pipe_ddr_stat_wr0 ), - .sh_ddr_stat_rd0 (pipe_ddr_stat_rd0 ), - .sh_ddr_stat_wdata0 (pipe_ddr_stat_wdata0), - .ddr_sh_stat_ack0 (ddr_pipe_stat_ack0 ), - .ddr_sh_stat_rdata0 (ddr_pipe_stat_rdata0), - .ddr_sh_stat_int0 (ddr_pipe_stat_int0 ), - - .sh_ddr_stat_addr1 (pipe_ddr_stat_addr1 ), - .sh_ddr_stat_wr1 (pipe_ddr_stat_wr1 ), - .sh_ddr_stat_rd1 (pipe_ddr_stat_rd1 ), - .sh_ddr_stat_wdata1 (pipe_ddr_stat_wdata1), - .ddr_sh_stat_ack1 (ddr_pipe_stat_ack1 ), - .ddr_sh_stat_rdata1 (ddr_pipe_stat_rdata1), - .ddr_sh_stat_int1 (ddr_pipe_stat_int1 ), - - .sh_ddr_stat_addr2 (pipe_ddr_stat_addr2 ), - .sh_ddr_stat_wr2 (pipe_ddr_stat_wr2 ), - .sh_ddr_stat_rd2 (pipe_ddr_stat_rd2 ), - .sh_ddr_stat_wdata2 (pipe_ddr_stat_wdata2), - .ddr_sh_stat_ack2 (ddr_pipe_stat_ack2 ), - .ddr_sh_stat_rdata2 (ddr_pipe_stat_rdata2), - .ddr_sh_stat_int2 (ddr_pipe_stat_int2 ) - - ); - - end else begin : gen_non_mem - - assign s_axi_ddra_awready = 0; - assign 
s_axi_ddra_wready = 0; - assign s_axi_ddra_bid = 0; - assign s_axi_ddra_bresp = 0; - assign s_axi_ddra_bvalid = 0; - assign s_axi_ddra_arready = 0; - assign s_axi_ddra_rid = 0; - assign s_axi_ddra_rdata = 0; - assign s_axi_ddra_rresp = 0; - assign s_axi_ddra_rlast = 1'b1; - assign s_axi_ddra_rvalid = 0; - assign ddra_is_ready = 0; - - assign s_axi_ddrb_awready = 0; - assign s_axi_ddrb_wready = 0; - assign s_axi_ddrb_bid = 0; - assign s_axi_ddrb_bresp = 0; - assign s_axi_ddrb_bvalid = 0; - assign s_axi_ddrb_arready = 0; - assign s_axi_ddrb_rid = 0; - assign s_axi_ddrb_rdata = 0; - assign s_axi_ddrb_rresp = 0; - assign s_axi_ddrb_rlast = 1'b1; - assign s_axi_ddrb_rvalid = 0; - assign ddrb_is_ready = 0; - - assign s_axi_ddrd_awready = 0; - assign s_axi_ddrd_wready = 0; - assign s_axi_ddrd_bid = 0; - assign s_axi_ddrd_bresp = 0; - assign s_axi_ddrd_bvalid = 0; - assign s_axi_ddrd_arready = 0; - assign s_axi_ddrd_rid = 0; - assign s_axi_ddrd_rdata = 0; - assign s_axi_ddrd_rresp = 0; - assign s_axi_ddrd_rlast = 1'b1; - assign s_axi_ddrd_rvalid = 0; - assign ddrd_is_ready = 0; - - assign ddr_sh_stat_ack0 = 1'b1; - assign ddr_sh_stat_rdata0 = 0; - assign ddr_sh_stat_int0 = 8'b0; - assign ddr_sh_stat_ack1 = 1'b1; - assign ddr_sh_stat_rdata1 = 0; - assign ddr_sh_stat_int1 = 8'b0; - assign ddr_sh_stat_ack2 = 1'b1; - assign ddr_sh_stat_rdata2 = 0; - assign ddr_sh_stat_int2 = 8'b0; - - assign M_A_ACT_N = 0; - assign M_A_MA = 0; - assign M_A_BA = 0; - assign M_A_BG = 0; - assign M_A_CKE = 0; - assign M_A_ODT = 0; - assign M_A_CS_N = 0; - assign M_A_CLK_DN = 0; - assign M_A_CLK_DP = 0; - assign M_A_PAR = 0; - assign cl_RST_DIMM_A_N = 0; - - assign M_B_ACT_N = 0; - assign M_B_MA = 0; - assign M_B_BA = 0; - assign M_B_BG = 0; - assign M_B_CKE = 0; - assign M_B_ODT = 0; - assign M_B_CS_N = 0; - assign M_B_CLK_DN = 0; - assign M_B_CLK_DP = 0; - assign M_B_PAR = 0; - assign cl_RST_DIMM_B_N = 0; - - assign M_D_ACT_N = 0; - assign M_D_MA = 0; - assign M_D_BA = 0; - assign M_D_BG = 0; - 
assign M_D_CKE = 0; - assign M_D_ODT = 0; - assign M_D_CS_N = 0; - assign M_D_CLK_DN = 0; - assign M_D_CLK_DP = 0; - assign M_D_PAR = 0; - assign cl_RST_DIMM_D_N = 0; - - end // gen_mem - endgenerate - -endmodule - - diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_vlsyn_rfs.sv b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_vlsyn_rfs.sv deleted file mode 100755 index f2122cce..00000000 --- a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/aws_v1_0_vlsyn_rfs.sv +++ /dev/null @@ -1,3099 +0,0 @@ -//---------------------------------------------------------------------------------- -//Copyright (c) 2014 -// -//Permission is hereby granted, free of charge, to any person obtaining a copy -//of this software and associated documentation files (the "Software"), to deal -//in the Software without restriction, including without limitation the rights -//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -//copies of the Software, and to permit persons to whom the Software is -//furnished to do so, subject to the following conditions: -// -//The above copyright notice and this permission notice shall be included in -//all copies or substantial portions of the Software. -// -//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -//THE SOFTWARE. 
-//---------------------------------------------------------------------------------- - -//simple pipeline - -//WIDTH is the width of the DATA -//STAGES is the number of stages (flops in the pipeline) -module lib_pipe #(parameter WIDTH=8, parameter STAGES=1) ( - input clk, - input rst_n, - - input[WIDTH-1:0] in_bus, - - output [WIDTH-1:0] out_bus - ); - -//Note the shreg_extract=no directs Xilinx to not infer shift registers which -// defeats using this as a pipeline - - -`ifdef FPGA_LESS_RST - (*shreg_extract="no"*) logic [WIDTH-1:0] pipe[STAGES-1:0] = '{default:'0}; -`else - (*shreg_extract="no"*) logic [WIDTH-1:0] pipe[STAGES-1:0]; -`endif - -//(*srl_style="register"*) logic [WIDTH-1:0] pipe [STAGES-1:0]; -// logic [WIDTH-1:0] pipe [STAGES-1:0]; - - integer i; - -`ifdef FPGA_LESS_RST - always @(posedge clk) -`else - always @(negedge rst_n or posedge clk) - if (!rst_n) - begin - for (i=0; i1) - begin - for (i=1; i1 ? clk_extra_a1 : 1'b0 ; - assign clk_extra_a2_out = C_NUM_A_CLOCKS>2 ? clk_extra_a2 : 1'b0 ; - assign clk_extra_a3_out = C_NUM_A_CLOCKS>3 ? clk_extra_a3 : 1'b0 ; - assign clk_extra_b0_out = C_NUM_B_CLOCKS>0 ? clk_extra_b0 : 1'b0 ; - assign clk_extra_b1_out = C_NUM_B_CLOCKS>1 ? clk_extra_b1 : 1'b0 ; - assign clk_extra_c0_out = C_NUM_C_CLOCKS>0 ? clk_extra_c0 : 1'b0 ; - assign clk_extra_c1_out = C_NUM_C_CLOCKS>1 ? 
clk_extra_c1 : 1'b0 ; - assign rst_main_n_out = rst_main_n ; - assign kernel_rst_n_out = kernel_rst_n ; - assign flr_assert = sh_cl_flr_assert ; - assign status_vdip = sh_cl_status_vdip ; - assign irq_ack = sh_cl_apppf_irq_ack ; - assign glcount0 = sh_cl_glcount0 ; - assign glcount1 = sh_cl_glcount1 ; - - assign cl_sh_flr_done = flr_done ; - assign cl_sh_status_vled = status_vled ; - assign cl_sh_apppf_irq_req = irq_req ; - - assign cl_sh_status0 = 0 ; - assign cl_sh_status1 = 0 ; - assign cl_sh_id0 = {C_DEVICE_ID, C_VENDOR_ID} ; - assign cl_sh_id1 = {C_SUBSYSTEM_ID, C_SUBSYSTEM_VENDOR_ID} ; - - assign cl_sh_dma_wr_full = 1'b0; - assign cl_sh_dma_rd_full = 1'b0; - - assign cl_sh_ddr_awid = s_axi_ddrc_awid ; - assign cl_sh_ddr_awaddr = s_axi_ddrc_awaddr ; - assign cl_sh_ddr_awlen = s_axi_ddrc_awlen ; - assign cl_sh_ddr_awsize = s_axi_ddrc_awsize ; - assign cl_sh_ddr_awburst = 2'b01 ; - assign cl_sh_ddr_awvalid = s_axi_ddrc_awvalid ; - assign cl_sh_ddr_wdata = s_axi_ddrc_wdata ; - assign cl_sh_ddr_wstrb = s_axi_ddrc_wstrb ; - assign cl_sh_ddr_wlast = s_axi_ddrc_wlast ; - assign cl_sh_ddr_wvalid = s_axi_ddrc_wvalid ; - assign cl_sh_ddr_bready = s_axi_ddrc_bready ; - assign cl_sh_ddr_arid = s_axi_ddrc_arid ; - assign cl_sh_ddr_araddr = s_axi_ddrc_araddr ; - assign cl_sh_ddr_arlen = s_axi_ddrc_arlen ; - assign cl_sh_ddr_arsize = s_axi_ddrc_arsize ; - assign cl_sh_ddr_arburst = 2'b01 ; - assign cl_sh_ddr_arvalid = s_axi_ddrc_arvalid ; - assign cl_sh_ddr_rready = s_axi_ddrc_rready ; - - assign s_axi_ddrc_awready = sh_cl_ddr_awready ; - assign s_axi_ddrc_wready = sh_cl_ddr_wready ; - assign s_axi_ddrc_bid = sh_cl_ddr_bid ; - assign s_axi_ddrc_bresp = sh_cl_ddr_bresp ; - assign s_axi_ddrc_bvalid = sh_cl_ddr_bvalid ; - assign s_axi_ddrc_arready = sh_cl_ddr_arready ; - assign s_axi_ddrc_rid = sh_cl_ddr_rid ; - assign s_axi_ddrc_rdata = sh_cl_ddr_rdata ; - assign s_axi_ddrc_rresp = sh_cl_ddr_rresp ; - assign s_axi_ddrc_rlast = sh_cl_ddr_rlast ; - assign s_axi_ddrc_rvalid = 
sh_cl_ddr_rvalid ; - assign ddrc_is_ready = sh_cl_ddr_is_ready ; - - assign cl_sh_ddr_wid = 0 ; - - assign cl_sda_awready = m_axi_sda_awready ; - assign cl_sda_wready = m_axi_sda_wready ; - assign cl_sda_bresp = m_axi_sda_bresp ; - assign cl_sda_bvalid = m_axi_sda_bvalid ; - assign cl_sda_arready = m_axi_sda_arready ; - assign cl_sda_rdata = m_axi_sda_rdata ; - assign cl_sda_rresp = m_axi_sda_rresp ; - assign cl_sda_rvalid = m_axi_sda_rvalid ; - - assign m_axi_sda_awaddr = sda_cl_awaddr ; - assign m_axi_sda_awvalid = sda_cl_awvalid ; - assign m_axi_sda_wdata = sda_cl_wdata ; - assign m_axi_sda_wstrb = sda_cl_wstrb ; - assign m_axi_sda_wvalid = sda_cl_wvalid ; - assign m_axi_sda_bready = sda_cl_bready ; - assign m_axi_sda_araddr = sda_cl_araddr ; - assign m_axi_sda_arvalid = sda_cl_arvalid ; - assign m_axi_sda_rready = sda_cl_rready ; - - assign ocl_sh_awready = m_axi_ocl_awready ; - assign ocl_sh_wready = m_axi_ocl_wready ; - assign ocl_sh_bresp = m_axi_ocl_bresp ; - assign ocl_sh_bvalid = m_axi_ocl_bvalid ; - assign ocl_sh_arready = m_axi_ocl_arready ; - assign ocl_sh_rdata = m_axi_ocl_rdata ; - assign ocl_sh_rresp = m_axi_ocl_rresp ; - assign ocl_sh_rvalid = m_axi_ocl_rvalid ; - - assign m_axi_ocl_awaddr = sh_ocl_awaddr ; - assign m_axi_ocl_awvalid = sh_ocl_awvalid ; - assign m_axi_ocl_wdata = sh_ocl_wdata ; - assign m_axi_ocl_wstrb = sh_ocl_wstrb ; - assign m_axi_ocl_wvalid = sh_ocl_wvalid ; - assign m_axi_ocl_bready = sh_ocl_bready ; - assign m_axi_ocl_araddr = sh_ocl_araddr ; - assign m_axi_ocl_arvalid = sh_ocl_arvalid ; - assign m_axi_ocl_rready = sh_ocl_rready ; - - assign bar1_sh_awready = m_axi_bar1_awready ; - assign bar1_sh_wready = m_axi_bar1_wready ; - assign bar1_sh_bresp = m_axi_bar1_bresp ; - assign bar1_sh_bvalid = m_axi_bar1_bvalid ; - assign bar1_sh_arready = m_axi_bar1_arready ; - assign bar1_sh_rdata = m_axi_bar1_rdata ; - assign bar1_sh_rresp = m_axi_bar1_rresp ; - assign bar1_sh_rvalid = m_axi_bar1_rvalid ; - - assign m_axi_bar1_awaddr = 
sh_bar1_awaddr ; - assign m_axi_bar1_awvalid = sh_bar1_awvalid ; - assign m_axi_bar1_wdata = sh_bar1_wdata ; - assign m_axi_bar1_wstrb = sh_bar1_wstrb ; - assign m_axi_bar1_wvalid = sh_bar1_wvalid ; - assign m_axi_bar1_bready = sh_bar1_bready ; - assign m_axi_bar1_araddr = sh_bar1_araddr ; - assign m_axi_bar1_arvalid = sh_bar1_arvalid ; - assign m_axi_bar1_rready = sh_bar1_rready ; - - assign cl_sh_dma_pcis_awready = m_axi_pcis_awready ; - assign cl_sh_dma_pcis_wready = m_axi_pcis_wready ; - assign cl_sh_dma_pcis_bid = m_axi_pcis_bid ; - assign cl_sh_dma_pcis_bresp = m_axi_pcis_bresp ; - assign cl_sh_dma_pcis_bvalid = m_axi_pcis_bvalid ; - assign cl_sh_dma_pcis_arready = m_axi_pcis_arready ; - assign cl_sh_dma_pcis_rid = m_axi_pcis_rid ; - assign cl_sh_dma_pcis_rdata = m_axi_pcis_rdata ; - assign cl_sh_dma_pcis_rresp = m_axi_pcis_rresp ; - assign cl_sh_dma_pcis_rlast = m_axi_pcis_rlast ; - assign cl_sh_dma_pcis_rvalid = m_axi_pcis_rvalid ; - - assign m_axi_pcis_awid = sh_cl_dma_pcis_awid ; - assign m_axi_pcis_awaddr = sh_cl_dma_pcis_awaddr ; - assign m_axi_pcis_awlen = sh_cl_dma_pcis_awlen ; - assign m_axi_pcis_awsize = sh_cl_dma_pcis_awsize ; - assign m_axi_pcis_awvalid = sh_cl_dma_pcis_awvalid ; - assign m_axi_pcis_wdata = sh_cl_dma_pcis_wdata ; - assign m_axi_pcis_wstrb = sh_cl_dma_pcis_wstrb ; - assign m_axi_pcis_wlast = sh_cl_dma_pcis_wlast ; - assign m_axi_pcis_wvalid = sh_cl_dma_pcis_wvalid ; - assign m_axi_pcis_bready = sh_cl_dma_pcis_bready ; - assign m_axi_pcis_arid = sh_cl_dma_pcis_arid ; - assign m_axi_pcis_araddr = sh_cl_dma_pcis_araddr ; - assign m_axi_pcis_arlen = sh_cl_dma_pcis_arlen ; - assign m_axi_pcis_arsize = sh_cl_dma_pcis_arsize ; - assign m_axi_pcis_arvalid = sh_cl_dma_pcis_arvalid ; - assign m_axi_pcis_rready = sh_cl_dma_pcis_rready ; - assign m_axi_pcis_awburst = 2'b01 ; - assign m_axi_pcis_arburst = 2'b01 ; - - assign cl_sh_pcim_awid = s_axi_pcim_awid ; - assign cl_sh_pcim_awaddr = s_axi_pcim_awaddr ; - assign cl_sh_pcim_awlen = 
s_axi_pcim_awlen ; - assign cl_sh_pcim_awsize = s_axi_pcim_awsize ; - assign cl_sh_pcim_awuser = s_axi_pcim_awuser ; - assign cl_sh_pcim_awvalid = s_axi_pcim_awvalid ; - assign cl_sh_pcim_wdata = s_axi_pcim_wdata ; - assign cl_sh_pcim_wstrb = s_axi_pcim_wstrb ; - assign cl_sh_pcim_wlast = s_axi_pcim_wlast ; - assign cl_sh_pcim_wvalid = s_axi_pcim_wvalid ; - assign cl_sh_pcim_bready = s_axi_pcim_bready ; - assign cl_sh_pcim_arid = s_axi_pcim_arid ; - assign cl_sh_pcim_araddr = s_axi_pcim_araddr ; - assign cl_sh_pcim_arlen = s_axi_pcim_arlen ; - assign cl_sh_pcim_arsize = s_axi_pcim_arsize ; - assign cl_sh_pcim_aruser = s_axi_pcim_aruser ; - assign cl_sh_pcim_arvalid = s_axi_pcim_arvalid ; - assign cl_sh_pcim_rready = s_axi_pcim_rready ; - - assign s_axi_pcim_awready = sh_cl_pcim_awready ; - assign s_axi_pcim_wready = sh_cl_pcim_wready ; - assign s_axi_pcim_bid = sh_cl_pcim_bid ; - assign s_axi_pcim_bresp = sh_cl_pcim_bresp ; - assign s_axi_pcim_bvalid = sh_cl_pcim_bvalid ; - assign s_axi_pcim_arready = sh_cl_pcim_arready ; - assign s_axi_pcim_rid = sh_cl_pcim_rid ; - assign s_axi_pcim_rdata = sh_cl_pcim_rdata ; - assign s_axi_pcim_rresp = sh_cl_pcim_rresp ; - assign s_axi_pcim_rlast = sh_cl_pcim_rlast ; - assign s_axi_pcim_rvalid = sh_cl_pcim_rvalid ; - assign cfg_max_payload_out = cfg_max_payload ; - assign cfg_max_read_req_out = cfg_max_read_req ; - - if ((C_MODE == 0) || (C_MODE == 1)) begin : gen_mem - - logic [15:0] cl_sh_ddr_awid_2d[2:0]; - logic [63:0] cl_sh_ddr_awaddr_2d[2:0]; - logic [7:0] cl_sh_ddr_awlen_2d[2:0]; - logic [2:0] cl_sh_ddr_awsize_2d[2:0]; - logic [1:0] cl_sh_ddr_awburst_2d[2:0]; - logic cl_sh_ddr_awvalid_2d[2:0]; - logic [2:0] sh_cl_ddr_awready_2d; - logic [15:0] cl_sh_ddr_wid_2d[2:0]; - logic [511:0] cl_sh_ddr_wdata_2d[2:0]; - logic [63:0] cl_sh_ddr_wstrb_2d[2:0]; - logic [2:0] cl_sh_ddr_wlast_2d; - logic [2:0] cl_sh_ddr_wvalid_2d; - logic [2:0] sh_cl_ddr_wready_2d; - logic [15:0] sh_cl_ddr_bid_2d[2:0]; - logic [1:0] sh_cl_ddr_bresp_2d[2:0]; 
- logic [2:0] sh_cl_ddr_bvalid_2d; - logic [2:0] cl_sh_ddr_bready_2d; - logic [15:0] cl_sh_ddr_arid_2d[2:0]; - logic [63:0] cl_sh_ddr_araddr_2d[2:0]; - logic [7:0] cl_sh_ddr_arlen_2d[2:0]; - logic [2:0] cl_sh_ddr_arsize_2d[2:0]; - logic [1:0] cl_sh_ddr_arburst_2d[2:0]; - logic [2:0] cl_sh_ddr_arvalid_2d; - logic [2:0] sh_cl_ddr_arready_2d; - logic [15:0] sh_cl_ddr_rid_2d[2:0]; - logic [511:0] sh_cl_ddr_rdata_2d[2:0]; - logic [1:0] sh_cl_ddr_rresp_2d[2:0]; - logic [2:0] sh_cl_ddr_rlast_2d; - logic [2:0] sh_cl_ddr_rvalid_2d; - logic [2:0] cl_sh_ddr_rready_2d; - logic [2:0] sh_cl_ddr_is_ready_2d; - - assign cl_sh_ddr_awid_2d[0] = s_axi_ddra_awid ; - assign cl_sh_ddr_awaddr_2d[0] = s_axi_ddra_awaddr ; - assign cl_sh_ddr_awlen_2d[0] = s_axi_ddra_awlen ; - assign cl_sh_ddr_awsize_2d[0] = s_axi_ddra_awsize ; - assign cl_sh_ddr_awburst_2d[0] = 2'b01 ; - assign cl_sh_ddr_awvalid_2d[0] = s_axi_ddra_awvalid ; - assign cl_sh_ddr_wid_2d[0] = 0 ; - assign cl_sh_ddr_wdata_2d[0] = s_axi_ddra_wdata ; - assign cl_sh_ddr_wstrb_2d[0] = s_axi_ddra_wstrb ; - assign cl_sh_ddr_wlast_2d[0] = s_axi_ddra_wlast ; - assign cl_sh_ddr_wvalid_2d[0] = s_axi_ddra_wvalid ; - assign cl_sh_ddr_bready_2d[0] = s_axi_ddra_bready ; - assign cl_sh_ddr_arid_2d[0] = s_axi_ddra_arid ; - assign cl_sh_ddr_araddr_2d[0] = s_axi_ddra_araddr ; - assign cl_sh_ddr_arlen_2d[0] = s_axi_ddra_arlen ; - assign cl_sh_ddr_arsize_2d[0] = s_axi_ddra_arsize ; - assign cl_sh_ddr_arburst_2d[0] = 2'b01 ; - assign cl_sh_ddr_arvalid_2d[0] = s_axi_ddra_arvalid ; - assign cl_sh_ddr_rready_2d[0] = s_axi_ddra_rready ; - - assign s_axi_ddra_awready = sh_cl_ddr_awready_2d[0] ; - assign s_axi_ddra_wready = sh_cl_ddr_wready_2d[0] ; - assign s_axi_ddra_bid = sh_cl_ddr_bid_2d[0] ; - assign s_axi_ddra_bresp = sh_cl_ddr_bresp_2d[0] ; - assign s_axi_ddra_bvalid = sh_cl_ddr_bvalid_2d[0] ; - assign s_axi_ddra_arready = sh_cl_ddr_arready_2d[0] ; - assign s_axi_ddra_rid = sh_cl_ddr_rid_2d[0] ; - assign s_axi_ddra_rdata = sh_cl_ddr_rdata_2d[0] ; - 
assign s_axi_ddra_rresp = sh_cl_ddr_rresp_2d[0] ; - assign s_axi_ddra_rlast = sh_cl_ddr_rlast_2d[0] ; - assign s_axi_ddra_rvalid = sh_cl_ddr_rvalid_2d[0] ; - assign ddra_is_ready = sh_cl_ddr_is_ready_2d[0]; - - assign cl_sh_ddr_awid_2d[1] = s_axi_ddrb_awid ; - assign cl_sh_ddr_awaddr_2d[1] = s_axi_ddrb_awaddr ; - assign cl_sh_ddr_awlen_2d[1] = s_axi_ddrb_awlen ; - assign cl_sh_ddr_awsize_2d[1] = s_axi_ddrb_awsize ; - assign cl_sh_ddr_awburst_2d[1] = 2'b01 ; - assign cl_sh_ddr_awvalid_2d[1] = s_axi_ddrb_awvalid ; - assign cl_sh_ddr_wid_2d[1] = 0 ; - assign cl_sh_ddr_wdata_2d[1] = s_axi_ddrb_wdata ; - assign cl_sh_ddr_wstrb_2d[1] = s_axi_ddrb_wstrb ; - assign cl_sh_ddr_wlast_2d[1] = s_axi_ddrb_wlast ; - assign cl_sh_ddr_wvalid_2d[1] = s_axi_ddrb_wvalid ; - assign cl_sh_ddr_bready_2d[1] = s_axi_ddrb_bready ; - assign cl_sh_ddr_arid_2d[1] = s_axi_ddrb_arid ; - assign cl_sh_ddr_araddr_2d[1] = s_axi_ddrb_araddr ; - assign cl_sh_ddr_arlen_2d[1] = s_axi_ddrb_arlen ; - assign cl_sh_ddr_arsize_2d[1] = s_axi_ddrb_arsize ; - assign cl_sh_ddr_arburst_2d[1] = 2'b01 ; - assign cl_sh_ddr_arvalid_2d[1] = s_axi_ddrb_arvalid ; - assign cl_sh_ddr_rready_2d[1] = s_axi_ddrb_rready ; - - assign s_axi_ddrb_awready = sh_cl_ddr_awready_2d[1] ; - assign s_axi_ddrb_wready = sh_cl_ddr_wready_2d[1] ; - assign s_axi_ddrb_bid = sh_cl_ddr_bid_2d[1] ; - assign s_axi_ddrb_bresp = sh_cl_ddr_bresp_2d[1] ; - assign s_axi_ddrb_bvalid = sh_cl_ddr_bvalid_2d[1] ; - assign s_axi_ddrb_arready = sh_cl_ddr_arready_2d[1] ; - assign s_axi_ddrb_rid = sh_cl_ddr_rid_2d[1] ; - assign s_axi_ddrb_rdata = sh_cl_ddr_rdata_2d[1] ; - assign s_axi_ddrb_rresp = sh_cl_ddr_rresp_2d[1] ; - assign s_axi_ddrb_rlast = sh_cl_ddr_rlast_2d[1] ; - assign s_axi_ddrb_rvalid = sh_cl_ddr_rvalid_2d[1] ; - assign ddrb_is_ready = sh_cl_ddr_is_ready_2d[1]; - - assign cl_sh_ddr_awid_2d[2] = s_axi_ddrd_awid ; - assign cl_sh_ddr_awaddr_2d[2] = s_axi_ddrd_awaddr ; - assign cl_sh_ddr_awlen_2d[2] = s_axi_ddrd_awlen ; - assign 
cl_sh_ddr_awsize_2d[2] = s_axi_ddrd_awsize ; - assign cl_sh_ddr_awburst_2d[2] = 2'b01 ; - assign cl_sh_ddr_awvalid_2d[2] = s_axi_ddrd_awvalid ; - assign cl_sh_ddr_wid_2d[2] = 0 ; - assign cl_sh_ddr_wdata_2d[2] = s_axi_ddrd_wdata ; - assign cl_sh_ddr_wstrb_2d[2] = s_axi_ddrd_wstrb ; - assign cl_sh_ddr_wlast_2d[2] = s_axi_ddrd_wlast ; - assign cl_sh_ddr_wvalid_2d[2] = s_axi_ddrd_wvalid ; - assign cl_sh_ddr_bready_2d[2] = s_axi_ddrd_bready ; - assign cl_sh_ddr_arid_2d[2] = s_axi_ddrd_arid ; - assign cl_sh_ddr_araddr_2d[2] = s_axi_ddrd_araddr ; - assign cl_sh_ddr_arlen_2d[2] = s_axi_ddrd_arlen ; - assign cl_sh_ddr_arsize_2d[2] = s_axi_ddrd_arsize ; - assign cl_sh_ddr_arburst_2d[2] = 2'b01 ; - assign cl_sh_ddr_arvalid_2d[2] = s_axi_ddrd_arvalid ; - assign cl_sh_ddr_rready_2d[2] = s_axi_ddrd_rready ; - - assign s_axi_ddrd_awready = sh_cl_ddr_awready_2d[2] ; - assign s_axi_ddrd_wready = sh_cl_ddr_wready_2d[2] ; - assign s_axi_ddrd_bid = sh_cl_ddr_bid_2d[2] ; - assign s_axi_ddrd_bresp = sh_cl_ddr_bresp_2d[2] ; - assign s_axi_ddrd_bvalid = sh_cl_ddr_bvalid_2d[2] ; - assign s_axi_ddrd_arready = sh_cl_ddr_arready_2d[2] ; - assign s_axi_ddrd_rid = sh_cl_ddr_rid_2d[2] ; - assign s_axi_ddrd_rdata = sh_cl_ddr_rdata_2d[2] ; - assign s_axi_ddrd_rresp = sh_cl_ddr_rresp_2d[2] ; - assign s_axi_ddrd_rlast = sh_cl_ddr_rlast_2d[2] ; - assign s_axi_ddrd_rvalid = sh_cl_ddr_rvalid_2d[2] ; - assign ddrd_is_ready = sh_cl_ddr_is_ready_2d[2]; - - logic ddr_aws_stat_ack0; - logic [31:0] ddr_aws_stat_rdata0; - logic [7:0] ddr_aws_stat_int0; - logic ddr_aws_stat_ack1; - logic [31:0] ddr_aws_stat_rdata1; - logic [7:0] ddr_aws_stat_int1; - logic ddr_aws_stat_ack2; - logic [31:0] ddr_aws_stat_rdata2; - logic [7:0] ddr_aws_stat_int2; - - logic [7:0] pipe_ddr_stat_addr0; - logic pipe_ddr_stat_wr0; - logic pipe_ddr_stat_rd0; - logic [31:0] pipe_ddr_stat_wdata0; - logic ddr_pipe_stat_ack0; - logic [31:0] ddr_pipe_stat_rdata0; - logic [7:0] ddr_pipe_stat_int0; - - logic [7:0] pipe_ddr_stat_addr1; - logic 
pipe_ddr_stat_wr1; - logic pipe_ddr_stat_rd1; - logic [31:0] pipe_ddr_stat_wdata1; - logic ddr_pipe_stat_ack1; - logic [31:0] ddr_pipe_stat_rdata1; - logic [7:0] ddr_pipe_stat_int1; - - logic [7:0] pipe_ddr_stat_addr2; - logic pipe_ddr_stat_wr2; - logic pipe_ddr_stat_rd2; - logic [31:0] pipe_ddr_stat_wdata2; - logic ddr_pipe_stat_ack2; - logic [31:0] ddr_pipe_stat_rdata2; - logic [7:0] ddr_pipe_stat_int2; - -//------------------------------------------------- -// Tie-offs when DDRs are disabled -//------------------------------------------------- - assign ddr_sh_stat_ack0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_ack0 : 1'b1; - assign ddr_sh_stat_rdata0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_rdata0 : 0; - assign ddr_sh_stat_int0 = (C_DDR_A_PRESENT!=0) ? ddr_aws_stat_int0 : 8'b0; - assign ddr_sh_stat_ack1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_ack1 : 1'b1; - assign ddr_sh_stat_rdata1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_rdata1 : 0; - assign ddr_sh_stat_int1 = (C_DDR_B_PRESENT!=0) ? ddr_aws_stat_int1 : 8'b0; - assign ddr_sh_stat_ack2 = (C_DDR_D_PRESENT!=0) ? ddr_aws_stat_ack2 : 1'b1; - assign ddr_sh_stat_rdata2 = (C_DDR_D_PRESENT!=0) ? ddr_aws_stat_rdata2 : 0; - assign ddr_sh_stat_int2 = (C_DDR_D_PRESENT!=0) ? 
ddr_aws_stat_int2 : 8'b0; - -//------------------------------------------------- -// Reset Synchronization -//------------------------------------------------- - logic pre_sync_rst_n; - logic sync_rst_n; - - always @(negedge rst_main_n or posedge clk_main_a0) begin - if (!rst_main_n) begin - pre_sync_rst_n <= 1'b0; - sync_rst_n <= 1'b0; - end else begin - pre_sync_rst_n <= 1'b1; - sync_rst_n <= pre_sync_rst_n; - end - end - - `ifdef FPGA_LESS_RST - `undef FPGA_LESS_RST - `endif - - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata0 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata0), .out_bus(pipe_ddr_stat_wdata0)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr0 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr0), .out_bus(pipe_ddr_stat_addr0)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr0), .out_bus(pipe_ddr_stat_wr0)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd0), .out_bus(pipe_ddr_stat_rd0)); - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata0 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata0), .in_bus(ddr_pipe_stat_rdata0)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack0), .in_bus(ddr_pipe_stat_ack0)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int0 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int0), .in_bus(ddr_pipe_stat_int0)); - - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata1 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata1), .out_bus(pipe_ddr_stat_wdata1)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr1 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr1), .out_bus(pipe_ddr_stat_addr1)); - lib_pipe #(.WIDTH(1), 
.STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr1), .out_bus(pipe_ddr_stat_wr1)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd1), .out_bus(pipe_ddr_stat_rd1)); - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata1 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata1), .in_bus(ddr_pipe_stat_rdata1)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack1), .in_bus(ddr_pipe_stat_ack1)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int1 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int1), .in_bus(ddr_pipe_stat_int1)); - - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wdata2 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_wdata2), .out_bus(pipe_ddr_stat_wdata2)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_addr2 (.clk(clk_main_a0), .rst_n(1'b1), .in_bus(sh_ddr_stat_addr2), .out_bus(pipe_ddr_stat_addr2)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_wr2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_wr2), .out_bus(pipe_ddr_stat_wr2)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rd2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .in_bus(sh_ddr_stat_rd2), .out_bus(pipe_ddr_stat_rd2)); - lib_pipe #(.WIDTH(32), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_rdata2 (.clk(clk_main_a0), .rst_n(1'b1), .out_bus(ddr_aws_stat_rdata2), .in_bus(ddr_pipe_stat_rdata2)); - lib_pipe #(.WIDTH(1), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_ack2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_ack2), .in_bus(ddr_pipe_stat_ack2)); - lib_pipe #(.WIDTH(8), .STAGES(C_NUM_STAGES_STATS)) pipe_stat_int2 (.clk(clk_main_a0), .rst_n(sync_rst_n), .out_bus(ddr_aws_stat_int2), .in_bus(ddr_pipe_stat_int2)); - - sh_ddr #( - 
.DDR_A_PRESENT(C_DDR_A_PRESENT), - .DDR_B_PRESENT(C_DDR_B_PRESENT), - .DDR_D_PRESENT(C_DDR_D_PRESENT) - ) sh_ddr_0 - ( - .clk(clk_main_a0), - .rst_n(sync_rst_n), - - .stat_clk(clk_main_a0), - .stat_rst_n(sync_rst_n), - - .CLK_300M_DIMM0_DP(CLK_300M_DIMM0_DP), - .CLK_300M_DIMM0_DN(CLK_300M_DIMM0_DN), - .M_A_ACT_N(M_A_ACT_N), - .M_A_MA(M_A_MA), - .M_A_BA(M_A_BA), - .M_A_BG(M_A_BG), - .M_A_CKE(M_A_CKE), - .M_A_ODT(M_A_ODT), - .M_A_CS_N(M_A_CS_N), - .M_A_CLK_DN(M_A_CLK_DN), - .M_A_CLK_DP(M_A_CLK_DP), - .M_A_PAR(M_A_PAR), - .M_A_DQ(M_A_DQ), - .M_A_ECC(M_A_ECC), - .M_A_DQS_DP(M_A_DQS_DP), - .M_A_DQS_DN(M_A_DQS_DN), - .cl_RST_DIMM_A_N(cl_RST_DIMM_A_N), - - .CLK_300M_DIMM1_DP(CLK_300M_DIMM1_DP), - .CLK_300M_DIMM1_DN(CLK_300M_DIMM1_DN), - .M_B_ACT_N(M_B_ACT_N), - .M_B_MA(M_B_MA), - .M_B_BA(M_B_BA), - .M_B_BG(M_B_BG), - .M_B_CKE(M_B_CKE), - .M_B_ODT(M_B_ODT), - .M_B_CS_N(M_B_CS_N), - .M_B_CLK_DN(M_B_CLK_DN), - .M_B_CLK_DP(M_B_CLK_DP), - .M_B_PAR(M_B_PAR), - .M_B_DQ(M_B_DQ), - .M_B_ECC(M_B_ECC), - .M_B_DQS_DP(M_B_DQS_DP), - .M_B_DQS_DN(M_B_DQS_DN), - .cl_RST_DIMM_B_N(cl_RST_DIMM_B_N), - - .CLK_300M_DIMM3_DP(CLK_300M_DIMM3_DP), - .CLK_300M_DIMM3_DN(CLK_300M_DIMM3_DN), - .M_D_ACT_N(M_D_ACT_N), - .M_D_MA(M_D_MA), - .M_D_BA(M_D_BA), - .M_D_BG(M_D_BG), - .M_D_CKE(M_D_CKE), - .M_D_ODT(M_D_ODT), - .M_D_CS_N(M_D_CS_N), - .M_D_CLK_DN(M_D_CLK_DN), - .M_D_CLK_DP(M_D_CLK_DP), - .M_D_PAR(M_D_PAR), - .M_D_DQ(M_D_DQ), - .M_D_ECC(M_D_ECC), - .M_D_DQS_DP(M_D_DQS_DP), - .M_D_DQS_DN(M_D_DQS_DN), - .cl_RST_DIMM_D_N(cl_RST_DIMM_D_N), - - //------------------------------------------------------ - // AXI Slave Interfaces - //------------------------------------------------------ - .cl_sh_ddr_awid(cl_sh_ddr_awid_2d), - .cl_sh_ddr_awaddr(cl_sh_ddr_awaddr_2d), - .cl_sh_ddr_awlen(cl_sh_ddr_awlen_2d), - .cl_sh_ddr_awsize(cl_sh_ddr_awsize_2d), - .cl_sh_ddr_awburst(cl_sh_ddr_awburst_2d), - .cl_sh_ddr_awvalid(cl_sh_ddr_awvalid_2d), - .sh_cl_ddr_awready(sh_cl_ddr_awready_2d), - - 
.cl_sh_ddr_wid(cl_sh_ddr_wid_2d), - .cl_sh_ddr_wdata(cl_sh_ddr_wdata_2d), - .cl_sh_ddr_wstrb(cl_sh_ddr_wstrb_2d), - .cl_sh_ddr_wlast(cl_sh_ddr_wlast_2d), - .cl_sh_ddr_wvalid(cl_sh_ddr_wvalid_2d), - .sh_cl_ddr_wready(sh_cl_ddr_wready_2d), - - .sh_cl_ddr_bid(sh_cl_ddr_bid_2d), - .sh_cl_ddr_bresp(sh_cl_ddr_bresp_2d), - .sh_cl_ddr_bvalid(sh_cl_ddr_bvalid_2d), - .cl_sh_ddr_bready(cl_sh_ddr_bready_2d), - - .cl_sh_ddr_arid(cl_sh_ddr_arid_2d), - .cl_sh_ddr_araddr(cl_sh_ddr_araddr_2d), - .cl_sh_ddr_arlen(cl_sh_ddr_arlen_2d), - .cl_sh_ddr_arsize(cl_sh_ddr_arsize_2d), - .cl_sh_ddr_arburst(cl_sh_ddr_arburst_2d), - .cl_sh_ddr_arvalid(cl_sh_ddr_arvalid_2d), - .sh_cl_ddr_arready(sh_cl_ddr_arready_2d), - - .sh_cl_ddr_rid(sh_cl_ddr_rid_2d), - .sh_cl_ddr_rdata(sh_cl_ddr_rdata_2d), - .sh_cl_ddr_rresp(sh_cl_ddr_rresp_2d), - .sh_cl_ddr_rlast(sh_cl_ddr_rlast_2d), - .sh_cl_ddr_rvalid(sh_cl_ddr_rvalid_2d), - .cl_sh_ddr_rready(cl_sh_ddr_rready_2d), - - .sh_cl_ddr_is_ready(sh_cl_ddr_is_ready_2d), - - .sh_ddr_stat_addr0 (pipe_ddr_stat_addr0 ), - .sh_ddr_stat_wr0 (pipe_ddr_stat_wr0 ), - .sh_ddr_stat_rd0 (pipe_ddr_stat_rd0 ), - .sh_ddr_stat_wdata0 (pipe_ddr_stat_wdata0), - .ddr_sh_stat_ack0 (ddr_pipe_stat_ack0 ), - .ddr_sh_stat_rdata0 (ddr_pipe_stat_rdata0), - .ddr_sh_stat_int0 (ddr_pipe_stat_int0 ), - - .sh_ddr_stat_addr1 (pipe_ddr_stat_addr1 ), - .sh_ddr_stat_wr1 (pipe_ddr_stat_wr1 ), - .sh_ddr_stat_rd1 (pipe_ddr_stat_rd1 ), - .sh_ddr_stat_wdata1 (pipe_ddr_stat_wdata1), - .ddr_sh_stat_ack1 (ddr_pipe_stat_ack1 ), - .ddr_sh_stat_rdata1 (ddr_pipe_stat_rdata1), - .ddr_sh_stat_int1 (ddr_pipe_stat_int1 ), - - .sh_ddr_stat_addr2 (pipe_ddr_stat_addr2 ), - .sh_ddr_stat_wr2 (pipe_ddr_stat_wr2 ), - .sh_ddr_stat_rd2 (pipe_ddr_stat_rd2 ), - .sh_ddr_stat_wdata2 (pipe_ddr_stat_wdata2), - .ddr_sh_stat_ack2 (ddr_pipe_stat_ack2 ), - .ddr_sh_stat_rdata2 (ddr_pipe_stat_rdata2), - .ddr_sh_stat_int2 (ddr_pipe_stat_int2 ) - - ); - - end else begin : gen_non_mem - - assign s_axi_ddra_awready = 0; - assign 
s_axi_ddra_wready = 0; - assign s_axi_ddra_bid = 0; - assign s_axi_ddra_bresp = 0; - assign s_axi_ddra_bvalid = 0; - assign s_axi_ddra_arready = 0; - assign s_axi_ddra_rid = 0; - assign s_axi_ddra_rdata = 0; - assign s_axi_ddra_rresp = 0; - assign s_axi_ddra_rlast = 1'b1; - assign s_axi_ddra_rvalid = 0; - assign ddra_is_ready = 0; - - assign s_axi_ddrb_awready = 0; - assign s_axi_ddrb_wready = 0; - assign s_axi_ddrb_bid = 0; - assign s_axi_ddrb_bresp = 0; - assign s_axi_ddrb_bvalid = 0; - assign s_axi_ddrb_arready = 0; - assign s_axi_ddrb_rid = 0; - assign s_axi_ddrb_rdata = 0; - assign s_axi_ddrb_rresp = 0; - assign s_axi_ddrb_rlast = 1'b1; - assign s_axi_ddrb_rvalid = 0; - assign ddrb_is_ready = 0; - - assign s_axi_ddrd_awready = 0; - assign s_axi_ddrd_wready = 0; - assign s_axi_ddrd_bid = 0; - assign s_axi_ddrd_bresp = 0; - assign s_axi_ddrd_bvalid = 0; - assign s_axi_ddrd_arready = 0; - assign s_axi_ddrd_rid = 0; - assign s_axi_ddrd_rdata = 0; - assign s_axi_ddrd_rresp = 0; - assign s_axi_ddrd_rlast = 1'b1; - assign s_axi_ddrd_rvalid = 0; - assign ddrd_is_ready = 0; - - assign ddr_sh_stat_ack0 = 1'b1; - assign ddr_sh_stat_rdata0 = 0; - assign ddr_sh_stat_int0 = 8'b0; - assign ddr_sh_stat_ack1 = 1'b1; - assign ddr_sh_stat_rdata1 = 0; - assign ddr_sh_stat_int1 = 8'b0; - assign ddr_sh_stat_ack2 = 1'b1; - assign ddr_sh_stat_rdata2 = 0; - assign ddr_sh_stat_int2 = 8'b0; - - assign M_A_ACT_N = 0; - assign M_A_MA = 0; - assign M_A_BA = 0; - assign M_A_BG = 0; - assign M_A_CKE = 0; - assign M_A_ODT = 0; - assign M_A_CS_N = 0; - assign M_A_CLK_DN = 0; - assign M_A_CLK_DP = 0; - assign M_A_PAR = 0; - assign cl_RST_DIMM_A_N = 0; - - assign M_B_ACT_N = 0; - assign M_B_MA = 0; - assign M_B_BA = 0; - assign M_B_BG = 0; - assign M_B_CKE = 0; - assign M_B_ODT = 0; - assign M_B_CS_N = 0; - assign M_B_CLK_DN = 0; - assign M_B_CLK_DP = 0; - assign M_B_PAR = 0; - assign cl_RST_DIMM_B_N = 0; - - assign M_D_ACT_N = 0; - assign M_D_MA = 0; - assign M_D_BA = 0; - assign M_D_BG = 0; - 
assign M_D_CKE = 0; - assign M_D_ODT = 0; - assign M_D_CS_N = 0; - assign M_D_CLK_DN = 0; - assign M_D_CLK_DP = 0; - assign M_D_PAR = 0; - assign cl_RST_DIMM_D_N = 0; - - end // gen_mem - endgenerate - -endmodule - - diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/lib_pipe.sv b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/lib_pipe.sv new file mode 120000 index 00000000..c55fd543 --- /dev/null +++ b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/lib_pipe.sv @@ -0,0 +1 @@ +../../../../../design/lib/lib_pipe.sv \ No newline at end of file diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/sim b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/sim new file mode 120000 index 00000000..4eb7f5ae --- /dev/null +++ b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/sim @@ -0,0 +1 @@ +../../../../../design/sh_ddr/sim \ No newline at end of file diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/sim/gray.inc b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/sim/gray.inc deleted file mode 100755 index 35e3b5ac..00000000 --- a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/sim/gray.inc +++ /dev/null @@ -1,54 +0,0 @@ -// SHA: bddf8457046b3a64e63d28d7e334020b6f1d09ee -`pragma protect begin_protected -`pragma protect version = 1 -`pragma protect encrypt_agent = "XILINX" -`pragma protect encrypt_agent_info = "Xilinx Encryption Tool 2015" -`pragma protect key_keyowner = "Xilinx", key_keyname = "xilinxt_2017_05", key_method = "rsa" -`pragma protect encoding = (enctype = "BASE64", line_length = 76, bytes = 256) -`pragma protect key_block -iWZCE953hJCDyIc+ne+gwzh8qXsoHncv0uZD9mD+v5fx+PEpRYsrUwVcqY8NGks/8KrnC1SNztDZ -curQivQImMnSoAGPeG2bNV8bmkBS1rhgCF+dM7tLc6A2UDWpvGzLUBwtZEoYGo9qI/brjagfJ4AB -rEXslIMBpU4DM78ZslW+HuM6LGQaxRCRc5YmcX9lULqKp4gcYejmK7bNBZVoMQPaxbOJKJ6Shm8B -OwZERFn7ecS2YTdmKNHaXgTG11pozaLBWmvQ9dAoBbjBP2u9av9r72qQ/x/sB/rBhukAV1tbxMRT -N5VdaW8njTW1/BmqfN0EYNrisu9/VrhCD8CpOw== - -`pragma 
protect data_method = "AES128-CBC" -`pragma protect encoding = (enctype = "BASE64", line_length = 76, bytes = 2048) -`pragma protect data_block -kJG/xGGs9D/8k2u3JRVp+XhN8w/W2vao3+RyPdGKa2ielHnUCL6Wf5zeKZvunCPxvE+hY4IzwA3A -wigLpWB1GOP6DyilyJ2bV2zhnBCaKdy+sFGQK2AzyHbMPzKs/Ubz1cWrjh6Jcdp+x7Wl6gnlaHmm -C6KgAmVndCZq6vvWz4TzJpiMaoUt8ge+9MvH1ILAiY5mdQk8SA9G9EnSNdGDfC8Qe+hAMW0ttojk -f04YI1slnd2P1kOSfMve5S1SG6p2NyrTQR/dOSRVfWgY4tzGGMBPPsf4SE2IDpA+/2vOC+FLNBhd -xa57bmlMKsW3u0KXGvtjziIxPzWYRt5wl3/6MpPPTR3jKfxlfT+y5iP19sf4otY1AYhFc7+Jz4+q -Wm7eM7ZCz75nYMrfbJsiPnEskhdbpqSKb4gbKLLSwYu8I6zrl8/KMBIsKSdDK2YQiFrUtH2pWpp3 -7GYqqwBtYcFQB66ouMrQVA4q+ihCg0WtFf+5LpFri1Cy5khHYCDA2T0Cj06nwDTK9P7PS0axtUli -1PFd+O9zh72LvJ8Ayr+H/vZLeFdvf15XP1DGfzcDf83nWk3lINWwdxQ6BDGyKCp1x5rz9BSLpKKF -U2GzDlX8RcGGYQoli/JUZA8qX8be1ele/nXV/Ml/60KLS+L+Lrr0TzzFuCjU5xek/jO6w3+/e5D9 -ftBYgkZyeXr7nYiIM7X0zkkdMGrsDu2uhKKrvCe4x5IakCs9osJhpqCj4mRdENY1GEMxLv1q/4NS -tMo6k3LWn6pAfTPvHZlSvjdkP3JRNKrLFObdtaqGs4/TgTh6HtlG3PTskPeRmU6XF+jQoW249M3x -jJoW5N32GZlP8PlSRUnr9pUUHZ+ysjkUNjBrmmSV0F091Fyy8gqV688MkjrN2mQVcPO2G/EiJ4Kn -z2RnciR5HBjJVzkUA2rZR8pjz2n5htUIY8/+O14sYbEPFakdH0JKGpNPimxPcOkczl6xtGsbhiD1 -MftzBA2MxWF9lwRtNp1op3ugvZerTcQ7ftERBTzrV3AU7btg17E6iWiFUty3r+ToY6rzD5n8ZSOY -N1ji41hc1VpTMxrYty33ejc2GruqxpW7O6iZyQ4q7nXYZpt3K84119Au2avjVTunErpwN30qgOxR -Cw1HLyxIqmft7QUz74FiYnPS9s2A13hRVQ29TYLf+X46vBfHdO5kTFIKu+eNH0PQ9I9CcXBnUrHj -Mo26d3+JtSZGPO6Hm356Ur45Waq0+l5mm3YbfgqMESCPUQfzTfSVF4PN4DAU5Hv8HPB6H7y5le5q -QGBbza/nPgLnY546FMEbAczzR2A6eefQ2chEKdoTH3OT5rlMZZqBiK4ZHToaQYXWKTih0DoJMV0U -5pCpeoLoErMEqP0S+wcvA3zQkA0YqXc2KDkrovZgTyyaPgdC8YzghEmlxMq8DQs8ikdVy3hdfYob -8QWNYdrgVk781vc8DXNU3mHZ0jEZQYaaTZV5ZlBGtJzWngGmWwMoL0F29f3LdlZAhERnFbejR/eZ -UBXXBzgj5nrM2Fr+RlAowrjFcs/nr32fkCSfpnl9r77NtqxX4CnTBhLe7c4WSm+4Etah0DGdI7Gh -JVKmqaeEhXaB2f/QlCabxSheicrWkLWBUD3TG/j33ZwcHjs3r56T/YsYLfPmXxz5MZpZus6kp7oz -7TOqe9WlXjsAw4SEDPEZhbcPu8kxzbqdq+O8E0FJjBXNhqgkNmhOmJdxgpGODASsM8WGOLsDILOb 
-TmcS7J94JWDDI5E8v2XGhrGSvRPpQI4s7SS7Q1i457UZzrHM8wTTCq6ONvI3jGeEKlaCcbGTAWYD -3/Rc7CpZO3vho08wcgRVlAeozKLfiqrnyk/fyx1M6pESrKQUYHzPf2uXUaEH2qHZpBOABoyVhT7M -tXxZL6KImwr/UKLb4WQpsCV8txdAHSojgCjEahgloM/yQQyNFDBmYfG2yQGxeGgXGgCneDNvdmXq -zo8SmZGuEU6583y2Af6rwXWuvfMcByzcBDUkVXO3WBXfBoG94KJBhHo40FXVrfVwpVjeEBXakwKy -B0NarE4jj5uofWkKhaUEYIhVvPV6fmR0DUlyt6DW6zJXNDe1dsgfs2jw6I6N2ffgzX4IXyMpOh48 -Xz/DaHENQArcIhlJL8za5WIoZROdtYf2sfL0Jlnl6rwzcwXEFIBZfSeheWkoP+wTUM5tKsbiB1xV -2eUJshGm2lnAIWhRLn4baHxeDSMtIjYTGwkVmgF/N8uAPr5UkrofL5GiVA3iWXtTdpA3gSVbe+U2 -Fsw4SarJJp+PbTjgPh6dP+KdrOjVGXM7KV0KAr6qV+woPXK7GAxsj9CTepwf+5pOq0N7fGsc1GH0 -8GdvwfmbELp3tXvI0emNI9dy7Zl40gDneqnqZPbWRMqPi911iT/1BxGo0LzeGDquUXbRPj9xm7sH -qPdkoIIENexwsxeLugkSs9+/K+S4rkwkVsG/1zx7GzZ+SOWpU0sXkHIq7+wPJ04/OVjyzrubRCxt -6VgTKH6NZwIkFBEcxLP7UzhV5x5RgPGi3qmissvhCBWtyKJomwqVhX0ozi0QLYOIUMFx9ftAAHQF -fXrlhhz9DKLH8khVKAUcqCreXnSyHY/FxanjBnzovfxUl3XzDYxv5oPPbu6LT37jDNOqw6cRAuMP -fedDSu42Dymfn7IPAwV0tBVMFbxNSO8Q87Nl9Oz19SSqCJS3meV0MZd+KYQyM3LEb545tts= -`pragma protect end_protected diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/synth b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/synth new file mode 120000 index 00000000..85198b6a --- /dev/null +++ b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/synth @@ -0,0 +1 @@ +../../../../../design/sh_ddr/synth \ No newline at end of file diff --git a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/synth/gray.inc b/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/synth/gray.inc deleted file mode 100755 index d1576016..00000000 --- a/hdk/common/shell_v04261818/hlx/design/ip/aws_v1_0/hdl/synth/gray.inc +++ /dev/null @@ -1,61 +0,0 @@ -// SHA: bddf8457046b3a64e63d28d7e334020b6f1d09ee -`pragma protect begin_protected -`pragma protect version = 2 -`pragma protect encrypt_agent = "XILINX" -`pragma protect encrypt_agent_info = "Xilinx Encryption Tool 2015" -`pragma protect begin_commonblock -`pragma protect control error_handling="delegated" 
-`pragma protect end_commonblock -`pragma protect begin_toolblock -`pragma protect rights_digest_method="sha256" -`pragma protect key_keyowner = "Xilinx", key_keyname= "xilinxt_2017_05", key_method = "rsa", key_block -GQ3cf/VGHY0xKfRLLTPQZDazEA3OyQ5p2SCdrSlmdPjCM7yE2VcditpKLLWOfn5+0jtL5Uy85K3Z -RJNHMY3ze7jXvEClcJ2Vzp4fwkFCOlM/vh4YGp9wET1CUY56wndycOW/RlHrhiqDq2bXfR7NoD8w -aUaWced9KaFJ122f3nIBhocYiGlY/mHq0LOkoyK9v0rxbrTOm4QTm9WMLhpFX5+pN2RWspU3lhH4 -4c0N26aRVupP7gBcHGRqsMPfRxZ8cjR5JDjSAuOQjPsOBwlBb2C6oraqRKgBy/u0d80adeVoucHF -MEe7jM34roCrBZ0ebqJX8uHjLcNKZfzAt+w65g== - -`pragma protect control xilinx_configuration_visible = "false" -`pragma protect control xilinx_enable_modification = "false" -`pragma protect control xilinx_enable_probing = "false" -`pragma protect end_toolblock="+F6HPoNYthaGX52y+5DOhQmlp8k/oolHCsFRV36MRR8=" -`pragma protect data_method = "AES128-CBC" -`pragma protect encoding = (enctype = "BASE64", line_length = 76, bytes = 2048) -`pragma protect data_block -53kgbiIAqixoiMbgLm6sQR7urx0digpOBUiU5C4zCLVOgvP6jILtzGHp1o8/cRAaUwztJPdFqMku -qUzzLLOCANzAAXRvJS4UEx3mHiUJ3Bt/jh6mx8J/NBWMds9dF5xtS4nN6sHEtPhm1xgUSGRDH1vO -LUwOLDgTOIzTAIiVylONAVKeZ6VgTdDlBnsFlPApvJxx4NNDDLc5v5gwJek7RBaDOCtDWaf8vx/R -B2Z8k0NnpdYFcgTrjoNUTCZErPhqzsWdrrUNr28ANK541xMJ2NK5ON4CIAiQMhPcmzojS08ydU3c -DLdcPTSKaXL5U3HS2d/hlYEpi7atjmq6jrFyMrP5JHaPXDq4JE4g8ZM694zfqTEDCqrKmPQQCQW1 -7DaiFOwFB1dkzUDSdXbao7ccRsWb1hzl+9ylXaVQC8lRlfx4kIEWNOae4sNbD7Zdz0xFp3FN5Y8b -Cpb1uEV/J6dKw/DV8pBHMW5reix2s+uk9gA++A3gQciMm0qPc2ZBQ4dgyLb/RAiJN1OFggXDn6wH -pO4YYvg5gy5WLvOW4saFHN5/bFSIPCCSfBC83hP5/j93J5LuS/Tg9g6DAv+QShQLo/IY3znLxYwu -1EUhZOY4hc7TS9CZQqdLoZzvpUK2xpEq8v9Cg+t/aHqJQbeHw8He3hiaBiVjGfA8j/GePf644T20 -oGeQ9v9VTUW77rIJzpl8pejhftSnwWLEsrBqcAj0STiGRMF0u5088G84Bzf95qgq4q2/qKwXpEqa -bBISKPumQoaUsdVIOqoxuO6IpZDWKFOpu4WJcm/Yw5Giypi9uZRkAOPehQ8NkBi9+vNC/OwuWjKp -2tA4vUFUIKXMMUuNSErf2wJ064ajf34K54QfrKXp1bNB7fvwlekUwWirIRButrxEJSWS3tcgRGNn 
-zdYAv+hZEihs3jfi4HuXpbLEVAxQqZZUPfS4LZTQRUWDXj6zLw5VdrlmfTHe6XYaQ4JMhhD0ChYI -4kkD+jq37vfYfN6NNRWJLhk8cYa14Jdn/b6VrCaUSkMKUfN3ao0vhWrxqPSHmB7g5Yk6hihdxfAn -s2mQ/QIDFy56Kx21K9eypEWvtyU08yMllG58ZLH3mxAR2COg+eI9ku+Bzkzj2DDIAHHB/RQevjIZ -sDpluevaqALD7Eolcy3ikXos8+UnTOQQ8VBcAAFwcPdiF9/K0qKd4EPNCyOsvFe+4YY4bKro9fjr -Bfc7UNF1MPEIUCVBmjW8U9NxLSPmAnSxXYM6z1TBrwLN8DREPYS7owTqdheLEpC0a2a2EN3RjOUg -aOBfyhIcupxSbhzgmMuiSjhOQb/Hrw0SGpljEAJRXqtZ2xWdXulKgUjTLUYI98mzyTviBozRJx55 -vpjIgJSB+1ewvrY3EA72ZaVE3HsuF3STL8zuCnp0x/lP6f+hQbQTSuFA6XxZVrnp110if2lOzEMr -2urkDBqDW5f3FDAndhakpPB4GEYEzsli41PfnO4YHK4hXvGhmw1W25nsbaAaILjmYJY6v8ATRHvh -0hy7+MiItqTg4pXjuKip4WY9n5FjUhGvFW7KP9XsgKMz1MwEc+XSpdlDprY0JPiENyFxPqZf9qDl -tGr2eQ1GF7zGWz5/kl84JVNbpvqArofI2aJaszFsRC86cYtyocHugHUNCia+3xVG7Ch5fqf57LtM -YeFmiWTtIkocbCGsOC5PIusc5olkVd3TIKkGimkV04fvMdOuSqrvIi3Chndu0xDVApCuC+wchLiN -iPCXc0wfTKuqiKbvZiaUUPDyaGAGpqeZL/kRbowWbZ5ZVAnX+MANoozeZT5A5INyw9JwdzOx71Im -qcL+2C0XMHj0sijLuIG9Yx6WDBSOcO4P5xXO0Gir7rV0jG47x6Dj1+p64UGdAPolUYUwXUeQcM34 -1ohA9lTfT4egDO9GOgkovwi2Jhqt3HLvTu/ERAMUc/03DgLykeAaW2C1ya+g7HwHvIZbrptX6Bb1 -YZOmkBUN5rF0OkhIRaJ9kSuteLPUbgSPImbyAPmyHt9TPuaMHD/v2fIjq2ojC+O8BVG3p78FsloG -I+hILpbDCsC7PGFzmAhuMPTN4gJTig0YDJfHOTAq1OYeWt6y60ldRwKfIorbPrQ4EzS1CdorSIXM -nf5yFA8VwSkiGn6+JBmR+4nKK+RfDJCLYdHa3E9we2aO6SbhRRUr3gfaPmqRjGYCARnAcwt2GmL0 -Qh/l3zfL8yCqeFYN0lcfUG2HaGzvdwQ5bilQwfw3iU/6A2uRl0h9WhEaOZ0zTfh1SC0RJwgaB9b6 -amGnp9AoqrrWCmWpHQlJXWtd2m0Z/8X/GzLaWKjziBr1TJzVBmzT7F0kG92bdCugH/LHFdBVRxq1 -OdkKq+CDmYMiXP8Pn6SmP/JIykeAnW8zfNPyFV0DZ/DtCSNPSqqsxM/XzBBYq2DCYBymx8fJTpTs -uotFv42OYAtoxsuwThd5cIUJBT8HvG+m9agbzH1xi96xWZlpOviytMTwBJwqJqU2dk+p2VlyJN2F -46Zt6rq/dYtEhGbwgiRoEa4dO8JVS3XylGy9meaFqofkMPAvt0AI5ql0dzgqxMqvBfLCcYtbiNvq -av1/jdaLcOCHVpWSxwQYLOys37ItLASqrgbRQpug4aoJHNdU5/7jDDVs2v5o3HGNlrZ0PJTxVfQr -C0spm0fWtBxMCEmbkj98XyqI0PDupjLGcbrX5kRdp9E0DCJadbthVTs0zcw09dlckjhzGwA= -`pragma protect end_protected diff --git a/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_hls_dds/software/test_cl.c 
b/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_hls_dds/software/test_cl.c index 86a70410..605d3094 100755 --- a/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_hls_dds/software/test_cl.c +++ b/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_hls_dds/software/test_cl.c @@ -67,8 +67,8 @@ int main(int argc, char **argv) { int slot_id; /* initialize the fpga_pci library so we could have access to FPGA PCIe from this applications */ - rc = fpga_pci_init(); - fail_on(rc, out, "Unable to initialize the fpga_pci library"); + rc = fpga_mgmt_init(); + fail_on(rc, out, "Unable to initialize the fpga_mgmt library"); /* This demo works with single FPGA slot, we pick slot #0 as it works for both f1.2xl and f1.16xl */ diff --git a/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_ipi_cdma_test/software/test_cl.c b/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_ipi_cdma_test/software/test_cl.c index d8ca274c..afe30616 100644 --- a/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_ipi_cdma_test/software/test_cl.c +++ b/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/cl_ipi_cdma_test/software/test_cl.c @@ -69,9 +69,9 @@ int main(int argc, char **argv) { int rc; int slot_id; - /* initialize the fpga_pci library so we could have access to FPGA PCIe from this applications */ - rc = fpga_pci_init(); - fail_on(rc, out, "Unable to initialize the fpga_pci library"); + /* initialize the fpga_mgmt library so we could have access to FPGA PCIe from this applications */ + rc = fpga_mgmt_init(); + fail_on(rc, out, "Unable to initialize the fpga_mgmt library"); /* This demo works with single FPGA slot, we pick slot #0 as it works for both f1.2xl and f1.16xl */ diff --git a/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/hello_world/software/test_cl.c b/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/hello_world/software/test_cl.c index a6f57084..41454624 100755 --- 
a/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/hello_world/software/test_cl.c +++ b/hdk/common/shell_v04261818/hlx/hlx_examples/build/IPI/hello_world/software/test_cl.c @@ -55,8 +55,8 @@ int main(int argc, char **argv) { int rc; int slot_id; - /* initialize the fpga_pci library so we could have access to FPGA PCIe from this applications */ - rc = fpga_pci_init(); + /* initialize the fpga_mgmt library */ + rc = fpga_mgmt_init(); fail_on(rc, out, "Unable to initialize the fpga_pci library"); /* This demo works with single FPGA slot, we pick slot #0 as it works for both f1.2xl and f1.16xl */ @@ -66,19 +66,14 @@ int main(int argc, char **argv) { rc = check_afi_ready(slot_id); fail_on(rc, out, "AFI not ready"); - printf("\n"); printf("===== Hello World Example =====\n"); rc = peek_poke_example(slot_id, FPGA_APP_PF, APP_PF_BAR1); fail_on(rc, out, "peek-poke example failed"); - - - return rc; - out: return 1; } diff --git a/hdk/common/shell_v04261818/hlx/hlx_setup.tcl b/hdk/common/shell_v04261818/hlx/hlx_setup.tcl index 90fbb653..727dbaac 100644 --- a/hdk/common/shell_v04261818/hlx/hlx_setup.tcl +++ b/hdk/common/shell_v04261818/hlx/hlx_setup.tcl @@ -45,4 +45,6 @@ set aws::make_faas::public::bd_faas_examples_directory [file normalize [file joi set aws::make_faas::public::bd_faas_initscript [file join $aws::make_faas::public::bd_faas_build_directory scripts aws_bd_faas_initscript.tcl] set ::env(FAAS_HOOK_TCL) $::aws::make_faas::public::bd_faas_initscript - +# Maintain DONT TOUCH functionality for 2020.2 onwards +if {[string match *2020.2* [version -short]]} {set_param project.replaceDontTouchWithKeepHierarchySoft false} +# diff --git a/hdk/common/software/include/fpga_pci_sv.h b/hdk/common/software/include/fpga_pci_sv.h index 26886b52..0de0bc98 100644 --- a/hdk/common/software/include/fpga_pci_sv.h +++ b/hdk/common/software/include/fpga_pci_sv.h @@ -22,6 +22,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + /** * FPGA_PCI_BARS_MAX: * -compile time 
tunable via mkall_fpga_mgmt_tools.sh, with the below default. @@ -71,6 +75,23 @@ typedef int pci_bar_handle_t; */ int fpga_pci_init(void); +/** + * Initialize the fpga_mgmt library. + * Calls fpga_pci_init. + * + * @returns 0 on success, non-zero on error + */ +int fpga_mgmt_init(void); + +/** + * Closes the fpga_mgmt library and its dependencies and releases any acquired + * resources. + * + * @returns 0 on success, non-zero on error + */ +int fpga_mgmt_close(void); + + /** * Attach to an FPGA memory space. * diff --git a/hdk/common/software/src/fpga_pci_sv.c b/hdk/common/software/src/fpga_pci_sv.c index d6725f65..88b922ad 100644 --- a/hdk/common/software/src/fpga_pci_sv.c +++ b/hdk/common/software/src/fpga_pci_sv.c @@ -24,6 +24,16 @@ int fpga_pci_init(void) return 0; } +int fpga_mgmt_init(void) +{ + return 0; +} + +int fpga_mgmt_close(void) +{ + return 0; +} + /** * Attach to an FPGA memory space. * diff --git a/hdk/common/verif/include/sh_dpi_tasks.svh b/hdk/common/verif/include/sh_dpi_tasks.svh index e0c8ec73..2423f598 100755 --- a/hdk/common/verif/include/sh_dpi_tasks.svh +++ b/hdk/common/verif/include/sh_dpi_tasks.svh @@ -36,6 +36,14 @@ import tb_type_defines_pkg::*; export "DPI-C" task sv_map_host_memory; export "DPI-C" task cl_peek; export "DPI-C" task cl_poke; + export "DPI-C" task cl_peek_pcis; + export "DPI-C" task cl_poke_pcis; + export "DPI-C" task cl_peek_sda; + export "DPI-C" task cl_poke_sda; + export "DPI-C" task cl_peek_ocl; + export "DPI-C" task cl_poke_ocl; + export "DPI-C" task cl_peek_bar1; + export "DPI-C" task cl_poke_bar1; export "DPI-C" task sv_int_ack; export "DPI-C" task sv_pause; export "DPI-C" task sv_fpga_pci_peek; @@ -45,6 +53,7 @@ import tb_type_defines_pkg::*; export "DPI-C" task sv_fpga_start_cl_to_buffer; `endif export "DPI-C" task init_ddr; + export "DPI-C" task deselect_atg_hw; static int h2c_desc_index = 0; static int c2h_desc_index = 0; @@ -64,6 +73,38 @@ import tb_type_defines_pkg::*; task cl_poke(input longint unsigned 
addr, int unsigned data); poke_ocl(.addr(addr), .data(data)); endtask + + task cl_peek_pcis(input longint unsigned addr, output int unsigned data); + tb.card.fpga.sh.peek(.addr(addr), .data(data), .intf(AxiPort::PORT_DMA_PCIS)); + endtask + + task cl_poke_pcis(input longint unsigned addr, int unsigned data); + tb.card.fpga.sh.poke(.addr(addr), .data(data), .intf(AxiPort::PORT_DMA_PCIS)); + endtask + + task cl_peek_sda(input longint unsigned addr, output int unsigned data); + tb.card.fpga.sh.peek(.addr(addr), .data(data), .intf(AxiPort::PORT_SDA)); + endtask + + task cl_poke_sda(input longint unsigned addr, int unsigned data); + tb.card.fpga.sh.poke(.addr(addr), .data(data), .intf(AxiPort::PORT_SDA)); + endtask + + task cl_peek_ocl(input longint unsigned addr, output int unsigned data); + tb.card.fpga.sh.peek(.addr(addr), .data(data), .intf(AxiPort::PORT_OCL)); + endtask + + task cl_poke_ocl(input longint unsigned addr, int unsigned data); + tb.card.fpga.sh.poke(.addr(addr), .data(data), .intf(AxiPort::PORT_OCL)); + endtask + + task cl_peek_bar1(input longint unsigned addr, output int unsigned data); + tb.card.fpga.sh.peek(.addr(addr), .data(data), .intf(AxiPort::PORT_BAR1)); + endtask + + task cl_poke_bar1(input longint unsigned addr, int unsigned data); + tb.card.fpga.sh.poke(.addr(addr), .data(data), .intf(AxiPort::PORT_BAR1)); + endtask task sv_int_ack(input int unsigned int_num); tb.card.fpga.sh.set_ack_bit(int_num); @@ -208,16 +249,22 @@ end poke_stat(.addr(8'h0c), .ddr_idx(1), .data(32'h0000_0000)); poke_stat(.addr(8'h0c), .ddr_idx(2), .data(32'h0000_0000)); - //de-select the ATG hardware + + // allow memory to initialize + nsec_delay(27000); + endtask // initialize_sh_model + + + task deselect_atg_hw(); + + //de-select the ATG hardware poke_ocl(.addr(64'h130), .data(0)); poke_ocl(.addr(64'h230), .data(0)); poke_ocl(.addr(64'h330), .data(0)); poke_ocl(.addr(64'h430), .data(0)); - - // allow memory to initialize - nsec_delay(27000); - endtask // 
initialize_sh_model + nsec_delay(1000); + endtask `ifdef DMA_TEST //DPI task to transfer HOST to CL data. diff --git a/hdk/common/verif/models/sh_bfm/axi_bfm_defines.svh b/hdk/common/verif/models/sh_bfm/axi_bfm_defines.svh index 4c15afe5..ec92932a 100644 --- a/hdk/common/verif/models/sh_bfm/axi_bfm_defines.svh +++ b/hdk/common/verif/models/sh_bfm/axi_bfm_defines.svh @@ -19,7 +19,7 @@ typedef struct { logic [63:0] addr; logic [7:0] len; logic [2:0] size; - logic [5:0] id; + logic [15:0] id; logic [1:0] resp; logic last; } AXI_Command; @@ -27,7 +27,7 @@ typedef struct { typedef struct { logic [511:0] data; logic [63:0] strb; - logic [5:0] id; + logic [15:0] id; logic last; } AXI_Data; diff --git a/hdk/common/verif/models/sh_bfm/sh_bfm.sv b/hdk/common/verif/models/sh_bfm/sh_bfm.sv index 3d9e5938..846bdf81 100644 --- a/hdk/common/verif/models/sh_bfm/sh_bfm.sv +++ b/hdk/common/verif/models/sh_bfm/sh_bfm.sv @@ -1233,7 +1233,10 @@ module sh_bfm #( first_rd_beat = 1'b0; end - beat = {512{1'b1}}; + beat = {512{1'b1}}; + + if (cl_sh_pcim_rready) begin + for(int i=rd_addr[5:2]; i<16; i++) begin logic [31:0] c; @@ -1262,7 +1265,7 @@ module sh_bfm #( $display("[%t] : DEBUG beat 0x%0128x", $realtime, beat); end sh_cl_pcim_rdata <= beat; - + end //if(cl_sh_pcim_rready) end else begin sh_cl_pcim_rvalid <= 1'b0; diff --git a/hdk/common/verif/scripts/.gitignore b/hdk/common/verif/scripts/.gitignore new file mode 100644 index 00000000..1366d166 --- /dev/null +++ b/hdk/common/verif/scripts/.gitignore @@ -0,0 +1,2 @@ +.done +tmp diff --git a/hdk/common/verif/scripts/Makefile b/hdk/common/verif/scripts/Makefile new file mode 100644 index 00000000..3098d515 --- /dev/null +++ b/hdk/common/verif/scripts/Makefile @@ -0,0 +1,34 @@ +#------------------------------------------------------------------------------- +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). 
You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. +#------------------------------------------------------------------------------- + +DONE_FILE := $(HDK_COMMON_DIR)/verif/scripts/.done +DEPS_FILE := $(HDK_SHELL_DESIGN_DIR)/ip/ddr4_core/ddr4_core.xci +DEPS_FILE += $(HDK_COMMON_DIR)/verif/scripts/init.tcl +DEPS_FILE += $(HDK_COMMON_DIR)/verif/scripts/init.sh + +all: $(DONE_FILE) + +$(DONE_FILE): $(DEPS_FILE) + @ echo "INFO: Building in $(shell dirname $@)" + @ echo "INFO: This could take 5-10 minutes, please be patient!" + @ git clean -fXdq $(shell dirname $@) + @ cd $(shell dirname $@)\ + && ./init.sh $(MODEL_DIR)\ + && echo "INFO: DDR4 model build passed."\ + || (echo "ERROR: DDR4 model build failed." && exit 2) + @ touch $@ + diff --git a/hdk/common/verif/scripts/init.sh b/hdk/common/verif/scripts/init.sh index e72c3192..ce70d347 100755 --- a/hdk/common/verif/scripts/init.sh +++ b/hdk/common/verif/scripts/init.sh @@ -16,12 +16,12 @@ # limitations under the License. if [[ ":$HDK_COMMON_DIR" == ":" ]]; then - echo "error: HDK_COMMON_DIR not set. Source hdk_setup.sh first." + echo "ERROR: HDK_COMMON_DIR not set. Source hdk_setup.sh first." exit 2 fi if [[ ":$VIVADO_VER" == ":" ]]; then - echo "error: VIVADO_VER not set. Source hdk_setup.sh first." + echo "ERROR: VIVADO_VER not set. Source hdk_setup.sh first." exit 2 fi @@ -39,12 +39,12 @@ lockfile_filename=$models_dir/build.lock # Prevent multiple users from building in the same directory. # Set the number of retries to 0 so that we will just fail # and let the other process complete the build. -if [ -e /bin/lockfile ]; then - if ! 
lockfile -r 0 $lockfile_filename; then - echo "error: $lockfile_filename exists" - echo "error: Another process is already building the models." - exit 2 - fi +if [ -e $lockfile_filename ]; then + echo "ERROR: $lockfile_filename exists" + echo "ERROR: Another process is already building the models." + exit 2 +else + touch $lockfile_filename fi echo "$VIVADO_VER" > $models_dir/.vivado_version diff --git a/hdk/common/verif/scripts/init.tcl b/hdk/common/verif/scripts/init.tcl index 682767b8..d04e4e9e 100644 --- a/hdk/common/verif/scripts/init.tcl +++ b/hdk/common/verif/scripts/init.tcl @@ -26,10 +26,11 @@ export_ip_user_files -of_objects [get_files $::env(HDK_SHELL_DIR)/design/ip/dd remove_files [get_files $::env(HDK_SHELL_DIR)/design/ip/ddr4_core/ddr4_core.xci] -quiet import_files -norecurse $::env(HDK_SHELL_DIR)/design/ip/ddr4_core/ddr4_core.xci -quiet -upgrade_ip [get_ips ddr4_core] -quiet +upgrade_ip -vlnv xilinx.com:ip:ddr4:2.2 [get_ips ddr4_core] -log ip_upgrade.log +generate_target all [get_files $::env(HDK_SHELL_DIR)/design/ip/ddr4_core/ddr4_core.xci] +report_ip_status -file ddr4_core_ip_report.txt open_example_project -force -dir ./tmp/tmp_ddr_ex [get_ips ddr4_core] - exit diff --git a/hdk/common/verif/tb/scripts/Makefile.common.inc b/hdk/common/verif/tb/scripts/Makefile.common.inc index b47e19cc..c9325c4e 100644 --- a/hdk/common/verif/tb/scripts/Makefile.common.inc +++ b/hdk/common/verif/tb/scripts/Makefile.common.inc @@ -98,6 +98,28 @@ endif endif endif +COMMON_LIBLISTS =\ + unisims_ver\ + unisim\ + unifast_ver\ + unifast\ + unimacro_ver\ + unimacro\ + secureip\ + xpm +COMMON_LIBLISTS +=\ + $(shell cd $(COMPLIB_DIR) >/dev/null 2>&1;\ + for i in\ + axi_register_slice_v2_1_\ + axi_infrastructure_v1_1_\ + axi_crossbar_v2_1_\ + axi_clock_converter_v2_1_\ + fifo_generator_v13_2_\ + fifo_generator_v13_1_\ + axi_data_fifo_v2_1_\ + generic_baseblocks_v2_1_;\ + do ls | grep $$i; done) + include $(HDK_COMMON_DIR)/verif/tb/scripts/Makefile.$(SIMULATOR).inc regression: 
$(SV_TEST_LIST) $(C_TEST_LIST) @@ -115,3 +137,7 @@ $(HDK_COMMON_DIR)/verif/models/sh_bfm/cl_ports_sh_bfm.vh: $(HDK_SHELL_DESIGN_DIR make_sim_dir: $(HDK_COMMON_DIR)/verif/models/sh_bfm/cl_ports_sh_bfm.vh mkdir -p $(SIM_ROOT) + +show_common_liblists: + @ for i in $(COMMON_LIBLISTS); do echo $$i; done + diff --git a/hdk/docs/AFI_Manifest.md b/hdk/docs/AFI_Manifest.md index 1e030854..e6509387 100644 --- a/hdk/docs/AFI_Manifest.md +++ b/hdk/docs/AFI_Manifest.md @@ -40,6 +40,10 @@ The manifest file is a text file formatted with key=value pairs. Some keys are m | vivado tool version | field value | |------------------- | -----------| +| 2020.2 | tool_version=v2020.2 | +| 2020.1 | tool_version=v2020.1 | +| 2019.2 | tool_version=v2019.2 | +| 2019.1 | tool_version=v2019.1 | | 2018.3 | tool_version=v2018.3 | | 2018.2 | tool_version=v2018.2 | | 2017.4 | tool_version=v2017.4 | diff --git a/hdk/docs/AWS_Shell_Interface_Specification.md b/hdk/docs/AWS_Shell_Interface_Specification.md index 827e9964..2733f9db 100644 --- a/hdk/docs/AWS_Shell_Interface_Specification.md +++ b/hdk/docs/AWS_Shell_Interface_Specification.md @@ -99,9 +99,9 @@ Starting from 1.4, The shell is reconfigurable, allowing, in most cases, develop **DW –** Doubleword: referring to 4-byte (32-bit) data size. -[**AXI-4** ARM Advanced eXtensible Interface.](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.set.amba/index.html) +[**AXI-4** ARM Advanced eXtensible Interface.](https://developer.arm.com/architectures/system-architectures/amba/amba-4) -[**AXI-4 Stream –** ARM Advanced eXtensible Stream Interface.](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.set.amba/index.html) +[**AXI-4 Stream –** ARM Advanced eXtensible Stream Interface.](https://developer.arm.com/architectures/system-architectures/amba/amba-4) **M –** Typically refers to the Master side of an AXI bus. @@ -222,9 +222,10 @@ These parameters are used to control which DDR controllers are impemented in the ... 
``` -### DRAM Content Preservation between AFI Loads (Future) +### DRAM Content Preservation between AFI Loads -In future Shell versions a DRAM content preservation feature will be implemented. This feature allows the DDR state to be preserved when dynamically changing CL logic. The current Shell version will not guarantee preservation of DRAM contents if the CL logic is re-loaded. +Shell version 1.4 allows the DDR state to be preserved when dynamically changing CL logic. Any AFI generated with a v1.4 shell will enable DRAM content preservation by default. +Please refer to the [guide on how to use the DRAM data retention mode to preserve the content of DRAM across AFI loads](./data_retention.md) for more details on utilizing this feature. ## DMA @@ -316,7 +317,8 @@ Each DRAM interface is accessed via an AXI-4 interface: There is a single status signal that the DRAM interface is trained and ready for access. DDR access should be gated when the DRAM interface is not ready. The addressing uses ROW/COLUMN/BANK (Interleaved) mapping of AXI address to DRAM Row/Col/BankGroup. The Read and Write channels are serviced with round-robin arbitration (i.e. equal priority). -The DRAM interface uses the Xilinx DDR-4 Interface controller. The AXI-4 interface adheres to the Xilinx specification. Uncorrectable ECC errors are signaled with RRESP. ECC error status can be read using AWS Management Software APIs. +The DRAM interface uses the Xilinx DDR-4 Interface controller. The AXI-4 interface adheres to the Xilinx specification. Uncorrectable ECC errors are signaled with RRESP. A CL can be designed to handle ECC errors by monitoring RRESP on the DDR AXI interfaces. The CL will receive a SLVERR RRESP on an uncorrectable ECC error. +**NOTE:** Writing to a DDR location is required before reading the DDR location to initialize the ECC. False ECC errors may occur when un-initialized DDR locations are read. 
Additionally, there are three statistics interfaces between the Shell and CL (one for each CL DDR controller). If the DDR controllers are being used by the CL, then the interfaces must be connected between the Shell and the DRAM interface controller modules. @@ -648,12 +650,6 @@ It is ideal to place logic that interfaces to the shell in the same SLR as the S For the interfaces that are in both the MID/BOTTOM the recommendation is to use flops for pipelining, but don’t constrain to an SLR. Also it is recommended to not use the SDA interface because it spans two SLR's (use BAR1 or OCL instead). You can constrain logic to a particular SLR by creating PBLOCKs (one per SLR), and assigning logic to the PBLOCKs (refer to cl_dram_dma example [cl_pnr_user.xdc](../cl/examples/cl_dram_dma/build/constraints/cl_pnr_user.xdc)). Dataflow should be mapped so that SLR crossing is minimized (for example a pipeline should be organized such that successive stages are mostly in the same SLR). -Here’s an example post on the Xilinx forum which points to some documentation related to solving this: - - -There are some good timing closure tips in this methodology doc pointed to by the Xilinx forum post: - - ### Logic Levels You can report all paths that are greater than a certain number of logic levels. This can be used to iterate on timing in synthesis rather than waiting for place and route. For example at 250MHz a general rule of thumb is try to keep logic levels to around 10. 
The following commands report on all paths that have more than 10 logic levels: @@ -691,13 +687,10 @@ You have to be careful that pipeline registers do not infer a shift register com ### Vivado Analysis -Vivado has some nice analysis capabilities: +Vivado has the following analysis capabilities: * report_methodology (includes CDC report) * clock interaction report (see if paths between async clocks are erroneously being timed) * congestion heat map * power analysis * physical implementation analysis (placement, routing) * linked timing/schematic/physical views - - - diff --git a/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md b/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md index e2a9fab4..f8a4cb17 100644 --- a/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md +++ b/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md @@ -4,7 +4,7 @@ This document describes the changes required when migrating your design from shell v1.3 to shell v1.4. The HDK build scripts have changed to reflect the new v1.4 shell’s floorplan and newer Vivado tools. It’s strongly recommended users move to these scripts. Users who have already customized v1.3 scripts should diff those with the v1.4 scripts and be sure to include all new parameters that have been added to v1.4 scripts. -1. Upgrade Vivado Tools to version 2017.4 or later. Needs [FPGA DEVELOPER AMI 1.4 or later](../../README.md#overviewdevtools) +1. Upgrade Vivado Tools to version 2019.1 or later. Needs [FPGA DEVELOPER AMI 1.4 or later](../../README.md#fpga-developer-ami) 2. The hierarchy for CL & SH modules have changed. Now they are instantiated in "WRAPPER_INST" Module. The paths in your Build scripts, constraints & verification components have to be updated. 
diff --git a/hdk/docs/HOWTO_detect_shell_timeout.md b/hdk/docs/HOWTO_detect_shell_timeout.md index 2b7c750c..e304009c 100644 --- a/hdk/docs/HOWTO_detect_shell_timeout.md +++ b/hdk/docs/HOWTO_detect_shell_timeout.md @@ -1,18 +1,22 @@ # AXI Slave Timeouts (DMA_PCIS) -* The Shell provides a timeout mechanism which terminates any outstanding AXI transactions after 8 uS. There is a separate timeout per interface. Upon the first timeout, metrics registers are updated with the offending address and a counter is incremented. Upon further timeouts the counter is incremented. These metrics registers can be read via the fpga-describe-local-image found in [Amazon FPGA Image Management Tools README](../../sdk//userspace/fpga_mgmt_tools/README.md) +* The Shell provides a timeout mechanism which terminates any outstanding AXI transactions after 8 uS. + * There is a separate timeout per interface. + * Upon the first timeout, metrics registers are updated with the offending address and a counter is incremented. + * Upon further timeouts the counter is incremented. + * These metrics registers can be read via the [fpga-describe-local-image found in Amazon FPGA Image Management Tools](../../sdk/userspace/fpga_mgmt_tools/README.md) * Timeouts can occur for three reasons: - 1. The CL doesn’t respond to the address (reserved address space) + 1. The CL doesn't respond to the address (reserved address space) 2. The CL has a protocol violation on AXI which hangs the bus 3. The CL design’s latency is exceeding the timeout value. For example if the cycle is going to DDR, accumulated DDR arbitration and access latenencies may exceed the timeout value. * Best practice is to ensure addresses to reserved address space are fully decoded in your CL design. -* If accesing DDR, note DMA accesses to DDR will accumulate which can lead to timeouts if the transactions are not completed fast enough. This is especially true for CL designs operating at 125MHz or below. 
See [cl_dram_dma](../cl/examples/cl_dram_dma). This example illustrates best practice for DMA operations to DDR. +* If accessing DDR, note DMA accesses to DDR will accumulate which can lead to timeouts if the transactions are not completed fast enough. This is especially true for CL designs operating at 125MHz or below. See [cl_dram_dma](../cl/examples/cl_dram_dma). This example illustrates best practice for DMA operations to DDR. * CL designs which have multiple masters to the AXI "fabric" will also incur arbitration delays. * If you suspect a timeout, debug by reading the metrics registers. The saved offending address should help narrow whether this is to DDR or registers/RAMs inside the FPGA. The developer should investigate if design parameters allow for long latency responses to the offending address. If not, then the developer should investigate protocol violations. -* **WARNING**: Once a timeout happens the DMA/PCIS interface may no longer be functional and the AFI/Shell must be re-loaded. This can be done by adding the "-F" option to [fpga-load-local-image](../../sdk/userspace/fpga_mgmt_tools/README.md). +* **WARNING**: Once a timeout happens the DMA/PCIS interface may no longer be functional and the AFI/Shell must be re-loaded. This can be done by adding the "-F" option to [fpga-load-local-image](../../sdk/userspace/fpga_mgmt_tools/README.md). # AXI Master Timeouts (PCIM) * AXI Master transactions also have an 8us timeout. 
Timeout occur when the CL does not respond to some channel within 8us: @@ -74,9 +78,9 @@ DDR3 write-count=0 read-count=0 ``` -* For detailed infomation on metrics, see [Amazon FPGA Image Management Tools README](../../sdk//userspace/fpga_mgmt_tools/README.md) +* For detailed information on metrics, see [Amazon FPGA Image Management Tools README](../../sdk/userspace/fpga_mgmt_tools/README.md) ** NOTE **: The LSB 2 bits of timeout address (sdacl-slave-timeout-addr, virtual-jtag-slave-timeout-addr, ocl-slave-timeout-addr, bar1-slave-timeout-addr and dma-pcis-timeout-addr) in the metrics are used to report whether the timeout occurred due to READ or WRITE transaction. The bits in timeout address should be interpret as follows: > timeout-addr[1:0] == 2'b01 : Interface timed out on READ transaction (Could be either on AR or R channels). > timeout-addr[1:0] == 2'b10 : Interface timed out on WRITE transaction (Could be on AW, W or B channels). - > True 32bit aligned address that triggered first timeout = {timeout-addr[1:0], 2'b00}. \ No newline at end of file + > True 32bit aligned address that triggered first timeout = {timeout-addr[1:0], 2'b00}. diff --git a/hdk/docs/IPI_GUI_Vivado_Setup.md b/hdk/docs/IPI_GUI_Vivado_Setup.md index d5f5c803..1451248f 100644 --- a/hdk/docs/IPI_GUI_Vivado_Setup.md +++ b/hdk/docs/IPI_GUI_Vivado_Setup.md @@ -26,26 +26,29 @@ Open the following file in a text editor ~/.Xilinx/Vivado/init.tcl or ~/.Xilinx/ If either of these files does not exist, change directories into ~/.Xilinx/Vivado and use the following command to create the file. -touch Vivado_init.tcl +`touch Vivado_init.tcl` Get the absolute path of the $HDK\_SHELL\_DIR with the following command. -echo $HDK\_SHELL\_DIR +`echo $HDK_SHELL_DIR` -If your $HDK\_SHELL\_DIR is empty or does not list /$HDK\_SHELL\_DIR/, then you may need to source the [hdk_setup](../README.md).
+**NOTE: If your $HDK\_SHELL\_DIR is empty or does not list /$HDK\_SHELL\_DIR/, then you may need to source the [hdk_setup](../README.md).** In init.tcl or Vivado\_init.tcl, add the following line based upon the $HDK\_SHELL\_DIR path. -source /hlx/hlx_setup.tcl +`source $::env(HDK_SHELL_DIR)/hlx/hlx_setup.tcl` -Everytime Vivado is loaded, this script will always be sourced and IP integrator features will be automatically loaded. Remove this line if you no longer wish to use HLx Flow. + +### Switching between HDK and HLx flows +* ~/.Xilinx/Vivado/init.tcl or ~/.Xilinx/Vivado/Vivado_init.tcl scripts are sourced when Vivado starts up. Once you go through the Linux Install setup, IP integrator features will be automatically loaded every time. +* If you wish to switch to the HDK flow, please remove the `source $::env(HDK_SHELL_DIR)/hlx/hlx_setup.tcl` line from your init.tcl or Vivado\_init.tcl file. # Windows Install -Download, install, and configure the license for Vivado SDx 2017.4 or Vivado 2018.2 or Vivado 2018.3 for Windows. More information is provided at: +Download, install, and configure the license for Vivado SDx 2017.4, 2018.2, 2018.3 or 2019.1 for Windows. More information is provided at: -[On-Premises Licensing Help](./on_premise_licensing_help.md) +[On-Premises Licensing Help](../../docs/on_premise_licensing_help.md) Clone the `https://github.com/aws/aws-fpga` repository either through Github Desktop or Download ZIP and extract to a new folder location on the Windows machine. This is the install location. diff --git a/hdk/docs/RTL_Simulating_CL_Designs.md b/hdk/docs/RTL_Simulating_CL_Designs.md index 35997127..c940c719 100644 --- a/hdk/docs/RTL_Simulating_CL_Designs.md +++ b/hdk/docs/RTL_Simulating_CL_Designs.md @@ -4,12 +4,12 @@ Developers tend to simulate their designs to validate the RTL design and functionality, before hitting the build stage and registering it with AWS EC2 as Amazon FPGA Image (AFI).
AWS FPGA HDK comes with a shell simulation model that supports RTL-level simulation using Xilinx' Vivado XSIM, MentorGraphics' Questa, Cadence Incisive and Synopsys' VCS RTL simulators. See table below for supported simulator versions. -| 3rd party simulator Tool | 2017.4 Vivado tool | 2018.2 Vivado tool | 2018.3 Vivado tool | -|--------------------------|--------------------|--------------------|--------------------| -| Xilinx Vivado XSIM | Vivado v2017.4.op (64-bit) | Vivado v2018.2_AR71275_op (64-bit) | Vivado v2018.3.op (64-bit) | -| Synopsys VCS | vcs-mx/L-2016.06-1 | vcs-mx/N-2017.12-SP1-1 | vcs-mx/N-2017.12-SP2 | -| Mentor Graphics Questa | 10.6b | 10.6c_1 | 10.6c_1 | -| Cadence Incisive Enterprise Simulator(IES) | 15.20.063 | 15.20.063 | 15.20.063 | +| Simulator | Vivado 2019.1 | Vivado 2019.2 | Vivado 2020.1 | Vivado 2020.2 | +|-----------| --- | --- | --- | --- | +| Xilinx Vivado XSIM | Vivado v2019.1 | Vivado v2019.2 | Vivado v2020.1 | Vivado v2020.2 | +| Synopsys VCS | O-2018.09 | O-2018.09-SP2-1 | P-2019.06-SP1-1 | Q-2020.03 | +| Mentor Graphics Questa | 10.7c | 2019.2 | 2019.4 | 2020.2 | +| Cadence Incisive Enterprise Simulator(IES) | 15.20.065 | 15.20.073 | 15.20.079 | 15.20.083 | Developers can write their tests in SystemVerilog and/or C languages. If a developer chooses to use the supplied C framework, he/she can use the same C code for simulation and for runtime on your FPGA-enabled instance like F1. @@ -21,7 +21,7 @@ Developers can write their tests in SystemVerilog and/or C languages. If a devel One easy way is to have a pre-installed environment is to use the [AWS FPGA Developer AMI available on AWS Marketplace](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) which comes with pre-installed Vivado tools and license. -For developers who like to work on-premises or different AMI in the cloud, AWS recommends following the [required license for on-premise document](./on_premise_licensing_help.md). 
+For developers who like to work on-premises or different AMI in the cloud, AWS recommends following the [required license for on-premise document](../../docs/on_premise_licensing_help.md). Please refer to the [release notes](../../RELEASE_NOTES.md) or the [supported Vivado version](../../supported_vivado_versions.txt) for the exact version of Vivado tools, and the required license components. diff --git a/hdk/docs/Virtual_JTAG_XVC.md b/hdk/docs/Virtual_JTAG_XVC.md index 3dd7315b..f5121fb7 100644 --- a/hdk/docs/Virtual_JTAG_XVC.md +++ b/hdk/docs/Virtual_JTAG_XVC.md @@ -142,6 +142,7 @@ To connect the debug Xilinx Hardware Manager to Virtual JTAG XVC server on the t `> connect_hw_server -url :3121` +If the above command fails, it is most likely because hw_server is not running on the target F1 instance. Please see this [FAQ](#hw_serverRunOnF1Instance) on how to start hw_server. @@ -153,7 +154,7 @@ To connect the debug Xilinx Hardware Manager to Virtual JTAG XVC server on the t **NOTES:** -- If the above command fails, its most likely that either the virtual jtag server is not running, the IP/Port are wrong, or a firewall/security-group rule is blocking the connection. See the [FAQ](#faq) section in the end of this document. +- If the above command fails, it's most likely that either the virtual jtag server or hw_server is not running, the IP/Port are wrong, or a firewall/security-group rule is blocking the connection. See the [FAQ](#faq) section at the end of this document. Upon successful connection, Vivado's Hardware panel will be populated with a debug bridge instance. @@ -187,7 +188,7 @@ The connection Vivado and the target instance can be terminated by closing the X # Embedding Debug Cores in the CL -Before beginning, it should be noted that the following only applies to the HDK flow. For adding debug cores to a design using SDAccel, see [Debug_RTL_Kernel.md](../../SDAccel/docs/Debug_RTL_Kernel.md) for instructions on how to do so.
+> ⚠️ **NOTE:** Before beginning, it should be noted that the following only applies to the HDK flow. [SDAccel instructions](../../SDAccel/docs/Debug_RTL_Kernel.md) and [Vitis instructions](../../Vitis/docs/Debug_Vitis_Kernel.md) are also available. The Custom Logic (CL) is required to include the [CL Debug Bridge](../common/shell_v04261818/design/ip/cl_debug_bridge/sim/cl_debug_bridge.v) provided by AWS as part of the HDK, and any required standard Xilinx debug IP components like ILAs and VIOs. @@ -216,6 +217,7 @@ cl_debug_bridge CL_DEBUG_BRIDGE ( .bscanid(bscanid) ); ``` +**NOTE:** According to [UG908](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_2/ug908-vivado-programming-debugging.pdf), the debug hub clock should be at least 2.5x the JTAG clock frequency. The JTAG clock frequency is fixed in the AWS Shell at 31.25MHz. Therefore the frequency of the clock connected to the cl_debug_bridge should be at least 2.5 x 31.25MHz = 78.125MHz. Otherwise the debug network will not work. However, the minimum clock frequency requirement does not apply to the ILA and the rest of the CL logic. If the CL design is running on a slower clock from the available [clock_recipes](https://github.com/aws/aws-fpga/blob/master/hdk/docs/clock_recipes.csv), then care must be taken that cl_debug_bridge is clocked at 78.125MHz or faster.
The following list describes the steps to successfully setup debug in a CL: @@ -228,7 +230,6 @@ The following list describes the steps to successfully setup debug in a CL: # Frequently Asked Questions - **Q: Do I need to run Vivado or Hardware Manager on the target EC2 instance to debug?** @@ -238,6 +239,11 @@ No, you may run Vivado on a "remote" host as long as your instance/VPC has the r **Q: How do I configure Linux firewalls and EC2 network security groups to enable remote debug?** +If your OS has the `firewalld` service running, you can disable it for the time being for setting up remote debug by calling: +```sudo systemctl stop firewalld``` + +You will also have to allow incoming and outgoing traffic to TCP ports 3121 and 10201. +To open up incoming and outgoing traffic on those ports for your instance, please refer to the [EC2 Security Group documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/security-group-rules-reference.html#sg-rules-other-instances) **Q: Can I have a secure connection (i.e. SSL/TLS) to the target FPGA-enable EC2 Instance running Virtual JTAG service?** @@ -249,26 +255,41 @@ You may use the ssh "port forwarding" option (-L) to forward connections from th No, you need the Vivado Lab Edition which does not require a license. - **Q: How do I stop the Virtual JTAG service on the target instance?** - +After starting the Virtual JTAG service, you can stop it by calling `Ctrl + C` from your keyboard. **Q: Can I debug multiple FPGAs on same target EC2 instance concurrently?** Yes, you must start the the `$ fpga-start-virtual-jtag` with a different Slot/Port for each FPGA. You can launch multiple Vivado sessions, and have each session connect to the corresponding TCP port associated with the FPGA. 
- + **Q: What are some of the best practices I should be aware when working with Virtual JTAG?** If you are running Vivado on a remote machine trying to connect to Virtual JTAG - we recommend running the hw_server on the F1 instance - to ensure optimal performance between Vivado and the Virtual JTAG server. - +On your target F1 Instance: +``` +[$] sudo su +[$]# hw_server & +****** Xilinx hw_server v2019.2 + **** Build date : Oct 24 2019 at 19:23:45 + ** Copyright 1986-2019 Xilinx, Inc. All Rights Reserved. + +INFO: hw_server application started +INFO: Use Ctrl-C to exit hw_server application + +INFO: To connect to this hw_server instance use url: TCP:ip-xxx-xx-xx-xxx.ec2.internal:3121 + +[$]# fpga-start-virtual-jtag -P 10201 -S 0 +Starting Virtual JTAG XVC Server for FPGA slot id 0, listening to TCP port 10201. +Press CTRL-C to stop the service. +``` **Q: Can other instances running on the same F1 server access the Virtual JTAG of my instance?** No, other customer instances running on the same F1 server do not have access to the Virtual JTAG of your instance. -**Q: I am getting this error:** +**Q: Why am I getting this error?** ``` % fpga-start-virtual-jtag -P 10201 -S 0 @@ -277,11 +298,24 @@ Press CTRL-C to stop the service. Error: (1) internal-error ``` -This could mean there is already a server running with thtat TCP port. Either find this process and kill it, or choose a different TCP port. +This could mean there is already a server running with that TCP port. Either find this process and kill it, or choose a different TCP port. +**Q: Why am I getting this error?** + +``` +ERROR: [Xicom 50-38] xicom: Device:0, user chain number:1, slave index:3. Reading intermittently wrong data from core. Try slower target speed. Make sure design meets timing requirements. +ERROR: [Xicom 50-38] xicom: Device:0, user chain number:1, slave index:3, is not a valid CseXsdb Slave core. +ERROR: [Labtools 27-3176] hw_server failed during internal command. 
+Resolution: Check that the hw_server is running and the hardware connectivity to the target + +``` + +This means the clock connected to the cl_debug_bridge module is slower than the required minimum of 78.125MHz. Please choose a faster clock to connect to your cl_debug_bridge. **Q: What is XVC and where can I learn about it?** Xilinc Virtual Cable (XVC) is a protocol for transferring JTAG commands over TCP/IP network connection between a debug tool (like Vivado Lab Edition Hardware Manager) and a debug target. More information including a link to the full specification for XVC version 1.0 is available [here](https://www.xilinx.com/products/intellectual-property/xvc.html). + + diff --git a/hdk/docs/images/AWS_Shell_CL_overview.jpg b/hdk/docs/images/AWS_Shell_CL_overview.jpg old mode 100755 new mode 100644 index 3369a711..cd1508ea Binary files a/hdk/docs/images/AWS_Shell_CL_overview.jpg and b/hdk/docs/images/AWS_Shell_CL_overview.jpg differ diff --git a/hdk/docs/load_times.md b/hdk/docs/load_times.md new file mode 100644 index 00000000..2d458dc4 --- /dev/null +++ b/hdk/docs/load_times.md @@ -0,0 +1,39 @@ +# Reducing AFI load times + +To support customers using multiple FPGA images in sequence, AWS strives to minimize the time to load an Amazon FPGA image (AFI). +Many of these improvements will be available to users with no action required through automatic improvements to Amazon systems, but customers can use AWS F1 features like caching and [data retention](data_retention.md) to maximize AFI pipeline performance. +Data retention (the -D flag) is especially valuable because it significantly reduces AFI load times, and can eliminate the time consumption of copying and reloading data from FPGA DRAM. + +Customers can view locally cached AFIs via fpga-describe-local-image with the -M flag, and request that AFIs be cached without actually modifying the FPGA with the -P flag in fpga-load-local-image.
+ +## Caching recently used AFIs +AWS will automatically cache the most recent 16 AFIs used on that FPGA slot. If more than 16 AFIs are loaded, the least recently used AFI will be removed from the cache. The cache will also be cleared when an FPGA slot is cleared with fpga-clear-local-image, or when an instance is stopped or terminated. + +## Prefetching an upcoming AFI +If more than 16 AFIs are needed for an AFI pipeline, customers will need to prefetch AFIs into the cache to maximize performance. Prefetching an AFI doesn't affect currently running FPGA images, so it is safe to prefetch an AFI while the currently running AFI is processing data. Prefetching just returns 0 without printing if the prefetch was successful, since it doesn't change the FPGA state. If the cache is already full of 16 AFIs, prefetching an AFI will remove the least recently used AFI from the cache. + +To prefetch an AFI into the cache, use fpga-load-local-image with the -P flag, for example: +``` +sudo fpga-load-local-image -S 0 -I agfi-0fcf87119b8e97bf3 -P +``` + +## Viewing cached AFIs +To see which AGFIs are cached on an FPGA slot, use fpga-describe-local-image with the -M flag: + +``` +sudo fpga-describe-local-image -S 0 -M +AFI 0 agfi-01dc2520aaf357e86 loaded 0 ok 0 0x04261818 +AFIDEVICE 0 0x1d0f 0xf001 0000:00:1d.0 +.... 
+Cached agfis: + agfi-0fcf87119b8e97bf3 + agfi-01dc2520aaf357e86 +``` + +## Other load time considerations + +To minimize AFI load time, in addition to caching the AGFI and using data retention (-D flag), also ensure: + +* The AFI being loaded is on the same shell as the previous AFI +* The AFI being loaded is using a new shell, especially shell 1.4 or above +* The FPGA image tools are up to date from Github diff --git a/hdk/docs/ppts/simulation.pptx b/hdk/docs/ppts/simulation.pptx deleted file mode 100644 index 657074a7..00000000 Binary files a/hdk/docs/ppts/simulation.pptx and /dev/null differ diff --git a/hdk/docs/ppts/simulation/Slide1.PNG b/hdk/docs/ppts/simulation/Slide1.PNG deleted file mode 100644 index 02f4e9b1..00000000 Binary files a/hdk/docs/ppts/simulation/Slide1.PNG and /dev/null differ diff --git a/hdk/docs/ppts/simulation/Slide2.PNG b/hdk/docs/ppts/simulation/Slide2.PNG deleted file mode 100644 index bbfd4e62..00000000 Binary files a/hdk/docs/ppts/simulation/Slide2.PNG and /dev/null differ diff --git a/hdk/docs/ppts/simulation/Slide3.PNG b/hdk/docs/ppts/simulation/Slide3.PNG deleted file mode 100644 index 4d91a413..00000000 Binary files a/hdk/docs/ppts/simulation/Slide3.PNG and /dev/null differ diff --git a/hdk/hdk_version.txt b/hdk/hdk_version.txt index 960d4ed1..8d821ae0 100644 --- a/hdk/hdk_version.txt +++ b/hdk/hdk_version.txt @@ -1 +1 @@ -HDK_VERSION=1.4.8 +HDK_VERSION=1.4.18 diff --git a/hdk/tests/simulation_tests/run_sim.sh b/hdk/tests/simulation_tests/run_sim.sh index 9c34a7cc..af00f668 100755 --- a/hdk/tests/simulation_tests/run_sim.sh +++ b/hdk/tests/simulation_tests/run_sim.sh @@ -36,9 +36,19 @@ case $key in ;; --simulator) simulator="$2" - shift - shift - ;; + shift + shift + ;; + --batch) + batch="$2" + shift + shift + ;; + --vivado-version) + vivado_version="$2" + shift + shift + ;; --test-type) test_type="$2" shift @@ -51,43 +61,55 @@ case $key in esac done +vivado_version=${vivado_version//./_} +if [ $batch == "TRUE" ]; then +# 
COMMAND="batch_submit.py -q vcs-lo --jd Cad-centos7_2 --jn github_regress_${test_name}_${test_type}_${vivado_version}_${simulator} --wait --echo -c make" +# COMMAND="sbatch -c 1 --mem 64GB -p regress -J github_regress_${test_name}_${test_type}_${vivado_version}_${simulator} -L VCSMXRunTime_Net -W -o ${test_name}_${test_type}_${simulator}.stdout.sim.log -e ${test_name}_${test_type}_${simulator}.stderr.sim.log sbatch_wrap.sh make" +COMMAND="srun -c 1 --mem 64GB -p regress -J github_regress_${test_name}_${test_type}_${vivado_version}_${simulator} -L VCSMXRunTime_Net make" + +else +COMMAND="make" +fi + +echo "$COMMAND" + # Run the test pushd $test_dir case "$simulator" in vcs) case "$test_type" in sv) - make TEST="$test_name" VCS=1 + $COMMAND TEST="$test_name" VCS=1 ;; sv_fast) - make TEST="$test_name" AXI_MEMORY_MODEL=1 VCS=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 VCS=1 ;; sv_fast_ecc_direct) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 VCS=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 VCS=1 ;; sv_fast_ecc_rnd) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 VCS=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 VCS=1 ;; sv_fast_ecc_rnd_100) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 VCS=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 VCS=1 ;; sv_fast_ecc_rnd_50) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 VCS=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 VCS=1 ;; sv_fast_ecc_rnd_10) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 VCS=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 VCS=1 ;; sv_fast_ecc_rnd_0) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=0 VCS=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 
ECC_RAND=1 RND_ECC_WEIGHT=0 VCS=1 ;; sv_ddr_bkdr) - make TEST="$test_name" DDR_BKDR=1 VCS=1 + $COMMAND TEST="$test_name" DDR_BKDR=1 VCS=1 ;; vhdl) - make TEST="$test_name" VCS=1 + $COMMAND TEST="$test_name" VCS=1 ;; c) - make C_TEST="$test_name" VCS=1 + $COMMAND C_TEST="$test_name" VCS=1 ;; *) echo -e >&2 "ERROR: Invalid option: $1\n" @@ -98,37 +120,37 @@ case "$simulator" in ies) case "$test_type" in sv) - make TEST="$test_name" IES=1 + $COMMAND TEST="$test_name" IES=1 ;; sv_fast) - make TEST="$test_name" AXI_MEMORY_MODEL=1 IES=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 IES=1 ;; sv_fast_ecc_direct) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 IES=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 IES=1 ;; sv_fast_ecc_rnd) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 IES=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 IES=1 ;; sv_fast_ecc_rnd_100) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 IES=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 IES=1 ;; sv_fast_ecc_rnd_50) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 IES=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 IES=1 ;; sv_fast_ecc_rnd_10) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 IES=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 IES=1 ;; sv_fast_ecc_rnd_0) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=0 IES=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=0 IES=1 ;; sv_ddr_bkdr) - make TEST="$test_name" DDR_BKDR=1 IES=1 + $COMMAND TEST="$test_name" DDR_BKDR=1 IES=1 ;; vhdl) - make TEST="$test_name" IES=1 + $COMMAND TEST="$test_name" IES=1 ;; c) - make C_TEST="$test_name" IES=1 + $COMMAND C_TEST="$test_name" IES=1 ;; *) echo -e 
>&2 "ERROR: Invalid option: $1\n" @@ -139,37 +161,37 @@ case "$simulator" in questa) case "$test_type" in sv) - make TEST="$test_name" QUESTA=1 + $COMMAND TEST="$test_name" QUESTA=1 ;; sv_fast) - make TEST="$test_name" AXI_MEMORY_MODEL=1 QUESTA=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 QUESTA=1 ;; sv_fast_ecc_direct) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 QUESTA=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 QUESTA=1 ;; sv_fast_ecc_rnd) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 QUESTA=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 QUESTA=1 ;; sv_fast_ecc_rnd_100) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 QUESTA=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 QUESTA=1 ;; sv_fast_ecc_rnd_50) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 QUESTA=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 QUESTA=1 ;; sv_fast_ecc_rnd_10) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 QUESTA=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 QUESTA=1 ;; sv_fast_ecc_rnd_0) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=0 QUESTA=1 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=0 QUESTA=1 ;; sv_ddr_bkdr) - make TEST="$test_name" DDR_BKDR=1 QUESTA=1 + $COMMAND TEST="$test_name" DDR_BKDR=1 QUESTA=1 ;; vhdl) - make TEST="$test_name" QUESTA=1 + $COMMAND TEST="$test_name" QUESTA=1 ;; c) - make C_TEST="$test_name" QUESTA=1 + $COMMAND C_TEST="$test_name" QUESTA=1 ;; *) echo -e >&2 "ERROR: Invalid option: $1\n" @@ -180,37 +202,37 @@ case "$simulator" in *) case "$test_type" in sv) - make TEST="$test_name" + $COMMAND TEST="$test_name" ;; sv_fast) - make TEST="$test_name" AXI_MEMORY_MODEL=1 + $COMMAND 
TEST="$test_name" AXI_MEMORY_MODEL=1 ;; sv_fast_ecc_direct) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_DIRECT=1 ECC_ADDR_HI=1000 ECC_ADDR_LO=0 ;; sv_fast_ecc_rnd) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 ;; sv_fast_ecc_rnd_100) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=100 ;; sv_fast_ecc_rnd_50) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=50 ;; sv_fast_ecc_rnd_10) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=10 ;; sv_fast_ecc_rnd_0) - make TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=0 + $COMMAND TEST="$test_name" AXI_MEMORY_MODEL=1 ECC_RAND=1 RND_ECC_WEIGHT=0 ;; sv_ddr_bkdr) - make TEST="$test_name" DDR_BKDR=1 + $COMMAND TEST="$test_name" DDR_BKDR=1 ;; vhdl) - make TEST="$test_name" + $COMMAND TEST="$test_name" ;; c) - make C_TEST="$test_name" + $COMMAND C_TEST="$test_name" ;; *) echo -e >&2 "ERROR: Invalid option: $1\n" diff --git a/hdk/tests/simulation_tests/test_sims.py b/hdk/tests/simulation_tests/test_sims.py index dada6e34..9e526d3d 100644 --- a/hdk/tests/simulation_tests/test_sims.py +++ b/hdk/tests/simulation_tests/test_sims.py @@ -39,11 +39,12 @@ class TestSims(AwsFpgaTestBase): """ Pytest test class. - + NOTE: Cannot have an __init__ method. 
""" ADD_SIMULATOR = True + ADD_BATCH = True @classmethod def setup_class(cls): @@ -121,7 +122,9 @@ def parse_simulation_output(cls, test_name, test_type, test_stdout, test_stderr) return return_dict - def run_sim(self, test_dir="", test_name="", test_type="", simulator=""): + def run_sim(self, test_dir="", test_name="", test_type="", simulator="", batch=""): + + vivado_version = os.environ.get('VIVADO_TOOL_VERSION', 'unknown') # Error on defaults if not(test_dir and test_name and test_type): @@ -131,12 +134,13 @@ def run_sim(self, test_dir="", test_name="", test_type="", simulator=""): '--test-name', test_name, '--test-dir', test_dir, '--test-type', test_type, - '--simulator', simulator + '--simulator', simulator, + '--batch', batch, + '--vivado-version', vivado_version ] (rc, stdout_lines, stderr_lines) = self.run_cmd(" ".join(command_line)) - vivado_version = os.environ.get('VIVADO_TOOL_VERSION', 'unknown') # write simulation output if simulator == "vivado": @@ -172,498 +176,505 @@ def run_sim(self, test_dir="", test_name="", test_type="", simulator=""): # cl_dram_dma sv - def test_cl_dram_dma__dram_dma__sv(self, simulator): + def test_cl_dram_dma__dram_dma__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma__sv_fast(self, simulator): + def test_cl_dram_dma__dram_dma__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def 
test_cl_dram_dma__dram_dma_axi_mstr__sv(self, simulator): + def test_cl_dram_dma__dram_dma_axi_mstr__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_axi_mstr' test_type = 'sv' - - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) - def test_cl_dram_dma__dram_dma_rnd__sv(self, simulator): + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) + + def test_cl_dram_dma__dram_dma_rnd__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_rnd' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_rnd__sv_fast(self, simulator): + def test_cl_dram_dma__dram_dma_rnd__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_rnd' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_rnd__sv_fast_ecc_direct(self, simulator): + def test_cl_dram_dma__dram_dma_rnd__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_rnd' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_rnd__sv_fast_ecc_rnd(self, simulator): + def 
test_cl_dram_dma__dram_dma_rnd__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_rnd' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_4k_crossing__sv(self, simulator): + def test_cl_dram_dma__dram_dma_4k_crossing__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_4k_crossing' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_single_beat_4k__sv(self, simulator): + def test_cl_dram_dma__dram_dma_single_beat_4k__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_single_beat_4k' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_single_beat_4k__sv_fast(self, simulator): + def test_cl_dram_dma__dram_dma_single_beat_4k__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_single_beat_4k' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_single_beat_4k__sv_fast_ecc_direct(self, simulator): + def 
test_cl_dram_dma__dram_dma_single_beat_4k__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_single_beat_4k' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_single_beat_4k__sv_fast_ecc_rnd(self, simulator): + def test_cl_dram_dma__dram_dma_single_beat_4k__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_single_beat_4k' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_pcis_concurrent__sv(self, simulator): + def test_cl_dram_dma__dma_pcis_concurrent__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcis_concurrent' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_pcis_concurrent__sv_fast(self, simulator): + def test_cl_dram_dma__dma_pcis_concurrent__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcis_concurrent' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def 
test_cl_dram_dma__dma_pcis_concurrent__sv_fast_ecc_direct(self, simulator): + def test_cl_dram_dma__dma_pcis_concurrent__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcis_concurrent' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_pcis_concurrent__sv_fast_ecc_rnd(self, simulator): + def test_cl_dram_dma__dma_pcis_concurrent__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcis_concurrent' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_pcim_concurrent__sv(self, simulator): + def test_cl_dram_dma__host_pcim__sv(self, simulator, batch): + test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' + test_name = 'test_host_pcim' + test_type = 'sv' + + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) + + def test_cl_dram_dma__dma_pcim_concurrent__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcim_concurrent' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_pcim_concurrent__sv_fast(self, simulator): + def test_cl_dram_dma__dma_pcim_concurrent__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + 
'/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcim_concurrent' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) - def test_cl_dram_dma__dma_pcim_concurrent__sv_fast_ecc_direct(self, simulator): + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) + def test_cl_dram_dma__dma_pcim_concurrent__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcim_concurrent' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_pcim_concurrent__sv_fast_ecc_rnd(self, simulator): + def test_cl_dram_dma__dma_pcim_concurrent__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_pcim_concurrent' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_sda_concurrent__sv(self, simulator): + def test_cl_dram_dma__dma_sda_concurrent__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_sda_concurrent' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_sda_concurrent__sv_fast(self, simulator): + def test_cl_dram_dma__dma_sda_concurrent__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + 
'/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_sda_concurrent' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dma_sda_concurrent__sv_fast_ecc_direct(self, simulator): + def test_cl_dram_dma__dma_sda_concurrent__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_sda_concurrent' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) - def test_cl_dram_dma__dma_sda_concurrent__sv_fast_ecc_rnd(self, simulator): + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) + def test_cl_dram_dma__dma_sda_concurrent__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dma_sda_concurrent' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__ddr__sv(self, simulator): + def test_cl_dram_dma__ddr__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_ddr' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__clk_recipe__sv(self, simulator): + def test_cl_dram_dma__clk_recipe__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_clk_recipe' 
test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__int__sv(self, simulator): + def test_cl_dram_dma__int__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_int' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke__sv(self, simulator): + def test_cl_dram_dma__peek_poke__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke__sv_fast(self, simulator): + def test_cl_dram_dma__peek_poke__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke__sv_fast_ecc_direct(self, simulator): + def test_cl_dram_dma__peek_poke__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, 
test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke__sv_fast_ecc_rnd(self, simulator): + def test_cl_dram_dma__peek_poke__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_wc__sv(self, simulator): + def test_cl_dram_dma__peek_poke_wc__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_wc' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_wc__sv_fast(self, simulator): + def test_cl_dram_dma__peek_poke_wc__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_wc' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_wc__sv_fast_ecc_direct(self, simulator): + def test_cl_dram_dma__peek_poke_wc__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_wc' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def 
test_cl_dram_dma__peek_poke_wc__sv_fast_ecc_rnd(self, simulator): + def test_cl_dram_dma__peek_poke_wc__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_wc' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_len__sv(self, simulator): + def test_cl_dram_dma__peek_poke_len__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_len' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_len__sv_fast(self, simulator): + def test_cl_dram_dma__peek_poke_len__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_len' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_len__sv_fast_ecc_direct(self, simulator): + def test_cl_dram_dma__peek_poke_len__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_len' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_len__sv_fast_ecc_rnd(self, simulator): + def 
test_cl_dram_dma__peek_poke_len__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_len' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_pcis_axsize__sv(self, simulator): + def test_cl_dram_dma__peek_poke_pcis_axsize__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_pcis_axsize' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_pcis_axsize__sv_fast(self, simulator): + def test_cl_dram_dma__peek_poke_pcis_axsize__sv_fast(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_pcis_axsize' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_pcis_axsize__sv_fast_ecc_direct(self, simulator): + def test_cl_dram_dma__peek_poke_pcis_axsize__sv_fast_ecc_direct(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_pcis_axsize' test_type = 'sv_fast_ecc_direct' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__peek_poke_pcis_axsize__sv_fast_ecc_rnd(self, 
simulator): + def test_cl_dram_dma__peek_poke_pcis_axsize__sv_fast_ecc_rnd(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_peek_poke_pcis_axsize' test_type = 'sv_fast_ecc_rnd' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__ddr_peek_poke__sv(self, simulator): + def test_cl_dram_dma__ddr_peek_poke__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_ddr_peek_poke' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__ddr_peek_bdr_walking_ones__sv(self, simulator): + def test_cl_dram_dma__ddr_peek_bdr_walking_ones__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_ddr_peek_bdr_walking_ones' test_type = 'sv_ddr_bkdr' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_dram_bdr_row_col_combo__sv(self, simulator): + def test_cl_dram_dma__dram_dma_dram_bdr_row_col_combo__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_dram_bdr_row_col_combo' test_type = 'sv_ddr_bkdr' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_mem_model_bdr_wr__sv(self, 
simulator): + def test_cl_dram_dma__dram_dma_mem_model_bdr_wr__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_mem_model_bdr_wr' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_mem_model_bdr_rd__sv(self, simulator): + def test_cl_dram_dma__dram_dma_mem_model_bdr_rd__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_mem_model_bdr_rd' test_type = 'sv_fast' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__axi_mstr_multi_rw__sv(self, simulator): + def test_cl_dram_dma__axi_mstr_multi_rw__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_axi_mstr_multi_rw' test_type = 'sv' - def test_cl_dram_dma__bar1__sv(self, simulator): + def test_cl_dram_dma__bar1__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_bar1' test_type = 'sv' - def test_cl_dram_dma__dram_dma_allgn_addr_4k__sv(self, simulator): + def test_cl_dram_dma__dram_dma_allgn_addr_4k__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_allgn_addr_4k' test_type = 'sv' - def test_ddr_peek_bdr_walking_ones__sv(self, simulator): + def test_ddr_peek_bdr_walking_ones__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_ddr_peek_bdr_walking_ones' test_type = 'sv' - self.run_sim(test_dir=test_dir, 
test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) # cl_uram_example c - def test_cl_uram_example__uram_example__c(self, simulator): + def test_cl_uram_example__uram_example__c(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_uram_example/verif/scripts' test_name = 'test_uram_example' test_type = 'c' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) - + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) + # cl_dram_dma c - def test_cl_dram_dma__sda__sv(self, simulator): + def test_cl_dram_dma__sda__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_sda' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) - def test_cl_dram_dma__dram_dma_hwsw_cosim__c(self, simulator): + def test_cl_dram_dma__dram_dma_hwsw_cosim__c(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_dram_dma/verif/scripts' test_name = 'test_dram_dma_hwsw_cosim' test_type = 'c' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) # cl_hello_world sv - def test_cl_hello_world__hello_world__sv(self, simulator): + def test_cl_hello_world__hello_world__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_hello_world/verif/scripts' test_name = 'test_hello_world' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, 
test_type=test_type, simulator=simulator, batch=batch) # cl_test_gl_cntr sv - def test_cl_hello_world__gl_cntr__sv(self, simulator): + def test_cl_hello_world__gl_cntr__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_hello_world/verif/scripts' test_name = 'test_gl_cntr' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) # cl_hello_world vhdl - def test_cl_hello_world__hello_world__vhdl(self, simulator): + def test_cl_vhdl_hello_world__hello_world__vhdl(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_hello_world_vhdl/verif/scripts' test_name = 'test_hello_world' test_type = 'vhdl' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) # cl_hello_world c - def test_cl_hello_world__hello_world__c(self, simulator): + def test_cl_hello_world__hello_world__c(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_hello_world/verif/scripts' test_name = 'test_hello_world' test_type = 'c' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) # cl_sde_c2h sv - def test_cl_sde__test_simple_c2h__sv(self, simulator): + def test_cl_sde__test_simple_c2h__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_sde/verif/scripts' test_name = 'test_simple_c2h' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) # cl_sde_h2c sv - def 
test_cl_sde__test_simple_h2c__sv(self, simulator): + def test_cl_sde__test_simple_h2c__sv(self, simulator, batch): test_dir = self.WORKSPACE + '/hdk/cl/examples/cl_sde/verif/scripts' test_name = 'test_simple_h2c' test_type = 'sv' - self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator) + self.run_sim(test_dir=test_dir, test_name=test_name, test_type=test_type, simulator=simulator, batch=batch) diff --git a/hdk/tests/test_gen_dcp.py b/hdk/tests/test_gen_dcp.py index 176e4a4c..e1c7e764 100644 --- a/hdk/tests/test_gen_dcp.py +++ b/hdk/tests/test_gen_dcp.py @@ -69,11 +69,14 @@ def setup_class(cls): def set_allowed_warnings(cls): cls.allowed_warnings = ( - (('.*',), r'^WARNING: \[Constraints 18-838\] Failed to create SRL placer macro for cell SH/SH/MGT_TOP'), + (('.*',), r'^WARNING: \[Constraints 18-838\] Failed to create SRL placer macro for cell SH/SH/MGT_TOP.*'), + (('.*',), r'^WARNING: \[Shape Builder 18-838\] Failed to create SRL placer macro for cell WRAPPER_INST/SH/SH/MGT_TOP.*'), + (('.*',), r'^WARNING: \[Common 17-576\] \'fanout_opt\' is deprecated.*'), (('.*',), r'^CRITICAL WARNING: \[Place 30-823\] Failed to process clock nets that should have matching clock routes\. Reason: Found incompatible user defined or fixed clock roots for related clocks \'CL/SH_DDR/ddr_cores\.DDR4'), (('.*',), r'^CRITICAL WARNING: \[Constraints 18-850\] Failed to place register with ASYNC_REG property shape that starts with register SH/SH/MGT_TOP/SH_ILA_0/inst/ila_core_inst/u_ila_reset_ctrl/asyncrounous_transfer\.arm_in_transfer_inst/dout_reg0_reg\. '), (('.*',), r'^CRITICAL WARNING: \[Constraints 18-850\] Failed to place register with ASYNC_REG property shape that starts with register SH/SH/MGT_TOP/SH_ILA_0/inst/ila_core_inst/capture_qual_ctrl_2_reg\[0\]\. 
'), (('.*',), r'^CRITICAL WARNING: \[Constraints 18-850\] Failed to place register with ASYNC_REG property shape that starts with register SH/SH/MGT_TOP/SH_ILA_0/inst/ila_core_inst/en_adv_trigger_2_reg\. '), + (('.*',), r'^CRITICAL WARNING: \[Shape Builder 18-850\] Failed to place register with ASYNC_REG property shape that starts with register WRAPPER_INST/SH/SH/MGT_TOP.*'), (('.*',), r'^CRITICAL WARNING: \[Vivado 12-1433\] Expecting a non-empty list of cells to be added to the pblock\. Please verify the correctness of the argument. \[/home/centos/src/project_data/workspace/test_develop_manual/hdk/cl/examples/cl_dram_dma/build/constraints/cl_pnr_user\.xdc:15'), (('.*',), r'^CRITICAL WARNING: \[filemgmt 20-1741\] File \'axi_register_slice_v2_1_vl_rfs.v\'.*'), (('.*',), r'^CRITICAL WARNING: \[filemgmt 20-1741\] File \'blk_mem_gen_v8_3_vhsyn_rfs.vhd\'.*'), @@ -81,16 +84,31 @@ def set_allowed_warnings(cls): (('.*',), r'^CRITICAL WARNING: \[Opt 31-430\].*'), (('.*',), r'WARNING: \[Vivado 12-3731\].*'), (('.*',), r'WARNING: \[Constraints 18-619\] A clock with name \'CLK_300M_DIMM._DP\'.*'), + (('.*',), r'WARNING: \[Constraints 18-5648\] .*'), (('.*',), r'WARNING: \[Vivado_Tcl 4-391\] The following IPs are missing output products for Implementation target. These output products could be required for synthesis, please generate the output products using the generate_target or synth_ip command before running synth_design.*'), (('.*',), r'WARNING: \[DRC RPBF-3\] IO port buffering.*'), (('.*',), r'WARNING: \[Place 46-14\] The placer has determined that this design is highly congested and may have difficulty routing. 
Run report_design_analysis -congestion for a detailed report\.'), (('.*',), r'WARNING: \[BD 41-1661\] .*'), (('.*',), r'WARNING: \[Vivado 12-584\] No ports matched \'tck\''), (('.*',), r'WARNING: \[Vivado 12-830\] No fanout objects found for'), - (('.*',), r'WARNING: \[Place 30-640\] Place Check.*'), + (('.*',), r'WARNING: \[Place 30-640\] Place Check.*'), (('.*',), r'WARNING: \[BD 41-2180\] Resetting the memory initialization file.*'), + (('.*',), r'WARNING: \[Synth 8-689\] .*'), + (('.*',), r'WARNING: \[Synth 8-6896\] .*'), + (('.*',), r'WARNING: \[Synth 8-7023\] .*'), + (('.*',), r'CRITICAL WARNING: \[DRC HDPR-113\] Check for INOUT ports in RP: Reconfigurable module WRAPPER_INST/SH contains an INOUT port named .*'), + (('.*',), r'WARNING: \[Synth 8-7071\] .*'), + (('.*',), r'WARNING: \[Synth 8-7129\] .*'), + (('.*',), r'WARNING: \[Route 35-3387\] .*'), + (('cl_sde_*',), r'WARNING: \[Vivado 12-180\] No cells matched .*'), + (('cl_sde_*',), r'WARNING: \[Vivado 12-1008\] No clocks found for command.*'), + (('cl_sde_*',), r'CRITICAL WARNING: \[Designutils 20-1280\] .*'), + (('cl_sde_*',), r'^CRITICAL WARNING: \[Constraints 18-952\] .*'), + (('cl_sde_*',), r'^CRITICAL WARNING: \[Vivado 12-1039\] .*'), + (('cl_sde_*',), r'^CRITICAL WARNING: \[Vivado 12-1433\] .*'), + (('cl_sde.*',), r'WARNING: \[Synth 8-6057\] Memory.*'), (('cl_dram_dma_A1_B2_C0_2_(CONGESTION|BASIC)',), r'^CRITICAL WARNING: \[Route 35-39\] The design did not meet timing requirements'), - (('cl_dram_dma_A1_B2_C0_2_(CONGESTION|TIMING)',), r'WARNING: \[Vivado 12-180\] No cells matched \'CL/CL_DMA_PCIS_SLV/CL_TST_DDR_B/CL_TST/sync_rst_n_reg\''), + (('cl_dram_dma_A1_B2_C0_2_(CONGESTION|TIMING)',), r'WARNING: \[Vivado 12-180\] No cells matched \'CL/CL_DMA_PCIS_SLV/CL_TST_DDR_B/CL_TST/sync_rst_n_reg\''), (('cl_dram_dma_*',), r'CRITICAL WARNING: \[Designutils 20-1280\] Could not find module \'bd_bf3f_microblaze_I_0\''), (('cl_dram_dma_*',), r'CRITICAL WARNING: \[Designutils 20-1280\] Could not find module 
\'bd_bf3f_rst_0_0\''), (('cl_dram_dma_*',), r'CRITICAL WARNING: \[Designutils 20-1280\] Could not find module \'bd_bf3f_ilmb_0\''), @@ -103,10 +121,12 @@ def set_allowed_warnings(cls): (('cl_dram_dma_*',), r'WARNING: \[Synth 8-6104\] Input port \'value\' has an internal driver .*'), (('cl_dram_dma_*',), r'WARNING: \[Vivado 12-180\] No cells matched .*'), (('cl_dram_dma_*',), r'WARNING: \[Vivado 12-1008\] No clocks found for command.*'), - (('cl_dram_dma_*',), r'WARNING: \[Memdata 28-146\] Could not find a netlist instance for the specified SCOPED_TO_REF value of: ddr4_core'), - (('cl_dram_dma_*',), r'WARNING: \[Memdata 28-146\] Could not find a netlist instance for the specified SCOPED_TO_REF value of: bd_bf3f'), + (('.*',), r'WARNING: \[Memdata 28-146\] Could not find a netlist instance for the specified SCOPED_TO_REF value of: ddr4_core'), + (('.*',), r'WARNING: \[Memdata 28-146\] Could not find a netlist instance for the specified SCOPED_TO_REF value of: bd_bf3f'), (('cl_dram_dma_*',), r'WARNING: \[Place 46-14\] The placer has determined'), (('cl_dram_dma_*',), r'WARNING: \[Synth 8-5856\]*'), + (('cl_dram_dma_*',), r'WARNING: \[Physopt 32-894\].*'), + (('cl_dram_dma_*',), r'CRITICAL WARNING: \[Vivado 12-1433\] Expecting a non-empty list of cells to be added to the pblock.*'), (('cl_hello_world_vhdl_A.*',), r'WARNING: \[Memdata 28-146\] Could not find a netlist instance for the specified SCOPED_TO_REF value of: ddr4_core'), (('cl_hello_world_vhdl_A.*',), r'WARNING: \[Memdata 28-146\] Could not find a netlist instance for the specified SCOPED_TO_REF value of: bd_bf3f'), (('cl_hello_world_vhdl_A.*',), r'CRITICAL WARNING: \[Designutils 20-1280\] Could not find module \'bd_bf3f_microblaze_I_0\''), diff --git a/hdk/tests/test_hdk_scripts.py b/hdk/tests/test_hdk_scripts.py index d6b49de2..cfa2c8a2 100644 --- a/hdk/tests/test_hdk_scripts.py +++ b/hdk/tests/test_hdk_scripts.py @@ -58,12 +58,6 @@ def test_create_fpga_image(self): def test_wait_for_afi(self): 
self.run_cmd("{}/shared/bin/scripts/wait_for_afi.py --afi {}".format(self.WORKSPACE, self.afi)) - def test_wait_for_afi_python27(self): - self.run_cmd("python2.7 {}/shared/bin/scripts/wait_for_afi.py --afi {}".format(self.WORKSPACE, self.afi)) - - def test_wait_for_afi_python34(self): - self.run_cmd("python3.4 {}/shared/bin/scripts/wait_for_afi.py --afi {}".format(self.WORKSPACE, self.afi)) - @pytest.mark.skip(reason="Not implemented") def test_notify_via_sns(self): assert False diff --git a/hdk/tests/test_load_afi.py b/hdk/tests/test_load_afi.py index 43ab95ed..c2d47f36 100644 --- a/hdk/tests/test_load_afi.py +++ b/hdk/tests/test_load_afi.py @@ -248,7 +248,7 @@ def check_runtime_software(self, cl, slot): assert find_fail_re.match(stdout_lines[-2]), "{} didn't fail. stdout:\n{}".format(command, "\n".join(stdout_lines)) elif re.match(r'cl_sde', cl): - (rc, stdout_lines, stderr_lines) = self.run_cmd("cd {}/hdk/cl/examples/{}/software/runtime".format( + (rc, stdout_lines, stderr_lines) = self.run_cmd("cd {}/hdk/cl/examples/{}/software/runtime && make clean && make all".format( self.WORKSPACE, cl), echo=True) assert rc == 0, "Runtime example failed." @@ -264,9 +264,12 @@ def base_test(self, cl, agfi, afi, install_xdma_driver, slots_to_test, option_ta slots_to_test = range(self.num_slots) # Make sure that the test can be built first - logger.info("Building runtime software") - (rc, stdout_lines, stderr_lines) = self.run_cmd("cd {}/hdk/cl/examples/{}/software/runtime && make -f Makefile SDK_DIR={}/sdk".format(self.WORKSPACE, cl, self.WORKSPACE)) - assert rc == 0, "Runtime software build failed." + if cl != 'cl_sde': + logger.info("Building runtime software") + (rc, stdout_lines, stderr_lines) = self.run_cmd("cd {}/hdk/cl/examples/{}/software/runtime && make -f Makefile SDK_DIR={}/sdk".format(self.WORKSPACE, cl, self.WORKSPACE)) + assert rc == 0, "Runtime software build failed." + else: + logger.info("cl_sde runtime test is app. 
No build needed") # Load the AFI onto all available FPGAs # This is required for the XDMA driver to correctly install for all slots @@ -294,6 +297,10 @@ def test_precompiled_cl_hello_world(self, xilinxVersion): cl = 'cl_hello_world' self.base_precompiled_test(cl, install_xdma_driver=False) + def test_precompiled_cl_sde(self, xilinxVersion): + cl = 'cl_sde' + self.base_precompiled_test(cl, install_xdma_driver=False) + @pytest.mark.parametrize("build_strategy", AwsFpgaTestBase.DCP_BUILD_STRATEGIES) @pytest.mark.parametrize("clock_recipe_c", sorted(AwsFpgaTestBase.DCP_CLOCK_RECIPES['C']['recipes'].keys())) @pytest.mark.parametrize("clock_recipe_b", sorted(AwsFpgaTestBase.DCP_CLOCK_RECIPES['B']['recipes'].keys())) diff --git a/hdk_setup.sh b/hdk_setup.sh index e3d8482c..7086176b 100644 --- a/hdk_setup.sh +++ b/hdk_setup.sh @@ -30,18 +30,6 @@ current_dir=$(pwd) debug=0 -# This function checks if an environment module exists -# Returns 0 if it exists, and returns 1 if it doesn't -function does_module_exist() { - - output=`/usr/bin/ls /usr/local/Modules/$MODULE_VERSION/modulefiles | grep $1` - - if [[ $output == "$1" ]]; then - return 0; - else - return 1; - fi -} function usage { echo -e "USAGE: source [\$AWS_FPGA_REPO_DIR/]$script_name [-d|-debug] [-h|-help]" @@ -88,7 +76,7 @@ hdk_shell_version=$(readlink $HDK_COMMON_DIR/shell_stable) debug_msg "Checking for Vivado install:" # before going too far make sure Vivado is available -if ! is_vivado_available; then +if ! exists vivado; then err_msg "Please install/enable Vivado." err_msg " If you are using the FPGA Developer AMI then please request support." 
return 1 @@ -127,10 +115,7 @@ debug_msg "Done setting environment variables."; # Download correct shell DCP info_msg "Using HDK shell version $hdk_shell_version" debug_msg "Checking HDK shell's checkpoint version" -hdk_shell_s3_bucket=aws-fpga-hdk-resources -declare -a s3_hdk_ltx_files=("cl_hello_world.debug_probes.ltx" - "cl_dram_dma.debug_probes.ltx" - ) +hdk_resources_s3_bucket=aws-fpga-hdk-resources # Shell files to be downloaded declare -a s3_hdk_files=("SH_CL_BB_routed.dcp" @@ -148,13 +133,14 @@ do fi hdk_shell_dir=$HDK_SHELL_DIR/build/$sub_dir/from_aws hdk_file=$hdk_shell_dir/$shell_file - s3_shell_dir=$hdk_shell_s3_bucket/hdk/$hdk_shell_version/build/$sub_dir/from_aws + s3_shell_dir=hdk/$hdk_shell_version/build/$sub_dir/from_aws # Download the sha256 if [ ! -e $hdk_shell_dir ]; then mkdir -p $hdk_shell_dir || { err_msg "Failed to create $hdk_shell_dir"; return 2; } fi # Use curl instead of AWS CLI so that credentials aren't required. - curl -s https://s3.amazonaws.com/$s3_shell_dir/$shell_file.sha256 -o $hdk_file.sha256 || { err_msg "Failed to download HDK shell's $shell_file version from $s3_shell_dir/$shell_file.sha256 -o $hdk_file.sha256"; return 2; } + debug_msg "curl -s https://$hdk_resources_s3_bucket.s3.amazonaws.com/$s3_shell_dir/$shell_file.sha256 -o $hdk_file.sha256" + curl -s https://$hdk_resources_s3_bucket.s3.amazonaws.com/$s3_shell_dir/$shell_file.sha256 -o $hdk_file.sha256 || { err_msg "Failed to download HDK shell's $shell_file version from $s3_shell_dir/$shell_file.sha256 -o $hdk_file.sha256"; return 2; } if grep -q ' /dev/null - # Run init.sh then clean-up - if ! $HDK_DIR/common/verif/scripts/init.sh $models_dir; then - err_msg "DDR4 model build failed." - err_msg " Build dir=$ddr4_build_dir" - popd &> /dev/null - return 2 - fi - info_msg "DDR4 model build passed." - popd &> /dev/null - rm -rf $ddr4_build_dir -else - debug_msg "DDR4 model files exist in "$ddr4_model_dir/". 
Skipping model creation step."; +if [[ $models_vivado_version != $VIVADO_VER ]] && [ ! -e $models_dir/build.lock ]; then + rm -rf $HDK_COMMON_DIR/verif/scripts/.done 2>&1 >/dev/null fi +ddr4_build_dir=$HDK_COMMON_DIR/verif/scripts/tmp +if [ -d $ddr4_build_dir ] && [ ! -e $models_dir/build.lock ]; then rm -rf $ddr4_build_dir; fi +if ! make -s -C $HDK_DIR/common/verif/scripts MODEL_DIR=$models_dir; then + err_msg " build dir=$ddr4_build_dir" + return 2 +fi + if [[ ":$CL_DIR" == ':' ]]; then - info_msg "ATTENTION: Don't forget to set the CL_DIR variable for the directory of your Custom Logic."; + info_msg "attention: don't forget to set the CL_DIR variable for the directory of your custom logic."; else info_msg "CL_DIR is $CL_DIR" if [ ! -d $CL_DIR ]; then - err_msg "CL_DIR doesn't exist. Set CL_DIR to a valid directory." + err_msg "CL_DIR doesn't exist. set CL_DIR to a valid directory." unset CL_DIR fi fi diff --git a/runtime_setup.sh b/runtime_setup.sh new file mode 100644 index 00000000..3c4de586 --- /dev/null +++ b/runtime_setup.sh @@ -0,0 +1,15 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/sdaccel_runtime_setup.sh b/sdaccel_runtime_setup.sh index 375ee292..bc735cd1 100644 --- a/sdaccel_runtime_setup.sh +++ b/sdaccel_runtime_setup.sh @@ -117,8 +117,8 @@ function check_kernel_ver { cat $AWS_FPGA_REPO_DIR/SDAccel/kernel_version.txt warn_msg "Xilinx Runtime not validated against your installed kernel version." fi - } + # Process command line args args=( "$@" ) for (( i = 0; i < ${#args[@]}; i++ )); do @@ -142,13 +142,15 @@ for (( i = 0; i < ${#args[@]}; i++ )); do done -if ! is_vivado_available; then +if ! exists vivado; then if [[ -z "${VIVADO_TOOL_VERSION}" ]]; then - err_msg " You are not using FPGA Developer AMI and VIVADO_TOOL_VERSION ENV variable is Empty. " - err_msg " ENV Variable VIVADO_TOOL_VERSION is required to be set for runtime " + err_msg " VIVADO_TOOL_VERSION ENV variable is not set." + err_msg " ENV Variable VIVADO_TOOL_VERSION needs to be set for runtime usage. " + err_msg " If AFI was generated using V2019.1 tools use the command : export VIVADO_TOOL_VERSION=2019.1 " + err_msg " If AFI was generated using V2018.3 tools use the command : export VIVADO_TOOL_VERSION=2018.3 " err_msg " If AFI was generated using V2018.2 tools use the command : export VIVADO_TOOL_VERSION=2018.2 " err_msg " If AFI was generated using V2017.4 tools use the command : export VIVADO_TOOL_VERSION=2017.4 " - err_msg " If you are using the FPGA Developer AMI then please request support on AWS FPGA Developers Forum." + err_msg " Please set VIVADO_TOOL_VERSION to the correct value and re-run script." return 1 else info_msg " VIVADO tools not found. Reading VIVADO_TOOL_VERSION ENV variable to determine runtime version... 
" @@ -167,7 +169,7 @@ check_kernel_ver check_xdma_driver check_edma_driver -if [[ "$VIVADO_TOOL_VERSION" =~ .*2018\.2.* || "$VIVADO_TOOL_VERSION" =~ .*2018\.3.* ]]; then +if [[ "$VIVADO_TOOL_VERSION" =~ .*2018\.2.* || "$VIVADO_TOOL_VERSION" =~ .*2018\.3.* || "$VIVADO_TOOL_VERSION" =~ .*2019\.1.* ]]; then info_msg "Xilinx Vivado version is $VIVADO_TOOL_VERSION" if [ $override == 1 ]; then @@ -190,12 +192,12 @@ if [[ "$VIVADO_TOOL_VERSION" =~ .*2018\.2.* || "$VIVADO_TOOL_VERSION" =~ .*2018\ if [ -f "/opt/xilinx/xrt/setup.sh" ]; then source /opt/xilinx/xrt/setup.sh else - err_msg " Cannot find /opt/xilinx/xrt/setup.sh " - err_msg " Please check XRT is installed correctly " - err_msg "Please Refer $AWS_FPGA_REPO/SDAccel/doc/XRT_installation_instructions.md for XRT installation instructions" + err_msg " Cannot find /opt/xilinx/xrt/setup.sh" + err_msg " Please check XRT is installed correctly" + err_msg " Please Refer to $AWS_FPGA_REPO/SDAccel/doc/XRT_installation_instructions.md for XRT installation instructions" return 1 fi - info_msg " XRT Runtime setup Done " + info_msg " XRT Runtime setup Done" else err_msg "$xrt_build_ver does not match recommended versions" cat $AWS_FPGA_REPO_DIR/SDAccel/sdaccel_xrt_version.txt @@ -209,7 +211,6 @@ if [[ "$VIVADO_TOOL_VERSION" =~ .*2018\.2.* || "$VIVADO_TOOL_VERSION" =~ .*2018\ fi else info_msg "Xilinx Vivado version is $VIVADO_TOOL_VERSION " - #info_msg " checking for file: /opt/Xilinx/SDx/${VIVADO_TOOL_VERSION}.rte.dyn/setup.sh" info_msg " Now checking XOCL driver..." check_xocl_driver if [ -f "/opt/Xilinx/SDx/${VIVADO_TOOL_VERSION}.rte.dyn/setup.sh" ]; then diff --git a/sdaccel_setup.sh b/sdaccel_setup.sh index a5d00445..bbce138b 100644 --- a/sdaccel_setup.sh +++ b/sdaccel_setup.sh @@ -158,11 +158,19 @@ for (( i = 0; i < ${#args[@]}; i++ )); do esac done + +if [[ ! -z "$XILINX_VITIS" ]]; then + debug_msg "XILINX_VITIS is set" + err_msg "XILINX_VITIS variable is set, but you are calling sdaccel_setup.sh. 
This likely means that you are calling source sdaccel_setup.sh with Xilinx Vitis installed. Xilinx has replaced SDAccel with Vitis from 2019.2 release onwards. Please checkout the Vitis README and flow instead." + return 1 +fi + # Check XILINX_SDX is set if ! check_set_xilinx_sdx; then return 1 fi + info_msg " XILINX_SDX is set to $XILINX_SDX" # Install patches as required. info_msg " Checking & installing required patches" @@ -171,7 +179,7 @@ setup_patches # Update Xilinx SDAccel Examples from GitHub info_msg "Using SDx $RELEASE_VER" -if [[ $RELEASE_VER =~ .*2017\.4.* || $RELEASE_VER =~ .*2018\.2.* || $RELEASE_VER =~ .*2018\.3.* ]]; then +if [[ $RELEASE_VER =~ .*2017\.4.* || $RELEASE_VER =~ .*2018\.2.* || $RELEASE_VER =~ .*2018\.3.* || $RELEASE_VER =~ .*2019\.1.* ]]; then info_msg "Updating Xilinx SDAccel Examples $RELEASE_VER" git submodule update --init -- SDAccel/examples/xilinx_$RELEASE_VER export VIVADO_TOOL_VER=$RELEASE_VER @@ -183,8 +191,8 @@ if [[ $RELEASE_VER =~ .*2017\.4.* || $RELEASE_VER =~ .*2018\.2.* || $RELEASE_VER fi ln -sf $SDACCEL_DIR/examples/xilinx_$RELEASE_VER $SDACCEL_DIR/examples/xilinx else - echo " $RELEASE_VER is not supported (2017.4, 2018.2 & 2018.3 are supported).\n" - exit 2 + echo " $RELEASE_VER is not supported (2017.4, 2018.2, 2018.3 and 2019.1 are supported).\n" + return 2 fi # settings64 removal - once we put this in the AMI, we will add a check @@ -194,7 +202,7 @@ export LD_LIBRARY_PATH=`$XILINX_SDX/bin/ldlibpath.sh $XILINX_SDX/lib/lnx64.o`:$X export LD_LIBRARY_PATH=$XILINX_SDX/lnx64/tools/opencv/:$LD_LIBRARY_PATH # add variable to allow compilation using 2017.4 and 2018.2 on newer OSes -export XOCC_ADD_OPTIONS="--xp param:compiler.useHlsGpp=1" +export XOCC_ADD_OPTIONS="--xp param:compiler.useHlsGpp=1 --xp param:compiler.minFrequencyLimit=80" # Check if internet connection is available if ! 
check_internet; then diff --git a/sdk/apps/byte_swapper/app.py b/sdk/apps/byte_swapper/app.py index a5178011..881f84ac 100644 --- a/sdk/apps/byte_swapper/app.py +++ b/sdk/apps/byte_swapper/app.py @@ -35,6 +35,7 @@ def get_fpga_slot_status(): def sig_handler(sig, frame): logger.info("Signal ({}) received!".format(sig)) fpga.clear_fpga() + fpga.clean_up() sys.exit(0) signal.signal(signal.SIGINT, sig_handler) diff --git a/sdk/apps/byte_swapper/fpga_funcs.py b/sdk/apps/byte_swapper/fpga_funcs.py index ad0b584b..6d8e2328 100644 --- a/sdk/apps/byte_swapper/fpga_funcs.py +++ b/sdk/apps/byte_swapper/fpga_funcs.py @@ -25,6 +25,7 @@ def __init__(self): self.AFI = os.environ['BSWAPPER_AFI'].encode('utf-8') self.register = int(os.environ['BSWAPPER_REG'], 0) logger.info("Slot to be loaded: {}\nAFI: {}\nregister: 0x{:x}".format(self.slot, self.AFI, self.register)) + fpga_mgmt.fpga_mgmt_init() def get_slot_info(self): logger.info("Retrieving slot info (slot={}).".format(self.slot)) @@ -81,3 +82,6 @@ def clear_fpga(self): ret = fpga_mgmt.fpga_mgmt_clear_local_image(self.slot) if ret != 0: raise RuntimeError("Could not clear slot {}".format(self.slot)) + + def clean_up(self): + fpga_mgmt.fpgma_mgmt_close() diff --git a/sdk/apps/virtual-ethernet/doc/Virtual_Ethernet_Application_Guide.md b/sdk/apps/virtual-ethernet/doc/Virtual_Ethernet_Application_Guide.md index 5f772f40..b13f484c 100644 --- a/sdk/apps/virtual-ethernet/doc/Virtual_Ethernet_Application_Guide.md +++ b/sdk/apps/virtual-ethernet/doc/Virtual_Ethernet_Application_Guide.md @@ -328,7 +328,7 @@ lspci -D | grep "1d0f:ec20" Within `/dpdk/drivers/net/spp` there is a file called `spp_ethdev.c`. Your Vendor and Device ID should be added to the following table in `spp_ethdev.c` and then DPDK should be recompiled. 
-``` +```C static const struct rte_pci_id pci_id_spp_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_SDE_LOOPBACK_CL) }, { RTE_PCI_DEVICE(, ) }, @@ -345,7 +345,7 @@ make Within `/dpdk/usertools` there is a file called `dpdk-devbind.py`. Your Vendor and Device ID should be added to the following table in `dpdk-devbind.py`. -``` +```python aws_fpga_sde = {'Class': '05', 'Vendor': '1d0f', 'Device': 'f002', 'SVendor': None, 'SDevice': None} = {'Class': '05', 'Vendor': '', 'Device': '', diff --git a/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_install.py b/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_install.py old mode 100644 new mode 100755 diff --git a/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_pktgen_install.py b/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_pktgen_install.py old mode 100644 new mode 100755 diff --git a/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_pktgen_setup.py b/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_pktgen_setup.py old mode 100644 new mode 100755 diff --git a/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_setup.py b/sdk/apps/virtual-ethernet/scripts/virtual_ethernet_setup.py old mode 100644 new mode 100755 diff --git a/sdk/linux_kernel_drivers/xdma/10-xdma.rules b/sdk/linux_kernel_drivers/xdma/10-xdma.rules old mode 100755 new mode 100644 diff --git a/sdk/linux_kernel_drivers/xdma/Makefile b/sdk/linux_kernel_drivers/xdma/Makefile old mode 100755 new mode 100644 index 182051b7..2427268a --- a/sdk/linux_kernel_drivers/xdma/Makefile +++ b/sdk/linux_kernel_drivers/xdma/Makefile @@ -42,6 +42,7 @@ all : clean: $(MAKE) -C $(BUILDSYSTEM_DIR) M=$(PWD) clean + @/bin/rm -f *.ko modules.order *.mod.c *.o *.o.ur-safe .*.o.cmd install: all $(MAKE) -C $(BUILDSYSTEM_DIR) M=$(PWD) modules_install diff --git a/sdk/linux_kernel_drivers/xdma/README.md b/sdk/linux_kernel_drivers/xdma/README.md index 52519c27..1a818d3a 100644 --- a/sdk/linux_kernel_drivers/xdma/README.md +++ 
b/sdk/linux_kernel_drivers/xdma/README.md @@ -42,7 +42,7 @@ Before diving into the detail specification of the XDMA, here’s a short, intui The Program below uses standard Linux system call `open()` to create a file descriptor (fd), mapping to a pair of XDMA channels (one for `read()` and one for `write()`). The XDMA hardware engine is named the `XDMA Core`. The XDMA write channel is called H2C (Host to Core). The XDMA read channel is called C2H (Core to Host). The Core refers to the FPGA and the Host refers to the instance CPU. -``` +```C #include #include #include @@ -199,7 +199,7 @@ Only the POLLIN mask is supported and is used to notify that an event has occurr Refer to [User-defined interrupts events README](./user_defined_interrupts_README.md) for more details. -The application MUST issue a `pread` of the ready file descriptor to return and clear the `events_irq` variable within the XDMA driver in order to be notified of future user interrupts. An example of using `poll` and `pread` for user defined interrupts is provided within the test_dram_dma.c `interrupt_example()`. +The application MUST issue a `pread` of the ready file descriptor to return and clear the `events_irq` variable within the XDMA driver in order to be notified of future user interrupts. An example of using `poll` and `pread` for user defined interrupts is provided within the [test_dram_dma.c](../../../hdk/cl/examples/cl_dram_dma/software/runtime/test_dram_dma.c) `interrupt_example()`. ## Concurrency and Multi-Threading diff --git a/sdk/linux_kernel_drivers/xdma/cdev_bypass.c b/sdk/linux_kernel_drivers/xdma/cdev_bypass.c index 9ab445ea..5e40526b 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_bypass.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_bypass.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,11 +21,12 @@ * Karen Xie * ******************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include "libxdma_api.h" #include "xdma_cdev.h" -#define write_register(v,mem,off) iowrite32(v, mem) +#define write_register(v, mem, off) iowrite32(v, mem) static int copy_desc_data(struct xdma_transfer *transfer, char __user *buf, size_t *buf_offset, size_t buf_size) @@ -34,8 +35,15 @@ static int copy_desc_data(struct xdma_transfer *transfer, char __user *buf, int copy_err; int rc = 0; - BUG_ON(!buf); - BUG_ON(!buf_offset); + if (!buf) { + pr_err("Invalid user buffer\n"); + return -EINVAL; + } + + if (!buf_offset) { + pr_err("Invalid user buffer offset\n"); + return -EINVAL; + } /* Fill user buffer with descriptor data */ for (i = 0; i < transfer->desc_num; i++) { @@ -76,7 +84,7 @@ static ssize_t char_bypass_read(struct file *file, char __user *buf, xdev = xcdev->xdev; engine = xcdev->engine; - dbg_sg("In char_bypass_read()\n"); + dbg_sg("In %s()\n", __func__); if (count & 3) { dbg_sg("Buffer size must be a multiple of 4 bytes\n"); @@ -119,7 +127,7 @@ static ssize_t char_bypass_write(struct file *file, const char __user *buf, struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; u32 desc_data; - u32 *bypass_addr; + void __iomem *bypass_addr; size_t buf_offset = 0; int rc = 0; int copy_err; @@ -145,18 +153,21 @@ static ssize_t char_bypass_write(struct file *file, const char __user *buf, return -ENODEV; } - dbg_sg("In char_bypass_write()\n"); + dbg_sg("In %s()\n", __func__); spin_lock(&engine->lock); /* Write descriptor data to the bypass BAR */ - bypass_addr = (u32 *)xdev->bar[xdev->bypass_bar_idx]; - bypass_addr += engine->bypass_offset; + bypass_addr = xdev->bar[xdev->bypass_bar_idx]; + bypass_addr = (void __iomem *)( + (u32 __iomem 
*)bypass_addr + engine->bypass_offset + ); while (buf_offset < count) { copy_err = copy_from_user(&desc_data, &buf[buf_offset], sizeof(u32)); if (!copy_err) { - write_register(desc_data, bypass_addr, bypass_addr - engine->bypass_offset); + write_register(desc_data, bypass_addr, + bypass_addr - engine->bypass_offset); buf_offset += sizeof(u32); rc = buf_offset; } else { @@ -188,5 +199,5 @@ static const struct file_operations bypass_fops = { void cdev_bypass_init(struct xdma_cdev *xcdev) { - cdev_init(&xcdev->cdev, &bypass_fops); + cdev_init(&xcdev->cdev, &bypass_fops); } diff --git a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c index 404bbd7f..9fa7a352 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,6 +21,7 @@ * Karen Xie * ******************************************************************************/ + #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include @@ -28,6 +29,12 @@ #include "xdma_cdev.h" #include "cdev_ctrl.h" +#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE +#define xlx_access_ok(X, Y, Z) access_ok(Y, Z) +#else +#define xlx_access_ok(X, Y, Z) access_ok(X, Y, Z) +#endif + /* * character device file operations for control bus (through control bridge) */ @@ -36,13 +43,13 @@ static ssize_t char_ctrl_read(struct file *fp, char __user *buf, size_t count, { struct xdma_cdev *xcdev = (struct xdma_cdev *)fp->private_data; struct xdma_dev *xdev; - void *reg; + void __iomem *reg; u32 w; int rv; rv = xcdev_check(__func__, xcdev, 0); if (rv < 0) - return rv; + return rv; xdev = xcdev->xdev; /* only 32-bit aligned and 32-bit multiples */ @@ -52,8 +59,8 @@ static ssize_t char_ctrl_read(struct file *fp, char __user *buf, size_t count, reg = xdev->bar[xcdev->bar] + *pos; //w = read_register(reg); w = ioread32(reg); - dbg_sg("char_ctrl_read(@%p, count=%ld, pos=%d) value = 0x%08x\n", reg, - (long)count, (int)*pos, w); + dbg_sg("%s(@%p, count=%ld, pos=%d) value = 0x%08x\n", + __func__, reg, (long)count, (int)*pos, w); rv = copy_to_user(buf, &w, 4); if (rv) dbg_sg("Copy to userspace failed but continuing\n"); @@ -67,13 +74,13 @@ static ssize_t char_ctrl_write(struct file *file, const char __user *buf, { struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; struct xdma_dev *xdev; - void *reg; + void __iomem *reg; u32 w; int rv; rv = xcdev_check(__func__, xcdev, 0); if (rv < 0) - return rv; + return rv; xdev = xcdev->xdev; /* only 32-bit aligned and 32-bit multiples */ @@ -83,12 +90,11 @@ static ssize_t char_ctrl_write(struct file *file, const char __user *buf, /* first address is BAR base plus 
file position offset */ reg = xdev->bar[xcdev->bar] + *pos; rv = copy_from_user(&w, buf, 4); - if (rv) { + if (rv) pr_info("copy from user failed %d/4, but continuing.\n", rv); - } - dbg_sg("char_ctrl_write(0x%08x @%p, count=%ld, pos=%d)\n", w, reg, - (long)count, (int)*pos); + dbg_sg("%s(0x%08x @%p, count=%ld, pos=%d)\n", + __func__, w, reg, (long)count, (int)*pos); //write_register(w, reg); iowrite32(w, reg); *pos += 4; @@ -133,9 +139,13 @@ long char_ctrl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) rv = xcdev_check(__func__, xcdev, 0); if (rv < 0) - return rv; - xdev = xcdev->xdev; + return rv; + xdev = xcdev->xdev; + if (!xdev) { + pr_info("cmd %u, xdev NULL.\n", cmd); + return -EINVAL; + } pr_info("cmd 0x%x, xdev 0x%p, pdev 0x%p.\n", cmd, xdev, xdev->pdev); if (_IOC_TYPE(cmd) != XDMA_IOC_MAGIC) { @@ -145,10 +155,10 @@ long char_ctrl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } if (_IOC_DIR(cmd) & _IOC_READ) - result = !access_ok(VERIFY_WRITE, (void __user *)arg, + result = !xlx_access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)); else if (_IOC_DIR(cmd) & _IOC_WRITE) - result = !access_ok(VERIFY_READ, (void __user *)arg, + result = !xlx_access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)); if (result) { @@ -158,7 +168,7 @@ long char_ctrl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case XDMA_IOCINFO: - if (copy_from_user((void *)&ioctl_obj, (void *) arg, + if (copy_from_user((void *)&ioctl_obj, (void __user *) arg, sizeof(struct xdma_ioc_base))) { pr_err("copy_from_user failed.\n"); return -EFAULT; @@ -169,20 +179,11 @@ long char_ctrl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ioctl_obj.magic, XDMA_XCL_MAGIC); return -ENOTTY; } - return version_ioctl(xcdev, (void __user *)arg); case XDMA_IOCOFFLINE: - if (!xdev) { - pr_info("cmd %u, xdev NULL.\n", cmd); - return -EINVAL; - } xdma_device_offline(xdev->pdev, xdev); break; case XDMA_IOCONLINE: - if (!xdev) { - 
pr_info("cmd %u, xdev NULL.\n", cmd); - return -EINVAL; - } xdma_device_online(xdev->pdev, xdev); break; default: @@ -205,7 +206,7 @@ int bridge_mmap(struct file *file, struct vm_area_struct *vma) rv = xcdev_check(__func__, xcdev, 0); if (rv < 0) - return rv; + return rv; xdev = xcdev->xdev; off = vma->vm_pgoff << PAGE_SHIFT; diff --git a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h index 47e697cd..e0a9047b 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h +++ b/sdk/linux_kernel_drivers/xdma/cdev_ctrl.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,6 +21,7 @@ * Karen Xie * ******************************************************************************/ + #ifndef _XDMA_IOCALLS_POSIX_H_ #define _XDMA_IOCALLS_POSIX_H_ @@ -64,14 +65,14 @@ struct xdma_ioc_base { }; struct xdma_ioc_info { - struct xdma_ioc_base base; - unsigned short vendor; - unsigned short device; - unsigned short subsystem_vendor; - unsigned short subsystem_device; - unsigned int dma_engine_version; - unsigned int driver_version; - unsigned long long feature_id; + struct xdma_ioc_base base; + unsigned short vendor; + unsigned short device; + unsigned short subsystem_vendor; + unsigned short subsystem_device; + unsigned int dma_engine_version; + unsigned int driver_version; + unsigned long long feature_id; unsigned short domain; unsigned char bus; unsigned char dev; diff --git a/sdk/linux_kernel_drivers/xdma/cdev_events.c b/sdk/linux_kernel_drivers/xdma/cdev_events.c index 514aaf43..2b468ed7 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_events.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_events.c @@ -1,7 +1,7 @@ 
/******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -39,7 +39,7 @@ static ssize_t char_events_read(struct file *file, char __user *buf, rv = xcdev_check(__func__, xcdev, 0); if (rv < 0) - return rv; + return rv; user_irq = xcdev->user_irq; if (!user_irq) { pr_info("xcdev 0x%p, user_irq NULL.\n", xcdev); @@ -88,7 +88,7 @@ static unsigned int char_events_poll(struct file *file, poll_table *wait) rv = xcdev_check(__func__, xcdev, 0); if (rv < 0) - return rv; + return rv; user_irq = xcdev->user_irq; if (!user_irq) { pr_info("xcdev 0x%p, user_irq NULL.\n", xcdev); diff --git a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c index 31854f92..2b615bb1 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -23,6 +23,7 @@ ******************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ +#include #include #include "libxdma_api.h" #include "xdma_cdev.h" @@ -56,12 +57,9 @@ static loff_t char_sgdma_llseek(struct file *file, loff_t off, int whence) if (newpos < 0) return -EINVAL; file->f_pos = newpos; - dbg_fops("char_sgdma_llseek: pos=%lld\n", (signed long long)newpos); -#if 0 - pr_err("0x%p, off 0x%lld, whence %d -> pos %lld.\n", + dbg_fops("0x%p, off %lld, whence %d -> pos %lld.\n", file, (signed long long)off, whence, (signed long long)off); -#endif return newpos; } @@ -84,7 +82,10 @@ static loff_t char_sgdma_llseek(struct file *file, loff_t off, int whence) static int check_transfer_align(struct xdma_engine *engine, const char __user *buf, size_t count, loff_t pos, int sync) { - BUG_ON(!engine); + if (!engine) { + pr_err("Invalid DMA engine\n"); + return -EINVAL; + } /* AXI ST or AXI MM non-incremental addressing mode? 
*/ if (engine->non_incr_addr) { @@ -175,17 +176,16 @@ static int char_sgdma_map_user_buf_to_sgl(struct xdma_io_cb *cb, bool write) { struct sg_table *sgt = &cb->sgt; unsigned long len = cb->len; - char *buf = cb->buf; + char __user *buf = cb->buf; struct scatterlist *sg; - unsigned int pages_nr = (((unsigned long)buf + len + PAGE_SIZE -1) - + unsigned int pages_nr = (((unsigned long)buf + len + PAGE_SIZE - 1) - ((unsigned long)buf & PAGE_MASK)) >> PAGE_SHIFT; int i; int rv; - if (pages_nr == 0) { + if (pages_nr == 0) return -EINVAL; - } if (sg_alloc_table(sgt, pages_nr, GFP_KERNEL)) { pr_err("sgl OOM.\n"); @@ -211,8 +211,8 @@ static int char_sgdma_map_user_buf_to_sgl(struct xdma_io_cb *cb, bool write) if (rv != pages_nr) { pr_err("unable to pin down all %u user pages, %d.\n", pages_nr, rv); - rv = -EFAULT; cb->pages_nr = rv; + rv = -EFAULT; goto err_out; } @@ -228,9 +228,9 @@ static int char_sgdma_map_user_buf_to_sgl(struct xdma_io_cb *cb, bool write) sg = sgt->sgl; for (i = 0; i < pages_nr; i++, sg = sg_next(sg)) { - //unsigned int offset = (uintptr_t)buf & ~PAGE_MASK; unsigned int offset = offset_in_page(buf); - unsigned int nbytes = min_t(unsigned int, PAGE_SIZE - offset, len); + unsigned int nbytes = min_t(unsigned int, + PAGE_SIZE - offset, len); flush_dcache_page(cb->pages[i]); sg_set_page(sg, cb->pages[i], nbytes, offset); @@ -239,7 +239,11 @@ static int char_sgdma_map_user_buf_to_sgl(struct xdma_io_cb *cb, bool write) len -= nbytes; } - BUG_ON(len); + if (len) { + pr_err("Invalid user buffer length. 
Cannot map to sgl\n"); + return -EINVAL; + } + cb->pages_nr = pages_nr; return 0; @@ -249,7 +253,7 @@ static int char_sgdma_map_user_buf_to_sgl(struct xdma_io_cb *cb, bool write) return rv; } -static ssize_t char_sgdma_read_write(struct file *file, char __user *buf, +static ssize_t char_sgdma_read_write(struct file *file, const char __user *buf, size_t count, loff_t *pos, bool write) { int rv; @@ -283,7 +287,7 @@ static ssize_t char_sgdma_read_write(struct file *file, char __user *buf, } memset(&cb, 0, sizeof(struct xdma_io_cb)); - cb.buf = buf; + cb.buf = (char __user *)buf; cb.len = count; rv = char_sgdma_map_user_buf_to_sgl(&cb, write); if (rv < 0) @@ -291,10 +295,6 @@ static ssize_t char_sgdma_read_write(struct file *file, char __user *buf, res = xdma_xfer_submit(xdev, engine->channel, write, *pos, &cb.sgt, 0, sgdma_timeout * 1000); - //pr_err("xfer_submit return=%lld.\n", (s64)res); - - //interrupt_status(xdev); - char_sgdma_unmap_user_buf(&cb, write); return res; @@ -302,138 +302,134 @@ static ssize_t char_sgdma_read_write(struct file *file, char __user *buf, static ssize_t char_sgdma_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) + size_t count, loff_t *pos) { - return char_sgdma_read_write(file, (char *)buf, count, pos, 1); + return char_sgdma_read_write(file, (char *)buf, count, pos, 1); } static ssize_t char_sgdma_read(struct file *file, char __user *buf, - size_t count, loff_t *pos) + size_t count, loff_t *pos) { - struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; - struct xdma_engine *engine; - int rv; - - rv = xcdev_check(__func__, xcdev, 1); - if (rv < 0) - return rv; - - engine = xcdev->engine; - - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { - rv = xdma_cyclic_transfer_setup(engine); - if (rv < 0 && rv != -EBUSY) - return rv; - /* 600 sec. 
timeout */ - return xdma_engine_read_cyclic(engine, buf, count, 600000); - } - - return char_sgdma_read_write(file, (char *)buf, count, pos, 0); + return char_sgdma_read_write(file, (char *)buf, count, pos, 0); } static int ioctl_do_perf_start(struct xdma_engine *engine, unsigned long arg) { - int rv; - struct xdma_dev *xdev; - - BUG_ON(!engine); - xdev = engine->xdev; - BUG_ON(!xdev); - - /* performance measurement already running on this engine? */ - if (engine->xdma_perf) { - dbg_perf("IOCTL_XDMA_PERF_START failed!\n"); - dbg_perf("Perf measurement already seems to be running!\n"); - return -EBUSY; - } - engine->xdma_perf = kzalloc(sizeof(struct xdma_performance_ioctl), - GFP_KERNEL); + int rv; + struct xdma_dev *xdev; - if (!engine->xdma_perf) - return -ENOMEM; + if (!engine || !engine->xdev) { + pr_err("Invalid DMA engine 0x%p, 0x%p.\n", + engine, engine ? engine->xdev : NULL); + return -EINVAL; + } - rv = copy_from_user(engine->xdma_perf, - (struct xdma_performance_ioctl *)arg, - sizeof(struct xdma_performance_ioctl)); + xdev = engine->xdev; - if (rv < 0) { - dbg_perf("Failed to copy from user space 0x%lx\n", arg); - return -EINVAL; - } - if (engine->xdma_perf->version != IOCTL_XDMA_PERF_V1) { - dbg_perf("Unsupported IOCTL version %d\n", - engine->xdma_perf->version); - return -EINVAL; - } + /* if performance measurement already running on this engine */ + if (engine->xdma_perf) { + dbg_perf("Perf measurement already seems to be running!\n"); + return -EBUSY; + } + + engine->xdma_perf = kzalloc(sizeof(struct xdma_performance_ioctl), + GFP_KERNEL); + if (!engine->xdma_perf) + return -ENOMEM; + + rv = copy_from_user(engine->xdma_perf, + (struct xdma_performance_ioctl *)arg, + sizeof(struct xdma_performance_ioctl)); + if (rv < 0) { + dbg_perf("Failed to copy from user space 0x%lx\n", arg); + return -EINVAL; + } + if (engine->xdma_perf->version != IOCTL_XDMA_PERF_V1) { + dbg_perf("Unsupported IOCTL version %d\n", + engine->xdma_perf->version); + return -EINVAL; + 
} enable_perf(engine); - dbg_perf("transfer_size = %d\n", engine->xdma_perf->transfer_size); - /* initialize wait queue */ - init_waitqueue_head(&engine->xdma_perf_wq); - xdma_performance_submit(xdev, engine); + dbg_perf("transfer_size = %d\n", engine->xdma_perf->transfer_size); + + /* initialize wait queue */ + init_waitqueue_head(&engine->xdma_perf_wq); + + rv = xdma_performance_submit(xdev, engine); + if (rv < 0) + pr_err("Failed to submit dma performance\n"); - return 0; + return 0; } static int ioctl_do_perf_stop(struct xdma_engine *engine, unsigned long arg) { - struct xdma_transfer *transfer = NULL; - int rv; - - dbg_perf("IOCTL_XDMA_PERF_STOP\n"); + struct xdma_transfer *transfer = NULL; + int rv; - /* no performance measurement running on this engine? */ - if (!engine->xdma_perf) { - dbg_perf("No measurement in progress\n"); - return -EINVAL; - } + if (!engine) { + pr_err("DMA engine NULL.\n"); + return -EINVAL; + } - /* stop measurement */ - transfer = engine_cyclic_stop(engine); - dbg_perf("Waiting for measurement to stop\n"); + dbg_perf("IOCTL_XDMA_PERF_STOP\n"); - if (engine->xdma_perf) { - get_perf_stats(engine); + /* if no performance measurement running on this engine */ + if (!engine->xdma_perf) { + dbg_perf("No measurement in progress\n"); + return -EINVAL; + } - rv = copy_to_user((void __user *)arg, engine->xdma_perf, - sizeof(struct xdma_performance_ioctl)); - if (rv) { - dbg_perf("Error copying result to user\n"); - return -EINVAL; - } - } else { - dbg_perf("engine->xdma_perf == NULL?\n"); + /* stop measurement */ + dbg_perf("Waiting for measurement to stop\n"); + transfer = engine_cyclic_stop(engine); + if (!transfer) { + pr_err("Failed to stop cyclic transfer\n"); + return -EINVAL; } - kfree(engine->xdma_perf); - engine->xdma_perf = NULL; + get_perf_stats(engine); + rv = copy_to_user((void __user *)arg, engine->xdma_perf, + sizeof(struct xdma_performance_ioctl)); + if (rv) { + dbg_perf("Error copying result to user\n"); + return -EINVAL; + } 
+ + kfree(transfer); + kfree(engine->xdma_perf); + engine->xdma_perf = NULL; - return 0; + return 0; } static int ioctl_do_perf_get(struct xdma_engine *engine, unsigned long arg) { - int rc; + int rc; - BUG_ON(!engine); + if (!engine) { + pr_err("DMA engine NULL.\n"); + return -EINVAL; + } - dbg_perf("IOCTL_XDMA_PERF_GET\n"); + dbg_perf("IOCTL_XDMA_PERF_GET\n"); - if (engine->xdma_perf) { + if (engine->xdma_perf) { get_perf_stats(engine); - rc = copy_to_user((void __user *)arg, engine->xdma_perf, - sizeof(struct xdma_performance_ioctl)); - if (rc) { - dbg_perf("Error copying result to user\n"); - return -EINVAL; - } - } else { - dbg_perf("engine->xdma_perf == NULL?\n"); - return -EPROTO; - } + rc = copy_to_user((void __user *)arg, engine->xdma_perf, + sizeof(struct xdma_performance_ioctl)); + if (rc) { + dbg_perf("Error copying result to user\n"); + return -EINVAL; + } + } else { + dbg_perf("engine->xdma_perf == NULL?\n"); + return -EPROTO; + } - return 0; + return 0; } static int ioctl_do_addrmode_set(struct xdma_engine *engine, unsigned long arg) @@ -446,7 +442,10 @@ static int ioctl_do_addrmode_get(struct xdma_engine *engine, unsigned long arg) int rv; unsigned long src; - BUG_ON(!engine); + if (!engine) { + pr_err("DMA engine NULL.\n"); + return -EINVAL; + } src = !!engine->non_incr_addr; dbg_perf("IOCTL_XDMA_ADDRMODE_GET\n"); @@ -455,22 +454,24 @@ static int ioctl_do_addrmode_get(struct xdma_engine *engine, unsigned long arg) return rv; } -static int ioctl_do_align_get(struct xdma_engine *engine, unsigned long arg) +static int ioctl_do_align_get(struct xdma_engine *engine, unsigned long arg) { - BUG_ON(!engine); + if (!engine) { + pr_err("DMA engine NULL.\n"); + return -EINVAL; + } dbg_perf("IOCTL_XDMA_ALIGN_GET\n"); return put_user(engine->addr_align, (int __user *)arg); } static long char_sgdma_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) + unsigned long arg) { struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; struct 
xdma_dev *xdev; struct xdma_engine *engine; - - int rv = 0; + int rv = 0; rv = xcdev_check(__func__, xcdev, 1); if (rv < 0) @@ -480,15 +481,15 @@ static long char_sgdma_ioctl(struct file *file, unsigned int cmd, engine = xcdev->engine; switch (cmd) { - case IOCTL_XDMA_PERF_START: - rv = ioctl_do_perf_start(engine, arg); - break; - case IOCTL_XDMA_PERF_STOP: - rv = ioctl_do_perf_stop(engine, arg); - break; - case IOCTL_XDMA_PERF_GET: - rv = ioctl_do_perf_get(engine, arg); - break; + case IOCTL_XDMA_PERF_START: + rv = ioctl_do_perf_start(engine, arg); + break; + case IOCTL_XDMA_PERF_STOP: + rv = ioctl_do_perf_stop(engine, arg); + break; + case IOCTL_XDMA_PERF_GET: + rv = ioctl_do_perf_get(engine, arg); + break; case IOCTL_XDMA_ADDRMODE_SET: rv = ioctl_do_addrmode_set(engine, arg); break; @@ -498,13 +499,13 @@ static long char_sgdma_ioctl(struct file *file, unsigned int cmd, case IOCTL_XDMA_ALIGN_GET: rv = ioctl_do_align_get(engine, arg); break; - default: - dbg_perf("Unsupported operation\n"); - rv = -EINVAL; - break; - } + default: + dbg_perf("Unsupported operation 0x%x.\n", cmd); + rv = -EINVAL; + break; + } - return rv; + return rv; } static int char_sgdma_open(struct inode *inode, struct file *file) @@ -520,8 +521,7 @@ static int char_sgdma_open(struct inode *inode, struct file *file) if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { if (engine->device_open == 1) return -EBUSY; - else - engine->device_open = 1; + engine->device_open = 1; } return 0; diff --git a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h index c67bf99f..4f0a38cc 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h +++ b/sdk/linux_kernel_drivers/xdma/cdev_sgdma.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,6 +21,7 @@ * Karen Xie * ******************************************************************************/ + #ifndef _XDMA_IOCALLS_POSIX_H_ #define _XDMA_IOCALLS_POSIX_H_ @@ -50,17 +51,16 @@ * _IOC_SIZE(nr) returns size */ -struct xdma_performance_ioctl -{ - /* IOCTL_XDMA_IOCTL_Vx */ - uint32_t version; - uint32_t transfer_size; - /* measurement */ - uint32_t stopped; - uint32_t iterations; - uint64_t clock_cycle_count; - uint64_t data_cycle_count; - uint64_t pending_count; +struct xdma_performance_ioctl { + /* IOCTL_XDMA_IOCTL_Vx */ + uint32_t version; + uint32_t transfer_size; + /* measurement */ + uint32_t stopped; + uint32_t iterations; + uint64_t clock_cycle_count; + uint64_t data_cycle_count; + uint64_t pending_count; }; diff --git a/sdk/linux_kernel_drivers/xdma/cdev_xvc.c b/sdk/linux_kernel_drivers/xdma/cdev_xvc.c index adafa7fc..e346bc79 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_xvc.c +++ b/sdk/linux_kernel_drivers/xdma/cdev_xvc.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,6 +21,7 @@ * Karen Xie * ******************************************************************************/ + #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include "xdma_cdev.h" @@ -36,30 +37,30 @@ #ifdef __REG_DEBUG__ /* SECTION: Function definitions */ -inline void __write_register(const char *fn, u32 value, void *base, +inline void __write_register(const char *fn, u32 value, void __iomem *base, unsigned int off) { - pr_info("%s: 0x%p, W reg 0x%lx, 0x%x.\n", fn, base, off, value); - iowrite32(value, base + off); + pr_info("%s: 0x%p, W reg 0x%lx, 0x%x.\n", fn, base, off, value); + iowrite32(value, base + off); } -inline u32 __read_register(const char *fn, void *base, unsigned int off) +inline u32 __read_register(const char *fn, void __iomem *base, unsigned int off) { u32 v = ioread32(base + off); - pr_info("%s: 0x%p, R reg 0x%lx, 0x%x.\n", fn, base, off, v); - return v; + pr_info("%s: 0x%p, R reg 0x%lx, 0x%x.\n", fn, base, off, v); + return v; } -#define write_register(v,base,off) __write_register(__func__, v, base, off) -#define read_register(base,off) __read_register(__func__, base, off) +#define write_register(v, base, off) __write_register(__func__, v, base, off) +#define read_register(base, off) __read_register(__func__, base, off) #else -#define write_register(v,base,off) iowrite32(v, (base) + (off)) -#define read_register(base,off) ioread32((base) + (off)) +#define write_register(v, base, off) iowrite32(v, (base) + (off)) +#define read_register(base, off) ioread32((base) + (off)) #endif /* #ifdef __REG_DEBUG__ */ -static int xvc_shift_bits(void *base, u32 tms_bits, u32 tdi_bits, +static int xvc_shift_bits(void __iomem *base, u32 tms_bits, u32 tdi_bits, u32 *tdo_bits) { u32 control; @@ -96,7 +97,7 @@ static int xvc_shift_bits(void *base, u32 tms_bits, u32 tdi_bits, static long xvc_ioctl(struct file 
*filp, unsigned int cmd, unsigned long arg) { - struct xdma_cdev *xcdev = (struct xdma_cdev *)filp->private_data; + struct xdma_cdev *xcdev = (struct xdma_cdev *)filp->private_data; struct xdma_dev *xdev; struct xvc_ioc xvc_obj; unsigned int opcode; @@ -113,6 +114,7 @@ static long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) rv = xcdev_check(__func__, xcdev, 0); if (rv < 0) return rv; + xdev = xcdev->xdev; if (cmd != XDMA_IOCXVC) { @@ -139,7 +141,7 @@ static long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) total_bits = xvc_obj.length; total_bytes = (total_bits + 7) >> 3; - buffer = (char *)kmalloc(total_bytes * 3, GFP_KERNEL); + buffer = kmalloc(total_bytes * 3, GFP_KERNEL); if (!buffer) { pr_info("OOM %u, op 0x%x, len %u bits, %u bytes.\n", 3 * total_bytes, opcode, total_bits, total_bytes); @@ -150,12 +152,16 @@ static long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) tdi_buf = tms_buf + total_bytes; tdo_buf = tdi_buf + total_bytes; - rv = copy_from_user((void *)tms_buf, xvc_obj.tms_buf, total_bytes); + rv = copy_from_user((void *)tms_buf, + (const char __user *)xvc_obj.tms_buf, + total_bytes); if (rv) { pr_info("copy tmfs_buf failed: %d/%u.\n", rv, total_bytes); goto cleanup; } - rv = copy_from_user((void *)tdi_buf, xvc_obj.tdi_buf, total_bytes); + rv = copy_from_user((void *)tdi_buf, + (const char __user *)xvc_obj.tdi_buf, + total_bytes); if (rv) { pr_info("copy tdi_buf failed: %d/%u.\n", rv, total_bytes); goto cleanup; @@ -166,7 +172,8 @@ static long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) iobase = xdev->bar[xcdev->bar] + xcdev->base; /* set length register to 32 initially if more than one - * word-transaction is to be done */ + * word-transaction is to be done + */ if (total_bits >= 32) write_register(0x20, iobase, XVC_BAR_LENGTH_REG); @@ -177,7 +184,7 @@ static long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) u32 tms_store = 0; u32 tdi_store = 0; 
u32 tdo_store = 0; - + if (bits_left < 32) { /* set number of bits to shift out */ write_register(bits_left, iobase, XVC_BAR_LENGTH_REG); @@ -190,33 +197,35 @@ static long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Shift data out and copy to output buffer */ rv = xvc_shift_bits(iobase, tms_store, tdi_store, &tdo_store); if (rv < 0) - goto cleanup; + break; memcpy(tdo_buf + bytes, &tdo_store, shift_bytes); } + if (rv < 0) + goto unlock; + /* if testing bar access swap tdi and tdo bufferes to "loopback" */ if (opcode == 0x2) { - char *tmp = tdo_buf; + unsigned char *tmp = tdo_buf; tdo_buf = tdi_buf; tdi_buf = tmp; } - rv = copy_to_user((void *)xvc_obj.tdo_buf, tdo_buf, total_bytes); - if (rv) { + rv = copy_to_user(xvc_obj.tdo_buf, (const void *)tdo_buf, total_bytes); + if (rv) pr_info("copy back tdo_buf failed: %d/%u.\n", rv, total_bytes); - rv = -EFAULT; - goto cleanup; - } - -cleanup: - if (buffer) - kfree(buffer); - mmiowb(); +unlock: +#if KERNEL_VERSION(5, 1, 0) >= LINUX_VERSION_CODE + wmb(); +#endif spin_unlock(&xcdev->lock); +cleanup: + kfree(buffer); + return rv; } @@ -224,10 +233,10 @@ static long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) * character device file operations for the XVC */ static const struct file_operations xvc_fops = { - .owner = THIS_MODULE, - .open = char_open, - .release = char_close, - .unlocked_ioctl = xvc_ioctl, + .owner = THIS_MODULE, + .open = char_open, + .release = char_close, + .unlocked_ioctl = xvc_ioctl, }; void cdev_xvc_init(struct xdma_cdev *xcdev) diff --git a/sdk/linux_kernel_drivers/xdma/cdev_xvc.h b/sdk/linux_kernel_drivers/xdma/cdev_xvc.h index de9473a3..9a2b8689 100644 --- a/sdk/linux_kernel_drivers/xdma/cdev_xvc.h +++ b/sdk/linux_kernel_drivers/xdma/cdev_xvc.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. 
+ * Copyright(c) 2015 - 2020 Xilinx, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,14 +21,14 @@ * Karen Xie * ******************************************************************************/ + #ifndef __XVC_IOCTL_H__ #define __XVC_IOCTL_H__ #include /* - * !!! TODO !!! - * need a better way set the bar offset dynamicly + * the bar offset can be changed at compile time via xvc_bar_offset */ #define XVC_BAR_OFFSET_DFLT 0x40000 /* DSA 4.0 */ @@ -37,9 +37,9 @@ struct xvc_ioc { unsigned int opcode; unsigned int length; - unsigned char *tms_buf; - unsigned char *tdi_buf; - unsigned char *tdo_buf; + const char __user *tms_buf; + const char __user *tdi_buf; + void __user *tdo_buf; }; #define XDMA_IOCXVC _IOWR(XVC_MAGIC, 1, struct xvc_ioc) diff --git a/sdk/linux_kernel_drivers/xdma/libxdma.c b/sdk/linux_kernel_drivers/xdma/libxdma.c old mode 100755 new mode 100644 index 32523e50..9dc519b6 --- a/sdk/linux_kernel_drivers/xdma/libxdma.c +++ b/sdk/linux_kernel_drivers/xdma/libxdma.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -36,23 +36,6 @@ #include "libxdma_api.h" #include "cdev_sgdma.h" -/* SECTION: Module licensing */ - -#ifdef __LIBXDMA_MOD__ -#include "version.h" -#define DRV_MODULE_NAME "libxdma" -#define DRV_MODULE_DESC "Xilinx XDMA Base Driver" -#define DRV_MODULE_RELDATE "Feb. 
2017" - -static char version[] = - DRV_MODULE_DESC " " DRV_MODULE_NAME " v" DRV_MODULE_VERSION "\n"; - -MODULE_AUTHOR("Xilinx, Inc."); -MODULE_DESCRIPTION(DRV_MODULE_DESC); -MODULE_VERSION(DRV_MODULE_VERSION); -MODULE_LICENSE("GPL v2"); -#endif - /* Module Parameters */ static unsigned int poll_mode; module_param(poll_mode, uint, 0644); @@ -123,19 +106,18 @@ static inline void xdev_list_remove(struct xdma_dev *xdev) struct xdma_dev *xdev_find_by_pdev(struct pci_dev *pdev) { - struct xdma_dev *xdev, *tmp; - - mutex_lock(&xdev_mutex); - list_for_each_entry_safe(xdev, tmp, &xdev_list, list_head) { - if (xdev->pdev == pdev) { - mutex_unlock(&xdev_mutex); - return xdev; - } - } - mutex_unlock(&xdev_mutex); - return NULL; + struct xdma_dev *xdev, *tmp; + + mutex_lock(&xdev_mutex); + list_for_each_entry_safe(xdev, tmp, &xdev_list, list_head) { + if (xdev->pdev == pdev) { + mutex_unlock(&xdev_mutex); + return xdev; + } + } + mutex_unlock(&xdev_mutex); + return NULL; } -EXPORT_SYMBOL_GPL(xdev_find_by_pdev); static inline int debug_check_dev_hndl(const char *fname, struct pci_dev *pdev, void *hndl) @@ -167,9 +149,9 @@ inline void __write_register(const char *fn, u32 value, void *iomem, unsigned lo pr_err("%s: w reg 0x%lx(0x%p), 0x%x.\n", fn, off, iomem, value); iowrite32(value, iomem); } -#define write_register(v,mem,off) __write_register(__func__, v, mem, off) +#define write_register(v, mem, off) __write_register(__func__, v, mem, off) #else -#define write_register(v,mem,off) iowrite32(v, mem) +#define write_register(v, mem, off) iowrite32(v, mem) #endif inline u32 read_register(void *iomem) @@ -200,7 +182,7 @@ static void check_nonzero_interrupt_status(struct xdma_dev *xdev) w = read_register(®->channel_int_enable); if (w) - pr_info("%s xdma%d channel_int_enable = 0x%08x\n", + pr_info("%s xdma%d channel_int_enable = 0x%08x\n", dev_name(&xdev->pdev->dev), xdev->idx, w); w = read_register(®->user_int_request); @@ -296,14 +278,16 @@ void enable_perf(struct xdma_engine 
*engine) dbg_perf("IOCTL_XDMA_PERF_START\n"); } -EXPORT_SYMBOL_GPL(enable_perf); void get_perf_stats(struct xdma_engine *engine) { u32 hi; u32 lo; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } if (!engine->xdma_perf) { pr_info("%s perf struct not set up.\n", engine->name); @@ -327,13 +311,15 @@ void get_perf_stats(struct xdma_engine *engine) lo = read_register(&engine->regs->perf_pnd_lo); engine->xdma_perf->pending_count = build_u64(hi, lo); } -EXPORT_SYMBOL_GPL(get_perf_stats); static void engine_reg_dump(struct xdma_engine *engine) { u32 w; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } w = read_register(&engine->regs->identifier); pr_info("%s: ioread32(0x%p) = 0x%08x (id).\n", @@ -391,8 +377,8 @@ static void engine_status_dump(struct xdma_engine *engine) if ((v & XDMA_STAT_DESC_COMPLETED)) len += sprintf(buf + len, "DESC_COMPL,"); - /* common H2C & C2H */ - if ((v & XDMA_STAT_COMMON_ERR_MASK)) { + /* common H2C & C2H */ + if ((v & XDMA_STAT_COMMON_ERR_MASK)) { if ((v & XDMA_STAT_ALIGN_MISMATCH)) len += sprintf(buf + len, "ALIGN_MISMATCH "); if ((v & XDMA_STAT_MAGIC_STOPPED)) @@ -404,7 +390,7 @@ static void engine_status_dump(struct xdma_engine *engine) buf[len - 1] = ','; } - if ((engine->dir == DMA_TO_DEVICE)) { + if (engine->dir == DMA_TO_DEVICE) { /* H2C only */ if ((v & XDMA_STAT_H2C_R_ERR_MASK)) { len += sprintf(buf + len, "R:"); @@ -442,8 +428,8 @@ static void engine_status_dump(struct xdma_engine *engine) } } - /* common H2C & C2H */ - if ((v & XDMA_STAT_DESC_ERR_MASK)) { + /* common H2C & C2H */ + if ((v & XDMA_STAT_DESC_ERR_MASK)) { len += sprintf(buf + len, "DESC_ERR:"); if ((v & XDMA_STAT_DESC_UNSUPP_REQ)) len += sprintf(buf + len, "UNSUPP_REQ "); @@ -462,26 +448,24 @@ static void engine_status_dump(struct xdma_engine *engine) pr_info("%s\n", buffer); } -static u32 engine_status_read(struct xdma_engine *engine, bool clear, bool dump) +static void 
engine_status_read(struct xdma_engine *engine, bool clr, bool dump) { - u32 value; - - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } if (dump) engine_reg_dump(engine); /* read status register */ - if (clear) - value = engine->status = - read_register(&engine->regs->status_rc); + if (clr) + engine->status = read_register(&engine->regs->status_rc); else - value = engine->status = read_register(&engine->regs->status); + engine->status = read_register(&engine->regs->status); if (dump) engine_status_dump(engine); - - return value; } /** @@ -492,7 +476,10 @@ static void xdma_engine_stop(struct xdma_engine *engine) { u32 w; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } dbg_tfr("xdma_engine_stop(engine=%p)\n", engine); w = 0; @@ -526,7 +513,10 @@ static void engine_start_mode_config(struct xdma_engine *engine) { u32 w; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } /* If a perf test is running, enable the engine interrupts */ if (engine->xdma_perf) { @@ -559,11 +549,10 @@ static void engine_start_mode_config(struct xdma_engine *engine) if ((engine->streaming && (engine->dir == DMA_FROM_DEVICE)) || (engine->xdma_perf)) w |= (u32)XDMA_CTRL_IE_IDLE_STOPPED; - - /* set non-incremental addressing mode */ - if (engine->non_incr_addr) - w |= (u32)XDMA_CTRL_NON_INCR_ADDR; } + /* set non-incremental addressing mode */ + if (engine->non_incr_addr) + w |= (u32)XDMA_CTRL_NON_INCR_ADDR; dbg_tfr("iowrite32(0x%08x to 0x%p) (control)\n", w, (void *)&engine->regs->control); @@ -601,13 +590,22 @@ static struct xdma_transfer *engine_start(struct xdma_engine *engine) int extra_adj = 0; /* engine must be idle */ - BUG_ON(engine->running); + if (unlikely(!engine || engine->running)) { + pr_err("engine 0x%p running.\n", engine); + return NULL; + } /* engine transfer queue must not be empty */ - BUG_ON(list_empty(&engine->transfer_list)); + if 
(unlikely(list_empty(&engine->transfer_list))) { + pr_err("engine %s queue empty.\n", engine->name); + return NULL; + } /* inspect first transfer queued on the engine */ transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, entry); - BUG_ON(!transfer); + if (unlikely(!transfer)) { + pr_err("engine %s no xfer queued.\n", engine->name); + return NULL; + } /* engine is no longer shutdown */ engine->shutdown = ENGINE_SHUTDOWN_NONE; @@ -645,8 +643,9 @@ static struct xdma_transfer *engine_start(struct xdma_engine *engine) dbg_tfr("ioread32(0x%p) (dummy read flushes writes).\n", &engine->regs->status); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(5, 1, 0) mmiowb(); - +#endif engine_start_mode_config(engine); engine_status_read(engine, 0, 0); @@ -679,7 +678,10 @@ static void engine_service_shutdown(struct xdma_engine *engine) struct xdma_transfer *engine_transfer_completion(struct xdma_engine *engine, struct xdma_transfer *transfer) { - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return NULL; + } if (unlikely(!transfer)) { pr_info("%s: xfer empty.\n", engine->name); @@ -696,12 +698,9 @@ struct xdma_transfer *engine_transfer_completion(struct xdma_engine *engine, struct xdma_transfer *engine_service_transfer_list(struct xdma_engine *engine, struct xdma_transfer *transfer, u32 *pdesc_completed) { - BUG_ON(!engine); - BUG_ON(!pdesc_completed); - - if (unlikely(!transfer)) { - pr_info("%s xfer empty, pdesc completed %u.\n", - engine->name, *pdesc_completed); + if (unlikely(!engine || !pdesc_completed || !transfer)) { + pr_err("engine 0x%p, pdesc_completed 0x%p, xfer 0x%p.\n", + engine, pdesc_completed, transfer); return NULL; } @@ -752,16 +751,16 @@ static void engine_err_handle(struct xdma_engine *engine, */ if (engine->status & XDMA_STAT_BUSY) { value = read_register(&engine->regs->status); - if ((value & XDMA_STAT_BUSY) && printk_ratelimit()) - pr_info("%s has errors but is still BUSY\n", - engine->name); + if ((value & 
XDMA_STAT_BUSY)) + printk_ratelimited(KERN_INFO + "%s has errors but is still BUSY\n", + engine->name); } - if (printk_ratelimit()) { - pr_info("%s, s 0x%x, aborted xfer 0x%p, cmpl %d/%d\n", + printk_ratelimited(KERN_INFO + "%s, s 0x%x, aborted xfer 0x%p, cmpl %d/%d\n", engine->name, engine->status, transfer, desc_completed, transfer->desc_num); - } /* mark transfer as failed */ transfer->state = TRANSFER_STATE_FAILED; @@ -771,72 +770,71 @@ static void engine_err_handle(struct xdma_engine *engine, struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine, struct xdma_transfer *transfer, u32 *pdesc_completed) { - BUG_ON(!engine); - BUG_ON(!pdesc_completed); - - /* inspect the current transfer */ - if (unlikely(!transfer)) { - pr_info("%s xfer empty, pdesc completed %u.\n", - engine->name, *pdesc_completed); + if (unlikely(!engine || !pdesc_completed || !transfer)) { + pr_err("engine 0x%p, pdesc_completed 0x%p, xfer 0x%p.\n", + engine, pdesc_completed, transfer); return NULL; - } else { - if (((engine->dir == DMA_FROM_DEVICE) && - (engine->status & XDMA_STAT_C2H_ERR_MASK)) || - ((engine->dir == DMA_TO_DEVICE) && - (engine->status & XDMA_STAT_H2C_ERR_MASK))) { - pr_info("engine %s, status error 0x%x.\n", - engine->name, engine->status); - engine_status_dump(engine); - engine_err_handle(engine, transfer, *pdesc_completed); - goto transfer_del; - } + } + /* inspect the current transfer */ + if (((engine->dir == DMA_FROM_DEVICE) && + (engine->status & XDMA_STAT_C2H_ERR_MASK)) || + ((engine->dir == DMA_TO_DEVICE) && + (engine->status & XDMA_STAT_H2C_ERR_MASK))) { + pr_info("engine %s, status error 0x%x.\n", + engine->name, engine->status); + engine_status_dump(engine); + engine_err_handle(engine, transfer, *pdesc_completed); + goto transfer_del; + } - if (engine->status & XDMA_STAT_BUSY) - pr_debug("engine %s is unexpectedly busy - ignoring\n", - engine->name); + if (engine->status & XDMA_STAT_BUSY) + pr_debug("engine %s is unexpectedly busy - 
ignoring\n", + engine->name); - /* the engine stopped on current transfer? */ - if (*pdesc_completed < transfer->desc_num) { - transfer->state = TRANSFER_STATE_FAILED; - pr_info("%s, xfer 0x%p, stopped half-way, %d/%d.\n", - engine->name, transfer, *pdesc_completed, - transfer->desc_num); - } else { - dbg_tfr("engine %s completed transfer\n", engine->name); - dbg_tfr("Completed transfer ID = 0x%p\n", transfer); - dbg_tfr("*pdesc_completed=%d, transfer->desc_num=%d", - *pdesc_completed, transfer->desc_num); - - if (!transfer->cyclic) { - /* - * if the engine stopped on this transfer, - * it should be the last - */ - WARN_ON(*pdesc_completed > transfer->desc_num); - } - /* mark transfer as succesfully completed */ - transfer->state = TRANSFER_STATE_COMPLETED; + /* the engine stopped on current transfer? */ + if (*pdesc_completed < transfer->desc_num) { + transfer->state = TRANSFER_STATE_FAILED; + pr_info("%s, xfer 0x%p, stopped half-way, %d/%d.\n", + engine->name, transfer, *pdesc_completed, + transfer->desc_num); + } else { + dbg_tfr("engine %s completed transfer\n", engine->name); + dbg_tfr("Completed transfer ID = 0x%p\n", transfer); + dbg_tfr("*pdesc_completed=%d, transfer->desc_num=%d", + *pdesc_completed, transfer->desc_num); + + if (!transfer->cyclic) { + /* + * if the engine stopped on this transfer, + * it should be the last + */ + WARN_ON(*pdesc_completed > transfer->desc_num); } + /* mark transfer as succesfully completed */ + transfer->state = TRANSFER_STATE_COMPLETED; + } transfer_del: - /* remove completed transfer from list */ - list_del(engine->transfer_list.next); - /* add to dequeued number of descriptors during this run */ - engine->desc_dequeued += transfer->desc_num; + /* remove completed transfer from list */ + list_del(engine->transfer_list.next); + /* add to dequeued number of descriptors during this run */ + engine->desc_dequeued += transfer->desc_num; - /* - * Complete transfer - sets transfer to NULL if an asynchronous - * transfer has 
completed - */ - transfer = engine_transfer_completion(engine, transfer); - } + /* + * Complete transfer - sets transfer to NULL if an asynchronous + * transfer has completed + */ + transfer = engine_transfer_completion(engine, transfer); return transfer; } static void engine_service_perf(struct xdma_engine *engine, u32 desc_completed) { - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } /* performance measurement is running? */ if (engine->xdma_perf) { @@ -864,7 +862,10 @@ static void engine_transfer_dequeue(struct xdma_engine *engine) { struct xdma_transfer *transfer; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } /* pick first transfer on the queue (was submitted to the engine) */ transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, @@ -886,9 +887,12 @@ static int engine_ring_process(struct xdma_engine *engine) int start; int eop_count = 0; - BUG_ON(!engine); + if (unlikely(!engine || !engine->cyclic_result)) { + pr_err("engine 0x%p, cyclic_result 0x%p.\n", + engine, engine ? engine->cyclic_result : NULL); + return -EINVAL; + } result = engine->cyclic_result; - BUG_ON(!result); /* where we start receiving in the ring buffer */ start = engine->rx_tail; @@ -929,8 +933,11 @@ static int engine_service_cyclic_polled(struct xdma_engine *engine) struct xdma_poll_wb *writeback_data; u32 sched_limit = 0; - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); + if (unlikely(!engine || (engine->magic != MAGIC_ENGINE))) { + pr_err("bad engine 0x%p, magic 0x%lx.\n", + engine, engine ? 
engine->magic : 0UL); + return -EINVAL; + } writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; @@ -948,6 +955,11 @@ static int engine_service_cyclic_polled(struct xdma_engine *engine) } eop_count = engine_ring_process(engine); + if (eop_count < 0) { + pr_err("%s failed to process engine ring\n", + engine->name); + return eop_count; + } } if (eop_count == 0) { @@ -969,8 +981,11 @@ static int engine_service_cyclic_interrupt(struct xdma_engine *engine) int eop_count = 0; struct xdma_transfer *xfer; - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); + if (unlikely(!engine || (engine->magic != MAGIC_ENGINE))) { + pr_err("bad engine 0x%p, magic 0x%lx.\n", + engine, engine ? engine->magic : 0UL); + return -EINVAL; + } engine_status_read(engine, 1, 0); @@ -981,14 +996,12 @@ static int engine_service_cyclic_interrupt(struct xdma_engine *engine) */ xfer = &engine->cyclic_req->xfer; if(enable_credit_mp){ - if (eop_count > 0) { - //engine->eop_found = 1; - } wake_up_interruptible(&xfer->wq); }else{ if (eop_count > 0) { /* awake task on transfer's wait queue */ - dbg_tfr("wake_up_interruptible() due to %d EOP's\n", eop_count); + dbg_tfr("wake_up_interruptible() due to %d EOP's\n", + eop_count); engine->eop_found = 1; wake_up_interruptible(&xfer->wq); } @@ -1013,8 +1026,11 @@ static int engine_service_cyclic(struct xdma_engine *engine) dbg_tfr("engine_service_cyclic()"); - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); + if (unlikely(!engine || (engine->magic != MAGIC_ENGINE))) { + pr_err("bad engine 0x%p, magic 0x%lx.\n", + engine, engine ? engine->magic : 0UL); + return -EINVAL; + } if (poll_mode) rc = engine_service_cyclic_polled(engine); @@ -1029,7 +1045,10 @@ static void engine_service_resume(struct xdma_engine *engine) { struct xdma_transfer *transfer_started; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } /* engine stopped? 
*/ if (!engine->running) { @@ -1037,8 +1056,14 @@ static void engine_service_resume(struct xdma_engine *engine) if (!list_empty(&engine->transfer_list)) { /* (re)start engine */ transfer_started = engine_start(engine); - pr_info("re-started %s engine with pending xfer 0x%p\n", + if (!transfer_started) { + pr_err("%s failed to start dma engine\n", + engine->name); + return; + } + dbg_tfr("re-started %s engine with pending xfer 0x%p\n", engine->name, transfer_started); + /* engine was requested to be shutdown? */ } else if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { engine->shutdown |= ENGINE_SHUTDOWN_IDLE; @@ -1074,7 +1099,10 @@ static int engine_service(struct xdma_engine *engine, int desc_writeback) int rv = 0; struct xdma_poll_wb *wb_data; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return -EINVAL; + } /* If polling detected an error, signal to the caller */ if (err_flag) @@ -1142,7 +1170,7 @@ static int engine_service(struct xdma_engine *engine, int desc_writeback) transfer = engine_service_final_transfer(engine, transfer, &desc_count); /* Before starting engine again, clear the writeback data */ - if (poll_mode) { + if (poll_mode) { wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; wb_data->completed_desc_count = 0; } @@ -1160,7 +1188,11 @@ static void engine_service_work(struct work_struct *work) unsigned long flags; engine = container_of(work, struct xdma_engine, work); - BUG_ON(engine->magic != MAGIC_ENGINE); + if (unlikely(!engine || (engine->magic != MAGIC_ENGINE))) { + pr_err("bad engine 0x%p, magic 0x%lx.\n", + engine, engine ? 
engine->magic : 0UL); + return; + } /* lock the engine */ spin_lock_irqsave(&engine->lock, flags); @@ -1185,15 +1217,18 @@ static void engine_service_work(struct work_struct *work) spin_unlock_irqrestore(&engine->lock, flags); } -static u32 engine_service_wb_monitor(struct xdma_engine *engine, - u32 expected_wb) +static int engine_service_wb_monitor(struct xdma_engine *engine, + u32 expected_wb, u32 *wb) { struct xdma_poll_wb *wb_data; u32 desc_wb = 0; u32 sched_limit = 0; unsigned long timeout; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return -EINVAL; + } wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; /* @@ -1235,7 +1270,8 @@ static u32 engine_service_wb_monitor(struct xdma_engine *engine, sched_limit++; } - return desc_wb; + *wb = desc_wb; + return 0; } static int engine_service_poll(struct xdma_engine *engine, @@ -1246,8 +1282,11 @@ static int engine_service_poll(struct xdma_engine *engine, unsigned long flags; int rv = 0; - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); + if (unlikely(!engine || (engine->magic != MAGIC_ENGINE))) { + pr_err("bad engine 0x%p, magic 0x%lx.\n", + engine, engine ? 
engine->magic : 0UL); + return -EINVAL; + } writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; @@ -1263,15 +1302,16 @@ static int engine_service_poll(struct xdma_engine *engine, * determined before the function is called */ - desc_wb = engine_service_wb_monitor(engine, expected_desc_count); + rv = engine_service_wb_monitor(engine, expected_desc_count, &desc_wb); + if (rv < 0) + return rv; spin_lock_irqsave(&engine->lock, flags); dbg_tfr("%s service.\n", engine->name); - if (engine->cyclic_req) { + if (engine->cyclic_req) rv = engine_service_cyclic(engine); - } else { + else rv = engine_service(engine, desc_wb); - } spin_unlock_irqrestore(&engine->lock, flags); return rv; @@ -1281,7 +1321,10 @@ static irqreturn_t user_irq_service(int irq, struct xdma_user_irq *user_irq) { unsigned long flags; - BUG_ON(!user_irq); + if (unlikely(!user_irq)) { + pr_err("user_irq NULL.\n"); + return IRQ_NONE; + } if (user_irq->handler) return user_irq->handler(user_irq->user_idx, user_irq->dev); @@ -1309,16 +1352,14 @@ static irqreturn_t xdma_isr(int irq, void *dev_id) struct xdma_dev *xdev; struct interrupt_regs *irq_regs; - dbg_irq("(irq=%d, dev 0x%p) <<<< ISR.\n", irq, dev_id); - BUG_ON(!dev_id); - xdev = (struct xdma_dev *)dev_id; - - if (!xdev) { - WARN_ON(!xdev); - dbg_irq("xdma_isr(irq=%d) xdev=%p ??\n", irq, xdev); + if (unlikely(!dev_id)) { + pr_err("irq %d, xdev NULL.\n", irq); return IRQ_NONE; } + dbg_irq("(irq=%d, dev 0x%p) <<<< ISR.\n", irq, dev_id); + xdev = (struct xdma_dev *)dev_id; + irq_regs = (struct interrupt_regs *)(xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); @@ -1360,8 +1401,8 @@ static irqreturn_t xdma_isr(int irq, void *dev_id) struct xdma_engine *engine = &xdev->engine_h2c[channel]; /* engine present and its interrupt fired? 
*/ - if((engine->irq_bitmask & mask) && - (engine->magic == MAGIC_ENGINE)) { + if ((engine->irq_bitmask & mask) && + (engine->magic == MAGIC_ENGINE)) { mask &= ~engine->irq_bitmask; dbg_tfr("schedule_work, %s.\n", engine->name); schedule_work(&engine->work); @@ -1379,8 +1420,8 @@ static irqreturn_t xdma_isr(int irq, void *dev_id) struct xdma_engine *engine = &xdev->engine_c2h[channel]; /* engine present and its interrupt fired? */ - if((engine->irq_bitmask & mask) && - (engine->magic == MAGIC_ENGINE)) { + if ((engine->irq_bitmask & mask) && + (engine->magic == MAGIC_ENGINE)) { mask &= ~engine->irq_bitmask; dbg_tfr("schedule_work, %s.\n", engine->name); schedule_work(&engine->work); @@ -1401,12 +1442,15 @@ static irqreturn_t xdma_user_irq(int irq, void *dev_id) { struct xdma_user_irq *user_irq; - dbg_irq("(irq=%d) <<<< INTERRUPT SERVICE ROUTINE\n", irq); + if (unlikely(!dev_id)) { + pr_err("irq %d, dev_id NULL.\n", irq); + return IRQ_NONE; + } - BUG_ON(!dev_id); + dbg_irq("(irq=%d) <<<< INTERRUPT SERVICE ROUTINE\n", irq); user_irq = (struct xdma_user_irq *)dev_id; - return user_irq_service(irq, user_irq); + return user_irq_service(irq, user_irq); } /* @@ -1420,15 +1464,18 @@ static irqreturn_t xdma_channel_irq(int irq, void *dev_id) struct xdma_engine *engine; struct interrupt_regs *irq_regs; + if (unlikely(!dev_id)) { + pr_err("irq %d, dev_id NULL.\n", irq); + return IRQ_NONE; + } dbg_irq("(irq=%d) <<<< INTERRUPT service ROUTINE\n", irq); - BUG_ON(!dev_id); engine = (struct xdma_engine *)dev_id; xdev = engine->xdev; - if (!xdev) { - WARN_ON(!xdev); - dbg_irq("xdma_channel_irq(irq=%d) xdev=%p ??\n", irq, xdev); + if (unlikely(!xdev)) { + pr_err("xdma_channel_irq(irq=%d) engine 0x%p, xdev NULL.\n", + irq, engine); return IRQ_NONE; } @@ -1446,10 +1493,6 @@ static irqreturn_t xdma_channel_irq(int irq, void *dev_id) /* Schedule the bottom half */ schedule_work(&engine->work); - /* - * RTO - need to protect access here if multiple MSI-X are used for - * user interrupts - 
*/ xdev->irq_count++; return IRQ_HANDLED; } @@ -1541,6 +1584,7 @@ static int is_config_bar(struct xdma_dev *xdev, int idx) return flag; } +#ifndef XDMA_CONFIG_BAR_NUM static void identify_bars(struct xdma_dev *xdev, int *bar_id_list, int num_bars, int config_bar_pos) { @@ -1558,8 +1602,10 @@ static void identify_bars(struct xdma_dev *xdev, int *bar_id_list, int num_bars, * correctly with both 32-bit and 64-bit BARs. */ - BUG_ON(!xdev); - BUG_ON(!bar_id_list); + if (unlikely(!xdev || !bar_id_list)) { + pr_err("xdev 0x%p, bar_id_list 0x%p.\n", xdev, bar_id_list); + return; + } dbg_init("xdev 0x%p, bars %d, config at %d.\n", xdev, num_bars, config_bar_pos); @@ -1604,6 +1650,7 @@ static void identify_bars(struct xdma_dev *xdev, int *bar_id_list, int num_bars, num_bars, config_bar_pos, xdev->user_bar_idx, xdev->bypass_bar_idx); } +#endif /* map_bars() -- map device regions into kernel virtual address space * @@ -1613,6 +1660,24 @@ static void identify_bars(struct xdma_dev *xdev, int *bar_id_list, int num_bars, static int map_bars(struct xdma_dev *xdev, struct pci_dev *dev) { int rv; + +#ifdef XDMA_CONFIG_BAR_NUM + rv = map_single_bar(xdev, dev, XDMA_CONFIG_BAR_NUM); + if (rv <= 0) { + pr_info("%s, map config bar %d failed, %d.\n", + dev_name(&dev->dev), XDMA_CONFIG_BAR_NUM, rv); + return -EINVAL; + } + + if (is_config_bar(xdev, XDMA_CONFIG_BAR_NUM) == 0) { + pr_info("%s, unable to identify config bar %d.\n", + dev_name(&dev->dev), XDMA_CONFIG_BAR_NUM); + return -EINVAL; + } + xdev->config_bar_idx = XDMA_CONFIG_BAR_NUM; + + return 0; +#else int i; int bar_id_list[XDMA_BAR_NUM]; int bar_id_idx = 0; @@ -1661,20 +1726,21 @@ static int map_bars(struct xdma_dev *xdev, struct pci_dev *dev) /* unwind; unmap any BARs that we did map */ unmap_bars(xdev, dev); return rv; +#endif } /* - * MSI-X interrupt: + * MSI-X interrupt: * vectors, followed by vectors */ /* - * RTO - code to detect if MSI/MSI-X capability exists is derived + * code to detect if MSI/MSI-X capability exists is 
derived * from linux/pci/msi.c - pci_msi_check_device */ #ifndef arch_msi_check_device -int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +static int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) { return 0; } @@ -1718,8 +1784,10 @@ static int enable_msi_msix(struct xdma_dev *xdev, struct pci_dev *pdev) { int rv = 0; - BUG_ON(!xdev); - BUG_ON(!pdev); + if (unlikely(!xdev || !pdev)) { + pr_err("xdev 0x%p, pdev 0x%p.\n", xdev, pdev); + return -EINVAL; + } if (!interrupt_mode && msi_msix_capable(pdev, PCI_CAP_ID_MSIX)) { int req_nvec = xdev->c2h_channel_max + xdev->h2c_channel_max + @@ -1859,7 +1927,7 @@ static void irq_msix_channel_teardown(struct xdma_dev *xdev) prog_irq_msix_channel(xdev, 1); - engine = xdev->engine_h2c; + engine = xdev->engine_h2c; for (i = 0; i < xdev->h2c_channel_max; i++, j++, engine++) { if (!engine->msix_irq_line) break; @@ -1868,7 +1936,7 @@ static void irq_msix_channel_teardown(struct xdma_dev *xdev) free_irq(engine->msix_irq_line, engine); } - engine = xdev->engine_c2h; + engine = xdev->engine_c2h; for (i = 0; i < xdev->c2h_channel_max; i++, j++, engine++) { if (!engine->msix_irq_line) break; @@ -1881,15 +1949,19 @@ static void irq_msix_channel_teardown(struct xdma_dev *xdev) static int irq_msix_channel_setup(struct xdma_dev *xdev) { int i; - int j = xdev->h2c_channel_max; + int j; int rv = 0; u32 vector; struct xdma_engine *engine; - BUG_ON(!xdev); + if (unlikely(!xdev)) { + pr_err("xdev NULL.\n"); + return -EINVAL; + } if (!xdev->msix_enabled) return 0; + j = xdev->h2c_channel_max; engine = xdev->engine_h2c; for (i = 0; i < xdev->h2c_channel_max; i++, engine++) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) @@ -1932,15 +2004,19 @@ static int irq_msix_channel_setup(struct xdma_dev *xdev) static void irq_msix_user_teardown(struct xdma_dev *xdev) { int i; - int j = xdev->h2c_channel_max + xdev->c2h_channel_max; + int j; - BUG_ON(!xdev); + if (unlikely(!xdev)) { + pr_err("xdev NULL.\n"); + return; + } if 
(!xdev->msix_enabled) return; prog_irq_msix_user(xdev, 1); + j = xdev->h2c_channel_max + xdev->c2h_channel_max; for (i = 0; i < xdev->user_max; i++, j++) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) u32 vector = pci_irq_vector(xdev->pdev, j); @@ -1956,7 +2032,7 @@ static int irq_msix_user_setup(struct xdma_dev *xdev) { int i; int j = xdev->h2c_channel_max + xdev->c2h_channel_max; - int rv = 0; + int rv = 0; /* vectors set in probe_scan_for_msi() */ for (i = 0; i < xdev->user_max; i++, j++) { @@ -1974,7 +2050,7 @@ static int irq_msix_user_setup(struct xdma_dev *xdev) } pr_info("%d-USR-%d, IRQ#%d with 0x%p\n", xdev->idx, i, vector, &xdev->user_irq[i]); - } + } /* If any errors occur, free IRQs that were successfully requested */ if (rv) { @@ -2016,17 +2092,17 @@ static int irq_legacy_setup(struct xdma_dev *xdev, struct pci_dev *pdev) dbg_init("Legacy Interrupt register value = %d\n", val); if (val > 1) { val--; - w = (val<<24) | (val<<16) | (val<<8)| val; + w = (val << 24) | (val << 16) | (val << 8)| val; /* Program IRQ Block Channel vactor and IRQ Block User vector * with Legacy interrupt value */ - reg = xdev->bar[xdev->config_bar_idx] + 0x2080; // IRQ user + reg = xdev->bar[xdev->config_bar_idx] + 0x2080; // IRQ user write_register(w, reg, 0x2080); - write_register(w, reg+0x4, 0x2084); - write_register(w, reg+0x8, 0x2088); - write_register(w, reg+0xC, 0x208C); - reg = xdev->bar[xdev->config_bar_idx] + 0x20A0; // IRQ Block + write_register(w, reg + 0x4, 0x2084); + write_register(w, reg + 0x8, 0x2088); + write_register(w, reg + 0xC, 0x208C); + reg = xdev->bar[xdev->config_bar_idx] + 0x20A0; // IRQ Block write_register(w, reg, 0x20A0); - write_register(w, reg+0x4, 0x20A4); + write_register(w, reg + 0x4, 0x20A4); } xdev->irq_line = (int)pdev->irq; @@ -2077,10 +2153,14 @@ static void dump_desc(struct xdma_desc *desc_virt) { int j; u32 *p = (u32 *)desc_virt; - static char * const field_name[] = { - "magic|extra_adjacent|control", "bytes", "src_addr_lo", - 
"src_addr_hi", "dst_addr_lo", "dst_addr_hi", "next_addr", - "next_addr_pad"}; + static char * const field_name[] = { "magic|extra_adjacent|control", + "bytes", + "src_addr_lo", + "src_addr_hi", + "dst_addr_lo", + "dst_addr_hi", + "next_addr", + "next_addr_pad"}; char *dummy; /* remove warning about unused variable when debug printing is off */ @@ -2112,21 +2192,19 @@ static void transfer_dump(struct xdma_transfer *transfer) } #endif /* __LIBXDMA_DEBUG__ */ -/* xdma_desc_alloc() - Allocate cache-coherent array of N descriptors. - * - * Allocates an array of 'number' descriptors in contiguous PCI bus addressable - * memory. Chains the descriptors as a singly-linked list; the descriptor's - * next * pointer specifies the bus address of the next descriptor. +/* transfer_desc_init() - Chains the descriptors as a singly-linked list * + * Each descriptor's next * pointer specifies the bus address + * of the next descriptor. + * Terminates the last descriptor to form a singly-linked list * - * @dev Pointer to pci_dev - * @number Number of descriptors to be allocated - * @desc_bus_p Pointer where to store the first descriptor bus address - * - * @return Virtual address of the first descriptor + * @transfer Pointer to SG DMA transfers + * @count Number of descriptors allocated in continuous PCI bus addressable + * memory * + * @return 0 on success, EINVAL on failure */ -static void transfer_desc_init(struct xdma_transfer *transfer, int count) +static int transfer_desc_init(struct xdma_transfer *transfer, int count) { struct xdma_desc *desc_virt = transfer->desc_virt; dma_addr_t desc_bus = transfer->desc_bus; @@ -2135,7 +2213,10 @@ static void transfer_desc_init(struct xdma_transfer *transfer, int count) int extra_adj; u32 temp_control; - BUG_ON(count > XDMA_TRANSFER_MAX_DESC); + if (unlikely(count > XDMA_TRANSFER_MAX_DESC)) { + pr_err("xfer 0x%p, too many desc 0x%x.\n", transfer, count); + return -EINVAL; + } /* create singly-linked list for SG DMA controller */ for (i = 0; 
i < count - 1; i++) { @@ -2171,6 +2252,8 @@ static void transfer_desc_init(struct xdma_transfer *transfer, int count) temp_control = DESC_MAGIC; desc_virt[i].control = cpu_to_le32(temp_control); + + return 0; } /* xdma_desc_link() - Link two descriptors @@ -2188,8 +2271,7 @@ static void xdma_desc_link(struct xdma_desc *first, struct xdma_desc *second, * remember reserved control in first descriptor, but zero * extra_adjacent! */ - /* RTO - what's this about? Shouldn't it be 0x0000c0ffUL? */ - u32 control = le32_to_cpu(first->control) & 0x0000f0ffUL; + u32 control = le32_to_cpu(first->control) & 0x00FFC0FFUL; /* second descriptor given? */ if (second) { /* @@ -2215,42 +2297,35 @@ static void xdma_desc_link(struct xdma_desc *first, struct xdma_desc *second, /* xdma_desc_adjacent -- Set how many descriptors are adjacent to this one */ static void xdma_desc_adjacent(struct xdma_desc *desc, int next_adjacent) { - int extra_adj = 0; /* remember reserved and control bits */ - u32 control = le32_to_cpu(desc->control) & 0x0000f0ffUL; - u32 max_adj_4k = 0; + u32 control = le32_to_cpu(desc->control) & 0xFFFFC0FFUL; + + if (next_adjacent) + next_adjacent = next_adjacent - 1; + if (next_adjacent > MAX_EXTRA_ADJ) + next_adjacent = MAX_EXTRA_ADJ; + control |= (next_adjacent << 8); - if (next_adjacent > 0) { - extra_adj = next_adjacent - 1; - if (extra_adj > MAX_EXTRA_ADJ){ - extra_adj = MAX_EXTRA_ADJ; - } - max_adj_4k = (0x1000 - ((le32_to_cpu(desc->next_lo))&0xFFF))/32 - 1; - if (extra_adj>max_adj_4k) { - extra_adj = max_adj_4k; - } - if(extra_adj<0){ - printk("Warning: extra_adj<0, converting it to 0\n"); - extra_adj = 0; - } - } - /* merge adjacent and control field */ - control |= 0xAD4B0000UL | (extra_adj << 8); /* write control and next_adjacent */ desc->control = cpu_to_le32(control); } /* xdma_desc_control -- Set complete control field of a descriptor. 
*/ -static void xdma_desc_control_set(struct xdma_desc *first, u32 control_field) +static int xdma_desc_control_set(struct xdma_desc *first, u32 control_field) { /* remember magic and adjacent number */ u32 control = le32_to_cpu(first->control) & ~(LS_BYTE_MASK); - BUG_ON(control_field & ~(LS_BYTE_MASK)); + if (unlikely(control_field & ~(LS_BYTE_MASK))) { + pr_err("control_field bad 0x%x.\n", control_field); + return -EINVAL; + } /* merge adjacent and control field */ control |= control_field; /* write control and next_adjacent */ first->control = cpu_to_le32(control); + + return 0; } /* xdma_desc_clear -- Clear bits in control field of a descriptor. */ @@ -2259,26 +2334,12 @@ static void xdma_desc_control_clear(struct xdma_desc *first, u32 clear_mask) /* remember magic and adjacent number */ u32 control = le32_to_cpu(first->control); - BUG_ON(clear_mask & ~(LS_BYTE_MASK)); - /* merge adjacent and control field */ control &= (~clear_mask); /* write control and next_adjacent */ first->control = cpu_to_le32(control); } -/* xdma_desc_done - recycle cache-coherent linked list of descriptors. - * - * @dev Pointer to pci_dev - * @number Number of descriptors to be allocated - * @desc_virt Pointer to (i.e. 
virtual address of) first descriptor in list - * @desc_bus Bus address of first descriptor in list - */ -static inline void xdma_desc_done(struct xdma_desc *desc_virt) -{ - memset(desc_virt, 0, XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc)); -} - /* xdma_desc() - Fill a descriptor with the transfer details * * @desc pointer to descriptor to be filled @@ -2320,9 +2381,15 @@ static void transfer_abort(struct xdma_engine *engine, { struct xdma_transfer *head; - BUG_ON(!engine); - BUG_ON(!transfer); - BUG_ON(transfer->desc_num == 0); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return; + } + if (unlikely(!transfer || (transfer->desc_num == 0))) { + pr_err("engine %s, xfer 0x%p, desc 0.\n", + engine->name, transfer); + return; + } pr_info("abort transfer 0x%p, desc %d, engine desc queued %d.\n", transfer, transfer->desc_num, engine->desc_dequeued); @@ -2354,10 +2421,16 @@ static int transfer_queue(struct xdma_engine *engine, struct xdma_dev *xdev; unsigned long flags; - BUG_ON(!engine); - BUG_ON(!engine->xdev); - BUG_ON(!transfer); - BUG_ON(transfer->desc_num == 0); + if (unlikely(!engine || !engine->xdev)) { + pr_err("bad engine 0x%p, xdev 0x%p.\n", + engine, engine ? 
engine->xdev : NULL); + return -EINVAL; + } + if (unlikely(!transfer || (transfer->desc_num == 0))) { + pr_err("engine %s, xfer 0x%p, desc 0.\n", + engine->name, transfer); + return -EINVAL; + } dbg_tfr("transfer_queue(transfer=0x%p).\n", transfer); xdev = engine->xdev; @@ -2473,8 +2546,10 @@ static void engine_free_resource(struct xdma_engine *engine) static void engine_destroy(struct xdma_dev *xdev, struct xdma_engine *engine) { - BUG_ON(!xdev); - BUG_ON(!engine); + if (unlikely(!xdev || !engine)) { + pr_err("xdev 0x%p, engine 0x%p.\n", xdev, engine); + return; + } dbg_sg("Shutting down engine %s%d", engine->name, engine->channel); @@ -2514,26 +2589,26 @@ struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine) /* pick first transfer on the queue (was submitted to engine) */ transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, entry); - BUG_ON(!transfer); xdma_engine_stop(engine); + engine->running = 0; - if (transfer->cyclic) { + if (transfer && transfer->cyclic) { if (engine->xdma_perf) dbg_perf("Stopping perf transfer on %s\n", engine->name); else dbg_perf("Stopping cyclic transfer on %s\n", engine->name); - /* make sure the handler sees correct transfer state */ - transfer->cyclic = 1; - /* - * set STOP flag and interrupt on completion, on the - * last descriptor - */ - xdma_desc_control_set( - transfer->desc_virt + transfer->desc_num - 1, - XDMA_DESC_COMPLETED | XDMA_DESC_STOPPED); + + /* free up the buffer allocated for perf run */ + if (engine->perf_buf_virt) + dma_free_coherent(&engine->xdev->pdev->dev, + engine->xdma_perf->transfer_size, + engine->perf_buf_virt, + engine->perf_buf_bus); + engine->perf_buf_virt = NULL; + list_del(&transfer->entry); } else { dbg_sg("(engine=%p) running transfer is not cyclic\n", engine); @@ -2543,7 +2618,6 @@ struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine) } return transfer; } -EXPORT_SYMBOL_GPL(engine_cyclic_stop); static int engine_writeback_setup(struct xdma_engine 
*engine) { @@ -2551,9 +2625,11 @@ static int engine_writeback_setup(struct xdma_engine *engine) struct xdma_dev *xdev; struct xdma_poll_wb *writeback; - BUG_ON(!engine); + if (unlikely(!engine || !engine->xdev)) { + pr_err("engine 0x%p, xdev NULL.\n", engine); + return -EINVAL; + } xdev = engine->xdev; - BUG_ON(!xdev); /* * RTO - doing the allocation per engine is wasteful since a full page @@ -2755,7 +2831,7 @@ static int engine_init(struct xdma_engine *engine, struct xdma_dev *xdev, static void transfer_destroy(struct xdma_dev *xdev, struct xdma_transfer *xfer) { /* free descriptors */ - xdma_desc_done(xfer->desc_virt); + memset(xfer->desc_virt, 0, xfer->desc_num * sizeof(struct xdma_desc)); if (xfer->last_in_request && (xfer->flags & XFER_FLAG_NEED_UNMAP)) { struct sg_table *sgt = xfer->sgt; @@ -2803,6 +2879,7 @@ static int transfer_init(struct xdma_engine *engine, struct xdma_request_cb *req int i = 0; int last = 0; u32 control; + int rv; memset(xfer, 0, sizeof(*xfer)); @@ -2815,7 +2892,9 @@ static int transfer_init(struct xdma_engine *engine, struct xdma_request_cb *req xfer->desc_virt = engine->desc; xfer->desc_bus = engine->desc_bus; - transfer_desc_init(xfer, desc_max); + rv = transfer_desc_init(xfer, desc_max); + if (rv < 0) + return rv; dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)xfer->desc_bus); @@ -2828,7 +2907,9 @@ static int transfer_init(struct xdma_engine *engine, struct xdma_request_cb *req control = XDMA_DESC_STOPPED; control |= XDMA_DESC_EOP; control |= XDMA_DESC_COMPLETED; - xdma_desc_control_set(xfer->desc_virt + last, control); + rv = xdma_desc_control_set(xfer->desc_virt + last, control); + if (rv < 0) + return rv; xfer->desc_num = xfer->desc_adjacent = desc_max; @@ -2940,9 +3021,19 @@ static struct xdma_request_cb * xdma_init_request(struct sg_table *sgt, tlen = 0; } j++; + if (j > max) + break; } } - BUG_ON(j > max); + + if (unlikely(j > max)) { + pr_err("too many sdesc %d > %d\n", j, max); +#ifdef __LIBXDMA_DEBUG__ + 
xdma_request_cb_dump(req); +#endif + xdma_request_free(req); + return NULL; + } req->sw_desc_cnt = j; #ifdef __LIBXDMA_DEBUG__ @@ -2988,8 +3079,11 @@ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, return -EINVAL; } - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); + if (unlikely(!engine || (engine->magic != MAGIC_ENGINE))) { + pr_err("bad engine 0x%p, magic 0x%lx.\n", + engine, engine ? engine->magic : 0UL); + return -EINVAL; + } xdev = engine->xdev; if (xdma_device_flag_check(xdev, XDEV_FLAG_OFFLINE)) { @@ -3012,7 +3106,10 @@ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, } sgt->nents = nents; } else { - BUG_ON(!sgt->nents); + if (unlikely(!sgt->nents)) { + pr_err("%s, sgt NOT dma_mapped.\n", engine->name); + return -EINVAL; + } } req = xdma_init_request(sgt, ep_addr); @@ -3026,17 +3123,16 @@ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, sg = sgt->sgl; nents = req->sw_desc_cnt; + mutex_lock(&engine->desc_lock); + while (nents) { unsigned long flags; struct xdma_transfer *xfer; - /* one transfer at a time */ - spin_lock(&engine->desc_lock); - /* build transfer */ rv = transfer_init(engine, req); if (rv < 0) { - spin_unlock(&engine->desc_lock); + mutex_unlock(&engine->desc_lock); goto unmap_sgl; } xfer = &req->xfer; @@ -3061,7 +3157,7 @@ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, rv = transfer_queue(engine, xfer); if (rv < 0) { - spin_unlock(&engine->desc_lock); + mutex_unlock(&engine->desc_lock); pr_info("unable to submit %s, %d.\n", engine->name, rv); goto unmap_sgl; } @@ -3128,11 +3224,11 @@ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, } transfer_destroy(xdev, xfer); - spin_unlock(&engine->desc_lock); if (rv < 0) - goto unmap_sgl; + break; } /* while (sg) */ + mutex_unlock(&engine->desc_lock); unmap_sgl: if (!dma_mapped && sgt->nents) { @@ -3148,33 +3244,43 @@ ssize_t xdma_xfer_submit(void 
*dev_hndl, int channel, bool write, u64 ep_addr, return done; } -EXPORT_SYMBOL_GPL(xdma_xfer_submit); int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine) { - u8 *buffer_virt; u32 max_consistent_size = 128 * 32 * 1024; /* 1024 pages, 4MB */ - dma_addr_t buffer_bus; /* bus address */ struct xdma_transfer *transfer; u64 ep_addr = 0; int num_desc_in_a_loop = 128; int size_in_desc = engine->xdma_perf->transfer_size; int size = size_in_desc * num_desc_in_a_loop; + int free_desc = 0; int i; + int rv = -ENOMEM; - BUG_ON(size_in_desc > max_consistent_size); + if (unlikely(size_in_desc > max_consistent_size)) { + pr_err("%s, size too big %d > %u.\n", + engine->name, size_in_desc, max_consistent_size); + return -EINVAL; + } if (size > max_consistent_size) { size = max_consistent_size; num_desc_in_a_loop = size / size_in_desc; } - buffer_virt = dma_alloc_coherent(&xdev->pdev->dev, size, - &buffer_bus, GFP_KERNEL); + engine->perf_buf_virt = dma_alloc_coherent(&xdev->pdev->dev, size, + &engine->perf_buf_bus, GFP_KERNEL); + if (unlikely(!engine->perf_buf_virt)) { + pr_err("engine %s perf buf OOM.\n", engine->name); + return -ENOMEM; + } /* allocate transfer data structure */ transfer = kzalloc(sizeof(struct xdma_transfer), GFP_KERNEL); - BUG_ON(!transfer); + if (unlikely(!transfer)) { + pr_err("engine %s transfer OOM.\n", engine->name); + goto free_buffer; + } /* 0 = write engine (to_dev=0) , 1 = read engine (to_dev=1) */ transfer->dir = engine->dir; @@ -3186,21 +3292,28 @@ int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine) engine->desc = dma_alloc_coherent(&xdev->pdev->dev, num_desc_in_a_loop * sizeof(struct xdma_desc), &engine->desc_bus, GFP_KERNEL); - BUG_ON(!engine->desc); + if (unlikely(!engine->desc)) { + pr_err("%s desc OOM.\n", engine->name); + goto free_xfer; + } dbg_init("device %s, engine %s pre-alloc desc 0x%p,0x%llx.\n", dev_name(&xdev->pdev->dev), engine->name, engine->desc, engine->desc_bus); + free_desc = 1; } 
transfer->desc_virt = engine->desc; transfer->desc_bus = engine->desc_bus; - transfer_desc_init(transfer, transfer->desc_num); + rv = transfer_desc_init(transfer, transfer->desc_num); + if (rv < 0) + goto free_desc; dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)transfer->desc_bus); for (i = 0; i < transfer->desc_num; i++) { struct xdma_desc *desc = transfer->desc_virt + i; - dma_addr_t rc_bus_addr = buffer_bus + size_in_desc * i; + dma_addr_t rc_bus_addr = engine->perf_buf_bus + + size_in_desc * i; /* fill in descriptor entry with transfer details */ xdma_desc_set(desc, rc_bus_addr, ep_addr, size_in_desc, @@ -3208,7 +3321,12 @@ int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine) } /* stop engine and request interrupt on last descriptor */ - xdma_desc_control_set(transfer->desc_virt, 0); + rv = xdma_desc_control_set(transfer->desc_virt, 0); + if (rv < 0) { + pr_err("%s: Failed to set desc control\n", engine->name); + goto free_desc; + } + /* create a linked loop */ xdma_desc_link(transfer->desc_virt + transfer->desc_num - 1, transfer->desc_virt, transfer->desc_bus); @@ -3218,16 +3336,34 @@ int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine) /* initialize wait queue */ init_waitqueue_head(&transfer->wq); - //printk("=== Descriptor print for PERF \n"); - //transfer_dump(transfer); - dbg_perf("Queueing XDMA I/O %s request for performance measurement.\n", engine->dir ? 
"write (to dev)" : "read (from dev)"); - transfer_queue(engine, transfer); + rv = transfer_queue(engine, transfer); + if (rv < 0) + goto free_desc; + return 0; +free_desc: + if (free_desc && engine->desc) + dma_free_coherent(&xdev->pdev->dev, + num_desc_in_a_loop * sizeof(struct xdma_desc), + engine->desc, engine->desc_bus); + engine->desc = NULL; + +free_xfer: + if (transfer) { + list_del(&transfer->entry); + kfree(transfer); + } + +free_buffer: + if (engine->perf_buf_virt) + dma_free_coherent(&xdev->pdev->dev, size_in_desc, + engine->perf_buf_virt, engine->perf_buf_bus); + engine->perf_buf_virt = NULL; + return rv; } -EXPORT_SYMBOL_GPL(xdma_performance_submit); static struct xdma_dev *alloc_dev_instance(struct pci_dev *pdev) { @@ -3235,12 +3371,15 @@ static struct xdma_dev *alloc_dev_instance(struct pci_dev *pdev) struct xdma_dev *xdev; struct xdma_engine *engine; - BUG_ON(!pdev); + if (unlikely(!pdev)) { + pr_err("pdev NULL.\n"); + return NULL; + } /* allocate zeroed device book keeping structure */ xdev = kzalloc(sizeof(struct xdma_dev), GFP_KERNEL); if (!xdev) { - pr_info("OOM, xdma_dev.\n"); + pr_info("xdev OOM.\n"); return NULL; } spin_lock_init(&xdev->lock); @@ -3267,7 +3406,7 @@ static struct xdma_dev *alloc_dev_instance(struct pci_dev *pdev) engine = xdev->engine_h2c; for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { spin_lock_init(&engine->lock); - spin_lock_init(&engine->desc_lock); + mutex_init(&engine->desc_lock); INIT_LIST_HEAD(&engine->transfer_list); init_waitqueue_head(&engine->shutdown_wq); init_waitqueue_head(&engine->xdma_perf_wq); @@ -3276,7 +3415,7 @@ static struct xdma_dev *alloc_dev_instance(struct pci_dev *pdev) engine = xdev->engine_c2h; for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { spin_lock_init(&engine->lock); - spin_lock_init(&engine->desc_lock); + mutex_init(&engine->desc_lock); INIT_LIST_HEAD(&engine->transfer_list); init_waitqueue_head(&engine->shutdown_wq); init_waitqueue_head(&engine->xdma_perf_wq); @@ -3289,8 
+3428,10 @@ static int request_regions(struct xdma_dev *xdev, struct pci_dev *pdev) { int rv; - BUG_ON(!xdev); - BUG_ON(!pdev); + if (unlikely(!xdev || !pdev)) { + pr_err("xdev 0x%p, pdev 0x%p.\n", xdev, pdev); + return -EINVAL; + } dbg_init("pci_request_regions()\n"); rv = pci_request_regions(pdev, xdev->mod_name); @@ -3308,7 +3449,10 @@ static int request_regions(struct xdma_dev *xdev, struct pci_dev *pdev) static int set_dma_mask(struct pci_dev *pdev) { - BUG_ON(!pdev); + if (unlikely(!pdev)) { + pr_err("pdev NULL.\n"); + return -EINVAL; + } dbg_init("sizeof(dma_addr_t) == %ld\n", sizeof(dma_addr_t)); /* 64-bit addressing capability for XDMA? */ @@ -3338,7 +3482,10 @@ static u32 get_engine_channel_id(struct engine_regs *regs) { u32 value; - BUG_ON(!regs); + if (unlikely(!regs)) { + pr_err("regs NULL.\n"); + return 0xFFFFFFFF; + } value = read_register(&regs->identifier); @@ -3349,7 +3496,10 @@ static u32 get_engine_id(struct engine_regs *regs) { u32 value; - BUG_ON(!regs); + if (unlikely(!regs)) { + pr_err("regs NULL.\n"); + return 0xFFFFFFFF; + } value = read_register(&regs->identifier); return (value & 0xffff0000U) >> 16; @@ -3360,7 +3510,10 @@ static void remove_engines(struct xdma_dev *xdev) struct xdma_engine *engine; int i; - BUG_ON(!xdev); + if (unlikely(!xdev)) { + pr_err("xdev NULL.\n"); + return; + } /* iterate over channels */ for (i = 0; i < xdev->h2c_channel_max; i++) { @@ -3439,7 +3592,10 @@ static int probe_engines(struct xdma_dev *xdev) int i; int rv = 0; - BUG_ON(!xdev); + if (unlikely(!xdev)) { + pr_err("xdev NULL.\n"); + return -EINVAL; + } /* iterate over channels */ for (i = 0; i < xdev->h2c_channel_max; i++) { @@ -3460,12 +3616,12 @@ static int probe_engines(struct xdma_dev *xdev) } #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) +static void pci_enable_capability(struct pci_dev *pdev, int cap) { - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); + 
pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, cap); } #else -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) +static void pci_enable_capability(struct pci_dev *pdev, int cap) { u16 v; int pos; @@ -3473,50 +3629,12 @@ static void pci_enable_relaxed_ordering(struct pci_dev *pdev) pos = pci_pcie_cap(pdev); if (pos > 0) { pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &v); - v |= PCI_EXP_DEVCTL_RELAX_EN; + v |= cap; pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, v); } } #endif -static void pci_check_extended_tag(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - u16 cap; - u32 v; - void *__iomem reg; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) - pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); -#else - int pos; - - pos = pci_pcie_cap(pdev); - if (pos > 0) - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &cap); - else { - pr_info("pdev 0x%p, unable to access pcie cap.\n", pdev); - return; - } -#endif - - if ((cap & PCI_EXP_DEVCTL_EXT_TAG)) - return; - - /* extended tag not enabled */ - pr_info("0x%p EXT_TAG disabled.\n", pdev); - - if (xdev->config_bar_idx < 0) { - pr_info("pdev 0x%p, xdev 0x%p, config bar UNKNOWN.\n", - pdev, xdev); - return; - } - - reg = xdev->bar[xdev->config_bar_idx] + XDMA_OFS_CONFIG + 0x4C; - v = read_register(reg); - v = (v & 0xFF) | (((u32)32) << 8); - write_register(v, reg, XDMA_OFS_CONFIG + 0x4C); -} - void *xdma_device_open(const char *mname, struct pci_dev *pdev, int *user_max, int *h2c_channel_max, int *c2h_channel_max) { @@ -3556,9 +3674,10 @@ void *xdma_device_open(const char *mname, struct pci_dev *pdev, int *user_max, pci_check_intr_pend(pdev); /* enable relaxed ordering */ - pci_enable_relaxed_ordering(pdev); + pci_enable_capability(pdev, PCI_EXP_DEVCTL_RELAX_EN); - pci_check_extended_tag(xdev, pdev); + /* enable extended tag */ + pci_enable_capability(pdev, PCI_EXP_DEVCTL_EXT_TAG); /* force MRRS to be 512 */ rv = pcie_set_readrq(pdev, 512); @@ -3631,7 +3750,6 @@ void *xdma_device_open(const char 
*mname, struct pci_dev *pdev, int *user_max, kfree(xdev); return NULL; } -EXPORT_SYMBOL_GPL(xdma_device_open); void xdma_device_close(struct pci_dev *pdev, void *dev_hndl) { @@ -3676,7 +3794,6 @@ void xdma_device_close(struct pci_dev *pdev, void *dev_hndl) kfree(xdev); } -EXPORT_SYMBOL_GPL(xdma_device_close); void xdma_device_offline(struct pci_dev *pdev, void *dev_hndl) { @@ -3690,11 +3807,11 @@ void xdma_device_offline(struct pci_dev *pdev, void *dev_hndl) if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) return; -pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); + pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); xdma_device_flag_set(xdev, XDEV_FLAG_OFFLINE); /* wait for all engines to be idle */ - for (i = 0; i < xdev->h2c_channel_max; i++) { + for (i = 0; i < xdev->h2c_channel_max; i++) { unsigned long flags; engine = &xdev->engine_h2c[i]; @@ -3709,7 +3826,7 @@ pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); } } - for (i = 0; i < xdev->c2h_channel_max; i++) { + for (i = 0; i < xdev->c2h_channel_max; i++) { unsigned long flags; engine = &xdev->engine_c2h[i]; @@ -3731,7 +3848,6 @@ pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); pr_info("xdev 0x%p, done.\n", xdev); } -EXPORT_SYMBOL_GPL(xdma_device_offline); void xdma_device_online(struct pci_dev *pdev, void *dev_hndl) { @@ -3778,9 +3894,8 @@ pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); } xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); -pr_info("xdev 0x%p, done.\n", xdev); + pr_info("xdev 0x%p, done.\n", xdev); } -EXPORT_SYMBOL_GPL(xdma_device_online); int xdma_device_restart(struct pci_dev *pdev, void *dev_hndl) { @@ -3795,7 +3910,6 @@ int xdma_device_restart(struct pci_dev *pdev, void *dev_hndl) pr_info("NOT implemented, 0x%p.\n", xdev); return -EINVAL; } -EXPORT_SYMBOL_GPL(xdma_device_restart); int xdma_user_isr_register(void *dev_hndl, unsigned int mask, irq_handler_t handler, void *dev) @@ -3822,7 +3936,6 @@ int xdma_user_isr_register(void *dev_hndl, unsigned int mask, return 0; } 
-EXPORT_SYMBOL_GPL(xdma_user_isr_register); int xdma_user_isr_enable(void *dev_hndl, unsigned int mask) { @@ -3841,7 +3954,6 @@ int xdma_user_isr_enable(void *dev_hndl, unsigned int mask) return 0; } -EXPORT_SYMBOL_GPL(xdma_user_isr_enable); int xdma_user_isr_disable(void *dev_hndl, unsigned int mask) { @@ -3859,23 +3971,7 @@ int xdma_user_isr_disable(void *dev_hndl, unsigned int mask) return 0; } -EXPORT_SYMBOL_GPL(xdma_user_isr_disable); - -#ifdef __LIBXDMA_MOD__ -static int __init xdma_base_init(void) -{ - printk(KERN_INFO "%s", version); - return 0; -} -static void __exit xdma_base_exit(void) -{ - return; -} - -module_init(xdma_base_init); -module_exit(xdma_base_exit); -#endif /* makes an existing transfer cyclic */ static void xdma_transfer_cyclic(struct xdma_transfer *transfer) { @@ -3892,11 +3988,13 @@ static int transfer_monitor_cyclic(struct xdma_engine *engine, struct xdma_result *result; int rc = 0; - BUG_ON(!engine); - BUG_ON(!transfer); + if (unlikely(!engine || !engine->cyclic_result || !transfer)) { + pr_err("engine 0x%p, cyclic_result 0x%p, xfer 0x%p.\n", + engine, engine->cyclic_result, transfer); + return -EINVAL; + } result = engine->cyclic_result; - BUG_ON(!result); if (poll_mode) { int i ; @@ -3956,8 +4054,10 @@ static int copy_cyclic_to_user(struct xdma_engine *engine, int pkt_length, struct scatterlist *sg; int more = pkt_length; - BUG_ON(!engine); - BUG_ON(!buf); + if (unlikely(!buf || !engine)) { + pr_err("engine 0x%p, buf 0x%p.\n", engine, buf); + return -EINVAL; + } dbg_tfr("%s, pkt_len %d, head %d, user buf idx %u.\n", engine->name, pkt_length, head, engine->user_buffer_index); @@ -4021,9 +4121,11 @@ static int complete_cyclic(struct xdma_engine *engine, char __user *buf, int num_credit = 0; unsigned long flags; - BUG_ON(!engine); + if (unlikely(!engine || !engine->cyclic_result)) { + pr_err("engine 0x%p, cyclic_result NULL.\n", engine); + return -EINVAL; + } result = engine->cyclic_result; - BUG_ON(!result); 
spin_lock_irqsave(&engine->lock, flags); @@ -4104,11 +4206,17 @@ ssize_t xdma_engine_read_cyclic(struct xdma_engine *engine, char __user *buf, int rc_len = 0; struct xdma_transfer *transfer; - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); + if (unlikely(!engine || (engine->magic != MAGIC_ENGINE))) { + pr_err("bad engine 0x%p, magic 0x%lx.\n", + engine, engine ? engine->magic : 0UL); + return -EINVAL; + } + if (unlikely(!engine->cyclic_req)) { + pr_err("engine %s, cyclic_req NULL.\n", engine->name); + return -EINVAL; + } transfer = &engine->cyclic_req->xfer; - BUG_ON(!transfer); engine->user_buffer_index = 0; @@ -4207,9 +4315,11 @@ int xdma_cyclic_transfer_setup(struct xdma_engine *engine) int i; int rc; - BUG_ON(!engine); + if (unlikely(!engine || !engine->xdev)) { + pr_err("engine 0x%p, xdev NULL.\n", engine); + return -EINVAL; + } xdev = engine->xdev; - BUG_ON(!xdev); if (engine->cyclic_req) { pr_info("%s: exclusive access already taken.\n", @@ -4272,18 +4382,17 @@ int xdma_cyclic_transfer_setup(struct xdma_engine *engine) transfer_dump(xfer); #endif - if(enable_credit_mp){ - //write_register(RX_BUF_PAGES,&engine->sgdma_regs->credits); + if (enable_credit_mp) write_register(128, &engine->sgdma_regs->credits, 0); - } spin_unlock_irqrestore(&engine->lock, flags); /* start cyclic transfer */ - transfer_queue(engine, xfer); - - return 0; + rc = transfer_queue(engine, xfer); + if (!rc) + return 0; + spin_lock_irqsave(&engine->lock, flags); /* unwind on errors */ err_out: if (engine->cyclic_req) { @@ -4304,10 +4413,12 @@ int xdma_cyclic_transfer_setup(struct xdma_engine *engine) return rc; } - static int cyclic_shutdown_polled(struct xdma_engine *engine) { - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return -EINVAL; + } spin_lock(&engine->lock); @@ -4336,18 +4447,13 @@ static int cyclic_shutdown_interrupt(struct xdma_engine *engine) { int rc; - BUG_ON(!engine); + if (unlikely(!engine)) { + pr_err("engine NULL.\n"); + return 
-EINVAL; + } rc = wait_event_interruptible_timeout(engine->shutdown_wq, !engine->running, msecs_to_jiffies(10000)); - -#if 0 - if (rc) { - dbg_tfr("wait_event_interruptible=%d\n", rc); - return rc; - } -#endif - if (engine->running) { pr_info("%s still running?!, %d\n", engine->name, rc); return -EINVAL; @@ -4364,6 +4470,10 @@ int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) unsigned long flags; transfer = engine_cyclic_stop(engine); + if (transfer == NULL) { + pr_err("Failed to stop cyclic engine\n"); + return -EINVAL; + } spin_lock_irqsave(&engine->lock, flags); if (transfer) { @@ -4378,16 +4488,20 @@ int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) spin_unlock_irqrestore(&engine->lock, flags); /* wait for engine to be no longer running */ - if (poll_mode) + if (poll_mode) rc = cyclic_shutdown_polled(engine); else rc = cyclic_shutdown_interrupt(engine); + if (rc < 0) { + pr_err("Failed to shutdown cyclic transfers\n"); + return rc; + } /* obtain spin lock to atomically remove resources */ spin_lock_irqsave(&engine->lock, flags); if (engine->cyclic_req) { - xdma_request_free(engine->cyclic_req); + xdma_request_free(engine->cyclic_req); engine->cyclic_req = NULL; } @@ -4413,7 +4527,7 @@ int engine_addrmode_set(struct xdma_engine *engine, unsigned long arg) dbg_perf("IOCTL_XDMA_ADDRMODE_SET\n"); rv = get_user(dst, (int __user *)arg); - if (rv == 0) { + if (rv == 0) { engine->non_incr_addr = !!dst; if (engine->non_incr_addr) write_register(w, &engine->regs->control_w1s, @@ -4428,4 +4542,3 @@ int engine_addrmode_set(struct xdma_engine *engine, unsigned long arg) return rv; } - diff --git a/sdk/linux_kernel_drivers/xdma/libxdma.h b/sdk/linux_kernel_drivers/xdma/libxdma.h old mode 100755 new mode 100644 index 07d016c2..1fbee5aa --- a/sdk/linux_kernel_drivers/xdma/libxdma.h +++ b/sdk/linux_kernel_drivers/xdma/libxdma.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux 
Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -36,26 +36,32 @@ #include #include +/* + * if the config bar is fixed, the driver does not neeed to search through + * all of the bars + */ +//#define XDMA_CONFIG_BAR_NUM 1 + /* Switch debug printing on/off */ -#define XDMA_DEBUG 0 +#define XDMA_DEBUG 0 /* SECTION: Preprocessor macros/constants */ -#define XDMA_BAR_NUM (6) +#define XDMA_BAR_NUM (6) /* maximum amount of register space to map */ -#define XDMA_BAR_SIZE (0x8000UL) +#define XDMA_BAR_SIZE (0x8000UL) /* Use this definition to poll several times between calls to schedule */ -#define NUM_POLLS_PER_SCHED 100 +#define NUM_POLLS_PER_SCHED 100 -#define XDMA_CHANNEL_NUM_MAX (4) +#define XDMA_CHANNEL_NUM_MAX (4) /* * interrupts per engine, rad2_vul.sv:237 * .REG_IRQ_OUT (reg_irq_from_ch[(channel*2) +: 2]), */ -#define XDMA_ENG_IRQ_NUM (1) -#define MAX_EXTRA_ADJ (15) -#define RX_STATUS_EOP (1) +#define XDMA_ENG_IRQ_NUM (1) +#define MAX_EXTRA_ADJ (0x3F) +#define RX_STATUS_EOP (1) /* Target internal components on XDMA control BAR */ #define XDMA_OFS_INT_CTRL (0x2000UL) @@ -65,7 +71,7 @@ #define XDMA_TRANSFER_MAX_DESC (2048) /* maximum size of a single DMA transfer descriptor */ -#define XDMA_DESC_BLEN_BITS 28 +#define XDMA_DESC_BLEN_BITS 28 #define XDMA_DESC_BLEN_MAX ((1 << (XDMA_DESC_BLEN_BITS)) - 1) /* bits of the SG DMA control register */ @@ -157,7 +163,7 @@ #define XDMA_ID_C2H 0x1fc1U /* for C2H AXI-ST mode */ -#define CYCLIC_RX_PAGES_MAX 256 +#define CYCLIC_RX_PAGES_MAX 256 #define LS_BYTE_MASK 0x000000FFUL @@ -442,7 +448,8 @@ struct xdma_engine { int max_extra_adj; /* descriptor prefetch capability */ int desc_dequeued; /* num descriptors of completed transfers */ u32 status; /* last known status of device */ - u32 interrupt_enable_mask_value;/* only used for MSIX mode to 
store per-engine interrupt mask value */ + /* only used for MSIX mode to store per-engine interrupt mask value */ + u32 interrupt_enable_mask_value; /* Transfer list management */ struct list_head transfer_list; /* queue of transfers */ @@ -452,6 +459,10 @@ struct xdma_engine { dma_addr_t cyclic_result_bus; /* bus addr for transfer */ struct xdma_request_cb *cyclic_req; struct sg_table cyclic_sgt; + + u8 *perf_buf_virt; + dma_addr_t perf_buf_bus; /* bus address */ + u8 eop_found; /* used only for cyclic(rx:c2h) */ int rx_tail; /* follows the HW */ @@ -473,7 +484,7 @@ struct xdma_engine { u32 irq_bitmask; /* IRQ bit mask for this engine */ struct work_struct work; /* Work queue for interrupt handling */ - spinlock_t desc_lock; /* protects concurrent access */ + struct mutex desc_lock; /* protects concurrent access */ dma_addr_t desc_bus; struct xdma_desc *desc; @@ -490,14 +501,14 @@ struct xdma_user_irq { wait_queue_head_t events_wq; /* wait queue to sync waiting threads */ irq_handler_t handler; - void *dev; + void *dev; }; /* XDMA PCIe device specific book-keeping */ #define XDEV_FLAG_OFFLINE 0x1 struct xdma_dev { struct list_head list_head; - struct list_head rcu_node; + struct list_head rcu_node; unsigned long magic; /* structure ID for sanity checks */ struct pci_dev *pdev; /* pci device struct from probe() */ @@ -509,7 +520,7 @@ struct xdma_dev { unsigned int flags; /* PCIe BAR management */ - void *__iomem bar[XDMA_BAR_NUM]; /* addresses for mapped BARs */ + void __iomem *bar[XDMA_BAR_NUM]; /* addresses for mapped BARs */ int user_bar_idx; /* BAR index of user logic */ int config_bar_idx; /* BAR index of XDMA config logic */ int bypass_bar_idx; /* BAR index of XDMA bypass logic */ @@ -605,8 +616,8 @@ void get_perf_stats(struct xdma_engine *engine); int xdma_cyclic_transfer_setup(struct xdma_engine *engine); int xdma_cyclic_transfer_teardown(struct xdma_engine *engine); -ssize_t xdma_engine_read_cyclic(struct xdma_engine *, char __user *, size_t, - int); 
+ssize_t xdma_engine_read_cyclic(struct xdma_engine *engine, char __user *buf, + size_t count, int timeout_ms); int engine_addrmode_set(struct xdma_engine *engine, unsigned long arg); #endif /* XDMA_LIB_H */ diff --git a/sdk/linux_kernel_drivers/xdma/libxdma_api.h b/sdk/linux_kernel_drivers/xdma/libxdma_api.h index bf043eb1..d4ed4ec5 100644 --- a/sdk/linux_kernel_drivers/xdma/libxdma_api.h +++ b/sdk/linux_kernel_drivers/xdma/libxdma_api.h @@ -1,12 +1,24 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver + * Copyright(c) 2015 - 2020 Xilinx, Inc. * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "LICENSE". 
* * Karen Xie - * Leon Woestenberg * ******************************************************************************/ @@ -70,10 +82,7 @@ void xdma_device_close(struct pci_dev *pdev, void *dev_handle); /* * xdma_device_restart - restart the fpga * @pdev: ptr to struct pci_dev - * TODO: - * may need more refining on the parameter list * return < 0 in case of error - * TODO: exact error code will be defined later */ int xdma_device_restart(struct pci_dev *pdev, void *dev_handle); @@ -94,7 +103,6 @@ int xdma_device_restart(struct pci_dev *pdev, void *dev_handle); * @name: to be passed to the handler, ignored if handler is NULL` * @dev: to be passed to the handler, ignored if handler is NULL` * return < 0 in case of error - * TODO: exact error code will be defined later */ int xdma_user_isr_register(void *dev_hndl, unsigned int mask, irq_handler_t handler, void *dev); @@ -104,7 +112,6 @@ int xdma_user_isr_register(void *dev_hndl, unsigned int mask, * @pdev: ptr to the the pci_dev struct * @mask: bitmask of user interrupts (0 ~ 15)to be registered * return < 0 in case of error - * TODO: exact error code will be defined later */ int xdma_user_isr_enable(void *dev_hndl, unsigned int mask); int xdma_user_isr_disable(void *dev_hndl, unsigned int mask); @@ -121,15 +128,8 @@ int xdma_user_isr_disable(void *dev_hndl, unsigned int mask); * @timeout: timeout in mili-seconds, *currently ignored * return # of bytes transfered or * < 0 in case of error - * TODO: exact error code will be defined later */ ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, struct sg_table *sgt, bool dma_mapped, int timeout_ms); - - -/////////////////////missing API//////////////////// - -//xdma_get_channle_state - if no interrupt on DMA hang is available -//xdma_channle_restart #endif diff --git a/sdk/linux_kernel_drivers/xdma/user_defined_interrupts_README.md b/sdk/linux_kernel_drivers/xdma/user_defined_interrupts_README.md index aa8709f1..f8968d99 100644 --- 
a/sdk/linux_kernel_drivers/xdma/user_defined_interrupts_README.md +++ b/sdk/linux_kernel_drivers/xdma/user_defined_interrupts_README.md @@ -34,7 +34,7 @@ The driver needs to be installed once, regardless of how many FPGA slots are ava The next example shows how an application can register to two events (aka user-defined interrupts) on slot 0 -``` +```C fd4=open(“/dev/xdma0_events_4”, O_RDONLY); fd6=open(“/dev/xdma0_events_6”, O_RDONLY); diff --git a/sdk/linux_kernel_drivers/xdma/version.h b/sdk/linux_kernel_drivers/xdma/version.h old mode 100755 new mode 100644 index 64b91799..5ed57832 --- a/sdk/linux_kernel_drivers/xdma/version.h +++ b/sdk/linux_kernel_drivers/xdma/version.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,12 +21,13 @@ * Karen Xie * ******************************************************************************/ + #ifndef __XDMA_VERSION_H__ #define __XDMA_VERSION_H__ -#define DRV_MOD_MAJOR 2017 +#define DRV_MOD_MAJOR 2020 #define DRV_MOD_MINOR 1 -#define DRV_MOD_PATCHLEVEL 47 +#define DRV_MOD_PATCHLEVEL 01 #define DRV_MODULE_VERSION \ __stringify(DRV_MOD_MAJOR) "." \ diff --git a/sdk/linux_kernel_drivers/xdma/xdma_cdev.c b/sdk/linux_kernel_drivers/xdma/xdma_cdev.c index 8a331161..a5c3ac55 100644 --- a/sdk/linux_kernel_drivers/xdma/xdma_cdev.c +++ b/sdk/linux_kernel_drivers/xdma/xdma_cdev.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,11 +21,14 @@ * Karen Xie * ******************************************************************************/ + #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include "xdma_cdev.h" -struct class *g_xdma_class; +static struct class *g_xdma_class; + +struct kmem_cache *cdev_cache; enum cdev_type { CHAR_USER, @@ -52,12 +55,12 @@ static const char * const devnode_names[] = { }; enum xpdev_flags_bits { - XDF_CDEV_USER, - XDF_CDEV_CTRL, - XDF_CDEV_XVC, - XDF_CDEV_EVENT, - XDF_CDEV_SG, - XDF_CDEV_BYPASS, + XDF_CDEV_USER, + XDF_CDEV_CTRL, + XDF_CDEV_XVC, + XDF_CDEV_EVENT, + XDF_CDEV_SG, + XDF_CDEV_BYPASS, }; static inline void xpdev_flag_set(struct xdma_pci_dev *xpdev, @@ -79,16 +82,18 @@ static inline int xpdev_flag_test(struct xdma_pci_dev *xpdev, } #ifdef __XDMA_SYSFS__ -ssize_t show_device_numbers(struct device *dev, struct device_attribute *attr, - char *buf) +ssize_t xdma_dev_instance_show(struct device *dev, + struct device_attribute *attr, + char *buf) { - struct xdma_pci_dev *xpdev = (struct xdma_pci_dev *)dev_get_drvdata(dev); + struct xdma_pci_dev *xpdev = + (struct xdma_pci_dev *)dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%d\t%d\n", xpdev->major, xpdev->xdev->idx); } -static DEVICE_ATTR(xdma_dev_instance, S_IRUGO, show_device_numbers, NULL); +static DEVICE_ATTR_RO(xdma_dev_instance); #endif static int config_kobject(struct xdma_cdev *xcdev, enum cdev_type type) @@ -102,7 +107,10 @@ static int config_kobject(struct xdma_cdev *xcdev, enum cdev_type type) case CHAR_XDMA_C2H: case CHAR_BYPASS_H2C: case CHAR_BYPASS_C2H: - BUG_ON(!engine); + if (!engine) { + pr_err("Invalid DMA engine\n"); + return rv; + } rv = kobject_set_name(&xcdev->cdev.kobj, devnode_names[type], xdev->idx, engine->channel); break; @@ -133,22 +141,23 @@ int xcdev_check(const char *fname, struct xdma_cdev *xcdev, bool 
check_engine) if (!xcdev || xcdev->magic != MAGIC_CHAR) { pr_info("%s, xcdev 0x%p, magic 0x%lx.\n", - fname, xcdev, xcdev ? xcdev->magic : 0xFFFFFFFF); + fname, xcdev, xcdev ? xcdev->magic : 0xFFFFFFFF); return -EINVAL; } - xdev = xcdev->xdev; + xdev = xcdev->xdev; if (!xdev || xdev->magic != MAGIC_DEVICE) { pr_info("%s, xdev 0x%p, magic 0x%lx.\n", - fname, xdev, xdev ? xdev->magic : 0xFFFFFFFF); + fname, xdev, xdev ? xdev->magic : 0xFFFFFFFF); return -EINVAL; } if (check_engine) { - struct xdma_engine *engine = xcdev->engine; + struct xdma_engine *engine = xcdev->engine; + if (!engine || engine->magic != MAGIC_ENGINE) { pr_info("%s, engine 0x%p, magic 0x%lx.\n", fname, - engine, engine ? engine->magic : 0xFFFFFFFF); + engine, engine ? engine->magic : 0xFFFFFFFF); return -EINVAL; } } @@ -162,7 +171,11 @@ int char_open(struct inode *inode, struct file *file) /* pointer to containing structure of the character device inode */ xcdev = container_of(inode->i_cdev, struct xdma_cdev, cdev); - BUG_ON(xcdev->magic != MAGIC_CHAR); + if (xcdev->magic != MAGIC_CHAR) { + pr_err("xcdev 0x%p inode 0x%lx magic mismatch 0x%lx\n", + xcdev, inode->i_ino, xcdev->magic); + return -EINVAL; + } /* create a reference to our char device in the opened file */ file->private_data = xcdev; @@ -177,13 +190,30 @@ int char_close(struct inode *inode, struct file *file) struct xdma_dev *xdev; struct xdma_cdev *xcdev = (struct xdma_cdev *)file->private_data; - BUG_ON(!xcdev); - BUG_ON(xcdev->magic != MAGIC_CHAR); + if (!xcdev) { + pr_err("char device with inode 0x%lx xcdev NULL\n", + inode->i_ino); + return -EINVAL; + } + + if (xcdev->magic != MAGIC_CHAR) { + pr_err("xcdev 0x%p magic mismatch 0x%lx\n", + xcdev, xcdev->magic); + return -EINVAL; + } /* fetch device specific data stored earlier during open */ xdev = xcdev->xdev; - BUG_ON(!xdev); - BUG_ON(xdev->magic != MAGIC_DEVICE); + if (!xdev) { + pr_err("char device with inode 0x%lx xdev NULL\n", + inode->i_ino); + return -EINVAL; + } + + if 
(xdev->magic != MAGIC_DEVICE) { + pr_err("xdev 0x%p magic mismatch 0x%lx\n", xdev, xdev->magic); + return -EINVAL; + } return 0; } @@ -197,40 +227,52 @@ int char_close(struct inode *inode, struct file *file) static int create_sys_device(struct xdma_cdev *xcdev, enum cdev_type type) { - struct xdma_dev *xdev = xcdev->xdev; - struct xdma_engine *engine = xcdev->engine; - int last_param; + struct xdma_dev *xdev = xcdev->xdev; + struct xdma_engine *engine = xcdev->engine; + int last_param; - if (type == CHAR_EVENTS) - last_param = xcdev->bar; - else - last_param = engine ? engine->channel : 0; + if (type == CHAR_EVENTS) + last_param = xcdev->bar; + else + last_param = engine ? engine->channel : 0; - xcdev->sys_device = device_create(g_xdma_class, &xdev->pdev->dev, - xcdev->cdevno, NULL, devnode_names[type], xdev->idx, - last_param); + xcdev->sys_device = device_create(g_xdma_class, &xdev->pdev->dev, + xcdev->cdevno, NULL, devnode_names[type], xdev->idx, + last_param); - if (!xcdev->sys_device) { - pr_err("device_create(%s) failed\n", devnode_names[type]); - return -1; - } + if (!xcdev->sys_device) { + pr_err("device_create(%s) failed\n", devnode_names[type]); + return -1; + } - return 0; + return 0; } static int destroy_xcdev(struct xdma_cdev *cdev) { if (!cdev) { pr_warn("cdev NULL.\n"); - return 0; + return -EINVAL; } if (cdev->magic != MAGIC_CHAR) { pr_warn("cdev 0x%p magic mismatch 0x%lx\n", cdev, cdev->magic); - return 0; + return -EINVAL; + } + + if (!cdev->xdev) { + pr_err("xdev NULL\n"); + return -EINVAL; + } + + if (!g_xdma_class) { + pr_err("g_xdma_class NULL\n"); + return -EINVAL; + } + + if (!cdev->sys_device) { + pr_err("cdev sys_device NULL\n"); + return -EINVAL; } - BUG_ON(!cdev->xdev); - BUG_ON(!g_xdma_class); - BUG_ON(!cdev->sys_device); if (cdev->sys_device) device_destroy(g_xdma_class, cdev->cdevno); @@ -341,58 +383,91 @@ static int create_xcdev(struct xdma_pci_dev *xpdev, struct xdma_cdev *xcdev, del_cdev: cdev_del(&xcdev->cdev); unregister_region: 
- unregister_chrdev_region(dev, XDMA_MINOR_COUNT); + unregister_chrdev_region(xcdev->cdevno, XDMA_MINOR_COUNT); return rv; } void xpdev_destroy_interfaces(struct xdma_pci_dev *xpdev) { - int i; - + int i = 0; + int rv; #ifdef __XDMA_SYSFS__ - device_remove_file(&xpdev->pdev->dev, &dev_attr_xdma_dev_instance); + device_remove_file(&xpdev->pdev->dev, &dev_attr_xdma_dev_instance); #endif if (xpdev_flag_test(xpdev, XDF_CDEV_SG)) { /* iterate over channels */ - for (i = 0; i < xpdev->h2c_channel_max; i++) + for (i = 0; i < xpdev->h2c_channel_max; i++) { /* remove SG DMA character device */ - destroy_xcdev(&xpdev->sgdma_h2c_cdev[i]); - for (i = 0; i < xpdev->c2h_channel_max; i++) - destroy_xcdev(&xpdev->sgdma_c2h_cdev[i]); + rv = destroy_xcdev(&xpdev->sgdma_h2c_cdev[i]); + if (rv < 0) + pr_err("Failed to destroy h2c xcdev %d error :0x%x\n", + i, rv); + } + for (i = 0; i < xpdev->c2h_channel_max; i++) { + rv = destroy_xcdev(&xpdev->sgdma_c2h_cdev[i]); + if (rv < 0) + pr_err("Failed to destroy c2h xcdev %d error 0x%x\n", + i, rv); + } } if (xpdev_flag_test(xpdev, XDF_CDEV_EVENT)) { - for (i = 0; i < xpdev->user_max; i++) - destroy_xcdev(&xpdev->events_cdev[i]); + for (i = 0; i < xpdev->user_max; i++) { + rv = destroy_xcdev(&xpdev->events_cdev[i]); + if (rv < 0) + pr_err("Failed to destroy cdev event %d error 0x%x\n", + i, rv); + } } /* remove control character device */ if (xpdev_flag_test(xpdev, XDF_CDEV_CTRL)) { - destroy_xcdev(&xpdev->ctrl_cdev); + rv = destroy_xcdev(&xpdev->ctrl_cdev); + if (rv < 0) + pr_err("Failed to destroy cdev ctrl event %d error 0x%x\n", + i, rv); } /* remove user character device */ if (xpdev_flag_test(xpdev, XDF_CDEV_USER)) { - destroy_xcdev(&xpdev->user_cdev); + rv = destroy_xcdev(&xpdev->user_cdev); + if (rv < 0) + pr_err("Failed to destroy user cdev %d error 0x%x\n", + i, rv); } if (xpdev_flag_test(xpdev, XDF_CDEV_XVC)) { - destroy_xcdev(&xpdev->xvc_cdev); + rv = destroy_xcdev(&xpdev->xvc_cdev); + if (rv < 0) + pr_err("Failed to destroy xvc 
cdev %d error 0x%x\n", + i, rv); } if (xpdev_flag_test(xpdev, XDF_CDEV_BYPASS)) { /* iterate over channels */ - for (i = 0; i < xpdev->h2c_channel_max; i++) + for (i = 0; i < xpdev->h2c_channel_max; i++) { /* remove DMA Bypass character device */ - destroy_xcdev(&xpdev->bypass_h2c_cdev[i]); - for (i = 0; i < xpdev->c2h_channel_max; i++) - destroy_xcdev(&xpdev->bypass_c2h_cdev[i]); - destroy_xcdev(&xpdev->bypass_cdev_base); + rv = destroy_xcdev(&xpdev->bypass_h2c_cdev[i]); + if (rv < 0) + pr_err("Failed to destroy bypass h2c cdev %d error 0x%x\n", + i, rv); + } + for (i = 0; i < xpdev->c2h_channel_max; i++) { + rv = destroy_xcdev(&xpdev->bypass_c2h_cdev[i]); + if (rv < 0) + pr_err("Failed to destroy bypass c2h %d error 0x%x\n", + i, rv); + } + rv = destroy_xcdev(&xpdev->bypass_cdev_base); + if (rv < 0) + pr_err("Failed to destroy base cdev\n"); } if (xpdev->major) - unregister_chrdev_region(MKDEV(xpdev->major, XDMA_MINOR_BASE), XDMA_MINOR_COUNT); + unregister_chrdev_region( + MKDEV(xpdev->major, XDMA_MINOR_BASE), + XDMA_MINOR_COUNT); } int xpdev_create_interfaces(struct xdma_pci_dev *xpdev) @@ -452,9 +527,8 @@ int xpdev_create_interfaces(struct xdma_pci_dev *xpdev) } xpdev_flag_set(xpdev, XDF_CDEV_SG); - /* ??? 
Bypass */ /* Initialize Bypass Character Device */ - if (xdev->bypass_bar_idx > 0){ + if (xdev->bypass_bar_idx > 0) { for (i = 0; i < xpdev->h2c_channel_max; i++) { engine = &xdev->engine_h2c[i]; @@ -519,7 +593,7 @@ int xpdev_create_interfaces(struct xdma_pci_dev *xpdev) rv = device_create_file(&xpdev->pdev->dev, &dev_attr_xdma_dev_instance); if (rv) { - pr_err("Failed to create device file \n"); + pr_err("Failed to create device file\n"); goto fail; } #endif @@ -535,10 +609,10 @@ int xpdev_create_interfaces(struct xdma_pci_dev *xpdev) int xdma_cdev_init(void) { g_xdma_class = class_create(THIS_MODULE, XDMA_NODE_NAME); - if (IS_ERR(g_xdma_class)) { - dbg_init(XDMA_NODE_NAME ": failed to create class"); - return -1; - } + if (IS_ERR(g_xdma_class)) { + dbg_init(XDMA_NODE_NAME ": failed to create class"); + return -EINVAL; + } return 0; } diff --git a/sdk/linux_kernel_drivers/xdma/xdma_cdev.h b/sdk/linux_kernel_drivers/xdma/xdma_cdev.h index 47441fca..3361e8eb 100644 --- a/sdk/linux_kernel_drivers/xdma/xdma_cdev.h +++ b/sdk/linux_kernel_drivers/xdma/xdma_cdev.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,6 +21,7 @@ * Karen Xie * ******************************************************************************/ + #ifndef __XDMA_CHRDEV_H__ #define __XDMA_CHRDEV_H__ @@ -39,13 +40,13 @@ int xdma_cdev_init(void); int char_open(struct inode *inode, struct file *file); int char_close(struct inode *inode, struct file *file); -int xcdev_check(const char *, struct xdma_cdev *, bool); - +int xcdev_check(const char *fname, struct xdma_cdev *xcdev, bool check_engine); void cdev_ctrl_init(struct xdma_cdev *xcdev); void cdev_xvc_init(struct xdma_cdev *xcdev); void cdev_event_init(struct xdma_cdev *xcdev); void cdev_sgdma_init(struct xdma_cdev *xcdev); void cdev_bypass_init(struct xdma_cdev *xcdev); +long char_ctrl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); void xpdev_destroy_interfaces(struct xdma_pci_dev *xpdev); int xpdev_create_interfaces(struct xdma_pci_dev *xpdev); diff --git a/sdk/linux_kernel_drivers/xdma/xdma_install.md b/sdk/linux_kernel_drivers/xdma/xdma_install.md index db2aaef1..30b440f4 100644 --- a/sdk/linux_kernel_drivers/xdma/xdma_install.md +++ b/sdk/linux_kernel_drivers/xdma/xdma_install.md @@ -66,7 +66,7 @@ __*For Suse*__ __**Step 2**__: Clone the git repo locally under my_fpga_dir for example: ``` - $ mkdir -p + $ mkdir -p $ cd $ git clone https://github.com/aws/aws-fpga ``` @@ -194,12 +194,12 @@ DEVAMI 1.5.0 or Later instances come with preinstalled Xilinx Runtime Environmen lsmod | grep xocl ``` - To Remove XOCL driver + To Remove XRT and XOCL driver ``` - sudo rmmod xocl - + sudo systemctl stop mpd + sudo yum remove -y xrt xrt-aws ``` - XDMA driver install can proceed once XOCL driver is removed. + XDMA driver install can proceed once XRT is removed. 
diff --git a/sdk/linux_kernel_drivers/xdma/xdma_ioctl.h b/sdk/linux_kernel_drivers/xdma/xdma_ioctl.h deleted file mode 100755 index a250a1de..00000000 --- a/sdk/linux_kernel_drivers/xdma/xdma_ioctl.h +++ /dev/null @@ -1,78 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. - * - * Karen Xie - * Leon Woestenberg - * - ******************************************************************************/ -#ifndef _XDMA_IOCALLS_POSIX_H_ -#define _XDMA_IOCALLS_POSIX_H_ - -#include - -/* Use 'x' as magic number */ -#define XDMA_IOC_MAGIC 'x' -/* XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); */ -#define XDMA_XCL_MAGIC 0X586C0C6C - -#define IOCTL_XDMA_PERF_V1 (1) -#define XDMA_ADDRMODE_MEMORY (0) -#define XDMA_ADDRMODE_FIXED (1) - -/* - * S means "Set" through a ptr, - * T means "Tell" directly with the argument value - * G means "Get": reply by setting through a pointer - * Q means "Query": response is on the return value - * X means "eXchange": switch G and S atomically - * H means "sHift": switch T and Q atomically - * - * _IO(type,nr) no arguments - * _IOR(type,nr,datatype) read data from driver - * _IOW(type,nr.datatype) write data to driver - * _IORW(type,nr,datatype) read/write data - * - * _IOC_DIR(nr) returns direction - * _IOC_TYPE(nr) returns magic - * _IOC_NR(nr) returns number - * _IOC_SIZE(nr) returns size - */ - -enum XDMA_IOC_TYPES { - XDMA_IOC_NOP, - XDMA_IOC_INFO, - XDMA_IOC_MAX -}; - -struct xdma_ioc_base { - unsigned int magic; - unsigned int command; -}; - -struct xdma_ioc_info { - struct xdma_ioc_base base; - unsigned short vendor; - unsigned short device; - unsigned short subsystem_vendor; - unsigned short subsystem_device; - unsigned dma_engine_version; - unsigned driver_version; - unsigned long long feature_id; - unsigned short domain; - unsigned char bus; - unsigned char dev; - unsigned char func; 
-}; - -/* IOCTL codes */ -#define XDMA_IOCINFO _IOWR(XDMA_IOC_MAGIC, XDMA_IOC_INFO, struct xdma_ioc_info) - -#define IOCTL_XDMA_ADDRMODE_SET _IOW('q', 4, int) -#define IOCTL_XDMA_ADDRMODE_GET _IOR('q', 5, int) -#define IOCTL_XDMA_ALIGN_GET _IOR('q', 6, int) - -#endif /* _XDMA_IOCALLS_POSIX_H_ */ diff --git a/sdk/linux_kernel_drivers/xdma/xdma_mod.c b/sdk/linux_kernel_drivers/xdma/xdma_mod.c old mode 100755 new mode 100644 index 3b094322..b9dbfcfe --- a/sdk/linux_kernel_drivers/xdma/xdma_mod.c +++ b/sdk/linux_kernel_drivers/xdma/xdma_mod.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,6 +21,7 @@ * Karen Xie * ******************************************************************************/ + #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include @@ -35,8 +36,7 @@ #include "version.h" #define DRV_MODULE_NAME "xdma" -#define DRV_MODULE_DESC "Xilinx XDMA Classic Driver" -#define DRV_MODULE_RELDATE "Feb. 
2017" +#define DRV_MODULE_DESC "Xilinx XDMA Reference Driver" static char version[] = DRV_MODULE_DESC " " DRV_MODULE_NAME " v" DRV_MODULE_VERSION "\n"; @@ -47,48 +47,52 @@ MODULE_VERSION(DRV_MODULE_VERSION); MODULE_LICENSE("GPL v2"); /* SECTION: Module global variables */ -static int xpdev_cnt = 0; +static int xpdev_cnt; static const struct pci_device_id pci_ids[] = { + { PCI_DEVICE(0x10ee, 0x9048), }, + { PCI_DEVICE(0x10ee, 0x9044), }, + { PCI_DEVICE(0x10ee, 0x9042), }, + { PCI_DEVICE(0x10ee, 0x9041), }, { PCI_DEVICE(0x10ee, 0x903f), }, { PCI_DEVICE(0x10ee, 0x9038), }, { PCI_DEVICE(0x10ee, 0x9028), }, - { PCI_DEVICE(0x10ee, 0x9018), }, + { PCI_DEVICE(0x10ee, 0x9018), }, { PCI_DEVICE(0x10ee, 0x9034), }, { PCI_DEVICE(0x10ee, 0x9024), }, - { PCI_DEVICE(0x10ee, 0x9014), }, + { PCI_DEVICE(0x10ee, 0x9014), }, { PCI_DEVICE(0x10ee, 0x9032), }, { PCI_DEVICE(0x10ee, 0x9022), }, - { PCI_DEVICE(0x10ee, 0x9012), }, + { PCI_DEVICE(0x10ee, 0x9012), }, { PCI_DEVICE(0x10ee, 0x9031), }, { PCI_DEVICE(0x10ee, 0x9021), }, - { PCI_DEVICE(0x10ee, 0x9011), }, + { PCI_DEVICE(0x10ee, 0x9011), }, { PCI_DEVICE(0x10ee, 0x8011), }, { PCI_DEVICE(0x10ee, 0x8012), }, - { PCI_DEVICE(0x10ee, 0x8014), }, - { PCI_DEVICE(0x10ee, 0x8018), }, - { PCI_DEVICE(0x10ee, 0x8021), }, - { PCI_DEVICE(0x10ee, 0x8022), }, - { PCI_DEVICE(0x10ee, 0x8024), }, - { PCI_DEVICE(0x10ee, 0x8028), }, - { PCI_DEVICE(0x10ee, 0x8031), }, - { PCI_DEVICE(0x10ee, 0x8032), }, - { PCI_DEVICE(0x10ee, 0x8034), }, - { PCI_DEVICE(0x10ee, 0x8038), }, - - { PCI_DEVICE(0x10ee, 0x7011), }, - { PCI_DEVICE(0x10ee, 0x7012), }, - { PCI_DEVICE(0x10ee, 0x7014), }, - { PCI_DEVICE(0x10ee, 0x7018), }, - { PCI_DEVICE(0x10ee, 0x7021), }, - { PCI_DEVICE(0x10ee, 0x7022), }, - { PCI_DEVICE(0x10ee, 0x7024), }, + { PCI_DEVICE(0x10ee, 0x8014), }, + { PCI_DEVICE(0x10ee, 0x8018), }, + { PCI_DEVICE(0x10ee, 0x8021), }, + { PCI_DEVICE(0x10ee, 0x8022), }, + { PCI_DEVICE(0x10ee, 0x8024), }, + { PCI_DEVICE(0x10ee, 0x8028), }, + { PCI_DEVICE(0x10ee, 0x8031), }, + { 
PCI_DEVICE(0x10ee, 0x8032), }, + { PCI_DEVICE(0x10ee, 0x8034), }, + { PCI_DEVICE(0x10ee, 0x8038), }, + + { PCI_DEVICE(0x10ee, 0x7011), }, + { PCI_DEVICE(0x10ee, 0x7012), }, + { PCI_DEVICE(0x10ee, 0x7014), }, + { PCI_DEVICE(0x10ee, 0x7018), }, + { PCI_DEVICE(0x10ee, 0x7021), }, + { PCI_DEVICE(0x10ee, 0x7022), }, + { PCI_DEVICE(0x10ee, 0x7024), }, { PCI_DEVICE(0x10ee, 0x7028), }, - { PCI_DEVICE(0x10ee, 0x7031), }, - { PCI_DEVICE(0x10ee, 0x7032), }, - { PCI_DEVICE(0x10ee, 0x7034), }, - { PCI_DEVICE(0x10ee, 0x7038), }, + { PCI_DEVICE(0x10ee, 0x7031), }, + { PCI_DEVICE(0x10ee, 0x7032), }, + { PCI_DEVICE(0x10ee, 0x7034), }, + { PCI_DEVICE(0x10ee, 0x7038), }, { PCI_DEVICE(0x10ee, 0x6828), }, { PCI_DEVICE(0x10ee, 0x6830), }, @@ -105,13 +109,12 @@ static const struct pci_device_id pci_ids[] = { { PCI_DEVICE(0x10ee, 0x4B28), }, { PCI_DEVICE(0x10ee, 0x2808), }, + { PCI_DEVICE(0x1d0f, 0xf000), }, + { PCI_DEVICE(0x1d0f, 0xf001), }, - { PCI_DEVICE(0x10ee, 0x2808), }, - - { PCI_DEVICE(0x1d0f, 0xf000), }, - { PCI_DEVICE(0x1d0f, 0xf001), }, - { PCI_DEVICE(0x1d0f, 0x1042), }, - +#ifdef INTERNAL_TESTING + { PCI_DEVICE(0x1d0f, 0x1042), 0}, +#endif {0,} }; MODULE_DEVICE_TABLE(pci, pci_ids); @@ -132,7 +135,7 @@ static void xpdev_free(struct xdma_pci_dev *xpdev) static struct xdma_pci_dev *xpdev_alloc(struct pci_dev *pdev) { - struct xdma_pci_dev *xpdev = kmalloc(sizeof(*xpdev), GFP_KERNEL); + struct xdma_pci_dev *xpdev = kmalloc(sizeof(*xpdev), GFP_KERNEL); if (!xpdev) return NULL; @@ -161,12 +164,28 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) hndl = xdma_device_open(DRV_MODULE_NAME, pdev, &xpdev->user_max, &xpdev->h2c_channel_max, &xpdev->c2h_channel_max); - if (!hndl) - return -EINVAL; + if (!hndl) { + rv = -EINVAL; + goto err_out; + } - BUG_ON(xpdev->user_max > MAX_USER_IRQ); - BUG_ON(xpdev->h2c_channel_max > XDMA_CHANNEL_NUM_MAX); - BUG_ON(xpdev->c2h_channel_max > XDMA_CHANNEL_NUM_MAX); + if (xpdev->user_max > MAX_USER_IRQ) { + pr_err("Maximum users 
limit reached\n"); + rv = -EINVAL; + goto err_out; + } + + if (xpdev->h2c_channel_max > XDMA_CHANNEL_NUM_MAX) { + pr_err("Maximun H2C channel limit reached\n"); + rv = -EINVAL; + goto err_out; + } + + if (xpdev->c2h_channel_max > XDMA_CHANNEL_NUM_MAX) { + pr_err("Maximun C2H channel limit reached\n"); + rv = -EINVAL; + goto err_out; + } if (!xpdev->h2c_channel_max && !xpdev->c2h_channel_max) pr_warn("NO engine found!\n"); @@ -183,9 +202,15 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) xdev = xdev_find_by_pdev(pdev); if (!xdev) { pr_warn("NO xdev found!\n"); - return -EINVAL; + rv = -EINVAL; + goto err_out; + } + + if (hndl != xdev) { + pr_err("xdev handle mismatch\n"); + rv = -EINVAL; + goto err_out; } - BUG_ON(hndl != xdev ); pr_info("%s xdma%d, pdev 0x%p, xdev 0x%p, 0x%p, usr %d, ch %d,%d.\n", dev_name(&pdev->dev), xdev->idx, pdev, xpdev, xdev, @@ -198,11 +223,11 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (rv) goto err_out; - dev_set_drvdata(&pdev->dev, xpdev); + dev_set_drvdata(&pdev->dev, xpdev); return 0; -err_out: +err_out: pr_err("pdev 0x%p, err %d.\n", pdev, rv); xpdev_free(xpdev); return rv; @@ -223,7 +248,7 @@ static void remove_one(struct pci_dev *pdev) pdev, xpdev, xpdev->xdev); xpdev_free(xpdev); - dev_set_drvdata(&pdev->dev, NULL); + dev_set_drvdata(&pdev->dev, NULL); } static pci_ers_result_t xdma_error_detected(struct pci_dev *pdev, @@ -274,7 +299,7 @@ static void xdma_error_resume(struct pci_dev *pdev) pci_cleanup_aer_uncorrect_error_status(pdev); } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,13,0) +#if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE static void xdma_reset_prepare(struct pci_dev *pdev) { struct xdma_pci_dev *xpdev = dev_get_drvdata(&pdev->dev); @@ -291,7 +316,7 @@ static void xdma_reset_done(struct pci_dev *pdev) xdma_device_online(pdev, xpdev->xdev); } -#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) +#elif KERNEL_VERSION(3, 16, 0) <= LINUX_VERSION_CODE static 
void xdma_reset_notify(struct pci_dev *pdev, bool prepare) { struct xdma_pci_dev *xpdev = dev_get_drvdata(&pdev->dev); @@ -309,10 +334,10 @@ static const struct pci_error_handlers xdma_err_handler = { .error_detected = xdma_error_detected, .slot_reset = xdma_slot_reset, .resume = xdma_error_resume, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,13,0) +#if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE .reset_prepare = xdma_reset_prepare, .reset_done = xdma_reset_done, -#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) +#elif KERNEL_VERSION(3, 16, 0) <= LINUX_VERSION_CODE .reset_notify = xdma_reset_notify, #endif }; @@ -328,8 +353,6 @@ static struct pci_driver pci_driver = { static int __init xdma_mod_init(void) { int rv; - extern unsigned int desc_blen_max; - extern unsigned int sgdma_timeout; pr_info("%s", version); diff --git a/sdk/linux_kernel_drivers/xdma/xdma_mod.h b/sdk/linux_kernel_drivers/xdma/xdma_mod.h old mode 100755 new mode 100644 index 0ede7a08..abea67ee --- a/sdk/linux_kernel_drivers/xdma/xdma_mod.h +++ b/sdk/linux_kernel_drivers/xdma/xdma_mod.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. + * Copyright(c) 2015 - 2020 Xilinx, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,6 +21,7 @@ * Karen Xie * ******************************************************************************/ + #ifndef __XDMA_MODULE_H__ #define __XDMA_MODULE_H__ @@ -48,6 +49,7 @@ #include #include #include +#include #include "libxdma.h" @@ -56,6 +58,9 @@ #define MAGIC_CHAR 0xCCCCCCCCUL #define MAGIC_BITSTREAM 0xBBBBBBBBUL +extern unsigned int desc_blen_max; +extern unsigned int sgdma_timeout; + struct xdma_cdev { unsigned long magic; /* structure ID for sanity checks */ struct xdma_pci_dev *xpdev; diff --git a/sdk/linux_kernel_drivers/xocl/10-xocl.rules b/sdk/linux_kernel_drivers/xocl/10-xocl.rules deleted file mode 100644 index 297a72b0..00000000 --- a/sdk/linux_kernel_drivers/xocl/10-xocl.rules +++ /dev/null @@ -1 +0,0 @@ -KERNEL=="renderD*",MODE="0666" diff --git a/sdk/linux_kernel_drivers/xocl/LICENSE b/sdk/linux_kernel_drivers/xocl/LICENSE deleted file mode 100644 index d159169d..00000000 --- a/sdk/linux_kernel_drivers/xocl/LICENSE +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) 
You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. 
- - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. 
You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. 
- -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. 
However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. 
-You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. 
diff --git a/sdk/linux_kernel_drivers/xocl/Makefile b/sdk/linux_kernel_drivers/xocl/Makefile deleted file mode 100644 index 9a334137..00000000 --- a/sdk/linux_kernel_drivers/xocl/Makefile +++ /dev/null @@ -1,70 +0,0 @@ -# Amazon FPGA Hardware Development Kit -# -# Copyright 2016-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Amazon Software License (the "License"). You may not use -# this file except in compliance with the License. A copy of the License is -# located at -# -# http://aws.amazon.com/asl/ -# -# or in the "license" file accompanying this file. This file is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or -# implied. See the License for the specific language governing permissions and -# limitations under the License. - -XOCL_DIR = $(shell pwd) - -obj-m += xocl.o -ccflags-y := -Iinclude/drm - -xocl-y := \ - xocl_sysfs.o \ - xocl_bo.o \ - xocl_drv.o \ - xocl_xdma.o \ - xocl_ioctl.o \ - xocl_test.o \ - xocl_ctx.o \ - xocl_xvc.o \ - xocl_exec.o \ - libxdma.o - -CONFIG_MODULE_SIG=n -KERNELDIR ?= /lib/modules/$(shell uname -r)/build - -PWD := $(shell pwd) -ROOT := $(dir $(M)) -XILINXINCLUDE := -I$(SDACCEL_DIR)/userspace/include -I$(XOCL_DIR) -XILINXINCLUDE += -I$(XOCL_DIR)/../xdma/ - -all: - echo "include: $(XILINXINCLUDE)" - echo "sdaccel_dir: $(SDACCEL_DIR)" - echo "ROOT: $(ROOT)" - echo "XOCL_DIR: $(XOCL_DIR)" - $(MAKE) -C $(KERNELDIR) M=$(PWD) modules - -install: all - $(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install - depmod -a - install -m 644 10-xocl.rules /etc/udev/rules.d - -rmmod -s xocl || true - -rmmod -s xdma || true - -rmmod -s edma_drv || true - -modprobe xocl - -clean: - rm -rf *.o *.o.d *~ core .depend .*.cmd *.ko *.ko.unsigned *.mod.c .tmp_versions *.symvers .#* *.save *.bak Modules.* modules.order Module.markers *.bin - - -CFLAGS_xocl_xdma.o := $(XILINXINCLUDE) -CFLAGS_xocl_sysfs.o := $(XILINXINCLUDE) -CFLAGS_xocl_bo.o := $(XILINXINCLUDE) -CFLAGS_xocl_drv.o 
:= $(XILINXINCLUDE) -CFLAGS_xocl_ioctl.o := $(XILINXINCLUDE) -CFLAGS_xocl_test.o := $(XILINXINCLUDE) -CFLAGS_xocl_ctx.o := $(XILINXINCLUDE) -CFLAGS_xocl_exec.o := $(XILINXINCLUDE) -CFLAGS_xocl_xvc.o := $(XILINXINCLUDE) -CFLAGS_libxdma.o := $(XILINXINCLUDE) diff --git a/sdk/linux_kernel_drivers/xocl/cdev_sgdma.h b/sdk/linux_kernel_drivers/xocl/cdev_sgdma.h deleted file mode 100644 index d3700260..00000000 --- a/sdk/linux_kernel_drivers/xocl/cdev_sgdma.h +++ /dev/null @@ -1,79 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". 
- * - * Karen Xie - * - ******************************************************************************/ -#ifndef _XDMA_IOCALLS_POSIX_H_ -#define _XDMA_IOCALLS_POSIX_H_ - -#include - - -#define IOCTL_XDMA_PERF_V1 (1) -#define XDMA_ADDRMODE_MEMORY (0) -#define XDMA_ADDRMODE_FIXED (1) - -/* - * S means "Set" through a ptr, - * T means "Tell" directly with the argument value - * G means "Get": reply by setting through a pointer - * Q means "Query": response is on the return value - * X means "eXchange": switch G and S atomically - * H means "sHift": switch T and Q atomically - * - * _IO(type,nr) no arguments - * _IOR(type,nr,datatype) read data from driver - * _IOW(type,nr.datatype) write data to driver - * _IORW(type,nr,datatype) read/write data - * - * _IOC_DIR(nr) returns direction - * _IOC_TYPE(nr) returns magic - * _IOC_NR(nr) returns number - * _IOC_SIZE(nr) returns size - */ - -struct xdma_performance_ioctl -{ - /* IOCTL_XDMA_IOCTL_Vx */ - uint32_t version; - uint32_t transfer_size; - /* measurement */ - uint32_t stopped; - uint32_t iterations; - uint64_t clock_cycle_count; - uint64_t data_cycle_count; - uint64_t pending_count; -}; - - - -/* IOCTL codes */ - -#define IOCTL_XDMA_PERF_START _IOW('q', 1, struct xdma_performance_ioctl *) -#define IOCTL_XDMA_PERF_STOP _IOW('q', 2, struct xdma_performance_ioctl *) -#define IOCTL_XDMA_PERF_GET _IOR('q', 3, struct xdma_performance_ioctl *) -#define IOCTL_XDMA_ADDRMODE_SET _IOW('q', 4, int) -#define IOCTL_XDMA_ADDRMODE_GET _IOR('q', 5, int) -#define IOCTL_XDMA_ALIGN_GET _IOR('q', 6, int) - -#endif /* _XDMA_IOCALLS_POSIX_H_ */ - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/ert.h b/sdk/linux_kernel_drivers/xocl/ert.h deleted file mode 100644 index 6b5c5bda..00000000 --- a/sdk/linux_kernel_drivers/xocl/ert.h +++ /dev/null @@ -1,310 +0,0 @@ -/** - * Copyright (C) 2018 Xilinx, Inc - * - * This file is dual licensed. 
It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - * - * Apache License Verbiage - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * GPL license Verbiage: - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. This program is - * distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - * License for more details. You should have received a copy of the - * GNU General Public License along with this program; if not, write - * to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, - * Boston, MA 02111-1307 USA - * - */ - -/** - * Xilinx SDAccel Embedded Runtime definition - * Copyright (C) 2018, Xilinx Inc - All rights reserved - * - * This file is dual licensed. It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. 
- */ - -#ifndef _ERT_H_ -#define _ERT_H_ - -#if defined(__KERNEL__) -# include -#else -# include -#endif - -/** - * ERT generic packet format - * - * @state: [3-0] current state of a command - * @custom: [11-4] custom per specific commands - * @count: [22-12] number of words in payload (data) - * @opcode: [27-23] opcode identifying specific command - * @type: [31-27] type of command (currently 0) - * @data: count number of words representing packet payload - */ -struct ert_packet { - union { - struct { - uint32_t state:4; /* [3-0] */ - uint32_t custom:8; /* [11-4] */ - uint32_t count:11; /* [22-12] */ - uint32_t opcode:5; /* [27-23] */ - uint32_t type:4; /* [31-27] */ - }; - uint32_t header; - }; - uint32_t data[1]; /* count number of words */ -}; - -/** - * ERT start kernel command format - * - * @state: [3-0] current state of a command - * @extra_cu_masks: [11-10] extra CU masks in addition to mandatory mask - * @count: [22-12] number of words in payload (data) - * @opcode: [27-23] 0, opcode for start_kernel - * @type: [31-27] 0, type of start_kernel - * - * @cu_mask: first mandatory CU mask - * @data: count number of words representing command payload - * - * The packet payload is comprised of 1 mandatory CU mask plus - * extra_cu_masks per header field, followed a CU register map of size - * (count - (1 + extra_cu_masks)) uint32_t words. 
- */ -struct ert_start_kernel_cmd { - union { - struct { - uint32_t state:4; /* [3-0] */ - uint32_t unused:6; /* [9-4] */ - uint32_t extra_cu_masks:2; /* [11-10] */ - uint32_t count:11; /* [22-12] */ - uint32_t opcode:5; /* [27-23] */ - uint32_t type:4; /* [31-27] */ - }; - uint32_t header; - }; - - /* payload */ - uint32_t cu_mask; /* mandatory cu mask */ - uint32_t data[1]; /* count-1 number of words */ -}; - -/** - * ERT configure command format - * - * @state: [3-0] current state of a command - * @count: [22-12] 5, number of words in payload - * @opcode: [27-23] 1, opcode for configure - * @type: [31-27] 0, type of configure - * - * @slot_size: command queue slot size - * @num_cus: number of compute units in program - * @cu_shift: shift value to convert CU idx to CU addr - * @cu_base_addr: base address to add to CU addr for actual physical address - * - * @ert:1 enable embedded HW scheduler - * @polling:1 poll for command completion - * @cu_dma:1 enable CUDMA custom module for HW scheduler - * @cu_isr:1 enable CUISR custom module for HW scheduler - * @cq_int:1 enable interrupt from host to HW scheduler - */ -struct ert_configure_cmd { - union { - struct { - uint32_t state:4; /* [3-0] */ - uint32_t unused:8; /* [11-4] */ - uint32_t count:11; /* [22-12] */ - uint32_t opcode:5; /* [27-23] */ - uint32_t type:4; /* [31-27] */ - }; - uint32_t header; - }; - - /* payload */ - uint32_t slot_size; - uint32_t num_cus; - uint32_t cu_shift; - uint32_t cu_base_addr; - - /* features */ - uint32_t ert:1; - uint32_t polling:1; - uint32_t cu_dma:1; - uint32_t cu_isr:1; - uint32_t cq_int:1; - uint32_t unusedf:27; -}; - -/** - * ERT command state - * - * @ERT_CMD_STATE_NEW: Set by host before submitting a command to scheduler - * @ERT_CMD_STATE_QUEUED: Internal scheduler state - * @ERT_CMD_STATE_RUNNING: Internal scheduler state - * @ERT_CMD_STATE_COMPLETE: Set by scheduler when command completes - * @ERT_CMD_STATE_ERROR: Set by scheduler if command failed - * 
@ERT_CMD_STATE_ABORT: Set by scheduler if command abort - */ -enum ert_cmd_state { - ERT_CMD_STATE_NEW = 1, - ERT_CMD_STATE_QUEUED = 2, - ERT_CMD_STATE_RUNNING = 3, - ERT_CMD_STATE_COMPLETED = 4, - ERT_CMD_STATE_ERROR = 5, - ERT_CMD_STATE_ABORT = 6, -}; - -/** - * Opcode types for commands - * - * @ERT_START_CU: start a workgroup on a CU - * @ERT_START_KERNEL: currently aliased to ERT_START_CU - * @ERT_CONFIGURE: configure command scheduler - */ -enum ert_cmd_opcode { - ERT_START_CU = 0, - ERT_START_KERNEL = 0, - ERT_CONFIGURE = 1, -}; - -/** - * Address constants per spec - */ -#define ERT_WORD_SIZE 4 /* 4 bytes */ -#define ERT_CQ_SIZE 0x10000 /* 64K */ -#define ERT_CQ_BASE_ADDR 0x190000 -#define ERT_CSR_ADDR 0x180000 - -/** - * The STATUS REGISTER is for communicating completed CQ slot indices - * MicroBlaze write, host reads. MB(W) / HOST(COR) - */ -#define ERT_STATUS_REGISTER_ADDR (ERT_CSR_ADDR) -#define ERT_STATUS_REGISTER_ADDR0 (ERT_CSR_ADDR) -#define ERT_STATUS_REGISTER_ADDR1 (ERT_CSR_ADDR + 0x4) -#define ERT_STATUS_REGISTER_ADDR2 (ERT_CSR_ADDR + 0x8) -#define ERT_STATUS_REGISTER_ADDR3 (ERT_CSR_ADDR + 0xC) - -/** - * The CU DMA REGISTER is for communicating which CQ slot is to be started - * on a specific CU. MB selects a free CU on which the command can - * run, then writes the 1< - * Leon Woestenberg - * - ******************************************************************************/ -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include - -#include "libxdma.h" -#include "libxdma_api.h" -#include "cdev_sgdma.h" - -/* SECTION: Module licensing */ - -#ifdef __LIBXDMA_MOD__ -#include "version.h" -#define DRV_MODULE_NAME "libxdma" -#define DRV_MODULE_DESC "Xilinx XDMA Base Driver" -#define DRV_MODULE_RELDATE "Feb. 
2017" - -static char version[] = - DRV_MODULE_DESC " " DRV_MODULE_NAME " v" DRV_MODULE_VERSION "\n"; - -MODULE_AUTHOR("Xilinx, Inc."); -MODULE_DESCRIPTION(DRV_MODULE_DESC); -MODULE_VERSION(DRV_MODULE_VERSION); -MODULE_LICENSE("GPL v2"); -#endif - -/* Module Parameters */ -static unsigned int poll_mode; -module_param(poll_mode, uint, 0644); -MODULE_PARM_DESC(poll_mode, "Set 1 for hw polling, default is 0 (interrupts)"); - -static unsigned int interrupt_mode; -module_param(interrupt_mode, uint, 0644); -MODULE_PARM_DESC(interrupt_mode, "0 - MSI-x , 1 - MSI, 2 - Legacy"); - -static unsigned int enable_credit_mp; -module_param(enable_credit_mp, uint, 0644); -MODULE_PARM_DESC(enable_credit_mp, "Set 1 to enable creidt feature, default is 0 (no credit control)"); - -/* - * xdma device management - * maintains a list of the xdma devices - */ -static LIST_HEAD(xdev_list); -static DEFINE_MUTEX(xdev_mutex); - -static LIST_HEAD(xdev_rcu_list); -static DEFINE_SPINLOCK(xdev_rcu_lock); - -#ifndef list_last_entry -#define list_last_entry(ptr, type, member) \ - list_entry((ptr)->prev, type, member) -#endif - -static inline void xdev_list_add(struct xdma_dev *xdev) -{ - mutex_lock(&xdev_mutex); - if (list_empty(&xdev_list)) - xdev->idx = 0; - else { - struct xdma_dev *last; - - last = list_last_entry(&xdev_list, struct xdma_dev, list_head); - xdev->idx = last->idx + 1; - } - list_add_tail(&xdev->list_head, &xdev_list); - mutex_unlock(&xdev_mutex); - - dbg_init("dev %s, xdev 0x%p, xdma idx %d.\n", - dev_name(&xdev->pdev->dev), xdev, xdev->idx); - - spin_lock(&xdev_rcu_lock); - list_add_tail_rcu(&xdev->rcu_node, &xdev_rcu_list); - spin_unlock(&xdev_rcu_lock); -} - -#undef list_last_entry - -static inline void xdev_list_remove(struct xdma_dev *xdev) -{ - mutex_lock(&xdev_mutex); - list_del(&xdev->list_head); - mutex_unlock(&xdev_mutex); - - spin_lock(&xdev_rcu_lock); - list_del_rcu(&xdev->rcu_node); - spin_unlock(&xdev_rcu_lock); - synchronize_rcu(); -} - -struct xdma_dev 
*xdev_find_by_pdev(struct pci_dev *pdev) -{ - struct xdma_dev *xdev, *tmp; - - mutex_lock(&xdev_mutex); - list_for_each_entry_safe(xdev, tmp, &xdev_list, list_head) { - if (xdev->pdev == pdev) { - mutex_unlock(&xdev_mutex); - return xdev; - } - } - mutex_unlock(&xdev_mutex); - return NULL; -} -EXPORT_SYMBOL_GPL(xdev_find_by_pdev); - -static inline int debug_check_dev_hndl(const char *fname, struct pci_dev *pdev, - void *hndl) -{ - struct xdma_dev *xdev; - - if (!pdev) - return -EINVAL; - - xdev = xdev_find_by_pdev(pdev); - if (!xdev) { - pr_info("%s pdev 0x%p, hndl 0x%p, NO match found!\n", - fname, pdev, hndl); - return -EINVAL; - } - if (xdev != hndl) { - pr_err("%s pdev 0x%p, hndl 0x%p != 0x%p!\n", - fname, pdev, hndl, xdev); - return -EINVAL; - } - - return 0; -} - -#ifdef __LIBXDMA_DEBUG__ -/* SECTION: Function definitions */ -inline void __write_register(const char *fn, u32 value, void *iomem, unsigned long off) -{ - pr_err("%s: w reg 0x%lx(0x%p), 0x%x.\n", fn, off, iomem, value); - iowrite32(value, iomem); -} -#define write_register(v,mem,off) __write_register(__func__, v, mem, off) -#else -#define write_register(v,mem,off) iowrite32(v, mem) -#endif - -inline u32 read_register(void *iomem) -{ - return ioread32(iomem); -} - -static inline u32 build_u32(u32 hi, u32 lo) -{ - return ((hi & 0xFFFFUL) << 16) | (lo & 0xFFFFUL); -} - -static inline u64 build_u64(u64 hi, u64 lo) -{ - return ((hi & 0xFFFFFFFULL) << 32) | (lo & 0xFFFFFFFFULL); -} - -static void check_nonzero_interrupt_status(struct xdma_dev *xdev) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - u32 w; - - w = read_register(®->user_int_enable); - if (w) - pr_info("%s xdma%d user_int_enable = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - - w = read_register(®->channel_int_enable); - if (w) - pr_info("%s xdma%d channel_int_enable = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - - w = 
read_register(®->user_int_request); - if (w) - pr_info("%s xdma%d user_int_request = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - w = read_register(®->channel_int_request); - if (w) - pr_info("%s xdma%d channel_int_request = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - - w = read_register(®->user_int_pending); - if (w) - pr_info("%s xdma%d user_int_pending = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); - w = read_register(®->channel_int_pending); - if (w) - pr_info("%s xdma%d channel_int_pending = 0x%08x\n", - dev_name(&xdev->pdev->dev), xdev->idx, w); -} - -/* channel_interrupts_enable -- Enable interrupts we are interested in */ -static void channel_interrupts_enable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->channel_int_enable_w1s, XDMA_OFS_INT_CTRL); -} - -/* channel_interrupts_disable -- Disable interrupts we not interested in */ -static void channel_interrupts_disable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->channel_int_enable_w1c, XDMA_OFS_INT_CTRL); -} - -/* user_interrupts_enable -- Enable interrupts we are interested in */ -static void user_interrupts_enable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->user_int_enable_w1s, XDMA_OFS_INT_CTRL); -} - -/* user_interrupts_disable -- Disable interrupts we not interested in */ -static void user_interrupts_disable(struct xdma_dev *xdev, u32 mask) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - - write_register(mask, ®->user_int_enable_w1c, XDMA_OFS_INT_CTRL); -} - -/* read_interrupts -- Print the interrupt 
controller status */ -static u32 read_interrupts(struct xdma_dev *xdev) -{ - struct interrupt_regs *reg = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + XDMA_OFS_INT_CTRL); - u32 lo; - u32 hi; - - /* extra debugging; inspect complete engine set of registers */ - hi = read_register(®->user_int_request); - dbg_io("ioread32(0x%p) returned 0x%08x (user_int_request).\n", - ®->user_int_request, hi); - lo = read_register(®->channel_int_request); - dbg_io("ioread32(0x%p) returned 0x%08x (channel_int_request)\n", - ®->channel_int_request, lo); - - /* return interrupts: user in upper 16-bits, channel in lower 16-bits */ - return build_u32(hi, lo); -} - -void enable_perf(struct xdma_engine *engine) -{ - u32 w; - - w = XDMA_PERF_CLEAR; - write_register(w, &engine->regs->perf_ctrl, - (unsigned long)(&engine->regs->perf_ctrl) - - (unsigned long)(&engine->regs)); - read_register(&engine->regs->identifier); - w = XDMA_PERF_AUTO | XDMA_PERF_RUN; - write_register(w, &engine->regs->perf_ctrl, - (unsigned long)(&engine->regs->perf_ctrl) - - (unsigned long)(&engine->regs)); - read_register(&engine->regs->identifier); - - dbg_perf("IOCTL_XDMA_PERF_START\n"); - -} -EXPORT_SYMBOL_GPL(enable_perf); - -void get_perf_stats(struct xdma_engine *engine) -{ - u32 hi; - u32 lo; - - BUG_ON(!engine); - BUG_ON(!engine->xdma_perf); - - hi = 0; - lo = read_register(&engine->regs->completed_desc_count); - engine->xdma_perf->iterations = build_u64(hi, lo); - - hi = read_register(&engine->regs->perf_cyc_hi); - lo = read_register(&engine->regs->perf_cyc_lo); - - engine->xdma_perf->clock_cycle_count = build_u64(hi, lo); - - hi = read_register(&engine->regs->perf_dat_hi); - lo = read_register(&engine->regs->perf_dat_lo); - engine->xdma_perf->data_cycle_count = build_u64(hi, lo); - - hi = read_register(&engine->regs->perf_pnd_hi); - lo = read_register(&engine->regs->perf_pnd_lo); - engine->xdma_perf->pending_count = build_u64(hi, lo); -} -EXPORT_SYMBOL_GPL(get_perf_stats); - -static void 
engine_reg_dump(struct xdma_engine *engine) -{ - u32 w; - - BUG_ON(!engine); - - w = read_register(&engine->regs->identifier); - pr_info("%s: ioread32(0x%p) = 0x%08x (id).\n", - engine->name, &engine->regs->identifier, w); - w &= BLOCK_ID_MASK; - if (w != BLOCK_ID_HEAD) { - pr_info("%s: engine id missing, 0x%08x exp. & 0x%x = 0x%x\n", - engine->name, w, BLOCK_ID_MASK, BLOCK_ID_HEAD); - return; - } - /* extra debugging; inspect complete engine set of registers */ - w = read_register(&engine->regs->status); - pr_info("%s: ioread32(0x%p) = 0x%08x (status).\n", - engine->name, &engine->regs->status, w); - w = read_register(&engine->regs->control); - pr_info("%s: ioread32(0x%p) = 0x%08x (control)\n", - engine->name, &engine->regs->control, w); - w = read_register(&engine->sgdma_regs->first_desc_lo); - pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_lo)\n", - engine->name, &engine->sgdma_regs->first_desc_lo, w); - w = read_register(&engine->sgdma_regs->first_desc_hi); - pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_hi)\n", - engine->name, &engine->sgdma_regs->first_desc_hi, w); - w = read_register(&engine->sgdma_regs->first_desc_adjacent); - pr_info("%s: ioread32(0x%p) = 0x%08x (first_desc_adjacent).\n", - engine->name, &engine->sgdma_regs->first_desc_adjacent, w); - w = read_register(&engine->regs->completed_desc_count); - pr_info("%s: ioread32(0x%p) = 0x%08x (completed_desc_count).\n", - engine->name, &engine->regs->completed_desc_count, w); - w = read_register(&engine->regs->interrupt_enable_mask); - pr_info("%s: ioread32(0x%p) = 0x%08x (interrupt_enable_mask)\n", - engine->name, &engine->regs->interrupt_enable_mask, w); -} - -/** - * engine_status_read() - read status of SG DMA engine (optionally reset) - * - * Stores status in engine->status. 
- * - * @return -1 on failure, status register otherwise - */ -static void engine_status_dump(struct xdma_engine *engine) -{ - u32 v = engine->status; - char buffer[256]; - char *buf = buffer; - int len = 0; - - len = sprintf(buf, "SG engine %s status: 0x%08x: ", engine->name, v); - - if ((v & XDMA_STAT_BUSY)) - len += sprintf(buf + len, "BUSY,"); - if ((v & XDMA_STAT_DESC_STOPPED)) - len += sprintf(buf + len, "DESC_STOPPED,"); - if ((v & XDMA_STAT_DESC_COMPLETED)) - len += sprintf(buf + len, "DESC_COMPL,"); - - /* common H2C & C2H */ - if ((v & XDMA_STAT_COMMON_ERR_MASK)) { - if ((v & XDMA_STAT_ALIGN_MISMATCH)) - len += sprintf(buf + len, "ALIGN_MISMATCH "); - if ((v & XDMA_STAT_MAGIC_STOPPED)) - len += sprintf(buf + len, "MAGIC_STOPPED "); - if ((v & XDMA_STAT_INVALID_LEN)) - len += sprintf(buf + len, "INVLIAD_LEN "); - if ((v & XDMA_STAT_IDLE_STOPPED)) - len += sprintf(buf + len, "IDLE_STOPPED "); - buf[len - 1] = ','; - } - - if ((engine->dir == DMA_TO_DEVICE)) { - /* H2C only */ - if ((v & XDMA_STAT_H2C_R_ERR_MASK)) { - len += sprintf(buf + len, "R:"); - if ((v & XDMA_STAT_H2C_R_UNSUPP_REQ)) - len += sprintf(buf + len, "UNSUPP_REQ "); - if ((v & XDMA_STAT_H2C_R_COMPL_ABORT)) - len += sprintf(buf + len, "COMPL_ABORT "); - if ((v & XDMA_STAT_H2C_R_PARITY_ERR)) - len += sprintf(buf + len, "PARITY "); - if ((v & XDMA_STAT_H2C_R_HEADER_EP)) - len += sprintf(buf + len, "HEADER_EP "); - if ((v & XDMA_STAT_H2C_R_UNEXP_COMPL)) - len += sprintf(buf + len, "UNEXP_COMPL "); - buf[len - 1] = ','; - } - - if ((v & XDMA_STAT_H2C_W_ERR_MASK)) { - len += sprintf(buf + len, "W:"); - if ((v & XDMA_STAT_H2C_W_DECODE_ERR)) - len += sprintf(buf + len, "DECODE_ERR "); - if ((v & XDMA_STAT_H2C_W_SLAVE_ERR)) - len += sprintf(buf + len, "SLAVE_ERR "); - buf[len - 1] = ','; - } - - } else { - /* C2H only */ - if ((v & XDMA_STAT_C2H_R_ERR_MASK)) { - len += sprintf(buf + len, "R:"); - if ((v & XDMA_STAT_C2H_R_DECODE_ERR)) - len += sprintf(buf + len, "DECODE_ERR "); - if ((v & 
XDMA_STAT_C2H_R_SLAVE_ERR)) - len += sprintf(buf + len, "SLAVE_ERR "); - buf[len - 1] = ','; - } - } - - /* common H2C & C2H */ - if ((v & XDMA_STAT_DESC_ERR_MASK)) { - len += sprintf(buf + len, "DESC_ERR:"); - if ((v & XDMA_STAT_DESC_UNSUPP_REQ)) - len += sprintf(buf + len, "UNSUPP_REQ "); - if ((v & XDMA_STAT_DESC_COMPL_ABORT)) - len += sprintf(buf + len, "COMPL_ABORT "); - if ((v & XDMA_STAT_DESC_PARITY_ERR)) - len += sprintf(buf + len, "PARITY "); - if ((v & XDMA_STAT_DESC_HEADER_EP)) - len += sprintf(buf + len, "HEADER_EP "); - if ((v & XDMA_STAT_DESC_UNEXP_COMPL)) - len += sprintf(buf + len, "UNEXP_COMPL "); - buf[len - 1] = ','; - } - - buf[len - 1] = '\0'; - pr_info("%s\n", buffer); -} - -static u32 engine_status_read(struct xdma_engine *engine, bool clear, bool dump) -{ - u32 value; - - BUG_ON(!engine); - - if (dump) - engine_reg_dump(engine); - - /* read status register */ - if (clear) - value = engine->status = - read_register(&engine->regs->status_rc); - else - value = engine->status = read_register(&engine->regs->status); - - if (dump) - engine_status_dump(engine); - - return value; -} - -/** - * xdma_engine_stop() - stop an SG DMA engine - * - */ -static void xdma_engine_stop(struct xdma_engine *engine) -{ - u32 w; - - BUG_ON(!engine); - dbg_tfr("xdma_engine_stop(engine=%p)\n", engine); - - w = 0; - w |= (u32)XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - w |= (u32)XDMA_CTRL_IE_MAGIC_STOPPED; - w |= (u32)XDMA_CTRL_IE_READ_ERROR; - w |= (u32)XDMA_CTRL_IE_DESC_ERROR; - - if (poll_mode) { - w |= (u32) XDMA_CTRL_POLL_MODE_WB; - } else { - w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; - w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; - - /* Disable IDLE STOPPED for MM */ - if ((engine->streaming && (engine->dir == DMA_FROM_DEVICE)) || - (engine->xdma_perf)) - w |= (u32)XDMA_CTRL_IE_IDLE_STOPPED; - } - - dbg_tfr("Stopping SG DMA %s engine; writing 0x%08x to 0x%p.\n", - engine->name, w, (u32 *)&engine->regs->control); - write_register(w, &engine->regs->control, - (unsigned 
long)(&engine->regs->control) - - (unsigned long)(&engine->regs)); - /* dummy read of status register to flush all previous writes */ - dbg_tfr("xdma_engine_stop(%s) done\n", engine->name); -} - -static void engine_start_mode_config(struct xdma_engine *engine) -{ - u32 w; - - BUG_ON(!engine); - - /* If a perf test is running, enable the engine interrupts */ - if (engine->xdma_perf) { - w = XDMA_CTRL_IE_DESC_STOPPED; - w |= XDMA_CTRL_IE_DESC_COMPLETED; - w |= XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - w |= XDMA_CTRL_IE_MAGIC_STOPPED; - w |= XDMA_CTRL_IE_IDLE_STOPPED; - w |= XDMA_CTRL_IE_READ_ERROR; - w |= XDMA_CTRL_IE_DESC_ERROR; - - write_register(w, &engine->regs->interrupt_enable_mask, - (unsigned long)(&engine->regs->interrupt_enable_mask) - - (unsigned long)(&engine->regs)); - } - - /* write control register of SG DMA engine */ - w = (u32)XDMA_CTRL_RUN_STOP; - w |= (u32)XDMA_CTRL_IE_READ_ERROR; - w |= (u32)XDMA_CTRL_IE_DESC_ERROR; - w |= (u32)XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - w |= (u32)XDMA_CTRL_IE_MAGIC_STOPPED; - - if (poll_mode) { - w |= (u32)XDMA_CTRL_POLL_MODE_WB; - } else { - w |= (u32)XDMA_CTRL_IE_DESC_STOPPED; - w |= (u32)XDMA_CTRL_IE_DESC_COMPLETED; - - if ((engine->streaming && (engine->dir == DMA_FROM_DEVICE)) || - (engine->xdma_perf)) - w |= (u32)XDMA_CTRL_IE_IDLE_STOPPED; - - /* set non-incremental addressing mode */ - if (engine->non_incr_addr) - w |= (u32)XDMA_CTRL_NON_INCR_ADDR; - } - - dbg_tfr("iowrite32(0x%08x to 0x%p) (control)\n", w, - (void *)&engine->regs->control); - /* start the engine */ - write_register(w, &engine->regs->control, - (unsigned long)(&engine->regs->control) - - (unsigned long)(&engine->regs)); - - /* dummy read of status register to flush all previous writes */ - w = read_register(&engine->regs->status); - dbg_tfr("ioread32(0x%p) = 0x%08x (dummy read flushes writes).\n", - &engine->regs->status, w); -} - -/** - * engine_start() - start an idle engine with its first transfer on queue - * - * The engine will run and process all 
transfers that are queued using - * transfer_queue() and thus have their descriptor lists chained. - * - * During the run, new transfers will be processed if transfer_queue() has - * chained the descriptors before the hardware fetches the last descriptor. - * A transfer that was chained too late will invoke a new run of the engine - * initiated from the engine_service() routine. - * - * The engine must be idle and at least one transfer must be queued. - * This function does not take locks; the engine spinlock must already be - * taken. - * - */ -static struct xdma_transfer *engine_start(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer; - u32 w; - int extra_adj = 0; - - /* engine must be idle */ - BUG_ON(engine->running); - /* engine transfer queue must not be empty */ - BUG_ON(list_empty(&engine->transfer_list)); - /* inspect first transfer queued on the engine */ - transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, - entry); - BUG_ON(!transfer); - - /* engine is no longer shutdown */ - engine->shutdown = ENGINE_SHUTDOWN_NONE; - - dbg_tfr("engine_start(%s): transfer=0x%p.\n", engine->name, transfer); - - /* initialize number of descriptors of dequeued transfers */ - engine->desc_dequeued = 0; - - /* write lower 32-bit of bus address of transfer first descriptor */ - w = cpu_to_le32(PCI_DMA_L(transfer->desc_bus)); - dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_lo)\n", w, - (void *)&engine->sgdma_regs->first_desc_lo); - write_register(w, &engine->sgdma_regs->first_desc_lo, - (unsigned long)(&engine->sgdma_regs->first_desc_lo) - - (unsigned long)(&engine->sgdma_regs)); - /* write upper 32-bit of bus address of transfer first descriptor */ - w = cpu_to_le32(PCI_DMA_H(transfer->desc_bus)); - dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_hi)\n", w, - (void *)&engine->sgdma_regs->first_desc_hi); - write_register(w, &engine->sgdma_regs->first_desc_hi, - (unsigned long)(&engine->sgdma_regs->first_desc_hi) - - (unsigned 
long)(&engine->sgdma_regs)); - - if (transfer->desc_adjacent > 0) { - extra_adj = transfer->desc_adjacent - 1; - if (extra_adj > MAX_EXTRA_ADJ) - extra_adj = MAX_EXTRA_ADJ; - } - dbg_tfr("iowrite32(0x%08x to 0x%p) (first_desc_adjacent)\n", - extra_adj, (void *)&engine->sgdma_regs->first_desc_adjacent); - write_register(extra_adj, &engine->sgdma_regs->first_desc_adjacent, - (unsigned long)(&engine->sgdma_regs->first_desc_adjacent) - - (unsigned long)(&engine->sgdma_regs)); - - dbg_tfr("ioread32(0x%p) (dummy read flushes writes).\n", - &engine->regs->status); - mmiowb(); - - engine_start_mode_config(engine); - - engine_status_read(engine, 0, 0); - - dbg_tfr("%s engine 0x%p now running\n", engine->name, engine); - /* remember the engine is running */ - engine->running = 1; - return transfer; -} - -/** - * engine_service() - service an SG DMA engine - * - * must be called with engine->lock already acquired - * - * @engine pointer to struct xdma_engine - * - */ -static void engine_service_shutdown(struct xdma_engine *engine) -{ - /* if the engine stopped with RUN still asserted, de-assert RUN now */ - dbg_tfr("engine just went idle, resetting RUN_STOP.\n"); - xdma_engine_stop(engine); - engine->running = 0; - - /* awake task on engine's shutdown wait queue */ - wake_up_interruptible(&engine->shutdown_wq); -} - -struct xdma_transfer *engine_transfer_completion(struct xdma_engine *engine, - struct xdma_transfer *transfer) -{ - BUG_ON(!engine); - BUG_ON(!transfer); - - /* synchronous I/O? 
*/ - /* awake task on transfer's wait queue */ - wake_up_interruptible(&transfer->wq); - - return transfer; -} - -struct xdma_transfer *engine_service_transfer_list(struct xdma_engine *engine, - struct xdma_transfer *transfer, u32 *pdesc_completed) -{ - BUG_ON(!engine); - BUG_ON(!pdesc_completed); - - if (!transfer) { - pr_info("%s xfer empty, pdesc completed %u.\n", - engine->name, *pdesc_completed); - return NULL; - } - - /* - * iterate over all the transfers completed by the engine, - * except for the last (i.e. use > instead of >=). - */ - while (transfer && (!transfer->cyclic) && - (*pdesc_completed > transfer->desc_num)) { - /* remove this transfer from pdesc_completed */ - *pdesc_completed -= transfer->desc_num; - dbg_tfr("%s engine completed non-cyclic xfer 0x%p (%d desc)\n", - engine->name, transfer, transfer->desc_num); - /* remove completed transfer from list */ - list_del(engine->transfer_list.next); - /* add to dequeued number of descriptors during this run */ - engine->desc_dequeued += transfer->desc_num; - /* mark transfer as succesfully completed */ - transfer->state = TRANSFER_STATE_COMPLETED; - - /* Complete transfer - sets transfer to NULL if an async - * transfer has completed */ - transfer = engine_transfer_completion(engine, transfer); - - /* if exists, get the next transfer on the list */ - if (!list_empty(&engine->transfer_list)) { - transfer = list_entry(engine->transfer_list.next, - struct xdma_transfer, entry); - dbg_tfr("Non-completed transfer %p\n", transfer); - } else { - /* no further transfers? */ - transfer = NULL; - } - } - - return transfer; -} - -static void engine_err_handle(struct xdma_engine *engine, - struct xdma_transfer *transfer, u32 desc_completed) -{ - u32 value; - - /* - * The BUSY bit is expected to be clear now but older HW has a race - * condition which could cause it to be still set. If it's set, re-read - * and check again. If it's still set, log the issue. 
- */ - if (engine->status & XDMA_STAT_BUSY) { - value = read_register(&engine->regs->status); - if ((value & XDMA_STAT_BUSY) && printk_ratelimit()) - pr_info("%s has errors but is still BUSY\n", - engine->name); - } - - if (printk_ratelimit()) { - pr_info("%s, s 0x%x, aborted xfer 0x%p, cmpl %d/%d\n", - engine->name, engine->status, transfer, desc_completed, - transfer->desc_num); - } - - /* mark transfer as failed */ - transfer->state = TRANSFER_STATE_FAILED; - xdma_engine_stop(engine); -} - -struct xdma_transfer *engine_service_final_transfer(struct xdma_engine *engine, - struct xdma_transfer *transfer, u32 *pdesc_completed) -{ - BUG_ON(!engine); - BUG_ON(!transfer); - BUG_ON(!pdesc_completed); - - /* inspect the current transfer */ - if (transfer) { - if (((engine->dir == DMA_FROM_DEVICE) && - (engine->status & XDMA_STAT_C2H_ERR_MASK)) || - ((engine->dir == DMA_TO_DEVICE) && - (engine->status & XDMA_STAT_H2C_ERR_MASK))) { - pr_info("engine %s, status error 0x%x.\n", - engine->name, engine->status); - engine_status_dump(engine); - engine_err_handle(engine, transfer, *pdesc_completed); - goto transfer_del; - } - - if (engine->status & XDMA_STAT_BUSY) - dbg_tfr("Engine %s is unexpectedly busy - ignoring\n", - engine->name); - - /* the engine stopped on current transfer? 
*/ - if (*pdesc_completed < transfer->desc_num) { - transfer->state = TRANSFER_STATE_FAILED; - pr_info("%s, xfer 0x%p, stopped half-way, %d/%d.\n", - engine->name, transfer, *pdesc_completed, - transfer->desc_num); - } else { - dbg_tfr("engine %s completed transfer\n", engine->name); - dbg_tfr("Completed transfer ID = 0x%p\n", transfer); - dbg_tfr("*pdesc_completed=%d, transfer->desc_num=%d", - *pdesc_completed, transfer->desc_num); - - if (!transfer->cyclic) { - /* - * if the engine stopped on this transfer, - * it should be the last - */ - WARN_ON(*pdesc_completed > transfer->desc_num); - } - /* mark transfer as succesfully completed */ - transfer->state = TRANSFER_STATE_COMPLETED; - } - -transfer_del: - /* remove completed transfer from list */ - list_del(engine->transfer_list.next); - /* add to dequeued number of descriptors during this run */ - engine->desc_dequeued += transfer->desc_num; - - /* - * Complete transfer - sets transfer to NULL if an asynchronous - * transfer has completed - */ - transfer = engine_transfer_completion(engine, transfer); - } - - return transfer; -} - -static void engine_service_perf(struct xdma_engine *engine, u32 desc_completed) -{ - BUG_ON(!engine); - - /* performance measurement is running? */ - if (engine->xdma_perf) { - /* a descriptor was completed? */ - if (engine->status & XDMA_STAT_DESC_COMPLETED) { - engine->xdma_perf->iterations = desc_completed; - dbg_perf("transfer->xdma_perf->iterations=%d\n", - engine->xdma_perf->iterations); - } - - /* a descriptor stopped the engine? 
*/ - if (engine->status & XDMA_STAT_DESC_STOPPED) { - engine->xdma_perf->stopped = 1; - /* - * wake any XDMA_PERF_IOCTL_STOP waiting for - * the performance run to finish - */ - wake_up_interruptible(&engine->xdma_perf_wq); - dbg_perf("transfer->xdma_perf stopped\n"); - } - } -} - -static void engine_transfer_dequeue(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer; - - BUG_ON(!engine); - - /* pick first transfer on the queue (was submitted to the engine) */ - transfer = list_entry(engine->transfer_list.next, struct xdma_transfer, - entry); - BUG_ON(!transfer); - BUG_ON(transfer != &engine->cyclic_req->xfer); - dbg_tfr("%s engine completed cyclic transfer 0x%p (%d desc).\n", - engine->name, transfer, transfer->desc_num); - /* remove completed transfer from list */ - list_del(engine->transfer_list.next); -} - -static int engine_ring_process(struct xdma_engine *engine) -{ - struct xdma_result *result; - int start; - int eop_count = 0; - - BUG_ON(!engine); - result = engine->cyclic_result; - BUG_ON(!result); - - /* where we start receiving in the ring buffer */ - start = engine->rx_tail; - - /* iterate through all newly received RX result descriptors */ - dbg_tfr("%s, result %d, 0x%x, len 0x%x.\n", - engine->name, engine->rx_tail, result[engine->rx_tail].status, - result[engine->rx_tail].length); - while (result[engine->rx_tail].status && !engine->rx_overrun) { - /* EOP bit set in result? */ - if (result[engine->rx_tail].status & RX_STATUS_EOP){ - eop_count++; - } - - /* increment tail pointer */ - engine->rx_tail = (engine->rx_tail + 1) % CYCLIC_RX_PAGES_MAX; - - dbg_tfr("%s, head %d, tail %d, 0x%x, len 0x%x.\n", - engine->name, engine->rx_head, engine->rx_tail, - result[engine->rx_tail].status, - result[engine->rx_tail].length); - - /* overrun? 
*/ - if (engine->rx_tail == engine->rx_head) { - dbg_tfr("%s: overrun\n", engine->name); - /* flag to user space that overrun has occurred */ - engine->rx_overrun = 1; - } - } - - return eop_count; -} - -static int engine_service_cyclic_polled(struct xdma_engine *engine) -{ - int eop_count = 0; - int rc = 0; - struct xdma_poll_wb *writeback_data; - u32 sched_limit = 0; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - - while (eop_count == 0) { - if (sched_limit != 0) { - if ((sched_limit % NUM_POLLS_PER_SCHED) == 0) - schedule(); - } - sched_limit++; - - /* Monitor descriptor writeback address for errors */ - if ((writeback_data->completed_desc_count) & WB_ERR_MASK) { - rc = -1; - break; - } - - eop_count = engine_ring_process(engine); - } - - if (eop_count == 0) { - engine_status_read(engine, 1, 0); - if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { - /* transfers on queue? */ - if (!list_empty(&engine->transfer_list)) - engine_transfer_dequeue(engine); - - engine_service_shutdown(engine); - } - } - - return rc; -} - -static int engine_service_cyclic_interrupt(struct xdma_engine *engine) -{ - int eop_count = 0; - struct xdma_transfer *xfer; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - engine_status_read(engine, 1, 0); - - eop_count = engine_ring_process(engine); - /* - * wake any reader on EOP, as one or more packets are now in - * the RX buffer - */ - xfer = &engine->cyclic_req->xfer; - if(enable_credit_mp){ - if (eop_count > 0) { - //engine->eop_found = 1; - } - wake_up_interruptible(&xfer->wq); - }else{ - if (eop_count > 0) { - /* awake task on transfer's wait queue */ - dbg_tfr("wake_up_interruptible() due to %d EOP's\n", eop_count); - engine->eop_found = 1; - wake_up_interruptible(&xfer->wq); - } - } - - /* engine was running but is no longer busy? 
*/ - if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { - /* transfers on queue? */ - if (!list_empty(&engine->transfer_list)) - engine_transfer_dequeue(engine); - - engine_service_shutdown(engine); - } - - return 0; -} - -/* must be called with engine->lock already acquired */ -static int engine_service_cyclic(struct xdma_engine *engine) -{ - int rc = 0; - - dbg_tfr("engine_service_cyclic()"); - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - if (poll_mode) - rc = engine_service_cyclic_polled(engine); - else - rc = engine_service_cyclic_interrupt(engine); - - return rc; -} - - -static void engine_service_resume(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer_started; - - BUG_ON(!engine); - - /* engine stopped? */ - if (!engine->running) { - /* in the case of shutdown, let it finish what's in the Q */ - if (!list_empty(&engine->transfer_list)) { - /* (re)start engine */ - transfer_started = engine_start(engine); - dbg_tfr("re-started %s engine with pending xfer 0x%p\n", - engine->name, transfer_started); - /* engine was requested to be shutdown? */ - } else if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { - engine->shutdown |= ENGINE_SHUTDOWN_IDLE; - /* awake task on engine's shutdown wait queue */ - wake_up_interruptible(&engine->shutdown_wq); - } else { - dbg_tfr("no pending transfers, %s engine stays idle.\n", - engine->name); - } - } else { - /* engine is still running? 
*/ - if (list_empty(&engine->transfer_list)) { - pr_warn("no queued transfers but %s engine running!\n", - engine->name); - WARN_ON(1); - } - } -} - -/** - * engine_service() - service an SG DMA engine - * - * must be called with engine->lock already acquired - * - * @engine pointer to struct xdma_engine - * - */ -static int engine_service(struct xdma_engine *engine, int desc_writeback) -{ - struct xdma_transfer *transfer = NULL; - u32 desc_count = desc_writeback & WB_COUNT_MASK; - u32 err_flag = desc_writeback & WB_ERR_MASK; - int rv = 0; - struct xdma_poll_wb *wb_data; - - BUG_ON(!engine); - - /* If polling detected an error, signal to the caller */ - if (err_flag) - rv = -1; - - /* Service the engine */ - if (!engine->running) { - dbg_tfr("Engine was not running!!! Clearing status\n"); - engine_status_read(engine, 1, 0); - return 0; - } - - /* - * If called by the ISR or polling detected an error, read and clear - * engine status. For polled mode descriptor completion, this read is - * unnecessary and is skipped to reduce latency - */ - if ((desc_count == 0) || (err_flag != 0)) - engine_status_read(engine, 1, 0); - - /* - * engine was running but is no longer busy, or writeback occurred, - * shut down - */ - if ((engine->running && !(engine->status & XDMA_STAT_BUSY)) || - (desc_count != 0)) - engine_service_shutdown(engine); - - /* - * If called from the ISR, or if an error occurred, the descriptor - * count will be zero. In this scenario, read the descriptor count - * from HW. In polled mode descriptor completion, this read is - * unnecessary and is skipped to reduce latency - */ - if (!desc_count) - desc_count = read_register(&engine->regs->completed_desc_count); - dbg_tfr("desc_count = %d\n", desc_count); - - /* transfers on queue? 
*/ - if (!list_empty(&engine->transfer_list)) { - /* pick first transfer on queue (was submitted to the engine) */ - transfer = list_entry(engine->transfer_list.next, - struct xdma_transfer, entry); - - dbg_tfr("head of queue transfer 0x%p has %d descriptors\n", - transfer, (int)transfer->desc_num); - - dbg_tfr("Engine completed %d desc, %d not yet dequeued\n", - (int)desc_count, - (int)desc_count - engine->desc_dequeued); - - engine_service_perf(engine, desc_count); - } - - /* account for already dequeued transfers during this engine run */ - desc_count -= engine->desc_dequeued; - - /* Process all but the last transfer */ - transfer = engine_service_transfer_list(engine, transfer, &desc_count); - - /* - * Process final transfer - includes checks of number of descriptors to - * detect faulty completion - */ - transfer = engine_service_final_transfer(engine, transfer, &desc_count); - - /* Before starting engine again, clear the writeback data */ - if (poll_mode) { - wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - wb_data->completed_desc_count = 0; - } - - /* Restart the engine following the servicing */ - engine_service_resume(engine); - - return 0; -} - -/* engine_service_work */ -static void engine_service_work(struct work_struct *work) -{ - struct xdma_engine *engine; - unsigned long flags; - - engine = container_of(work, struct xdma_engine, work); - BUG_ON(engine->magic != MAGIC_ENGINE); - - /* lock the engine */ - spin_lock_irqsave(&engine->lock, flags); - - dbg_tfr("engine_service() for %s engine %p\n", - engine->name, engine); - if (engine->cyclic_req) - engine_service_cyclic(engine); - else - engine_service(engine, 0); - - /* re-enable interrupts for this engine */ - if (engine->xdev->msix_enabled){ - write_register(engine->interrupt_enable_mask_value, - &engine->regs->interrupt_enable_mask_w1s, - (unsigned long)(&engine->regs->interrupt_enable_mask_w1s) - - (unsigned long)(&engine->regs)); - } else - channel_interrupts_enable(engine->xdev, 
engine->irq_bitmask); - - /* unlock the engine */ - spin_unlock_irqrestore(&engine->lock, flags); -} - -static u32 engine_service_wb_monitor(struct xdma_engine *engine, - u32 expected_wb) -{ - struct xdma_poll_wb *wb_data; - u32 desc_wb = 0; - u32 sched_limit = 0; - unsigned long timeout; - - BUG_ON(!engine); - wb_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - - /* - * Poll the writeback location for the expected number of - * descriptors / error events This loop is skipped for cyclic mode, - * where the expected_desc_count passed in is zero, since it cannot be - * determined before the function is called - */ - - timeout = jiffies + (POLL_TIMEOUT_SECONDS * HZ); - while (expected_wb != 0) { - desc_wb = wb_data->completed_desc_count; - - if (desc_wb & WB_ERR_MASK) - break; - else if (desc_wb == expected_wb) - break; - - /* RTO - prevent system from hanging in polled mode */ - if (time_after(jiffies, timeout)) { - dbg_tfr("Polling timeout occurred"); - dbg_tfr("desc_wb = 0x%08x, expected 0x%08x\n", desc_wb, - expected_wb); - if ((desc_wb & WB_COUNT_MASK) > expected_wb) - desc_wb = expected_wb | WB_ERR_MASK; - - break; - } - - /* - * Define NUM_POLLS_PER_SCHED to limit how much time is spent - * in the scheduler - */ - - if (sched_limit != 0) { - if ((sched_limit % NUM_POLLS_PER_SCHED) == 0) - schedule(); - } - sched_limit++; - } - - return desc_wb; -} - -static int engine_service_poll(struct xdma_engine *engine, - u32 expected_desc_count) -{ - struct xdma_poll_wb *writeback_data; - u32 desc_wb = 0; - unsigned long flags; - int rv = 0; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - writeback_data = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - - if ((expected_desc_count & WB_COUNT_MASK) != expected_desc_count) { - dbg_tfr("Queued descriptor count is larger than supported\n"); - return -1; - } - - /* - * Poll the writeback location for the expected number of - * descriptors / error events This loop is skipped for cyclic mode, - 
* where the expected_desc_count passed in is zero, since it cannot be - * determined before the function is called - */ - - desc_wb = engine_service_wb_monitor(engine, expected_desc_count); - - spin_lock_irqsave(&engine->lock, flags); - dbg_tfr("%s service.\n", engine->name); - if (engine->cyclic_req) { - rv = engine_service_cyclic(engine); - } else { - rv = engine_service(engine, desc_wb); - } - spin_unlock_irqrestore(&engine->lock, flags); - - return rv; -} - -static irqreturn_t user_irq_service(int irq, struct xdma_user_irq *user_irq) -{ - unsigned long flags; - - BUG_ON(!user_irq); - - if (user_irq->handler) - return user_irq->handler(user_irq->user_idx, user_irq->dev); - - spin_lock_irqsave(&(user_irq->events_lock), flags); - if (!user_irq->events_irq) { - user_irq->events_irq = 1; - wake_up_interruptible(&(user_irq->events_wq)); - } - spin_unlock_irqrestore(&(user_irq->events_lock), flags); - - return IRQ_HANDLED; -} - -/* - * xdma_isr() - Interrupt handler - * - * @dev_id pointer to xdma_dev - */ -static irqreturn_t xdma_isr(int irq, void *dev_id) -{ - u32 ch_irq; - u32 user_irq; - u32 mask; - struct xdma_dev *xdev; - struct interrupt_regs *irq_regs; - - dbg_irq("(irq=%d, dev 0x%p) <<<< ISR.\n", irq, dev_id); - BUG_ON(!dev_id); - xdev = (struct xdma_dev *)dev_id; - - if (!xdev) { - WARN_ON(!xdev); - dbg_irq("xdma_isr(irq=%d) xdev=%p ??\n", irq, xdev); - return IRQ_NONE; - } - - irq_regs = (struct interrupt_regs *)(xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - - /* read channel interrupt requests */ - ch_irq = read_register(&irq_regs->channel_int_request); - dbg_irq("ch_irq = 0x%08x\n", ch_irq); - - /* - * disable all interrupts that fired; these are re-enabled individually - * after the causing module has been fully serviced. 
- */ - if (ch_irq) - channel_interrupts_disable(xdev, ch_irq); - - /* read user interrupts - this read also flushes the above write */ - user_irq = read_register(&irq_regs->user_int_request); - dbg_irq("user_irq = 0x%08x\n", user_irq); - - if (user_irq) { - int user = 0; - u32 mask = 1; - int max = xdev->h2c_channel_max; - - for (; user < max && user_irq; user++, mask <<= 1) { - if (user_irq & mask) { - user_irq &= ~mask; - user_irq_service(irq, &xdev->user_irq[user]); - } - } - } - - mask = ch_irq & xdev->mask_irq_h2c; - if (mask) { - int channel = 0; - int max = xdev->h2c_channel_max; - - /* iterate over H2C (PCIe read) */ - for (channel = 0; channel < max && mask; channel++) { - struct xdma_engine *engine = &xdev->engine_h2c[channel]; - - /* engine present and its interrupt fired? */ - if((engine->irq_bitmask & mask) && - (engine->magic == MAGIC_ENGINE)) { - mask &= ~engine->irq_bitmask; - dbg_tfr("schedule_work, %s.\n", engine->name); - schedule_work(&engine->work); - } - } - } - - mask = ch_irq & xdev->mask_irq_c2h; - if (mask) { - int channel = 0; - int max = xdev->c2h_channel_max; - - /* iterate over C2H (PCIe write) */ - for (channel = 0; channel < max && mask; channel++) { - struct xdma_engine *engine = &xdev->engine_c2h[channel]; - - /* engine present and its interrupt fired? 
*/ - if((engine->irq_bitmask & mask) && - (engine->magic == MAGIC_ENGINE)) { - mask &= ~engine->irq_bitmask; - dbg_tfr("schedule_work, %s.\n", engine->name); - schedule_work(&engine->work); - } - } - } - - xdev->irq_count++; - return IRQ_HANDLED; -} - -/* - * xdma_user_irq() - Interrupt handler for user interrupts in MSI-X mode - * - * @dev_id pointer to xdma_dev - */ -static irqreturn_t xdma_user_irq(int irq, void *dev_id) -{ - struct xdma_user_irq *user_irq; - - dbg_irq("(irq=%d) <<<< INTERRUPT SERVICE ROUTINE\n", irq); - - BUG_ON(!dev_id); - user_irq = (struct xdma_user_irq *)dev_id; - - return user_irq_service(irq, user_irq); -} - -/* - * xdma_channel_irq() - Interrupt handler for channel interrupts in MSI-X mode - * - * @dev_id pointer to xdma_dev - */ -static irqreturn_t xdma_channel_irq(int irq, void *dev_id) -{ - struct xdma_dev *xdev; - struct xdma_engine *engine; - struct interrupt_regs *irq_regs; - - dbg_irq("(irq=%d) <<<< INTERRUPT service ROUTINE\n", irq); - BUG_ON(!dev_id); - - engine = (struct xdma_engine *)dev_id; - xdev = engine->xdev; - - if (!xdev) { - WARN_ON(!xdev); - dbg_irq("xdma_channel_irq(irq=%d) xdev=%p ??\n", irq, xdev); - return IRQ_NONE; - } - - irq_regs = (struct interrupt_regs *)(xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - - /* Disable the interrupt for this engine */ - write_register(engine->interrupt_enable_mask_value, - &engine->regs->interrupt_enable_mask_w1c, - (unsigned long) - (&engine->regs->interrupt_enable_mask_w1c) - - (unsigned long)(&engine->regs)); - /* Dummy read to flush the above write */ - read_register(&irq_regs->channel_int_pending); - /* Schedule the bottom half */ - schedule_work(&engine->work); - - /* - * RTO - need to protect access here if multiple MSI-X are used for - * user interrupts - */ - xdev->irq_count++; - return IRQ_HANDLED; -} - -/* - * Unmap the BAR regions that had been mapped earlier using map_bars() - */ -static void unmap_bars(struct xdma_dev *xdev, struct pci_dev *dev) -{ - int 
i; - - for (i = 0; i < XDMA_BAR_NUM; i++) { - /* is this BAR mapped? */ - if (xdev->bar[i]) { - /* unmap BAR */ - pci_iounmap(dev, xdev->bar[i]); - /* mark as unmapped */ - xdev->bar[i] = NULL; - } - } -} - -static int map_single_bar(struct xdma_dev *xdev, struct pci_dev *dev, int idx) -{ - resource_size_t bar_start; - resource_size_t bar_len; - resource_size_t map_len; - - bar_start = pci_resource_start(dev, idx); - bar_len = pci_resource_len(dev, idx); - map_len = bar_len; - - xdev->bar[idx] = NULL; - - /* do not map BARs with length 0. Note that start MAY be 0! */ - if (!bar_len) { - //pr_info("BAR #%d is not present - skipping\n", idx); - return 0; - } - - /* BAR size exceeds maximum desired mapping? */ - if (bar_len > INT_MAX) { - pr_info("Limit BAR %d mapping from %llu to %d bytes\n", idx, - (u64)bar_len, INT_MAX); - map_len = (resource_size_t)INT_MAX; - } - /* - * map the full device memory or IO region into kernel virtual - * address space - */ - dbg_init("BAR%d: %llu bytes to be mapped.\n", idx, (u64)map_len); - xdev->bar[idx] = pci_iomap(dev, idx, map_len); - - if (!xdev->bar[idx]) { - pr_info("Could not map BAR %d.\n", idx); - return -1; - } - - pr_info("BAR%d at 0x%llx mapped at 0x%p, length=%llu(/%llu)\n", idx, - (u64)bar_start, xdev->bar[idx], (u64)map_len, (u64)bar_len); - - return (int)map_len; -} - -static int is_config_bar(struct xdma_dev *xdev, int idx) -{ - u32 irq_id = 0; - u32 cfg_id = 0; - int flag = 0; - u32 mask = 0xffff0000; /* Compare only XDMA ID's not Version number */ - struct interrupt_regs *irq_regs = - (struct interrupt_regs *) (xdev->bar[idx] + XDMA_OFS_INT_CTRL); - struct config_regs *cfg_regs = - (struct config_regs *)(xdev->bar[idx] + XDMA_OFS_CONFIG); - - irq_id = read_register(&irq_regs->identifier); - cfg_id = read_register(&cfg_regs->identifier); - - if (((irq_id & mask)== IRQ_BLOCK_ID) && - ((cfg_id & mask)== CONFIG_BLOCK_ID)) { - dbg_init("BAR %d is the XDMA config BAR\n", idx); - flag = 1; - } else { - dbg_init("BAR %d is 
NOT the XDMA config BAR: 0x%x, 0x%x.\n", - idx, irq_id, cfg_id); - flag = 0; - } - - return flag; -} - -static void identify_bars(struct xdma_dev *xdev, int *bar_id_list, int num_bars, - int config_bar_pos) -{ - /* - * The following logic identifies which BARs contain what functionality - * based on the position of the XDMA config BAR and the number of BARs - * detected. The rules are that the user logic and bypass logic BARs - * are optional. When both are present, the XDMA config BAR will be the - * 2nd BAR detected (config_bar_pos = 1), with the user logic being - * detected first and the bypass being detected last. When one is - * omitted, the type of BAR present can be identified by whether the - * XDMA config BAR is detected first or last. When both are omitted, - * only the XDMA config BAR is present. This somewhat convoluted - * approach is used instead of relying on BAR numbers in order to work - * correctly with both 32-bit and 64-bit BARs. - */ - - BUG_ON(!xdev); - BUG_ON(!bar_id_list); - - dbg_init("xdev 0x%p, bars %d, config at %d.\n", - xdev, num_bars, config_bar_pos); - - switch (num_bars) { - case 1: - /* Only one BAR present - no extra work necessary */ - break; - - case 2: - if (config_bar_pos == 0) { - xdev->bypass_bar_idx = bar_id_list[1]; - } else if (config_bar_pos == 1) { - xdev->user_bar_idx = bar_id_list[0]; - } else { - pr_info("2, XDMA config BAR unexpected %d.\n", - config_bar_pos); - } - break; - - case 3: - case 4: - if ((config_bar_pos == 1) || (config_bar_pos == 2)) { - /* user bar at bar #0 */ - xdev->user_bar_idx = bar_id_list[0]; - /* bypass bar at the last bar */ - xdev->bypass_bar_idx = bar_id_list[num_bars - 1]; - } else { - pr_info("3/4, XDMA config BAR unexpected %d.\n", - config_bar_pos); - } - break; - - default: - /* Should not occur - warn user but safe to continue */ - pr_info("Unexpected # BARs (%d), XDMA config BAR only.\n", - num_bars); - break; - - } - pr_info("%d BARs: config %d, user %d, bypass %d.\n", - num_bars, 
config_bar_pos, xdev->user_bar_idx, - xdev->bypass_bar_idx); -} - -/* map_bars() -- map device regions into kernel virtual address space - * - * Map the device memory regions into kernel virtual address space after - * verifying their sizes respect the minimum sizes needed - */ -static int map_bars(struct xdma_dev *xdev, struct pci_dev *dev) -{ - int rv; - int i; - int bar_id_list[XDMA_BAR_NUM]; - int bar_id_idx = 0; - int config_bar_pos = 0; - - /* iterate through all the BARs */ - for (i = 0; i < XDMA_BAR_NUM; i++) { - int bar_len; - - bar_len = map_single_bar(xdev, dev, i); - if (bar_len == 0) { - continue; - } else if (bar_len < 0) { - rv = -EINVAL; - goto fail; - } - - /* Try to identify BAR as XDMA control BAR */ - if ((bar_len >= XDMA_BAR_SIZE) && (xdev->config_bar_idx < 0)) { - - if (is_config_bar(xdev, i)) { - xdev->config_bar_idx = i; - config_bar_pos = bar_id_idx; - pr_info("config bar %d, pos %d.\n", - xdev->config_bar_idx, config_bar_pos); - } - } - - bar_id_list[bar_id_idx] = i; - bar_id_idx++; - } - - /* The XDMA config BAR must always be present */ - if (xdev->config_bar_idx < 0) { - pr_info("Failed to detect XDMA config BAR\n"); - rv = -EINVAL; - goto fail; - } - - identify_bars(xdev, bar_id_list, bar_id_idx, config_bar_pos); - - /* successfully mapped all required BAR regions */ - return 0; - -fail: - /* unwind; unmap any BARs that we did map */ - unmap_bars(xdev, dev); - return rv; -} - -/* - * MSI-X interrupt: - * vectors, followed by vectors - */ - -/* - * RTO - code to detect if MSI/MSI-X capability exists is derived - * from linux/pci/msi.c - pci_msi_check_device - */ - -#ifndef arch_msi_check_device -int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) -{ - return 0; -} -#endif - -/* type = PCI_CAP_ID_MSI or PCI_CAP_ID_MSIX */ -static int msi_msix_capable(struct pci_dev *dev, int type) -{ - struct pci_bus *bus; - int ret; - - if (!dev || dev->no_msi) - return 0; - - for (bus = dev->bus; bus; bus = bus->parent) - if 
(bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return 0; - - ret = arch_msi_check_device(dev, 1, type); - if (ret) - return 0; - - if (!pci_find_capability(dev, type)) - return 0; - - return 1; -} - -static void disable_msi_msix(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - if (xdev->msix_enabled) { - pci_disable_msix(pdev); - xdev->msix_enabled = 0; - } else if (xdev->msi_enabled) { - pci_disable_msi(pdev); - xdev->msi_enabled = 0; - } -} - -static int enable_msi_msix(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - int rv = 0; - - BUG_ON(!xdev); - BUG_ON(!pdev); - - if (!interrupt_mode && msi_msix_capable(pdev, PCI_CAP_ID_MSIX)) { - int req_nvec = xdev->c2h_channel_max + xdev->h2c_channel_max + - xdev->user_max; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - dbg_init("Enabling MSI-X\n"); - rv = pci_alloc_irq_vectors(pdev, req_nvec, req_nvec, - PCI_IRQ_MSIX); -#else - int i; - - dbg_init("Enabling MSI-X\n"); - for (i = 0; i < req_nvec; i++) - xdev->entry[i].entry = i; - - rv = pci_enable_msix(pdev, xdev->entry, req_nvec); -#endif - if (rv < 0) - dbg_init("Couldn't enable MSI-X mode: %d\n", rv); - - xdev->msix_enabled = 1; - - } else if (interrupt_mode == 1 && - msi_msix_capable(pdev, PCI_CAP_ID_MSI)) { - /* enable message signalled interrupts */ - dbg_init("pci_enable_msi()\n"); - rv = pci_enable_msi(pdev); - if (rv < 0) - dbg_init("Couldn't enable MSI mode: %d\n", rv); - xdev->msi_enabled = 1; - - } else { - dbg_init("MSI/MSI-X not detected - using legacy interrupts\n"); - } - - return rv; -} - -static void pci_check_intr_pend(struct pci_dev *pdev) -{ - u16 v; - - pci_read_config_word(pdev, PCI_STATUS, &v); - if (v & PCI_STATUS_INTERRUPT) { - pr_info("%s PCI STATUS Interrupt pending 0x%x.\n", - dev_name(&pdev->dev), v); - pci_write_config_word(pdev, PCI_STATUS, PCI_STATUS_INTERRUPT); - } -} - -static void pci_keep_intx_enabled(struct pci_dev *pdev) -{ - /* workaround to a h/w bug: - * when msix/msi become unavaile, default to legacy. 
- * However the legacy enable was not checked. - * If the legacy was disabled, no ack then everything stuck - */ - u16 pcmd, pcmd_new; - - pci_read_config_word(pdev, PCI_COMMAND, &pcmd); - pcmd_new = pcmd & ~PCI_COMMAND_INTX_DISABLE; - if (pcmd_new != pcmd) { - pr_info("%s: clear INTX_DISABLE, 0x%x -> 0x%x.\n", - dev_name(&pdev->dev), pcmd, pcmd_new); - pci_write_config_word(pdev, PCI_COMMAND, pcmd_new); - } -} - -static void prog_irq_msix_user(struct xdma_dev *xdev, bool clear) -{ - /* user */ - struct interrupt_regs *int_regs = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - u32 i = xdev->c2h_channel_max + xdev->h2c_channel_max; - u32 max = i + xdev->user_max; - int j; - - for (j = 0; i < max; j++) { - u32 val = 0; - int k; - int shift = 0; - - if (clear) - i += 4; - else - for (k = 0; k < 4 && i < max; i++, k++, shift += 8) - val |= (i & 0x1f) << shift; - - write_register(val, &int_regs->user_msi_vector[j], - XDMA_OFS_INT_CTRL + - ((unsigned long)&int_regs->user_msi_vector[j] - - (unsigned long)int_regs)); - - dbg_init("vector %d, 0x%x.\n", j, val); - } -} - -static void prog_irq_msix_channel(struct xdma_dev *xdev, bool clear) -{ - struct interrupt_regs *int_regs = (struct interrupt_regs *) - (xdev->bar[xdev->config_bar_idx] + - XDMA_OFS_INT_CTRL); - u32 max = xdev->c2h_channel_max + xdev->h2c_channel_max; - u32 i; - int j; - - /* engine */ - for (i = 0, j = 0; i < max; j++) { - u32 val = 0; - int k; - int shift = 0; - - if (clear) - i += 4; - else - for (k = 0; k < 4 && i < max; i++, k++, shift += 8) - val |= (i & 0x1f) << shift; - - write_register(val, &int_regs->channel_msi_vector[j], - XDMA_OFS_INT_CTRL + - ((unsigned long)&int_regs->channel_msi_vector[j] - - (unsigned long)int_regs)); - dbg_init("vector %d, 0x%x.\n", j, val); - } -} - -static void irq_msix_channel_teardown(struct xdma_dev *xdev) -{ - struct xdma_engine *engine; - int j = 0; - int i = 0; - - if (!xdev->msix_enabled) - return; - - 
prog_irq_msix_channel(xdev, 1); - - engine = xdev->engine_h2c; - for (i = 0; i < xdev->h2c_channel_max; i++, j++, engine++) { - if (!engine->msix_irq_line) - break; - dbg_sg("Release IRQ#%d for engine %p\n", engine->msix_irq_line, - engine); - free_irq(engine->msix_irq_line, engine); - } - - engine = xdev->engine_c2h; - for (i = 0; i < xdev->c2h_channel_max; i++, j++, engine++) { - if (!engine->msix_irq_line) - break; - dbg_sg("Release IRQ#%d for engine %p\n", engine->msix_irq_line, - engine); - free_irq(engine->msix_irq_line, engine); - } -} - -static int irq_msix_channel_setup(struct xdma_dev *xdev) -{ - int i; - int j = xdev->h2c_channel_max; - int rv = 0; - u32 vector; - struct xdma_engine *engine; - - BUG_ON(!xdev); - if (!xdev->msix_enabled) - return 0; - - engine = xdev->engine_h2c; - for (i = 0; i < xdev->h2c_channel_max; i++, engine++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - vector = pci_irq_vector(xdev->pdev, i); -#else - vector = xdev->entry[i].vector; -#endif - rv = request_irq(vector, xdma_channel_irq, 0, xdev->mod_name, - engine); - if (rv) { - pr_info("requesti irq#%d failed %d, engine %s.\n", - vector, rv, engine->name); - return rv; - } - pr_info("engine %s, irq#%d.\n", engine->name, vector); - engine->msix_irq_line = vector; - } - - engine = xdev->engine_c2h; - for (i = 0; i < xdev->c2h_channel_max; i++, j++, engine++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - vector = pci_irq_vector(xdev->pdev, j); -#else - vector = xdev->entry[j].vector; -#endif - rv = request_irq(vector, xdma_channel_irq, 0, xdev->mod_name, - engine); - if (rv) { - pr_info("requesti irq#%d failed %d, engine %s.\n", - vector, rv, engine->name); - return rv; - } - pr_info("engine %s, irq#%d.\n", engine->name, vector); - engine->msix_irq_line = vector; - } - - return 0; -} - -static void irq_msix_user_teardown(struct xdma_dev *xdev) -{ - int i; - int j = xdev->h2c_channel_max + xdev->c2h_channel_max; - - BUG_ON(!xdev); - - if (!xdev->msix_enabled) - 
return; - - prog_irq_msix_user(xdev, 1); - - for (i = 0; i < xdev->user_max; i++, j++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - u32 vector = pci_irq_vector(xdev->pdev, j); -#else - u32 vector = xdev->entry[j].vector; -#endif - dbg_init("user %d, releasing IRQ#%d\n", i, vector); - free_irq(vector, &xdev->user_irq[i]); - } -} - -static int irq_msix_user_setup(struct xdma_dev *xdev) -{ - int i; - int j = xdev->h2c_channel_max + xdev->c2h_channel_max; - int rv = 0; - - /* vectors set in probe_scan_for_msi() */ - for (i = 0; i < xdev->user_max; i++, j++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - u32 vector = pci_irq_vector(xdev->pdev, j); -#else - u32 vector = xdev->entry[j].vector; -#endif - rv = request_irq(vector, xdma_user_irq, 0, xdev->mod_name, - &xdev->user_irq[i]); - if (rv) { - pr_info("user %d couldn't use IRQ#%d, %d\n", - i, vector, rv); - break; - } - pr_info("%d-USR-%d, IRQ#%d with 0x%p\n", xdev->idx, i, vector, - &xdev->user_irq[i]); - } - - /* If any errors occur, free IRQs that were successfully requested */ - if (rv) { - for (i--, j--; i >= 0; i--, j--) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) - u32 vector = pci_irq_vector(xdev->pdev, j); -#else - u32 vector = xdev->entry[j].vector; -#endif - free_irq(vector, &xdev->user_irq[i]); - } - } - - return rv; -} - -static int irq_msi_setup(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - int rv; - - xdev->irq_line = (int)pdev->irq; - rv = request_irq(pdev->irq, xdma_isr, 0, xdev->mod_name, xdev); - if (rv) - dbg_init("Couldn't use IRQ#%d, %d\n", pdev->irq, rv); - else - dbg_init("Using IRQ#%d with 0x%p\n", pdev->irq, xdev); - - return rv; -} - -static int irq_legacy_setup(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - u32 w; - u8 val; - void *reg; - int rv; - - pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &val); - dbg_init("Legacy Interrupt register value = %d\n", val); - if (val > 1) { - val--; - w = (val<<24) | (val<<16) | (val<<8)| val; - /* Program IRQ Block Channel 
vactor and IRQ Block User vector - * with Legacy interrupt value */ - reg = xdev->bar[xdev->config_bar_idx] + 0x2080; // IRQ user - write_register(w, reg, 0x2080); - write_register(w, reg+0x4, 0x2084); - write_register(w, reg+0x8, 0x2088); - write_register(w, reg+0xC, 0x208C); - reg = xdev->bar[xdev->config_bar_idx] + 0x20A0; // IRQ Block - write_register(w, reg, 0x20A0); - write_register(w, reg+0x4, 0x20A4); - } - - xdev->irq_line = (int)pdev->irq; - rv = request_irq(pdev->irq, xdma_isr, IRQF_SHARED, xdev->mod_name, - xdev); - if (rv) - dbg_init("Couldn't use IRQ#%d, %d\n", pdev->irq, rv); - else - dbg_init("Using IRQ#%d with 0x%p\n", pdev->irq, xdev); - - return rv; -} - -static void irq_teardown(struct xdma_dev *xdev) -{ - if (xdev->msix_enabled) { - irq_msix_channel_teardown(xdev); - irq_msix_user_teardown(xdev); - } else if (xdev->irq_line != -1) { - dbg_init("Releasing IRQ#%d\n", xdev->irq_line); - free_irq(xdev->irq_line, xdev); - } -} - -static int irq_setup(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - pci_keep_intx_enabled(pdev); - - if (xdev->msix_enabled) { - int rv = irq_msix_channel_setup(xdev); - if (rv) - return rv; - rv = irq_msix_user_setup(xdev); - if (rv) - return rv; - prog_irq_msix_channel(xdev, 0); - prog_irq_msix_user(xdev, 0); - - return 0; - } else if (xdev->msi_enabled) - return irq_msi_setup(xdev, pdev); - - return irq_legacy_setup(xdev, pdev); -} - -#ifdef __LIBXDMA_DEBUG__ -static void dump_desc(struct xdma_desc *desc_virt) -{ - int j; - u32 *p = (u32 *)desc_virt; - static char * const field_name[] = { - "magic|extra_adjacent|control", "bytes", "src_addr_lo", - "src_addr_hi", "dst_addr_lo", "dst_addr_hi", "next_addr", - "next_addr_pad"}; - char *dummy; - - /* remove warning about unused variable when debug printing is off */ - dummy = field_name[0]; - - for (j = 0; j < 8; j += 1) { - pr_info("0x%08lx/0x%02lx: 0x%08x 0x%08x %s\n", - (uintptr_t)p, (uintptr_t)p & 15, (int)*p, - le32_to_cpu(*p), field_name[j]); - p++; - } - 
pr_info("\n"); -} - -static void transfer_dump(struct xdma_transfer *transfer) -{ - int i; - struct xdma_desc *desc_virt = transfer->desc_virt; - - pr_info("xfer 0x%p, state 0x%x, f 0x%x, dir %d, len %u, last %d.\n", - transfer, transfer->state, transfer->flags, transfer->dir, - transfer->len, transfer->last_in_request); - - pr_info("transfer 0x%p, desc %d, bus 0x%llx, adj %d.\n", - transfer, transfer->desc_num, (u64)transfer->desc_bus, - transfer->desc_adjacent); - for (i = 0; i < transfer->desc_num; i += 1) - dump_desc(desc_virt + i); -} -#endif /* __LIBXDMA_DEBUG__ */ - -/* xdma_desc_alloc() - Allocate cache-coherent array of N descriptors. - * - * Allocates an array of 'number' descriptors in contiguous PCI bus addressable - * memory. Chains the descriptors as a singly-linked list; the descriptor's - * next * pointer specifies the bus address of the next descriptor. - * - * - * @dev Pointer to pci_dev - * @number Number of descriptors to be allocated - * @desc_bus_p Pointer where to store the first descriptor bus address - * - * @return Virtual address of the first descriptor - * - */ -static void transfer_desc_init(struct xdma_transfer *transfer, int count) -{ - struct xdma_desc *desc_virt = transfer->desc_virt; - dma_addr_t desc_bus = transfer->desc_bus; - int i; - int adj = count - 1; - int extra_adj; - u32 temp_control; - - BUG_ON(count > XDMA_TRANSFER_MAX_DESC); - - /* create singly-linked list for SG DMA controller */ - for (i = 0; i < count - 1; i++) { - /* increment bus address to next in array */ - desc_bus += sizeof(struct xdma_desc); - - /* singly-linked list uses bus addresses */ - desc_virt[i].next_lo = cpu_to_le32(PCI_DMA_L(desc_bus)); - desc_virt[i].next_hi = cpu_to_le32(PCI_DMA_H(desc_bus)); - desc_virt[i].bytes = cpu_to_le32(0); - - /* any adjacent descriptors? 
*/ - if (adj > 0) { - extra_adj = adj - 1; - if (extra_adj > MAX_EXTRA_ADJ) - extra_adj = MAX_EXTRA_ADJ; - - adj--; - } else { - extra_adj = 0; - } - - temp_control = DESC_MAGIC | (extra_adj << 8); - - desc_virt[i].control = cpu_to_le32(temp_control); - } - /* { i = number - 1 } */ - /* zero the last descriptor next pointer */ - desc_virt[i].next_lo = cpu_to_le32(0); - desc_virt[i].next_hi = cpu_to_le32(0); - desc_virt[i].bytes = cpu_to_le32(0); - - temp_control = DESC_MAGIC; - - desc_virt[i].control = cpu_to_le32(temp_control); -} - -/* xdma_desc_link() - Link two descriptors - * - * Link the first descriptor to a second descriptor, or terminate the first. - * - * @first first descriptor - * @second second descriptor, or NULL if first descriptor must be set as last. - * @second_bus bus address of second descriptor - */ -static void xdma_desc_link(struct xdma_desc *first, struct xdma_desc *second, - dma_addr_t second_bus) -{ - /* - * remember reserved control in first descriptor, but zero - * extra_adjacent! - */ - /* RTO - what's this about? Shouldn't it be 0x0000c0ffUL? */ - u32 control = le32_to_cpu(first->control) & 0x0000f0ffUL; - /* second descriptor given? 
*/ - if (second) { - /* - * link last descriptor of 1st array to first descriptor of - * 2nd array - */ - first->next_lo = cpu_to_le32(PCI_DMA_L(second_bus)); - first->next_hi = cpu_to_le32(PCI_DMA_H(second_bus)); - WARN_ON(first->next_hi); - /* no second descriptor given */ - } else { - /* first descriptor is the last */ - first->next_lo = 0; - first->next_hi = 0; - } - /* merge magic, extra_adjacent and control field */ - control |= DESC_MAGIC; - - /* write bytes and next_num */ - first->control = cpu_to_le32(control); -} - -/* xdma_desc_adjacent -- Set how many descriptors are adjacent to this one */ -static void xdma_desc_adjacent(struct xdma_desc *desc, int next_adjacent) -{ - int extra_adj = 0; - /* remember reserved and control bits */ - u32 control = le32_to_cpu(desc->control) & 0x0000f0ffUL; - u32 max_adj_4k = 0; - - if (next_adjacent > 0) { - extra_adj = next_adjacent - 1; - if (extra_adj > MAX_EXTRA_ADJ){ - extra_adj = MAX_EXTRA_ADJ; - } - max_adj_4k = (0x1000 - ((le32_to_cpu(desc->next_lo))&0xFFF))/32 - 1; - if (extra_adj>max_adj_4k) { - extra_adj = max_adj_4k; - } - if(extra_adj<0){ - printk("Warning: extra_adj<0, converting it to 0\n"); - extra_adj = 0; - } - } - /* merge adjacent and control field */ - control |= 0xAD4B0000UL | (extra_adj << 8); - /* write control and next_adjacent */ - desc->control = cpu_to_le32(control); -} - -/* xdma_desc_control -- Set complete control field of a descriptor. */ -static void xdma_desc_control_set(struct xdma_desc *first, u32 control_field) -{ - /* remember magic and adjacent number */ - u32 control = le32_to_cpu(first->control) & ~(LS_BYTE_MASK); - - BUG_ON(control_field & ~(LS_BYTE_MASK)); - /* merge adjacent and control field */ - control |= control_field; - /* write control and next_adjacent */ - first->control = cpu_to_le32(control); -} - -/* xdma_desc_clear -- Clear bits in control field of a descriptor. 
*/ -static void xdma_desc_control_clear(struct xdma_desc *first, u32 clear_mask) -{ - /* remember magic and adjacent number */ - u32 control = le32_to_cpu(first->control); - - BUG_ON(clear_mask & ~(LS_BYTE_MASK)); - - /* merge adjacent and control field */ - control &= (~clear_mask); - /* write control and next_adjacent */ - first->control = cpu_to_le32(control); -} - -/* xdma_desc_done - recycle cache-coherent linked list of descriptors. - * - * @dev Pointer to pci_dev - * @number Number of descriptors to be allocated - * @desc_virt Pointer to (i.e. virtual address of) first descriptor in list - * @desc_bus Bus address of first descriptor in list - */ -static inline void xdma_desc_done(struct xdma_desc *desc_virt) -{ - memset(desc_virt, 0, XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc)); -} - -/* xdma_desc() - Fill a descriptor with the transfer details - * - * @desc pointer to descriptor to be filled - * @addr root complex address - * @ep_addr end point address - * @len number of bytes, must be a (non-negative) multiple of 4. - * @dir, dma direction - * is the end point address. If zero, vice versa. 
- * - * Does not modify the next pointer - */ -static void xdma_desc_set(struct xdma_desc *desc, dma_addr_t rc_bus_addr, - u64 ep_addr, int len, int dir) -{ - /* transfer length */ - desc->bytes = cpu_to_le32(len); - if (dir == DMA_TO_DEVICE) { - /* read from root complex memory (source address) */ - desc->src_addr_lo = cpu_to_le32(PCI_DMA_L(rc_bus_addr)); - desc->src_addr_hi = cpu_to_le32(PCI_DMA_H(rc_bus_addr)); - /* write to end point address (destination address) */ - desc->dst_addr_lo = cpu_to_le32(PCI_DMA_L(ep_addr)); - desc->dst_addr_hi = cpu_to_le32(PCI_DMA_H(ep_addr)); - } else { - /* read from end point address (source address) */ - desc->src_addr_lo = cpu_to_le32(PCI_DMA_L(ep_addr)); - desc->src_addr_hi = cpu_to_le32(PCI_DMA_H(ep_addr)); - /* write to root complex memory (destination address) */ - desc->dst_addr_lo = cpu_to_le32(PCI_DMA_L(rc_bus_addr)); - desc->dst_addr_hi = cpu_to_le32(PCI_DMA_H(rc_bus_addr)); - } -} - -/* - * should hold the engine->lock; - */ -static void transfer_abort(struct xdma_engine *engine, - struct xdma_transfer *transfer) -{ - struct xdma_transfer *head; - - BUG_ON(!engine); - BUG_ON(!transfer); - BUG_ON(transfer->desc_num == 0); - - pr_info("abort transfer 0x%p, desc %d, engine desc queued %d.\n", - transfer, transfer->desc_num, engine->desc_dequeued); - - head = list_entry(engine->transfer_list.next, struct xdma_transfer, - entry); - if (head == transfer) - list_del(engine->transfer_list.next); - else - pr_info("engine %s, transfer 0x%p NOT found, 0x%p.\n", - engine->name, transfer, head); - - if (transfer->state == TRANSFER_STATE_SUBMITTED) - transfer->state = TRANSFER_STATE_ABORTED; -} - -/* transfer_queue() - Queue a DMA transfer on the engine - * - * @engine DMA engine doing the transfer - * @transfer DMA transfer submitted to the engine - * - * Takes and releases the engine spinlock - */ -static int transfer_queue(struct xdma_engine *engine, - struct xdma_transfer *transfer) -{ - int rv = 0; - struct xdma_transfer 
*transfer_started; - struct xdma_dev *xdev; - unsigned long flags; - - BUG_ON(!engine); - BUG_ON(!engine->xdev); - BUG_ON(!transfer); - BUG_ON(transfer->desc_num == 0); - dbg_tfr("transfer_queue(transfer=0x%p).\n", transfer); - - xdev = engine->xdev; - if (xdma_device_flag_check(xdev, XDEV_FLAG_OFFLINE)) { - pr_info("dev 0x%p offline, transfer 0x%p not queued.\n", - xdev, transfer); - return -EBUSY; - } - - /* lock the engine state */ - spin_lock_irqsave(&engine->lock, flags); - - engine->prev_cpu = get_cpu(); - put_cpu(); - - /* engine is being shutdown; do not accept new transfers */ - if (engine->shutdown & ENGINE_SHUTDOWN_REQUEST) { - pr_info("engine %s offline, transfer 0x%p not queued.\n", - engine->name, transfer); - rv = -EBUSY; - goto shutdown; - } - - /* mark the transfer as submitted */ - transfer->state = TRANSFER_STATE_SUBMITTED; - /* add transfer to the tail of the engine transfer queue */ - list_add_tail(&transfer->entry, &engine->transfer_list); - - /* engine is idle? */ - if (!engine->running) { - /* start engine */ - dbg_tfr("transfer_queue(): starting %s engine.\n", - engine->name); - transfer_started = engine_start(engine); - dbg_tfr("transfer=0x%p started %s engine with transfer 0x%p.\n", - transfer, engine->name, transfer_started); - } else { - dbg_tfr("transfer=0x%p queued, with %s engine running.\n", - transfer, engine->name); - } - -shutdown: - /* unlock the engine state */ - dbg_tfr("engine->running = %d\n", engine->running); - spin_unlock_irqrestore(&engine->lock, flags); - return rv; -} - -static void engine_alignments(struct xdma_engine *engine) -{ - u32 w; - u32 align_bytes; - u32 granularity_bytes; - u32 address_bits; - - w = read_register(&engine->regs->alignments); - dbg_init("engine %p name %s alignments=0x%08x\n", engine, - engine->name, (int)w); - - /* RTO - add some macros to extract these fields */ - align_bytes = (w & 0x00ff0000U) >> 16; - granularity_bytes = (w & 0x0000ff00U) >> 8; - address_bits = (w & 0x000000ffU); - - 
dbg_init("align_bytes = %d\n", align_bytes); - dbg_init("granularity_bytes = %d\n", granularity_bytes); - dbg_init("address_bits = %d\n", address_bits); - - if (w) { - engine->addr_align = align_bytes; - engine->len_granularity = granularity_bytes; - engine->addr_bits = address_bits; - } else { - /* Some default values if alignments are unspecified */ - engine->addr_align = 1; - engine->len_granularity = 1; - engine->addr_bits = 64; - } -} - -static void engine_free_resource(struct xdma_engine *engine) -{ - struct xdma_dev *xdev = engine->xdev; - - /* Release memory use for descriptor writebacks */ - if (engine->poll_mode_addr_virt) { - dbg_sg("Releasing memory for descriptor writeback\n"); - dma_free_coherent(&xdev->pdev->dev, - sizeof(struct xdma_poll_wb), - engine->poll_mode_addr_virt, - engine->poll_mode_bus); - dbg_sg("Released memory for descriptor writeback\n"); - engine->poll_mode_addr_virt = NULL; - } - - if (engine->desc) { - dbg_init("device %s, engine %s pre-alloc desc 0x%p,0x%llx.\n", - dev_name(&xdev->pdev->dev), engine->name, - engine->desc, engine->desc_bus); - dma_free_coherent(&xdev->pdev->dev, - XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc), - engine->desc, engine->desc_bus); - engine->desc = NULL; - } - - if (engine->cyclic_result) { - dma_free_coherent(&xdev->pdev->dev, - CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result), - engine->cyclic_result, engine->cyclic_result_bus); - engine->cyclic_result = NULL; - } -} - -static void engine_destroy(struct xdma_dev *xdev, struct xdma_engine *engine) -{ - BUG_ON(!xdev); - BUG_ON(!engine); - - dbg_sg("Shutting down engine %s%d", engine->name, engine->channel); - - /* Disable interrupts to stop processing new events during shutdown */ - write_register(0x0, &engine->regs->interrupt_enable_mask, - (unsigned long)(&engine->regs->interrupt_enable_mask) - - (unsigned long)(&engine->regs)); - - if (enable_credit_mp && engine->streaming && - engine->dir == DMA_FROM_DEVICE) { - u32 reg_value = (0x1 << 
engine->channel) << 16; - struct sgdma_common_regs *reg = (struct sgdma_common_regs *) - (xdev->bar[xdev->config_bar_idx] + - (0x6*TARGET_SPACING)); - write_register(reg_value, ®->credit_mode_enable_w1c, 0); - } - - /* Release memory use for descriptor writebacks */ - engine_free_resource(engine); - - memset(engine, 0, sizeof(struct xdma_engine)); - /* Decrement the number of engines available */ - xdev->engines_num--; -} - -/** - *engine_cyclic_stop() - stop a cyclic transfer running on an SG DMA engine - * - *engine->lock must be taken - */ -struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine) -{ - struct xdma_transfer *transfer = 0; - - /* transfers on queue? */ - if (!list_empty(&engine->transfer_list)) { - /* pick first transfer on the queue (was submitted to engine) */ - transfer = list_entry(engine->transfer_list.next, - struct xdma_transfer, entry); - BUG_ON(!transfer); - - xdma_engine_stop(engine); - - if (transfer->cyclic) { - if (engine->xdma_perf) - dbg_perf("Stopping perf transfer on %s\n", - engine->name); - else - dbg_perf("Stopping cyclic transfer on %s\n", - engine->name); - /* make sure the handler sees correct transfer state */ - transfer->cyclic = 1; - /* - * set STOP flag and interrupt on completion, on the - * last descriptor - */ - xdma_desc_control_set( - transfer->desc_virt + transfer->desc_num - 1, - XDMA_DESC_COMPLETED | XDMA_DESC_STOPPED); - } else { - dbg_sg("(engine=%p) running transfer is not cyclic\n", - engine); - } - } else { - dbg_sg("(engine=%p) found not running transfer.\n", engine); - } - return transfer; -} -EXPORT_SYMBOL_GPL(engine_cyclic_stop); - -static int engine_writeback_setup(struct xdma_engine *engine) -{ - u32 w; - struct xdma_dev *xdev; - struct xdma_poll_wb *writeback; - - BUG_ON(!engine); - xdev = engine->xdev; - BUG_ON(!xdev); - - /* - * RTO - doing the allocation per engine is wasteful since a full page - * is allocated each time - better to allocate one page for the whole - * device during 
probe() and set per-engine offsets here - */ - writeback = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; - writeback->completed_desc_count = 0; - - dbg_init("Setting writeback location to 0x%llx for engine %p", - engine->poll_mode_bus, engine); - w = cpu_to_le32(PCI_DMA_L(engine->poll_mode_bus)); - write_register(w, &engine->regs->poll_mode_wb_lo, - (unsigned long)(&engine->regs->poll_mode_wb_lo) - - (unsigned long)(&engine->regs)); - w = cpu_to_le32(PCI_DMA_H(engine->poll_mode_bus)); - write_register(w, &engine->regs->poll_mode_wb_hi, - (unsigned long)(&engine->regs->poll_mode_wb_hi) - - (unsigned long)(&engine->regs)); - - return 0; -} - - -/* engine_create() - Create an SG DMA engine bookkeeping data structure - * - * An SG DMA engine consists of the resources for a single-direction transfer - * queue; the SG DMA hardware, the software queue and interrupt handling. - * - * @dev Pointer to pci_dev - * @offset byte address offset in BAR[xdev->config_bar_idx] resource for the - * SG DMA * controller registers. 
- * @dir: DMA_TO/FROM_DEVICE - * @streaming Whether the engine is attached to AXI ST (rather than MM) - */ -static int engine_init_regs(struct xdma_engine *engine) -{ - u32 reg_value; - int rv = 0; - - write_register(XDMA_CTRL_NON_INCR_ADDR, &engine->regs->control_w1c, - (unsigned long)(&engine->regs->control_w1c) - - (unsigned long)(&engine->regs)); - - engine_alignments(engine); - - /* Configure error interrupts by default */ - reg_value = XDMA_CTRL_IE_DESC_ALIGN_MISMATCH; - reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; - reg_value |= XDMA_CTRL_IE_MAGIC_STOPPED; - reg_value |= XDMA_CTRL_IE_READ_ERROR; - reg_value |= XDMA_CTRL_IE_DESC_ERROR; - - /* if using polled mode, configure writeback address */ - if (poll_mode) { - rv = engine_writeback_setup(engine); - if (rv) { - dbg_init("%s descr writeback setup failed.\n", - engine->name); - goto fail_wb; - } - } else { - /* enable the relevant completion interrupts */ - reg_value |= XDMA_CTRL_IE_DESC_STOPPED; - reg_value |= XDMA_CTRL_IE_DESC_COMPLETED; - - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) - reg_value |= XDMA_CTRL_IE_IDLE_STOPPED; - } - - /* Apply engine configurations */ - write_register(reg_value, &engine->regs->interrupt_enable_mask, - (unsigned long)(&engine->regs->interrupt_enable_mask) - - (unsigned long)(&engine->regs)); - - engine->interrupt_enable_mask_value = reg_value; - - /* only enable credit mode for AXI-ST C2H */ - if (enable_credit_mp && engine->streaming && - engine->dir == DMA_FROM_DEVICE) { - - struct xdma_dev *xdev = engine->xdev; - u32 reg_value = (0x1 << engine->channel) << 16; - struct sgdma_common_regs *reg = (struct sgdma_common_regs *) - (xdev->bar[xdev->config_bar_idx] + - (0x6*TARGET_SPACING)); - - write_register(reg_value, ®->credit_mode_enable_w1s, 0); - } - - return 0; - -fail_wb: - return rv; -} - -static int engine_alloc_resource(struct xdma_engine *engine) -{ - struct xdma_dev *xdev = engine->xdev; - - engine->desc = dma_alloc_coherent(&xdev->pdev->dev, - 
XDMA_TRANSFER_MAX_DESC * sizeof(struct xdma_desc), - &engine->desc_bus, GFP_KERNEL); - if (!engine->desc) { - pr_warn("dev %s, %s pre-alloc desc OOM.\n", - dev_name(&xdev->pdev->dev), engine->name); - goto err_out; - } - - if (poll_mode) { - engine->poll_mode_addr_virt = dma_alloc_coherent( - &xdev->pdev->dev, - sizeof(struct xdma_poll_wb), - &engine->poll_mode_bus, GFP_KERNEL); - if (!engine->poll_mode_addr_virt) { - pr_warn("%s, %s poll pre-alloc writeback OOM.\n", - dev_name(&xdev->pdev->dev), engine->name); - goto err_out; - } - } - - if (engine->streaming && engine->dir == DMA_FROM_DEVICE) { - engine->cyclic_result = dma_alloc_coherent(&xdev->pdev->dev, - CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result), - &engine->cyclic_result_bus, GFP_KERNEL); - - if (!engine->cyclic_result) { - pr_warn("%s, %s pre-alloc result OOM.\n", - dev_name(&xdev->pdev->dev), engine->name); - goto err_out; - } - } - - return 0; - -err_out: - engine_free_resource(engine); - return -ENOMEM; -} - -static int engine_init(struct xdma_engine *engine, struct xdma_dev *xdev, - int offset, enum dma_data_direction dir, int channel) -{ - int rv; - u32 val; - - dbg_init("channel %d, offset 0x%x, dir %d.\n", channel, offset, dir); - - /* set magic */ - engine->magic = MAGIC_ENGINE; - - engine->channel = channel; - - /* engine interrupt request bit */ - engine->irq_bitmask = (1 << XDMA_ENG_IRQ_NUM) - 1; - engine->irq_bitmask <<= (xdev->engines_num * XDMA_ENG_IRQ_NUM); - engine->bypass_offset = xdev->engines_num * BYPASS_MODE_SPACING; - - /* parent */ - engine->xdev = xdev; - /* register address */ - engine->regs = (xdev->bar[xdev->config_bar_idx] + offset); - engine->sgdma_regs = xdev->bar[xdev->config_bar_idx] + offset + - SGDMA_OFFSET_FROM_CHANNEL; - val = read_register(&engine->regs->identifier); - if (val & 0x8000U) - engine->streaming = 1; - - /* remember SG DMA direction */ - engine->dir = dir; - sprintf(engine->name, "%d-%s%d-%s", xdev->idx, - (dir == DMA_TO_DEVICE) ? 
"H2C" : "C2H", channel, - engine->streaming ? "ST" : "MM"); - - dbg_init("engine %p name %s irq_bitmask=0x%08x\n", engine, engine->name, - (int)engine->irq_bitmask); - - /* initialize the deferred work for transfer completion */ - INIT_WORK(&engine->work, engine_service_work); - - if (dir == DMA_TO_DEVICE) - xdev->mask_irq_h2c |= engine->irq_bitmask; - else - xdev->mask_irq_c2h |= engine->irq_bitmask; - xdev->engines_num++; - - rv = engine_alloc_resource(engine); - if (rv) - return rv; - - rv = engine_init_regs(engine); - if (rv) - return rv; - - return 0; -} - -/* transfer_destroy() - free transfer */ -static void transfer_destroy(struct xdma_dev *xdev, struct xdma_transfer *xfer) -{ - /* free descriptors */ - xdma_desc_done(xfer->desc_virt); - - if (xfer->last_in_request && (xfer->flags & XFER_FLAG_NEED_UNMAP)) { - struct sg_table *sgt = xfer->sgt; - - if (sgt->nents) { - pci_unmap_sg(xdev->pdev, sgt->sgl, sgt->nents, - xfer->dir); - sgt->nents = 0; - } - } -} - -static int transfer_build(struct xdma_engine *engine, - struct xdma_request_cb *req, unsigned int desc_max) -{ - struct xdma_transfer *xfer = &req->xfer; - struct sw_desc *sdesc = &(req->sdesc[req->sw_desc_idx]); - int i = 0; - int j = 0; - - for (; i < desc_max; i++, j++, sdesc++) { - dbg_desc("sw desc %d/%u: 0x%llx, 0x%x, ep 0x%llx.\n", - i + req->sw_desc_idx, req->sw_desc_cnt, - sdesc->addr, sdesc->len, req->ep_addr); - - /* fill in descriptor entry j with transfer details */ - xdma_desc_set(xfer->desc_virt + j, sdesc->addr, req->ep_addr, - sdesc->len, xfer->dir); - xfer->len += sdesc->len; - - /* for non-inc-add mode don't increment ep_addr */ - if (!engine->non_incr_addr) - req->ep_addr += sdesc->len; - } - req->sw_desc_idx += desc_max; - return 0; -} - -static int transfer_init(struct xdma_engine *engine, struct xdma_request_cb *req) -{ - struct xdma_transfer *xfer = &req->xfer; - unsigned int desc_max = min_t(unsigned int, - req->sw_desc_cnt - req->sw_desc_idx, - XDMA_TRANSFER_MAX_DESC); - int i = 
0; - int last = 0; - u32 control; - - memset(xfer, 0, sizeof(*xfer)); - - /* initialize wait queue */ - init_waitqueue_head(&xfer->wq); - - /* remember direction of transfer */ - xfer->dir = engine->dir; - - xfer->desc_virt = engine->desc; - xfer->desc_bus = engine->desc_bus; - - transfer_desc_init(xfer, desc_max); - - dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)xfer->desc_bus); - - transfer_build(engine, req, desc_max); - - /* terminate last descriptor */ - last = desc_max - 1; - xdma_desc_link(xfer->desc_virt + last, 0, 0); - /* stop engine, EOP for AXI ST, req IRQ on last descriptor */ - control = XDMA_DESC_STOPPED; - control |= XDMA_DESC_EOP; - control |= XDMA_DESC_COMPLETED; - xdma_desc_control_set(xfer->desc_virt + last, control); - - xfer->desc_num = xfer->desc_adjacent = desc_max; - - dbg_sg("transfer 0x%p has %d descriptors\n", xfer, xfer->desc_num); - /* fill in adjacent numbers */ - for (i = 0; i < xfer->desc_num; i++) - xdma_desc_adjacent(xfer->desc_virt + i, xfer->desc_num - i - 1); - - return 0; -} - -#ifdef __LIBXDMA_DEBUG__ -static void sgt_dump(struct sg_table *sgt) -{ - int i; - struct scatterlist *sg = sgt->sgl; - - pr_info("sgt 0x%p, sgl 0x%p, nents %u/%u.\n", - sgt, sgt->sgl, sgt->nents, sgt->orig_nents); - - for (i = 0; i < sgt->orig_nents; i++, sg = sg_next(sg)) - pr_info("%d, 0x%p, pg 0x%p,%u+%u, dma 0x%llx,%u.\n", - i, sg, sg_page(sg), sg->offset, sg->length, - sg_dma_address(sg), sg_dma_len(sg)); -} - -static void xdma_request_cb_dump(struct xdma_request_cb *req) -{ - int i; - - pr_info("request 0x%p, total %u, ep 0x%llx, sw_desc %u, sgt 0x%p.\n", - req, req->total_len, req->ep_addr, req->sw_desc_cnt, req->sgt); - sgt_dump(req->sgt); - for (i = 0; i < req->sw_desc_cnt; i++) - pr_info("%d/%u, 0x%llx, %u.\n", - i, req->sw_desc_cnt, req->sdesc[i].addr, - req->sdesc[i].len); -} -#endif - -static void xdma_request_free(struct xdma_request_cb *req) -{ - if (((unsigned long)req) >= VMALLOC_START && - ((unsigned long)req) < VMALLOC_END) - 
vfree(req); - else - kfree(req); -} - -static struct xdma_request_cb * xdma_request_alloc(unsigned int sdesc_nr) -{ - struct xdma_request_cb *req; - unsigned int size = sizeof(struct xdma_request_cb) + - sdesc_nr * sizeof(struct sw_desc); - - req = kzalloc(size, GFP_KERNEL); - if (!req) { - req = vmalloc(size); - if (req) - memset(req, 0, size); - } - if (!req) { - pr_info("OOM, %u sw_desc, %u.\n", sdesc_nr, size); - return NULL; - } - - return req; -} - -static struct xdma_request_cb * xdma_init_request(struct sg_table *sgt, - u64 ep_addr) -{ - struct xdma_request_cb *req; - struct scatterlist *sg = sgt->sgl; - int max = sgt->nents; - int extra = 0; - int i, j = 0; - - for (i = 0; i < max; i++, sg = sg_next(sg)) { - unsigned int len = sg_dma_len(sg); - - if (unlikely(len > XDMA_DESC_BLEN_MAX)) - extra += len >> XDMA_DESC_BLEN_BITS; - } - -//pr_info("ep 0x%llx, desc %u+%u.\n", ep_addr, max, extra); - - max += extra; - req = xdma_request_alloc(max); - if (!req) - return NULL; - - req->sgt = sgt; - req->ep_addr = ep_addr; - - for (i = 0, sg = sgt->sgl; i < sgt->nents; i++, sg = sg_next(sg)) { - unsigned int tlen = sg_dma_len(sg); - dma_addr_t addr = sg_dma_address(sg); - - req->total_len += tlen; - while (tlen) { - req->sdesc[j].addr = addr; - if (tlen > XDMA_DESC_BLEN_MAX) { - req->sdesc[j].len = XDMA_DESC_BLEN_MAX; - addr += XDMA_DESC_BLEN_MAX; - tlen -= XDMA_DESC_BLEN_MAX; - } else { - req->sdesc[j].len = tlen; - tlen = 0; - } - j++; - } - } - BUG_ON(j > max); - - req->sw_desc_cnt = j; -#ifdef __LIBXDMA_DEBUG__ - xdma_request_cb_dump(req); -#endif - return req; -} - -ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, - struct sg_table *sgt, bool dma_mapped, int timeout_ms) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - struct xdma_engine *engine; - int rv = 0; - ssize_t done = 0; - struct scatterlist *sg = sgt->sgl; - int nents; - enum dma_data_direction dir = write ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; - struct xdma_request_cb *req = NULL; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - if (write == 1) { - if (channel >= xdev->h2c_channel_max) { - pr_warn("H2C channel %d >= %d.\n", - channel, xdev->h2c_channel_max); - return -EINVAL; - } - engine = &xdev->engine_h2c[channel]; - } else if (write == 0) { - if (channel >= xdev->c2h_channel_max) { - pr_warn("C2H channel %d >= %d.\n", - channel, xdev->c2h_channel_max); - return -EINVAL; - } - engine = &xdev->engine_c2h[channel]; - } else { - pr_warn("write %d, exp. 0|1.\n", write); - return -EINVAL; - } - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - xdev = engine->xdev; - if (xdma_device_flag_check(xdev, XDEV_FLAG_OFFLINE)) { - pr_info("xdev 0x%p, offline.\n", xdev); - return -EBUSY; - } - - /* check the direction */ - if (engine->dir != dir) { - pr_info("0x%p, %s, %d, W %d, 0x%x/0x%x mismatch.\n", - engine, engine->name, channel, write, engine->dir, dir); - return -EINVAL; - } - - if (!dma_mapped) { - nents = pci_map_sg(xdev->pdev, sg, sgt->orig_nents, dir); - if (!nents) { - pr_info("map sgl failed, sgt 0x%p.\n", sgt); - return -EIO; - } - sgt->nents = nents; - } else { - BUG_ON(!sgt->nents); - } - - req = xdma_init_request(sgt, ep_addr); - if (!req) { - rv = -ENOMEM; - goto unmap_sgl; - } - - dbg_tfr("%s, len %u sg cnt %u.\n", - engine->name, req->total_len, req->sw_desc_cnt); - - sg = sgt->sgl; - nents = req->sw_desc_cnt; - while (nents) { - unsigned long flags; - struct xdma_transfer *xfer; - - /* one transfer at a time */ - spin_lock(&engine->desc_lock); - - /* build transfer */ - rv = transfer_init(engine, req); - if (rv < 0) { - spin_unlock(&engine->desc_lock); - goto unmap_sgl; - } - xfer = &req->xfer; - - if (!dma_mapped) - xfer->flags = XFER_FLAG_NEED_UNMAP; - - /* last transfer for the given request? 
*/ - nents -= xfer->desc_num; - if (!nents) { - xfer->last_in_request = 1; - xfer->sgt = sgt; - } - - dbg_tfr("xfer, %u, ep 0x%llx, done %lu, sg %u/%u.\n", - xfer->len, req->ep_addr, done, req->sw_desc_idx, - req->sw_desc_cnt); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); -#endif - - rv = transfer_queue(engine, xfer); - if (rv < 0) { - spin_unlock(&engine->desc_lock); - pr_info("unable to submit %s, %d.\n", engine->name, rv); - goto unmap_sgl; - } - - /* - * When polling, determine how many descriptors have been queued * on the engine to determine the writeback value expected - */ - if (poll_mode) { - unsigned int desc_count; - - spin_lock_irqsave(&engine->lock, flags); - desc_count = xfer->desc_num; - spin_unlock_irqrestore(&engine->lock, flags); - - dbg_tfr("%s poll desc_count=%d\n", - engine->name, desc_count); - rv = engine_service_poll(engine, desc_count); - - } else { - rv = wait_event_interruptible_timeout(xfer->wq, - (xfer->state != TRANSFER_STATE_SUBMITTED), - msecs_to_jiffies(timeout_ms)); - } - - spin_lock_irqsave(&engine->lock, flags); - - switch(xfer->state) { - case TRANSFER_STATE_COMPLETED: - spin_unlock_irqrestore(&engine->lock, flags); - - dbg_tfr("transfer %p, %u, ep 0x%llx compl, +%lu.\n", - xfer, xfer->len, req->ep_addr - xfer->len, done); - done += xfer->len; - rv = 0; - break; - case TRANSFER_STATE_FAILED: - pr_info("xfer 0x%p,%u, failed, ep 0x%llx.\n", - xfer, xfer->len, req->ep_addr - xfer->len); - spin_unlock_irqrestore(&engine->lock, flags); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); - sgt_dump(sgt); -#endif - rv = -EIO; - break; - default: - /* transfer can still be in-flight */ - pr_info("xfer 0x%p,%u, s 0x%x timed out, ep 0x%llx.\n", - xfer, xfer->len, xfer->state, req->ep_addr); - engine_status_read(engine, 0, 1); - //engine_status_dump(engine); - transfer_abort(engine, xfer); - - xdma_engine_stop(engine); - spin_unlock_irqrestore(&engine->lock, flags); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); - sgt_dump(sgt); 
-#endif - rv = -ERESTARTSYS; - break; - } - transfer_destroy(xdev, xfer); - spin_unlock(&engine->desc_lock); - - if (rv < 0) - goto unmap_sgl; - } /* while (sg) */ - -unmap_sgl: - if (!dma_mapped && sgt->nents) { - pci_unmap_sg(xdev->pdev, sgt->sgl, sgt->orig_nents, dir); - sgt->nents = 0; - } - - if (req) - xdma_request_free(req); - - if (rv < 0) - return rv; - - return done; -} -EXPORT_SYMBOL_GPL(xdma_xfer_submit); - -int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine) -{ - u8 *buffer_virt; - u32 max_consistent_size = 128 * 32 * 1024; /* 1024 pages, 4MB */ - dma_addr_t buffer_bus; /* bus address */ - struct xdma_transfer *transfer; - u64 ep_addr = 0; - int num_desc_in_a_loop = 128; - int size_in_desc = engine->xdma_perf->transfer_size; - int size = size_in_desc * num_desc_in_a_loop; - int i; - - BUG_ON(size_in_desc > max_consistent_size); - - if (size > max_consistent_size) { - size = max_consistent_size; - num_desc_in_a_loop = size / size_in_desc; - } - - buffer_virt = dma_alloc_coherent(&xdev->pdev->dev, size, - &buffer_bus, GFP_KERNEL); - - /* allocate transfer data structure */ - transfer = kzalloc(sizeof(struct xdma_transfer), GFP_KERNEL); - BUG_ON(!transfer); - - /* 0 = write engine (to_dev=0) , 1 = read engine (to_dev=1) */ - transfer->dir = engine->dir; - /* set number of descriptors */ - transfer->desc_num = num_desc_in_a_loop; - - /* allocate descriptor list */ - if (!engine->desc) { - engine->desc = dma_alloc_coherent(&xdev->pdev->dev, - num_desc_in_a_loop * sizeof(struct xdma_desc), - &engine->desc_bus, GFP_KERNEL); - BUG_ON(!engine->desc); - dbg_init("device %s, engine %s pre-alloc desc 0x%p,0x%llx.\n", - dev_name(&xdev->pdev->dev), engine->name, - engine->desc, engine->desc_bus); - } - transfer->desc_virt = engine->desc; - transfer->desc_bus = engine->desc_bus; - - transfer_desc_init(transfer, transfer->desc_num); - - dbg_sg("transfer->desc_bus = 0x%llx.\n", (u64)transfer->desc_bus); - - for (i = 0; i < transfer->desc_num; 
i++) { - struct xdma_desc *desc = transfer->desc_virt + i; - dma_addr_t rc_bus_addr = buffer_bus + size_in_desc * i; - - /* fill in descriptor entry with transfer details */ - xdma_desc_set(desc, rc_bus_addr, ep_addr, size_in_desc, - engine->dir); - } - - /* stop engine and request interrupt on last descriptor */ - xdma_desc_control_set(transfer->desc_virt, 0); - /* create a linked loop */ - xdma_desc_link(transfer->desc_virt + transfer->desc_num - 1, - transfer->desc_virt, transfer->desc_bus); - - transfer->cyclic = 1; - - /* initialize wait queue */ - init_waitqueue_head(&transfer->wq); - - //printk("=== Descriptor print for PERF \n"); - //transfer_dump(transfer); - - dbg_perf("Queueing XDMA I/O %s request for performance measurement.\n", - engine->dir ? "write (to dev)" : "read (from dev)"); - transfer_queue(engine, transfer); - return 0; - -} -EXPORT_SYMBOL_GPL(xdma_performance_submit); - -static struct xdma_dev *alloc_dev_instance(struct pci_dev *pdev) -{ - int i; - struct xdma_dev *xdev; - struct xdma_engine *engine; - - BUG_ON(!pdev); - - /* allocate zeroed device book keeping structure */ - xdev = kzalloc(sizeof(struct xdma_dev), GFP_KERNEL); - if (!xdev) { - pr_info("OOM, xdma_dev.\n"); - return NULL; - } - spin_lock_init(&xdev->lock); - - xdev->magic = MAGIC_DEVICE; - xdev->config_bar_idx = -1; - xdev->user_bar_idx = -1; - xdev->bypass_bar_idx = -1; - xdev->irq_line = -1; - - /* create a driver to device reference */ - xdev->pdev = pdev; - dbg_init("xdev = 0x%p\n", xdev); - - /* Set up data user IRQ data structures */ - for (i = 0; i < 16; i++) { - xdev->user_irq[i].xdev = xdev; - spin_lock_init(&xdev->user_irq[i].events_lock); - init_waitqueue_head(&xdev->user_irq[i].events_wq); - xdev->user_irq[i].handler = NULL; - xdev->user_irq[i].user_idx = i; /* 0 based */ - } - - engine = xdev->engine_h2c; - for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { - spin_lock_init(&engine->lock); - spin_lock_init(&engine->desc_lock); - 
INIT_LIST_HEAD(&engine->transfer_list); - init_waitqueue_head(&engine->shutdown_wq); - init_waitqueue_head(&engine->xdma_perf_wq); - } - - engine = xdev->engine_c2h; - for (i = 0; i < XDMA_CHANNEL_NUM_MAX; i++, engine++) { - spin_lock_init(&engine->lock); - spin_lock_init(&engine->desc_lock); - INIT_LIST_HEAD(&engine->transfer_list); - init_waitqueue_head(&engine->shutdown_wq); - init_waitqueue_head(&engine->xdma_perf_wq); - } - - return xdev; -} - -static int request_regions(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - int rv; - - BUG_ON(!xdev); - BUG_ON(!pdev); - - dbg_init("pci_request_regions()\n"); - rv = pci_request_regions(pdev, xdev->mod_name); - /* could not request all regions? */ - if (rv) { - dbg_init("pci_request_regions() = %d, device in use?\n", rv); - /* assume device is in use so do not disable it later */ - xdev->regions_in_use = 1; - } else { - xdev->got_regions = 1; - } - - return rv; -} - -static int set_dma_mask(struct pci_dev *pdev) -{ - BUG_ON(!pdev); - - dbg_init("sizeof(dma_addr_t) == %ld\n", sizeof(dma_addr_t)); - /* 64-bit addressing capability for XDMA? 
*/ - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - /* query for DMA transfer */ - /* @see Documentation/DMA-mapping.txt */ - dbg_init("pci_set_dma_mask()\n"); - /* use 64-bit DMA */ - dbg_init("Using a 64-bit DMA mask.\n"); - /* use 32-bit DMA for descriptors */ - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - /* use 64-bit DMA, 32-bit for consistent */ - } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { - dbg_init("Could not set 64-bit DMA mask.\n"); - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - /* use 32-bit DMA */ - dbg_init("Using a 32-bit DMA mask.\n"); - } else { - dbg_init("No suitable DMA possible.\n"); - return -EINVAL; - } - - return 0; -} - -static u32 get_engine_channel_id(struct engine_regs *regs) -{ - u32 value; - - BUG_ON(!regs); - - value = read_register(®s->identifier); - - return (value & 0x00000f00U) >> 8; -} - -static u32 get_engine_id(struct engine_regs *regs) -{ - u32 value; - - BUG_ON(!regs); - - value = read_register(®s->identifier); - return (value & 0xffff0000U) >> 16; -} - -static void remove_engines(struct xdma_dev *xdev) -{ - struct xdma_engine *engine; - int i; - - BUG_ON(!xdev); - - /* iterate over channels */ - for (i = 0; i < xdev->h2c_channel_max; i++) { - engine = &xdev->engine_h2c[i]; - if (engine->magic == MAGIC_ENGINE) { - dbg_sg("Remove %s, %d", engine->name, i); - engine_destroy(xdev, engine); - dbg_sg("%s, %d removed", engine->name, i); - } - } - - for (i = 0; i < xdev->c2h_channel_max; i++) { - engine = &xdev->engine_c2h[i]; - if (engine->magic == MAGIC_ENGINE) { - dbg_sg("Remove %s, %d", engine->name, i); - engine_destroy(xdev, engine); - dbg_sg("%s, %d removed", engine->name, i); - } - } -} - -static int probe_for_engine(struct xdma_dev *xdev, enum dma_data_direction dir, - int channel) -{ - struct engine_regs *regs; - int offset = channel * CHANNEL_SPACING; - u32 engine_id; - u32 engine_id_expected; - u32 channel_id; - struct xdma_engine *engine; - int rv; - - /* register offset for the 
engine */ - /* read channels at 0x0000, write channels at 0x1000, - * channels at 0x100 interval */ - if (dir == DMA_TO_DEVICE) { - engine_id_expected = XDMA_ID_H2C; - engine = &xdev->engine_h2c[channel]; - } else { - offset += H2C_CHANNEL_OFFSET; - engine_id_expected = XDMA_ID_C2H; - engine = &xdev->engine_c2h[channel]; - } - - regs = xdev->bar[xdev->config_bar_idx] + offset; - engine_id = get_engine_id(regs); - channel_id = get_engine_channel_id(regs); - - if ((engine_id != engine_id_expected) || (channel_id != channel)) { - dbg_init("%s %d engine, reg off 0x%x, id mismatch 0x%x,0x%x," - "exp 0x%x,0x%x, SKIP.\n", - dir == DMA_TO_DEVICE ? "H2C" : "C2H", - channel, offset, engine_id, channel_id, - engine_id_expected, channel_id != channel); - return -EINVAL; - } - - dbg_init("found AXI %s %d engine, reg. off 0x%x, id 0x%x,0x%x.\n", - dir == DMA_TO_DEVICE ? "H2C" : "C2H", channel, - offset, engine_id, channel_id); - - /* allocate and initialize engine */ - rv = engine_init(engine, xdev, offset, dir, channel); - if (rv != 0) { - pr_info("failed to create AXI %s %d engine.\n", - dir == DMA_TO_DEVICE ? 
"H2C" : "C2H", - channel); - return rv; - } - - return 0; -} - -static int probe_engines(struct xdma_dev *xdev) -{ - int i; - int rv = 0; - - BUG_ON(!xdev); - - /* iterate over channels */ - for (i = 0; i < xdev->h2c_channel_max; i++) { - rv = probe_for_engine(xdev, DMA_TO_DEVICE, i); - if (rv) - break; - } - xdev->h2c_channel_max = i; - - for (i = 0; i < xdev->c2h_channel_max; i++) { - rv = probe_for_engine(xdev, DMA_FROM_DEVICE, i); - if (rv) - break; - } - xdev->c2h_channel_max = i; - - return 0; -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) -{ - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); -} -#else -static void pci_enable_relaxed_ordering(struct pci_dev *pdev) -{ - u16 v; - int pos; - - pos = pci_pcie_cap(pdev); - if (pos > 0) { - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &v); - v |= PCI_EXP_DEVCTL_RELAX_EN; - pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, v); - } -} -#endif - -static void pci_check_extended_tag(struct xdma_dev *xdev, struct pci_dev *pdev) -{ - u16 cap; - u32 v; - void *__iomem reg; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) - pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); -#else - int pos; - - pos = pci_pcie_cap(pdev); - if (pos > 0) - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &cap); - else { - pr_info("pdev 0x%p, unable to access pcie cap.\n", pdev); - return; - } -#endif - - if ((cap & PCI_EXP_DEVCTL_EXT_TAG)) - return; - - /* extended tag not enabled */ - pr_info("0x%p EXT_TAG disabled.\n", pdev); - - if (xdev->config_bar_idx < 0) { - pr_info("pdev 0x%p, xdev 0x%p, config bar UNKNOWN.\n", - pdev, xdev); - return; - } - - reg = xdev->bar[xdev->config_bar_idx] + XDMA_OFS_CONFIG + 0x4C; - v = read_register(reg); - v = (v & 0xFF) | (((u32)32) << 8); - write_register(v, reg, XDMA_OFS_CONFIG + 0x4C); -} - -void *xdma_device_open(const char *mname, struct pci_dev *pdev, int *user_max, - int *h2c_channel_max, int 
*c2h_channel_max) -{ - struct xdma_dev *xdev = NULL; - int rv = 0; - - pr_info("%s device %s, 0x%p.\n", mname, dev_name(&pdev->dev), pdev); - - /* allocate zeroed device book keeping structure */ - xdev = alloc_dev_instance(pdev); - if (!xdev) - return NULL; - xdev->mod_name = mname; - xdev->user_max = *user_max; - xdev->h2c_channel_max = *h2c_channel_max; - xdev->c2h_channel_max = *c2h_channel_max; - - xdma_device_flag_set(xdev, XDEV_FLAG_OFFLINE); - xdev_list_add(xdev); - - if (xdev->user_max == 0 || xdev->user_max > MAX_USER_IRQ) - xdev->user_max = MAX_USER_IRQ; - if (xdev->h2c_channel_max == 0 || - xdev->h2c_channel_max > XDMA_CHANNEL_NUM_MAX) - xdev->h2c_channel_max = XDMA_CHANNEL_NUM_MAX; - if (xdev->c2h_channel_max == 0 || - xdev->c2h_channel_max > XDMA_CHANNEL_NUM_MAX) - xdev->c2h_channel_max = XDMA_CHANNEL_NUM_MAX; - - rv = pci_enable_device(pdev); - if (rv) { - dbg_init("pci_enable_device() failed, %d.\n", rv); - goto err_enable; - } - - /* keep INTx enabled */ - pci_check_intr_pend(pdev); - - /* enable relaxed ordering */ - pci_enable_relaxed_ordering(pdev); - - pci_check_extended_tag(xdev, pdev); - - /* force MRRS to be 512 */ - rv = pcie_set_readrq(pdev, 512); - if (rv) - pr_info("device %s, error set PCI_EXP_DEVCTL_READRQ: %d.\n", - dev_name(&pdev->dev), rv); - - /* enable bus master capability */ - pci_set_master(pdev); - - rv = request_regions(xdev, pdev); - if (rv) - goto err_regions; - - rv = map_bars(xdev, pdev); - if (rv) - goto err_map; - - rv = set_dma_mask(pdev); - if (rv) - goto err_mask; - - check_nonzero_interrupt_status(xdev); - /* explicitely zero all interrupt enable masks */ - channel_interrupts_disable(xdev, ~0); - user_interrupts_disable(xdev, ~0); - read_interrupts(xdev); - - rv = probe_engines(xdev); - if (rv) - goto err_engines; - - rv = enable_msi_msix(xdev, pdev); - if (rv < 0) - goto err_enable_msix; - - rv = irq_setup(xdev, pdev); - if (rv < 0) - goto err_interrupts; - - if (!poll_mode) - channel_interrupts_enable(xdev, ~0); - 
- /* Flush writes */ - read_interrupts(xdev); - - *user_max = xdev->user_max; - *h2c_channel_max = xdev->h2c_channel_max; - *c2h_channel_max = xdev->c2h_channel_max; - - xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); - return (void *)xdev; - -err_interrupts: - irq_teardown(xdev); -err_enable_msix: - disable_msi_msix(xdev, pdev); -err_engines: - remove_engines(xdev); -err_mask: - unmap_bars(xdev, pdev); -err_map: - if (xdev->got_regions) - pci_release_regions(pdev); -err_regions: - if (!xdev->regions_in_use) - pci_disable_device(pdev); -err_enable: - xdev_list_remove(xdev); - kfree(xdev); - return NULL; -} -EXPORT_SYMBOL_GPL(xdma_device_open); - -void xdma_device_close(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - dbg_init("pdev 0x%p, xdev 0x%p.\n", pdev, dev_hndl); - - if (!dev_hndl) - return; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return; - - dbg_sg("remove(dev = 0x%p) where pdev->dev.driver_data = 0x%p\n", - pdev, xdev); - if (xdev->pdev != pdev) { - dbg_sg("pci_dev(0x%lx) != pdev(0x%lx)\n", - (unsigned long)xdev->pdev, (unsigned long)pdev); - } - - channel_interrupts_disable(xdev, ~0); - user_interrupts_disable(xdev, ~0); - read_interrupts(xdev); - - irq_teardown(xdev); - disable_msi_msix(xdev, pdev); - - remove_engines(xdev); - unmap_bars(xdev, pdev); - - if (xdev->got_regions) { - dbg_init("pci_release_regions 0x%p.\n", pdev); - pci_release_regions(pdev); - } - - if (!xdev->regions_in_use) { - dbg_init("pci_disable_device 0x%p.\n", pdev); - pci_disable_device(pdev); - } - - xdev_list_remove(xdev); - - kfree(xdev); -} -EXPORT_SYMBOL_GPL(xdma_device_close); - -void xdma_device_offline(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - struct xdma_engine *engine; - int i; - - if (!dev_hndl) - return; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return; - -pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); - 
xdma_device_flag_set(xdev, XDEV_FLAG_OFFLINE); - - /* wait for all engines to be idle */ - for (i = 0; i < xdev->h2c_channel_max; i++) { - unsigned long flags; - - engine = &xdev->engine_h2c[i]; - - if (engine->magic == MAGIC_ENGINE) { - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown |= ENGINE_SHUTDOWN_REQUEST; - - xdma_engine_stop(engine); - engine->running = 0; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - for (i = 0; i < xdev->c2h_channel_max; i++) { - unsigned long flags; - - engine = &xdev->engine_c2h[i]; - if (engine->magic == MAGIC_ENGINE) { - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown |= ENGINE_SHUTDOWN_REQUEST; - - xdma_engine_stop(engine); - engine->running = 0; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - /* turn off interrupts */ - channel_interrupts_disable(xdev, ~0); - user_interrupts_disable(xdev, ~0); - read_interrupts(xdev); - irq_teardown(xdev); - - pr_info("xdev 0x%p, done.\n", xdev); -} -EXPORT_SYMBOL_GPL(xdma_device_offline); - -void xdma_device_online(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - struct xdma_engine *engine; - unsigned long flags; - int i; - - if (!dev_hndl) - return; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return; - -pr_info("pdev 0x%p, xdev 0x%p.\n", pdev, xdev); - - for (i = 0; i < xdev->h2c_channel_max; i++) { - engine = &xdev->engine_h2c[i]; - if (engine->magic == MAGIC_ENGINE) { - engine_init_regs(engine); - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown &= ~ENGINE_SHUTDOWN_REQUEST; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - for (i = 0; i < xdev->c2h_channel_max; i++) { - engine = &xdev->engine_c2h[i]; - if (engine->magic == MAGIC_ENGINE) { - engine_init_regs(engine); - spin_lock_irqsave(&engine->lock, flags); - engine->shutdown &= ~ENGINE_SHUTDOWN_REQUEST; - spin_unlock_irqrestore(&engine->lock, flags); - } - } - - /* re-write the interrupt table */ - if 
(!poll_mode) { - irq_setup(xdev, pdev); - - channel_interrupts_enable(xdev, ~0); - user_interrupts_enable(xdev, xdev->mask_irq_user); - read_interrupts(xdev); - } - - xdma_device_flag_clear(xdev, XDEV_FLAG_OFFLINE); -pr_info("xdev 0x%p, done.\n", xdev); -} -EXPORT_SYMBOL_GPL(xdma_device_online); - -int xdma_device_restart(struct pci_dev *pdev, void *dev_hndl) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, pdev, dev_hndl) < 0) - return -EINVAL; - - pr_info("NOT implemented, 0x%p.\n", xdev); - return -EINVAL; -} -EXPORT_SYMBOL_GPL(xdma_device_restart); - -int xdma_user_isr_register(void *dev_hndl, unsigned int mask, - irq_handler_t handler, void *dev) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - int i; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - for (i = 0; i < xdev->user_max && mask; i++) { - unsigned int bit = (1 << i); - - if ((bit & mask) == 0) - continue; - - mask &= ~bit; - xdev->user_irq[i].handler = handler; - xdev->user_irq[i].dev = dev; - } - - return 0; -} -EXPORT_SYMBOL_GPL(xdma_user_isr_register); - -int xdma_user_isr_enable(void *dev_hndl, unsigned int mask) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - xdev->mask_irq_user |= mask; - /* enable user interrupts */ - user_interrupts_enable(xdev, mask); - read_interrupts(xdev); - - return 0; -} -EXPORT_SYMBOL_GPL(xdma_user_isr_enable); - -int xdma_user_isr_disable(void *dev_hndl, unsigned int mask) -{ - struct xdma_dev *xdev = (struct xdma_dev *)dev_hndl; - - if (!dev_hndl) - return -EINVAL; - - if (debug_check_dev_hndl(__func__, xdev->pdev, dev_hndl) < 0) - return -EINVAL; - - xdev->mask_irq_user &= ~mask; - user_interrupts_disable(xdev, mask); - read_interrupts(xdev); - - return 0; -} 
-EXPORT_SYMBOL_GPL(xdma_user_isr_disable); - -#ifdef __LIBXDMA_MOD__ -static int __init xdma_base_init(void) -{ - printk(KERN_INFO "%s", version); - return 0; -} - -static void __exit xdma_base_exit(void) -{ - return; -} - -module_init(xdma_base_init); -module_exit(xdma_base_exit); -#endif -/* makes an existing transfer cyclic */ -static void xdma_transfer_cyclic(struct xdma_transfer *transfer) -{ - /* link last descriptor to first descriptor */ - xdma_desc_link(transfer->desc_virt + transfer->desc_num - 1, - transfer->desc_virt, transfer->desc_bus); - /* remember transfer is cyclic */ - transfer->cyclic = 1; -} - -static int transfer_monitor_cyclic(struct xdma_engine *engine, - struct xdma_transfer *transfer, int timeout_ms) -{ - struct xdma_result *result; - int rc = 0; - - BUG_ON(!engine); - BUG_ON(!transfer); - - result = engine->cyclic_result; - BUG_ON(!result); - - if (poll_mode) { - int i ; - for (i = 0; i < 5; i++) { - rc = engine_service_poll(engine, 0); - if (rc) { - pr_info("%s service_poll failed %d.\n", - engine->name, rc); - rc = -ERESTARTSYS; - } - if (result[engine->rx_head].status) - return 0; - } - } else { - if (enable_credit_mp){ - dbg_tfr("%s: rx_head=%d,rx_tail=%d, wait ...\n", - engine->name, engine->rx_head, engine->rx_tail); - rc = wait_event_interruptible_timeout( transfer->wq, - (engine->rx_head!=engine->rx_tail || - engine->rx_overrun), - msecs_to_jiffies(timeout_ms)); - dbg_tfr("%s: wait returns %d, rx %d/%d, overrun %d.\n", - engine->name, rc, engine->rx_head, - engine->rx_tail, engine->rx_overrun); - } else { - rc = wait_event_interruptible_timeout( transfer->wq, - engine->eop_found, - msecs_to_jiffies(timeout_ms)); - dbg_tfr("%s: wait returns %d, eop_found %d.\n", - engine->name, rc, engine->eop_found); - } - } - - return 0; -} - -struct scatterlist *sglist_index(struct sg_table *sgt, unsigned int idx) -{ - struct scatterlist *sg = sgt->sgl; - int i; - - if (idx >= sgt->orig_nents) - return NULL; - - if (!idx) - return sg; - - for (i 
= 0; i < idx; i++, sg = sg_next(sg)) - ; - - return sg; -} - -static int copy_cyclic_to_user(struct xdma_engine *engine, int pkt_length, - int head, char __user *buf, size_t count) -{ - struct scatterlist *sg; - int more = pkt_length; - - BUG_ON(!engine); - BUG_ON(!buf); - - dbg_tfr("%s, pkt_len %d, head %d, user buf idx %u.\n", - engine->name, pkt_length, head, engine->user_buffer_index); - - sg = sglist_index(&engine->cyclic_sgt, head); - if (!sg) { - pr_info("%s, head %d OOR, sgl %u.\n", - engine->name, head, engine->cyclic_sgt.orig_nents); - return -EIO; - } - - /* EOP found? Transfer anything from head to EOP */ - while (more) { - unsigned int copy = more > PAGE_SIZE ? PAGE_SIZE : more; - unsigned int blen = count - engine->user_buffer_index; - int rv; - - if (copy > blen) - copy = blen; - - dbg_tfr("%s sg %d, 0x%p, copy %u to user %u.\n", - engine->name, head, sg, copy, - engine->user_buffer_index); - - rv = copy_to_user(&buf[engine->user_buffer_index], - page_address(sg_page(sg)), copy); - if (rv) { - pr_info("%s copy_to_user %u failed %d\n", - engine->name, copy, rv); - return -EIO; - } - - more -= copy; - engine->user_buffer_index += copy; - - if (engine->user_buffer_index == count) { - /* user buffer used up */ - break; - } - - head++; - if (head >= CYCLIC_RX_PAGES_MAX) { - head = 0; - sg = engine->cyclic_sgt.sgl; - } else - sg = sg_next(sg); - } - - return pkt_length; -} - -static int complete_cyclic(struct xdma_engine *engine, char __user *buf, - size_t count) -{ - struct xdma_result *result; - int pkt_length = 0; - int fault = 0; - int eop = 0; - int head; - int rc = 0; - int num_credit = 0; - unsigned long flags; - - BUG_ON(!engine); - result = engine->cyclic_result; - BUG_ON(!result); - - spin_lock_irqsave(&engine->lock, flags); - - /* where the host currently is in the ring buffer */ - head = engine->rx_head; - - /* iterate over newly received results */ - while (engine->rx_head != engine->rx_tail||engine->rx_overrun) { - - 
WARN_ON(result[engine->rx_head].status==0); - - dbg_tfr("%s, result[%d].status = 0x%x length = 0x%x.\n", - engine->name, engine->rx_head, - result[engine->rx_head].status, - result[engine->rx_head].length); - - if ((result[engine->rx_head].status >> 16) != C2H_WB) { - pr_info("%s, result[%d].status 0x%x, no magic.\n", - engine->name, engine->rx_head, - result[engine->rx_head].status); - fault = 1; - } else if (result[engine->rx_head].length > PAGE_SIZE) { - pr_info("%s, result[%d].len 0x%x, > PAGE_SIZE 0x%lx.\n", - engine->name, engine->rx_head, - result[engine->rx_head].length, PAGE_SIZE); - fault = 1; - } else if (result[engine->rx_head].length == 0) { - pr_info("%s, result[%d].length 0x%x.\n", - engine->name, engine->rx_head, - result[engine->rx_head].length); - fault = 1; - /* valid result */ - } else { - pkt_length += result[engine->rx_head].length; - num_credit++; - /* seen eop? */ - //if (result[engine->rx_head].status & RX_STATUS_EOP) - if (result[engine->rx_head].status & RX_STATUS_EOP){ - eop = 1; - engine->eop_found = 1; - } - - dbg_tfr("%s, pkt_length=%d (%s)\n", - engine->name, pkt_length, - eop ? 
"with EOP" : "no EOP yet"); - } - /* clear result */ - result[engine->rx_head].status = 0; - result[engine->rx_head].length = 0; - /* proceed head pointer so we make progress, even when fault */ - engine->rx_head = (engine->rx_head + 1) % CYCLIC_RX_PAGES_MAX; - - /* stop processing if a fault/eop was detected */ - if (fault || eop){ - break; - } - } - - spin_unlock_irqrestore(&engine->lock, flags); - - if (fault) - return -EIO; - - rc = copy_cyclic_to_user(engine, pkt_length, head, buf, count); - engine->rx_overrun = 0; - /* if copy is successful, release credits */ - if(rc > 0) - write_register(num_credit,&engine->sgdma_regs->credits, 0); - - return rc; -} - -ssize_t xdma_engine_read_cyclic(struct xdma_engine *engine, char __user *buf, - size_t count, int timeout_ms) -{ - int i = 0; - int rc = 0; - int rc_len = 0; - struct xdma_transfer *transfer; - - BUG_ON(!engine); - BUG_ON(engine->magic != MAGIC_ENGINE); - - transfer = &engine->cyclic_req->xfer; - BUG_ON(!transfer); - - engine->user_buffer_index = 0; - - do { - rc = transfer_monitor_cyclic(engine, transfer, timeout_ms); - if (rc < 0) - return rc; - rc = complete_cyclic(engine, buf, count); - if (rc < 0) - return rc; - rc_len += rc; - - i++; - if (i > 10) - break; - } while (!engine->eop_found); - - if(enable_credit_mp) - engine->eop_found = 0; - - return rc_len; -} - -static void sgt_free_with_pages(struct sg_table *sgt, int dir, - struct pci_dev *pdev) -{ - struct scatterlist *sg = sgt->sgl; - int npages = sgt->orig_nents; - int i; - - for (i = 0; i < npages; i++, sg = sg_next(sg)) { - struct page *pg = sg_page(sg); - dma_addr_t bus = sg_dma_address(sg); - - if (pg) { - if (pdev) - pci_unmap_page(pdev, bus, PAGE_SIZE, dir); - __free_page(pg); - } else - break; - } - sg_free_table(sgt); - memset(sgt, 0, sizeof(struct sg_table)); -} - -static int sgt_alloc_with_pages(struct sg_table *sgt, unsigned int npages, - int dir, struct pci_dev *pdev) -{ - struct scatterlist *sg; - int i; - - if (sg_alloc_table(sgt, 
npages, GFP_KERNEL)) { - pr_info("sgt OOM.\n"); - return -ENOMEM; - } - - sg = sgt->sgl; - for (i = 0; i < npages; i++, sg = sg_next(sg)) { - struct page *pg = alloc_page(GFP_KERNEL); - - if (!pg) { - pr_info("%d/%u, page OOM.\n", i, npages); - goto err_out; - } - - if (pdev) { - dma_addr_t bus = pci_map_page(pdev, pg, 0, PAGE_SIZE, - dir); - if (unlikely(pci_dma_mapping_error(pdev, bus))) { - pr_info("%d/%u, page 0x%p map err.\n", - i, npages, pg); - __free_page(pg); - goto err_out; - } - sg_dma_address(sg) = bus; - sg_dma_len(sg) = PAGE_SIZE; - } - sg_set_page(sg, pg, PAGE_SIZE, 0); - } - - sgt->orig_nents = sgt->nents = npages; - - return 0; - -err_out: - sgt_free_with_pages(sgt, dir, pdev); - return -ENOMEM; -} - -int xdma_cyclic_transfer_setup(struct xdma_engine *engine) -{ - struct xdma_dev *xdev; - struct xdma_transfer *xfer; - dma_addr_t bus; - unsigned long flags; - int i; - int rc; - - BUG_ON(!engine); - xdev = engine->xdev; - BUG_ON(!xdev); - - if (engine->cyclic_req) { - pr_info("%s: exclusive access already taken.\n", - engine->name); - return -EBUSY; - } - - spin_lock_irqsave(&engine->lock, flags); - - engine->rx_tail = 0; - engine->rx_head = 0; - engine->rx_overrun = 0; - engine->eop_found = 0; - - rc = sgt_alloc_with_pages(&engine->cyclic_sgt, CYCLIC_RX_PAGES_MAX, - engine->dir, xdev->pdev); - if (rc < 0) { - pr_info("%s cyclic pages %u OOM.\n", - engine->name, CYCLIC_RX_PAGES_MAX); - goto err_out; - } - - engine->cyclic_req = xdma_init_request(&engine->cyclic_sgt, 0); - if (!engine->cyclic_req) { - pr_info("%s cyclic request OOM.\n", engine->name); - rc = -ENOMEM; - goto err_out; - } - -#ifdef __LIBXDMA_DEBUG__ - xdma_request_cb_dump(engine->cyclic_req); -#endif - - rc = transfer_init(engine, engine->cyclic_req); - if (rc < 0) - goto err_out; - - xfer = &engine->cyclic_req->xfer; - - /* replace source addresses with result write-back addresses */ - memset(engine->cyclic_result, 0, - CYCLIC_RX_PAGES_MAX * sizeof(struct xdma_result)); - bus = 
engine->cyclic_result_bus; - for (i = 0; i < xfer->desc_num; i++) { - xfer->desc_virt[i].src_addr_lo = cpu_to_le32(PCI_DMA_L(bus)); - xfer->desc_virt[i].src_addr_hi = cpu_to_le32(PCI_DMA_H(bus)); - bus += sizeof(struct xdma_result); - } - /* set control of all descriptors */ - for (i = 0; i < xfer->desc_num; i++) { - xdma_desc_control_clear(xfer->desc_virt + i, LS_BYTE_MASK); - xdma_desc_control_set(xfer->desc_virt + i, - XDMA_DESC_EOP | XDMA_DESC_COMPLETED); - } - - /* make this a cyclic transfer */ - xdma_transfer_cyclic(xfer); - -#ifdef __LIBXDMA_DEBUG__ - transfer_dump(xfer); -#endif - - if(enable_credit_mp){ - //write_register(RX_BUF_PAGES,&engine->sgdma_regs->credits); - write_register(128, &engine->sgdma_regs->credits, 0); - } - - spin_unlock_irqrestore(&engine->lock, flags); - - /* start cyclic transfer */ - transfer_queue(engine, xfer); - - return 0; - - /* unwind on errors */ -err_out: - if (engine->cyclic_req) { - xdma_request_free(engine->cyclic_req); - engine->cyclic_req = NULL; - } - - if (engine->cyclic_sgt.orig_nents) { - sgt_free_with_pages(&engine->cyclic_sgt, engine->dir, - xdev->pdev); - engine->cyclic_sgt.orig_nents = 0; - engine->cyclic_sgt.nents = 0; - engine->cyclic_sgt.sgl = NULL; - } - - spin_unlock_irqrestore(&engine->lock, flags); - - return rc; -} - - -static int cyclic_shutdown_polled(struct xdma_engine *engine) -{ - BUG_ON(!engine); - - spin_lock(&engine->lock); - - dbg_tfr("Polling for shutdown completion\n"); - do { - engine_status_read(engine, 1, 0); - schedule(); - } while (engine->status & XDMA_STAT_BUSY); - - if ((engine->running) && !(engine->status & XDMA_STAT_BUSY)) { - dbg_tfr("Engine has stopped\n"); - - if (!list_empty(&engine->transfer_list)) - engine_transfer_dequeue(engine); - - engine_service_shutdown(engine); - } - - dbg_tfr("Shutdown completion polling done\n"); - spin_unlock(&engine->lock); - - return 0; -} - -static int cyclic_shutdown_interrupt(struct xdma_engine *engine) -{ - int rc; - - BUG_ON(!engine); - - rc = 
wait_event_interruptible_timeout(engine->shutdown_wq, - !engine->running, msecs_to_jiffies(10000)); - -#if 0 - if (rc) { - dbg_tfr("wait_event_interruptible=%d\n", rc); - return rc; - } -#endif - - if (engine->running) { - pr_info("%s still running?!, %d\n", engine->name, rc); - return -EINVAL; - } - - return rc; -} - -int xdma_cyclic_transfer_teardown(struct xdma_engine *engine) -{ - int rc; - struct xdma_dev *xdev = engine->xdev; - struct xdma_transfer *transfer; - unsigned long flags; - - transfer = engine_cyclic_stop(engine); - - spin_lock_irqsave(&engine->lock, flags); - if (transfer) { - dbg_tfr("%s: stop transfer 0x%p.\n", engine->name, transfer); - if (transfer != &engine->cyclic_req->xfer) { - pr_info("%s unexpected transfer 0x%p/0x%p\n", - engine->name, transfer, - &engine->cyclic_req->xfer); - } - } - /* allow engine to be serviced after stop request */ - spin_unlock_irqrestore(&engine->lock, flags); - - /* wait for engine to be no longer running */ - if (poll_mode) - rc = cyclic_shutdown_polled(engine); - else - rc = cyclic_shutdown_interrupt(engine); - - /* obtain spin lock to atomically remove resources */ - spin_lock_irqsave(&engine->lock, flags); - - if (engine->cyclic_req) { - xdma_request_free(engine->cyclic_req); - engine->cyclic_req = NULL; - } - - if (engine->cyclic_sgt.orig_nents) { - sgt_free_with_pages(&engine->cyclic_sgt, engine->dir, - xdev->pdev); - engine->cyclic_sgt.orig_nents = 0; - engine->cyclic_sgt.nents = 0; - engine->cyclic_sgt.sgl = NULL; - } - - spin_unlock_irqrestore(&engine->lock, flags); - - return 0; -} - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/libxdma.h b/sdk/linux_kernel_drivers/xocl/libxdma.h deleted file mode 100644 index d7620827..00000000 --- a/sdk/linux_kernel_drivers/xocl/libxdma.h +++ /dev/null @@ -1,612 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * 
Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". - * - * Karen Xie - * - ******************************************************************************/ -#ifndef XDMA_LIB_H -#define XDMA_LIB_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Switch debug printing on/off */ -#define XDMA_DEBUG 0 - -/* SECTION: Preprocessor macros/constants */ -#define XDMA_BAR_NUM (6) - -/* maximum amount of register space to map */ -#define XDMA_BAR_SIZE (0x8000UL) - -/* Use this definition to poll several times between calls to schedule */ -#define NUM_POLLS_PER_SCHED 100 - -#define XDMA_CHANNEL_NUM_MAX (4) -/* - * interrupts per engine, rad2_vul.sv:237 - * .REG_IRQ_OUT (reg_irq_from_ch[(channel*2) +: 2]), - */ -#define XDMA_ENG_IRQ_NUM (1) -#define MAX_EXTRA_ADJ (15) -#define RX_STATUS_EOP (1) - -/* Target internal components on XDMA control BAR */ -#define XDMA_OFS_INT_CTRL (0x2000UL) -#define XDMA_OFS_CONFIG (0x3000UL) - -/* maximum number of desc per transfer request */ -#define XDMA_TRANSFER_MAX_DESC (2048) - -/* maximum size of a single DMA transfer descriptor */ -#define XDMA_DESC_BLEN_BITS 28 -#define XDMA_DESC_BLEN_MAX ((1 << (XDMA_DESC_BLEN_BITS)) - 1) - -/* bits of the SG DMA control register */ -#define XDMA_CTRL_RUN_STOP (1UL << 0) 
-#define XDMA_CTRL_IE_DESC_STOPPED (1UL << 1) -#define XDMA_CTRL_IE_DESC_COMPLETED (1UL << 2) -#define XDMA_CTRL_IE_DESC_ALIGN_MISMATCH (1UL << 3) -#define XDMA_CTRL_IE_MAGIC_STOPPED (1UL << 4) -#define XDMA_CTRL_IE_IDLE_STOPPED (1UL << 6) -#define XDMA_CTRL_IE_READ_ERROR (0x1FUL << 9) -#define XDMA_CTRL_IE_DESC_ERROR (0x1FUL << 19) -#define XDMA_CTRL_NON_INCR_ADDR (1UL << 25) -#define XDMA_CTRL_POLL_MODE_WB (1UL << 26) - -/* bits of the SG DMA status register */ -#define XDMA_STAT_BUSY (1UL << 0) -#define XDMA_STAT_DESC_STOPPED (1UL << 1) -#define XDMA_STAT_DESC_COMPLETED (1UL << 2) -#define XDMA_STAT_ALIGN_MISMATCH (1UL << 3) -#define XDMA_STAT_MAGIC_STOPPED (1UL << 4) -#define XDMA_STAT_INVALID_LEN (1UL << 5) -#define XDMA_STAT_IDLE_STOPPED (1UL << 6) - -#define XDMA_STAT_COMMON_ERR_MASK \ - (XDMA_STAT_ALIGN_MISMATCH | XDMA_STAT_MAGIC_STOPPED | \ - XDMA_STAT_INVALID_LEN) - -/* desc_error, C2H & H2C */ -#define XDMA_STAT_DESC_UNSUPP_REQ (1UL << 19) -#define XDMA_STAT_DESC_COMPL_ABORT (1UL << 20) -#define XDMA_STAT_DESC_PARITY_ERR (1UL << 21) -#define XDMA_STAT_DESC_HEADER_EP (1UL << 22) -#define XDMA_STAT_DESC_UNEXP_COMPL (1UL << 23) - -#define XDMA_STAT_DESC_ERR_MASK \ - (XDMA_STAT_DESC_UNSUPP_REQ | XDMA_STAT_DESC_COMPL_ABORT | \ - XDMA_STAT_DESC_PARITY_ERR | XDMA_STAT_DESC_HEADER_EP | \ - XDMA_STAT_DESC_UNEXP_COMPL) - -/* read error: H2C */ -#define XDMA_STAT_H2C_R_UNSUPP_REQ (1UL << 9) -#define XDMA_STAT_H2C_R_COMPL_ABORT (1UL << 10) -#define XDMA_STAT_H2C_R_PARITY_ERR (1UL << 11) -#define XDMA_STAT_H2C_R_HEADER_EP (1UL << 12) -#define XDMA_STAT_H2C_R_UNEXP_COMPL (1UL << 13) - -#define XDMA_STAT_H2C_R_ERR_MASK \ - (XDMA_STAT_H2C_R_UNSUPP_REQ | XDMA_STAT_H2C_R_COMPL_ABORT | \ - XDMA_STAT_H2C_R_PARITY_ERR | XDMA_STAT_H2C_R_HEADER_EP | \ - XDMA_STAT_H2C_R_UNEXP_COMPL) - -/* write error, H2C only */ -#define XDMA_STAT_H2C_W_DECODE_ERR (1UL << 14) -#define XDMA_STAT_H2C_W_SLAVE_ERR (1UL << 15) - -#define XDMA_STAT_H2C_W_ERR_MASK \ - (XDMA_STAT_H2C_W_DECODE_ERR | 
XDMA_STAT_H2C_W_SLAVE_ERR) - -/* read error: C2H */ -#define XDMA_STAT_C2H_R_DECODE_ERR (1UL << 9) -#define XDMA_STAT_C2H_R_SLAVE_ERR (1UL << 10) - -#define XDMA_STAT_C2H_R_ERR_MASK \ - (XDMA_STAT_C2H_R_DECODE_ERR | XDMA_STAT_C2H_R_SLAVE_ERR) - -/* all combined */ -#define XDMA_STAT_H2C_ERR_MASK \ - (XDMA_STAT_COMMON_ERR_MASK | XDMA_STAT_DESC_ERR_MASK | \ - XDMA_STAT_H2C_R_ERR_MASK | XDMA_STAT_H2C_W_ERR_MASK) - -#define XDMA_STAT_C2H_ERR_MASK \ - (XDMA_STAT_COMMON_ERR_MASK | XDMA_STAT_DESC_ERR_MASK | \ - XDMA_STAT_C2H_R_ERR_MASK) - -/* bits of the SGDMA descriptor control field */ -#define XDMA_DESC_STOPPED (1UL << 0) -#define XDMA_DESC_COMPLETED (1UL << 1) -#define XDMA_DESC_EOP (1UL << 4) - -#define XDMA_PERF_RUN (1UL << 0) -#define XDMA_PERF_CLEAR (1UL << 1) -#define XDMA_PERF_AUTO (1UL << 2) - -#define MAGIC_ENGINE 0xEEEEEEEEUL -#define MAGIC_DEVICE 0xDDDDDDDDUL - -/* upper 16-bits of engine identifier register */ -#define XDMA_ID_H2C 0x1fc0U -#define XDMA_ID_C2H 0x1fc1U - -/* for C2H AXI-ST mode */ -#define CYCLIC_RX_PAGES_MAX 256 - -#define LS_BYTE_MASK 0x000000FFUL - -#define BLOCK_ID_MASK 0xFFF00000 -#define BLOCK_ID_HEAD 0x1FC00000 - -#define IRQ_BLOCK_ID 0x1fc20000UL -#define CONFIG_BLOCK_ID 0x1fc30000UL - -#define WB_COUNT_MASK 0x00ffffffUL -#define WB_ERR_MASK (1UL << 31) -#define POLL_TIMEOUT_SECONDS 10 - -#define MAX_USER_IRQ 16 - -#define MAX_DESC_BUS_ADDR (0xffffffffULL) - -#define DESC_MAGIC 0xAD4B0000UL - -#define C2H_WB 0x52B4UL - -#define MAX_NUM_ENGINES (XDMA_CHANNEL_NUM_MAX * 2) -#define H2C_CHANNEL_OFFSET 0x1000 -#define SGDMA_OFFSET_FROM_CHANNEL 0x4000 -#define CHANNEL_SPACING 0x100 -#define TARGET_SPACING 0x1000 - -#define BYPASS_MODE_SPACING 0x0100 - -/* obtain the 32 most significant (high) bits of a 32-bit or 64-bit address */ -#define PCI_DMA_H(addr) ((addr >> 16) >> 16) -/* obtain the 32 least significant (low) bits of a 32-bit or 64-bit address */ -#define PCI_DMA_L(addr) (addr & 0xffffffffUL) - -#ifndef VM_RESERVED - #define 
VMEM_FLAGS (VM_IO | VM_DONTEXPAND | VM_DONTDUMP) -#else - #define VMEM_FLAGS (VM_IO | VM_RESERVED) -#endif - -#ifdef __LIBXDMA_DEBUG__ -#define dbg_io pr_err -#define dbg_fops pr_err -#define dbg_perf pr_err -#define dbg_sg pr_err -#define dbg_tfr pr_err -#define dbg_irq pr_err -#define dbg_init pr_err -#define dbg_desc pr_err -#else -/* disable debugging */ -#define dbg_io(...) -#define dbg_fops(...) -#define dbg_perf(...) -#define dbg_sg(...) -#define dbg_tfr(...) -#define dbg_irq(...) -#define dbg_init(...) -#define dbg_desc(...) -#endif - -/* SECTION: Enum definitions */ -enum transfer_state { - TRANSFER_STATE_NEW = 0, - TRANSFER_STATE_SUBMITTED, - TRANSFER_STATE_COMPLETED, - TRANSFER_STATE_FAILED, - TRANSFER_STATE_ABORTED -}; - -enum shutdown_state { - ENGINE_SHUTDOWN_NONE = 0, /* No shutdown in progress */ - ENGINE_SHUTDOWN_REQUEST = 1, /* engine requested to shutdown */ - ENGINE_SHUTDOWN_IDLE = 2 /* engine has shutdown and is idle */ -}; - -enum dev_capabilities { - CAP_64BIT_DMA = 2, - CAP_64BIT_DESC = 4, - CAP_ENGINE_WRITE = 8, - CAP_ENGINE_READ = 16 -}; - -/* SECTION: Structure definitions */ - -struct config_regs { - u32 identifier; - u32 reserved_1[4]; - u32 msi_enable; -}; - -/** - * SG DMA Controller status and control registers - * - * These registers make the control interface for DMA transfers. - * - * It sits in End Point (FPGA) memory BAR[0] for 32-bit or BAR[0:1] for 64-bit. - * It references the first descriptor which exists in Root Complex (PC) memory. - * - * @note The registers must be accessed using 32-bit (PCI DWORD) read/writes, - * and their values are in little-endian byte ordering. 
- */ -struct engine_regs { - u32 identifier; - u32 control; - u32 control_w1s; - u32 control_w1c; - u32 reserved_1[12]; /* padding */ - - u32 status; - u32 status_rc; - u32 completed_desc_count; - u32 alignments; - u32 reserved_2[14]; /* padding */ - - u32 poll_mode_wb_lo; - u32 poll_mode_wb_hi; - u32 interrupt_enable_mask; - u32 interrupt_enable_mask_w1s; - u32 interrupt_enable_mask_w1c; - u32 reserved_3[9]; /* padding */ - - u32 perf_ctrl; - u32 perf_cyc_lo; - u32 perf_cyc_hi; - u32 perf_dat_lo; - u32 perf_dat_hi; - u32 perf_pnd_lo; - u32 perf_pnd_hi; -} __packed; - -struct engine_sgdma_regs { - u32 identifier; - u32 reserved_1[31]; /* padding */ - - /* bus address to first descriptor in Root Complex Memory */ - u32 first_desc_lo; - u32 first_desc_hi; - /* number of adjacent descriptors at first_desc */ - u32 first_desc_adjacent; - u32 credits; -} __packed; - -struct msix_vec_table_entry { - u32 msi_vec_addr_lo; - u32 msi_vec_addr_hi; - u32 msi_vec_data_lo; - u32 msi_vec_data_hi; -} __packed; - -struct msix_vec_table { - struct msix_vec_table_entry entry_list[32]; -} __packed; - -struct interrupt_regs { - u32 identifier; - u32 user_int_enable; - u32 user_int_enable_w1s; - u32 user_int_enable_w1c; - u32 channel_int_enable; - u32 channel_int_enable_w1s; - u32 channel_int_enable_w1c; - u32 reserved_1[9]; /* padding */ - - u32 user_int_request; - u32 channel_int_request; - u32 user_int_pending; - u32 channel_int_pending; - u32 reserved_2[12]; /* padding */ - - u32 user_msi_vector[8]; - u32 channel_msi_vector[8]; -} __packed; - -struct sgdma_common_regs { - u32 padding[8]; - u32 credit_mode_enable; - u32 credit_mode_enable_w1s; - u32 credit_mode_enable_w1c; -} __packed; - - -/* Structure for polled mode descriptor writeback */ -struct xdma_poll_wb { - u32 completed_desc_count; - u32 reserved_1[7]; -} __packed; - - -/** - * Descriptor for a single contiguous memory block transfer. - * - * Multiple descriptors are linked by means of the next pointer. 
An additional - * extra adjacent number gives the amount of extra contiguous descriptors. - * - * The descriptors are in root complex memory, and the bytes in the 32-bit - * words must be in little-endian byte ordering. - */ -struct xdma_desc { - u32 control; - u32 bytes; /* transfer length in bytes */ - u32 src_addr_lo; /* source address (low 32-bit) */ - u32 src_addr_hi; /* source address (high 32-bit) */ - u32 dst_addr_lo; /* destination address (low 32-bit) */ - u32 dst_addr_hi; /* destination address (high 32-bit) */ - /* - * next descriptor in the single-linked list of descriptors; - * this is the PCIe (bus) address of the next descriptor in the - * root complex memory - */ - u32 next_lo; /* next desc address (low 32-bit) */ - u32 next_hi; /* next desc address (high 32-bit) */ -} __packed; - -/* 32 bytes (four 32-bit words) or 64 bytes (eight 32-bit words) */ -struct xdma_result { - u32 status; - u32 length; - u32 reserved_1[6]; /* padding */ -} __packed; - -struct sw_desc { - dma_addr_t addr; - unsigned int len; -}; - -/* Describes a (SG DMA) single transfer for the engine */ -struct xdma_transfer { - struct list_head entry; /* queue of non-completed transfers */ - struct xdma_desc *desc_virt; /* virt addr of the 1st descriptor */ - dma_addr_t desc_bus; /* bus addr of the first descriptor */ - int desc_adjacent; /* adjacent descriptors at desc_bus */ - int desc_num; /* number of descriptors in transfer */ - enum dma_data_direction dir; - wait_queue_head_t wq; /* wait queue for transfer completion */ - - enum transfer_state state; /* state of the transfer */ - unsigned int flags; -#define XFER_FLAG_NEED_UNMAP 0x1 - int cyclic; /* flag if transfer is cyclic */ - int last_in_request; /* flag if last within request */ - unsigned int len; - struct sg_table *sgt; -}; - -struct xdma_request_cb { - struct sg_table *sgt; - unsigned int total_len; - u64 ep_addr; - - struct xdma_transfer xfer; - - unsigned int sw_desc_idx; - unsigned int sw_desc_cnt; - struct sw_desc 
sdesc[0]; -}; - -struct xdma_engine { - unsigned long magic; /* structure ID for sanity checks */ - struct xdma_dev *xdev; /* parent device */ - char name[5]; /* name of this engine */ - int version; /* version of this engine */ - //dev_t cdevno; /* character device major:minor */ - //struct cdev cdev; /* character device (embedded struct) */ - - /* HW register address offsets */ - struct engine_regs *regs; /* Control reg BAR offset */ - struct engine_sgdma_regs *sgdma_regs; /* SGDAM reg BAR offset */ - u32 bypass_offset; /* Bypass mode BAR offset */ - - /* Engine state, configuration and flags */ - enum shutdown_state shutdown; /* engine shutdown mode */ - enum dma_data_direction dir; - int running; /* flag if the driver started engine */ - int non_incr_addr; /* flag if non-incremental addressing used */ - int streaming; - int addr_align; /* source/dest alignment in bytes */ - int len_granularity; /* transfer length multiple */ - int addr_bits; /* HW datapath address width */ - int channel; /* engine indices */ - int max_extra_adj; /* descriptor prefetch capability */ - int desc_dequeued; /* num descriptors of completed transfers */ - u32 status; /* last known status of device */ - u32 interrupt_enable_mask_value;/* only used for MSIX mode to store per-engine interrupt mask value */ - - /* Transfer list management */ - struct list_head transfer_list; /* queue of transfers */ - - /* Members applicable to AXI-ST C2H (cyclic) transfers */ - struct xdma_result *cyclic_result; - dma_addr_t cyclic_result_bus; /* bus addr for transfer */ - struct xdma_request_cb *cyclic_req; - struct sg_table cyclic_sgt; - u8 eop_found; /* used only for cyclic(rx:c2h) */ - - int rx_tail; /* follows the HW */ - int rx_head; /* where the SW reads from */ - int rx_overrun; /* flag if overrun occured */ - - /* for copy from cyclic buffer to user buffer */ - unsigned int user_buffer_index; - - /* Members associated with polled mode support */ - u8 *poll_mode_addr_virt; /* virt addr for 
descriptor writeback */ - dma_addr_t poll_mode_bus; /* bus addr for descriptor writeback */ - - /* Members associated with interrupt mode support */ - wait_queue_head_t shutdown_wq; /* wait queue for shutdown sync */ - spinlock_t lock; /* protects concurrent access */ - int prev_cpu; /* remember CPU# of (last) locker */ - int msix_irq_line; /* MSI-X vector for this engine */ - u32 irq_bitmask; /* IRQ bit mask for this engine */ - struct work_struct work; /* Work queue for interrupt handling */ - - spinlock_t desc_lock; /* protects concurrent access */ - dma_addr_t desc_bus; - struct xdma_desc *desc; - - /* for performance test support */ - struct xdma_performance_ioctl *xdma_perf; /* perf test control */ - wait_queue_head_t xdma_perf_wq; /* Perf test sync */ -}; - -struct xdma_user_irq { - struct xdma_dev *xdev; /* parent device */ - u8 user_idx; /* 0 ~ 15 */ - u8 events_irq; /* accumulated IRQs */ - spinlock_t events_lock; /* lock to safely update events_irq */ - wait_queue_head_t events_wq; /* wait queue to sync waiting threads */ - irq_handler_t handler; - - void *dev; -}; - -/* XDMA PCIe device specific book-keeping */ -#define XDEV_FLAG_OFFLINE 0x1 -struct xdma_dev { - struct list_head list_head; - struct list_head rcu_node; - - unsigned long magic; /* structure ID for sanity checks */ - struct pci_dev *pdev; /* pci device struct from probe() */ - int idx; /* dev index */ - - const char *mod_name; /* name of module owning the dev */ - - spinlock_t lock; /* protects concurrent access */ - unsigned int flags; - - /* PCIe BAR management */ - void *__iomem bar[XDMA_BAR_NUM]; /* addresses for mapped BARs */ - int user_bar_idx; /* BAR index of user logic */ - int config_bar_idx; /* BAR index of XDMA config logic */ - int bypass_bar_idx; /* BAR index of XDMA bypass logic */ - int regions_in_use; /* flag if dev was in use during probe() */ - int got_regions; /* flag if probe() obtained the regions */ - - int user_max; - int c2h_channel_max; - int h2c_channel_max; - - 
/* Interrupt management */ - int irq_count; /* interrupt counter */ - int irq_line; /* flag if irq allocated successfully */ - int msi_enabled; /* flag if msi was enabled for the device */ - int msix_enabled; /* flag if msi-x was enabled for the device */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4,12,0) - struct msix_entry entry[32]; /* msi-x vector/entry table */ -#endif - struct xdma_user_irq user_irq[16]; /* user IRQ management */ - unsigned int mask_irq_user; - - /* XDMA engine management */ - int engines_num; /* Total engine count */ - u32 mask_irq_h2c; - u32 mask_irq_c2h; - struct xdma_engine engine_h2c[XDMA_CHANNEL_NUM_MAX]; - struct xdma_engine engine_c2h[XDMA_CHANNEL_NUM_MAX]; - - /* SD_Accel specific */ - enum dev_capabilities capabilities; - u64 feature_id; -}; - -static inline int xdma_device_flag_check(struct xdma_dev *xdev, unsigned int f) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->lock, flags); - if (xdev->flags & f) { - spin_unlock_irqrestore(&xdev->lock, flags); - return 1; - } - spin_unlock_irqrestore(&xdev->lock, flags); - return 0; -} - -static inline int xdma_device_flag_test_n_set(struct xdma_dev *xdev, - unsigned int f) -{ - unsigned long flags; - int rv = 0; - - spin_lock_irqsave(&xdev->lock, flags); - if (xdev->flags & f) { - spin_unlock_irqrestore(&xdev->lock, flags); - rv = 1; - } else - xdev->flags |= f; - spin_unlock_irqrestore(&xdev->lock, flags); - return rv; -} - -static inline void xdma_device_flag_set(struct xdma_dev *xdev, unsigned int f) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->lock, flags); - xdev->flags |= f; - spin_unlock_irqrestore(&xdev->lock, flags); -} - -static inline void xdma_device_flag_clear(struct xdma_dev *xdev, unsigned int f) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->lock, flags); - xdev->flags &= ~f; - spin_unlock_irqrestore(&xdev->lock, flags); -} - -void write_register(u32 value, void *iomem); -u32 read_register(void *iomem); - -struct xdma_dev *xdev_find_by_pdev(struct 
pci_dev *pdev); - -void xdma_device_offline(struct pci_dev *pdev, void *dev_handle); -void xdma_device_online(struct pci_dev *pdev, void *dev_handle); - -int xdma_performance_submit(struct xdma_dev *xdev, struct xdma_engine *engine); -struct xdma_transfer *engine_cyclic_stop(struct xdma_engine *engine); -void enable_perf(struct xdma_engine *engine); -void get_perf_stats(struct xdma_engine *engine); - -int xdma_cyclic_transfer_setup(struct xdma_engine *engine); -int xdma_cyclic_transfer_teardown(struct xdma_engine *engine); -ssize_t xdma_engine_read_cyclic(struct xdma_engine *, char __user *, size_t, - int); - -#endif /* XDMA_LIB_H */ - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/libxdma_api.h b/sdk/linux_kernel_drivers/xocl/libxdma_api.h deleted file mode 100644 index bf043eb1..00000000 --- a/sdk/linux_kernel_drivers/xocl/libxdma_api.h +++ /dev/null @@ -1,135 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * - * Copyright(c) Sidebranch. - * Copyright(c) Xilinx, Inc. - * - * Karen Xie - * Leon Woestenberg - * - ******************************************************************************/ - -#ifndef __XDMA_BASE_API_H__ -#define __XDMA_BASE_API_H__ - -#include -#include -#include - -/* - * functions exported by the xdma driver - */ - -typedef struct { - u64 write_submitted; - u64 write_completed; - u64 read_requested; - u64 read_completed; - u64 restart; - u64 open; - u64 close; - u64 msix_trigger; -} xdma_statistics; - -/* - * This struct should be constantly updated by XMDA using u64_stats_* APIs - * The front end will read the structure without locking (That's why updating atomically is a must) - * every time it prints the statistics. 
- */ -//static XDMA_Statistics stats; - -/* - * xdma_device_open - read the pci bars and configure the fpga - * should be called from probe() - * NOTE: - * user interrupt will not enabled until xdma_user_isr_enable() - * is called - * @pdev: ptr to pci_dev - * @mod_name: the module name to be used for request_irq - * @user_max: max # of user/event (interrupts) to be configured - * @channel_max: max # of c2h and h2c channels to be configured - * NOTE: if the user/channel provisioned is less than the max specified, - * libxdma will update the user_max/channel_max - * returns - * a opaque handle (for libxdma to identify the device) - * NULL, in case of error - */ -void *xdma_device_open(const char *mod_name, struct pci_dev *pdev, - int *user_max, int *h2c_channel_max, int *c2h_channel_max); - -/* - * xdma_device_close - prepare fpga for removal: disable all interrupts (users - * and xdma) and release all resources - * should called from remove() - * @pdev: ptr to struct pci_dev - * @tuples: from xdma_device_open() - */ -void xdma_device_close(struct pci_dev *pdev, void *dev_handle); - -/* - * xdma_device_restart - restart the fpga - * @pdev: ptr to struct pci_dev - * TODO: - * may need more refining on the parameter list - * return < 0 in case of error - * TODO: exact error code will be defined later - */ -int xdma_device_restart(struct pci_dev *pdev, void *dev_handle); - -/* - * xdma_user_isr_register - register a user ISR handler - * It is expected that the xdma will register the ISR, and for the user - * interrupt, it will call the corresponding handle if it is registered and - * enabled. - * - * @pdev: ptr to the the pci_dev struct - * @mask: bitmask of user interrupts (0 ~ 15)to be registered - * bit 0: user interrupt 0 - * ... - * bit 15: user interrupt 15 - * any bit above bit 15 will be ignored. 
- * @handler: the correspoinding handler - * a NULL handler will be treated as de-registeration - * @name: to be passed to the handler, ignored if handler is NULL` - * @dev: to be passed to the handler, ignored if handler is NULL` - * return < 0 in case of error - * TODO: exact error code will be defined later - */ -int xdma_user_isr_register(void *dev_hndl, unsigned int mask, - irq_handler_t handler, void *dev); - -/* - * xdma_user_isr_enable/disable - enable or disable user interrupt - * @pdev: ptr to the the pci_dev struct - * @mask: bitmask of user interrupts (0 ~ 15)to be registered - * return < 0 in case of error - * TODO: exact error code will be defined later - */ -int xdma_user_isr_enable(void *dev_hndl, unsigned int mask); -int xdma_user_isr_disable(void *dev_hndl, unsigned int mask); - -/* - * xdma_xfer_submit - submit data for dma operation (for both read and write) - * This is a blocking call - * @channel: channle number (< channel_max) - * == channel_max means libxdma can pick any channel available:q - - * @dir: DMA_FROM/TO_DEVICE - * @offset: offset into the DDR/BRAM memory to read from or write to - * @sg_tbl: the scatter-gather list of data buffers - * @timeout: timeout in mili-seconds, *currently ignored - * return # of bytes transfered or - * < 0 in case of error - * TODO: exact error code will be defined later - */ -ssize_t xdma_xfer_submit(void *dev_hndl, int channel, bool write, u64 ep_addr, - struct sg_table *sgt, bool dma_mapped, int timeout_ms); - - -/////////////////////missing API//////////////////// - -//xdma_get_channle_state - if no interrupt on DMA hang is available -//xdma_channle_restart - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xclfeatures.h b/sdk/linux_kernel_drivers/xocl/xclfeatures.h deleted file mode 100644 index 5709b93c..00000000 --- a/sdk/linux_kernel_drivers/xocl/xclfeatures.h +++ /dev/null @@ -1,146 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This file is dual licensed. 
It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - * - * Apache License Verbiage - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * GPL license Verbiage: - * - * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - * You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - - -/** - * Xilinx SDAccel FPGA BIOS definition - * Copyright (C) 2016-2017, Xilinx Inc - All rights reserved - */ - - -//Layout: At address 0xB0000, we will have the FeatureRomHeader that comprises: -// -//1. First have FeatureRomHeader: 152 bytes of information followed by -//2. Then, as a part of FeatureRomHeader we have the PRRegion struct(s). -// The number of such structs will be same as OCLRegionCount. -//3. After this the freq scaling table is laid out. 
-// - -//#include - -typedef struct PartialRegion { - uint16_t clk[4]; - uint8_t XPR; //0 : non-xpt, 1: xpr -} PRRegion; - -// Each entry represents one row in freq scaling table. -struct FreqScalingTableRow{ - short config0; - short freq; - short config2; -}; - -enum PROMType { - BPI = 0 - ,SPI = 1 - //room for 6 more types of flash devices. -}; - -enum DebugType { - DT_NIFD = 0x01, - DT_FIREWALL = 0x02 - //There is room for future expansion upto 8 IPs -}; - -// This bit mask is used with the FeatureBitMap to calculate 64 bool features -// -// To test if a feature is provided: -// FeatureRomHeader header; -// if (FeatureBitMask::FBM_IS_UNIFIED & header.FeatureBitMap) -// // it is supported -// else -// // it is not supported -// -// To set if a feature is provided: -// header.FeatureBitMap = 0; -// header.FeatureBitMap |= FeatureBitMask::FBM_IS_UNIFIED; -// -enum FeatureBitMask -{ - UNIFIED_PLATFORM = 0x0000000000000001 /* bit 1 : Unified platform */ - ,XARE_ENBLD = 0x0000000000000002 /* bit 2 : Aurora link enabled DSA */ - ,BOARD_MGMT_ENBLD = 0x0000000000000004 /* bit 3 : Has MB based power monitoring */ - ,MB_SCHEDULER = 0x0000000000000008 /* bit 4: Has MB based scheduler */ - ,PROM_MASK = 0x0000000000000070 /* bits 5,6 &7 : 3 bits for PROMType */ - /** ------ Bit 8 unused **/ - ,DEBUG_MASK = 0x000000000000FF00 /* bits 9 through 16 : 8 bits for DebugType */ - - //....more -}; - - - -// In the following data structures, the EntryPointString, MajorVersion, and MinorVersion -// values are all used in the Runtime to identify if the ROM is producing valid data, and -// to pick the schema to read the rest of the data; Ergo, these values shall not change. - -/* - * Struct used for > 2017.2_sdx - * This struct should be used for version (==) 10.0 (Major: 10, Minor: 0) - */ -struct FeatureRomHeader { - unsigned char EntryPointString[4]; // This is "xlnx" - uint8_t MajorVersion; // Feature ROM's major version eg 1 - uint8_t MinorVersion; // minor version eg 2. 
- // -- DO NOT CHANGE THE TYPES ABOVE THIS LINE -- - uint32_t VivadoBuildID; // Vivado Software Build (e.g., 1761098 ). From ./vivado --version - uint32_t IPBuildID; // IP Build (e.g., 1759159 from abve) - uint64_t TimeSinceEpoch; // linux time(NULL) call, at write_dsa_rom invocation - unsigned char FPGAPartName[64]; // The hardware FPGA part. Null termninated - unsigned char VBNVName[64]; // eg : xilinx:xil-accel-rd-ku115:4ddr-xpr:3.4: null terminated - uint8_t DDRChannelCount; // 4 for TUL - uint8_t DDRChannelSize; // 4 (in GB) - uint64_t DRBaseAddress; // The Dynamic Range's (AppPF/CL/Userspace) Base Address - uint64_t FeatureBitMap; // Feature Bit Map, specifies 64 different bool features, maps to enum FeatureBitMask -}; - - -/* - * Struct used for 2017.1_sdx - * This struct should be used for all versions below (<) 10.0 (Major: 10, Minor: 0) -struct FeatureRomHeader { - unsigned char EntryPointString[4]; // This is "xlnx" - uint8_t MajorVersion; // Feature ROM's major version eg 1 - uint8_t MinorVersion; // minor version eg 2. - // -- DO NOT CHANGE THE TYPES ABOVE THIS LINE -- - uint32_t VivadoBuildID; // Vivado Software Build (e.g., 1761098 ). From ./vivado --version - uint32_t IPBuildID; // IP Build (e.g., 1759159 from abve) - uint64_t TimeSinceEpoch; // linux time(NULL) call, at write_dsa_rom invocation - unsigned char FPGAPartName[64]; // The hardware FPGA part. Null termninated - unsigned char VBNVName[64]; // eg : xilinx:xil-accel-rd-ku115:4ddr-xpr:3.4: null terminated - uint8_t DDRChannelCount; // 4 for TUL - uint8_t DDRChannelSize; // 4 (in GB) - uint8_t OCLRegionCount; // Number of OCL regions - uint8_t FPGAType; // maps to enum FPGAGeneration - uint8_t NumFreqTableRows; // Number of rows in freq scaling table. - PRRegion region[1]; // The PRRegion struct, lay them out one after another totalling OCLRegionCount. 
- unsigned char FreqTable[1]; // NumFreqTableRows of FreqScalingTableRow struct -}; -*/ - diff --git a/sdk/linux_kernel_drivers/xocl/xocl_bo.c b/sdk/linux_kernel_drivers/xocl/xocl_bo.c deleted file mode 100644 index b8aedfc6..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_bo.c +++ /dev/null @@ -1,1041 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * Sarabjeet Singh - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#ifdef XOCL_CMA_ALLOC -#include -#endif -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -#define XOCL_DRM_FREE_MALLOC -#elif defined(RHEL_RELEASE_CODE) -#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,4) -#define XOCL_DRM_FREE_MALLOC -#endif -#endif - -#if defined(XOCL_DRM_FREE_MALLOC) -static inline void drm_free_large(void *ptr) -{ - kvfree(ptr); -} - -static inline void *drm_malloc_ab(size_t nmemb, size_t size) -{ - return kvmalloc_array(nmemb, sizeof(struct page *), GFP_KERNEL); -} -#endif - -static inline int xocl_drm_mm_insert_node(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size) -{ -#if defined(XOCL_DRM_FREE_MALLOC) - return drm_mm_insert_node_generic(mm, node, size, PAGE_SIZE, 0, 0); -#else - return drm_mm_insert_node_generic(mm, node, size, PAGE_SIZE, 0, 0, 0); -#endif -} - - -static inline void __user 
*to_user_ptr(u64 address) -{ - return (void __user *)(uintptr_t)address; -} - -static size_t xocl_bo_physical_addr(const struct drm_xocl_bo *xobj) -{ - uint64_t paddr = xobj->mm_node ? xobj->mm_node->start : 0xffffffffffffffffull; - - //Sarab: Need to check for number of hops & size of DDRs - if (xobj->flags & XOCL_BO_ARE) - paddr |= XOCL_ARE_HOP; - return paddr; -} - -void xocl_describe(const struct drm_xocl_bo *xobj) -{ - size_t size_in_kb = xobj->base.size / 1024; - size_t physical_addr = xocl_bo_physical_addr(xobj); - unsigned ddr = xocl_bo_ddr_idx(xobj->flags); - unsigned userptr = xocl_bo_userptr(xobj) ? 1 : 0; - - DRM_DEBUG("%p: H[%p] SIZE[0x%zxKB] D[0x%zx] DDR[%u] UPTR[%u] SGLCOUNT[%u]\n", - xobj, xobj->vmapping, size_in_kb, physical_addr, ddr, userptr, xobj->sgt->orig_nents); -} - -static void xocl_free_mm_node(struct drm_xocl_bo *xobj) -{ - struct drm_xocl_dev *xdev = xobj->base.dev->dev_private; - unsigned ddr = xocl_bo_ddr_idx(xobj->flags); - if (!xobj->mm_node) - return; - - mutex_lock(&xdev->mm_lock); - xdev->mm_usage_stat[ddr].memory_usage -= xobj->base.size; - xdev->mm_usage_stat[ddr].bo_count--; - drm_mm_remove_node(xobj->mm_node); - mutex_unlock(&xdev->mm_lock); - kfree(xobj->mm_node); - xobj->mm_node = NULL; -} - -void xocl_free_bo(struct drm_gem_object *obj) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(obj); - struct drm_xocl_dev *xdev = xobj->base.dev->dev_private; - int npages = obj->size >> PAGE_SHIFT; - DRM_DEBUG("Freeing BO %p\n", xobj); - - if (xobj->vmapping) - vunmap(xobj->vmapping); - xobj->vmapping = NULL; - - if (xobj->pages) { - if (xocl_bo_userptr(xobj)) { - release_pages(xobj->pages, npages, 0); - drm_free_large(xobj->pages); - } -#ifdef XOCL_CMA_ALLOC - else if (xocl_bo_cma(xobj)) { - if (xobj->pages[0]) - cma_release(xdev->cma_blk, xobj->pages[0], npages); - drm_free_large(xobj->pages); - } -#endif - else if (!xocl_bo_import(xobj)) { - drm_gem_put_pages(obj, xobj->pages, false, false); - } - } - xobj->pages = NULL; - - if 
(!xocl_bo_import(xobj)) { - DRM_DEBUG("Freeing regular buffer\n"); - if (xobj->sgt) { - sg_free_table(xobj->sgt); - kfree(xobj->sgt); - xobj->sgt = NULL; - } - xocl_free_mm_node(xobj); - } - else { - DRM_DEBUG("Freeing imported buffer\n"); - if (!(xobj->flags & XOCL_BO_ARE)) - xocl_free_mm_node(xobj); - - if (obj->import_attach) { - DRM_DEBUG("Unnmapping attached dma buf\n"); - dma_buf_unmap_attachment(obj->import_attach, xobj->sgt, DMA_TO_DEVICE); - drm_prime_gem_destroy(obj, NULL); - } - } - - //If it is imported BO then we do not delete SG Table - //And if is imported from ARE device then we do not free the mm_node as well - - //Sarab: Call detach here........ - //to let the exporting device know that importing device do not need it anymore.. - //else free_bo i.e this function is not called for exporting device - //as it assumes that the exported buffer is still being used - //dmabuf->ops->release(dmabuf); - //The drm_driver.gem_free_object callback is responsible for cleaning up the dma_buf attachment and references acquired at import time. - - /* This crashes machine.. Using above code instead - * drm_prime_gem_destroy calls detach function.. 
- struct dma_buf *imported_dma_buf = obj->dma_buf; - if (imported_dma_buf->ops->detach) - imported_dma_buf->ops->detach(imported_dma_buf, obj->import_attach); - */ - - drm_gem_object_release(obj); - kfree(xobj); -} - - -static inline int check_bo_user_flags(const struct drm_device *dev, unsigned flags) -{ - const unsigned ddr_count = xocl_ddr_channel_count(dev); - struct drm_xocl_dev *xdev = dev->dev_private; - unsigned ddr; - - if(ddr_count == 0) - return -EINVAL; - if (flags == 0xffffffff) - return 0; - if (flags == DRM_XOCL_BO_EXECBUF) - return 0; -#ifdef XOCL_CMA_ALLOC - if (flags == DRM_XOCL_BO_CMA) - return 0; -#else - if (flags == DRM_XOCL_BO_CMA) - return -EINVAL; -#endif - ddr = xocl_bo_ddr_idx(flags); - if (ddr == 0xffffffff) - return 0; - if (ddr >= ddr_count) - return -EINVAL; - if (xdev->unified) { - if (xdev->topology.m_data[ddr].m_used != 1) { - printk(KERN_INFO "Bank %d is marked as unused in axlf\n", ddr); - return -EINVAL; - } - } - return 0; -} - - -static struct drm_xocl_bo *xocl_create_bo(struct drm_device *dev, - uint64_t unaligned_size, - unsigned user_flags) -{ - size_t size = PAGE_ALIGN(unaligned_size); - struct drm_xocl_bo *xobj; - struct drm_xocl_dev *xdev = dev->dev_private; - unsigned ddr = xocl_bo_ddr_idx(user_flags); - const unsigned ddr_count = xocl_ddr_channel_count(dev); - int err = 0; - - if (!size) - return ERR_PTR(-EINVAL); - - /* Either none or only one DDR should be specified */ - if (check_bo_user_flags(dev, user_flags)) - return ERR_PTR(-EINVAL); - - xobj = kzalloc(sizeof(*xobj), GFP_KERNEL); - if (!xobj) - return ERR_PTR(-ENOMEM); - - err = drm_gem_object_init(dev, &xobj->base, size); - if (err) - goto out3; - - if (user_flags == DRM_XOCL_BO_EXECBUF) { - xobj->flags = XOCL_BO_EXECBUF; - xobj->mm_node = NULL; - xobj->metadata.state = DRM_XOCL_EXECBUF_STATE_ABORT; - return xobj; - } - -#ifdef XOCL_CMA_ALLOC - if (user_flags == DRM_XOCL_BO_CMA) { - xobj->flags = XOCL_BO_CMA; - xobj->mm_node = NULL; - return xobj; - } -#endif - 
- xobj->mm_node = kzalloc(sizeof(*xobj->mm_node), GFP_KERNEL); - if (!xobj->mm_node) { - err = -ENOMEM; - goto out3; - } - - mutex_lock(&xdev->mm_lock); - if (ddr != 0xffffffff) { - /* Attempt to allocate buffer on the requested DDR */ - DRM_DEBUG("%s:%s:%d: %u\n", __FILE__, __func__, __LINE__, ddr); - err = xocl_drm_mm_insert_node(&xdev->mm[ddr], xobj->mm_node, xobj->base.size); - if (err) - goto out2; - } - else { - /* Attempt to allocate buffer on any DDR */ - for (ddr = 0; ddr < ddr_count; ddr++) { - DRM_DEBUG("%s:%s:%d: %u\n", __FILE__, __func__, __LINE__, ddr); - if(xdev->unified && !xdev->topology.m_data[ddr].m_used) - continue; - err = xocl_drm_mm_insert_node(&xdev->mm[ddr], xobj->mm_node, xobj->base.size); - if (err == 0) - break; - } - if (err) - goto out2; - } - xdev->mm_usage_stat[ddr].memory_usage += xobj->base.size; - xdev->mm_usage_stat[ddr].bo_count++; - mutex_unlock(&xdev->mm_lock); - /* Record the DDR we allocated the buffer on */ - xobj->flags |= (1 << ddr); - - return xobj; -out2: - mutex_unlock(&xdev->mm_lock); - kfree(xobj->mm_node); - drm_gem_object_release(&xobj->base); -out3: - kfree(xobj); - return ERR_PTR(err); -} - -/* - * For ARE device do not reserve DDR space - * In below import it will reuse the mm_node which is already created by other application - */ - -static struct drm_xocl_bo *xocl_create_bo_forARE(struct drm_device *dev, - uint64_t unaligned_size, - struct drm_mm_node *exporting_mm_node) -{ - struct drm_xocl_bo *xobj; - size_t size = PAGE_ALIGN(unaligned_size); - int err = 0; - - if (!size) - return ERR_PTR(-EINVAL); - - xobj = kzalloc(sizeof(*xobj), GFP_KERNEL); - if (!xobj) - return ERR_PTR(-ENOMEM); - - err = drm_gem_object_init(dev, &xobj->base, size); - if (err) - goto out3; - - xobj->mm_node = exporting_mm_node; - if (!xobj->mm_node) { - err = -ENOMEM; - goto out3; - } - - /* Record that this buffer is on remote device to be access over ARE*/ - xobj->flags = XOCL_BO_ARE; - return xobj; -out3: - kfree(xobj); - return 
ERR_PTR(err); -} - - -int xocl_create_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret; - int j; - struct drm_xocl_bo *xobj; - struct page *cpages; - unsigned int page_count; - struct drm_xocl_create_bo *args = data; - unsigned ddr = args->flags & 0xf; - struct drm_xocl_dev *xdev = dev->dev_private; - - if (args->flags && (args->flags != DRM_XOCL_BO_EXECBUF)) { - if (hweight_long(ddr) > 1) - return -EINVAL; - } - - xobj = xocl_create_bo(dev, args->size, args->flags); - - if (IS_ERR(xobj)) { - DRM_DEBUG("object creation failed\n"); - return PTR_ERR(xobj); - } - -#ifdef XOCL_CMA_ALLOC - if (args->flags == DRM_XOCL_BO_CMA) { - page_count = xobj->base.size >> PAGE_SHIFT; - xobj->pages = drm_malloc_ab(page_count, sizeof(*xobj->pages)); - if (!xobj->pages) { - ret = -ENOMEM; - goto out_free; - } - cpages = cma_alloc(xdev->cma_blk, page_count, 0, GFP_KERNEL); - if (!cpages) { - ret = -ENOMEM; - goto out_free; - } - for (j = 0; j < page_count; j++) - xobj->pages[j] = cpages++; - } - else { - xobj->pages = drm_gem_get_pages(&xobj->base); - } -#else - xobj->pages = drm_gem_get_pages(&xobj->base); -#endif - if (IS_ERR(xobj->pages)) { - ret = PTR_ERR(xobj->pages); - goto out_free; - } - - xobj->sgt = drm_prime_pages_to_sg(xobj->pages, xobj->base.size >> PAGE_SHIFT); - if (IS_ERR(xobj->sgt)) { - ret = PTR_ERR(xobj->sgt); - goto out_free; - } - - xobj->vmapping = vmap(xobj->pages, xobj->base.size >> PAGE_SHIFT, VM_MAP, PAGE_KERNEL); - - if (!xobj->vmapping) { - ret = -ENOMEM; - goto out_free; - } - - ret = drm_gem_create_mmap_offset(&xobj->base); - if (ret < 0) - goto out_free; - - ret = drm_gem_handle_create(filp, &xobj->base, &args->handle); - if (ret < 0) - goto out_free; - - xocl_describe(xobj); - drm_gem_object_unreference_unlocked(&xobj->base); - return ret; - -out_free: - xocl_free_bo(&xobj->base); - return ret; -} - -int xocl_userptr_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret; - struct 
drm_xocl_bo *xobj; - unsigned int page_count; - struct drm_xocl_userptr_bo *args = data; - unsigned ddr = args->flags & 0xf; - - if (offset_in_page(args->addr)) - return -EINVAL; - - if (args->flags & DRM_XOCL_BO_EXECBUF) - return -EINVAL; - - if (args->flags & DRM_XOCL_BO_CMA) - return -EINVAL; - - if (args->flags && (hweight_long(ddr) > 1)) - return -EINVAL; - - xobj = xocl_create_bo(dev, args->size, args->flags); - - if (IS_ERR(xobj)) { - DRM_DEBUG("object creation failed\n"); - return PTR_ERR(xobj); - } - - /* Use the page rounded size so we can accurately account for number of pages */ - page_count = xobj->base.size >> PAGE_SHIFT; - - xobj->pages = drm_malloc_ab(page_count, sizeof(*xobj->pages)); - if (!xobj->pages) { - ret = -ENOMEM; - goto out1; - } - ret = get_user_pages_fast(args->addr, page_count, 1, xobj->pages); - - if (ret != page_count) - goto out0; - - xobj->sgt = drm_prime_pages_to_sg(xobj->pages, page_count); - if (IS_ERR(xobj->sgt)) { - ret = PTR_ERR(xobj->sgt); - goto out0; - } - - /* TODO: resolve the cache issue */ - xobj->vmapping = vmap(xobj->pages, page_count, VM_MAP, PAGE_KERNEL); - - if (!xobj->vmapping) { - ret = -ENOMEM; - goto out1; - } - - ret = drm_gem_handle_create(filp, &xobj->base, &args->handle); - if (ret) - goto out1; - - xobj->flags |= XOCL_BO_USERPTR; - xocl_describe(xobj); - drm_gem_object_unreference_unlocked(&xobj->base); - return ret; - -out0: - drm_free_large(xobj->pages); - xobj->pages = NULL; -out1: - xocl_free_bo(&xobj->base); - DRM_DEBUG("handle creation failed\n"); - return ret; -} - - -int xocl_map_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret = 0; - struct drm_xocl_map_bo *args = data; - struct drm_gem_object *obj; - - obj = xocl_gem_object_lookup(dev, filp, args->handle); - if (!obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - if (xocl_bo_userptr(to_xocl_bo(obj))) { - ret = -EPERM; - goto out; - } - /* The mmap offset was set up at 
BO allocation time. */ - args->offset = drm_vma_node_offset_addr(&obj->vma_node); - xocl_describe(to_xocl_bo(obj)); -out: - drm_gem_object_unreference_unlocked(obj); - return ret; -} - -static struct sg_table *alloc_onetime_sg_table(struct page **pages, uint64_t offset, uint64_t size) -{ - int ret; - unsigned int nr_pages; - struct sg_table *sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); - - pages += (offset >> PAGE_SHIFT); - offset &= (~PAGE_MASK); - nr_pages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT; - - ret = sg_alloc_table_from_pages(sgt, pages, nr_pages, offset, size, GFP_KERNEL); - if (ret) - goto cleanup; - return sgt; - -cleanup: - kfree(sgt); - return ERR_PTR(-ENOMEM); -} - -static int acquire_channel(struct drm_xocl_dev *xdev, enum drm_xocl_sync_bo_dir dir) -{ - int channel = 0; - int result = 0; - - if (down_interruptible(&xdev->channel_sem[dir])) { - channel = -ERESTARTSYS; - goto out; - } - - for (channel = 0; channel < xdev->channel; channel++) { - result = test_and_clear_bit(channel, &xdev->channel_bitmap[dir]); - if (result) - break; - } - if (!result) { - // How is this possible? - DRM_ERROR("Failed to acquire a valid channel\n"); - up(&xdev->channel_sem[dir]); - channel = -EIO; - } -out: - return channel; -} - -static void release_channel(struct drm_xocl_dev *xdev, enum drm_xocl_sync_bo_dir dir, int channel) -{ - set_bit(channel, &xdev->channel_bitmap[dir]); - up(&xdev->channel_sem[dir]); -} - - -int xocl_sync_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - const struct drm_xocl_bo *xobj; - struct sg_table *sgt; - u64 paddr = 0; - int channel = 0; - ssize_t ret = 0; - const struct drm_xocl_sync_bo *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - const bool dir = (args->dir == DRM_XOCL_SYNC_BO_TO_DEVICE) ? 
true : false; - struct drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - xobj = to_xocl_bo(gem_obj); - sgt = xobj->sgt; - - //Sarab: If it is a remote BO then why do sync over ARE. - //We should do sync directly using the other device which this bo locally. - //So that txfer is: HOST->PCIE->DDR; Else it will be HOST->PCIE->ARE->DDR - paddr = xocl_bo_physical_addr(xobj); - - if (paddr == 0xffffffffffffffffull) - return -EINVAL; - - /* If device is offline (due to error), reject all DMA requests */ - if (xdev->offline) - return -ENODEV; - - - if ((args->offset >= gem_obj->size) || (args->size > gem_obj->size) || - ((args->offset + args->size) > gem_obj->size)) { - ret = -EINVAL; - goto out; - } - - /* only invalidate the range of addresses requested by the user */ - /* - if (args->dir == DRM_XOCL_SYNC_BO_TO_DEVICE) - flush_kernel_vmap_range(kaddr, args->size); - else if (args->dir == DRM_XOCL_SYNC_BO_FROM_DEVICE) - invalidate_kernel_vmap_range(kaddr, args->size); - else { - ret = -EINVAL; - goto out; - } - */ - paddr += args->offset; - - if (args->offset || (args->size != xobj->base.size)) { - sgt = alloc_onetime_sg_table(xobj->pages, args->offset, args->size); - if (IS_ERR(sgt)) { - ret = PTR_ERR(sgt); - goto out; - } - } - - //drm_clflush_sg(sgt); - channel = acquire_channel(xdev, args->dir); - - if (channel < 0) { - ret = -EINVAL; - goto clear; - } - /* Now perform DMA */ - ret = xdma_migrate_bo(xdev, sgt, dir, paddr, channel); - if (ret >= 0) { - xdev->channel_usage[args->dir][channel] += ret; - ret = (ret == args->size) ? 
0 : -EIO; - } - release_channel(xdev, args->dir, channel); -clear: - if (args->offset || (args->size != xobj->base.size)) { - sg_free_table(sgt); - kfree(sgt); - } -out: - drm_gem_object_unreference_unlocked(gem_obj); - return ret; -} - -int xocl_info_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - const struct drm_xocl_bo *xobj; - struct drm_xocl_info_bo *args = data; - struct drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - xobj = to_xocl_bo(gem_obj); - - args->size = xobj->base.size; - - args->paddr = xocl_bo_physical_addr(xobj); - xocl_describe(xobj); - drm_gem_object_unreference_unlocked(gem_obj); - - return 0; -} - -int xocl_pwrite_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - struct drm_xocl_bo *xobj; - const struct drm_xocl_pwrite_bo *args = data; - struct drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - char __user *user_data = to_user_ptr(args->data_ptr); - int ret = 0; - void *kaddr; - - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - if ((args->offset > gem_obj->size) || (args->size > gem_obj->size) - || ((args->offset + args->size) > gem_obj->size)) { - ret = -EINVAL; - goto out; - } - - if (args->size == 0) { - ret = 0; - goto out; - } - - if (!access_ok(VERIFY_READ, user_data, args->size)) { - ret = -EFAULT; - goto out; - } - - xobj = to_xocl_bo(gem_obj); - - if (xocl_bo_userptr(xobj)) { - ret = -EPERM; - goto out; - } - - kaddr = xobj->vmapping; - kaddr += args->offset; - - ret = copy_from_user(kaddr, user_data, args->size); -out: - drm_gem_object_unreference_unlocked(gem_obj); - - return ret; -} - -int xocl_pread_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - struct drm_xocl_bo *xobj; - const struct drm_xocl_pread_bo *args = data; - struct 
drm_gem_object *gem_obj = xocl_gem_object_lookup(dev, filp, - args->handle); - char __user *user_data = to_user_ptr(args->data_ptr); - int ret = 0; - void *kaddr; - - if (!gem_obj) { - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); - return -ENOENT; - } - - if (xocl_bo_userptr(to_xocl_bo(gem_obj))) { - ret = -EPERM; - goto out; - } - - if ((args->offset > gem_obj->size) || (args->size > gem_obj->size) - || ((args->offset + args->size) > gem_obj->size)) { - ret = -EINVAL; - goto out; - } - - if (args->size == 0) { - ret = 0; - goto out; - } - - if (!access_ok(VERIFY_WRITE, user_data, args->size)) { - ret = EFAULT; - goto out; - } - - xobj = to_xocl_bo(gem_obj); - kaddr = xobj->vmapping;; - kaddr += args->offset; - - ret = copy_to_user(user_data, kaddr, args->size); - -out: - drm_gem_object_unreference_unlocked(gem_obj); - - return ret; -} - -struct sg_table *xocl_gem_prime_get_sg_table(struct drm_gem_object *obj) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(obj); - return drm_prime_pages_to_sg(xobj->pages, xobj->base.size >> PAGE_SHIFT); -} - - -static struct drm_xocl_bo *xocl_is_exporting_xare(struct drm_device *dev, struct dma_buf_attachment *attach) -{ - struct drm_gem_object *exporting_gem_obj; - struct drm_device *exporting_drm_dev; - struct drm_xocl_dev *exporting_xdev; - - struct device_driver *importing_dma_driver = dev->dev->driver; - struct dma_buf *exporting_dma_buf = attach->dmabuf; - struct device_driver *exporting_dma_driver = attach->dev->driver; - struct drm_xocl_dev *xdev = dev->dev_private; - - if (!strstr(xdev->header.VBNVName, "-xare")) - return NULL; - - //We don't know yet if the exporting device is Xilinx/XOCL or third party or USB device - //So checking it in below code - if (importing_dma_driver != exporting_dma_driver) - return NULL; - - //Exporting devices have same driver as us. 
So this is Xilinx device - //So now we can get gem_object, drm_device & xocl_dev - exporting_gem_obj = exporting_dma_buf->priv; - exporting_drm_dev = exporting_gem_obj->dev; - exporting_xdev = exporting_drm_dev->dev_private; - //exporting_xdev->header;//This has FeatureROM header - if (strstr(exporting_xdev->header.VBNVName, "-xare")) - return to_xocl_bo(exporting_gem_obj); - - return NULL; -} - -struct drm_gem_object *xocl_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, struct sg_table *sgt) -{ - int ret = 0; - // This is exporting device - struct drm_xocl_bo *exporting_xobj = xocl_is_exporting_xare(dev, attach); - - // For ARE device resue the mm node from exporting xobj - - // For non ARE devices we need to create a full BO but share the SG table - // ???? add flags to create_bo.. for DDR bank?? - - struct drm_xocl_bo *importing_xobj = exporting_xobj ? xocl_create_bo_forARE(dev, attach->dmabuf->size, exporting_xobj->mm_node) : - xocl_create_bo(dev, attach->dmabuf->size, 0); - - if (IS_ERR(importing_xobj)) { - DRM_DEBUG("object creation failed\n"); - return (struct drm_gem_object *)importing_xobj; - } - - importing_xobj->flags |= XOCL_BO_IMPORT; - importing_xobj->sgt = sgt; - importing_xobj->pages = drm_malloc_ab(attach->dmabuf->size >> PAGE_SHIFT, sizeof(*importing_xobj->pages)); - if (!importing_xobj->pages) { - ret = -ENOMEM; - goto out_free; - } - - ret = drm_prime_sg_to_page_addr_arrays(sgt, importing_xobj->pages, - NULL, attach->dmabuf->size >> PAGE_SHIFT); - if (ret) - goto out_free; - - importing_xobj->vmapping = vmap(importing_xobj->pages, importing_xobj->base.size >> PAGE_SHIFT, VM_MAP, - PAGE_KERNEL); - - if (!importing_xobj->vmapping) { - ret = -ENOMEM; - goto out_free; - } - - ret = drm_gem_create_mmap_offset(&importing_xobj->base); - if (ret < 0) - goto out_free; - - xocl_describe(importing_xobj); - return &importing_xobj->base; - -out_free: - xocl_free_bo(&importing_xobj->base); - DRM_ERROR("Buffer import 
failed\n"); - return ERR_PTR(ret); -} - -void *xocl_gem_prime_vmap(struct drm_gem_object *obj) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(obj); - return xobj->vmapping; -} - -void xocl_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - -} - -static int xocl_init_unmgd(struct drm_xocl_unmgd *unmgd, uint64_t data_ptr, uint64_t size, - enum drm_xocl_sync_bo_dir dir) -{ - int ret; - char __user *user_data = to_user_ptr(data_ptr); - - if (!access_ok((dir == DRM_XOCL_SYNC_BO_TO_DEVICE) ? VERIFY_READ : VERIFY_WRITE, user_data, size)) - return -EFAULT; - - memset(unmgd, 0, sizeof(struct drm_xocl_unmgd)); - - unmgd->npages = (((unsigned long)user_data + size + PAGE_SIZE - 1) - - ((unsigned long)user_data & PAGE_MASK)) >> PAGE_SHIFT; - - unmgd->pages = drm_malloc_ab(unmgd->npages, sizeof(*unmgd->pages)); - if (!unmgd->pages) - return -ENOMEM; - - ret = get_user_pages_fast(data_ptr, unmgd->npages, (dir == DRM_XOCL_SYNC_BO_FROM_DEVICE) ? 1 : 0, unmgd->pages); - - if (ret != unmgd->npages) - goto clear_pages; - - unmgd->sgt = alloc_onetime_sg_table(unmgd->pages, data_ptr & ~PAGE_MASK, size); - if (IS_ERR(unmgd->sgt)) { - ret = PTR_ERR(unmgd->sgt); - goto clear_release; - } - - return 0; - -clear_release: - release_pages(unmgd->pages, unmgd->npages, 0); -clear_pages: - drm_free_large(unmgd->pages); - unmgd->pages = NULL; - return ret; -} - -static void xocl_finish_unmgd(struct drm_xocl_unmgd *unmgd) -{ - if (!unmgd->pages) - return; - sg_free_table(unmgd->sgt); - kfree(unmgd->sgt); - release_pages(unmgd->pages, unmgd->npages, 0); - drm_free_large(unmgd->pages); - unmgd->pages = NULL; -} - - -int xocl_pwrite_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - int channel; - struct drm_xocl_unmgd unmgd; - const struct drm_xocl_pwrite_unmgd *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - const enum drm_xocl_sync_bo_dir dir = DRM_XOCL_SYNC_BO_TO_DEVICE; - ssize_t ret = 0; - - if (args->address_space != 0) - return -EFAULT; - - if 
(args->size == 0) - return 0; - - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - ret = xocl_init_unmgd(&unmgd, args->data_ptr, args->size, dir); - if (ret) - return ret; - - channel = acquire_channel(xdev, dir); - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - if (channel < 0) { - ret = -EINVAL; - goto clear; - } - /* Now perform DMA */ - ret = xdma_migrate_bo(xdev, unmgd.sgt, (dir == DRM_XOCL_SYNC_BO_TO_DEVICE), args->paddr, channel); - if (ret >= 0) { - xdev->channel_usage[dir][channel] += ret; - ret = (ret == args->size) ? 0 : -EIO; - } - release_channel(xdev, dir, channel); - DRM_DEBUG("%s:%llx\n", __func__, xdev->channel_usage[dir][channel]); -clear: - xocl_finish_unmgd(&unmgd); - return ret; -} - -int xocl_pread_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - int channel; - struct drm_xocl_unmgd unmgd; - const struct drm_xocl_pwrite_unmgd *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - const enum drm_xocl_sync_bo_dir dir = DRM_XOCL_SYNC_BO_FROM_DEVICE; - ssize_t ret = 0; - - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - if (args->address_space != 0) - return -EFAULT; - - if (args->size == 0) - return 0; - - ret = xocl_init_unmgd(&unmgd, args->data_ptr, args->size, dir); - if (ret) - return ret; - - DRM_DEBUG("%s:%d\n", __func__, __LINE__); - channel = acquire_channel(xdev, dir); - - if (channel < 0) { - ret = -EINVAL; - goto clear; - } - /* Now perform DMA */ - ret = xdma_migrate_bo(xdev, unmgd.sgt, (dir == DRM_XOCL_SYNC_BO_TO_DEVICE), args->paddr, channel); - if (ret >= 0) { - xdev->channel_usage[dir][channel] += ret; - ret = (ret == args->size) ? 
0 : -EIO; - } - release_channel(xdev, dir, channel); - DRM_DEBUG("%s:%llx\n", __func__, xdev->channel_usage[dir][channel]); -clear: - xocl_finish_unmgd(&unmgd); - return ret; -} - -int xocl_usage_stat_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_usage_stat *args = data; - args->mm_channel_count = xocl_ddr_channel_count(dev); - if (args->mm_channel_count > 8) - args->mm_channel_count = 8; - memcpy(args->mm, xdev->mm_usage_stat, sizeof(struct drm_xocl_mm_stat) * args->mm_channel_count); - args->dma_channel_count = xdev->channel; - if (args->dma_channel_count > 8) - args->dma_channel_count = 8; - memcpy(args->h2c, xdev->channel_usage[DRM_XOCL_SYNC_BO_TO_DEVICE], sizeof(unsigned long long) * args->dma_channel_count); - memcpy(args->c2h, xdev->channel_usage[DRM_XOCL_SYNC_BO_FROM_DEVICE], sizeof(unsigned long long) * args->dma_channel_count); - DRM_INFO("%s h2c[0] 0%llx\n", __func__, args->h2c[0]); - DRM_INFO("%s c2h[0] 0%llx\n", __func__, args->c2h[0]); - DRM_INFO("%s\n", __func__); - return 0; -} - - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_ctx.c b/sdk/linux_kernel_drivers/xocl/xocl_ctx.c deleted file mode 100644 index 24af6f2f..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_ctx.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - */ - -#include -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" - - -void xocl_track_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - list_add_tail(&fpriv->link, &xdev->exec.ctx_list); - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); -} - -void xocl_untrack_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv) -{ - unsigned long flags; - - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - list_del(&fpriv->link); - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); -} - diff --git a/sdk/linux_kernel_drivers/xocl/xocl_drv.c b/sdk/linux_kernel_drivers/xocl/xocl_drv.c deleted file mode 100644 index c97835b0..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_drv.c +++ /dev/null @@ -1,832 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * Hem Neema - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include -#include -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include -#include -#include -#ifdef XOCL_CMA_ALLOC -#include -#endif -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" -#include "xclbin.h" - -#define XOCL_DRIVER_NAME "xocl" -#define XOCL_DRIVER_DESC "Xilinx PCIe Accelerator Device Manager" -#define XOCL_DRIVER_DATE "20171111" -#define XOCL_DRIVER_MAJOR 2017 -#define XOCL_DRIVER_MINOR 4 -#define XOCL_DRIVER_PATCHLEVEL 5 - - -#define XOCL_DRIVER_VERSION \ - __stringify(XOCL_DRIVER_MAJOR) "." \ - __stringify(XOCL_DRIVER_MINOR) "." \ - __stringify(XOCL_DRIVER_PATCHLEVEL) - -#define XOCL_DRIVER_VERSION_NUMBER \ - ((XOCL_DRIVER_MAJOR)*1000 + (XOCL_DRIVER_MINOR)*100 + XOCL_DRIVER_PATCHLEVEL) - - -#define XOCL_FILE_PAGE_OFFSET 0x100000 - -#ifndef VM_RESERVED -#define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP) -#endif - -static const struct pci_device_id pciidlist[] = { - { PCI_DEVICE(0x10ee, 0x4A48), }, - { PCI_DEVICE(0x10ee, 0x4A88), }, - { PCI_DEVICE(0x10ee, 0x4B48), }, - { PCI_DEVICE(0x10ee, 0x4B88), }, - { PCI_DEVICE(0x10ee, 0x6850), }, - { PCI_DEVICE(0x10ee, 0x6890), }, - { PCI_DEVICE(0x10ee, 0x6950), }, - { PCI_DEVICE(0x10ee, 0x6990), }, - { PCI_DEVICE(0x10ee, 0x6A50), }, - { PCI_DEVICE(0x10ee, 0x6A90), }, - { PCI_DEVICE(0x10ee, 0x6E50), }, - { PCI_DEVICE(0x10ee, 0x6B10), }, - { PCI_DEVICE(0x1d0f, 0xf010), }, // shell 1.4 - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, pciidlist); - -static struct cma *xocl_cma = NULL; - -static void xocl_print_dev_info(const struct drm_xocl_dev *xdev) -{ - DRM_INFO("%s [Timestamp 0x%llx]\n", xdev->header.VBNVName, xdev->header.TimeSinceEpoch); - DRM_INFO("%d bi-directional DMA channels\n", xdev->channel); - DRM_INFO("%d DDR channels, Total RAM = %dGB\n", xdev->header.DDRChannelCount, - xdev->header.DDRChannelSize * xdev->header.DDRChannelCount); - DRM_INFO("PCI Resource 0x%llx [Size 0x%llxKB]\n", xdev->res_start, xdev->res_len/1024); -} - -static int 
probe_feature_rom(struct drm_xocl_dev *xdev) -{ - u32 val; - unsigned short ddr = (xdev->ddev->pdev->subsystem_device >> 12) & 0x000f; - val = ioread32(xdev->user_bar + XOCL_FEATURE_ROM); - // Magic number check - if (val != 0x786e6c78) { - if (xdev->ddev->pdev->vendor == 0x1d0f && (xdev->ddev->pdev->device == 0x1042 || xdev->ddev->pdev->device == 0xf010)) { - printk(KERN_INFO "XOCL: Found AWS VU9P Device without featureROM\n"); - //This is AWS device. Fill the FeatureROM struct. Right now it doesn't have FeatureROM - memset(xdev->header.EntryPointString, 0, sizeof(xdev->header.EntryPointString)); - strncpy(xdev->header.EntryPointString, "xlnx", 4); - memset(xdev->header.FPGAPartName, 0, sizeof(xdev->header.FPGAPartName)); - strncpy(xdev->header.FPGAPartName, "AWS VU9P", 8); - memset(xdev->header.VBNVName, 0, sizeof(xdev->header.VBNVName)); - strncpy(xdev->header.VBNVName, "xilinx_aws-vu9p-f1_dynamic_5_0", 35); - xdev->header.MajorVersion = 4; - xdev->header.MinorVersion = 0; - xdev->header.VivadoBuildID = 0xabcd; - xdev->header.IPBuildID = 0xabcd; - xdev->header.TimeSinceEpoch = 0xabcd; - xdev->header.DDRChannelCount = 4; - xdev->header.DDRChannelSize = 16; - xdev->header.FeatureBitMap = 0x0; - printk(KERN_INFO "XOCL: Enabling AWS dynamic 5.0 DSA\n"); - xdev->header.FeatureBitMap = UNIFIED_PLATFORM; - xdev->unified = true; - } else { - DRM_ERROR("XOCL: Probe of Feature ROM failed\n"); - return -ENODEV; - } - } else { - printk(KERN_INFO "XOCL: Printing PCI VendorID: %llx\n", xdev->ddev->pdev->vendor); - printk(KERN_INFO "XOCL: Printing PCI DeviceID: %llx\n", xdev->ddev->pdev->device); - memcpy_fromio(&xdev->header, xdev->user_bar + XOCL_FEATURE_ROM, sizeof(struct FeatureRomHeader)); - // Sanity check - if (strstr(xdev->header.VBNVName, "-xare")) {//This is ARE device - xdev->header.DDRChannelCount = xdev->header.DDRChannelCount - 1; //ARE is mapped like another DDR inside FPGA; map_connects as M04_AXI - } - if (ddr != xdev->header.DDRChannelCount) { - 
DRM_ERROR("XOCL: Feature ROM DDR channel count not consistent\n"); - return -ENODEV; - } - - if(xdev->header.FeatureBitMap & UNIFIED_PLATFORM) { - xdev->unified = true; - } - } - - printk(KERN_INFO "XOCL: ROM magic : %s\n", xdev->header.EntryPointString); - printk(KERN_INFO "XOCL: VBNV: %s", xdev->header.VBNVName); - printk(KERN_INFO "XOCL: DDR channel count : %d\n", xdev->header.DDRChannelCount); - printk(KERN_INFO "XOCL: DDR channel size: %d GB\n", xdev->header.DDRChannelSize); - printk(KERN_INFO "XOCL: Major Version: %d \n", xdev->header.MajorVersion); - printk(KERN_INFO "XOCL: Minor Version: %d \n", xdev->header.MinorVersion); - printk(KERN_INFO "XOCL: IPBuildID: %u\n", xdev->header.IPBuildID); - printk(KERN_INFO "XOCL: TimeSinceEpoch: %llx\n", xdev->header.TimeSinceEpoch); - printk(KERN_INFO "XOCL: FeatureBitMap: %llx\n", xdev->header.FeatureBitMap); - -// if(xdev->header.MajorVersion >= 10) -// printk(KERN_INFO "Printing DRBaseAddress: %llx\n", xdev->header.DRBaseAddress); - return 0; -} - -static int xocl_drm_load(struct drm_device *ddev, unsigned long flags) -{ - struct drm_xocl_dev *xdev; - unsigned i; - int result = 0; - unsigned long long segment = 0; - unsigned short ddr = 0; - unsigned long long ddr_size = 0; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,4,0) - drm_dev_set_unique(ddev, dev_name(ddev->dev)); -#endif - - xdev = devm_kzalloc(ddev->dev, sizeof(*xdev), GFP_KERNEL); - if (!xdev) - return -ENOMEM; - xdev->ddev = ddev; - ddev->dev_private = xdev; - - xdev->res_start = pci_resource_start(xdev->ddev->pdev, 0); - xdev->res_len = pci_resource_end(xdev->ddev->pdev, 0) - xdev->res_start + 1; - - xdev->user_bar = pci_iomap(xdev->ddev->pdev, 0, xdev->res_len); - if (!xdev->user_bar) - return -EIO; - - result = probe_feature_rom(xdev); - if (result) - goto bar_cleanup; - - - if (xdev->unified) { - memset(&xdev->topology, 0, sizeof(struct drm_xocl_mem_topology)); - memset(&xdev->connectivity, 0, sizeof(struct drm_xocl_connectivity)); - 
memset(&xdev->layout, 0, sizeof(struct drm_xocl_layout)); - memset(&xdev->debug_layout, 0, sizeof(struct drm_xocl_debug_layout)); - } else { - printk(KERN_INFO "XOCL : non-unified ddr initialization.\n"); - ddr = xocl_ddr_channel_count(ddev); - ddr_size = xocl_ddr_channel_size(ddev); - - xdev->mm = devm_kzalloc(ddev->dev, sizeof(struct drm_mm) * ddr, GFP_KERNEL); - xdev->mm_usage_stat = devm_kzalloc(ddev->dev, sizeof(struct drm_xocl_mm_stat) * ddr, GFP_KERNEL); - if (!xdev->mm || !xdev->mm_usage_stat) { - result = -ENOMEM; - goto bar_cleanup; - } - - for (i = 0; i < ddr; i++) { - drm_mm_init(&xdev->mm[i], segment, ddr_size); - segment += ddr_size; - } - } - - mutex_init(&xdev->mm_lock); - // Now call XDMA core init - DRM_INFO("Enable XDMA core\n"); - result = xdma_init_glue(xdev); - if (result) { - DRM_ERROR("XDMA device initialization failed with err code: %d\n", result); - goto mm_cleanup; - } - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - - sema_init(&xdev->channel_sem[0], xdev->channel); - sema_init(&xdev->channel_sem[1], xdev->channel); - /* Initialize bit mask to represent individual channels */ - xdev->channel_bitmap[0] = BIT(xdev->channel); - xdev->channel_bitmap[0]--; - xdev->channel_bitmap[1] = xdev->channel_bitmap[0]; - - xdev->channel_usage[0] = devm_kzalloc(ddev->dev, sizeof(unsigned long long) * xdev->channel, GFP_KERNEL); - xdev->channel_usage[1] = devm_kzalloc(ddev->dev, sizeof(unsigned long long) * xdev->channel, GFP_KERNEL); - - if (!xdev->channel_usage[0] || !xdev->channel_usage[1]) { - result = -ENOMEM; - goto xdma_cleanup; - } - - xdev->cma_blk = xocl_cma; - - mutex_init(&xdev->stat_lock); - xdev->offline = false; - xocl_print_dev_info(xdev); - - //Init xocl sysfs - xocl_fini_sysfs(&xdev->ddev->pdev->dev); - result = xocl_init_sysfs(&xdev->ddev->pdev->dev); - if (result) { - DRM_ERROR("failed to create sysds file for xocl: %d\n", result); - goto all_cleanup; - } - - xocl_init_exec(xdev); - xdev->xvc.bar = xdev->user_bar; -#ifdef 
XOCL_BUILTIN_XVC - xocl_xvc_device_init(&xdev->xvc, &xdev->ddev->pdev->dev); -#endif - return result; - -all_cleanup: - mutex_destroy(&xdev->stat_lock); -xdma_cleanup: - xdma_fini_glue(xdev); -mm_cleanup: - if (!xdev->unified) { - for (i = 0; i < ddr; i++) { - drm_mm_takedown(&xdev->mm[i]); - } - } - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); -bar_cleanup: - pci_iounmap(xdev->ddev->pdev, xdev->user_bar); - xdev->user_bar = NULL; - return result; -} - -static int xocl_drm_unload(struct drm_device *drm) -{ - int i = 0; - struct drm_xocl_dev *xdev = drm->dev_private; - const unsigned short ddr = xocl_ddr_channel_count(drm); - - xdev->offline = true; -#ifdef XOCL_BUILTIN_XVC - xocl_xvc_device_fini(&xdev->xvc); -#endif - xocl_fini_exec(xdev); - - if(xdev->unified) { - for (i = 0; i < ddr; i++) { - if(xdev->topology.m_data[i].m_used) - drm_mm_takedown(&xdev->mm[i]); - } - vfree(xdev->topology.m_data); - vfree(xdev->topology.topology); - memset(&xdev->topology, 0, sizeof(xdev->topology)); - vfree(xdev->connectivity.connections); - memset(&xdev->connectivity, 0, sizeof(xdev->connectivity)); - vfree(xdev->layout.layout); - memset(&xdev->layout, 0, sizeof(xdev->layout)); - vfree(xdev->debug_layout.layout); - memset(&xdev->debug_layout, 0, sizeof(xdev->debug_layout)); - } else { - for (i = 0; i < ddr; i++) { - drm_mm_takedown(&xdev->mm[i]); - } - } - - mutex_destroy(&xdev->stat_lock); - mutex_destroy(&xdev->mm_lock); - - pci_iounmap(xdev->ddev->pdev, xdev->user_bar); - xdma_fini_glue(xdev); - xocl_fini_sysfs(&xdev->ddev->pdev->dev); - dev_set_drvdata(&xdev->ddev->pdev->dev, NULL); - return 0; -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -static void xocl_drm_unload2(struct drm_device *drm) -{ - xocl_drm_unload(drm); -} -#endif - -static void xocl_free_object(struct drm_gem_object *obj) -{ - xocl_free_bo(obj); -} - -static int xocl_mmap(struct file *filp, struct vm_area_struct *vma) -{ - int ret; - struct drm_file *priv = filp->private_data; - struct 
drm_device *dev = priv->minor->dev; - struct drm_xocl_dev *xdev = dev->dev_private; - struct mm_struct *mm = current->mm; - unsigned long vsize; - - //DRM_DEBUG("mmap operation 0x%lx 0x%lx 0x%lx\n", vma->vm_start, vma->vm_end, vma->vm_pgoff); - /* If the page offset is > than 4G, then let GEM handle that and do what - * it thinks is best,we will only handle page offsets less than 4G. - */ - if (likely(vma->vm_pgoff >= XOCL_FILE_PAGE_OFFSET)) { - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - /* Clear VM_PFNMAP flag set by drm_gem_mmap() - * we have "struct page" for all backing pages for bo - */ - vma->vm_flags &= ~VM_PFNMAP; - /* Clear VM_IO flag set by drm_gem_mmap() - * it prevents gdb from accessing mapped buffers - */ - vma->vm_flags &= ~VM_IO; - vma->vm_flags |= VM_MIXEDMAP; - vma->vm_flags |= mm->def_flags; - vma->vm_pgoff = 0; - - /* Override pgprot_writecombine() mapping setup by drm_gem_mmap() - * which results in very poor read performance - */ - if (vma->vm_flags & (VM_READ | VM_MAYREAD)) - vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - else - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - return ret; - } - - if (vma->vm_pgoff != 0) - return -EINVAL; - - vsize = vma->vm_end - vma->vm_start; - if (vsize > xdev->res_len) - return -EINVAL; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO; - vma->vm_flags |= VM_RESERVED; - - ret = io_remap_pfn_range(vma, vma->vm_start, - xdev->res_start >> PAGE_SHIFT, - vsize, vma->vm_page_prot); - DRM_INFO("io_remap_pfn_range ret code: %d", ret); - - return ret; - -} - -int xocl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct drm_xocl_bo *xobj = to_xocl_bo(vma->vm_private_data); - loff_t num_pages; - unsigned int page_offset; - int ret = 0; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) - page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; -#else - page_offset = ((unsigned long)vmf->virtual_address - 
vma->vm_start) >> PAGE_SHIFT; -#endif - - if (!xobj->pages) - return VM_FAULT_SIGBUS; - - num_pages = DIV_ROUND_UP(xobj->base.size, PAGE_SIZE); - if (page_offset > num_pages) - return VM_FAULT_SIGBUS; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) - ret = vm_insert_page(vma, vmf->address, xobj->pages[page_offset]); -#else - ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, xobj->pages[page_offset]); -#endif - switch (ret) { - case -EAGAIN: - case 0: - case -ERESTARTSYS: - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - default: - return VM_FAULT_SIGBUS; - } -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -int xocl_gem_fault2(struct vm_fault *vmf) -{ - return xocl_gem_fault(vmf->vma, vmf); -} -#endif - -static int xocl_info_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - struct drm_xocl_info *obj = data; - struct drm_xocl_dev *xdev = dev->dev_private; - struct pci_dev *pdev = xdev->ddev->pdev; - printk(KERN_INFO "%s %s INFO IOCTL \n", DRV_NAME, __FUNCTION__); - - obj->vendor = pdev->vendor; - obj->device = pdev->device; - obj->subsystem_vendor = pdev->subsystem_vendor; - obj->subsystem_device = pdev->subsystem_device; - obj->driver_version = XOCL_DRIVER_VERSION_NUMBER; - obj->pci_slot = PCI_SLOT(pdev->devfn); - - printk(KERN_INFO "%s %s PCI Slot: %d \n", DRV_NAME, __FUNCTION__, obj->pci_slot); - return 0; -} - -static int xocl_client_open(struct drm_device *dev, struct drm_file *filp) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); - if (!fpriv) - return -ENOMEM; - filp->driver_priv = fpriv; - mutex_init(&fpriv->lock); - atomic_set(&fpriv->trigger, 0); - xocl_track_ctx(xdev, fpriv); - DRM_INFO("Pid %d opened device\n", pid_nr(task_tgid(current))); - return 0; -} - -static void xocl_client_release(struct drm_device *dev, struct drm_file *filp) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx 
*fpriv = filp->driver_priv; - int i; - unsigned bit; - - if (!fpriv) - return; - - xocl_untrack_ctx(xdev, fpriv); - if (!fpriv->eventfd_bitmap) - goto out; - - /* Clear all the eventfd structures */ - mutex_lock(&xdev->exec.user_msix_table_lock); - for (i = XOCL_USER_INTR_START; i < XOCL_USER_INTR_END; i++) { - bit = 1 << i; - if (!(fpriv->eventfd_bitmap & bit)) - continue; - xdma_user_interrupt_config(xdev, i, false); - eventfd_ctx_put(xdev->exec.user_msix_table[i]); - xdev->exec.user_msix_table[i] = NULL; - } - fpriv->eventfd_bitmap = 0; - mutex_unlock(&xdev->exec.user_msix_table_lock); -out: - mutex_destroy(&fpriv->lock); - kfree(fpriv); - filp->driver_priv = NULL; - DRM_INFO("Pid %d closed device\n", pid_nr(task_tgid(current))); -} - -static unsigned int xocl_poll(struct file *filp, poll_table *wait) -{ - int counter; - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx *fpriv = priv->driver_priv; - int ret = 0; - - BUG_ON(!fpriv); - poll_wait(filp, &xdev->exec.poll_wait_queue, wait); - /* - * Mutex lock protects from two threads from the same application - * calling poll concurrently using the same file handle - */ - mutex_lock(&fpriv->lock); - counter = atomic_read(&fpriv->trigger); - if (counter > 0) { - /* - * Use atomic here since the trigger may be incremented by interrupt - * handler running concurrently - */ - atomic_dec(&fpriv->trigger); - ret = POLLIN; - } - mutex_unlock(&fpriv->lock); - return ret; -} - -static const struct drm_ioctl_desc xocl_ioctls[] = { - DRM_IOCTL_DEF_DRV(XOCL_CREATE_BO, xocl_create_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_USERPTR_BO, xocl_userptr_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_MAP_BO, xocl_map_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_SYNC_BO, xocl_sync_bo_ioctl, - 
DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_INFO_BO, xocl_info_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PWRITE_BO, xocl_pwrite_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PREAD_BO, xocl_pread_bo_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_CTX, xocl_ctx_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_INFO, xocl_info_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_READ_AXLF, xocl_read_axlf_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PWRITE_UNMGD, xocl_pwrite_unmgd_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_PREAD_UNMGD, xocl_pread_unmgd_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_USAGE_STAT, xocl_usage_stat_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_USER_INTR, xocl_user_intr_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XOCL_EXECBUF, xocl_execbuf_ioctl, - DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), -}; - -static const struct file_operations xocl_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .mmap = xocl_mmap, - .poll = xocl_poll, - .read = drm_read, - .unlocked_ioctl = drm_ioctl, - .release = drm_release, -}; - -static const struct vm_operations_struct xocl_vm_ops = { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) - .fault = xocl_gem_fault2, -#else - .fault = xocl_gem_fault, -#endif - .open = drm_gem_vm_open, - .close = drm_gem_vm_close, -}; - -static struct drm_driver xocl_drm_driver = { - .driver_features = DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER, - .postclose = xocl_client_release, - .open = xocl_client_open, - .load = xocl_drm_load, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) - .unload = xocl_drm_unload2, -#else - .unload = xocl_drm_unload, -#endif - .gem_free_object = xocl_free_object, - .gem_vm_ops = 
&xocl_vm_ops, - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_import = drm_gem_prime_import, - .gem_prime_export = drm_gem_prime_export, - .gem_prime_get_sg_table = xocl_gem_prime_get_sg_table, - .gem_prime_import_sg_table = xocl_gem_prime_import_sg_table, - .gem_prime_vmap = xocl_gem_prime_vmap, - .gem_prime_vunmap = xocl_gem_prime_vunmap, - .ioctls = xocl_ioctls, - .num_ioctls = ARRAY_SIZE(xocl_ioctls), - .fops = &xocl_driver_fops, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0) - .set_busid = drm_pci_set_busid, -#endif - .name = XOCL_DRIVER_NAME, - .desc = XOCL_DRIVER_DESC, - .date = XOCL_DRIVER_DATE, - .major = XOCL_DRIVER_MAJOR, - .minor = XOCL_DRIVER_MINOR, - .patchlevel = XOCL_DRIVER_PATCHLEVEL, -}; - -// TODO: Umang remove the additional DRM_INFO's once this driver has been -// in production for some time. 07/06/2017. -static int xocl_driver_load(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct drm_device *dev; - int ret; - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - - dev = drm_dev_alloc(&xocl_drm_driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - ret = pci_enable_device(pdev); - if (ret) - goto err_free; - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - dev->pdev = pdev; - pci_set_drvdata(pdev, dev); - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - ret = drm_dev_register(dev, ent->driver_data); - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - if (ret) { - goto err_reg; - } - - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - return 0; - -err_reg: - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - pci_disable_device(pdev); -err_free: - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - drm_dev_unref(dev); - return ret; - -} - -static int xocl_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - 
DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - return xocl_driver_load(pdev, ent); -} - -static void xocl_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - pci_disable_device(pdev); - drm_put_dev(dev); -} - -static pci_ers_result_t xocl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct xdma_pci_dev *xpdev = dev_get_drvdata(&pdev->dev); - - switch (state) { - case pci_channel_io_normal: - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - DRM_INFO("dev 0x%p,0x%p, frozen state error, reset controller\n", - pdev, xpdev); - //xdma_dev_disable(xpdev, false); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - DRM_INFO("dev 0x%p,0x%p, failure state error, req. disconnect\n", - pdev, xpdev); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} - -static pci_ers_result_t xocl_slot_reset(struct pci_dev *pdev) -{ - struct drm_device *ddev = pci_get_drvdata(pdev); - - DRM_INFO("0x%p restart after slot reset\n", ddev->dev_private); - pci_restore_state(pdev); - //queue_work(xdma_workq, &dev->reset_work); - return PCI_ERS_RESULT_RECOVERED; -} - -static void xocl_error_resume(struct pci_dev *pdev) -{ - struct drm_device *ddev = pci_get_drvdata(pdev); - - DRM_INFO("dev 0x%p,0x%p.\n", pdev, ddev->dev_private); - pci_cleanup_aer_uncorrect_error_status(pdev); -} - -void xocl_reset_notify(struct pci_dev *pdev, bool prepare) -{ - struct drm_device *ddev = dev_get_drvdata(&pdev->dev); - struct drm_xocl_dev *xdev; - - if(ddev) { - xdev = ddev->dev_private; - } - else { - DRM_ERROR("%s: %s ddev is null", DRV_NAME, __FUNCTION__); - return; - } - - if(xdev) - DRM_INFO("%s: %s dev 0x%p,0x%p, prepare %d.\n", DRV_NAME, __FUNCTION__, - pdev, ddev->dev_private, prepare); - else { - DRM_ERROR("%s: %s xdev is null", DRV_NAME, __FUNCTION__); - return; - } - - if (prepare) { - xdev->offline = true; - 
xdma_device_offline(pdev, xdev->xdma_handle); - } - else { - xdma_device_online(pdev, xdev->xdma_handle); - xdev->offline = false; - } -} -EXPORT_SYMBOL_GPL(xocl_reset_notify); - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -static void xocl_reset_prepare(struct pci_dev *pdev) -{ - xocl_reset_notify(pdev, true); -} - -static void xocl_reset_done(struct pci_dev *pdev) -{ - xocl_reset_notify(pdev, false); -} -#endif - -static const struct pci_error_handlers xocl_err_handler = { - .error_detected = xocl_error_detected, - .slot_reset = xocl_slot_reset, - .resume = xocl_error_resume, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) - .reset_prepare = xocl_reset_prepare, - .reset_done = xocl_reset_done, -#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) - .reset_notify = xocl_reset_notify, -#endif -}; - - -static struct pci_driver xocl_pci_driver = { - .name = XOCL_DRIVER_NAME, - .id_table = pciidlist, - .probe = xocl_pci_probe, - .remove = xocl_pci_remove, - .err_handler = &xocl_err_handler, -}; - -/* init xilinx opencl drm platform */ -static int __init xocl_init(void) -{ - int result; -#ifdef XOCL_BUILTIN_XVC - result = xocl_xvc_chardev_init(); - if (result) { - DRM_ERROR("XVC registration failed with error code: %d\n", result); - return result; - } -#endif - result = pci_register_driver(&xocl_pci_driver); - if (result) { - DRM_ERROR("PCIe registration failed with error code: %d\n", result); - goto unregister_xvc; - } - -#ifdef XOCL_CMA_ALLOC - result = cma_init_reserved_mem(XOCL_CMA_BASE, XOCL_CMA_SIZE, 0, &xocl_cma); - if (result) { - DRM_ERROR("CMA region allocation for PCI Slave failed with error code: %d\n", result); - goto unregister_pci; - } -#endif - return 0; - -unregister_pci: - pci_unregister_driver(&xocl_pci_driver); - -unregister_xvc: -#ifdef XOCL_BUILTIN_XVC - xocl_xvc_chardev_exit(); -#endif - return result; -} - -static void __exit xocl_exit(void) -{ - DRM_INFO("%s:%d:%s()", __FILE__, __LINE__, __func__); - 
pci_unregister_driver(&xocl_pci_driver); -#ifdef XOCL_BUILTIN_XVC - xocl_xvc_chardev_exit(); -#endif -} - -module_init(xocl_init); -module_exit(xocl_exit); - - -MODULE_VERSION(__stringify(XOCL_DRIVER_MAJOR) "." - __stringify(XOCL_DRIVER_MINOR) "." - __stringify(XOCL_DRIVER_PATCHLEVEL)); - -MODULE_DESCRIPTION(XOCL_DRIVER_DESC); -MODULE_AUTHOR("Sonal Santan "); -MODULE_LICENSE("GPL"); - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_drv.h b/sdk/linux_kernel_drivers/xocl/xocl_drv.h deleted file mode 100644 index 7349cd30..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_drv.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#ifndef _XCL_XOCL_DRV_H_ -#define _XCL_XOCL_DRV_H_ - -#include -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include "xclfeatures.h" -#include "xclbin2.h" // originally xclbin.h -#include "xocl_ioctl.h" -#include "xocl_exec.h" -#include "xocl_xvc.h" -#include "libxdma.h" - -#define DRV_NAME "xocl" - -// For CMA kernel command line should be cma=nn[MG]@[start[MG] - -#define XOCL_BO_USERPTR (1 << 31) -#define XOCL_BO_IMPORT (1 << 30) -#define XOCL_BO_EXECBUF (1 << 29) -#define XOCL_BO_CMA (1 << 28) -#define XOCL_BO_DDR0 (1 << 0) -#define XOCL_BO_DDR1 (1 << 1) -#define XOCL_BO_DDR2 (1 << 2) -#define XOCL_BO_DDR3 (1 << 3) -#define XOCL_BO_ARE (1 << 4) //When the BO is imported from an ARE device. This is remote BO to be accessed over ARE - -#define XOCL_CHANNEL_COUNT 4 -#define XOCL_RD_MTX 0 -#define XOCL_WR_MTX 1 - -#define XOCL_CMA_BASE 0x200000000 // (8 GB) -#define XOCL_CMA_SIZE 0x020000000 // (512 MB) -#define XOCL_CMA_NAME "PCISlave" - -#define XOCL_ARE_HOP 0x400000000ull - -#define XOCL_FEATURE_ROM 0x0B0000 -#define XOCL_SCHD_HW 0x180000 -#define XOCL_SCHD_CMD_QUEUE 0x190000 -#define XOCL_SCHD_CMD_STATUS 0x190000 - -struct cma; - -struct drm_xocl_exec_metadata { - enum drm_xocl_execbuf_state state; - unsigned int index; -}; - -struct drm_xocl_bo { - /* drm base object */ - struct drm_gem_object base; - struct drm_mm_node *mm_node; - struct drm_xocl_exec_metadata metadata; - struct page **pages; - struct sg_table *sgt; - void *vmapping; - unsigned flags; -}; - -struct drm_xocl_unmgd { - struct page **pages; - struct sg_table *sgt; - unsigned int npages; - unsigned flags; -}; - -struct drm_xocl_mem_topology { - //TODO : check the first 4 entries - remove unneccessary ones. - int32_t bank_count; - struct mem_data* m_data; - u32 m_data_length; //length of the mem_data section. - uint64_t bank_size; //in KB. Currently only fixed sizes are supported. 
- uint64_t size; - struct mem_topology *topology; -}; - -struct drm_xocl_connectivity { - uint64_t size; - struct connectivity *connections; -}; - -struct drm_xocl_layout { - uint64_t size; - struct ip_layout *layout; -}; - -struct drm_xocl_debug_layout { - uint64_t size; - struct debug_ip_layout *layout; -}; - -struct drm_xocl_dev { - struct drm_device *ddev; - /* The feature Rom header */ - struct FeatureRomHeader header; - /* Number of bidirectional channels */ - unsigned channel; - /* Memory manager array, one per DDR channel */ - struct drm_mm *mm; - /* Memory manager lock */ - struct mutex mm_lock; - /* Semaphore, one for each direction */ - struct semaphore channel_sem[2]; - /* Channel usage bitmasks, one for each direction - * bit 1 indicates channel is free, bit 0 indicates channel is free - */ - volatile unsigned long channel_bitmap[2]; - unsigned long long *channel_usage[2]; - struct drm_xocl_mm_stat *mm_usage_stat; - struct xdma_dev *xdma_handle; - struct cma *cma_blk; - bool offline; - /* Lock for stats */ - struct mutex stat_lock; - void *__iomem user_bar; - phys_addr_t res_start; - resource_size_t res_len; - bool unified; //unified platform, populated from FeatureROM, - u64 unique_id_last_bitstream; - struct xocl_xvc xvc; - struct drm_xocl_exec_core exec; - struct drm_xocl_mem_topology topology; - struct drm_xocl_layout layout; - struct drm_xocl_debug_layout debug_layout; - struct drm_xocl_connectivity connectivity; -}; - -static inline struct drm_gem_object *xocl_gem_object_lookup(struct drm_device *dev, - struct drm_file *filp, - u32 handle) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0) - return drm_gem_object_lookup(filp, handle); -#elif defined(RHEL_RELEASE_CODE) -#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,4) - return drm_gem_object_lookup(filp, handle); -#else - return drm_gem_object_lookup(dev, filp, handle); -#endif -#else - return drm_gem_object_lookup(dev, filp, handle); -#endif -} - -static inline struct drm_xocl_bo 
*to_xocl_bo(struct drm_gem_object *bo) -{ - return (struct drm_xocl_bo *)bo; -} - -static inline struct drm_xocl_dev *bo_xocl_dev(const struct drm_xocl_bo *bo) -{ - return bo->base.dev->dev_private; -} - -static inline unsigned xocl_bo_ddr_idx(unsigned flags) -{ - const unsigned ddr = flags & 0xf; - if (!ddr) - return 0xffffffff; - return __builtin_ctz(ddr); -} - -static inline unsigned short xocl_ddr_channel_count(const struct drm_device *drm) -{ - struct drm_xocl_dev *xdev = drm->dev_private; - struct drm_xocl_mem_topology *topology; - if(!xdev->unified) - return xdev->header.DDRChannelCount; - topology = &xdev->topology; - return topology->bank_count; -} - -static inline unsigned long long xocl_ddr_channel_size(const struct drm_device *drm) -{ - struct drm_xocl_dev *xdev = drm->dev_private; - struct drm_xocl_mem_topology *topology; - - if(!xdev->unified) { - /* Channel size is in GB */ - return xdev->header.DDRChannelSize * 0x40000000ull; - } - topology = &xdev->topology; - return topology->bank_size; -} - -static inline bool xocl_bo_userptr(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_USERPTR); -} - -static inline bool xocl_bo_import(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_IMPORT); -} - -static inline bool xocl_bo_execbuf(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_EXECBUF); -} - -static inline bool xocl_bo_cma(const struct drm_xocl_bo *bo) -{ - return (bo->flags & XOCL_BO_CMA); -} - -int xocl_create_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_userptr_bo_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp); -int xocl_sync_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_map_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_info_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_pwrite_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int 
xocl_pread_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_ctx_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_pwrite_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_pread_unmgd_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_usage_stat_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_read_axlf_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - - -void xocl_describe(const struct drm_xocl_bo *obj); - -void xocl_free_bo(struct drm_gem_object *obj); - -int xocl_migrate_bo(struct drm_device *ddev, const struct drm_xocl_bo *xobj, - enum drm_xocl_sync_bo_dir dir); - -int xocl_user_event(int irq, struct drm_xocl_dev *xdev); - -/** - * DMA-BUF support - */ -struct drm_gem_object *xocl_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, struct sg_table *sgt); - -struct sg_table *xocl_gem_prime_get_sg_table(struct drm_gem_object *obj); - -void *xocl_gem_prime_vmap(struct drm_gem_object *obj); - -void xocl_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); - -/** - * Sysfs related functions - */ -int xocl_init_sysfs(struct device *dev); -void xocl_fini_sysfs(struct device *dev); - -/** - * DEBUG and EXEC support - */ - -int xocl_debug_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int xocl_execbuf_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - -int xocl_user_intr_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - -#endif - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_exec.c b/sdk/linux_kernel_drivers/xocl/xocl_exec.c deleted file mode 100644 index c9e7865a..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_exec.c +++ /dev/null @@ -1,1426 +0,0 @@ -/* - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Authors: - * Soren Soe - * - * 
A GEM style device manager for PCIe based OpenCL accelerators. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -#include -#include -#include -#include -#include "ert.h" -#include "xocl_drv.h" -#include "xocl_exec.h" -#include "xocl_xdma.h" - -//#define SCHED_VERBOSE -#define SCHED_THREAD_ENABLE - -#if 0 -static unsigned long zero = 0; -static unsigned long time_ns(void) -{ - struct timeval now; - do_gettimeofday(&now); - if (!zero) - zero = timeval_to_ns(&now); - return timeval_to_ns(&now) - zero; -} -#endif - -#define sched_error_on(xdev,expr,msg) \ -({ \ - unsigned int ret = 0; \ - if ((expr)) { \ - DRM_INFO("Assertion failed: %s:%d:%s:%s %s\n" \ - ,__FILE__,__LINE__,__FUNCTION__,#expr,msg); \ - xdev->exec.scheduler->error=1; \ - ret = 1; \ - } \ - (ret); \ -}) - - -#ifdef SCHED_VERBOSE -# define SCHED_DEBUG(msg) printk(msg) -# define SCHED_DEBUGF(format,...) printk(format, ##__VA_ARGS__) -#else -# define SCHED_DEBUG(msg) -# define SCHED_DEBUGF(format,...) 
-#endif - -#define XOCL_U32_MASK 0xFFFFFFFF - -/** - * struct xocl_sched: scheduler for xocl_cmd objects - * - * @scheduler_thread: thread associated with this scheduler - * @use_count: use count for this scheduler - * @wait_queue: conditional wait queue for scheduler thread - * @error: set to 1 to indicate scheduler error - * @command_queue: list of command objects managed by scheduler - * @intc: boolean flag set when there is a pending interrupt for command completion - * @poll: number of running commands in polling mode - */ -struct xocl_sched -{ - struct task_struct *scheduler_thread; - unsigned int use_count; - - wait_queue_head_t wait_queue; - unsigned int error; - - struct list_head command_queue; - atomic_t intc; /* pending interrupt */ - atomic_t poll; /* number of cmds to poll */ -}; - -static struct xocl_sched global_scheduler0; - -/** - * Command data used by scheduler - * - * @list: command object moves from list to list - * @bo: underlying drm buffer object - * @xdev: device handle - * @xs: scehduler processing this commands - * @state: state of command object per scheduling - * @cu_idx: index of CU executing this cmd object; used in penguin mode only - * @slot_idx: command queue index of this command object - * @packet: mapped ert packet object from user space - */ -struct xocl_cmd -{ - struct list_head list; - struct drm_xocl_bo *bo; - struct drm_xocl_dev *xdev; - struct xocl_sched *xs; - enum ert_cmd_state state; - int cu_idx; - int slot_idx; - - struct ert_packet *packet; -}; - -/** - * set_cmd_int_state() - Set internal command state used by scheduler only - * - * @xcmd: command to change internal state on - * @state: new command state per ert.h - */ -inline void -set_cmd_int_state(struct xocl_cmd* xcmd, enum ert_cmd_state state) -{ - SCHED_DEBUGF("->set_cmd_int_state(,%d)\n",state); - xcmd->state = state; - SCHED_DEBUG("<-set_cmd_int_state\n"); -} - -/** - * set_cmd_state() - Set both internal and external state of a command - * - * The state is 
reflected externally through the command packet - * as well as being captured in internal state variable - * - * @xcmd: command object - * @state: new state - */ -inline void -set_cmd_state(struct xocl_cmd* xcmd, enum ert_cmd_state state) -{ - SCHED_DEBUGF("->set_cmd_state(,%d)\n",state); - xcmd->state = state; - xcmd->packet->state = state; - SCHED_DEBUG("<-set_cmd_state\n"); -} - -/** - * List of free xocl_cmd objects. - * - * @free_cmds: populated with recycled xocl_cmd objects - * @cmd_mutex: mutex lock for cmd_list - * - * Command objects are recycled for later use and only freed when kernel - * module is unloaded. - */ -static LIST_HEAD(free_cmds); -static DEFINE_MUTEX(free_cmds_mutex); - -/** - * List of new pending xocl_cmd objects - * - * @pending_cmds: populated from user space with new commands for buffer objects - * @num_pending: number of pending commands - * - * Scheduler copies pending commands to its private queue when necessary - */ -static LIST_HEAD(pending_cmds); -static DEFINE_MUTEX(pending_cmds_mutex); -static atomic_t num_pending = ATOMIC_INIT(0); - -/** - * get_free_xocl_cmd() - Get a free command object - * - * Get from free/recycled list or allocate a new command if necessary. - * - * Return: Free command object - */ -static struct xocl_cmd* -get_free_xocl_cmd(void) -{ - struct xocl_cmd* cmd; - SCHED_DEBUG("-> get_free_xocl_cmd\n"); - mutex_lock(&free_cmds_mutex); - cmd=list_first_entry_or_null(&free_cmds,struct xocl_cmd,list); - if (cmd) - list_del(&cmd->list); - mutex_unlock(&free_cmds_mutex); - if (!cmd) - cmd = kmalloc(sizeof(struct xocl_cmd),GFP_KERNEL); - if (!cmd) - return ERR_PTR(-ENOMEM); - SCHED_DEBUGF("<- get_free_xocl_cmd %p\n",cmd); - return cmd; -} - -/** - * add_cmd() - Add a new command to pending list - * - * @xdev: device owning adding the buffer object - * @bo: buffer objects from user space from which new command is created - * - * Scheduler copies pending commands to its internal command queue. 
- * - * Return: 0 on success, -errno on failure - */ -static int -add_cmd(struct drm_xocl_dev *xdev, struct drm_xocl_bo* bo) -{ - struct xocl_cmd *xcmd = get_free_xocl_cmd(); - SCHED_DEBUG("-> add_cmd\n"); - xcmd->bo=bo; - xcmd->xdev=xdev; - xcmd->cu_idx=-1; - xcmd->slot_idx=-1; - xcmd->packet = (struct ert_packet*)bo->vmapping; - xcmd->xs = xdev->exec.scheduler; - set_cmd_state(xcmd,ERT_CMD_STATE_NEW); - mutex_lock(&pending_cmds_mutex); - list_add_tail(&xcmd->list,&pending_cmds); - mutex_unlock(&pending_cmds_mutex); - - /* wake scheduler */ - atomic_inc(&num_pending); - wake_up_interruptible(&xcmd->xs->wait_queue); - - SCHED_DEBUG("<- add_cmd\n"); - return 0; -} - -/** - * recycle_cmd() - recycle a command objects - * - * @xcmd: command object to recycle - * - * Command object is added to the freelist - * - * Return: 0 - */ -static int -recycle_cmd(struct xocl_cmd* xcmd) -{ - SCHED_DEBUGF("recycle %p\n",xcmd); - mutex_lock(&free_cmds_mutex); - list_del(&xcmd->list); - list_add_tail(&xcmd->list,&free_cmds); - mutex_unlock(&free_cmds_mutex); - return 0; -} - -/** - * delete_cmd_list() - reclaim memory for all allocated command objects - */ -static void -delete_cmd_list(void) -{ - struct xocl_cmd *xcmd; - struct list_head *pos, *next; - - mutex_lock(&free_cmds_mutex); - list_for_each_safe(pos, next, &free_cmds) { - xcmd = list_entry(pos, struct xocl_cmd, list); - list_del(pos); - kfree(xcmd); - } - mutex_unlock(&free_cmds_mutex); -} - - - -/** - * struct xocl_sched_ops: scheduler specific operations - * - * Scheduler can operate in MicroBlaze mode (mb/ert) or in penguin mode. This - * struct differentiates specific operations. The struct is per device node, - * meaning that one device can operate in ert mode while another can operate in - * penguin mode. 
- */ -struct xocl_sched_ops -{ - int (*submit) (struct xocl_cmd *xcmd); - void (*query) (struct xocl_cmd *xcmd); -}; - -static struct xocl_sched_ops mb_ops; -static struct xocl_sched_ops penguin_ops; - -/** - * is_ert() - Check if running in embedded (ert) mode. - * - * Return: %true of ert mode, %false otherwise - */ -inline unsigned int -is_ert(struct drm_xocl_dev *xdev) -{ - return xdev->exec.ops == &mb_ops; -} - -/** - * ffs_or_neg_one() - Find first set bit in a 32 bit mask. - * - * @mask: mask to check - * - * First LSBit is at position 0. - * - * Return: Position of first set bit, or -1 if none - */ -inline int -ffs_or_neg_one(u32 mask) -{ - if (!mask) - return -1; - return ffs(mask)-1; -} - -/** - * ffz_or_neg_one() - First first zero bit in bit mask - * - * @mask: mask to check - * Return: Position of first zero bit, or -1 if none - */ -inline int -ffz_or_neg_one(u32 mask) -{ - if (mask==XOCL_U32_MASK) - return -1; - return ffz(mask); -} - - -/** - * slot_size() - slot size per device configuration - * - * Return: Command queue slot size - */ -inline unsigned int -slot_size(struct drm_xocl_dev *xdev) -{ - return ERT_CQ_SIZE / xdev->exec.num_slots; -} - -/** - * cu_mask_idx() - CU mask index for a given cu index - * - * @cu_idx: Global [0..127] index of a CU - * Return: Index of the CU mask containing the CU with cu_idx - */ -inline unsigned int -cu_mask_idx(unsigned int cu_idx) -{ - return cu_idx >> 5; /* 32 cus per mask */ -} - -/** - * cu_idx_in_mask() - CU idx within its mask - * - * @cu_idx: Global [0..127] index of a CU - * Return: Index of the CU within the mask that contains it - */ -inline unsigned int -cu_idx_in_mask(unsigned int cu_idx) -{ - return cu_idx - (cu_mask_idx(cu_idx) << 5); -} - -/** - * cu_idx_from_mask() - Given CU idx within a mask return its global idx [0..127] - * - * @cu_idx: Index of CU with mask identified by mask_idx - * @mask_idx: Mask index of the has CU with cu_idx - * Return: Global cu_idx [0..127] - */ -inline unsigned 
int -cu_idx_from_mask(unsigned int cu_idx, unsigned int mask_idx) -{ - return cu_idx + (mask_idx << 5); -} - -/** - * slot_mask_idx() - Slot mask idx index for a given slot_idx - * - * @slot_idx: Global [0..127] index of a CQ slot - * Return: Index of the slot mask containing the slot_idx - */ -inline unsigned int -slot_mask_idx(unsigned int slot_idx) -{ - return slot_idx >> 5; -} - -/** - * slot_idx_in_mask() - Index of command queue slot within the mask that contains it - * - * @slot_idx: Global [0..127] index of a CQ slot - * Return: Index of slot within the mask that contains it - */ -inline unsigned int -slot_idx_in_mask(unsigned int slot_idx) -{ - return slot_idx - (slot_mask_idx(slot_idx) << 5); -} - -/** - * slot_idx_from_mask_idx() - Given slot idx within a mask, return its global idx [0..127] - * - * @slot_idx: Index of slot with mask identified by mask_idx - * @mask_idx: Mask index of the mask hat has slot with slot_idx - * Return: Global slot_idx [0..127] - */ -inline unsigned int -slot_idx_from_mask_idx(unsigned int slot_idx,unsigned int mask_idx) -{ - return slot_idx + (mask_idx << 5); -} - -/** - * opcode() - Command opcode - * - * @cmd: Command object - * Return: Opcode per command packet - */ -inline u32 -opcode(struct xocl_cmd* xcmd) -{ - return xcmd->packet->opcode; -} - -/** - * payload_size() - Command payload size - * - * @xcmd: Command object - * Return: Size in number of words of command packet payload - */ -inline u32 -payload_size(struct xocl_cmd *xcmd) -{ - return xcmd->packet->count; -} - -/** - * packet_size() - Command packet size - * - * @xcmd: Command object - * Return: Size in number of words of command packet - */ -inline u32 -packet_size(struct xocl_cmd *xcmd) -{ - return payload_size(xcmd) + 1; -} - -/** - * cu_masks() - Number of command packet cu_masks - * - * @xcmd: Command object - * Return: Total number of CU masks in command packet - */ -inline u32 -cu_masks(struct xocl_cmd *xcmd) -{ - struct ert_start_kernel_cmd *sk; - if 
(opcode(xcmd)!=ERT_START_KERNEL) - return 0; - sk = (struct ert_start_kernel_cmd *)xcmd->packet; - return 1 + sk->extra_cu_masks; -} - -/** - * regmap_size() - Size of regmap is payload size (n) minus the number of cu_masks - * - * @xcmd: Command object - * Return: Size of register map in number of words - */ -inline u32 -regmap_size(struct xocl_cmd* xcmd) -{ - return payload_size(xcmd) - cu_masks(xcmd); -} - -/** - * cu_idx_to_addr() - Convert CU idx into it relative bar address. - * - * @xdev: Device handle - * @cu_idx: Global CU idx - * Return: Address of CU relative to bar - */ -inline u32 -cu_idx_to_addr(struct drm_xocl_dev *xdev,unsigned int cu_idx) -{ - return (cu_idx << xdev->exec.cu_shift_offset) + xdev->exec.cu_base_addr; -} - -/** - * cu_idx_to_bitmask() - Compute the cu bitmask for cu_idx - * - * Subtract 32 * lower bitmasks prior to bitmask repsenting - * this index. For example, f.x cu_idx=67 - * 1 << (67 - (67>>5)<<5) = - * 1 << (67 - (2<<5)) = - * 1 << (67 - 64) = - * 1 << 3 = - * 0b1000 for position 4 in third bitmask - * - * @xdev: Device handle - * @cu_idx: Global index [0..127] of CU - * - * This function computes the bitmask for cu_idx in the mask that stores cu_idx - * - * Return: Bitmask with bit set for corresponding CU - */ -inline u32 -cu_idx_to_bitmask(struct drm_xocl_dev *xdev, u32 cu_idx) -{ - return 1 << (cu_idx - (cu_mask_idx(cu_idx)<<5)); -} - - -/** - * configure() - Configure the scheduler - * - * Process the configure command sent from user space. Only one process can - * configure the scheduler, so if scheduler is already configured, the - * function should verify that another process doesn't expect different - * configuration. - * - * Future may need ability to query current configuration so as to keep - * multiple processes in sync. 
- * - * Return: 0 on success, 1 on failure - */ -static int -configure(struct xocl_cmd *xcmd) -{ - struct drm_xocl_dev *xdev=xcmd->xdev; - struct ert_configure_cmd *cfg; - - if (sched_error_on(xdev,opcode(xcmd)!=ERT_CONFIGURE,"expected configure command")) - return 1; - - cfg = (struct ert_configure_cmd *)(xcmd->packet); - - if (xdev->exec.configured==0) { - SCHED_DEBUG("configuring scheduler\n"); - xdev->exec.num_slots = ERT_CQ_SIZE / cfg->slot_size; - xdev->exec.num_cus = cfg->num_cus; - xdev->exec.cu_shift_offset = cfg->cu_shift; - xdev->exec.cu_base_addr = cfg->cu_base_addr; - xdev->exec.num_cu_masks = ((xdev->exec.num_cus-1)>>5) + 1; - - if (cfg->ert) { - SCHED_DEBUG("++ configuring embedded scheduler mode\n"); - xdev->exec.ops = &mb_ops; - xdev->exec.polling_mode = cfg->polling; - xdev->exec.cq_interrupt = cfg->cq_int; - } - else { - SCHED_DEBUG("++ configuring penguin scheduler mode\n"); - xdev->exec.ops = &penguin_ops; - xdev->exec.polling_mode = 1; - } - - DRM_INFO("scheduler config ert(%d) slots(%d), cus(%d), cu_shift(%d), cu_base(0x%x), cu_masks(%d)\n" - ,is_ert(xdev) - ,xdev->exec.num_slots - ,xdev->exec.num_cus - ,xdev->exec.cu_shift_offset - ,xdev->exec.cu_base_addr - ,xdev->exec.num_cu_masks); - - return 0; - } - - DRM_INFO("reconfiguration of scheduler not supported\n"); - - return 1; -} - -/** - * acquire_slot_idx() - Acquire a slot index if available. Update slot status to busy - * so it cannot be reacquired. 
- * - * This function is called from scheduler thread - * - * Return: Command queue slot index, or -1 if none avaiable - */ -static int -acquire_slot_idx(struct drm_xocl_dev *xdev) -{ - unsigned int mask_idx=0, slot_idx=-1; - u32 mask; - SCHED_DEBUG("-> acquire_slot_idx\n"); - for (mask_idx=0; mask_idxexec.num_slot_masks; ++mask_idx) { - mask = xdev->exec.slot_status[mask_idx]; - slot_idx = ffz_or_neg_one(mask); - if (slot_idx==-1 || slot_idx_from_mask_idx(slot_idx,mask_idx)>=xdev->exec.num_slots) - continue; - xdev->exec.slot_status[mask_idx] ^= (1< release_slot_idx slot_status[%d]=0x%x, pos=%d\n" - ,mask_idx,xdev->exec.slot_status[mask_idx],pos); - xdev->exec.slot_status[mask_idx] ^= (1<exec.submitted_cmds[cmd_idx]; - if (sched_error_on(xdev,!xcmd,"no submtted cmd")) - return -1; - return xcmd->cu_idx; -} - -/** - * cu_done() - Check status of CU - * - * @cu_idx: Index of cu to check - * - * This function is called in polling mode only. The cu_idx - * is guaranteed to have been started - * - * Return: %true if cu done, %false otherwise - */ -inline int -cu_done(struct drm_xocl_dev *xdev, unsigned int cu_idx) -{ - u32 cu_addr = cu_idx_to_addr(xdev,cu_idx); - SCHED_DEBUGF("-> cu_done(,%d) checks cu at address 0x%x\n",cu_idx,cu_addr); - /* done is indicated by AP_DONE(2) alone or by AP_DONE(2) | AP_IDLE(4) - * but not by AP_IDLE itself. Since 0x10 | (0x10 | 0x100) = 0x110 - * checking for 0x10 is sufficient. 
*/ - if (ioread32(xdev->user_bar + cu_addr) & 2) { - unsigned int mask_idx = cu_mask_idx(cu_idx); - unsigned int pos = cu_idx_in_mask(cu_idx); - xdev->exec.cu_status[mask_idx] ^= 1<exec.submitted_cmds[cmd_idx]; - u32 opc = 0; - SCHED_DEBUGF("-> cmd_done(,%d)\n",cmd_idx); - - if (sched_error_on(xdev,!xcmd || xcmd->slot_idx!=cmd_idx,"no command or missing slot index")) - return false; - - opc = opcode(xcmd); - if (opc==ERT_START_CU) { - int val = cu_done(xdev,get_cu_idx(xdev,cmd_idx)); - SCHED_DEBUGF("<- cmd_done (cu_done) returns %d\n",val); - return val; - } - if (opc==ERT_CONFIGURE) { - SCHED_DEBUG("<- cmd_done (configure) returns 1\n"); - return true; - } - SCHED_DEBUG("<- cmd_done returns 0\n"); - return false; -} - -/** - * notify_host() - Notify user space that a command is complete. - */ -static void -notify_host(struct xocl_cmd *xcmd) -{ - struct list_head *ptr; - struct drm_xocl_client_ctx *entry; - struct drm_xocl_dev *xdev = xcmd->xdev; - unsigned long flags = 0; - - SCHED_DEBUG("-> notify_host\n"); - - /* now for each client update the trigger counter in the context */ - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - list_for_each(ptr, &xdev->exec.ctx_list) { - entry = list_entry(ptr, struct drm_xocl_client_ctx, link); - atomic_inc(&entry->trigger); - } - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); - /* wake up all the clients */ - wake_up_interruptible(&xdev->exec.poll_wait_queue); - SCHED_DEBUG("<- notify_host\n"); -} - -/** - * mark_cmd_complete() - Move a command to complete state - * - * Commands are marked complete in two ways - * 1. Through polling of CUs or polling of MB status register - * 2. Through interrupts from MB - * In both cases, the completed commands are residing in the completed_cmds - * list and the number of completed commands is reflected in num_completed. - * - * @xcmd: Command to mark complete - * - * The command is removed from the slot it occupies in the device command - * queue. 
The slot is released so new commands can be submitted. The host - * is notified that some command has completed. - */ -static void -mark_cmd_complete(struct xocl_cmd *xcmd) -{ - SCHED_DEBUGF("-> mark_cmd_complete(,%d)\n",xcmd->slot_idx); - xcmd->xdev->exec.submitted_cmds[xcmd->slot_idx] = NULL; - set_cmd_state(xcmd,ERT_CMD_STATE_COMPLETED); - if (xcmd->xdev->exec.polling_mode) - atomic_dec(&xcmd->xs->poll); - release_slot_idx(xcmd->xdev,xcmd->slot_idx); - notify_host(xcmd); - SCHED_DEBUGF("<- mark_cmd_complete\n"); -} - -/** - * mark_mask_complete() - Move all commands in mask to complete state - * - * @mask: Bitmask with queried statuses of commands - * @mask_idx: Index of the command mask. Used to offset the actual cmd slot index - */ -static void -mark_mask_complete(struct drm_xocl_dev *xdev, u32 mask, unsigned int mask_idx) -{ - int bit_idx=0,cmd_idx=0; - SCHED_DEBUGF("-> mark_mask_complete(,0x%x,%d)\n",mask,mask_idx); - if (!mask) - return; - for (bit_idx=0, cmd_idx=mask_idx<<5; bit_idx<32; mask>>=1,++bit_idx,++cmd_idx) - if (mask & 0x1) - mark_cmd_complete(xdev->exec.submitted_cmds[cmd_idx]); - SCHED_DEBUG("<- mark_mask_complete\n"); -} - -/** - * queued_to_running() - Move a command from queued to running state if possible - * - * @xcmd: Command to start - * - * Upon success, the command is not necessarily running. In ert mode the - * command will have been submitted to the embedded scheduler, whereas in - * penguin mode the command has been started on a CU. 
- * - * Return: %true if command was submitted to device, %false otherwise - */ -static int -queued_to_running(struct xocl_cmd *xcmd) -{ - int retval = false; - - SCHED_DEBUG("-> queued_to_running\n"); - - if (opcode(xcmd)==ERT_CONFIGURE) - configure(xcmd); - - if (xcmd->xdev->exec.ops->submit(xcmd)) { - set_cmd_int_state(xcmd,ERT_CMD_STATE_RUNNING); - if (xcmd->xdev->exec.polling_mode) - atomic_inc(&xcmd->xs->poll); - xcmd->xdev->exec.submitted_cmds[xcmd->slot_idx] = xcmd; - retval = true; - } - - SCHED_DEBUGF("<- queued_to_running returns %d\n",retval); - - return retval; -} - -/** - * running_to_complete() - Check status of running commands - * - * @xcmd: Command is in running state - * - * If a command is found to be complete, it marked complete prior to return - * from this function. - */ -static void -running_to_complete(struct xocl_cmd *xcmd) -{ - SCHED_DEBUG("-> running_to_complete\n"); - - xcmd->xdev->exec.ops->query(xcmd); - - SCHED_DEBUG("<- running_to_complete\n"); -} - -/** - * complete_to_free() - Recycle a complete command objects - * - * @xcmd: Command is in complete state - */ -static void -complete_to_free(struct xocl_cmd *xcmd) -{ - SCHED_DEBUG("-> complete_to_free\n"); - - drm_gem_object_unreference_unlocked(&xcmd->bo->base); - recycle_cmd(xcmd); - - SCHED_DEBUG("<- complete_to_free\n"); -} - -/** - * scheduler_queue_cmds() - Queue any pending commands - * - * The scheduler copies pending commands to its internal command queue where - * is is now in queued state. 
- */ -static void -scheduler_queue_cmds(struct xocl_sched *xs) -{ - struct xocl_cmd *xcmd; - - SCHED_DEBUG("-> scheduler_queue_cmds\n"); - mutex_lock(&pending_cmds_mutex); - while (!list_empty(&pending_cmds)) { - xcmd = list_first_entry(&pending_cmds,struct xocl_cmd,list); - if (xcmd->xs != xs) - continue; - list_del(&xcmd->list); - list_add_tail(&xcmd->list,&xs->command_queue); - set_cmd_int_state(xcmd,ERT_CMD_STATE_QUEUED); - atomic_dec(&num_pending); - } - mutex_unlock(&pending_cmds_mutex); - SCHED_DEBUG("<- scheduler_queue_cmds\n"); -} - -/** - * scheduler_iterator_cmds() - Iterate all commands in scheduler command queue - */ -static void -scheduler_iterate_cmds(struct xocl_sched *xs) -{ - struct xocl_cmd *xcmd; - struct list_head *pos, *next; - - SCHED_DEBUG("-> scheduler_iterate_cmds\n"); - list_for_each_safe(pos, next, &xs->command_queue) { - xcmd = list_entry(pos, struct xocl_cmd, list); - - if (xcmd->state == ERT_CMD_STATE_QUEUED) - queued_to_running(xcmd); - if (xcmd->state == ERT_CMD_STATE_RUNNING) - running_to_complete(xcmd); - if (xcmd->state == ERT_CMD_STATE_COMPLETED) - complete_to_free(xcmd); - - } - SCHED_DEBUG("<- scheduler_iterate_cmds\n"); -} - -/** - * scheduler_wait_condition() - Check status of scheduler wait condition - * - * Scheduler must wait (sleep) if - * 1. there are no pending commands - * 2. no pending interrupt from embedded scheduler - * 3. 
no pending complete commands in polling mode - * - * Return: 1 if scheduler must wait, 0 othewise - */ -static int -scheduler_wait_condition(struct xocl_sched *xs) -{ - if (kthread_should_stop() || xs->error) { - SCHED_DEBUG("scheduler wakes kthread_should_stop\n"); - return 0; - } - - if (atomic_read(&num_pending)) { - SCHED_DEBUG("scheduler wakes to copy new pending commands\n"); - return 0; - } - - if (atomic_read(&xs->intc)) { - SCHED_DEBUG("scheduler wakes on interrupt\n"); - atomic_set(&xs->intc,0); - return 0; - } - - if (atomic_read(&xs->poll)) { - SCHED_DEBUG("scheduler wakes to poll\n"); - return 0; - } - - SCHED_DEBUG("scheduler waits ...\n"); - return 1; -} - -/** - * scheduler_wait() - check if scheduler should wait - * - * See scheduler_wait_condition(). - */ -static void -scheduler_wait(struct xocl_sched *xs) -{ - wait_event_interruptible(xs->wait_queue,scheduler_wait_condition(xs)==0); -} - -/** - * scheduler_loop() - Run one loop of the scheduler - */ -static void -scheduler_loop(struct xocl_sched *xs) -{ - SCHED_DEBUG("scheduler_loop\n"); - - scheduler_wait(xs); - - if (xs->error) { - DRM_INFO("scheduler encountered unexpected error and exits\n"); - return; - } - - /* queue new pending commands */ - scheduler_queue_cmds(xs); - - /* iterate all commands */ - scheduler_iterate_cmds(xs); -} - -/** - * scheduler() - Command scheduler thread routine - */ -#if defined(__GNUC__) && !defined(SCHED_THREAD_ENABLE) -__attribute__((unused)) -#endif -static int -scheduler(void* data) -{ - struct xocl_sched *xs = (struct xocl_sched *)data; - while (!kthread_should_stop() && !xs->error) - scheduler_loop(xs); - DRM_INFO("%s:%d scheduler thread exits with value %d\n",__FILE__,__LINE__,xs->error); - return xs->error; -} - -/** - * init_scheduler_thread() - Initialize scheduler thread if necessary - * - * Return: 0 on success, -errno otherwise - */ -static int -init_scheduler_thread(void) -{ -#ifdef SCHED_THREAD_ENABLE - SCHED_DEBUGF("init_scheduler_thread 
use_count=%d\n",global_scheduler0.use_count); - if (global_scheduler0.use_count++) - return 0; - - init_waitqueue_head(&global_scheduler0.wait_queue); - global_scheduler0.error = 0; - - INIT_LIST_HEAD(&global_scheduler0.command_queue); - atomic_set(&global_scheduler0.intc,0); - atomic_set(&global_scheduler0.poll,0); - - global_scheduler0.scheduler_thread = kthread_run(scheduler,(void*)&global_scheduler0,"xocl-scheduler-thread0"); - if (IS_ERR(global_scheduler0.scheduler_thread)) { - int ret = PTR_ERR(global_scheduler0.scheduler_thread); - DRM_ERROR(__func__); - return ret; - } -#endif - return 0; -} - -/** - * fini_scheduler_thread() - Finalize scheduler thread if unused - * - * Return: 0 on success, -errno otherwise - */ -static int -fini_scheduler_thread(void) -{ - int retval = 0; - SCHED_DEBUGF("fini_scheduler_thread use_count=%d\n",global_scheduler0.use_count); - if (--global_scheduler0.use_count) - return 0; - - retval = kthread_stop(global_scheduler0.scheduler_thread); - - /* clear stale command objects if any */ - while (!list_empty(&pending_cmds)) { - struct xocl_cmd *xcmd = list_first_entry(&pending_cmds,struct xocl_cmd,list); - DRM_INFO("deleting stale pending cmd\n"); - list_del(&xcmd->list); - drm_gem_object_unreference_unlocked(&xcmd->bo->base); - } - while (!list_empty(&global_scheduler0.command_queue)) { - struct xocl_cmd *xcmd = list_first_entry(&global_scheduler0.command_queue,struct xocl_cmd,list); - DRM_INFO("deleting stale scheduler cmd\n"); - list_del(&xcmd->list); - drm_gem_object_unreference_unlocked(&xcmd->bo->base); - } - - delete_cmd_list(); - - return retval; -} - - -/** - * mb_query() - Check command status of argument command - * - * @xcmd: Command to check - * - * This function is for ERT mode. In polling mode, check the command status - * register containing the slot assigned to the command. In interrupt mode - * check the interrupting status register. 
The function checks all commands in - * the same command status register as argument command so more than one - * command may be marked complete by this function. - */ -static void -mb_query(struct xocl_cmd *xcmd) -{ - struct drm_xocl_dev *xdev = xcmd->xdev; - unsigned int cmd_mask_idx = slot_mask_idx(xcmd->slot_idx); - - SCHED_DEBUGF("-> mb_query slot_idx=%d, cmd_mask_idx=%d\n",xcmd->slot_idx,cmd_mask_idx); - - if (xdev->exec.polling_mode - || (cmd_mask_idx==0 && atomic_read(&xdev->exec.sr0)) - || (cmd_mask_idx==1 && atomic_read(&xdev->exec.sr1)) - || (cmd_mask_idx==2 && atomic_read(&xdev->exec.sr2)) - || (cmd_mask_idx==3 && atomic_read(&xdev->exec.sr3))) { - u32 csr_addr = ERT_STATUS_REGISTER_ADDR + (cmd_mask_idx<<2); - u32 mask = ioread32(xcmd->xdev->user_bar + csr_addr); - if (mask) - mark_mask_complete(xcmd->xdev,mask,cmd_mask_idx); - SCHED_DEBUGF("++ mb_query csr_addr=0x%x mask=0x%x\n",csr_addr,mask); - } - - SCHED_DEBUGF("<- mb_query\n"); -} - -/** - * penguin_query() - Check command status of argument command - * - * @xcmd: Command to check - * - * Function is called in penguin mode (no embedded scheduler). 
- */ -static void -penguin_query(struct xocl_cmd *xcmd) -{ - u32 opc = opcode(xcmd); - - SCHED_DEBUGF("-> penguin_queury() slot_idx=%d\n",xcmd->slot_idx); - - if (opc==ERT_CONFIGURE || (opc==ERT_START_CU && cu_done(xcmd->xdev,get_cu_idx(xcmd->xdev,xcmd->slot_idx)))) - mark_cmd_complete(xcmd); - - SCHED_DEBUG("<- penguin_queury\n"); -} - -/** - * mb_submit() - Submit a command the embedded scheduler command queue - * - * @xcmd: Command to submit - * Return: %true if successfully submitted, %false otherwise - */ -static int -mb_submit(struct xocl_cmd *xcmd) -{ - u32 slot_addr; - - SCHED_DEBUG("-> mb_submit\n"); - - xcmd->slot_idx = acquire_slot_idx(xcmd->xdev); - if (xcmd->slot_idx<0) { - SCHED_DEBUG("<- mb_submit returns 0\n"); - return 0; - } - - slot_addr = ERT_CQ_BASE_ADDR + xcmd->slot_idx*slot_size(xcmd->xdev); - SCHED_DEBUGF("++ mb_submit slot_idx=%d, slot_addr=0x%x\n",xcmd->slot_idx,slot_addr); - - /* write packet minus header */ - memcpy_toio(xcmd->xdev->user_bar + slot_addr + 4,xcmd->packet->data,(packet_size(xcmd)-1)*sizeof(u32)); - - /* write header */ - iowrite32(xcmd->packet->header,xcmd->xdev->user_bar + slot_addr); - - /* trigger interrupt to embedded scheduler if feature is enabled */ - if (xcmd->xdev->exec.cq_interrupt) { - u32 cq_int_addr = ERT_CQ_STATUS_REGISTER_ADDR + (slot_mask_idx(xcmd->slot_idx)<<2); - u32 mask = 1<slot_idx); - SCHED_DEBUGF("++ mb_submit writes slot mask 0x%x to CQ_INT register at addr 0x%x\n", - mask,cq_int_addr); - iowrite32(mask,xcmd->xdev->user_bar + cq_int_addr); - } - - SCHED_DEBUG("<- mb_submit returns 1\n"); - return 1; -} - -/** - * get_free_cu() - get index of first available CU per command cu mask - * - * @xcmd: command containing CUs to check for availability - * - * This function is called kernel software scheduler mode only, in embedded - * scheduler mode, the hardware scheduler handles the commands directly. - * - * Return: Index of free CU, -1 of no CU is available. 
- */ -static int -get_free_cu(struct xocl_cmd *xcmd) -{ - int mask_idx=0; - SCHED_DEBUG("-> get_free_cu\n"); - for (mask_idx=0; mask_idxxdev->exec.num_cu_masks; ++mask_idx) { - u32 cmd_mask = xcmd->packet->data[mask_idx]; /* skip header */ - u32 busy_mask = xcmd->xdev->exec.cu_status[mask_idx]; - int cu_idx = ffs_or_neg_one((cmd_mask | busy_mask) ^ busy_mask); - if (cu_idx>=0) { - xcmd->xdev->exec.cu_status[mask_idx] ^= 1<xdev->user_bar; - u32 cu_addr = cu_idx_to_addr(xcmd->xdev,cu_idx); - u32 size = regmap_size(xcmd); - struct ert_start_kernel_cmd *ecmd = (struct ert_start_kernel_cmd *)xcmd->packet; - - SCHED_DEBUGF("-> configure_cu cu_idx=%d, cu_addr=0x%x, regmap_size=%d\n" - ,cu_idx,cu_addr,size); - - /* write register map, but skip first word (AP_START) */ - /* can't get memcpy_toio to work */ - /* memcpy_toio(user_bar + cu_addr + 4,ecmd->data + ecmd->extra_cu_masks + 1,(size-1)*4); */ - for (i=1; idata + ecmd->extra_cu_masks + i),user_bar + cu_addr + (i<<2)); - - /* start CU at base + 0x0 */ - iowrite32(0x1,user_bar + cu_addr); - - SCHED_DEBUG("<- configure_cu\n"); -} - -/** - * penguin_submit() - penguin submit of a command - * - * @xcmd: command to submit - * - * Special processing for configure command. Configuration itself is - * done/called by queued_to_running before calling penguin_submit. In penguin - * mode configuration need to ensure that the command is retired properly by - * scheduler, so assign it a slot index and let normal flow continue. 
- * - * Return: %true on successful submit, %false otherwise - */ -static int -penguin_submit(struct xocl_cmd *xcmd) -{ - SCHED_DEBUG("-> penguin_submit\n"); - - /* configuration was done by submit_cmds, ensure the cmd retired properly */ - if (opcode(xcmd)==ERT_CONFIGURE) { - xcmd->slot_idx = acquire_slot_idx(xcmd->xdev); - SCHED_DEBUG("<- penguin_submit (configure)\n"); - return 1; - } - - if (opcode(xcmd)!=ERT_START_CU) - return 0; - - /* extract cu list */ - xcmd->cu_idx = get_free_cu(xcmd); - if (xcmd->cu_idx<0) - return 0; - - xcmd->slot_idx = acquire_slot_idx(xcmd->xdev); - if (xcmd->slot_idx<0) - return 0; - - /* found free cu, transfer regmap and start it */ - configure_cu(xcmd,xcmd->cu_idx); - - SCHED_DEBUGF("<- penguin_submit cu_idx=%d slot=%d\n",xcmd->cu_idx,xcmd->slot_idx); - - return 1; -} - - -/** - * mb_ops: operations for ERT scheduling - */ -static struct xocl_sched_ops mb_ops = { - .submit = mb_submit, - .query = mb_query, -}; - -/** - * penguin_ops: operations for kernel mode scheduling - */ -static struct xocl_sched_ops penguin_ops = { - .submit = penguin_submit, - .query = penguin_query, -}; - -/** - * xocl_user_event() - Interrupt service routine for MB interrupts - * - * Called by xocl_xdma_user_isr() which is our stub for user ISR registered with libxdma - * Kernel doc says eventfd_signal() does not sleep so it should be okay to call this in ISR - * TODO: Add support for locking so xdev->user_msix_table[irq] is not deleted/changed by - * xocl_user_intr_ioctl() while we are using it. - */ -int -xocl_user_event(int irq, struct drm_xocl_dev *xdev) -{ - SCHED_DEBUGF("xocl_user_event %d\n",irq); - if (irq>=XOCL_CSR_INTR0 && irq<=XOCL_CSR_INTR3 && is_ert(xdev) && !xdev->exec.polling_mode) { - - if (irq==0) - atomic_set(&xdev->exec.sr0,1); - else if (irq==1) - atomic_set(&xdev->exec.sr1,1); - else if (irq==2) - atomic_set(&xdev->exec.sr2,1); - else if (irq==3) - atomic_set(&xdev->exec.sr3,1); - - /* wake up all scheduler ... 
currently one only */ - atomic_set(&global_scheduler0.intc,1); - wake_up_interruptible(&global_scheduler0.wait_queue); - return 0; - } - if (!xdev->exec.user_msix_table[irq]) - return -EFAULT; - if (eventfd_signal(xdev->exec.user_msix_table[irq], 1) == 1) - return 0; - return -EFAULT; -} - - -/** - * xocl_execbuf_ioctl() - Entry point for exec buffer. - * - * @dev: Device node calling execbuf - * @data: Payload - * @filp: - * - * Function adds exec buffer to the pending list of commands - * - * Return: 0 on success, -errno otherwise - */ -int -xocl_execbuf_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - struct drm_gem_object *obj; - struct drm_xocl_bo *xobj; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_execbuf *args = data; - int ret = 0; - - SCHED_DEBUG("-> xocl_execbuf_ioctl\n"); - obj = xocl_gem_object_lookup(dev, filp, args->exec_bo_handle); - if (!obj) { - DRM_INFO("Failed to look up GEM BO %d\n", args->exec_bo_handle); - return -ENOENT; - } - - xobj = to_xocl_bo(obj); - if (!xocl_bo_execbuf(xobj)) { - ret = -EINVAL; - goto out; - } - - /* Add the command to pending list */ - if (add_cmd(xdev,xobj)) { - ret = -EINVAL; - goto out; - } - - /* we keep a bo reference which is released later when the bo is retired when task is done */ - SCHED_DEBUG("<- xocl_execbuf_ioctl\n"); - return ret; -out: - drm_gem_object_unreference_unlocked(&xobj->base); - return ret; -} - -/** - * xocl_init_exec() - Initialize the command execution for device - * - * @xdev: Device node to initialize - * - * Return: 0 on success, -errno otherwise - */ -int -xocl_init_exec(struct drm_xocl_dev *xdev) -{ - unsigned int i; - - mutex_init(&xdev->exec.user_msix_table_lock); - spin_lock_init(&xdev->exec.ctx_list_lock); - INIT_LIST_HEAD(&xdev->exec.ctx_list); - init_waitqueue_head(&xdev->exec.poll_wait_queue); - - xdev->exec.scheduler = &global_scheduler0; - - for (i=0; iexec.submitted_cmds[i] = NULL; - - xdev->exec.num_slots = 16; - 
xdev->exec.num_cus = 0; - xdev->exec.cu_base_addr = 0; - xdev->exec.cu_shift_offset = 0; - xdev->exec.cq_interrupt = 0; - xdev->exec.polling_mode = 1; - - for (i=0; iexec.slot_status[i] = 0; - xdev->exec.num_slot_masks = 1; - - for (i=0; iexec.cu_status[i] = 0; - xdev->exec.num_cu_masks = 0; - - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR0, true); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR1, true); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR2, true); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR3, true); - xdev->exec.ops = &penguin_ops; - - atomic_set(&xdev->exec.sr0,0); - atomic_set(&xdev->exec.sr1,0); - atomic_set(&xdev->exec.sr2,0); - atomic_set(&xdev->exec.sr3,0); - - init_scheduler_thread(); - return 0; -} - -/** - * xocl_fini_exec() - Finalize the command execution for device - * - * @xdev: Device node to finalize - * - * Return: 0 on success, -errno otherwise - */ -int xocl_fini_exec(struct drm_xocl_dev *xdev) -{ - int i; - - fini_scheduler_thread(); - - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR0, false); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR1, false); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR2, false); - xdma_user_interrupt_config(xdev, XOCL_CSR_INTR3, false); - for (i=0; i<16; i++) { - xdma_user_interrupt_config(xdev, i, false); - if (xdev->exec.user_msix_table[i]) - eventfd_ctx_put(xdev->exec.user_msix_table[i]); - } - mutex_destroy(&xdev->exec.user_msix_table_lock); - - return 0; -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_exec.h b/sdk/linux_kernel_drivers/xocl/xocl_exec.h deleted file mode 100644 index e36f3990..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_exec.h +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * Compute unit execution, interrupt management and client context core data structures. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _XCL_XOCL_EXEC_H_ -#define _XCL_XOCL_EXEC_H_ - -#include -#include -#include -#include - -#define XOCL_CSR_INTR0 0 -#define XOCL_CSR_INTR1 1 -#define XOCL_CSR_INTR2 2 -#define XOCL_CSR_INTR3 3 - -#define XOCL_USER_INTR_START 4 -#define XOCL_USER_INTR_END 16 - -#define XOCL_MAX_SLOTS 128 -#define XOCL_MAX_CUS 128 -#define XOCL_MAX_U32_SLOT_MASKS (((XOCL_MAX_SLOTS-1)>>5) + 1) -#define XOCL_MAX_U32_CU_MASKS (((XOCL_MAX_CUS-1)>>5) + 1) - -struct eventfd_ctx; -struct drm_xocl_dev; - -struct drm_xocl_client_ctx { - struct list_head link; - atomic_t trigger; - /* - * Bitmap to indicate all the user interrupts registered. These are unmanaged - * interrupts directly used by the non-OpenCL application. 
The corresponding - * eventfd objects are stored in drm_xocl_dev::user_msix_table - */ - unsigned int eventfd_bitmap; - struct mutex lock; -}; - -/** - * struct drm_xocl_exec_core: Core data structure for command execution on a device - * - * @user_msix_table: Eventfd table for user interrupts - * @user_msix_table_lock: Eventfd table lock - * @ctx_list: Context list populated with device context - * @ctx_list_lock: Context list lock - * @poll_wait_queue: Wait queue for device polling - * @scheduler: Command queue scheduler - * @submitted_cmds: Tracking of command submitted for execution on this device - * @num_slots: Number of command queue slots - * @num_cus: Number of CUs in loaded program - * @cu_shift_offset: CU idx to CU address shift value - * @cu_base_addr: Base address of CU address space - * @polling_mode: If set then poll for command completion - * @cq_interrupt: If set then trigger interrupt to MB on new commands - * @configured: Flag to indicate that the core data structure has been initialized - * @slot_status: Bitmap to track status (busy(1)/free(0)) slots in command queue - * @num_slot_masks: Number of slots status masks used (computed from @num_slots) - * @cu_status: Bitmap to track status (busy(1)/free(0)) of CUs. Unused in ERT mode. - * @num_cu_masks: Number of CU masks used (computed from @num_cus) - * @sr0: If set, then status register [0..31] is pending with completed commands (ERT only). - * @sr1: If set, then status register [32..63] is pending with completed commands (ERT only). - * @sr2: If set, then status register [64..95] is pending with completed commands (ERT only). - * @sr3: If set, then status register [96..127] is pending with completed commands (ERT only). 
- * @ops: Scheduler operations vtable - */ -struct drm_xocl_exec_core { - struct eventfd_ctx *user_msix_table[16]; - struct mutex user_msix_table_lock; - - struct list_head ctx_list; - spinlock_t ctx_list_lock; - wait_queue_head_t poll_wait_queue; - - struct xocl_sched *scheduler; - - struct xocl_cmd *submitted_cmds[XOCL_MAX_SLOTS]; - - unsigned int num_slots; - unsigned int num_cus; - unsigned int cu_shift_offset; - u32 cu_base_addr; - unsigned int polling_mode; - unsigned int cq_interrupt; - unsigned int configured; - - /* Bitmap tracks busy(1)/free(0) slots in cmd_slots*/ - u32 slot_status[XOCL_MAX_U32_SLOT_MASKS]; - unsigned int num_slot_masks; /* ((num_slots-1)>>5)+1 */ - - u32 cu_status[XOCL_MAX_U32_CU_MASKS]; - unsigned int num_cu_masks; /* ((num_cus-1)>>5+1 */ - - /* Status register pending complete. Written by ISR, cleared by scheduler */ - atomic_t sr0; - atomic_t sr1; - atomic_t sr2; - atomic_t sr3; - - /* Operations for dynamic indirection dependt on MB or kernel scheduler */ - struct xocl_sched_ops* ops; -}; - -int xocl_init_exec(struct drm_xocl_dev *xdev); -int xocl_fini_exec(struct drm_xocl_dev *xdev); - -int xocl_init_test_thread(struct drm_xocl_dev *xdev); -int xocl_fini_test_thread(struct drm_xocl_dev *xdev); - -void xocl_track_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv); -void xocl_untrack_ctx(struct drm_xocl_dev *xdev, struct drm_xocl_client_ctx *fpriv); - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.c b/sdk/linux_kernel_drivers/xocl/xocl_ioctl.c deleted file mode 100644 index 0de72558..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.c +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Sonal Santan - * - * A GEM style device manager for PCIe based OpenCL accelerators. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,0,0) -#include -#endif -#include -#include -#include "xocl_drv.h" -#include "xocl_ioctl.h" -#include "xocl_xdma.h" - -static const struct axlf_section_header* get_axlf_section(const struct axlf* top, enum axlf_section_kind kind) -{ - int i = 0; - printk(KERN_INFO "Trying to find section header for axlf section %d", kind); - for(i = 0; i < top->m_header.m_numSections; i++) - { - printk(KERN_INFO "Section is %d",top->m_sections[i].m_sectionKind); - if(top->m_sections[i].m_sectionKind == kind) { - printk(KERN_INFO "Found section header for axlf"); - return &top->m_sections[i]; - } - } - printk(KERN_INFO "Did NOT find section header for axlf section %d", kind); - return NULL; -} - - -static long xclbin_precheck_cleanup(struct drm_device *dev, int preserve_mem) -{ - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_mem_topology *topology = &xdev->topology; - long err = 0; - short ddr = 0; - unsigned i = 0; - printk(KERN_INFO "%s XOCL: Existing bank count = %d\n", __FUNCTION__, topology->bank_count); - ddr = 0; - if( !preserve_mem ) { // Data Retention - for (i= 0; i < topology->bank_count; i++) { - if (topology->m_data[i].m_used) { - ddr++; - if (xdev->mm_usage_stat[ddr -1].bo_count !=0 ) { - err = -EBUSY; - printk(KERN_INFO "%s The ddr %d has pre-existing buffer allocations, please exit and re-run.\n", __FUNCTION__, ddr -1); - return err; - } - } - } - - printk(KERN_INFO "XOCL: Marker 2.1\n"); - 
//Cleanup the topology struct from the previous xclbin - ddr = xocl_ddr_channel_count(dev); - printk( KERN_INFO "%s XOCL: xocl_ddr_channel_count(dev): %d\n", __FUNCTION__, ddr ); - for (i = 0; i < ddr; i++) { - if(topology->m_data[i].m_used) { - printk(KERN_INFO "Taking down DDR : %d", i); - drm_mm_takedown(&xdev->mm[i]); - } - } - - vfree(topology->m_data); - vfree(topology->topology); - memset(topology, 0, sizeof(struct drm_xocl_mem_topology)); - } - - vfree(xdev->connectivity.connections); - memset(&xdev->connectivity, 0, sizeof(xdev->connectivity)); - vfree(xdev->layout.layout); - memset(&xdev->layout, 0, sizeof(xdev->layout)); - vfree(xdev->debug_layout.layout); - memset(&xdev->debug_layout, 0, sizeof(xdev->debug_layout)); - - return err; -} - - -int xocl_read_axlf_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - struct drm_xocl_axlf *axlf_obj_ptr = data; - struct drm_xocl_dev *xdev = dev->dev_private; - long err = 0; - unsigned i = 0; - uint64_t copy_buffer_size = 0; - struct axlf* copy_buffer = 0; - const struct axlf_section_header *memHeader = 0; - char __user *buffer =0; - int32_t bank_count = 0; - short ddr = 0; - struct axlf bin_obj; - int preserve_mem; - struct drm_xocl_mem_topology *topology; - struct drm_xocl_mem_topology new_topology; - new_topology.topology = NULL; - new_topology.m_data = NULL; - - printk(KERN_INFO "%s %s READ_AXLF IOCTL \n", DRV_NAME, __FUNCTION__); - - if(!xdev->unified) { - printk(KERN_INFO "XOCL: not unified dsa"); - return err; - } - - printk(KERN_INFO "XOCL: Marker 0 %p\n", data); - if (copy_from_user((void *)&bin_obj, (void*)axlf_obj_ptr->xclbin, sizeof(struct axlf))) - return -EFAULT; - if (memcmp(bin_obj.m_magic, "xclbin2", 8)) - return -EINVAL; - //Ignore timestamp matching for AWS platform - if(bin_obj.m_header.m_featureRomTimeStamp != xdev->header.TimeSinceEpoch && strstr(xdev->header.VBNVName, "xilinx_aw") == NULL) { - printk(KERN_ERR "TimeStamp of ROM did not match Xclbin\n"); - return -EINVAL; 
- } - - printk(KERN_INFO "XOCL: VBNV and TimeStamps matched\n"); - - if(bin_obj.m_uniqueId == xdev->unique_id_last_bitstream) { - printk(KERN_INFO "Skipping repopulating topology, connectivity,ip_layout data\n"); - return err; - } - - //Copy from user space and proceed. - copy_buffer_size = (bin_obj.m_header.m_numSections)*sizeof(struct axlf_section_header) + sizeof(struct axlf); - copy_buffer = (struct axlf*)vmalloc(copy_buffer_size); - if(!copy_buffer) { - printk(KERN_ERR "Unable to create copy_buffer"); - return -EFAULT; - } - printk(KERN_INFO "XOCL: Marker 1\n"); - - if (copy_from_user((void *)copy_buffer, (void *)axlf_obj_ptr->xclbin, copy_buffer_size)) { - err = -EFAULT; - goto done; - } - - buffer = (char __user *)axlf_obj_ptr->xclbin; - err = !access_ok(VERIFY_READ, buffer, bin_obj.m_header.m_length); - if (err) { - err = -EFAULT; - goto done; - } - - //--- - printk(KERN_INFO "Finding MEM_TOPOLOGY section\n"); - memHeader = get_axlf_section(copy_buffer, MEM_TOPOLOGY); - if (memHeader == 0) { - printk(KERN_INFO "Did not find MEM_TOPOLOGY section.\n"); - err = -EINVAL; - goto done; - } - printk(KERN_INFO "XOCL: Marker 2\n"); - - printk(KERN_INFO "%s XOCL: MEM_TOPOLOGY offset = %llx, size = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize); - - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - err = -EINVAL; - goto done; - } - - buffer = (char __user *)axlf_obj_ptr->xclbin; - buffer += memHeader->m_sectionOffset; - - new_topology.topology = vmalloc(memHeader->m_sectionSize); - err = copy_from_user(new_topology.topology, buffer, memHeader->m_sectionSize); - if (err) - goto done; - - get_user(bank_count, buffer); - new_topology.size = memHeader->m_sectionSize; - new_topology.bank_count = bank_count; - new_topology.m_data_length = bank_count*sizeof(struct mem_data); - buffer += offsetof(struct mem_topology, m_mem_data); - new_topology.m_data = vmalloc(new_topology.m_data_length); - err = 
copy_from_user(new_topology.m_data, buffer, bank_count*sizeof(struct mem_data)); - if (err ) - goto done; - - //check for null pointer, then do mem compare - preserve_mem = 0; - if( xdev->topology.topology != NULL ) { - // m_data can be of different length but we would not compare them if topology match fails - if( !memcmp(new_topology.topology, xdev->topology.topology, memHeader->m_sectionSize) && - !memcmp(new_topology.m_data, xdev->topology.m_data, new_topology.bank_count*sizeof(struct mem_data) ) ) { - printk( KERN_INFO "XOCL: MEM_TOPOLOGY match, preserve mem_topology.\n" ); - preserve_mem = 1; - } else { - printk( KERN_INFO "XOCL: MEM_TOPOLOGY mismatch, do not preserve mem_topology.\n" ); - } - } - - //Switching the xclbin, make sure none of the buffers are used. - err = xclbin_precheck_cleanup(dev, preserve_mem); - if(err) - goto done; - - if( !preserve_mem ) { // Data Retention - xdev->topology.topology = new_topology.topology; - xdev->topology.size = new_topology.size; - xdev->topology.bank_count = new_topology.bank_count; - xdev->topology.m_data_length = new_topology.m_data_length; - xdev->topology.m_data = new_topology.m_data; - new_topology.topology = NULL; - new_topology.m_data = NULL; - } - - //---- - printk(KERN_INFO "Finding IP_LAYOUT section\n"); - memHeader = get_axlf_section(copy_buffer, IP_LAYOUT); - if (memHeader == 0) { - printk(KERN_INFO "Did not find IP_LAYOUT section.\n"); - } else { - printk(KERN_INFO "%s XOCL: IP_LAYOUT offset = %llx, size = %llx, xclbin length = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize, bin_obj.m_header.m_length); - - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - printk(KERN_INFO "%s XOCL: IP_LAYOUT section extends beyond xclbin boundary %llx\n", __FUNCTION__, bin_obj.m_header.m_length); - err = -EINVAL; - goto done; - } - printk(KERN_INFO "XOCL: Marker 3.1\n"); - buffer += memHeader->m_sectionOffset; - xdev->layout.layout = 
vmalloc(memHeader->m_sectionSize); - err = copy_from_user(xdev->layout.layout, buffer, memHeader->m_sectionSize); - printk(KERN_INFO "XOCL: Marker 3.2\n"); - if (err) - goto done; - xdev->layout.size = memHeader->m_sectionSize; - printk(KERN_INFO "XOCL: Marker 3.3\n"); - } - - //---- - printk(KERN_INFO "Finding DEBUG_IP_LAYOUT section\n"); - memHeader = get_axlf_section(copy_buffer, DEBUG_IP_LAYOUT); - if (memHeader == 0) { - printk(KERN_INFO "Did not find DEBUG_IP_LAYOUT section.\n"); - } else { - printk(KERN_INFO "%s XOCL: DEBUG_IP_LAYOUT offset = %llx, size = %llx, xclbin length = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize, bin_obj.m_header.m_length); - - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - printk(KERN_INFO "%s XOCL: DEBUG_IP_LAYOUT section extends beyond xclbin boundary %llx\n", __FUNCTION__, bin_obj.m_header.m_length); - err = -EINVAL; - goto done; - } - printk(KERN_INFO "XOCL: Marker 4.1\n"); - buffer = (char __user *)axlf_obj_ptr->xclbin; - buffer += memHeader->m_sectionOffset; - xdev->debug_layout.layout = vmalloc(memHeader->m_sectionSize); - err = copy_from_user(xdev->debug_layout.layout, buffer, memHeader->m_sectionSize); - printk(KERN_INFO "XOCL: Marker 4.2\n"); - if (err) - goto done; - xdev->debug_layout.size = memHeader->m_sectionSize; - printk(KERN_INFO "XOCL: Marker 4.3\n"); - } - - //--- - printk(KERN_INFO "Finding CONNECTIVITY section\n"); - memHeader = get_axlf_section(copy_buffer, CONNECTIVITY); - if (memHeader == 0) { - printk(KERN_INFO "Did not find CONNECTIVITY section.\n"); - } else { - printk(KERN_INFO "%s XOCL: CONNECTIVITY offset = %llx, size = %llx\n", __FUNCTION__, memHeader->m_sectionOffset , memHeader->m_sectionSize); - if((memHeader->m_sectionOffset + memHeader->m_sectionSize) > bin_obj.m_header.m_length) { - err = -EINVAL; - goto done; - } - buffer = (char __user *)axlf_obj_ptr->xclbin; - buffer += memHeader->m_sectionOffset; - 
xdev->connectivity.connections = vmalloc(memHeader->m_sectionSize); - err = copy_from_user(xdev->connectivity.connections, buffer, memHeader->m_sectionSize); - if (err) - goto done; - xdev->connectivity.size = memHeader->m_sectionSize; - } - - printk(KERN_INFO "XOCL: Marker 5\n"); - - topology = &xdev->topology; - - printk(KERN_INFO "XOCL: Topology Bank count = %d, data_length = %d\n", topology->bank_count, xdev->topology.m_data_length); - - if (!preserve_mem) { // Data Retention - xdev->mm = devm_kzalloc(dev->dev, sizeof(struct drm_mm) * topology->bank_count, GFP_KERNEL); - xdev->mm_usage_stat = devm_kzalloc(dev->dev, sizeof(struct drm_xocl_mm_stat) * topology->bank_count, GFP_KERNEL); - if (!xdev->mm || !xdev->mm_usage_stat) { - err = -ENOMEM; - goto done; - } - } - - //Check if sizes are same across banks. - ddr = 0; - for (i=0; i < topology->bank_count; i++) - { - printk(KERN_INFO "XOCL, DDR Info Index: %d Type:%d Used:%d Size:%llx Base_addr:%llx\n", i, - topology->m_data[i].m_type, topology->m_data[i].m_used, topology->m_data[i].m_size, - topology->m_data[i].m_base_address); - if (topology->m_data[i].m_used) - { - ddr++; - if ((topology->bank_size !=0) && (topology->bank_size != topology->m_data[i].m_size)) { - //we support only same sized banks for initial testing, so return error. - printk(KERN_INFO "%s err: %ld\n", __FUNCTION__, err); - err = -EFAULT; - vfree(xdev->topology.m_data); - memset(&xdev->topology, 0, sizeof(xdev->topology)); - goto done; - } - topology->bank_size = topology->m_data[i].m_size; - } - } - - //xdev->topology.used_bank_count = ddr; - printk(KERN_INFO "XOCL: Unified flow, used bank count :%d bank size(KB):%llx\n", ddr, xdev->topology.bank_size); - - if (!preserve_mem) { // Data Retention - //initialize the used banks and their sizes. Currently only fixed sizes are supported. 
- for (i=0; i < topology->bank_count; i++) - { - if (topology->m_data[i].m_used) { - printk(KERN_INFO "%s Allocating DDR:%d with base_addr:%llx, size %llx \n", __FUNCTION__, i, - topology->m_data[i].m_base_address, topology->m_data[i].m_size*1024); - drm_mm_init(&xdev->mm[i], topology->m_data[i].m_base_address, topology->m_data[i].m_size*1024); - printk(KERN_INFO "drm_mm_init called \n"); - } - } - } - - //Populate with "this" bitstream, so avoid redownload the next time - xdev->unique_id_last_bitstream = bin_obj.m_uniqueId; - -done: - printk(KERN_INFO "%s err: %ld\n", __FUNCTION__, err); - vfree(copy_buffer); - if (new_topology.topology != NULL) - vfree(new_topology.topology); - if (new_topology.m_data != NULL) - vfree(new_topology.m_data); - return err; - -} - -int xocl_ctx_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - unsigned long flags; - int ret = 0; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_ctx *args = data; - - if (args->op == XOCL_CTX_OP_FREE_CTX) { - DRM_INFO("Releasing context for pid %d\n", pid_nr(task_tgid(current))); - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); - return 0; - } - - if (args->op != XOCL_CTX_OP_ALLOC_CTX) - return -EINVAL; - - DRM_INFO("Creating context for pid %d\n", pid_nr(task_tgid(current))); - - spin_lock_irqsave(&xdev->exec.ctx_list_lock, flags); - - spin_unlock_irqrestore(&xdev->exec.ctx_list_lock, flags); - return ret; -} - - -int xocl_debug_ioctl(struct drm_device *dev, - void *data, - struct drm_file *filp) -{ - int ret = -EINVAL; - //struct drm_xocl_debug *args = data; - return ret; -} - - - -int xocl_user_intr_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) - -{ - struct eventfd_ctx *trigger; - int ret = 0; - struct drm_xocl_user_intr *args = data; - struct drm_xocl_dev *xdev = dev->dev_private; - struct drm_xocl_client_ctx *fpriv = filp->driver_priv; - - if ((args->msix >= 
XOCL_USER_INTR_END) || (args->msix < XOCL_USER_INTR_START)) - return -EINVAL; - mutex_lock(&xdev->exec.user_msix_table_lock); - if (xdev->exec.user_msix_table[args->msix]) { - ret = -EPERM; - goto out; - } - - if (args->fd < 0) - goto out; - trigger = eventfd_ctx_fdget(args->fd); - if (IS_ERR(trigger)) { - ret = PTR_ERR(trigger); - goto out; - } - xdev->exec.user_msix_table[args->msix] = trigger; - xdma_user_interrupt_config(xdev, args->msix, true); - fpriv->eventfd_bitmap |= (1 << args->msix); -out: - mutex_unlock(&xdev->exec.user_msix_table_lock); - return ret; -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.h b/sdk/linux_kernel_drivers/xocl/xocl_ioctl.h deleted file mode 100644 index 49ffa85c..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_ioctl.h +++ /dev/null @@ -1,375 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This file is dual licensed. It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - * - * Apache License Verbiage - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * GPL license Verbiage: - * - * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
- * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - * You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* - * A GEM style device manager for PCIe based OpenCL accelerators. - * - * Copyright (C) 2017 Xilinx, Inc. All rights reserved. - * - * Authors: - * Sonal Santan - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#ifndef _XCL_XOCL_IOCTL_H_ -#define _XCL_XOCL_IOCTL_H_ - -#if defined(__KERNEL__) -#include -#elif defined(__cplusplus) -#include -#include -#else -#include -#include -#endif - -enum { - /* GEM core ioctls */ - /* Buffer creation */ - DRM_XOCL_CREATE_BO = 0, - /* Buffer creation from user provided pointer */ - DRM_XOCL_USERPTR_BO, - /* Map buffer into application user space (no DMA is performed) */ - DRM_XOCL_MAP_BO, - /* Sync buffer (like fsync) in the desired direction by using DMA */ - DRM_XOCL_SYNC_BO, - /* Get information about the buffer such as physical address in the device, etc */ - DRM_XOCL_INFO_BO, - /* Update host cached copy of buffer wih user's data */ - DRM_XOCL_PWRITE_BO, - /* Update user's data with host cached copy of buffer */ - DRM_XOCL_PREAD_BO, - /* Other ioctls */ - DRM_XOCL_OCL_RESET, - /* Currently unused */ - DRM_XOCL_CTX, - /* Get information from device */ - DRM_XOCL_INFO, - /* Unmanaged DMA from/to device */ - DRM_XOCL_PREAD_UNMGD, - DRM_XOCL_PWRITE_UNMGD, - /* Various usage metrics */ - DRM_XOCL_USAGE_STAT, - /* Hardware debug command */ - DRM_XOCL_DEBUG, - /* Command to run on one or more CUs */ - DRM_XOCL_EXECBUF, - /* Register eventfd for user interrupts */ - DRM_XOCL_USER_INTR, - /* Read xclbin/axlf */ - DRM_XOCL_READ_AXLF, - DRM_XOCL_NUM_IOCTLS -}; - -enum drm_xocl_sync_bo_dir { - DRM_XOCL_SYNC_BO_TO_DEVICE = 0, - DRM_XOCL_SYNC_BO_FROM_DEVICE -}; - -/* - * Higher 4 bits are for DDR, one for each DDR - * LSB bit for execbuf - */ -#define DRM_XOCL_BO_BANK0 (0x1) -#define DRM_XOCL_BO_BANK1 (0x1 << 1) -#define DRM_XOCL_BO_BANK2 (0x1 << 2) -#define DRM_XOCL_BO_BANK3 (0x1 << 3) -#define DRM_XOCL_BO_CMA (0x1 << 30) -#define DRM_XOCL_BO_EXECBUF (0x1 << 31) - -struct drm_xocl_create_bo { - uint64_t size; - uint32_t handle; - uint32_t flags; -}; - -struct drm_xocl_userptr_bo { - uint64_t addr; - uint64_t size; - uint32_t handle; - uint32_t flags; -}; - -struct drm_xocl_map_bo { - uint32_t handle; - uint32_t pad; - uint64_t offset; 
-}; - -/** - * struct drm_xocl_sync_bo - used for SYNQ_BO IOCTL - * @handle: GEM object handle - * @flags: Unused - * @size: Number of bytes to migrate - * @offset: Offset into the object to write to - * @dir: DRM_XOCL_SYNC_DIR_XXX - */ -struct drm_xocl_sync_bo { - uint32_t handle; - uint32_t flags; - uint64_t size; - uint64_t offset; - enum drm_xocl_sync_bo_dir dir; -}; - -/** - * struct drm_xocl_info_bo - used for INFO_BO IOCTL - * @handle: GEM object handle - * @size: Size of buffer object in bytes - * @paddr: physical address (out) - */ -struct drm_xocl_info_bo { - uint32_t handle; - uint32_t flags; - uint64_t size; - uint64_t paddr; -}; - -struct drm_xocl_axlf { - struct axlf *xclbin; -}; - -/** - * struct drm_xocl_pwrite_bo - used for PWRITE_BO IOCTL - * @handle: GEM object handle - * @pad: Padding - * @offset: Offset into the buffer object to write to - * @size: Length of data to write - * @data_ptr: Pointer to read the data from - */ -struct drm_xocl_pwrite_bo { - uint32_t handle; - uint32_t pad; - uint64_t offset; - uint64_t size; - uint64_t data_ptr; -}; - -/** - * struct drm_xocl_pread_bo - used for PREAD_BO IOCTL - * @handle: GEM object handle - * @pad: Padding - * @offset: Offset into the buffer object to read from - * @size: Length of data to read - * @data_ptr: Pointer to write the data into - */ -struct drm_xocl_pread_bo { - uint32_t handle; - uint32_t pad; - uint64_t offset; - uint64_t size; - uint64_t data_ptr; -}; - -enum drm_xocl_ctx_code { - XOCL_CTX_OP_ALLOC_CTX = 0, - XOCL_CTX_OP_FREE_CTX -}; - -struct drm_xocl_ctx { - enum drm_xocl_ctx_code op; - char uuid[16]; - uint32_t cu_bitmap; - uint32_t flags; -}; - -struct drm_xocl_info { - unsigned short vendor; - unsigned short device; - unsigned short subsystem_vendor; - unsigned short subsystem_device; - unsigned int dma_engine_version; - unsigned int driver_version; - unsigned int pci_slot; - char reserved[64]; -}; - - -/** - * struct drm_xocl_pwrite_unmgd (used with PWRITE_UNMGD IOCTL) - * 
@address_space: Address space in the DSA; currently only 0 is suported - * @pad: Padding - * @offset: Physical address in the specified address space - * @size: Length of data to write - * @data_ptr: Pointer to read the data from - */ -struct drm_xocl_pwrite_unmgd { - uint32_t address_space; - uint32_t pad; - uint64_t paddr; - uint64_t size; - uint64_t data_ptr; -}; - -/** - * struct drm_xocl_pread_unmgd (used for PREAD_UNMGD IOCTL) - * @address_space: Address space in the DSA; currently only 0 is valid - * @pad: Padding - * @offset: Physical address in the specified address space - * @size: Length of data to write - * @data_ptr: Pointer to write the data to - */ -struct drm_xocl_pread_unmgd { - uint32_t address_space; - uint32_t pad; - uint64_t paddr; - uint64_t size; - uint64_t data_ptr; -}; - - -struct drm_xocl_mm_stat { - size_t memory_usage; - unsigned int bo_count; -}; - -/** - * struct drm_xocl_stats (used for STATS IOCTL) - * @address_space: Address space in the DSA; currently only 0 is valid - * @pad: Padding - * @offset: Physical address in the specified address space - * @size: Length of data to write - * @data_ptr: Pointer to write the data to - */ -struct drm_xocl_usage_stat { - unsigned dma_channel_count; - unsigned mm_channel_count; - uint64_t h2c[8]; - uint64_t c2h[8]; - struct drm_xocl_mm_stat mm[8]; -}; - -enum drm_xocl_debug_code { - DRM_XOCL_DEBUG_ACQUIRE_CU = 0, - DRM_XOCL_DEBUG_RELEASE_CU, - DRM_XOCL_DEBUG_NIFD_RD, - DRM_XOCL_DEBUG_NIFD_WR, -}; - -struct drm_xocl_debug { - uint32_t ctx_id; - enum drm_xocl_debug_code code; - unsigned int code_size; - uint64_t code_ptr; -}; - -/** - * Opcodes for the embedded scheduler provided by the client to the driver - */ -enum drm_xocl_execbuf_code { - DRM_XOCL_EXECBUF_RUN_KERNEL = 0, - DRM_XOCL_EXECBUF_RUN_KERNEL_XYZ, - DRM_XOCL_EXECBUF_PING, - DRM_XOCL_EXECBUF_DEBUG, -}; - -/** - * State of exec request managed by the kernel driver - */ -enum drm_xocl_execbuf_state { - DRM_XOCL_EXECBUF_STATE_COMPLETE = 
0, - DRM_XOCL_EXECBUF_STATE_RUNNING, - DRM_XOCL_EXECBUF_STATE_SUBMITTED, - DRM_XOCL_EXECBUF_STATE_QUEUED, - DRM_XOCL_EXECBUF_STATE_ERROR, - DRM_XOCL_EXECBUF_STATE_ABORT, -}; - -/** - * Layout of BO of EXECBUF kind - */ -struct drm_xocl_execbuf_bo { - enum drm_xocl_execbuf_state state; - enum drm_xocl_execbuf_code code; - uint64_t cu_bitmap; - uint64_t token; - char buf[3584]; // inline regmap layout -}; - -struct drm_xocl_execbuf { - uint32_t ctx_id; - uint32_t exec_bo_handle; -}; - -/** - * struct drm_xocl_user_intr (used for XOCL_USER_INTR IOCTL) - * @ctx_id: Context created before with CTX ioctl - * @fd: File descriptor created with eventfd system call - * @msix: User interrupt number (0 to 15) - */ -struct drm_xocl_user_intr { - uint32_t ctx_id; - int fd; - int msix; -}; - - -#define DRM_IOCTL_XOCL_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_CREATE_BO, struct drm_xocl_create_bo) -#define DRM_IOCTL_XOCL_USERPTR_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_USERPTR_BO, struct drm_xocl_userptr_bo) -#define DRM_IOCTL_XOCL_MAP_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_MAP_BO, struct drm_xocl_map_bo) -#define DRM_IOCTL_XOCL_SYNC_BO DRM_IOW (DRM_COMMAND_BASE + \ - DRM_XOCL_SYNC_BO, struct drm_xocl_sync_bo) -#define DRM_IOCTL_XOCL_INFO_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_INFO_BO, struct drm_xocl_info_bo) -#define DRM_IOCTL_XOCL_PWRITE_BO DRM_IOW (DRM_COMMAND_BASE + \ - DRM_XOCL_PWRITE_BO, struct drm_xocl_pwrite_bo) -#define DRM_IOCTL_XOCL_PREAD_BO DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_PREAD_BO, struct drm_xocl_pread_bo) -#define DRM_IOCTL_XOCL_CTX DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_CTX, struct drm_xocl_ctx) -#define DRM_IOCTL_XOCL_INFO DRM_IOR(DRM_COMMAND_BASE + \ - DRM_XOCL_INFO, struct drm_xocl_info) -#define DRM_IOCTL_XOCL_READ_AXLF DRM_IOW(DRM_COMMAND_BASE + \ - DRM_XOCL_READ_AXLF, struct drm_xocl_axlf) -#define DRM_IOCTL_XOCL_PWRITE_UNMGD DRM_IOW (DRM_COMMAND_BASE + \ - DRM_XOCL_PWRITE_UNMGD, struct drm_xocl_pwrite_unmgd) -#define 
DRM_IOCTL_XOCL_PREAD_UNMGD DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_PREAD_UNMGD, struct drm_xocl_pread_unmgd) -#define DRM_IOCTL_XOCL_USAGE_STAT DRM_IOR(DRM_COMMAND_BASE + \ - DRM_XOCL_USAGE_STAT, struct drm_xocl_usage_stat) -#define DRM_IOCTL_XOCL_DEBUG DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_DEBUG, struct drm_xocl_debug) -#define DRM_IOCTL_XOCL_EXECBUF DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_EXECBUF, struct drm_xocl_execbuf) -#define DRM_IOCTL_XOCL_USER_INTR DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_XOCL_USER_INTR, struct drm_xocl_user_intr) - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xocl_sysfs.c b/sdk/linux_kernel_drivers/xocl/xocl_sysfs.c deleted file mode 100644 index dab368be..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_sysfs.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (C) 2016-2018 Xilinx, Inc - * - * Authors: - * Umang Parekh - * - * sysfs for the device attributes. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include "xocl_drv.h" - -//-xclbinid-- -static ssize_t xclbinid_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - return sprintf(buf, "%llx\n", xdev->unique_id_last_bitstream); -} - -static DEVICE_ATTR_RO(xclbinid); - -//-Base address-- -static ssize_t dr_base_addr_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - - //TODO: Fix: DRBaseAddress no longer required in feature rom - if(xdev->header.MajorVersion >= 10) - return sprintf(buf, "%llu\n", xdev->header.DRBaseAddress); - else - return sprintf(buf, "%u\n", 0); -} - -static DEVICE_ATTR_RO(dr_base_addr); - - -//-Mem_topology-- -static ssize_t mem_topology_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In mem_topology_show function \n", DRV_NAME, __FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->topology.topology, xdev->topology.size); - printk(KERN_INFO "%s %s Mem-copied %llx bytes \n", DRV_NAME, __FUNCTION__, xdev->topology.size); - return xdev->topology.size; -} - -static DEVICE_ATTR_RO(mem_topology); - -//-Connectivity-- -static ssize_t connectivity_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In connectivity_show function \n", DRV_NAME, __FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->connectivity.connections, xdev->connectivity.size); - return xdev->connectivity.size; -} - -static DEVICE_ATTR_RO(connectivity); - -//-IP_layout-- -static ssize_t ip_layout_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In ip_layout_show function \n", DRV_NAME, 
__FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->layout.layout, xdev->layout.size); - return xdev->layout.size; -} - -static DEVICE_ATTR_RO(ip_layout); - -//- Debug IP_layout-- -static ssize_t debug_ip_layout_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - printk(KERN_INFO "%s %s In debug_ip_layout_show function \n", DRV_NAME, __FUNCTION__); - struct drm_device *ddev = dev_get_drvdata(dev); - struct drm_xocl_dev *xdev = ddev->dev_private; - memcpy(buf, xdev->debug_layout.layout, xdev->debug_layout.size); - printk(KERN_INFO "%s %s Mem-copied %llx bytes \n", DRV_NAME, __FUNCTION__, xdev->debug_layout.size); - return xdev->debug_layout.size; -} - -static DEVICE_ATTR_RO(debug_ip_layout); - - -//--- -int xocl_init_sysfs(struct device *dev) -{ - int result = device_create_file(dev, &dev_attr_xclbinid); - if(result) - return result; - result = device_create_file(dev, &dev_attr_dr_base_addr); - if(result) - return result; - result = device_create_file(dev, &dev_attr_connectivity); - if(result) - return result; - result = device_create_file(dev, &dev_attr_ip_layout); - if(result) - return result; - result = device_create_file(dev, &dev_attr_debug_ip_layout); - if(result) - return result; - result = device_create_file(dev, &dev_attr_mem_topology); - return result; -} - -void xocl_fini_sysfs(struct device *dev) -{ - printk(KERN_INFO "%s %s Cleaning up sys files \n", DRV_NAME, __FUNCTION__); - device_remove_file(dev, &dev_attr_xclbinid); - device_remove_file(dev, &dev_attr_dr_base_addr); - device_remove_file(dev, &dev_attr_mem_topology); - device_remove_file(dev, &dev_attr_connectivity); - device_remove_file(dev, &dev_attr_ip_layout); - device_remove_file(dev, &dev_attr_debug_ip_layout); -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_test.c b/sdk/linux_kernel_drivers/xocl/xocl_test.c deleted file mode 100644 index 94c1fee4..00000000 --- 
a/sdk/linux_kernel_drivers/xocl/xocl_test.c +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Copyright (C) 2018 Xilinx, Inc - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include "xocl_drv.h" - -int xocl_test_interval = 5; -bool xocl_test_on = true; - -/** - * TODO: - * Test drm_send_event() with event object initialized with drm_event_reserve_init() - * to send events for CUs - */ -static int xocl_test_thread_main(void *data) -{ -#if 0 - struct timeval now; - struct drm_xocl_dev *xdev = (struct drm_xocl_dev *)data; - int irq = 0; - int count = 0; - while (!kthread_should_stop()) { - ssleep(xocl_test_interval); - do_gettimeofday(&now); - DRM_INFO("irq[%d] tv_sec[%ld]tv_usec[%ld]\n", irq, now.tv_sec, now.tv_usec); - xocl_user_event(irq, xdev); - irq++; - irq &= 0xf; - count++; - } - printk(KERN_INFO "The xocl test thread has terminated."); -#endif - return 0; -} - -int xocl_init_test_thread(struct drm_xocl_dev *xdev) -{ - int ret = 0; -#if 0 - xdev->exec.test_kthread = kthread_run(xocl_test_thread_main, (void *)xdev, "xocl-test-thread"); - DRM_DEBUG(__func__); - if (IS_ERR(xdev->exec.test_kthread)) { - DRM_ERROR(__func__); - ret = PTR_ERR(xdev->exec.test_kthread); - xdev->exec.test_kthread = NULL; - } -#endif - return ret; -} - -int xocl_fini_test_thread(struct drm_xocl_dev *xdev) -{ - int ret = 0; -#if 0 - if (!xdev->exec.test_kthread) - return 0; - ret = kthread_stop(xdev->exec.test_kthread); - ssleep(xocl_test_interval); - 
xdev->exec.test_kthread = NULL; - DRM_DEBUG(__func__); -#endif - return ret; -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xdma.c b/sdk/linux_kernel_drivers/xocl/xocl_xdma.c deleted file mode 100644 index edc47f27..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xdma.c +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include "xocl_drv.h" -#include "xocl_xdma.h" -#include "libxdma_api.h" - -static irqreturn_t xocl_xdma_user_isr(int irq, void *arg) -{ - struct drm_xocl_dev *xdev = (struct drm_xocl_dev *)arg; - xocl_user_event(irq, xdev); - return IRQ_HANDLED; -} - -int xdma_init_glue(struct drm_xocl_dev *xdev) -{ - int ret = 0; - int user = 0; - unsigned short mask = ~0; - xdev->xdma_handle = (struct xdma_dev *) xdma_device_open(DRV_NAME, xdev->ddev->pdev, &user, - &xdev->channel, &xdev->channel); - if (xdev->xdma_handle == NULL) { - DRM_INFO("%s: XDMA Device Open failed. \n", DRV_NAME); - ret = -ENOENT; // TBD: Get the error code from XDMA API. - } - ret = xdma_user_isr_register(xdev->xdma_handle, mask, xocl_xdma_user_isr, xdev); - if (ret) - xdma_device_close(xdev->ddev->pdev, xdev->xdma_handle); - else - DRM_INFO("%s: XDMA Device Open successful. 
\n", DRV_NAME); - return ret; -} - -void xdma_fini_glue(struct drm_xocl_dev *xdev) -{ - unsigned short mask = ~0; - xdma_user_isr_register(xdev->xdma_handle, mask, NULL, xdev); - xdma_device_close(xdev->ddev->pdev, xdev->xdma_handle); - xdev->xdma_handle = NULL; - DRM_INFO("%s: XDMA Device Close successful. \n", DRV_NAME); -} - - -ssize_t xdma_migrate_bo(const struct drm_xocl_dev *xdev, struct sg_table *sgt, bool write, - u64 paddr, int channel) -{ - struct page *pg; - struct scatterlist *sg = sgt->sgl; - int nents = sgt->orig_nents; - pid_t pid = current->pid; - const char* dirstr = write ? "to" : "from"; - int i = 0; - ssize_t ret; - unsigned long long pgaddr; - DRM_DEBUG("%s TID %d, Channel:" - "%d, Offset: 0x%llx, Direction: %d\n", __func__, pid, channel, paddr, write ? 1 : 0); - ret = xdma_xfer_submit(xdev->xdma_handle, channel, write ? 1 : 0, paddr, sgt, false, 10000); - if (ret >= 0) - return ret; - - DRM_ERROR("DMA failed %s device addr 0x%llx, tid %d, channel %d\n", dirstr, paddr, pid, channel); - DRM_ERROR("Dumping SG Page Table\n"); - for (i = 0; i < nents; i++, sg = sg_next(sg)) { - if (!sg) - break; - pg = sg_page(sg); - if (!pg) - continue; - pgaddr = page_to_phys(pg); - DRM_ERROR("%i, 0x%llx\n", i, pgaddr); - } - return ret; -} - - -int xdma_user_interrupt_config(struct drm_xocl_dev *xdev, int user_intr_number, bool enable) -{ - const unsigned int mask = 1 << user_intr_number; - return enable ? 
xdma_user_isr_enable(xdev->xdma_handle, mask) : xdma_user_isr_disable(xdev->xdma_handle, mask); -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xdma.h b/sdk/linux_kernel_drivers/xocl/xocl_xdma.h deleted file mode 100644 index 5e6d1d64..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xdma.h +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Copyright (C) 2015-2018 Xilinx, Inc - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _XCL_XOCL_XDMA_H_ -#define _XCL_XOCL_XDMA_H_ - -#include -#include -#include -#include - -int xdma_init_glue(struct drm_xocl_dev *xdev); -void xdma_fini_glue(struct drm_xocl_dev *xdev); -ssize_t xdma_migrate_bo(const struct drm_xocl_dev *xdev, struct sg_table *sgt, bool write, - u64 paddr, int channel); -int xdma_user_interrupt_config(struct drm_xocl_dev *xdev, int user_intr_number, bool enable); -#endif - -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xvc.c b/sdk/linux_kernel_drivers/xocl/xocl_xvc.c deleted file mode 100644 index dc543c5c..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xvc.c +++ /dev/null @@ -1,330 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". - * - * Karen Xie - * Sonal Santan - * - ******************************************************************************/ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xocl_xvc.h" -#include "xvc_pcie_ioctl.h" - -#define DEFAULT_XVC_BAR_OFFSET 0xC0000 // DSA 4.1 and 5.0 -#define DEFAULT_XVC_BAR 0 - -#define COMPLETION_LOOP_MAX 100 - -#define XVC_BAR_LENGTH_REG 0x0 -#define XVC_BAR_TMS_REG 0x4 -#define XVC_BAR_TDI_REG 0x8 -#define XVC_BAR_TDO_REG 0xC -#define XVC_BAR_CTRL_REG 0x10 - -#define XVC_DEV_NAME "xvc" - -static dev_t xvc_dev; -static int instance = 0; -static struct class *xvc_class = NULL; - -#ifdef __REG_DEBUG__ -/* SECTION: Function definitions */ -static inline void __write_register(const char *fn, u32 value, void *base, - unsigned int off) -{ - pr_info("%s: 0x%p, W reg 0x%lx, 0x%x.\n", fn, base, off, value); - iowrite32(value, base + off); -} - -static inline u32 __read_register(const char *fn, void *base, unsigned int off) -{ - u32 v = ioread32(base + off); - - pr_info("%s: 0x%p, R reg 0x%lx, 0x%x.\n", fn, base, off, v); - return v; -} -#define write_register(v,base,off) __write_register(__func__, v, base, off) -#define read_register(base,off) __read_register(__func__, base, off) - -#else -#define write_register(v,base,off) iowrite32(v, (base) + (off)) -#define read_register(base,off) ioread32((base) + (off)) -#endif /* #ifdef __REG_DEBUG__ */ - - -static int xvc_shift_bits(void 
*base, u32 tms_bits, u32 tdi_bits, - u32 *tdo_bits) -{ - u32 control; - int count; - - /* set tms bit */ - write_register(tms_bits, base, XVC_BAR_TMS_REG); - /* set tdi bits and shift data out */ - write_register(tdi_bits, base, XVC_BAR_TDI_REG); - /* enable shift operation */ - write_register(0x1, base, XVC_BAR_CTRL_REG); - - /* poll for completion */ - count = COMPLETION_LOOP_MAX; - while (count) { - /* read control reg to check shift operation completion */ - control = read_register(base, XVC_BAR_CTRL_REG); - if ((control & 0x01) == 0) - break; - - count--; - } - - if (!count) { - pr_warn("XVC bar transaction timed out (0x%0X)\n", control); - return -ETIMEDOUT; - } - - /* read tdo bits back out */ - *tdo_bits = read_register(base, XVC_BAR_TDO_REG); - - return 0; -} - -static long xvc_ioctl_helper(struct xocl_xvc *xvc, const void __user *arg) -{ - struct xil_xvc_ioc xvc_obj; - unsigned int opcode; - unsigned int total_bits; - unsigned int total_bytes; - unsigned int bits, bits_left; - unsigned char *buffer = NULL; - unsigned char *tms_buf = NULL; - unsigned char *tdi_buf = NULL; - unsigned char *tdo_buf = NULL; - void __iomem *iobase = xvc->bar + DEFAULT_XVC_BAR_OFFSET; - int rv; - - rv = copy_from_user((void *)&xvc_obj, arg, - sizeof(struct xil_xvc_ioc)); - /* anything not copied ? 
*/ - if (rv) { - pr_info("copy_from_user xvc_obj failed: %d.\n", rv); - goto cleanup; - } - - opcode = xvc_obj.opcode; - - /* Invalid operation type, no operation performed */ - if (opcode != 0x01 && opcode != 0x02) { - pr_info("UNKNOWN opcode 0x%x.\n", opcode); - return -EINVAL; - } - - total_bits = xvc_obj.length; - total_bytes = (total_bits + 7) >> 3; - - buffer = (char *)kmalloc(total_bytes * 3, GFP_KERNEL); - if (!buffer) { - pr_info("OOM %u, op 0x%x, len %u bits, %u bytes.\n", - 3 * total_bytes, opcode, total_bits, total_bytes); - rv = -ENOMEM; - goto cleanup; - } - tms_buf = buffer; - tdi_buf = tms_buf + total_bytes; - tdo_buf = tdi_buf + total_bytes; - - rv = copy_from_user((void *)tms_buf, xvc_obj.tms_buf, total_bytes); - if (rv) { - pr_info("copy tmfs_buf failed: %d/%u.\n", rv, total_bytes); - goto cleanup; - } - rv = copy_from_user((void *)tdi_buf, xvc_obj.tdi_buf, total_bytes); - if (rv) { - pr_info("copy tdi_buf failed: %d/%u.\n", rv, total_bytes); - goto cleanup; - } - - /* set length register to 32 initially if more than one - * word-transaction is to be done */ - if (total_bits >= 32) - write_register(0x20, iobase, XVC_BAR_LENGTH_REG); - - for (bits = 0, bits_left = total_bits; bits < total_bits; bits += 32, - bits_left -= 32) { - unsigned int bytes = bits >> 3; - unsigned int shift_bytes = 4; - u32 tms_store = 0; - u32 tdi_store = 0; - u32 tdo_store = 0; - - if (bits_left < 32) { - /* set number of bits to shift out */ - write_register(bits_left, iobase, XVC_BAR_LENGTH_REG); - shift_bytes = (bits_left + 7) >> 3; - } - - memcpy(&tms_store, tms_buf + bytes, shift_bytes); - memcpy(&tdi_store, tdi_buf + bytes, shift_bytes); - - /* Shift data out and copy to output buffer */ - rv = xvc_shift_bits(iobase, tms_store, tdi_store, &tdo_store); - if (rv < 0) - goto cleanup; - - memcpy(tdo_buf + bytes, &tdo_store, shift_bytes); - } - - /* if testing bar access swap tdi and tdo bufferes to "loopback" */ - if (opcode == 0x2) { - char *tmp = tdo_buf; - - tdo_buf 
= tdi_buf; - tdi_buf = tmp; - } - - rv = copy_to_user((void *)xvc_obj.tdo_buf, tdo_buf, total_bytes); - if (rv) { - pr_info("copy back tdo_buf failed: %d/%u.\n", rv, total_bytes); - rv = -EFAULT; - goto cleanup; - } - -cleanup: - if (buffer) - kfree(buffer); - - mmiowb(); - - return rv; -} - -long xvc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct xocl_xvc *xvc = filp->private_data; - return xvc_ioctl_helper(xvc, (void __user *)arg); -} - -static int char_open(struct inode *inode, struct file *file) -{ - struct xocl_xvc *xvc = NULL; - - /* pointer to containing structure of the character device inode */ - xvc = container_of(inode->i_cdev, struct xocl_xvc, sys_cdev); - /* create a reference to our char device in the opened file */ - file->private_data = xvc; - return 0; -} - -/* - * Called when the device goes from used to unused. - */ -static int char_close(struct inode *inode, struct file *file) -{ - return 0; -} - - -/* - * character device file operations for the XVC - */ -static const struct file_operations xvc_fops = { - .owner = THIS_MODULE, - .open = char_open, - .release = char_close, - .unlocked_ioctl = xvc_ioctl, -}; - -int xocl_xvc_device_init(struct xocl_xvc *xvc, struct device *dev) -{ - int err; -#ifdef __XVC_BAR_NUM__ - xcdev->bar = __XVC_BAR_NUM__; -#endif -#ifdef __XVC_BAR_OFFSET__ - xcdev->base = __XVC_BAR_OFFSET__; -#else - xvc->base = XVC_BAR_OFFSET_DFLT; -#endif - pr_info("xcdev 0x%p, offset 0x%lx.\n", - xvc, xvc->base); - - cdev_init(&xvc->sys_cdev, &xvc_fops); - xvc->sys_cdev.owner = THIS_MODULE; - xvc->instance = instance++; - xvc->sys_cdev.dev = MKDEV(MAJOR(xvc_dev), xvc->instance); - err = cdev_add(&xvc->sys_cdev, xvc->sys_cdev.dev, 1); - if (err) - return err; - - xvc->sys_device = device_create(xvc_class, dev, - xvc->sys_cdev.dev, - NULL, XVC_DEV_NAME "%d", xvc->instance); - if (IS_ERR(xvc->sys_device)) { - err = PTR_ERR(xvc->sys_device); - cdev_del(&xvc->sys_cdev); - } - - if (!err) - pr_info("XVC device 
instance %d initialized\n", xvc->instance); - return err; -} - - -int xocl_xvc_device_fini(struct xocl_xvc *xvc) -{ - device_destroy(xvc_class, xvc->sys_cdev.dev); - cdev_del(&xvc->sys_cdev); - return 0; -} - -int xocl_xvc_chardev_init() -{ - int err = 0; - - err = alloc_chrdev_region(&xvc_dev, 0, 16, XVC_DEV_NAME); - if (err < 0) - goto err_register_chrdev; - - xvc_class = class_create(THIS_MODULE, XVC_DEV_NAME); - if (IS_ERR(xvc_class)) { - err = PTR_ERR(xvc_class); - goto err_class_create; - } - return 0; - -err_class_create: - unregister_chrdev_region(xvc_dev, 16); -err_register_chrdev: - return err; -} - -void xocl_xvc_chardev_exit() -{ - unregister_chrdev_region(xvc_dev, 16); - class_destroy(xvc_class); -} diff --git a/sdk/linux_kernel_drivers/xocl/xocl_xvc.h b/sdk/linux_kernel_drivers/xocl/xocl_xvc.h deleted file mode 100644 index 76b62c8e..00000000 --- a/sdk/linux_kernel_drivers/xocl/xocl_xvc.h +++ /dev/null @@ -1,44 +0,0 @@ -/******************************************************************************* - * - * Xilinx XDMA IP Core Linux Driver - * Copyright(c) 2015 - 2017 Xilinx, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - * - * The full GNU General Public License is included in this distribution in - * the file called "LICENSE". 
- * - * Karen Xie - * Sonal Santan - * - ******************************************************************************/ - -#ifndef _XCL_XOCL_XVC_DRV_H_ -#define _XCL_XOCL_XVC_DRV_H_ - -#define XVC_BAR_OFFSET_DFLT 0x40000 - -struct xocl_xvc { - unsigned long base; /* bar access offset */ - unsigned int instance; - struct cdev sys_cdev; - struct device *sys_device; - void *__iomem bar; -}; - -int xocl_xvc_chardev_init(void); -void xocl_xvc_chardev_exit(void); -int xocl_xvc_device_init(struct xocl_xvc *xvc, struct device *dev); -int xocl_xvc_device_fini(struct xocl_xvc *xvc); - -#endif diff --git a/sdk/linux_kernel_drivers/xocl/xvc_pcie_ioctl.h b/sdk/linux_kernel_drivers/xocl/xvc_pcie_ioctl.h deleted file mode 100644 index c81d813b..00000000 --- a/sdk/linux_kernel_drivers/xocl/xvc_pcie_ioctl.h +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Copyright (C) 2017-2018 Xilinx, Inc - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef _XIL_XVC_IOCALLS_POSIX_H_ -#define _XIL_XVC_IOCALLS_POSIX_H_ - -#ifndef _WINDOWS -// TODO: Windows build support -#include -#endif - -#define XIL_XVC_MAGIC 0x58564344 // "XVCD" - -struct xil_xvc_ioc { - unsigned opcode; - unsigned length; - unsigned char *tms_buf; - unsigned char *tdi_buf; - unsigned char *tdo_buf; -}; - -#define XDMA_IOCXVC _IOWR(XIL_XVC_MAGIC, 1, struct xil_xvc_ioc) - -#endif // _XIL_XVC_IOCALLS_POSIX_H_ -// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/tests/test_fpga_tools.py b/sdk/tests/test_fpga_tools.py index def4a6ca..668748e5 100644 --- a/sdk/tests/test_fpga_tools.py +++ b/sdk/tests/test_fpga_tools.py @@ -26,6 +26,7 @@ import time import traceback import ctypes +import multiprocessing.dummy try: import aws_fpga_test_utils from aws_fpga_test_utils.AwsFpgaTestBase import AwsFpgaTestBase @@ -41,18 +42,22 @@ class TestFpgaTools(BaseSdkTools): ''' Pytest test class. - + NOTE: Cannot have an __init__ method. - + Test FPGA AFI Management tools described in ../userspace/fpga_mgmt_tools/README.md ''' + @pytest.mark.flaky(reruns=2, reruns_delay=5) def test_describe_local_image_slots(self): for slot in range(self.num_slots): self.fpga_clear_local_image(slot) logger.info("PCI devices:\n{}".format("\n".join(self.list_pci_devices()))) + logger.info("verify that the slots are in order") + assert self.slot2device.values() == sorted(self.slot2device.values()) + (rc, stdout, stderr) = self.run_cmd("sudo fpga-describe-local-image-slots", echo=True) assert len(stdout) == self.num_slots + 1 assert len(stderr) == 1 @@ -84,6 +89,7 @@ def test_describe_local_image_slots(self): assert stdout[slot * 3 + 1] == 'AFIDEVICE {} 0x1d0f 0x1042 {}'.format(slot, self.slot2device[slot]), "slot={}\n{}".format(slot, "\n".join(stdout)) assert stdout[slot * 3 + 2] == 'AFIDEVICE {} 0x1d0f 0x1041 {}'.format(slot, self.slot2mbox_device[slot]), "slot={}\n{}".format(slot, "\n".join(stdout)) + @pytest.mark.flaky(reruns=2, 
reruns_delay=5) def test_describe_local_image(self): for slot in range(self.num_slots): self.fpga_clear_local_image(slot) @@ -104,24 +110,26 @@ def test_describe_local_image(self): # Test -M (Return FPGA image hardware metrics.) (rc, stdout, stderr) = self.run_cmd("sudo fpga-describe-local-image -M -S {}".format(slot), echo=True) - assert len(stdout) == 57 + assert len(stdout) == 59 assert len(stderr) == 1 assert stdout[0] == 'AFI {} none cleared 1 ok 0 {}'.format(slot, self.shell_version) assert stdout[1] == 'AFIDEVICE {} 0x1d0f 0x1042 {}'.format(slot, self.slot2device[slot]) assert stdout[2] == 'sdacl-slave-timeout=0' - assert stdout[50] == 'Clock Group C Frequency (Mhz)' - assert stdout[51] == '0 0 ' + assert stdout[51] == 'Clock Group C Frequency (Mhz)' + assert stdout[52] == '0 0 ' + assert stdout[-2].startswith('Cached agfis:') # Test -C (Return FPGA image hardware metrics (clear on read).) - (rc, stdout, stderr) = self.run_cmd("sudo fpga-describe-local-image -M -S {}".format(slot), echo=True) - assert len(stdout) == 57 + (rc, stdout, stderr) = self.run_cmd("sudo fpga-describe-local-image -C -M -S {}".format(slot), echo=True) + assert len(stdout) == 59 assert len(stderr) == 1 assert stdout[0] == 'AFI {} none cleared 1 ok 0 {}'.format(slot, self.shell_version) assert stdout[1] == 'AFIDEVICE {} 0x1d0f 0x1042 {}'.format(slot, self.slot2device[slot]) assert stdout[2] == 'sdacl-slave-timeout=0' - assert stdout[50] == 'Clock Group C Frequency (Mhz)' - assert stdout[51] == '0 0 ' + assert stdout[51] == 'Clock Group C Frequency (Mhz)' + assert stdout[52] == '0 0 ' + @pytest.mark.flaky(reruns=2, reruns_delay=5) def test_load_local_image(self): for slot in range(self.num_slots): (rc, stdout, stderr) = self.run_cmd("sudo fpga-load-local-image --request-timeout {} -S {} -I {}".format(self.DEFAULT_REQUEST_TIMEOUT, slot, self.cl_hello_world_agfi), echo=True) @@ -168,6 +176,7 @@ def test_load_local_image(self): assert stdout[1] == 'AFIDEVICE {} 0x1d0f 0xf000 
{}'.format(slot, self.slot2device[slot]) self.fpga_clear_local_image(slot) + @pytest.mark.flaky(reruns=2, reruns_delay=5) def test_clear_local_image(self): for slot in range(self.num_slots): # Test clearing already cleared @@ -204,6 +213,14 @@ def test_clear_local_image(self): assert stdout[1] == 'AFIDEVICE {} 0x1d0f 0x1042 {}'.format(slot, self.slot2device[slot]) break + def test_afi_caching(self): + for slot in range(self.num_slots): + self.fpga_clear_local_image(slot) + (rc, stdout, stderr) = self.run_cmd("sudo fpga-load-local-image --request-timeout {} -S {} -I {} -P".format(self.DEFAULT_REQUEST_TIMEOUT, slot, self.cl_dram_dma_agfi), echo=True) + assert rc == 0 + (rc, stdout, stderr) = self.run_cmd("sudo fpga-describe-local-image -M -S {}".format(slot), echo=True) + assert re.match(self.cl_dram_dma_agfi, stdout[-2].strip()) + @pytest.mark.skip(reason="No way to test right now.") def test_start_virtual_jtag(self): assert False @@ -217,6 +234,7 @@ def test_start_virtual_jtag(self): assert stdout[0] == 'AFI {} none cleared 1 ok 0 {}'.format(slot, self.shell_version) assert stdout[1] == 'AFIDEVICE {} 0x1d0f 0x1042 {}'.format(self.slot2device[slot]) + @pytest.mark.flaky(reruns=2, reruns_delay=5) def test_get_virtual_led(self): # This is tested in the cl_hello_world example for slot in range(self.num_slots): @@ -228,6 +246,7 @@ def test_get_virtual_led(self): assert stdout[0] == 'FPGA slot id {} have the following Virtual LED:'.format(slot) assert re.match('[01]{4}-[01]{4}-[01]{4}-[01]{4}', stdout[1]) + @pytest.mark.flaky(reruns=2, reruns_delay=5) def test_virtual_dip_switch(self): for slot in range(self.num_slots): # Start it on an empty slot @@ -246,3 +265,17 @@ def test_virtual_dip_switch(self): assert len(stderr) == 1 assert stdout[0] == 'FPGA slot id {} has the following Virtual DIP Switches:'.format(slot) assert stdout[1] == '1111-1111-1111-1111' + + # Add extra delay in case we have a lot of slot loads + @pytest.mark.flaky(reruns=2, reruns_delay=10) + def 
test_parallel_slot_loads(self): + def run_slot(slot): + for afi in [self.cl_dram_dma_agfi, self.cl_hello_world_agfi, self.cl_dram_dma_agfi]: + (rc, stdout, stderr) = self.run_cmd("sudo fpga-load-local-image -HS{} -I {}".format(slot, afi)) + assert rc == 0 + logger.info(stdout) + + + slots = range(self.num_slots) + pool = multiprocessing.dummy.Pool(len(slots)) + pool.map(run_slot, slots) diff --git a/sdk/tests/test_non_root_access.py b/sdk/tests/test_non_root_access.py index 09482b0b..df3303f7 100755 --- a/sdk/tests/test_non_root_access.py +++ b/sdk/tests/test_non_root_access.py @@ -78,6 +78,6 @@ def test_hello_world_as_non_root_user(self): for slot in range(AwsFpgaTestBase.num_slots): (rc, out, err) = self.run_cmd("bash -x {}/sdk/tests/non_root_log_into_group.sh {}".format(os.environ['WORKSPACE'], slot)) logger.info("{}\n{}".format(out, err)) - assert rc == 0 + assert rc == 0 AwsFpgaTestBase.fpga_set_virtual_dip_switch("1111111111111111", slot, as_root=False) - assert AwsFpgaTestBase.fpga_get_virtual_led(slot, as_root=False) == "1010-1101-1101-1110" + assert AwsFpgaTestBase.fpga_get_virtual_led(slot, as_root=False) == "1010-1101-1101-1110" diff --git a/sdk/tests/test_sdk_scripts.py b/sdk/tests/test_sdk_scripts.py index c6964d87..2d5738cf 100644 --- a/sdk/tests/test_sdk_scripts.py +++ b/sdk/tests/test_sdk_scripts.py @@ -54,8 +54,5 @@ def test_sdk_setup(self): logger.info(self) assert False - def test_fio_tools_setup_python27(self): - self.setup_fio_tools(python_version=2.7) - - def test_fio_tools_setup_python34(self): - self.setup_fio_tools(python_version=3.4) + def test_fio_tools_setup(self): + self.setup_fio_tools() diff --git a/sdk/userspace/fpga_libs/fpga_dma/fpga_dma_utils.c b/sdk/userspace/fpga_libs/fpga_dma/fpga_dma_utils.c index ca3de9a8..75c6a7d8 100644 --- a/sdk/userspace/fpga_libs/fpga_dma/fpga_dma_utils.c +++ b/sdk/userspace/fpga_libs/fpga_dma/fpga_dma_utils.c @@ -221,7 +221,7 @@ int fpga_pci_get_dma_device_num(enum fpga_dma_driver which_driver, * this 
function, which always reads from the same directory. The man page * for readdir says the POSIX spec does not require threadsafety. */ - pthread_mutex_lock(&fpga_pci_readdir_mutex); + fpga_acquire_readdir_lock(); #endif while (true) { @@ -263,7 +263,7 @@ int fpga_pci_get_dma_device_num(enum fpga_dma_driver which_driver, /* continue... */ } #if !defined(FPGA_PCI_USE_READDIR_R) - pthread_mutex_unlock(&fpga_pci_readdir_mutex); + fpga_release_readdir_lock(); #endif fail_on_with_code(_device_num == -1, err, rc, FPGA_ERR_PCI_MISSING, "Unable to find device num"); @@ -275,7 +275,7 @@ int fpga_pci_get_dma_device_num(enum fpga_dma_driver which_driver, err_unlock: #if !defined(FPGA_PCI_USE_READDIR_R) - pthread_mutex_unlock(&fpga_pci_readdir_mutex); + fpga_release_readdir_lock(); #endif err: diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/afi_cmd_api.h b/sdk/userspace/fpga_libs/fpga_mgmt/afi_cmd_api.h index dc0f0508..262b7eff 100644 --- a/sdk/userspace/fpga_libs/fpga_mgmt/afi_cmd_api.h +++ b/sdk/userspace/fpga_libs/fpga_mgmt/afi_cmd_api.h @@ -23,7 +23,7 @@ #include -#define AFI_CMD_DATA_LEN 512 +#define AFI_CMD_DATA_LEN 4096 #define AFI_CMD_API_MAJOR_VERSION 0x2 diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c index 530b1a3d..5eba76c1 100644 --- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c +++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt.c @@ -24,8 +24,8 @@ #include "fpga_mgmt_internal.h" /** Synchronous API (load/clear) default timeout and delay msecs */ -#define FPGA_MGMT_SYNC_TIMEOUT 3000 -#define FPGA_MGMT_SYNC_DELAY_MSEC 20 +#define FPGA_MGMT_SYNC_TIMEOUT 30000 +#define FPGA_MGMT_SYNC_DELAY_MSEC 2 struct fgpa_mgmt_state_s fpga_mgmt_state = { .timeout = FPGA_MGMT_TIMEOUT_DFLT, @@ -37,11 +37,21 @@ int fpga_mgmt_init(void) for (unsigned int i = 0; i < sizeof_array(fpga_mgmt_state.slots); ++i) { fpga_mgmt_state.slots[i].handle = PCI_BAR_HANDLE_INIT; } + fpga_mgmt_state.initialized = true; return fpga_pci_init(); } 
int fpga_mgmt_close(void) { + if (!fpga_mgmt_state.initialized) { + return FPGA_ERR_OK; + } + fpga_mgmt_state.initialized = false; + for (unsigned int i = 0; i < sizeof_array(fpga_mgmt_state.slots); ++i) { + if (fpga_mgmt_state.slots[i].handle != PCI_BAR_HANDLE_INIT) { + fpga_mgmt_mbox_detach(i); + } + } return FPGA_ERR_OK; } @@ -75,14 +85,10 @@ int fpga_mgmt_get_sh_version(int slot_id, uint32_t *sh_version) *sh_version = ver.sh_version; err: - if (handle != PCI_BAR_HANDLE_INIT) { - fpga_mgmt_mbox_detach(slot_id); - } - return ret; } -int fpga_mgmt_describe_local_image(int slot_id, +static int fpga_mgmt_describe_cmd(int slot_id, struct fpga_mgmt_image_info *info, uint32_t flags) { int ret; @@ -125,9 +131,6 @@ int fpga_mgmt_describe_local_image(int slot_id, fail_on(ret, out, "fpga_mgmt_get_sh_version failed"); info->sh_version = sh_version; - ret = fpga_pci_get_slot_spec(slot_id, &info->spec); - fail_on(ret, out, "fpga_pci_get_slot_spec failed"); - /* copy the metrics into the out param */ info->metrics = metrics->fmc; @@ -135,11 +138,34 @@ int fpga_mgmt_describe_local_image(int slot_id, return ret; } +int fpga_mgmt_describe_local_image(int slot_id, + struct fpga_mgmt_image_info *info, uint32_t flags) +{ + int ret; + + fail_on_with_code(!fpga_mgmt_state.initialized, out, ret, + FPGA_ERR_SOFTWARE_PROBLEM, + "fpga_mgmt_init must be called before the library can be used"); + + ret = fpga_mgmt_describe_cmd(slot_id, info, flags); + fail_on(ret, out, "fpga_mgmt_describe_cmd"); + + ret = fpga_pci_get_slot_spec(slot_id, &info->spec); + fail_on(ret, out, "fpga_pci_get_slot_spec failed"); + +out: + return ret; +} + int fpga_mgmt_get_status(int slot_id, int *status, int *status_q) { int ret; struct fpga_mgmt_image_info info; + fail_on_with_code(!fpga_mgmt_state.initialized, out, ret, + FPGA_ERR_SOFTWARE_PROBLEM, + "fpga_mgmt_init must be called before the library can be used"); + fail_slot_id(slot_id, out, ret); if (!status) { @@ -268,6 +294,10 @@ int 
fpga_mgmt_clear_local_image(int slot_id) union afi_cmd cmd; union afi_cmd rsp; + fail_on_with_code(!fpga_mgmt_state.initialized, out, ret, + FPGA_ERR_SOFTWARE_PROBLEM, + "fpga_mgmt_init must be called before the library can be used"); + fail_slot_id(slot_id, out, ret); memset(&cmd, 0, sizeof(union afi_cmd)); @@ -298,12 +328,18 @@ int fpga_mgmt_clear_local_image_sync(int slot_id, int status; int ret; + fail_on_with_code(!fpga_mgmt_state.initialized, out, ret, + FPGA_ERR_SOFTWARE_PROBLEM, + "fpga_mgmt_init must be called before the library can be used"); + /** Allow timeout adjustments that are greater than the defaults */ uint32_t timeout_tmp = (timeout > FPGA_MGMT_SYNC_TIMEOUT) ? timeout : FPGA_MGMT_SYNC_TIMEOUT; uint32_t delay_msec_tmp = (delay_msec > FPGA_MGMT_SYNC_DELAY_MSEC) ? delay_msec : FPGA_MGMT_SYNC_DELAY_MSEC; + fail_on_with_code(slot_id >= FPGA_SLOT_MAX, out, ret, -EINVAL, "invalid slot"); + memset(&tmp_info, 0, sizeof(tmp_info)); /** @@ -322,7 +358,7 @@ int fpga_mgmt_clear_local_image_sync(int slot_id, /** Wait until the status is "cleared" or timeout */ while (!done) { - ret = fpga_mgmt_describe_local_image(slot_id, &tmp_info, 0); /** flags==0 */ + ret = fpga_mgmt_describe_cmd(slot_id, &tmp_info, 0); /** flags==0 */ status = (ret == 0) ? 
tmp_info.status : FPGA_STATUS_END; if (status == FPGA_STATUS_CLEARED) { @@ -373,6 +409,10 @@ int fpga_mgmt_clear_local_image_sync(int slot_id, fail_on(ret, out, "fpga_pci_rescan_slot_app_pfs failed"); } + /* now fill in the slot spec information after the rescan */ + ret = fpga_pci_get_slot_spec(slot_id, &tmp_info.spec); + fail_on(ret, out, "fpga_pci_get_slot_spec failed"); + if (info) { *info = tmp_info; } @@ -408,13 +448,17 @@ int fpga_mgmt_load_local_image_with_options(union fpga_mgmt_load_local_image_opt union afi_cmd cmd; union afi_cmd rsp; + fail_on_with_code(!fpga_mgmt_state.initialized, out, ret, + FPGA_ERR_SOFTWARE_PROBLEM, + "fpga_mgmt_init must be called before the library can be used"); + fail_slot_id(opt->slot_id, out, ret); memset(&cmd, 0, sizeof(union afi_cmd)); memset(&rsp, 0, sizeof(union afi_cmd)); /* mask off any unsupported flags */ - opt->flags &= FPGA_CMD_DRAM_DATA_RETENTION | FPGA_CMD_FORCE_SHELL_RELOAD; + opt->flags &= FPGA_CMD_ALL_FLAGS; /* initialize the command structure */ fpga_mgmt_cmd_init_load(&cmd, &len, opt); @@ -463,12 +507,18 @@ int fpga_mgmt_load_local_image_sync_with_options(union fpga_mgmt_load_local_imag int status; int ret; + fail_on_with_code(!fpga_mgmt_state.initialized, out, ret, + FPGA_ERR_SOFTWARE_PROBLEM, + "fpga_mgmt_init must be called before the library can be used"); + /** Allow timeout adjustments that are greater than the defaults */ uint32_t timeout_tmp = (timeout > FPGA_MGMT_SYNC_TIMEOUT) ? timeout : FPGA_MGMT_SYNC_TIMEOUT; uint32_t delay_msec_tmp = (delay_msec > FPGA_MGMT_SYNC_DELAY_MSEC) ? 
delay_msec : FPGA_MGMT_SYNC_DELAY_MSEC; + fail_on_with_code(opt->slot_id >= FPGA_SLOT_MAX, out, ret, -EINVAL, "invalid slot"); + memset(&tmp_info, 0, sizeof(tmp_info)); /** @@ -487,7 +537,7 @@ int fpga_mgmt_load_local_image_sync_with_options(union fpga_mgmt_load_local_imag /** Wait until the status is "loaded" or timeout */ while (!done) { - ret = fpga_mgmt_describe_local_image(opt->slot_id, &tmp_info, 0); /** flags==0 */ + ret = fpga_mgmt_describe_cmd(opt->slot_id, &tmp_info, 0); /** flags==0 */ status = (ret == 0) ? tmp_info.status : FPGA_STATUS_END; if (status == FPGA_STATUS_LOADED) { @@ -543,6 +593,10 @@ int fpga_mgmt_load_local_image_sync_with_options(union fpga_mgmt_load_local_imag fail_on(ret, out, "fpga_pci_rescan_slot_app_pfs failed"); } + /* now fill in the slot spec information after the rescan */ + ret = fpga_pci_get_slot_spec(opt->slot_id, &tmp_info.spec); + fail_on(ret, out, "fpga_pci_get_slot_spec failed"); + if (info) { *info = tmp_info; } diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c index 54e9b175..d5b59bad 100644 --- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c +++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_cmd.c @@ -177,7 +177,7 @@ fpga_mgmt_cmd_init_metrics(union afi_cmd *cmd, uint32_t *len, uint32_t flags) afi_cmd_hdr_set_flags(cmd, 0); /** Fill in cmd body; only allow specific flags to be set */ - req->fpga_cmd_flags = flags & + req->fpga_cmd_flags = FPGA_CMD_EXTENDED_METRICS_SIZE | flags & (FPGA_CMD_GET_HW_METRICS | FPGA_CMD_CLEAR_HW_METRICS); *len = sizeof(struct afi_cmd_hdr) + payload_len; @@ -249,22 +249,26 @@ fpga_mgmt_mbox_attach(int slot_id) int ret; pci_bar_handle_t handle; - ret = fpga_pci_attach(slot_id, - FPGA_MGMT_PF, - F1_MBOX_RESOURCE_NUM, - 0 /* flags */, - &handle); - fail_on(ret != 0, err, "Unable to attach to mbox bar"); - - fpga_mgmt_state.slots[slot_id].handle = handle; - - struct fpga_hal_mbox mbox = { - .timeout = fpga_mgmt_state.timeout, - 
.delay_msec = fpga_mgmt_state.delay_msec, - }; - - ret = fpga_hal_mbox_init(&mbox); - fail_on(ret != 0, err, "fpga_hal_mbox_init failed"); + if (fpga_mgmt_state.slots[slot_id].handle == PCI_BAR_HANDLE_INIT) { + ret = fpga_pci_attach(slot_id, + FPGA_MGMT_PF, + F1_MBOX_RESOURCE_NUM, + 0 /* flags */, + &handle); + fail_on(ret != 0, err, "Unable to attach to mbox bar"); + + fpga_mgmt_state.slots[slot_id].handle = handle; + + struct fpga_hal_mbox mbox = { + .timeout = fpga_mgmt_state.timeout, + .delay_msec = fpga_mgmt_state.delay_msec, + }; + + ret = fpga_hal_mbox_init(&mbox); + fail_on(ret != 0, err, "fpga_hal_mbox_init failed"); + } else { + handle = fpga_mgmt_state.slots[slot_id].handle; + } ret = fpga_hal_mbox_attach(handle, true); /**< clear_state=true */ fail_on(ret != 0, err, "fpga_hal_mbox_attach failed"); @@ -449,7 +453,6 @@ int fpga_mgmt_process_cmd(int slot_id, const union afi_cmd *cmd, union afi_cmd *rsp, uint32_t *len) { - bool attached = false; int ret; fail_slot_id(slot_id, err, ret); @@ -457,14 +460,8 @@ fpga_mgmt_process_cmd(int slot_id, ret = fpga_mgmt_mbox_attach(slot_id); fail_on(ret, err, "fpga_mgmt_mbox_attach failed"); - attached = true; - ret = fpga_mgmt_send_cmd(slot_id, cmd, rsp, len); fail_on(ret, err, "fpga_mgmt_send_cmd failed"); err: - if (attached) { - fpga_mgmt_mbox_detach(slot_id); - } - return ret; } diff --git a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h index a0713966..0469891c 100644 --- a/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h +++ b/sdk/userspace/fpga_libs/fpga_mgmt/fpga_mgmt_internal.h @@ -13,12 +13,20 @@ * permissions and limitations under the License. 
*/ +#pragma once + +#include + +#include "afi_cmd_api.h" +#include "hal/fpga_common.h" +#include "fpga_pci.h" + /** * Default timeout: * CLI_TIMEOUT_DFLT * CLI_DELAY_MSEC_DFLT */ -#define FPGA_MGMT_TIMEOUT_DFLT 250 -#define FPGA_MGMT_DELAY_MSEC_DFLT 20 +#define FPGA_MGMT_TIMEOUT_DFLT 2500 +#define FPGA_MGMT_DELAY_MSEC_DFLT 2 /** First flag bit, @see afi_cmd_hdr#len_flags */ #define AFI_CMD_HDR_FLAGS_SHIFT 24 @@ -40,6 +48,7 @@ extern struct fgpa_mgmt_state_s { } slots[FPGA_SLOT_MAX]; uint32_t timeout; uint32_t delay_msec; + bool initialized; } fpga_mgmt_state; // FIXME diff --git a/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c b/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c index 95808c3a..0e3f32a6 100644 --- a/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c +++ b/sdk/userspace/fpga_libs/fpga_pci/fpga_pci_sysfs.c @@ -30,10 +30,16 @@ #include static int fpga_pci_rescan(void); -static int fpga_pci_check_app_pf(struct fpga_pci_resource_map *app_map, +static int fpga_pci_check_app_pf(struct fpga_pci_resource_map *app_map, bool exists); static int fpga_pci_check_app_pf_sysfs(char *dir_name, bool exists); +#if !defined(_BSD_SOURCE) && !defined(_SVID_SOURCE) +pthread_mutex_t fpga_pci_readdir_mutex = PTHREAD_MUTEX_INITIALIZER; +#else +#define FPGA_PCI_USE_READDIR_R +#endif + /** * Return the ID from the given sysfs file (e.g. Vendor ID, Device ID). * @@ -143,7 +149,7 @@ fpga_pci_get_dbdf(char *dir_name, struct fpga_pci_resource_map *map) * @param[in] dir_name the PCI device directory name * @param[in] resource_num the resource number * @param[in,out] resource_size the returned resource size - * @param[in,out] burstable the returned resource burstable flag + * @param[in,out] burstable the returned resource burstable flag * * @returns * 0 on success @@ -206,7 +212,7 @@ fpga_pci_get_pci_resource_info(char *dir_name, * Return the PCI resources for the given sysfs directory name. 
* * @param[in] dir_name the PCI device directory name - * @param[in,out] map the PCI resource map + * @param[in,out] map the PCI resource map * * @returns * 0 on success @@ -247,7 +253,7 @@ fpga_pci_get_resources(char *dir_name, struct fpga_pci_resource_map *map) * Return the PCI resource map identifiers for the given sysfs directory name. * * @param[in] dir_name the PCI device directory name - * @param[in,out] map the PCI resource map + * @param[in,out] map the PCI resource map * * @returns * 0 on success @@ -345,7 +351,7 @@ fpga_pci_get_resource_map_ids(char *dir_name, struct fpga_pci_resource_map *map) * -1 on failure */ static int -fpga_pci_mbox2app(struct fpga_pci_resource_map *mbox_map, +fpga_pci_mbox2app(struct fpga_pci_resource_map *mbox_map, struct fpga_pci_resource_map *app_map, char *app_dir_name, size_t app_dir_name_size) { @@ -356,7 +362,7 @@ fpga_pci_mbox2app(struct fpga_pci_resource_map *mbox_map, /** Construct the app dir name based on the mbox_map */ ret = snprintf(app_dir_name, app_dir_name_size, PCI_DEV_FMT, - mbox_map->domain, mbox_map->bus, + mbox_map->domain, mbox_map->bus, F1_MBOX_DEV2APP_DEV(mbox_map->dev), mbox_map->func); fail_on_with_code(ret < 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, @@ -364,28 +370,28 @@ fpga_pci_mbox2app(struct fpga_pci_resource_map *mbox_map, fail_on_with_code((size_t) ret >= app_dir_name_size, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "app_dir_name too long"); - /** - * Check that the app_pf exists. If not found, make a minimal attempt to + /** + * Check that the app_pf exists. If not found, make a minimal attempt to * recover it. */ ret = fpga_pci_check_app_pf_sysfs(app_dir_name, true); /** exists==true*/ fail_on(ret != 0, err, "fpga_pci_check_app_pf_sysfs failed"); /** - * Fill in the app_map for the given app_dir_name. If the app_dir_name is - * not yet ready (e.g. sysfs files are in the process of being recreated + * Fill in the app_map for the given app_dir_name. If the app_dir_name is + * not yet ready (e.g. 
sysfs files are in the process of being recreated * due to a remove/rescan) make a minimal retry attempt. */ bool done = false; uint32_t retries = 0; while (!done) { - ret = fpga_pci_get_resource_map_ids(app_dir_name, app_map); + ret = fpga_pci_get_resource_map_ids(app_dir_name, app_map); if (ret == 0) { done = true; - } else { + } else { fail_on_with_code(retries >= F1_CHECK_APP_PF_MAX_RETRIES, err, ret, FPGA_ERR_UNRESPONSIVE, - "fpga_pci_get_resource_map_ids failed for app_dir_name=%s", + "fpga_pci_get_resource_map_ids failed for app_dir_name=%s", app_dir_name); msleep(F1_CHECK_APP_PF_DELAY_MSEC); retries++; @@ -397,12 +403,59 @@ fpga_pci_mbox2app(struct fpga_pci_resource_map *mbox_map, return ret; } +int fpga_acquire_readdir_lock() { #if !defined(FPGA_PCI_USE_READDIR_R) -pthread_mutex_t fpga_pci_readdir_mutex = PTHREAD_MUTEX_INITIALIZER; + return pthread_mutex_lock(&fpga_pci_readdir_mutex); +#else + return 0; #endif +} -int -fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size) +int fpga_release_readdir_lock() { +#if !defined(FPGA_PCI_USE_READDIR_R) + return pthread_mutex_unlock(&fpga_pci_readdir_mutex); +#else + return 0; +#endif +} + +static inline bool fpga_slot_spec_is_initialized(struct fpga_slot_spec *spec) +{ + struct fpga_pci_resource_map *map = &spec->map[FPGA_MGMT_PF]; + return !(map->domain == 0 && map->bus == 0 && + map->dev == 0 && map->func == 0); +} + + +static int +fpga_pci_slot_spec_compare(const void *a, const void *b) +{ + struct fpga_slot_spec *spec_a = (*((struct fpga_slot_spec **)a)); + struct fpga_slot_spec *spec_b = (*((struct fpga_slot_spec **)b)); + + int test; + + /* make sure than uninitialized entries fall to the bottom of the list */ + bool a_initialized = fpga_slot_spec_is_initialized(spec_a); + bool b_initialized = fpga_slot_spec_is_initialized(spec_b); + if (a_initialized != b_initialized) { + return (a_initialized) ? 
-1 : 1; + } + + test = spec_a->map[FPGA_MGMT_PF].domain - spec_b->map[FPGA_MGMT_PF].domain; + if (test != 0) return test; + + test = spec_a->map[FPGA_MGMT_PF].bus - spec_b->map[FPGA_MGMT_PF].bus; + if (test != 0) return test; + + test = spec_a->map[FPGA_MGMT_PF].dev - spec_b->map[FPGA_MGMT_PF].dev; + if (test != 0) return test; + + return spec_a->map[FPGA_MGMT_PF].func - spec_b->map[FPGA_MGMT_PF].func; +} + +static int +fpga_pci_mbox_scan(struct fpga_slot_spec spec_array_out[], int size) { int ret; bool found_afi_slot = false; @@ -416,29 +469,30 @@ fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size) entry = &entry_stack; memset(entry, 0, sizeof(struct dirent)); #else - /** + /* * Protect calls to readdir with a mutex because multiple threads may call * this function, which always reads from the same directory. The man page * for readdir says the POSIX spec does not require threadsafety. */ - pthread_mutex_lock(&fpga_pci_readdir_mutex); + fpga_acquire_readdir_lock(); #endif - int slot_dev_index = 0; - struct fpga_slot_spec search_spec; - struct fpga_pci_resource_map search_map, app_map; - char app_dir_name[NAME_MAX + 1]; + unsigned int slot_dev_index = 0; + struct fpga_pci_resource_map search_map; - memset(&search_spec, 0, sizeof(struct fpga_slot_spec)); + /* allocate space for sorting the spec_array */ + struct fpga_slot_spec *spec_array[FPGA_SLOT_MAX]; + struct fpga_slot_spec spec_array_storage[FPGA_SLOT_MAX]; + memset(spec_array_storage, 0, sizeof(spec_array_storage)); + for (int i = 0; i < FPGA_SLOT_MAX; ++i) { + spec_array[i] = &spec_array_storage[i]; + } - /** - * Loop through the sysfs device directories + /* + * Loop through the sysfs device directories * -we first find the mbox dev then handle the app dev as a fixed - * mapping based off of the mbox dev's pci resource map + * mapping based off of the mbox dev's pci resource map * (see fpga_pci_mbox2app). 
- * -this approach is simple and more efficient than the - * alternative of requiring an additional sort of the dirent entries by - * the PCI device number (DBDF). */ while (true) { @@ -464,47 +518,42 @@ fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size) continue; } - if (search_map.vendor_id == F1_MBOX_VENDOR_ID && + if (search_map.vendor_id == F1_MBOX_VENDOR_ID && search_map.device_id == F1_MBOX_DEVICE_ID) { /* mbox resources */ ret = fpga_pci_get_resources(entry->d_name, &search_map); fail_on(ret != 0, err_unlock, "Error retrieving resource information"); - /* app resources */ - memset(&app_map, 0, sizeof(struct fpga_pci_resource_map)); - app_dir_name[0] = 0; - ret = fpga_pci_mbox2app(&search_map, &app_map, - app_dir_name, sizeof(app_dir_name)); - fail_on(ret != 0, err_unlock, "Error retrieving app pf information"); - - ret = fpga_pci_get_resources(app_dir_name, &app_map); - fail_on(ret != 0, err_unlock, "Error retrieving resource information"); - /* copy the results into the spec_array */ - spec_array[slot_dev_index].map[FPGA_APP_PF] = app_map; - spec_array[slot_dev_index].map[FPGA_MGMT_PF] = search_map; - + spec_array[slot_dev_index]->map[FPGA_MGMT_PF] = search_map; found_afi_slot = true; slot_dev_index += 1; - if (slot_dev_index >= size) { + if (slot_dev_index >= sizeof_array(spec_array)) { break; } } } #if !defined(FPGA_PCI_USE_READDIR_R) - pthread_mutex_unlock(&fpga_pci_readdir_mutex); + fpga_release_readdir_lock(); #endif fail_on_with_code(!found_afi_slot, err, ret, FPGA_ERR_PCI_MISSING, "No fpga-image-slots found"); closedir(dirp); + /* sort the spec_array and copy it into the out parameter */ + qsort(spec_array, sizeof_array(spec_array), sizeof(spec_array[0]), + fpga_pci_slot_spec_compare); + for (unsigned int i = 0; i < min((unsigned) size, sizeof_array(spec_array)); ++i) { + spec_array_out[i] = *spec_array[i]; + } + errno = 0; return 0; err_unlock: #if !defined(FPGA_PCI_USE_READDIR_R) - 
pthread_mutex_unlock(&fpga_pci_readdir_mutex); + fpga_release_readdir_lock(); #endif err: @@ -515,6 +564,56 @@ fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size) return ret; } +/** + * Fill in the application PF information in a slot spec which already + * has the mailbox PF initialized. + */ +static int +fpga_pci_complete_slot_spec(struct fpga_slot_spec *spec) +{ + int ret; + char app_dir_name[NAME_MAX + 1]; + struct fpga_pci_resource_map app_map; + + /* fill in app resources */ + memset(&app_map, 0, sizeof(struct fpga_pci_resource_map)); + app_dir_name[0] = 0; + ret = fpga_pci_mbox2app(&spec->map[FPGA_MGMT_PF], &app_map, + app_dir_name, sizeof(app_dir_name)); + fail_on(ret != 0, out, "Error retrieving app pf information"); + + ret = fpga_pci_get_resources(app_dir_name, &app_map); + fail_on(ret != 0, out, "Error retrieving resource information"); + + /* copy the results into the spec_array */ + spec->map[FPGA_APP_PF] = app_map; + +out: + return ret; +} + +int +fpga_pci_get_all_slot_specs(struct fpga_slot_spec spec_array[], int size) +{ + int rc; + + rc = fpga_pci_mbox_scan(spec_array, size); + fail_on(rc, out, "failed to enumerate FPGA slots"); + + for (int i = 0; i < size; ++i) { + /* after encountering the first empty slot, stop iterating */ + if (!fpga_slot_spec_is_initialized(&spec_array[i])) { + break; + } + /* fill in app resources */ + rc = fpga_pci_complete_slot_spec(&spec_array[i]); + fail_on(rc, out, "unabled to get APP PF info for slot %d", i); + } + +out: + return rc; +} + int fpga_pci_get_slot_spec(int slot_id, struct fpga_slot_spec *spec) { @@ -530,10 +629,14 @@ fpga_pci_get_slot_spec(int slot_id, struct fpga_slot_spec *spec) /* tell fpga_pci_get_all_slot_specs not to search past the slot number */ size = min(sizeof_array(spec_array), (unsigned) slot_id + 1); - ret = fpga_pci_get_all_slot_specs(spec_array, size); - fail_on(ret, err, "Unable to read PCI device information."); - if (spec_array[slot_id].map[FPGA_APP_PF].vendor_id 
== 0) { + ret = fpga_pci_mbox_scan(spec_array, size); + fail_on(ret, err, "failed to enumerate FPGA slots"); + + ret = fpga_pci_complete_slot_spec(&spec_array[slot_id]); + fail_on(ret, err, "unabled to get APP PF info for slot %d", slot_id); + + if (!fpga_slot_spec_is_initialized(&spec_array[slot_id])) { log_error("No device matching specified id: %d", slot_id); return -ENOENT; } @@ -623,16 +726,16 @@ fpga_pci_check_app_pf_driver(struct fpga_pci_resource_map *app_map, } /** - * Check that the application PF exists or not based on the dir_name and + * Check that the application PF exists or not based on the dir_name and * exists flag. If the application PF is supposed to exist but was not * found, perform a minimal attempt at recovery be performing a PCI rescan. - * - * @param[in] dir_name the application PF device directory name - * @param[in] exists flag to check existence or non-existence - * + * + * @param[in] dir_name the application PF device directory name + * @param[in] exists flag to check existence or non-existence + * * @returns * 0 on success, non-zero on error - */ + */ static int fpga_pci_check_app_pf_sysfs(char *dir_name, bool exists) { @@ -658,7 +761,7 @@ fpga_pci_check_app_pf_sysfs(char *dir_name, bool exists) ret = stat(sysfs_name, &file_stat); if (!!ret == !exists) { done = true; - } else { + } else { fail_on_with_code(retries >= F1_CHECK_APP_PF_MAX_RETRIES, err, ret, FPGA_ERR_UNRESPONSIVE, "exists=%u, failed for path=%s", exists, sysfs_name); @@ -678,16 +781,16 @@ fpga_pci_check_app_pf_sysfs(char *dir_name, bool exists) } /** - * Check that the application PF exists or not based on the app_map and + * Check that the application PF exists or not based on the app_map and * exists flag. If the application PF is supposed to exist but was not * found, perform a minimal attempt at recovery be performing a PCI rescan. 
- * + * * @param[in] app_map the application device resource map - * @param[in] exists flag to check existence or non-existence - * + * @param[in] exists flag to check existence or non-existence + * * @returns * 0 on success, non-zero on error - */ + */ static int fpga_pci_check_app_pf(struct fpga_pci_resource_map *app_map, bool exists) { @@ -695,12 +798,12 @@ fpga_pci_check_app_pf(struct fpga_pci_resource_map *app_map, bool exists) fail_on_with_code(!app_map, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "app_map is NULL"); - + /** Construct the PCI device directory name using the PCI_DEV_FMT */ char dir_name[NAME_MAX + 1]; ret = snprintf(dir_name, sizeof(dir_name), PCI_DEV_FMT, app_map->domain, app_map->bus, app_map->dev, app_map->func); - + fail_on(ret < 0, err, "Error building the dir_name"); fail_on((size_t) ret >= sizeof(dir_name), err, "dir_name too long"); @@ -714,24 +817,24 @@ fpga_pci_check_app_pf(struct fpga_pci_resource_map *app_map, bool exists) /** * Remove the application PF for the given app map. 
- * - * @param[out] app_map the application device resource map to remove - * + * + * @param[out] app_map the application device resource map to remove + * * @returns * 0 on success, non-zero on error - */ + */ static int fpga_pci_remove_app_pf(struct fpga_pci_resource_map *app_map) -{ +{ int ret = 0; fail_on_with_code(!app_map, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "app_map is NULL"); - + /** Construct the PCI device directory name using the PCI_DEV_FMT */ char dir_name[NAME_MAX + 1]; ret = snprintf(dir_name, sizeof(dir_name), PCI_DEV_FMT, app_map->domain, app_map->bus, app_map->dev, app_map->func); - + fail_on_with_code(ret < 0, err, ret, FPGA_ERR_SOFTWARE_PROBLEM, "Error building the dir_name"); fail_on_with_code((size_t) ret >= sizeof(dir_name), err, ret, @@ -752,13 +855,13 @@ fpga_pci_remove_app_pf(struct fpga_pci_resource_map *app_map) fail_on_with_code(ret != 0, err, ret, FPGA_ERR_UNRESPONSIVE, "cli_write_one2file failed"); -#if 0 +#if 0 /** Check that the app_pf does not exist */ - /** + /** * NOTE: - * A concurrent (remove)+rescan action on another FPGA slot will make this + * A concurrent (remove)+rescan action on another FPGA slot will make this * FPGA's app_pf visible again, so we should not error out here if we see - * that the app_pf is still present. + * that the app_pf is still present. */ ret = fpga_pci_check_app_pf_sysfs(dir_name, false); /** exists==false */ fail_on(ret != 0, err, "fpga_pci_check_app_pf_sysfs failed"); @@ -770,9 +873,9 @@ fpga_pci_remove_app_pf(struct fpga_pci_resource_map *app_map) return ret; } -/** +/** * PCI rescan. 
- * + * * @returns * 0 on success, non-zero on error */ @@ -802,8 +905,8 @@ int fpga_pci_rescan_slot_app_pfs(int slot_id) { /** Get the slot spec */ - struct fpga_slot_spec spec; - int ret = fpga_pci_get_slot_spec(slot_id, &spec); + struct fpga_slot_spec spec; + int ret = fpga_pci_get_slot_spec(slot_id, &spec); fail_on(ret != 0, err, "fpga_pci_get_slot_spec failed"); /** Check if there is a driver attached to the given app_map */ @@ -812,21 +915,21 @@ fpga_pci_rescan_slot_app_pfs(int slot_id) ret = fpga_pci_check_app_pf_driver(app_map, &attached); fail_on(ret != 0, err, "fpga_pci_check_app_pf_driver failed"); - /** Remove the app_pf */ - ret = fpga_pci_remove_app_pf(app_map); + /** Remove the app_pf */ + ret = fpga_pci_remove_app_pf(app_map); fail_on(ret != 0, err, "fpga_pci_remove_app_pf failed"); - /** + /** * If we found a driver attached to the given app_map, increase * the wait time between remove and rescan. * Note that if the driver takes a long time to complete the - * PCI remove fuction (e.g. longer than the below wait time), + * PCI remove fuction (e.g. longer than the below wait time), * we may still fail to expose the changed PCI IDs in the rescan step. */ - uint32_t delay_msec = (attached) ? + uint32_t delay_msec = (attached) ? F1_REMOVE_APP_PF_LONG_DELAY_MSEC : F1_REMOVE_APP_PF_SHORT_DELAY_MSEC; - log_info("Driver for " PCI_DEV_FMT " %s attached, waiting %u msec before rescan", + log_info("Driver for " PCI_DEV_FMT " %s attached, waiting %u msec before rescan", app_map->domain, app_map->bus, app_map->dev, app_map->func, (attached) ? "is" : "is not", delay_msec); diff --git a/sdk/userspace/fpga_mgmt_tools/README.md b/sdk/userspace/fpga_mgmt_tools/README.md index f59bc9f6..6f21b393 100644 --- a/sdk/userspace/fpga_mgmt_tools/README.md +++ b/sdk/userspace/fpga_mgmt_tools/README.md @@ -144,7 +144,7 @@ The following command displays the current state for the given FPGA slot number. 
### Looking at Metrics -The `fpga-describe-local-image` **`metrics`** option may be used to display FPGA image hardware metrics including FPGA PCI and DDR ECC metrics. +The `fpga-describe-local-image` **`metrics`** option may be used to display FPGA image hardware metrics including FPGA PCI and DDR metrics. Additionally, the `fpga-describe-local-image` **`clear-metrics`** option may be used to display and clear FPGA image hardware metrics (clear on read). diff --git a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c index c0b37b8c..720113b7 100644 --- a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c +++ b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.c @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -36,11 +38,11 @@ #define TYPE_FMT "%-10s" /** - * Globals + * Globals */ struct ec2_fpga_cmd f1; -/** +/** * Use dmesg as the default logger, stdout is available for debug. */ #if defined(FPGA_ALLOW_NON_ROOT) @@ -58,14 +60,14 @@ const struct logger *logger = &logger_kmsg; * @returns * 0 on success, non-zero on failure */ -static int +static int cli_show_slot_app_pfs(int slot_id, struct fpga_slot_spec *spec) { - fail_on(slot_id >= FPGA_SLOT_MAX, err, "slot_id(%d) >= %d", + fail_on(slot_id >= FPGA_SLOT_MAX, err, "slot_id(%d) >= %d", slot_id, FPGA_SLOT_MAX); if (f1.show_headers) { - printf("Type FpgaImageSlot VendorId DeviceId DBDF\n"); + printf("Type FpgaImageSlot VendorId DeviceId DBDF\n"); } /** Retrieve and display associated application PFs (if any) */ @@ -80,12 +82,12 @@ cli_show_slot_app_pfs(int slot_id, struct fpga_slot_spec *spec) } printf(TYPE_FMT " %2u 0x%04x 0x%04x " PCI_DEV_FMT "\n", - "AFIDEVICE", slot_id, app_map->vendor_id, app_map->device_id, + "AFIDEVICE", slot_id, app_map->vendor_id, app_map->device_id, app_map->domain, app_map->bus, app_map->dev, app_map->func); found_app_pf = true; } if (!found_app_pf) { - printf(TYPE_FMT " unknown unknown unknown\n", "AFIDEVICE"); + 
printf(TYPE_FMT " unknown unknown unknown\n", "AFIDEVICE"); } return 0; @@ -96,12 +98,12 @@ cli_show_slot_app_pfs(int slot_id, struct fpga_slot_spec *spec) /** * Display the FPGA image information. * - * @param[in] info the fpga info + * @param[in] info the fpga info * * @returns * 0 on success, non-zero on failure */ -static int +static int cli_show_image_info(struct fpga_mgmt_image_info *info) { assert(info); @@ -118,9 +120,9 @@ cli_show_image_info(struct fpga_mgmt_image_info *info) char *afi_id = (!info->ids.afi_id[0]) ? "none" : info->ids.afi_id; printf(TYPE_FMT " %2u %-22s", "AFI", f1.afi_slot, afi_id); - printf(" %-8s %2d %-8s %2d 0x%08x\n", - FPGA_STATUS2STR(info->status), info->status, - FPGA_ERR2STR(info->status_q), info->status_q, + printf(" %-8s %2d %-8s %2d 0x%08x\n", + FPGA_STATUS2STR(info->status), info->status, + FPGA_ERR2STR(info->status_q), info->status_q, info->sh_version); if (f1.rescan) { @@ -141,114 +143,118 @@ cli_show_image_info(struct fpga_mgmt_image_info *info) } struct fpga_metrics_common *fmc = &info->metrics; - printf("sdacl-slave-timeout=%u\n", + printf("sdacl-slave-timeout=%u\n", (fmc->int_status & FPGA_INT_STATUS_SDACL_SLAVE_TIMEOUT) ? 1 : 0); - printf("virtual-jtag-slave-timeout=%u\n", + printf("virtual-jtag-slave-timeout=%u\n", (fmc->int_status & FPGA_INT_STATUS_VIRTUAL_JTAG_SLAVE_TIMEOUT) ? 1 : 0); - printf("ocl-slave-timeout=%u\n", + printf("ocl-slave-timeout=%u\n", (fmc->int_status & FPGA_INT_STATUS_OCL_SLAVE_TIMEOUT) ? - 1 : 0); + 1 : 0); - printf("bar1-slave-timeout=%u\n", + printf("bar1-slave-timeout=%u\n", (fmc->int_status & FPGA_INT_STATUS_BAR1_SLAVE_TIMEOUT) ? - 1 : 0); + 1 : 0); - printf("dma-pcis-timeout=%u\n", + printf("dma-pcis-timeout=%u\n", (fmc->int_status & FPGA_INT_STATUS_DMA_PCI_SLAVE_TIMEOUT) ? - 1 : 0); + 1 : 0); - printf("pcim-range-error=%u\n", - (fmc->int_status & FPGA_INT_STATUS_PCI_MASTER_RANGE_ERROR) ? 
- 1 : 0); + printf("pcim-range-error=%u\n", + (fmc->int_status & FPGA_INT_STATUS_PCI_MASTER_RANGE_ERROR) ? + 1 : 0); - printf("pcim-axi-protocol-error=%u\n", + printf("pcim-axi-protocol-error=%u\n", (fmc->int_status & FPGA_INT_STATUS_PCI_MASTER_AXI_PROTOCOL_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-4K-cross-error=%u\n", + printf("dma-range-error=%u\n", + (fmc->int_status & FPGA_INT_STATUS_DMA_RANGE_ERROR) ? + 1 : 0); + + printf("pcim-axi-protocol-4K-cross-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_4K_CROSS_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-bus-master-enable-error=%u\n", + printf("pcim-axi-protocol-bus-master-enable-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_BM_EN_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-request-size-error=%u\n", + printf("pcim-axi-protocol-request-size-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_REQ_SIZE_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-write-incomplete-error=%u\n", + printf("pcim-axi-protocol-write-incomplete-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_WR_INCOMPLETE_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-first-byte-enable-error=%u\n", + printf("pcim-axi-protocol-first-byte-enable-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_FIRST_BYTE_EN_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-last-byte-enable-error=%u\n", + printf("pcim-axi-protocol-last-byte-enable-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_LAST_BYTE_EN_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-bready-error=%u\n", + printf("pcim-axi-protocol-bready-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_BREADY_TIMEOUT_ERROR) ? - 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-rready-error=%u\n", + printf("pcim-axi-protocol-rready-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_RREADY_TIMEOUT_ERROR) ? 
- 1 : 0); + 1 : 0); - printf("pcim-axi-protocol-wchannel-error=%u\n", + printf("pcim-axi-protocol-wchannel-error=%u\n", (fmc->pcim_axi_protocol_error_status & FPGA_PAP_WCHANNEL_TIMEOUT_ERROR) ? - 1 : 0); + 1 : 0); - printf("sdacl-slave-timeout-addr=0x%" PRIx32 "\n", fmc->sdacl_slave_timeout_addr); - printf("sdacl-slave-timeout-count=%u\n", fmc->sdacl_slave_timeout_count); + printf("sdacl-slave-timeout-addr=0x%" PRIx32 "\n", fmc->sdacl_slave_timeout_addr); + printf("sdacl-slave-timeout-count=%u\n", fmc->sdacl_slave_timeout_count); - printf("virtual-jtag-slave-timeout-addr=0x%" PRIx32 "\n", fmc->virtual_jtag_slave_timeout_addr); - printf("virtual-jtag-slave-timeout-count=%u\n", fmc->virtual_jtag_slave_timeout_count); + printf("virtual-jtag-slave-timeout-addr=0x%" PRIx32 "\n", fmc->virtual_jtag_slave_timeout_addr); + printf("virtual-jtag-slave-timeout-count=%u\n", fmc->virtual_jtag_slave_timeout_count); - printf("ocl-slave-timeout-addr=0x%" PRIx64 "\n", fmc->ocl_slave_timeout_addr); - printf("ocl-slave-timeout-count=%u\n", fmc->ocl_slave_timeout_count); + printf("ocl-slave-timeout-addr=0x%" PRIx64 "\n", fmc->ocl_slave_timeout_addr); + printf("ocl-slave-timeout-count=%u\n", fmc->ocl_slave_timeout_count); - printf("bar1-slave-timeout-addr=0x%" PRIx64 "\n", fmc->bar1_slave_timeout_addr); - printf("bar1-slave-timeout-count=%u\n", fmc->bar1_slave_timeout_count); + printf("bar1-slave-timeout-addr=0x%" PRIx64 "\n", fmc->bar1_slave_timeout_addr); + printf("bar1-slave-timeout-count=%u\n", fmc->bar1_slave_timeout_count); - printf("dma-pcis-timeout-addr=0x%" PRIx64 "\n", fmc->dma_pcis_timeout_addr); - printf("dma-pcis-timeout-count=%u\n", fmc->dma_pcis_timeout_count); + printf("dma-pcis-timeout-addr=0x%" PRIx64 "\n", fmc->dma_pcis_timeout_addr); + printf("dma-pcis-timeout-count=%u\n", fmc->dma_pcis_timeout_count); - printf("pcim-range-error-addr=0x%" PRIx64 "\n", fmc->pcim_range_error_addr); - printf("pcim-range-error-count=%u\n", fmc->pcim_range_error_count); + 
printf("pcim-range-error-addr=0x%" PRIx64 "\n", fmc->pcim_range_error_addr); + printf("pcim-range-error-count=%u\n", fmc->pcim_range_error_count); - printf("pcim-axi-protocol-error-addr=0x%" PRIx64 "\n", fmc->pcim_axi_protocol_error_addr); - printf("pcim-axi-protocol-error-count=%u\n", fmc->pcim_axi_protocol_error_count); + printf("pcim-axi-protocol-error-addr=0x%" PRIx64 "\n", fmc->pcim_axi_protocol_error_addr); + printf("pcim-axi-protocol-error-count=%u\n", fmc->pcim_axi_protocol_error_count); - printf("pcim-write-count=%" PRIu64 "\n", fmc->pcim_write_count); - printf("pcim-read-count=%" PRIu64 "\n", fmc->pcim_read_count); + printf("pcim-write-count=%" PRIu64 "\n", fmc->pcim_write_count); + printf("pcim-read-count=%" PRIu64 "\n", fmc->pcim_read_count); for (i = 0; i < sizeof_array(fmc->ddr_ifs); i++) { struct fpga_ddr_if_metrics_common *ddr_if = &fmc->ddr_ifs[i]; printf("DDR%u\n", i); - printf(" write-count=%" PRIu64 "\n", ddr_if->write_count); - printf(" read-count=%" PRIu64 "\n", ddr_if->read_count); + printf(" write-count=%" PRIu64 "\n", ddr_if->write_count); + printf(" read-count=%" PRIu64 "\n", ddr_if->read_count); } printf("Clock Group A Frequency (Mhz)\n"); for (i = 0; i < CLOCK_COUNT_A; i++) { frequency = fmc->clocks[0].frequency[i] / 1000000; - printf("%" PRIu64 " ", frequency); + printf("%" PRIu64 " ", frequency); } printf("\nClock Group B Frequency (Mhz)\n"); for (i = 0; i < CLOCK_COUNT_B; i++) { frequency = fmc->clocks[1].frequency[i] / 1000000; - printf("%" PRIu64 " ", frequency); + printf("%" PRIu64 " ", frequency); } printf("\nClock Group C Frequency (Mhz)\n"); for (i = 0; i < CLOCK_COUNT_C; i++) { frequency = fmc->clocks[2].frequency[i] / 1000000; - printf("%" PRIu64 " ", frequency); + printf("%" PRIu64 " ", frequency); } printf("\n"); @@ -256,6 +262,13 @@ cli_show_image_info(struct fpga_mgmt_image_info *info) printf(" Last measured: %" PRIu64 " watts\n",fmc->power); printf(" Average: %" PRIu64 " watts\n",fmc->power_mean); printf(" Max measured: 
%" PRIu64 " watts\n",fmc->power_max); + printf("Cached agfis:\n"); + for (i = 0; i < sizeof_array(fmc->cached_agfis); i++) { + if (fmc->cached_agfis[i] == 0) { + break; + } + printf(" agfi-0%016" PRIx64 "\n", fmc->cached_agfis[i]); + } } return 0; @@ -275,9 +288,9 @@ cli_attach(void) int ret = FPGA_ERR_FAIL; if (f1.opcode == CLI_CMD_DESCRIBE_SLOTS) { - /** + /** * ec2-afi-describe-slots does not use the Mbox logic, local - * information only + * information only */ goto out; } @@ -321,6 +334,7 @@ static int command_load(void) uint32_t flags = 0; flags |= (f1.force_shell_reload ) ? FPGA_CMD_FORCE_SHELL_RELOAD : 0; flags |= (f1.dram_data_retention) ? FPGA_CMD_DRAM_DATA_RETENTION : 0; + flags |= (f1.prefetch) ? FPGA_CMD_PREFETCH : 0; fpga_mgmt_init_load_local_image_options(&opt); opt.slot_id = f1.afi_slot; @@ -354,7 +368,7 @@ static int command_load(void) * @returns * 0 on success, non-zero on failure */ -static int +static int command_clear(void) { int ret; @@ -366,7 +380,7 @@ command_clear(void) struct fpga_mgmt_image_info info; memset(&info, 0, sizeof(struct fpga_mgmt_image_info)); - ret = fpga_mgmt_clear_local_image_sync(f1.afi_slot, + ret = fpga_mgmt_clear_local_image_sync(f1.afi_slot, f1.sync_timeout, f1.sync_delay_msec, &info); fail_on(ret != 0, err, "fpga_mgmt_clear_local_image_sync failed"); @@ -444,7 +458,7 @@ command_describe_slots(void) * @returns * 0 on success, non-zero on failure */ -static int +static int command_start_virtual_jtag(void) { printf("Starting Virtual JTAG XVC Server for FPGA slot id %u, listening to TCP port %s.\n", @@ -457,7 +471,7 @@ command_start_virtual_jtag(void) /** * Display the virtual status from the get virtual led or dip command. * - * @param[in] status the virtual led or dip status to display. + * @param[in] status the virtual led or dip status to display. 
* * @returns * 0 on success, non-zero on failure @@ -487,7 +501,7 @@ cli_show_virtual_led_dip_status(uint16_t status) * @returns * 0 on success, non-zero on failure */ -static int +static int command_get_virtual_led(void) { uint16_t status; @@ -508,7 +522,7 @@ command_get_virtual_led(void) * @returns * 0 on success, non-zero on failure */ -static int +static int command_get_virtual_dip(void) { uint16_t status; @@ -529,7 +543,7 @@ command_get_virtual_dip(void) * @returns * 0 on success, non-zero on failure */ -static int +static int command_set_virtual_dip(void) { int ret; @@ -553,7 +567,7 @@ static const command_func_t command_table[CLI_CMD_END] = { }; /** - * Main CLI cmd/rsp processing engine. + * Main CLI cmd/rsp processing engine. * * @returns * 0 on success, non-zero on failure @@ -576,7 +590,7 @@ cli_main(void) * @returns * 0 on success, non-zero on failure */ -static int +static int cli_init_f1(void) { memset(&f1, 0, sizeof(f1)); @@ -599,7 +613,7 @@ cli_init_f1(void) * @returns * 0 on success, non-zero on failure */ -static int +static int cli_create(void) { return cli_init_f1(); @@ -611,7 +625,7 @@ cli_create(void) * @returns * 0 on success, non-zero on failure */ -static int +static int cli_destroy(void) { return cli_init_f1(); @@ -620,24 +634,24 @@ cli_destroy(void) /** * CLI main * - * @param[in] argc argument count + * @param[in] argc argument count * @param[in] argv argument vector * * @returns * 0 on success, non-zero on failure */ -int +int main(int argc, char *argv[]) { int ret = cli_create(); fail_on(ret != 0, err, "cli_create failed"); - ret = log_init("fpga-local-cmd"); + ret = log_init("fpga-local-cmd(%u)", getpid()); fail_on(ret != 0, err, "log_init failed"); ret = log_attach(logger, NULL, 0); fail_on_user(ret != 0, err, "%s", CLI_ROOT_ACCESS_ERR_STR); - + ret = parse_args(argc, argv); fail_on(ret != 0, err, "parse_args failed"); @@ -647,8 +661,8 @@ main(int argc, char *argv[]) ret = cli_main(); fail_on(ret != 0, err, "cli_main failed"); err: - 
/** - * f1.parser_completed may be set by parse_args when it internally + /** + * f1.parser_completed may be set by parse_args when it internally * completes the command without error due to help or version output. * In this case a non-zero error is returned by parse_args and we do not * want to print the "Error" below. diff --git a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h index 52887fb5..06f5235f 100644 --- a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h +++ b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd.h @@ -60,8 +60,8 @@ enum { * e.g. load + describe multi-AFI command sequences. * timeout * delay_msec */ -#define CLI_SYNC_TIMEOUT_DFLT 3000 -#define CLI_SYNC_DELAY_MSEC_DFLT 20 +#define CLI_SYNC_TIMEOUT_DFLT 30000 +#define CLI_SYNC_DELAY_MSEC_DFLT 2 /** * Request timeout: timeout * delay_msec @@ -109,6 +109,8 @@ struct ec2_fpga_cmd { bool force_shell_reload; /** Attempt dram data retention on load */ bool dram_data_retention; + /** Don't actually load the FPGA, just cache the files for a later load */ + bool prefetch; /** Virtual DIP switch */ uint16_t v_dip_switch; /** Virtual JTAG TCP port */ diff --git a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c index 446086db..aaebceb5 100644 --- a/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c +++ b/sdk/userspace/fpga_mgmt_tools/src/fpga_local_cmd_parse.c @@ -99,10 +99,10 @@ static const char *describe_afi_usage[] = { " Rescan the AFIDEVICE to update the per-AFI PCI VendorId and", " DeviceId that may be dynamically modified due to a", " fpga-load-local-image or fpga-clear-local-image command.", - " NOTE1: this option removes the AFIDEVICE from the sysfs PCI", + " NOTE1: this option removes the AFIDEVICE from the sysfs PCI", " subsystem and then rescans the PCI subsystem in order for", " the modified AFI PCI IDs to be refreshed.", - " NOTE2: it is the developer's 
responsibility to remove any", + " NOTE2: it is the developer's responsibility to remove any", " driver previously installed on the older PCIe VendorId", " and DeviceId before fpga-clear-local-image,", " fpga-load-local-image, or re-scan.", @@ -130,10 +130,10 @@ static const char *load_afi_usage[] = { " NOTE: By default, this command automatically rescans the AFIDEVICE", " to update the per-AFI PCI VendorId and DeviceId that may be", " dynamically modified during each FPGA image load.", - " The rescan operation removes the AFIDEVICE from the sysfs PCI", + " The rescan operation removes the AFIDEVICE from the sysfs PCI", " subsystem and then rescans the PCI subsystem in order for", " the modified AFI PCI IDs to be refreshed.", - " It is the developer's responsibility to remove any", + " It is the developer's responsibility to remove any", " driver previously installed on the older PCIe VendorId", " and DeviceId before the FPGA image is loaded.", " GENERAL OPTIONS", @@ -176,6 +176,10 @@ static const char *load_afi_usage[] = { " This will try to detect if retention is possible and reject the", " load if it is not. 
To use, call load with another afi already", " loaded.", + " -P, --prefetch-image", + " Prefetch the indicated AFI and store it in the cache for faster loading.", + " Fastest load times can be achieved by using cached AFIs and enabling data retention (-D).", + " See Reducing AFI load times documentation.", }; static const char *clear_afi_usage[] = { @@ -193,10 +197,10 @@ static const char *clear_afi_usage[] = { " NOTE: By default, this command automatically rescans the AFIDEVICE", " to update the default AFI PCI VendorId and DeviceId that are", " dynamically modified during each FPGA image clear.", - " The rescan operation removes the AFIDEVICE from the sysfs PCI", + " The rescan operation removes the AFIDEVICE from the sysfs PCI", " subsystem and then rescans the PCI subsystem in order for", " the modified AFI PCI IDs to be refreshed.", - " It is the developer's responsibility to remove any", + " It is the developer's responsibility to remove any", " driver previously installed on the older PCIe VendorId", " and DeviceId before the FPGA image is cleared.", " GENERAL OPTIONS", @@ -263,7 +267,7 @@ static const char *get_virtual_led_usage[] = { " Example: fpga-get-virtual-led -S 0", " DESCRIPTION", " Returns the current status of the virtual LED exposed by the AFI, a", - " series of 0 (zeros) and 1 (ones), first digit from the righti maps", + " series of 0 (zeros) and 1 (ones), first digit from the righti maps", " to cl_sh_vled[0]. For example, a return value 0000000001000000", " indicates that cl_sh_vled[6] is set(on)", " GENERAL OPTIONS", @@ -327,15 +331,15 @@ static const char *set_virtual_dip_usage[] = { }; /** - * Generic usage printing engine. + * Generic usage printing engine. 
* * @param[in] prog_name program name * @param[in] usage usage array of strings * @param[in] num_entries number of entries in the usage array of strings */ -static void +static void print_usage(const char *prog_name, const char *usage[], size_t num_entries) -{ +{ (void)prog_name; size_t i; @@ -345,27 +349,27 @@ print_usage(const char *prog_name, const char *usage[], size_t num_entries) } /** - * Print the version number of this program. + * Print the version number of this program. */ -static void +static void print_version(void) -{ +{ printf("AFI Management Tools Version: %s\n", CLI_VERSION); } /** * Check the given option and set the f1.parser_completed flag. * - * -parser_completed is set when the parser will complete the option - * (help or version output) and no further command processing is necessary, + * -parser_completed is set when the parser will complete the option + * (help or version output) and no further command processing is necessary, * though a non-zero return value is still returned from parse_args. * -the parser_completed flag may then be used to skip the "Error" output - * that is generically used for parsing or other errors beyond the parsing + * that is generically used for parsing or other errors beyond the parsing * stage. 
* * @param[in] opt the option to check */ -static void +static void get_parser_completed(char opt) { if ((opt == 'h') || (opt == 'V')) { @@ -379,19 +383,19 @@ get_parser_completed(char opt) * @param[in] timeout timeout in seconds * * @returns - * 0 on success + * 0 on success * -1 on failure */ static int config_request_timeout(uint32_t timeout) { - size_t timeout_tmp = + size_t timeout_tmp = CLI_REQUEST_TIMEOUT_DFLT * CLI_REQUEST_DELAY_MSEC_DFLT / MSEC_PER_SEC; - size_t timeout_max = + size_t timeout_max = ((size_t)(uint32_t)-1) * CLI_REQUEST_DELAY_MSEC_DFLT / MSEC_PER_SEC; /** Check min and max values */ - fail_on_user((timeout < timeout_tmp) || (timeout > timeout_max), err, + fail_on_user((timeout < timeout_tmp) || (timeout > timeout_max), err, "Error: The timeout must be between %zu and %zu seconds", timeout_tmp, timeout_max); @@ -404,7 +408,7 @@ config_request_timeout(uint32_t timeout) f1.request_timeout = timeout_tmp; f1.request_delay_msec = CLI_REQUEST_DELAY_MSEC_DFLT; - log_debug("Setting timeout to %u secs, request_timeout=%u, request_delay_msec=%u", + log_debug("Setting timeout to %u secs, request_timeout=%u, request_delay_msec=%u", timeout, f1.request_timeout, f1.request_delay_msec); return 0; err: @@ -417,19 +421,19 @@ config_request_timeout(uint32_t timeout) * @param[in] timeout timeout in seconds * * @returns - * 0 on success + * 0 on success * -1 on failure */ static int config_sync_timeout(uint32_t timeout) { - size_t timeout_tmp = + size_t timeout_tmp = CLI_SYNC_TIMEOUT_DFLT * CLI_SYNC_DELAY_MSEC_DFLT / MSEC_PER_SEC; - size_t timeout_max = + size_t timeout_max = ((size_t)(uint32_t)-1) * CLI_SYNC_DELAY_MSEC_DFLT / MSEC_PER_SEC; /** Check min and max values */ - fail_on_user((timeout < timeout_tmp) || (timeout > timeout_max), err, + fail_on_user((timeout < timeout_tmp) || (timeout > timeout_max), err, "Error: The timeout must be between %zu and %zu seconds", timeout_tmp, timeout_max); @@ -442,7 +446,7 @@ config_sync_timeout(uint32_t timeout) 
f1.sync_timeout = timeout_tmp; f1.sync_delay_msec = CLI_SYNC_DELAY_MSEC_DFLT; - log_debug("Setting timeout to %u secs, sync_timeout=%u, sync_delay_msec=%u", + log_debug("Setting timeout to %u secs, sync_timeout=%u, sync_delay_msec=%u", timeout, f1.sync_timeout, f1.sync_delay_msec); return 0; err: @@ -455,7 +459,7 @@ config_sync_timeout(uint32_t timeout) * @param[in] argc Argument count. * @param[in] argv Argument string vector. */ -static int +static int parse_args_load_afi(int argc, char *argv[]) { int opt = 0; @@ -474,16 +478,17 @@ parse_args_load_afi(int argc, char *argv[]) {"version", no_argument, 0, 'V' }, {"force-shell-reload", no_argument, 0, 'F' }, {"dram-data-retention", no_argument, 0, 'D' }, + {"prefetch-image", no_argument, 0, 'P' }, {0, 0, 0, 0 }, }; int long_index = 0; - while ((opt = getopt_long(argc, argv, "S:I:r:s:a:b:c:AH?hVFD", + while ((opt = getopt_long(argc, argv, "S:I:r:s:a:b:c:AH?hVFDP", long_options, &long_index)) != -1) { switch (opt) { case 'S': { string_to_uint(&f1.afi_slot, optarg); - fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", + fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", FPGA_SLOT_MAX); break; } @@ -491,8 +496,8 @@ parse_args_load_afi(int argc, char *argv[]) fail_on_user(strnlen(optarg, AFI_ID_STR_MAX) == AFI_ID_STR_MAX, err, "fpga-image-id must be less than %u bytes", AFI_ID_STR_MAX); - strncpy(f1.afi_id, optarg, sizeof(f1.afi_id)); - f1.afi_id[sizeof(f1.afi_id) - 1] = 0; + strncpy(f1.afi_id, optarg, sizeof(f1.afi_id)); + f1.afi_id[sizeof(f1.afi_id) - 1] = 0; break; } case 'a': { @@ -545,13 +550,18 @@ parse_args_load_afi(int argc, char *argv[]) f1.dram_data_retention = true; break; } + case 'P': { + f1.prefetch = true; + f1.async = true; + break; + } default: { get_parser_completed(opt); - goto err; + goto err; } } } - + if ((f1.afi_slot == (uint32_t) -1) || (f1.afi_id[0] == 0)) { goto err; @@ -570,7 +580,7 @@ parse_args_load_afi(int argc, char 
*argv[]) * @param[in] argc Argument count. * @param[in] argv Argument string vector. */ -static int +static int parse_args_clear_afi(int argc, char *argv[]) { int opt = 0; @@ -592,7 +602,7 @@ parse_args_clear_afi(int argc, char *argv[]) switch (opt) { case 'S': { string_to_uint(&f1.afi_slot, optarg); - fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", + fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", FPGA_SLOT_MAX); break; } @@ -625,12 +635,12 @@ parse_args_clear_afi(int argc, char *argv[]) } default: { get_parser_completed(opt); - goto err; + goto err; } } } - - if (f1.afi_slot == (uint32_t) -1) { + + if (f1.afi_slot == (uint32_t) -1) { goto err; } @@ -647,7 +657,7 @@ parse_args_clear_afi(int argc, char *argv[]) * @param[in] argc Argument count. * @param[in] argv Argument string vector. */ -static int +static int parse_args_describe_afi(int argc, char *argv[]) { int opt = 0; @@ -670,7 +680,7 @@ parse_args_describe_afi(int argc, char *argv[]) switch (opt) { case 'S': { string_to_uint(&f1.afi_slot, optarg); - fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, + fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", FPGA_SLOT_MAX); break; } @@ -705,12 +715,12 @@ parse_args_describe_afi(int argc, char *argv[]) } default: { get_parser_completed(opt); - goto err; + goto err; } } } - - if (f1.afi_slot == (uint32_t) -1) { + + if (f1.afi_slot == (uint32_t) -1) { goto err; } @@ -728,7 +738,7 @@ parse_args_describe_afi(int argc, char *argv[]) * @param[in] argc Argument count. * @param[in] argv Argument string vector. 
*/ -static int +static int parse_args_describe_afi_slots(int argc, char *argv[]) { int opt = 0; @@ -768,14 +778,14 @@ parse_args_describe_afi_slots(int argc, char *argv[]) } default: { get_parser_completed(opt); - goto err; + goto err; } } } - + return 0; err: - print_usage(argv[0], describe_afi_slots_usage, + print_usage(argv[0], describe_afi_slots_usage, sizeof_array(describe_afi_slots_usage)); out_ver: return -EINVAL; @@ -789,7 +799,7 @@ static char default_tcp_port[5] = "10201"; * @param[in] argc Argument count. * @param[in] argv Argument string vector. */ -static int +static int parse_args_start_virtual_jtag(int argc, char *argv[]) { int opt = 0; @@ -813,7 +823,7 @@ parse_args_start_virtual_jtag(int argc, char *argv[]) switch (opt) { case 'S': { string_to_uint(&f1.afi_slot, optarg); - fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, + fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", FPGA_SLOT_MAX); break; } @@ -838,16 +848,16 @@ parse_args_start_virtual_jtag(int argc, char *argv[]) } default: { get_parser_completed(opt); - goto err; + goto err; } } } - - if (f1.afi_slot == (uint32_t) -1) { + + if (f1.afi_slot == (uint32_t) -1) { printf("Error: Invalid Slot Id !"); goto err; } - + return 0; err: @@ -862,7 +872,7 @@ parse_args_start_virtual_jtag(int argc, char *argv[]) * @param[in] argc Argument count. * @param[in] argv Argument string vector. 
*/ -static int +static int parse_args_get_virtual_led(int argc, char *argv[]) { int opt; @@ -881,7 +891,7 @@ parse_args_get_virtual_led(int argc, char *argv[]) switch (opt) { case 'S': { string_to_uint(&f1.afi_slot, optarg); - fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, + fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", FPGA_SLOT_MAX); break; } @@ -897,12 +907,12 @@ parse_args_get_virtual_led(int argc, char *argv[]) } default: { get_parser_completed(opt); - goto err; + goto err; } } } - - if (f1.afi_slot == (uint32_t) -1) { + + if (f1.afi_slot == (uint32_t) -1) { printf("Error: Invalid Slot Id !"); goto err; } @@ -919,7 +929,7 @@ parse_args_get_virtual_led(int argc, char *argv[]) * @param[in] argc Argument count. * @param[in] argv Argument string vector. */ -static int +static int parse_args_get_virtual_dip(int argc, char *argv[]) { int opt; @@ -938,7 +948,7 @@ parse_args_get_virtual_dip(int argc, char *argv[]) switch (opt) { case 'S': { string_to_uint(&f1.afi_slot, optarg); - fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, + fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", FPGA_SLOT_MAX); break; } @@ -954,16 +964,16 @@ parse_args_get_virtual_dip(int argc, char *argv[]) } default: { get_parser_completed(opt); - goto err; + goto err; } } } - - if (f1.afi_slot == (uint32_t) -1) { + + if (f1.afi_slot == (uint32_t) -1) { printf("Error: Invalid Slot Id !"); goto err; } - + return 0; err: print_usage(argv[0], get_virtual_dip_usage, sizeof_array(get_virtual_dip_usage)); @@ -977,7 +987,7 @@ parse_args_get_virtual_dip(int argc, char *argv[]) * @param[in] argc Argument count. * @param[in] argv Argument string vector. 
*/ -static int +static int parse_args_set_virtual_dip(int argc, char *argv[]) { int opt; @@ -1000,20 +1010,20 @@ parse_args_set_virtual_dip(int argc, char *argv[]) switch (opt) { case 'S': { string_to_uint(&f1.afi_slot, optarg); - fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, + fail_on_user(f1.afi_slot >= FPGA_SLOT_MAX, err, "fpga-image-slot must be less than %u", FPGA_SLOT_MAX); break; } case 'D': { - fail_on_user(strlen(optarg) != 16, err, + fail_on_user(strlen(optarg) != 16, err, "virtual-dip must be 16 digits of zero or one"); for (i=0;i<16;i++) { if (optarg[i] == '1') status = status | 0x1; else if (optarg[i] == '0') status = status; - else - fail_on_user(1, err, + else + fail_on_user(1, err, "illegal digit for virtual-dip %c", optarg[i]); if (i!=15) status = status << 1; @@ -1035,12 +1045,12 @@ parse_args_set_virtual_dip(int argc, char *argv[]) default: { get_parser_completed(opt); - goto err; + goto err; } } } - - if (f1.afi_slot == (uint32_t) -1) { + + if (f1.afi_slot == (uint32_t) -1) { printf("Error: Invalid Slot Id !"); goto err; } @@ -1048,14 +1058,14 @@ parse_args_set_virtual_dip(int argc, char *argv[]) printf("Error: Missing DIP Switch values !"); goto err; } - - return 0; + + return 0; err: print_usage(argv[0], set_virtual_dip_usage, sizeof_array(set_virtual_dip_usage)); out_ver: return -EINVAL; } - + typedef int (*parse_args_func_t)(int argc, char *argv[]); struct parse_args_str2func { @@ -1070,11 +1080,11 @@ struct parse_args_str2func { * @param[in] argc Argument count. * @param[in] argv Argument string vector. 
*/ -int +int parse_args(int argc, char *argv[]) { fail_on(argc < 2, err, "Error: opcode string must be specified"); - fail_on_user(!argv[0] || !argv[1], err, + fail_on_user(!argv[0] || !argv[1], err, "Error: program name or opcode string is NULL"); static struct parse_args_str2func str2func[] = { diff --git a/sdk/userspace/include/fpga_pci.h b/sdk/userspace/include/fpga_pci.h index b60681fb..336b2aec 100644 --- a/sdk/userspace/include/fpga_pci.h +++ b/sdk/userspace/include/fpga_pci.h @@ -247,19 +247,17 @@ int fpga_pci_memset(pci_bar_handle_t handle, uint64_t offset, uint32_t value, * Glibc 2.19 and lower support readdir_r, a reentrant version of readdir. * Newer versions of glibc deprecate readdir_r and therefore require external * synchronization on readdir. - */ -#if !defined(_BSD_SOURCE) && !defined(_SVID_SOURCE) -/** - * This mutex is used internally in fpga_pci_get_all_slot_specs to provide - * synchronization for calls to readdir. The mutex is exported so that if - * software which links with this library also uses readdir in a threaded + * + * The mutex is used internally in fpga_pci_get_all_slot_specs to provide + * synchronization for calls to readdir. The calls to lock/unlock this mutex is exported + * so that if software which links with this library also uses readdir in a threaded * environment, it can use this lock to protect calls to readdir. 
*/ -extern pthread_mutex_t fpga_pci_readdir_mutex; -#else -#define FPGA_PCI_USE_READDIR_R -#endif +__attribute__((visibility("hidden"))) extern pthread_mutex_t fpga_pci_readdir_mutex; + +int fpga_acquire_readdir_lock(void); +int fpga_release_readdir_lock(void); #ifdef __cplusplus } diff --git a/sdk/userspace/include/hal/fpga_common.h b/sdk/userspace/include/hal/fpga_common.h index 0bb1e321..c3d7d833 100644 --- a/sdk/userspace/include/hal/fpga_common.h +++ b/sdk/userspace/include/hal/fpga_common.h @@ -14,7 +14,7 @@ */ /** @file - * FPGA common header + * FPGA common header */ #pragma once @@ -25,6 +25,7 @@ #define FPGA_SLOT_MAX 8 #define AFI_ID_STR_MAX 64 #define FPGA_DDR_IFS_MAX 4 +#define FPGA_CACHED_AGFIS_MAX 16 /** * FPGA Mixed Mode Clock Manager (MMCM) config. @@ -43,30 +44,34 @@ enum { /** reserved */ FPGA_CMD_RSVD = 1 << 0, - /** return FPGA image hardware metrics */ + /** return FPGA image hardware metrics */ FPGA_CMD_GET_HW_METRICS = 1 << 1, - /** return FPGA image hardware metrics (clear on read */ + /** return FPGA image hardware metrics (clear on read */ FPGA_CMD_CLEAR_HW_METRICS = 1 << 2, FPGA_CMD_FORCE_SHELL_RELOAD = 1 << 3, - - /** request that ddr data retention is used during load */ FPGA_CMD_DRAM_DATA_RETENTION = 1 << 4, + FPGA_CMD_EXTENDED_METRICS_SIZE = 1 << 6, + FPGA_CMD_PREFETCH = 1 << 7, + + - FPGA_CMD_ALL_FLAGS = FPGA_CMD_GET_HW_METRICS | + FPGA_CMD_ALL_FLAGS = FPGA_CMD_GET_HW_METRICS | FPGA_CMD_CLEAR_HW_METRICS | FPGA_CMD_FORCE_SHELL_RELOAD | - FPGA_CMD_DRAM_DATA_RETENTION , + FPGA_CMD_DRAM_DATA_RETENTION | + FPGA_CMD_EXTENDED_METRICS_SIZE | + FPGA_CMD_PREFETCH, }; -/** +/** * FPGA specific errors * e.g. as returned by fpga-load-local-image, fpga-clear-local-image, * and fpga-describe-local-image. * * -note that these must fit into an int32_t and must be positive integers. 
- * -this is compatible with the standard errno values such as -EINVAL, -EIO, + * -this is compatible with the standard errno values such as -EINVAL, -EIO, * -EPERM, -ENOENT that are also used. * * Any additions should also be added to FPGA_ERR2STR (see below). @@ -91,7 +96,7 @@ enum { /** Reserved: 6-10 */ /** Invalid AFI_CMD_API_VERSION, see afi_cmd_api.h */ - FPGA_ERR_AFI_CMD_API_VERSION_INVALID = 11, + FPGA_ERR_AFI_CMD_API_VERSION_INVALID = 11, /** CL PCI IDs did not match (e.g. between LF and CL reported values */ FPGA_ERR_CL_ID_MISMATCH = 12, /** CL DDR calibration failed */ @@ -107,6 +112,8 @@ enum { * possible. This prevents the loss of data when retention cannot work. */ FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE = 18, + FPGA_ERR_HARDWARE_BUSY = 19, + /** Reserved: 19 */ /** Unable to locate PCI devices/resources */ @@ -141,7 +148,7 @@ enum { ((error) == FPGA_ERR_CL_ID_MISMATCH) ? "cl-id-mismatch" : \ ((error) == FPGA_ERR_CL_DDR_CALIB_FAILED) ? "cl-ddr-calib-failed" : \ ((error) == FPGA_ERR_FAIL) ? "unspecified-error" : \ - ((error) == FPGA_ERR_SHELL_MISMATCH) ? "afi-shell-version-mismatch" : \ + ((error) == FPGA_ERR_SHELL_MISMATCH) ? "shell-version-not-supported" : \ ((error) == FPGA_ERR_POWER_VIOLATION) ? "afi-power-violation" : \ ((error) == FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE) ? "dram-data-retention-not-possible" : \ ((error) == FPGA_ERR_DRAM_DATA_RETENTION_FAILED) ? "dram-data-retention-failed" : \ @@ -150,6 +157,7 @@ enum { ((error) == FPGA_ERR_SOFTWARE_PROBLEM) ? "software-problem": \ ((error) == FPGA_ERR_UNRESPONSIVE) ? "unresponsive": \ ((error) == FPGA_ERR_AFI_CMD_MALFORMED) ? "afi-command-malformed" : \ + ((error) == FPGA_ERR_HARDWARE_BUSY) ? "hardware-busy" : \ "internal-error" @@ -160,7 +168,7 @@ enum { * Any additions should also be added to FPGA_STATUS2STR (see below). 
*/ enum { - /**< FPGA slot has an AFI loaded */ + /**< FPGA slot has an AFI loaded */ FPGA_STATUS_LOADED = 0, /**< FPGA slot is cleared */ FPGA_STATUS_CLEARED = 1, @@ -223,7 +231,7 @@ struct fpga_pci_resource_map { uint16_t device_id; uint16_t subsystem_device_id; uint16_t subsystem_vendor_id; - + /** e.g. PCI Domain:Bus:Device.Function */ uint16_t domain; uint8_t bus; @@ -290,7 +298,7 @@ struct fpga_metrics_common { uint64_t pcim_axi_protocol_error_addr; uint32_t pcim_axi_protocol_error_count; /** reserved */ - uint8_t reserved2[12]; + uint8_t reserved2[12]; /** FPGA_INT_STATUS_OCL_SLAVE_TIMEOUT: address and count */ uint64_t ocl_slave_timeout_addr; uint32_t ocl_slave_timeout_count; @@ -317,14 +325,18 @@ struct fpga_metrics_common { uint64_t power_mean; uint64_t power_max; uint64_t power; + uint64_t cached_agfis[FPGA_CACHED_AGFIS_MAX]; + uint64_t flags; } __attribute__((packed)); /** Common int_status */ enum { - /** SDACL slave timeout (CL did not respond to cycle from host) */ + /** SDACL slave timeout (CL did not respond to cycle from host) */ FPGA_INT_STATUS_SDACL_SLAVE_TIMEOUT = 1 << 0, /** Virtual JTAG timeout */ - FPGA_INT_STATUS_VIRTUAL_JTAG_SLAVE_TIMEOUT = 1 << 1, + FPGA_INT_STATUS_VIRTUAL_JTAG_SLAVE_TIMEOUT = 1 << 1, + /** A DMA engine made an out of range access */ + FPGA_INT_STATUS_DMA_RANGE_ERROR = 1 << 7, /** CL did not respond to DMA cycle from host */ FPGA_INT_STATUS_DMA_PCI_SLAVE_TIMEOUT = 1 << 17, /** PCIe master cycle from CL out of range */ @@ -336,11 +348,12 @@ enum { /** CL BAR1 did not respond to cycle from host */ FPGA_INT_STATUS_BAR1_SLAVE_TIMEOUT = 1 << 29, - FPGA_INT_STATUS_ALL = + FPGA_INT_STATUS_ALL = FPGA_INT_STATUS_SDACL_SLAVE_TIMEOUT | FPGA_INT_STATUS_VIRTUAL_JTAG_SLAVE_TIMEOUT | + FPGA_INT_STATUS_DMA_RANGE_ERROR | FPGA_INT_STATUS_DMA_PCI_SLAVE_TIMEOUT | - FPGA_INT_STATUS_PCI_MASTER_RANGE_ERROR | + FPGA_INT_STATUS_PCI_MASTER_RANGE_ERROR | FPGA_INT_STATUS_PCI_MASTER_AXI_PROTOCOL_ERROR | FPGA_INT_STATUS_OCL_SLAVE_TIMEOUT | 
FPGA_INT_STATUS_BAR1_SLAVE_TIMEOUT, @@ -358,11 +371,11 @@ enum { FPGA_PAP_RREADY_TIMEOUT_ERROR = 1 << 9, FPGA_PAP_WCHANNEL_TIMEOUT_ERROR = 1 << 10, - FPGA_PAP_ERROR_STATUS_ALL = + FPGA_PAP_ERROR_STATUS_ALL = FPGA_PAP_4K_CROSS_ERROR | FPGA_PAP_BM_EN_ERROR | FPGA_PAP_REQ_SIZE_ERROR | FPGA_PAP_WR_INCOMPLETE_ERROR | FPGA_PAP_FIRST_BYTE_EN_ERROR | FPGA_PAP_LAST_BYTE_EN_ERROR | FPGA_PAP_BREADY_TIMEOUT_ERROR | - FPGA_PAP_RREADY_TIMEOUT_ERROR | + FPGA_PAP_RREADY_TIMEOUT_ERROR | FPGA_PAP_WCHANNEL_TIMEOUT_ERROR, }; diff --git a/sdk/userspace/include/utils/sh_dpi_tasks.h b/sdk/userspace/include/utils/sh_dpi_tasks.h index 439dfe99..0d0430bc 100644 --- a/sdk/userspace/include/utils/sh_dpi_tasks.h +++ b/sdk/userspace/include/utils/sh_dpi_tasks.h @@ -33,11 +33,20 @@ extern void sv_map_host_memory(uint8_t *memory); extern void cl_peek(uint64_t addr, uint32_t *data); extern void cl_poke(uint64_t addr, uint32_t data); +extern void cl_peek_pcis(uint64_t addr, uint32_t *data); +extern void cl_poke_pcis(uint64_t addr, uint32_t data); +extern void cl_peek_sda(uint64_t addr, uint32_t *data); +extern void cl_poke_sda(uint64_t addr, uint32_t data); +extern void cl_peek_ocl(uint64_t addr, uint32_t *data); +extern void cl_poke_ocl(uint64_t addr, uint32_t data); +extern void cl_peek_bar1(uint64_t addr, uint32_t *data); +extern void cl_poke_bar1(uint64_t addr, uint32_t data); extern void sv_int_ack(uint32_t int_num); extern void sv_pause(uint32_t x); extern void sv_fpga_start_buffer_to_cl(uint32_t slot_id, uint32_t chan, uint32_t buf_size, uint64_t wr_buffer_addr, uint64_t cl_addr); extern void sv_fpga_start_cl_to_buffer(uint32_t slot_id, uint32_t chan, uint32_t buf_size, uint64_t rd_buffer_addr, uint64_t cl_addr); extern void init_ddr(void); +extern void deselect_atg_hw(void); extern void hm_put_byte(uint64_t addr, uint8_t data); diff --git a/sdk/userspace/install_fpga_mgmt_tools.sh b/sdk/userspace/install_fpga_mgmt_tools.sh index e929bc44..740665ca 100755 --- 
a/sdk/userspace/install_fpga_mgmt_tools.sh +++ b/sdk/userspace/install_fpga_mgmt_tools.sh @@ -68,6 +68,7 @@ echo "AWS FPGA: Copying Amazon FPGA Image (AFI) Management Tools to $AFI_MGMT_TO cp -f $AFI_MGMT_TOOLS_SRC_DIR/fpga-* $AFI_MGMT_TOOLS_DST_DIR cp -f $AFI_MGMT_TOOLS_LIB_DIR/libfpga_mgmt.so.1.0.0 $AFI_MGMT_LIBS_DST_DIR ln -sf libfpga_mgmt.so.1 $AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so +ln -sf libfpga_mgmt.so.1.0.0 $AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so.1 source /tmp/sdk_root_env.exp if allow_non_root ; then diff --git a/sdk/userspace/python_bindings/fpga_dma.py b/sdk/userspace/python_bindings/fpga_dma.py index 25e965f5..64f01a98 100644 --- a/sdk/userspace/python_bindings/fpga_dma.py +++ b/sdk/userspace/python_bindings/fpga_dma.py @@ -12,7 +12,6 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # - # Python bindings for dma library # -*- coding: utf-8 -*- # @@ -24,7 +23,7 @@ _libraries = {} -_libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'] = ctypes.CDLL('PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so') +_libraries['libfpga_mgmt.so'] = ctypes.CDLL('libfpga_mgmt.so') # if local wordsize is same as target, keep ctypes pointer function. 
if ctypes.sizeof(ctypes.c_void_p) == 8: POINTER_T = ctypes.POINTER @@ -75,19 +74,22 @@ def __init__(self, **args): FPGA_DMA_EDMA = 0 FPGA_DMA_XDMA = 1 fpga_dma_driver = ctypes.c_int # enum -fpga_dma_open_queue = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_dma_open_queue +fpga_dma_open_queue = _libraries['libfpga_mgmt.so'].fpga_dma_open_queue fpga_dma_open_queue.restype = ctypes.c_int32 fpga_dma_open_queue.argtypes = [fpga_dma_driver, ctypes.c_int32, ctypes.c_int32, ctypes.c_bool] -fpga_dma_device_id = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_dma_device_id +fpga_dma_device_id = _libraries['libfpga_mgmt.so'].fpga_dma_device_id fpga_dma_device_id.restype = ctypes.c_int32 fpga_dma_device_id.argtypes = [fpga_dma_driver, ctypes.c_int32, ctypes.c_int32, ctypes.c_bool, ctypes.c_char * 256] size_t = ctypes.c_uint64 -fpga_dma_burst_read = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_dma_burst_read +fpga_dma_burst_read = _libraries['libfpga_mgmt.so'].fpga_dma_burst_read fpga_dma_burst_read.restype = ctypes.c_int32 fpga_dma_burst_read.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_ubyte), size_t, size_t] -fpga_dma_burst_write = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_dma_burst_write +fpga_dma_burst_write = _libraries['libfpga_mgmt.so'].fpga_dma_burst_write fpga_dma_burst_write.restype = ctypes.c_int32 fpga_dma_burst_write.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_ubyte), size_t, size_t] +fpga_pci_get_dma_device_num = _libraries['libfpga_mgmt.so'].fpga_pci_get_dma_device_num +fpga_pci_get_dma_device_num.restype = ctypes.c_int32 +fpga_pci_get_dma_device_num.argtypes = [fpga_dma_driver, ctypes.c_int32, POINTER_T(ctypes.c_int32)] # values for enumeration 'c__Ea_FPGA_CMD_RSVD' c__Ea_FPGA_CMD_RSVD__enumvalues = { @@ -96,14 +98,18 @@ def __init__(self, **args): 4: 'FPGA_CMD_CLEAR_HW_METRICS', 8: 'FPGA_CMD_FORCE_SHELL_RELOAD', 16: 'FPGA_CMD_DRAM_DATA_RETENTION', - 30: 'FPGA_CMD_ALL_FLAGS', + 
64: 'FPGA_CMD_EXTENDED_METRICS_SIZE', + 128: 'FPGA_CMD_PREFETCH', + 222: 'FPGA_CMD_ALL_FLAGS', } FPGA_CMD_RSVD = 1 FPGA_CMD_GET_HW_METRICS = 2 FPGA_CMD_CLEAR_HW_METRICS = 4 FPGA_CMD_FORCE_SHELL_RELOAD = 8 FPGA_CMD_DRAM_DATA_RETENTION = 16 -FPGA_CMD_ALL_FLAGS = 30 +FPGA_CMD_EXTENDED_METRICS_SIZE = 64 +FPGA_CMD_PREFETCH = 128 +FPGA_CMD_ALL_FLAGS = 222 c__Ea_FPGA_CMD_RSVD = ctypes.c_int # enum # values for enumeration 'c__Ea_FPGA_ERR_OK' @@ -118,9 +124,14 @@ def __init__(self, **args): 16: 'FPGA_ERR_SHELL_MISMATCH', 17: 'FPGA_ERR_POWER_VIOLATION', 18: 'FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE', + 19: 'FPGA_ERR_HARDWARE_BUSY', + 20: 'FPGA_ERR_PCI_MISSING', + 21: 'FPGA_ERR_AFI_CMD_MALFORMED', 22: 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', 23: 'FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED', - 24: 'FPGA_ERR_END', + 24: 'FPGA_ERR_SOFTWARE_PROBLEM', + 25: 'FPGA_ERR_UNRESPONSIVE', + 26: 'FPGA_ERR_END', } FPGA_ERR_OK = 0 FPGA_ERR_AFI_CMD_BUSY = 3 @@ -132,9 +143,14 @@ def __init__(self, **args): FPGA_ERR_SHELL_MISMATCH = 16 FPGA_ERR_POWER_VIOLATION = 17 FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE = 18 +FPGA_ERR_HARDWARE_BUSY = 19 +FPGA_ERR_PCI_MISSING = 20 +FPGA_ERR_AFI_CMD_MALFORMED = 21 FPGA_ERR_DRAM_DATA_RETENTION_FAILED = 22 FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED = 23 -FPGA_ERR_END = 24 +FPGA_ERR_SOFTWARE_PROBLEM = 24 +FPGA_ERR_UNRESPONSIVE = 25 +FPGA_ERR_END = 26 c__Ea_FPGA_ERR_OK = ctypes.c_int # enum # values for enumeration 'c__Ea_FPGA_STATUS_LOADED' @@ -273,6 +289,8 @@ class struct_fpga_metrics_common(ctypes.Structure): ('power_mean', ctypes.c_uint64), ('power_max', ctypes.c_uint64), ('power', ctypes.c_uint64), + ('cached_agfis', ctypes.c_uint64 * 16), + ('flags', ctypes.c_uint64), ] @@ -324,16 +342,19 @@ class struct_fpga_metrics_common(ctypes.Structure): __all__ = \ ['APP_PF_BAR0', 'APP_PF_BAR1', 'APP_PF_BAR4', 'APP_PF_BAR_MAX', 'FPGA_APP_PF', 'FPGA_CMD_ALL_FLAGS', 'FPGA_CMD_CLEAR_HW_METRICS', - 'FPGA_CMD_DRAM_DATA_RETENTION', 'FPGA_CMD_FORCE_SHELL_RELOAD', - 
'FPGA_CMD_GET_HW_METRICS', 'FPGA_CMD_RSVD', 'FPGA_DMA_EDMA', + 'FPGA_CMD_DRAM_DATA_RETENTION', 'FPGA_CMD_EXTENDED_METRICS_SIZE', + 'FPGA_CMD_FORCE_SHELL_RELOAD', 'FPGA_CMD_GET_HW_METRICS', + 'FPGA_CMD_PREFETCH', 'FPGA_CMD_RSVD', 'FPGA_DMA_EDMA', 'FPGA_DMA_XDMA', 'FPGA_ERR_AFI_CMD_API_VERSION_INVALID', - 'FPGA_ERR_AFI_CMD_BUSY', 'FPGA_ERR_AFI_ID_INVALID', - 'FPGA_ERR_CL_DDR_CALIB_FAILED', 'FPGA_ERR_CL_ID_MISMATCH', - 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', + 'FPGA_ERR_AFI_CMD_BUSY', 'FPGA_ERR_AFI_CMD_MALFORMED', + 'FPGA_ERR_AFI_ID_INVALID', 'FPGA_ERR_CL_DDR_CALIB_FAILED', + 'FPGA_ERR_CL_ID_MISMATCH', 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', 'FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE', 'FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED', 'FPGA_ERR_END', - 'FPGA_ERR_FAIL', 'FPGA_ERR_OK', 'FPGA_ERR_POWER_VIOLATION', - 'FPGA_ERR_SHELL_MISMATCH', 'FPGA_INT_STATUS_ALL', + 'FPGA_ERR_FAIL', 'FPGA_ERR_HARDWARE_BUSY', 'FPGA_ERR_OK', + 'FPGA_ERR_PCI_MISSING', 'FPGA_ERR_POWER_VIOLATION', + 'FPGA_ERR_SHELL_MISMATCH', 'FPGA_ERR_SOFTWARE_PROBLEM', + 'FPGA_ERR_UNRESPONSIVE', 'FPGA_INT_STATUS_ALL', 'FPGA_INT_STATUS_BAR1_SLAVE_TIMEOUT', 'FPGA_INT_STATUS_DMA_PCI_SLAVE_TIMEOUT', 'FPGA_INT_STATUS_OCL_SLAVE_TIMEOUT', @@ -355,8 +376,8 @@ class struct_fpga_metrics_common(ctypes.Structure): 'c__Ea_FPGA_PAP_4K_CROSS_ERROR', 'c__Ea_FPGA_STATUS_LOADED', 'c__Ea_MGMT_PF_BAR0', 'fpga_dma_burst_read', 'fpga_dma_burst_write', 'fpga_dma_device_id', 'fpga_dma_driver', - 'fpga_dma_open_queue', 'size_t', 'struct_afi_device_ids', - 'struct_fpga_clocks_common', 'struct_fpga_common_cfg', - 'struct_fpga_ddr_if_metrics_common', 'struct_fpga_meta_ids', - 'struct_fpga_metrics_common', 'struct_fpga_pci_resource_map', - 'struct_fpga_slot_spec'] + 'fpga_dma_open_queue', 'fpga_pci_get_dma_device_num', 'size_t', + 'struct_afi_device_ids', 'struct_fpga_clocks_common', + 'struct_fpga_common_cfg', 'struct_fpga_ddr_if_metrics_common', + 'struct_fpga_meta_ids', 'struct_fpga_metrics_common', + 'struct_fpga_pci_resource_map', 
'struct_fpga_slot_spec'] diff --git a/sdk/userspace/python_bindings/fpga_mgmt.py b/sdk/userspace/python_bindings/fpga_mgmt.py index ae42d906..feaff30f 100644 --- a/sdk/userspace/python_bindings/fpga_mgmt.py +++ b/sdk/userspace/python_bindings/fpga_mgmt.py @@ -12,8 +12,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # - -# Python bindings for management library +# Python bindings for mgmt library # -*- coding: utf-8 -*- # # WORD_SIZE is: 8 @@ -24,7 +23,7 @@ _libraries = {} -_libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'] = ctypes.CDLL('PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so') +_libraries['libfpga_mgmt.so'] = ctypes.CDLL('libfpga_mgmt.so') # if local wordsize is same as target, keep ctypes pointer function. if ctypes.sizeof(ctypes.c_void_p) == 8: POINTER_T = ctypes.POINTER @@ -74,25 +73,69 @@ def __init__(self, **args): -fpga_mgmt_init = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_init +fpga_mgmt_init = _libraries['libfpga_mgmt.so'].fpga_mgmt_init fpga_mgmt_init.restype = ctypes.c_int32 fpga_mgmt_init.argtypes = [] -fpga_mgmt_close = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_close +fpga_mgmt_close = _libraries['libfpga_mgmt.so'].fpga_mgmt_close fpga_mgmt_close.restype = ctypes.c_int32 fpga_mgmt_close.argtypes = [] -fpga_mgmt_strerror = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_strerror +fpga_mgmt_strerror = _libraries['libfpga_mgmt.so'].fpga_mgmt_strerror fpga_mgmt_strerror.restype = POINTER_T(ctypes.c_char) fpga_mgmt_strerror.argtypes = [ctypes.c_int32] +fpga_mgmt_strerror_long = _libraries['libfpga_mgmt.so'].fpga_mgmt_strerror_long +fpga_mgmt_strerror_long.restype = POINTER_T(ctypes.c_char) +fpga_mgmt_strerror_long.argtypes = [ctypes.c_int32] uint32_t = ctypes.c_uint32 -fpga_mgmt_set_cmd_timeout = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_set_cmd_timeout 
+fpga_mgmt_set_cmd_timeout = _libraries['libfpga_mgmt.so'].fpga_mgmt_set_cmd_timeout fpga_mgmt_set_cmd_timeout.restype = None fpga_mgmt_set_cmd_timeout.argtypes = [uint32_t] -fpga_mgmt_set_cmd_delay_msec = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_set_cmd_delay_msec +fpga_mgmt_set_cmd_delay_msec = _libraries['libfpga_mgmt.so'].fpga_mgmt_set_cmd_delay_msec fpga_mgmt_set_cmd_delay_msec.restype = None fpga_mgmt_set_cmd_delay_msec.argtypes = [uint32_t] class struct_fpga_mgmt_image_info(ctypes.Structure): pass +class struct_fpga_meta_ids(ctypes.Structure): + pass + +class struct_afi_device_ids(ctypes.Structure): + _pack_ = True # source:True + _fields_ = [ + ('vendor_id', ctypes.c_uint16), + ('device_id', ctypes.c_uint16), + ('svid', ctypes.c_uint16), + ('ssid', ctypes.c_uint16), + ] + +struct_fpga_meta_ids._pack_ = True # source:True +struct_fpga_meta_ids._fields_ = [ + ('afi_id', ctypes.c_char * 64), + ('afi_device_ids', struct_afi_device_ids), +] + +class struct_fpga_slot_spec(ctypes.Structure): + pass + +class struct_fpga_pci_resource_map(ctypes.Structure): + _pack_ = True # source:True + _fields_ = [ + ('vendor_id', ctypes.c_uint16), + ('device_id', ctypes.c_uint16), + ('subsystem_device_id', ctypes.c_uint16), + ('subsystem_vendor_id', ctypes.c_uint16), + ('domain', ctypes.c_uint16), + ('bus', ctypes.c_ubyte), + ('dev', ctypes.c_ubyte), + ('func', ctypes.c_ubyte), + ('resource_burstable', ctypes.c_bool * 5), + ('resource_size', ctypes.c_uint64 * 5), + ] + +struct_fpga_slot_spec._pack_ = True # source:True +struct_fpga_slot_spec._fields_ = [ + ('map', struct_fpga_pci_resource_map * 2), +] + class struct_fpga_metrics_common(ctypes.Structure): pass @@ -135,47 +178,8 @@ class struct_fpga_clocks_common(ctypes.Structure): ('power_mean', ctypes.c_uint64), ('power_max', ctypes.c_uint64), ('power', ctypes.c_uint64), -] - -class struct_fpga_slot_spec(ctypes.Structure): - pass - -class struct_fpga_pci_resource_map(ctypes.Structure): - _pack_ = True 
# source:True - _fields_ = [ - ('vendor_id', ctypes.c_uint16), - ('device_id', ctypes.c_uint16), - ('subsystem_device_id', ctypes.c_uint16), - ('subsystem_vendor_id', ctypes.c_uint16), - ('domain', ctypes.c_uint16), - ('bus', ctypes.c_ubyte), - ('dev', ctypes.c_ubyte), - ('func', ctypes.c_ubyte), - ('resource_burstable', ctypes.c_bool * 5), - ('resource_size', ctypes.c_uint64 * 5), - ] - -struct_fpga_slot_spec._pack_ = True # source:True -struct_fpga_slot_spec._fields_ = [ - ('map', struct_fpga_pci_resource_map * 2), -] - -class struct_fpga_meta_ids(ctypes.Structure): - pass - -class struct_afi_device_ids(ctypes.Structure): - _pack_ = True # source:True - _fields_ = [ - ('vendor_id', ctypes.c_uint16), - ('device_id', ctypes.c_uint16), - ('svid', ctypes.c_uint16), - ('ssid', ctypes.c_uint16), - ] - -struct_fpga_meta_ids._pack_ = True # source:True -struct_fpga_meta_ids._fields_ = [ - ('afi_id', ctypes.c_char * 64), - ('afi_device_ids', struct_afi_device_ids), + ('cached_agfis', ctypes.c_uint64 * 16), + ('flags', ctypes.c_uint64), ] struct_fpga_mgmt_image_info._pack_ = True # source:False @@ -189,25 +193,25 @@ class struct_afi_device_ids(ctypes.Structure): ('metrics', struct_fpga_metrics_common), ] -fpga_mgmt_describe_local_image = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_describe_local_image +fpga_mgmt_describe_local_image = _libraries['libfpga_mgmt.so'].fpga_mgmt_describe_local_image fpga_mgmt_describe_local_image.restype = ctypes.c_int32 fpga_mgmt_describe_local_image.argtypes = [ctypes.c_int32, POINTER_T(struct_fpga_mgmt_image_info), uint32_t] -fpga_mgmt_get_status = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_get_status +fpga_mgmt_get_status = _libraries['libfpga_mgmt.so'].fpga_mgmt_get_status fpga_mgmt_get_status.restype = ctypes.c_int32 fpga_mgmt_get_status.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_int32), POINTER_T(ctypes.c_int32)] -fpga_mgmt_get_status_name = 
_libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_get_status_name +fpga_mgmt_get_status_name = _libraries['libfpga_mgmt.so'].fpga_mgmt_get_status_name fpga_mgmt_get_status_name.restype = POINTER_T(ctypes.c_char) fpga_mgmt_get_status_name.argtypes = [ctypes.c_int32] -fpga_mgmt_clear_local_image = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_clear_local_image +fpga_mgmt_clear_local_image = _libraries['libfpga_mgmt.so'].fpga_mgmt_clear_local_image fpga_mgmt_clear_local_image.restype = ctypes.c_int32 fpga_mgmt_clear_local_image.argtypes = [ctypes.c_int32] -fpga_mgmt_clear_local_image_sync = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_clear_local_image_sync +fpga_mgmt_clear_local_image_sync = _libraries['libfpga_mgmt.so'].fpga_mgmt_clear_local_image_sync fpga_mgmt_clear_local_image_sync.restype = ctypes.c_int32 fpga_mgmt_clear_local_image_sync.argtypes = [ctypes.c_int32, uint32_t, uint32_t, POINTER_T(struct_fpga_mgmt_image_info)] -fpga_mgmt_load_local_image = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_load_local_image +fpga_mgmt_load_local_image = _libraries['libfpga_mgmt.so'].fpga_mgmt_load_local_image fpga_mgmt_load_local_image.restype = ctypes.c_int32 fpga_mgmt_load_local_image.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_char)] -fpga_mgmt_load_local_image_flags = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_load_local_image_flags +fpga_mgmt_load_local_image_flags = _libraries['libfpga_mgmt.so'].fpga_mgmt_load_local_image_flags fpga_mgmt_load_local_image_flags.restype = ctypes.c_int32 fpga_mgmt_load_local_image_flags.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_char), uint32_t] class union_fpga_mgmt_load_local_image_options(ctypes.Union): @@ -230,29 +234,29 @@ class struct_fpga_mgmt_load_local_image_options_0(ctypes.Structure): ('PADDING_0', ctypes.c_ubyte * 992), ] -fpga_mgmt_init_load_local_image_options = 
_libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_init_load_local_image_options +fpga_mgmt_init_load_local_image_options = _libraries['libfpga_mgmt.so'].fpga_mgmt_init_load_local_image_options fpga_mgmt_init_load_local_image_options.restype = ctypes.c_int32 fpga_mgmt_init_load_local_image_options.argtypes = [POINTER_T(union_fpga_mgmt_load_local_image_options)] -fpga_mgmt_load_local_image_with_options = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_load_local_image_with_options +fpga_mgmt_load_local_image_with_options = _libraries['libfpga_mgmt.so'].fpga_mgmt_load_local_image_with_options fpga_mgmt_load_local_image_with_options.restype = ctypes.c_int32 fpga_mgmt_load_local_image_with_options.argtypes = [POINTER_T(union_fpga_mgmt_load_local_image_options)] -fpga_mgmt_load_local_image_sync = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_load_local_image_sync +fpga_mgmt_load_local_image_sync = _libraries['libfpga_mgmt.so'].fpga_mgmt_load_local_image_sync fpga_mgmt_load_local_image_sync.restype = ctypes.c_int32 fpga_mgmt_load_local_image_sync.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_char), uint32_t, uint32_t, POINTER_T(struct_fpga_mgmt_image_info)] -fpga_mgmt_load_local_image_sync_flags = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_load_local_image_sync_flags +fpga_mgmt_load_local_image_sync_flags = _libraries['libfpga_mgmt.so'].fpga_mgmt_load_local_image_sync_flags fpga_mgmt_load_local_image_sync_flags.restype = ctypes.c_int32 fpga_mgmt_load_local_image_sync_flags.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_char), uint32_t, uint32_t, uint32_t, POINTER_T(struct_fpga_mgmt_image_info)] -fpga_mgmt_load_local_image_sync_with_options = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_load_local_image_sync_with_options +fpga_mgmt_load_local_image_sync_with_options = _libraries['libfpga_mgmt.so'].fpga_mgmt_load_local_image_sync_with_options 
fpga_mgmt_load_local_image_sync_with_options.restype = ctypes.c_int32 fpga_mgmt_load_local_image_sync_with_options.argtypes = [POINTER_T(union_fpga_mgmt_load_local_image_options), uint32_t, uint32_t, POINTER_T(struct_fpga_mgmt_image_info)] -fpga_mgmt_get_vLED_status = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_get_vLED_status +fpga_mgmt_get_vLED_status = _libraries['libfpga_mgmt.so'].fpga_mgmt_get_vLED_status fpga_mgmt_get_vLED_status.restype = ctypes.c_int32 fpga_mgmt_get_vLED_status.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_uint16)] uint16_t = ctypes.c_uint16 -fpga_mgmt_set_vDIP = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_set_vDIP +fpga_mgmt_set_vDIP = _libraries['libfpga_mgmt.so'].fpga_mgmt_set_vDIP fpga_mgmt_set_vDIP.restype = ctypes.c_int32 fpga_mgmt_set_vDIP.argtypes = [ctypes.c_int32, uint16_t] -fpga_mgmt_get_vDIP_status = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_mgmt_get_vDIP_status +fpga_mgmt_get_vDIP_status = _libraries['libfpga_mgmt.so'].fpga_mgmt_get_vDIP_status fpga_mgmt_get_vDIP_status.restype = ctypes.c_int32 fpga_mgmt_get_vDIP_status.argtypes = [ctypes.c_int32, POINTER_T(ctypes.c_uint16)] @@ -263,14 +267,18 @@ class struct_fpga_mgmt_load_local_image_options_0(ctypes.Structure): 4: 'FPGA_CMD_CLEAR_HW_METRICS', 8: 'FPGA_CMD_FORCE_SHELL_RELOAD', 16: 'FPGA_CMD_DRAM_DATA_RETENTION', - 30: 'FPGA_CMD_ALL_FLAGS', + 64: 'FPGA_CMD_EXTENDED_METRICS_SIZE', + 128: 'FPGA_CMD_PREFETCH', + 222: 'FPGA_CMD_ALL_FLAGS', } FPGA_CMD_RSVD = 1 FPGA_CMD_GET_HW_METRICS = 2 FPGA_CMD_CLEAR_HW_METRICS = 4 FPGA_CMD_FORCE_SHELL_RELOAD = 8 FPGA_CMD_DRAM_DATA_RETENTION = 16 -FPGA_CMD_ALL_FLAGS = 30 +FPGA_CMD_EXTENDED_METRICS_SIZE = 64 +FPGA_CMD_PREFETCH = 128 +FPGA_CMD_ALL_FLAGS = 222 c__Ea_FPGA_CMD_RSVD = ctypes.c_int # enum # values for enumeration 'c__Ea_FPGA_ERR_OK' @@ -285,9 +293,14 @@ class struct_fpga_mgmt_load_local_image_options_0(ctypes.Structure): 16: 'FPGA_ERR_SHELL_MISMATCH', 17: 
'FPGA_ERR_POWER_VIOLATION', 18: 'FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE', + 19: 'FPGA_ERR_HARDWARE_BUSY', + 20: 'FPGA_ERR_PCI_MISSING', + 21: 'FPGA_ERR_AFI_CMD_MALFORMED', 22: 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', 23: 'FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED', - 24: 'FPGA_ERR_END', + 24: 'FPGA_ERR_SOFTWARE_PROBLEM', + 25: 'FPGA_ERR_UNRESPONSIVE', + 26: 'FPGA_ERR_END', } FPGA_ERR_OK = 0 FPGA_ERR_AFI_CMD_BUSY = 3 @@ -299,9 +312,14 @@ class struct_fpga_mgmt_load_local_image_options_0(ctypes.Structure): FPGA_ERR_SHELL_MISMATCH = 16 FPGA_ERR_POWER_VIOLATION = 17 FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE = 18 +FPGA_ERR_HARDWARE_BUSY = 19 +FPGA_ERR_PCI_MISSING = 20 +FPGA_ERR_AFI_CMD_MALFORMED = 21 FPGA_ERR_DRAM_DATA_RETENTION_FAILED = 22 FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED = 23 -FPGA_ERR_END = 24 +FPGA_ERR_SOFTWARE_PROBLEM = 24 +FPGA_ERR_UNRESPONSIVE = 25 +FPGA_ERR_END = 26 c__Ea_FPGA_ERR_OK = ctypes.c_int # enum # values for enumeration 'c__Ea_FPGA_STATUS_LOADED' @@ -412,15 +430,19 @@ class struct_fpga_common_cfg(ctypes.Structure): __all__ = \ ['APP_PF_BAR0', 'APP_PF_BAR1', 'APP_PF_BAR4', 'APP_PF_BAR_MAX', 'FPGA_APP_PF', 'FPGA_CMD_ALL_FLAGS', 'FPGA_CMD_CLEAR_HW_METRICS', - 'FPGA_CMD_DRAM_DATA_RETENTION', 'FPGA_CMD_FORCE_SHELL_RELOAD', - 'FPGA_CMD_GET_HW_METRICS', 'FPGA_CMD_RSVD', + 'FPGA_CMD_DRAM_DATA_RETENTION', 'FPGA_CMD_EXTENDED_METRICS_SIZE', + 'FPGA_CMD_FORCE_SHELL_RELOAD', 'FPGA_CMD_GET_HW_METRICS', + 'FPGA_CMD_PREFETCH', 'FPGA_CMD_RSVD', 'FPGA_ERR_AFI_CMD_API_VERSION_INVALID', 'FPGA_ERR_AFI_CMD_BUSY', - 'FPGA_ERR_AFI_ID_INVALID', 'FPGA_ERR_CL_DDR_CALIB_FAILED', - 'FPGA_ERR_CL_ID_MISMATCH', 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', + 'FPGA_ERR_AFI_CMD_MALFORMED', 'FPGA_ERR_AFI_ID_INVALID', + 'FPGA_ERR_CL_DDR_CALIB_FAILED', 'FPGA_ERR_CL_ID_MISMATCH', + 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', 'FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE', 'FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED', 'FPGA_ERR_END', - 'FPGA_ERR_FAIL', 'FPGA_ERR_OK', 'FPGA_ERR_POWER_VIOLATION', - 
'FPGA_ERR_SHELL_MISMATCH', 'FPGA_INT_STATUS_ALL', + 'FPGA_ERR_FAIL', 'FPGA_ERR_HARDWARE_BUSY', 'FPGA_ERR_OK', + 'FPGA_ERR_PCI_MISSING', 'FPGA_ERR_POWER_VIOLATION', + 'FPGA_ERR_SHELL_MISMATCH', 'FPGA_ERR_SOFTWARE_PROBLEM', + 'FPGA_ERR_UNRESPONSIVE', 'FPGA_INT_STATUS_ALL', 'FPGA_INT_STATUS_BAR1_SLAVE_TIMEOUT', 'FPGA_INT_STATUS_DMA_PCI_SLAVE_TIMEOUT', 'FPGA_INT_STATUS_OCL_SLAVE_TIMEOUT', @@ -453,10 +475,10 @@ class struct_fpga_common_cfg(ctypes.Structure): 'fpga_mgmt_load_local_image_with_options', 'fpga_mgmt_set_cmd_delay_msec', 'fpga_mgmt_set_cmd_timeout', 'fpga_mgmt_set_vDIP', 'fpga_mgmt_strerror', - 'struct_afi_device_ids', 'struct_fpga_clocks_common', - 'struct_fpga_common_cfg', 'struct_fpga_ddr_if_metrics_common', - 'struct_fpga_meta_ids', 'struct_fpga_metrics_common', - 'struct_fpga_mgmt_image_info', + 'fpga_mgmt_strerror_long', 'struct_afi_device_ids', + 'struct_fpga_clocks_common', 'struct_fpga_common_cfg', + 'struct_fpga_ddr_if_metrics_common', 'struct_fpga_meta_ids', + 'struct_fpga_metrics_common', 'struct_fpga_mgmt_image_info', 'struct_fpga_mgmt_load_local_image_options_0', 'struct_fpga_pci_resource_map', 'struct_fpga_slot_spec', 'uint16_t', 'uint32_t', diff --git a/sdk/userspace/python_bindings/fpga_pci.py b/sdk/userspace/python_bindings/fpga_pci.py index d0748a2f..46bd8e88 100644 --- a/sdk/userspace/python_bindings/fpga_pci.py +++ b/sdk/userspace/python_bindings/fpga_pci.py @@ -12,7 +12,6 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # - # Python bindings for pci library # -*- coding: utf-8 -*- # @@ -24,7 +23,7 @@ _libraries = {} -_libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'] = ctypes.CDLL('PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so') +_libraries['libfpga_mgmt.so'] = ctypes.CDLL('libfpga_mgmt.so') # if local wordsize is same as target, keep ctypes pointer function. 
if ctypes.sizeof(ctypes.c_void_p) == 8: POINTER_T = ctypes.POINTER @@ -75,11 +74,11 @@ def __init__(self, **args): pci_bar_handle_t = ctypes.c_int32 -fpga_pci_init = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_init +fpga_pci_init = _libraries['libfpga_mgmt.so'].fpga_pci_init fpga_pci_init.restype = ctypes.c_int32 fpga_pci_init.argtypes = [] uint32_t = ctypes.c_uint32 -fpga_pci_attach = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_attach +fpga_pci_attach = _libraries['libfpga_mgmt.so'].fpga_pci_attach fpga_pci_attach.restype = ctypes.c_int32 fpga_pci_attach.argtypes = [ctypes.c_int32, ctypes.c_int32, ctypes.c_int32, uint32_t, POINTER_T(ctypes.c_int32)] @@ -91,36 +90,36 @@ def __init__(self, **args): BURST_CAPABLE = 1 FPGA_ATTACH_RESERVED = 4294967294 c__Ea_BURST_CAPABLE = ctypes.c_int # enum -fpga_pci_detach = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_detach +fpga_pci_detach = _libraries['libfpga_mgmt.so'].fpga_pci_detach fpga_pci_detach.restype = ctypes.c_int32 fpga_pci_detach.argtypes = [pci_bar_handle_t] uint64_t = ctypes.c_uint64 -fpga_pci_poke = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_poke +fpga_pci_poke = _libraries['libfpga_mgmt.so'].fpga_pci_poke fpga_pci_poke.restype = ctypes.c_int32 fpga_pci_poke.argtypes = [pci_bar_handle_t, uint64_t, uint32_t] uint8_t = ctypes.c_uint8 -fpga_pci_poke8 = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_poke8 +fpga_pci_poke8 = _libraries['libfpga_mgmt.so'].fpga_pci_poke8 fpga_pci_poke8.restype = ctypes.c_int32 fpga_pci_poke8.argtypes = [pci_bar_handle_t, uint64_t, uint8_t] -fpga_pci_poke64 = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_poke64 +fpga_pci_poke64 = _libraries['libfpga_mgmt.so'].fpga_pci_poke64 fpga_pci_poke64.restype = ctypes.c_int32 fpga_pci_poke64.argtypes = [pci_bar_handle_t, uint64_t, uint64_t] -fpga_pci_write_burst = 
_libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_write_burst +fpga_pci_write_burst = _libraries['libfpga_mgmt.so'].fpga_pci_write_burst fpga_pci_write_burst.restype = ctypes.c_int32 fpga_pci_write_burst.argtypes = [pci_bar_handle_t, uint64_t, POINTER_T(ctypes.c_uint32), uint64_t] -fpga_pci_peek = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_peek +fpga_pci_peek = _libraries['libfpga_mgmt.so'].fpga_pci_peek fpga_pci_peek.restype = ctypes.c_int32 fpga_pci_peek.argtypes = [pci_bar_handle_t, uint64_t, POINTER_T(ctypes.c_uint32)] -fpga_pci_peek8 = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_peek8 +fpga_pci_peek8 = _libraries['libfpga_mgmt.so'].fpga_pci_peek8 fpga_pci_peek8.restype = ctypes.c_int32 fpga_pci_peek8.argtypes = [pci_bar_handle_t, uint64_t, POINTER_T(ctypes.c_ubyte)] -fpga_pci_peek64 = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_peek64 +fpga_pci_peek64 = _libraries['libfpga_mgmt.so'].fpga_pci_peek64 fpga_pci_peek64.restype = ctypes.c_int32 fpga_pci_peek64.argtypes = [pci_bar_handle_t, uint64_t, POINTER_T(ctypes.c_uint64)] class struct_fpga_slot_spec(ctypes.Structure): pass -fpga_pci_get_slot_spec = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_get_slot_spec +fpga_pci_get_slot_spec = _libraries['libfpga_mgmt.so'].fpga_pci_get_slot_spec fpga_pci_get_slot_spec.restype = ctypes.c_int32 fpga_pci_get_slot_spec.argtypes = [ctypes.c_int32, POINTER_T(struct_fpga_slot_spec)] class struct_fpga_pci_resource_map(ctypes.Structure): @@ -143,21 +142,22 @@ class struct_fpga_pci_resource_map(ctypes.Structure): ('map', struct_fpga_pci_resource_map * 2), ] -fpga_pci_get_all_slot_specs = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_get_all_slot_specs +fpga_pci_get_all_slot_specs = _libraries['libfpga_mgmt.so'].fpga_pci_get_all_slot_specs fpga_pci_get_all_slot_specs.restype = ctypes.c_int32 fpga_pci_get_all_slot_specs.argtypes = 
[struct_fpga_slot_spec * 0, ctypes.c_int32] -fpga_pci_get_resource_map = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_get_resource_map +fpga_pci_get_resource_map = _libraries['libfpga_mgmt.so'].fpga_pci_get_resource_map fpga_pci_get_resource_map.restype = ctypes.c_int32 fpga_pci_get_resource_map.argtypes = [ctypes.c_int32, ctypes.c_int32, POINTER_T(struct_fpga_pci_resource_map)] -fpga_pci_rescan_slot_app_pfs = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_rescan_slot_app_pfs +fpga_pci_rescan_slot_app_pfs = _libraries['libfpga_mgmt.so'].fpga_pci_rescan_slot_app_pfs fpga_pci_rescan_slot_app_pfs.restype = ctypes.c_int32 fpga_pci_rescan_slot_app_pfs.argtypes = [ctypes.c_int32] -fpga_pci_get_address = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_get_address +fpga_pci_get_address = _libraries['libfpga_mgmt.so'].fpga_pci_get_address fpga_pci_get_address.restype = ctypes.c_int32 fpga_pci_get_address.argtypes = [pci_bar_handle_t, uint64_t, uint64_t, POINTER_T(POINTER_T(None))] -fpga_pci_memset = _libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'].fpga_pci_memset +fpga_pci_memset = _libraries['libfpga_mgmt.so'].fpga_pci_memset fpga_pci_memset.restype = ctypes.c_int32 fpga_pci_memset.argtypes = [pci_bar_handle_t, uint64_t, uint32_t, uint64_t] + class union_c__UA_pthread_mutex_t(ctypes.Union): pass @@ -193,7 +193,14 @@ class struct___pthread_internal_list(ctypes.Structure): ('PADDING_0', ctypes.c_ubyte * 32), ] -fpga_pci_readdir_mutex = (union_c__UA_pthread_mutex_t).in_dll(_libraries['PY_BIND_AFI_MGMT_LIBS_DST_DIR/libfpga_mgmt.so'], 'fpga_pci_readdir_mutex') + +fpga_pci_readdir_mutex = None # Variable union_c__UA_pthread_mutex_t +fpga_acquire_readdir_lock = _libraries['libfpga_mgmt.so'].fpga_acquire_readdir_lock +fpga_acquire_readdir_lock.restype = ctypes.c_int32 +fpga_acquire_readdir_lock.argtypes = [] +fpga_release_readdir_lock = _libraries['libfpga_mgmt.so'].fpga_release_readdir_lock 
+fpga_release_readdir_lock.restype = ctypes.c_int32 +fpga_release_readdir_lock.argtypes = [] # values for enumeration 'c__Ea_FPGA_CMD_RSVD' c__Ea_FPGA_CMD_RSVD__enumvalues = { @@ -202,14 +209,18 @@ class struct___pthread_internal_list(ctypes.Structure): 4: 'FPGA_CMD_CLEAR_HW_METRICS', 8: 'FPGA_CMD_FORCE_SHELL_RELOAD', 16: 'FPGA_CMD_DRAM_DATA_RETENTION', - 30: 'FPGA_CMD_ALL_FLAGS', + 64: 'FPGA_CMD_EXTENDED_METRICS_SIZE', + 128: 'FPGA_CMD_PREFETCH', + 222: 'FPGA_CMD_ALL_FLAGS', } FPGA_CMD_RSVD = 1 FPGA_CMD_GET_HW_METRICS = 2 FPGA_CMD_CLEAR_HW_METRICS = 4 FPGA_CMD_FORCE_SHELL_RELOAD = 8 FPGA_CMD_DRAM_DATA_RETENTION = 16 -FPGA_CMD_ALL_FLAGS = 30 +FPGA_CMD_EXTENDED_METRICS_SIZE = 64 +FPGA_CMD_PREFETCH = 128 +FPGA_CMD_ALL_FLAGS = 222 c__Ea_FPGA_CMD_RSVD = ctypes.c_int # enum # values for enumeration 'c__Ea_FPGA_ERR_OK' @@ -224,9 +235,14 @@ class struct___pthread_internal_list(ctypes.Structure): 16: 'FPGA_ERR_SHELL_MISMATCH', 17: 'FPGA_ERR_POWER_VIOLATION', 18: 'FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE', + 19: 'FPGA_ERR_HARDWARE_BUSY', + 20: 'FPGA_ERR_PCI_MISSING', + 21: 'FPGA_ERR_AFI_CMD_MALFORMED', 22: 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', 23: 'FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED', - 24: 'FPGA_ERR_END', + 24: 'FPGA_ERR_SOFTWARE_PROBLEM', + 25: 'FPGA_ERR_UNRESPONSIVE', + 26: 'FPGA_ERR_END', } FPGA_ERR_OK = 0 FPGA_ERR_AFI_CMD_BUSY = 3 @@ -238,9 +254,14 @@ class struct___pthread_internal_list(ctypes.Structure): FPGA_ERR_SHELL_MISMATCH = 16 FPGA_ERR_POWER_VIOLATION = 17 FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE = 18 +FPGA_ERR_HARDWARE_BUSY = 19 +FPGA_ERR_PCI_MISSING = 20 +FPGA_ERR_AFI_CMD_MALFORMED = 21 FPGA_ERR_DRAM_DATA_RETENTION_FAILED = 22 FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED = 23 -FPGA_ERR_END = 24 +FPGA_ERR_SOFTWARE_PROBLEM = 24 +FPGA_ERR_UNRESPONSIVE = 25 +FPGA_ERR_END = 26 c__Ea_FPGA_ERR_OK = ctypes.c_int # enum # values for enumeration 'c__Ea_FPGA_STATUS_LOADED' @@ -358,6 +379,8 @@ class struct_fpga_metrics_common(ctypes.Structure): ('power_mean', 
ctypes.c_uint64), ('power_max', ctypes.c_uint64), ('power', ctypes.c_uint64), + ('cached_agfis', ctypes.c_uint64 * 16), + ('flags', ctypes.c_uint64), ] @@ -406,20 +429,63 @@ class struct_fpga_metrics_common(ctypes.Structure): FPGA_PAP_WCHANNEL_TIMEOUT_ERROR = 1024 FPGA_PAP_ERROR_STATUS_ALL = 1918 c__Ea_FPGA_PAP_4K_CROSS_ERROR = ctypes.c_int # enum +class union_c__UA_pthread_mutex_t(ctypes.Union): + pass + +class struct___pthread_mutex_s(ctypes.Structure): + pass + +class struct___pthread_internal_list(ctypes.Structure): + pass + +struct___pthread_internal_list._pack_ = True # source:False +struct___pthread_internal_list._fields_ = [ + ('__prev', POINTER_T(struct___pthread_internal_list)), + ('__next', POINTER_T(struct___pthread_internal_list)), +] + +struct___pthread_mutex_s._pack_ = True # source:False +struct___pthread_mutex_s._fields_ = [ + ('__lock', ctypes.c_int32), + ('__count', ctypes.c_uint32), + ('__owner', ctypes.c_int32), + ('__nusers', ctypes.c_uint32), + ('__kind', ctypes.c_int32), + ('__spins', ctypes.c_int16), + ('__elision', ctypes.c_int16), + ('__list', struct___pthread_internal_list), +] + +union_c__UA_pthread_mutex_t._pack_ = True # source:False +union_c__UA_pthread_mutex_t._fields_ = [ + ('__data', struct___pthread_mutex_s), + ('__size', ctypes.c_char * 40), + ('__align', ctypes.c_int64), + ('PADDING_0', ctypes.c_ubyte * 32), +] + __all__ = \ ['APP_PF_BAR0', 'APP_PF_BAR1', 'APP_PF_BAR4', 'APP_PF_BAR_MAX', 'BURST_CAPABLE', 'FPGA_APP_PF', 'FPGA_ATTACH_RESERVED', 'FPGA_CMD_ALL_FLAGS', 'FPGA_CMD_CLEAR_HW_METRICS', - 'FPGA_CMD_DRAM_DATA_RETENTION', 'FPGA_CMD_FORCE_SHELL_RELOAD', - 'FPGA_CMD_GET_HW_METRICS', 'FPGA_CMD_RSVD', + 'FPGA_CMD_DRAM_DATA_RETENTION', 'FPGA_CMD_EXTENDED_METRICS_SIZE', + 'FPGA_CMD_FORCE_SHELL_RELOAD', 'FPGA_CMD_GET_HW_METRICS', + 'FPGA_CMD_PREFETCH', 'FPGA_CMD_RSVD', 'FPGA_ERR_AFI_CMD_API_VERSION_INVALID', 'FPGA_ERR_AFI_CMD_BUSY', - 'FPGA_ERR_AFI_ID_INVALID', 'FPGA_ERR_CL_DDR_CALIB_FAILED', - 'FPGA_ERR_CL_ID_MISMATCH', 
'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', + 'FPGA_ERR_AFI_CMD_MALFORMED', 'FPGA_ERR_AFI_ID_INVALID', + 'FPGA_ERR_CL_DDR_CALIB_FAILED', 'FPGA_ERR_CL_ID_MISMATCH', + 'FPGA_ERR_DRAM_DATA_RETENTION_FAILED', 'FPGA_ERR_DRAM_DATA_RETENTION_NOT_POSSIBLE', 'FPGA_ERR_DRAM_DATA_RETENTION_SETUP_FAILED', 'FPGA_ERR_END', - 'FPGA_ERR_FAIL', 'FPGA_ERR_OK', 'FPGA_ERR_POWER_VIOLATION', - 'FPGA_ERR_SHELL_MISMATCH', 'FPGA_INT_STATUS_ALL', + 'FPGA_ERR_FAIL', 'FPGA_ERR_HARDWARE_BUSY', 'FPGA_ERR_OK', + 'FPGA_ERR_PCI_MISSING', 'FPGA_ERR_POWER_VIOLATION', + 'FPGA_ERR_SHELL_MISMATCH', 'FPGA_ERR_SOFTWARE_PROBLEM', + 'FPGA_ERR_UNRESPONSIVE', 'FPGA_INT_STATUS_ALL', 'FPGA_INT_STATUS_BAR1_SLAVE_TIMEOUT', + 'FPGA_ERR_FAIL', 'FPGA_ERR_OK', 'FPGA_ERR_PCI_MISSING', + 'FPGA_ERR_POWER_VIOLATION', 'FPGA_ERR_SHELL_MISMATCH', + 'FPGA_ERR_SOFTWARE_PROBLEM', 'FPGA_ERR_UNRESPONSIVE', + 'FPGA_INT_STATUS_ALL', 'FPGA_INT_STATUS_BAR1_SLAVE_TIMEOUT', 'FPGA_INT_STATUS_DMA_PCI_SLAVE_TIMEOUT', 'FPGA_INT_STATUS_OCL_SLAVE_TIMEOUT', 'FPGA_INT_STATUS_PCI_MASTER_AXI_PROTOCOL_ERROR', @@ -439,13 +505,14 @@ class struct_fpga_metrics_common(ctypes.Structure): 'c__Ea_FPGA_CMD_RSVD', 'c__Ea_FPGA_ERR_OK', 'c__Ea_FPGA_INT_STATUS_SDACL_SLAVE_TIMEOUT', 'c__Ea_FPGA_PAP_4K_CROSS_ERROR', 'c__Ea_FPGA_STATUS_LOADED', - 'c__Ea_MGMT_PF_BAR0', 'fpga_pci_attach', 'fpga_pci_detach', - 'fpga_pci_get_address', 'fpga_pci_get_all_slot_specs', - 'fpga_pci_get_resource_map', 'fpga_pci_get_slot_spec', - 'fpga_pci_init', 'fpga_pci_memset', 'fpga_pci_peek', - 'fpga_pci_peek64', 'fpga_pci_peek8', 'fpga_pci_poke', - 'fpga_pci_poke64', 'fpga_pci_poke8', 'fpga_pci_readdir_mutex', - 'fpga_pci_rescan_slot_app_pfs', 'fpga_pci_write_burst', + 'c__Ea_MGMT_PF_BAR0', 'fpga_acquire_readdir_lock', + 'fpga_pci_attach', 'fpga_pci_detach', 'fpga_pci_get_address', + 'fpga_pci_get_all_slot_specs', 'fpga_pci_get_resource_map', + 'fpga_pci_get_slot_spec', 'fpga_pci_init', 'fpga_pci_memset', + 'fpga_pci_peek', 'fpga_pci_peek64', 'fpga_pci_peek8', + 'fpga_pci_poke', 
'fpga_pci_poke64', 'fpga_pci_poke8', + 'fpga_pci_readdir_mutex', 'fpga_pci_rescan_slot_app_pfs', + 'fpga_pci_write_burst', 'fpga_release_readdir_lock', 'pci_bar_handle_t', 'struct___pthread_internal_list', 'struct___pthread_mutex_s', 'struct_afi_device_ids', 'struct_fpga_clocks_common', 'struct_fpga_common_cfg', diff --git a/shared/bin/set_common_env_vars.sh b/shared/bin/set_common_env_vars.sh index 27a8f4f6..52a07815 100644 --- a/shared/bin/set_common_env_vars.sh +++ b/shared/bin/set_common_env_vars.sh @@ -61,6 +61,9 @@ export SDK_DIR=$AWS_FPGA_REPO_DIR/sdk # SDACCEL # Setup Location of SDACCEL_DIR export SDACCEL_DIR=$AWS_FPGA_REPO_DIR/SDAccel +# Vitis +# Setup Location of VITIS_DIR +export VITIS_DIR=$AWS_FPGA_REPO_DIR/Vitis # PYTHONPATH # Update PYTHONPATH with libraries used for unit testing @@ -72,3 +75,6 @@ PYTHONPATH=$python_lib:$PYTHONPATH export PATH=$(echo $PATH | sed -e 's/\(^\|:\)[^:]\+\/shared\/bin\/scripts\(:\|$\)/:/g; s/^://; s/:$//') PATH=$AWS_FPGA_REPO_DIR/shared/bin/scripts:$PATH + +# Enable xilinx licensing +export XILINX_ENABLE_AWS_WHITELIST=095707098027 diff --git a/shared/bin/set_common_functions.sh b/shared/bin/set_common_functions.sh index 519b5e83..fe31c60b 100644 --- a/shared/bin/set_common_functions.sh +++ b/shared/bin/set_common_functions.sh @@ -41,12 +41,22 @@ function is_myvivado_set { fi } - -function is_vivado_available { - if ! vivado -version > /dev/null 2>&1 ; then +function is_xilinx_path_set { + if env | grep -q ^XILINX_PATH + then + true + else false + fi +} + +# Function to check whether a command exists. 
+exists() { + if command -v $1 >/dev/null 2>&1 + then + return 0 else - true + return 1 fi } @@ -61,8 +71,13 @@ function get_base_vivado_version { unset MYVIVADO local __vivado_version=$(get_vivado_version) export MYVIVADO=$MYVIVADO_ENV_VAR_BACKUP + elif is_xilinx_path_set + then + local XILINX_PATH_ENV_VAR_BACKUP=$XILINX_PATH + unset XILINX_PATH + local __vivado_version=$(get_vivado_version) + export XILINX_PATH=$XILINX_PATH_ENV_VAR_BACKUP else - local __vivado_version=$(get_vivado_version) fi @@ -100,7 +115,20 @@ function get_vivado_version { } function setup_patches { + local caller_script="${BASH_SOURCE[1]}" patch_AR71715 + patch_AR73068 "$caller_script" +} + +function is_patch_applied { + local patch_name="$1" + local long_vivado_version=$(get_vivado_version) + + if [[ "$long_vivado_version" =~ .*"$patch_name".* ]]; then + true + else + false + fi } function patch_AR71715 { @@ -117,7 +145,7 @@ function patch_AR71715 { local base_vivado_version=$(get_base_vivado_version) is_patch_valid=false - info_msg "Base vivado version is $base_vivado_version ; Checking if patch AR71715 needs to be installed" + info_msg "Base vivado version is $base_vivado_version. Checking if patch AR71715 needs to be installed" for vivado_version in "${valid_vivado_versions[@]}" do if [ ":$vivado_version" == ":$base_vivado_version" ]; then @@ -150,6 +178,124 @@ function patch_AR71715 { fi } +function install_patch { + local patch_name="$1" + local patch_bucket="$2" + local patch_object="$3" + local patch_dir_name="${patch_object%.*}" + + if is_patch_applied $patch_name + then + info_msg "$patch_name is already applied. Skipping." + else + info_msg "Applying $patch_name" + info_msg "in bucket $patch_bucket" + info_msg "object $patch_object" + + # Checking if the patches directory exists and making it if it doesn't + [ -d $script_dir/patches ] || mkdir -p $script_dir/patches + + info_msg "Downloading the $patch_name from $patch_bucket/$patch_object." 
+ debug_msg "curl -s $patch_bucket/$patch_object -o $script_dir/patches/$patch_object" + + curl -s $patch_bucket/$patch_object -o $script_dir/patches/$patch_object || { err_msg "Failed to download Patch $object from $patch_bucket/$patch_object"; return 2; } + + info_msg "Extracting the $patch_name to $script_dir/patches/$patch_dir_name." + + unzip -q -o $script_dir/patches/$patch_object -d $script_dir/patches/$patch_dir_name || { err_msg "Failed to extract $script_dir/patches/$patch_object to $script_dir/patches/$patch_dir_name"; return 2; } + + # XILINX_PATH should not have AR73068 at this point. + info_msg "Appending XILINX_PATH with $script_dir/patches/$patch_dir_name/vivado" + + export XILINX_PATH=$XILINX_PATH:$script_dir/patches/$patch_dir_name/vivado + fi +} + +function fix_patch_vitis_AR73068_2019_2 { + local patch_object="$1" + local patch_dir_name="${patch_object%.*}" + pushd patches/$patch_dir_name + + sed -i '/.*checksum.*/d' ./vivado/data/ip/xilinx/ddr4_v2_2/component.xml + sed -i 's/coreRevision>73068/coreRevision>8/' ./vivado/data/ip/xilinx/ddr4_v2_2/component.xml + popd +} + +function patch_AR73068_2019_2 { + info_msg "Patching Vivado/Vitis 2019.2 with Xilinx Patch AR73068" + local fix_patch="$1" + local patch_bucket="https://aws-fpga-developer-ami.s3.amazonaws.com/1.8.0/Patches/AR73068" + local patch_object="AR73068_Vivado_2019_2_preliminary_rev1.zip" + + install_patch "AR73068" "$patch_bucket" "$patch_object" + + if [[ "$fix_patch" == true ]]; then + info_msg "Fixing Patch AR73068 for Vitis" + fix_patch_vitis_AR73068_2019_2 "$patch_object" + fi +} + +function patch_AR73068_2019_1 { + info_msg "Patching Vivado 2019.1 with Xilinx Patch AR73068" + + local patch_bucket="https://aws-fpga-developer-ami.s3.amazonaws.com/1.7.0/Patches/AR73068" + local patch_object="AR73068_Vivado_2019_1_preliminary_rev1.zip" + + install_patch "AR73068" "$patch_bucket" "$patch_object" +} + +function patch_AR73068_2018_3 { + info_msg "Patching Vivado 2018.3 with Xilinx Patch 
AR73068" + + local patch_bucket="https://aws-fpga-developer-ami.s3.amazonaws.com/1.6.0/Patches/AR73068" + local patch_object="AR73068_Vivado_2018_3_preliminary_rev1.zip" + + install_patch "AR73068" "$patch_bucket" "$patch_object" +} + +function patch_AR73068_2018_2 { + info_msg "Patching Vivado 2018.2 with Xilinx Patch AR73068" + + local patch_bucket="https://aws-fpga-developer-ami.s3.amazonaws.com/1.5.0/Patches/AR73068" + local patch_object="AR73068_Vivado_2018_2_preliminary_rev1.zip" + + install_patch "AR73068" "$patch_bucket" "$patch_object" +} + +function patch_AR73068_2017_4 { + info_msg "Patching Vivado 2017.4 with Xilinx Patch AR73068" + + local patch_bucket="https://aws-fpga-developer-ami.s3.amazonaws.com/1.4.0/Patches/AR73068" + local patch_object="AR73068_Vivado_2017_4_preliminary_rev2.zip" + + install_patch "AR73068" "$patch_bucket" "$patch_object" +} + +function patch_AR73068 { + local base_vivado_version=$(get_base_vivado_version) + local caller_script="$1" + local fix_patch=false + + # Vitis specific changes + if [[ "$caller_script" =~ "vitis_setup.sh" ]]; then + info_msg "Patching Vitis with AR73068" + fix_patch=true + fi + + if [[ "${base_vivado_version}" =~ "Vivado v2019.2" ]]; then + patch_AR73068_2019_2 "$fix_patch" + elif [[ "${base_vivado_version}" =~ "Vivado v2019.1" ]]; then + patch_AR73068_2019_1 + elif [[ "${base_vivado_version}" =~ "Vivado v2018.3" ]]; then + patch_AR73068_2018_3 + elif [[ "${base_vivado_version}" =~ "Vivado v2018.2" ]]; then + patch_AR73068_2018_2 + elif [[ "${base_vivado_version}" =~ "Vivado v2017.4" ]]; then + patch_AR73068_2017_4 + else + info_msg "Xilinx Patch AR73068 not applicable for Vivado version: ${base_vivado_version}." + fi +} function allow_non_root { [ ! 
-z ${AWS_FPGA_ALLOW_NON_ROOT} ] diff --git a/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py b/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py index 3fe20875..861d72d7 100644 --- a/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py +++ b/shared/lib/aws_fpga_test_utils/AwsFpgaTestBase.py @@ -66,6 +66,7 @@ class AwsFpgaTestBase(object): git_repo_dir = get_git_repo_root(dirname(__file__)) WORKSPACE = git_repo_dir + ADD_BATCH = False ADD_SIMULATOR = False ADD_EXAMPLEPATH = False ADD_RTENAME = False @@ -89,6 +90,12 @@ def setup_class(cls, derived_cls, filename_of_test_class): AwsFpgaTestBase.xilinx_sdaccel_examples_dir = AwsFpgaTestBase.git_repo_dir + "/" + AwsFpgaTestBase.xilinx_sdaccel_examples_prefix_path AwsFpgaTestBase.xilinx_sdaccel_examples_list_file = AwsFpgaTestBase.WORKSPACE + "/sdaccel_examples_list.json" + # Vitis locations + # Need to move to either a config file somewhere or a subclass + AwsFpgaTestBase.xilinx_vitis_examples_prefix_path = "Vitis/examples/xilinx" + AwsFpgaTestBase.xilinx_vitis_examples_dir = AwsFpgaTestBase.git_repo_dir + "/" + AwsFpgaTestBase.xilinx_vitis_examples_prefix_path + AwsFpgaTestBase.xilinx_vitis_examples_list_file = AwsFpgaTestBase.WORKSPACE + "/vitis_examples_list.json" + if 'WORKSPACE' in os.environ: assert os.environ['WORKSPACE'] == AwsFpgaTestBase.git_repo_dir, "WORKSPACE incorrect" else: @@ -135,6 +142,17 @@ def assert_sdaccel_setup(): assert os.environ.get('AWS_PLATFORM') != 'None', "Environment Var AWS_PLATFORM not set. source {}/sdaccel_setup.sh".format(AwsFpgaTestBase.git_repo_dir) assert os.environ.get('XILINX_SDX') != 'None', "Environment Var XILINX_SDX not set. Please check the AMI." + @staticmethod + def assert_vitis_setup(): + assert 'AWS_FPGA_REPO_DIR' in os.environ, "AWS_FPGA_REPO_DIR not set. source {}/vitis_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['AWS_FPGA_REPO_DIR'] == AwsFpgaTestBase.git_repo_dir, "AWS_FPGA_REPO_DIR not set to the repo dir. 
source {}/vitis_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert 'SDK_DIR' in os.environ, "SDK_DIR not set. source {}/vitis_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['SDK_DIR'] == os.path.join(AwsFpgaTestBase.git_repo_dir, 'sdk'), "SDK_DIR incorrect. source {}/vitis_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert 'VITIS_DIR' in os.environ, "VITIS_DIR not set. source {}/vitis_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ['VITIS_DIR'] == os.path.join(AwsFpgaTestBase.git_repo_dir, 'Vitis'), "VITIS_DIR incorrect. source {}/vitis_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ.get('AWS_PLATFORM') != 'None', "Environment Var AWS_PLATFORM not set. source {}/vitis_setup.sh".format(AwsFpgaTestBase.git_repo_dir) + assert os.environ.get('XILINX_VITIS') != 'None', "Environment Var XILINX_VITIS not set. Please check the AMI." + @staticmethod def running_on_f1_instance(): ''' @@ -190,6 +208,12 @@ def run_sdaccel_cmd(cmd, echo=False, check=True): cmd = source_sdaccel_cmd + " && " + cmd return AwsFpgaTestBase.run_cmd(cmd, echo, check) + @staticmethod + def run_vitis_cmd(cmd, echo=False, check=True): + source_vitis_cmd = "source {}/vitis_setup.sh &> /dev/null".format(AwsFpgaTestBase.git_repo_dir) + cmd = source_vitis_cmd + " && " + cmd + return AwsFpgaTestBase.run_cmd(cmd, echo, check) + @staticmethod def get_shell_version(): shell_link = os.path.join(AwsFpgaTestBase.WORKSPACE, 'hdk/common/shell_stable') @@ -237,6 +261,10 @@ def get_cl_s3_afi_tag(cl, option_tag, xilinxVersion): def get_sdaccel_xclbin_dir(examplePath): return os.path.join(AwsFpgaTestBase.get_sdaccel_example_fullpath(examplePath=examplePath), 'xclbin') + @staticmethod + def get_vitis_xclbin_dir(examplePath, target='hw'): + return os.path.join(AwsFpgaTestBase.get_sdaccel_example_fullpath(examplePath=examplePath), "build_dir.{}.xilinx_aws-vu9p-f1_shell-v04261818_201920_2".format(target)) + @staticmethod def 
get_sdaccel_example_s3_root_tag(examplePath, target, rteName, xilinxVersion): ''' @@ -252,6 +280,21 @@ def get_sdaccel_example_s3_root_tag(examplePath, target, rteName, xilinxVersion) example_relative_path = os.path.relpath(examplePath, AwsFpgaTestBase.xilinx_sdaccel_examples_prefix_path) return "jenkins/{}/SDAccel/{}/{}/{}/{}".format(os.environ['BUILD_TAG'], xilinxVersion, rteName, example_relative_path, target) + @staticmethod + def get_vitis_example_s3_root_tag(examplePath, target, rteName, xilinxVersion): + ''' + @param examplePath: Path of the Xilinx Vitis example + @param target: The target to build. For eg: hw, hw_emu, sw_emu + @param rteName: The runtime environment + @param xilinxVersion: The Xilinx tool version + ''' + assert target != '' + assert examplePath != '' + assert rteName != '' + assert xilinxVersion != '' + example_relative_path = os.path.relpath(examplePath, AwsFpgaTestBase.xilinx_vitis_examples_prefix_path) + return "jenkins/{}/Vitis/{}/{}/{}/{}".format(os.environ['BUILD_TAG'], xilinxVersion, rteName, example_relative_path, target) + @staticmethod def get_sdaccel_example_s3_xclbin_tag(examplePath, target, rteName, xilinxVersion): ''' @@ -268,6 +311,22 @@ def get_sdaccel_example_s3_xclbin_tag(examplePath, target, rteName, xilinxVersio return "{}/xclbin".format(root_tag) + @staticmethod + def get_vitis_example_s3_xclbin_tag(examplePath, target, rteName, xilinxVersion): + ''' + @param examplePath: Path of the Xilinx Vitis example + @param target: The target to build. 
For eg: hw, hw_emu, sw_emu + @param rteName: The runtime environment + @param xilinxVersion: The Xilinx tool version + ''' + assert target != '' + assert examplePath != '' + assert rteName != '' + assert xilinxVersion != '' + root_tag = AwsFpgaTestBase.get_vitis_example_s3_root_tag(examplePath, target, rteName, xilinxVersion) + + return "{}/xclbin".format(root_tag) + @staticmethod def get_sdaccel_example_s3_dcp_tag(examplePath, target, rteName, xilinxVersion): ''' @@ -284,6 +343,22 @@ def get_sdaccel_example_s3_dcp_tag(examplePath, target, rteName, xilinxVersion): return "{}/dcp".format(root_tag) + @staticmethod + def get_vitis_example_s3_dcp_tag(examplePath, target, rteName, xilinxVersion): + ''' + @param examplePath: Path of the Xilinx Vitis example + @param target: The target to build. For eg: hw, hw_emu, sw_emu + @param rteName: The runtime environment + @param xilinxVersion: The Xilinx tool version + ''' + assert target != '' + assert examplePath != '' + assert rteName != '' + assert xilinxVersion != '' + root_tag = AwsFpgaTestBase.get_vitis_example_s3_root_tag(examplePath, target, rteName, xilinxVersion) + + return "{}/dcp".format(root_tag) + @staticmethod def get_sdaccel_example_s3_afi_tag(examplePath, target, rteName, xilinxVersion): ''' @@ -300,9 +375,25 @@ def get_sdaccel_example_s3_afi_tag(examplePath, target, rteName, xilinxVersion): return "{}/create-afi/afi-ids.txt".format(root_tag) @staticmethod - def get_sdaccel_example_run_cmd(examplePath): + def get_vitis_example_s3_afi_tag(examplePath, target, rteName, xilinxVersion): + ''' + @param examplePath: Path of the Xilinx Vitis example + @param target: The target to build. 
For eg: hw, hw_emu, sw_emu + @param rteName: The runtime environment + ''' + assert target != '' + assert examplePath != '' + assert rteName != '' + assert xilinxVersion != '' + root_tag = AwsFpgaTestBase.get_vitis_example_s3_root_tag(examplePath, target, rteName, xilinxVersion) + + return "{}/create-afi/afi-ids.txt".format(root_tag) + + @staticmethod + def get_sdaccel_example_run_cmd(examplePath, xilinxVersion): ''' @param examplePath: Path of the Xilinx SDAccel example + @param xilinxVersion: The Xilinx tool version ''' description = AwsFpgaTestBase.get_sdaccel_example_description(examplePath) if description.get("em_cmd", None): @@ -312,12 +403,47 @@ def get_sdaccel_example_run_cmd(examplePath): run_cmd = "./{}".format(description.get("host_exe", None)) if description.get("cmd_args", None): if "PROJECT" not in description.get("cmd_args", None) and "BUILD" not in description.get("cmd_args", None): - run_cmd += " {}".format(description.get("cmd_args", None)) + if "2019.1" not in xilinxVersion: + run_cmd += " {}".format(description.get("cmd_args", None)) + else: + run_cmd += " {}".format(description.get("cmd_args", None).replace(".xclbin",".hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.xclbin")) else: - run_cmd += " {}".format((description.get("cmd_args", None).replace("PROJECT",".")).replace("BUILD","./xclbin")) + if "2019.1" not in xilinxVersion: + run_cmd += " {}".format((description.get("cmd_args", None).replace("PROJECT",".")).replace("BUILD","./xclbin")) + else: + run_cmd += " {}".format(((description.get("cmd_args", None).replace(".xclbin",".hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin")).replace("PROJECT",".")).replace("BUILD","./xclbin")) assert run_cmd is not None, "Could not find run_cmd(em_cmd) or (host_exe) in the example description here {}".format(examplePath) - + + return run_cmd + + @staticmethod + def get_vitis_example_run_cmd(examplePath, xilinxVersion): + ''' + @param examplePath: Path of the Xilinx Vitis example + @param xilinxVersion: The 
Xilinx tool version + ''' + description = AwsFpgaTestBase.get_vitis_example_description(examplePath) + + host_description = description.get("host", None) + assert host_description is not None, "Could not find host key in the description.json here {}".format( + examplePath) + + launch_description = description.get("launch", None) + assert launch_description is not None, "Could not find launch/cmd_args key in the description.json here {}".format( + examplePath) + + if host_description.get("host_exe", None): + run_cmd = "./{}".format(host_description.get("host_exe", None)) + + if launch_description[0].get("cmd_args", None): + run_cmd += " {}".format(((launch_description[0].get("cmd_args", None).replace(".xclbin", + ".awsxclbin")).replace( + "PROJECT", ".")).replace("BUILD", "./build_dir.hw.xilinx_aws-vu9p-f1_shell-v04261818_201920_2")) + + assert run_cmd is not None, "Could not find run_cmd(em_cmd) or (host_exe) in the example description here {}".format( + examplePath) + return run_cmd @staticmethod @@ -332,10 +458,26 @@ def get_sdaccel_example_description(examplePath): description = json.load(json_data) return description + @staticmethod + def get_vitis_example_description(examplePath): + ''' + @param examplePath: Path of the Xilinx Vitis example + ''' + + example_description = AwsFpgaTestBase.assert_non_zero_file(os.path.join(AwsFpgaTestBase.get_vitis_example_fullpath(examplePath), "description.json")) + + with open(example_description) as json_data: + description = json.load(json_data) + return description + @staticmethod def get_sdaccel_example_fullpath(examplePath): return "{}/{}/".format(AwsFpgaTestBase.WORKSPACE, examplePath) + @staticmethod + def get_vitis_example_fullpath(examplePath): + return "{}/{}/".format(AwsFpgaTestBase.WORKSPACE, examplePath) + @staticmethod def fetch_sdaccel_xclbin_folder_from_s3(examplePath, rteName, xilinxVersion): cwd = os.getcwd() @@ -354,6 +496,24 @@ def fetch_sdaccel_xclbin_folder_from_s3(examplePath, rteName, xilinxVersion): 
os.chdir(cwd) return xclbin_path + @staticmethod + def fetch_vitis_xclbin_folder_from_s3(examplePath, rteName, xilinxVersion): + cwd = os.getcwd() + assert examplePath != '' + assert rteName != '' + assert xilinxVersion != '' + + os.chdir(AwsFpgaTestBase.get_vitis_example_fullpath(examplePath)) + rc = os.system("aws s3 cp s3://{}/{} {} --recursive".format(AwsFpgaTestBase.s3_bucket, AwsFpgaTestBase.get_vitis_example_s3_xclbin_tag(examplePath=examplePath, target="hw", rteName=rteName, xilinxVersion=xilinxVersion), AwsFpgaTestBase.get_vitis_xclbin_dir(examplePath=examplePath))) + assert rc == 0, "Error while copying from s3://{}/{} to {}".format(AwsFpgaTestBase.s3_bucket, AwsFpgaTestBase.get_vitis_example_s3_xclbin_tag(examplePath=examplePath, target="hw", rteName=rteName, xilinxVersion=xilinxVersion), AwsFpgaTestBase.get_vitis_xclbin_dir(examplePath=examplePath)) + xclbin_path = AwsFpgaTestBase.get_vitis_xclbin_dir(examplePath=examplePath) + + logger.debug(xclbin_path) + assert os.path.exists(xclbin_path), "Vitis Example xclbin path={} does not exist".format(xclbin_path) + + os.chdir(cwd) + return xclbin_path + @staticmethod def get_sdaccel_xclbin_file(examplePath, rteName, xilinxVersion): assert examplePath != '' @@ -365,6 +525,17 @@ def get_sdaccel_xclbin_file(examplePath, rteName, xilinxVersion): xclbin = AwsFpgaTestBase.assert_non_zero_file(os.path.join(xclbin_path, "*.{}.*.xclbin".format("hw"))) return xclbin + @staticmethod + def get_vitis_xclbin_file(examplePath, rteName, xilinxVersion): + assert examplePath != '' + assert rteName != '' + assert xilinxVersion != '' + xclbin_path = AwsFpgaTestBase.fetch_vitis_xclbin_folder_from_s3(examplePath, rteName, xilinxVersion) + logger.info("Checking that a non zero size xclbin file exists in {}".format(xclbin_path)) + + xclbin = AwsFpgaTestBase.assert_non_zero_file(os.path.join(xclbin_path, "*.xclbin")) + return xclbin + @staticmethod def get_sdaccel_aws_xclbin_file(examplePath, rteName, xilinxVersion): assert 
examplePath != '' @@ -376,6 +547,17 @@ def get_sdaccel_aws_xclbin_file(examplePath, rteName, xilinxVersion): aws_xclbin = AwsFpgaTestBase.assert_non_zero_file(os.path.join(xclbin_path, "*.{}.*.awsxclbin".format("hw"))) return aws_xclbin + @staticmethod + def get_vitis_aws_xclbin_file(examplePath, rteName, xilinxVersion): + assert examplePath != '' + assert rteName != '' + assert xilinxVersion != '' + + xclbin_path = AwsFpgaTestBase.fetch_vitis_xclbin_folder_from_s3(examplePath, rteName, xilinxVersion) + logger.info("Checking that a non zero size awsxclbin file exists in {}".format(xclbin_path)) + aws_xclbin = AwsFpgaTestBase.assert_non_zero_file(os.path.join(xclbin_path, "*.awsxclbin")) + return aws_xclbin + @staticmethod def assert_afi_available(afi): # Check the status of the afi @@ -481,6 +663,10 @@ def assert_non_zero_file(filter): filenames = glob.glob(filter) + # Removing .link.xclbin found in Vitis2020.1 + + filenames = [x for x in filenames if ".link." not in x] + assert len(filenames) > 0, "No {} file found in {}".format(filter, os.getcwd()) assert len(filenames) == 1, "More than 1 {} file found: {}\n{}".format(filter, len(filenames), filenames) @@ -517,7 +703,7 @@ def get_fio_write_benchmark_script(driver='xdma'): return os.path.join(AwsFpgaTestBase.get_fio_tool_root(), "scripts/{}_4-ch_4-1M_write.fio".format(driver)) @staticmethod - def setup_fio_tools(python_version=2.7): + def setup_fio_tools(): '''Install and setup fio tools''' # If downloaded repo already, exists, delete it so we can fetch again if os.path.exists(AwsFpgaTestBase.get_fio_tool_install_path()): @@ -525,7 +711,7 @@ def setup_fio_tools(python_version=2.7): logger.info("Installing fio_dma_tools") - (rc, stdout_lines, stderr_lines) = AwsFpgaTestBase.run_cmd("python{} {} {}".format(python_version, AwsFpgaTestBase.get_fio_tool_install_script(), AwsFpgaTestBase.get_fio_tool_install_path()), echo=True) + (rc, stdout_lines, stderr_lines) = AwsFpgaTestBase.run_cmd("python {} 
{}".format(AwsFpgaTestBase.get_fio_tool_install_script(), AwsFpgaTestBase.get_fio_tool_install_path()), echo=True) assert rc == 0 assert os.path.exists("{}".format(AwsFpgaTestBase.get_fio_tool_run_script())) diff --git a/shared/lib/aws_fpga_test_utils/__init__.py b/shared/lib/aws_fpga_test_utils/__init__.py index c806d7d7..d28616b6 100644 --- a/shared/lib/aws_fpga_test_utils/__init__.py +++ b/shared/lib/aws_fpga_test_utils/__init__.py @@ -247,6 +247,8 @@ def get_instance_type(): def get_num_fpga_slots(instance_type): if re.match('f1\.2xlarge', instance_type): return 1 + if re.match('f1\.4xlarge', instance_type): + return 2 elif re.match('f1\.16xlarge', instance_type): return 8 return 0 diff --git a/shared/lib/check_src_headers.py b/shared/lib/check_src_headers.py index c307caae..ef758686 100755 --- a/shared/lib/check_src_headers.py +++ b/shared/lib/check_src_headers.py @@ -176,7 +176,7 @@ ''' xilinx_xdma1 = '''Xilinx XDMA IP Core Linux Driver -Copyright(c) 2015 - 2017 Xilinx, Inc. +Copyright(c) 2015 - 2020 Xilinx, Inc. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -194,12 +194,6 @@ the file called "LICENSE". ''' -xilinx_xdma2 = '''Xilinx XDMA IP Core Linux Driver - -Copyright(c) Sidebranch. -Copyright(c) Xilinx, Inc. -''' - xilinx1 = '''\xa9 Copyright 2017 Xilinx, Inc. All rights reserved. This file contains confidential and proprietary information of Xilinx, Inc. and is protected under U.S. 
and @@ -400,7 +394,6 @@ apache_header_2018.split("\n"), gpl2_header.split("\n"), xilinx_xdma1.split("\n"), - xilinx_xdma2.split("\n"), xilinx1.split("\n"), xilinx2_header.split("\n"), xilinx3_header.split("\n"), @@ -554,13 +547,15 @@ def check_headers(dir): "sdk/linux_kernel_drivers/xocl/LICENSE$", "sdk/apps/virtual-ethernet/scripts/pktgen-ena-range.pkt", "sdk/apps/virtual-ethernet/scripts/pktgen-ena.pkt", - + "SDAccel/userspace/src/test", "SDAccel/examples/aws/kernel_3ddr_bandwidth/description.json", "SDAccel/examples/aws/helloworld_ocl_runtime/helloworld", + "SDAccel/examples/aws/helloworld_ocl_runtime/sdaccel.ini", "SDAccel/examples/aws/helloworld_ocl_runtime/vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin", - "SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/helloworld", - "SDAccel/examples/aws/helloworld_ocl_runtime/2018.3/vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin" + "SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/helloworld", + "SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/sdaccel.ini", + "SDAccel/examples/aws/helloworld_ocl_runtime/2018.3_2019.1/vector_addition.hw.xilinx_aws-vu9p-f1-04261818_dynamic_5_0.awsxclbin" ]) file_provider.set_exclude_paths([ @@ -572,6 +567,9 @@ def check_headers(dir): "SDAccel/aws_platform", "SDAccel/examples/3rd_party", "SDAccel/examples/xilinx", + "Vitis/aws_platform", + "Vitis/examples/xilinx", + "Vitis/docs/Alveo_to_AWS_F1_Migration/example", ]) file_path_list = sorted(file_provider.get_files(dir)) diff --git a/shared/tests/TESTING.md b/shared/tests/TESTING.md index 1e0e5aeb..2052f299 100644 --- a/shared/tests/TESTING.md +++ b/shared/tests/TESTING.md @@ -59,7 +59,7 @@ The boto3 package is the AWS Python API. It can be used to start and terminate instances and any other API operation that you have permissions for. 
-Configuration of account credentials is explained in the [Quickstart](http://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration). +Configuration of account credentials is explained in the [Quickstart](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration). The following command will install the latest release. diff --git a/shared/tests/bin/check_md_links.py b/shared/tests/bin/check_md_links.py index b3addcf4..8ec80eb9 100755 --- a/shared/tests/bin/check_md_links.py +++ b/shared/tests/bin/check_md_links.py @@ -134,7 +134,7 @@ def contains_link(path): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--exclude', action='store', nargs='*', default=[], help="Paths to ignore") - parser.add_argument('--ignore-url', action='store', nargs='*', default=[], help="URLs to ignore. Will ignore all URLs starting with this prefix.") + parser.add_argument('--ignore-url', nargs='*', default=[], help="URLs to ignore. Will ignore all URLs starting with this prefix.") parser.add_argument('--debug', action='store_true', default=False, help="Enable debug messages") args = parser.parse_args() if args.debug: diff --git a/shared/tests/bin/install_python_venv.sh b/shared/tests/bin/install_python_venv.sh new file mode 100755 index 00000000..98f83473 --- /dev/null +++ b/shared/tests/bin/install_python_venv.sh @@ -0,0 +1,89 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. 
See the License for the specific language governing permissions and +# limitations under the License. + +# Script must be sourced from a bash shell or it will not work +# When being sourced $0 will be the interactive shell and $BASH_SOURCE_ will contain the script being sourced +# When being run $0 and $_ will be the same. + +os_uname=`uname -r` + +script=${BASH_SOURCE[0]} +full_script=$(readlink -f $script) +script_name=$(basename $full_script) +script_dir=$(dirname $full_script) + +python_versions=(3.6 2.7) + +# First install python if it is not installed. +for python_version in ${python_versions[@]}; do + if [[ $os_uname =~ (amzn2) ]]; then + python_version = ${python_version:0:1} + fi + + python=python$python_version + pip=pip$python_version + yum_python_package=${python/./} + if [ ! -e /usr/bin/$python ]; then + if ! sudo yum -y install $yum_python_package; then + echo "Error: Install of $yum_python_package failed" + exit 1 + fi + fi +done + +# Python2 pip is common between OS's. We can use that to install other +if [ ! -e /usr/bin/pip2 ]; then + if ! sudo yum -y install python2-pip; then + echo "Error: Install of $yum_python_package failed" + exit 1 + fi +fi + +# Install virtualenv +if [ ! -e /usr/bin/virtualenv ]; then + if ! sudo pip install virtualenv; then + echo "Error: Install of virtualenv failed" + exit 1 + fi +fi + +# Install virtualenvwrapper +if [ ! -e /usr/bin/virtualenvwrapper.sh ]; then + if ! 
sudo pip install virtualenvwrapper; then + echo "Error: Install of virtualenvwrapper failed" + exit 1 + fi +fi + +source virtualenvwrapper.sh + +# Create virtualenv environments +for python_version in ${python_versions[@]}; do + + if [[ $os_uname =~ (amzn2) ]] + then + site_packages=/usr/lib64/python$python_version/site-packages/ + + python=python${python_version:0:1} + + mkvirtualenv -r $script_dir/requirements.txt -p $(which $python) --system-site-packages python${python_version:0:1} + + # Adding the python bindings site packages to path + add2virtualenv $site_packages + else + python=python$python_version + mkvirtualenv -r $script_dir/requirements.txt -p $(which $python) python${python_version:0:1} + fi +done \ No newline at end of file diff --git a/shared/tests/bin/requirements.txt b/shared/tests/bin/requirements.txt new file mode 100644 index 00000000..354f359a --- /dev/null +++ b/shared/tests/bin/requirements.txt @@ -0,0 +1,6 @@ +pytest +pytest-timeout +pytest-rerunfailures +boto3 +markdown +GitPython \ No newline at end of file diff --git a/shared/tests/bin/setup_test_build_vitis_env.sh b/shared/tests/bin/setup_test_build_vitis_env.sh new file mode 100644 index 00000000..ed76bf76 --- /dev/null +++ b/shared/tests/bin/setup_test_build_vitis_env.sh @@ -0,0 +1,36 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Script must be sourced from a bash shell or it will not work +# When being sourced $0 will be the interactive shell and $BASH_SOURCE_ will contain the script being sourced +# When being run $0 and $_ will be the same. + +script=${BASH_SOURCE[0]} +if [ $script == $0 ]; then + echo "ERROR: You must source this script" + exit 2 +fi + +full_script=$(readlink -f $script) +script_name=$(basename $full_script) +script_dir=$(dirname $full_script) + +if ! source $script_dir/setup_test_env.sh; then + return 1 +fi + +if ! source $WORKSPACE/vitis_setup.sh; then + return 1 +fi diff --git a/shared/tests/bin/setup_test_env.sh b/shared/tests/bin/setup_test_env.sh index fdfb8af9..bca0a793 100644 --- a/shared/tests/bin/setup_test_env.sh +++ b/shared/tests/bin/setup_test_env.sh @@ -27,49 +27,20 @@ full_script=$(readlink -f $script) script_name=$(basename $full_script) script_dir=$(dirname $full_script) -python_versions=(2.7 3.4) +instance_id=`curl http://169.254.169.254/latest/meta-data/instance-id` +instance_type=`curl http://169.254.169.254/latest/meta-data/instance-type` -python_packages=(\ -pytest \ -pytest-timeout \ -GitPython \ -boto3 \ -markdown \ -) - -for python_version in ${python_versions[@]}; do - python=python$python_version - pip=pip$python_version - yum_python_package=${python/./} - yum_pip_package=$yum_python_package-pip - if [ ! -e /usr/bin/$python ]; then - if ! sudo yum -y install $yum_python_package; then - echo "error: Install of $yum_python_package failed" - return 1 - fi - fi - if [ ! -e /usr/bin/$pip ]; then - if ! sudo yum -y install $yum_pip_package; then - echo "error: Install of $yum_pip_package failed" - return 1 - fi - sudo $pip install --upgrade pip - fi - - for p in ${python_packages[@]}; do - if ! $pip show $p > /dev/null; then - echo "Installing $p" - if ! 
sudo $pip install $p; then - echo "error: Install of $python $p failed" - return 1 - fi - fi - done -done +echo "Test Running on INSTANCE ID: $instance_id INSTANCE TYPE: $instance_type" if [ ":$WORKSPACE" == ":" ]; then export WORKSPACE=$(git rev-parse --show-toplevel) fi +export WORKON_HOME=$WORKSPACE/.virtualenvs + +$script_dir/install_python_venv.sh + +# Setup default environment to work on +source $WORKSPACE/.virtualenvs/python2/bin/activate export PYTHONPATH=$WORKSPACE/shared/lib:$PYTHONPATH diff --git a/shared/tests/bin/setup_test_env_al2.sh b/shared/tests/bin/setup_test_env_al2.sh deleted file mode 100644 index 0323a458..00000000 --- a/shared/tests/bin/setup_test_env_al2.sh +++ /dev/null @@ -1,78 +0,0 @@ -# Amazon FPGA Hardware Development Kit -# -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Amazon Software License (the "License"). You may not use -# this file except in compliance with the License. A copy of the License is -# located at -# -# http://aws.amazon.com/asl/ -# -# or in the "license" file accompanying this file. This file is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or -# implied. See the License for the specific language governing permissions and -# limitations under the License. - -# Script must be sourced from a bash shell or it will not work -# When being sourced $0 will be the interactive shell and $BASH_SOURCE_ will contain the script being sourced -# When being run $0 and $_ will be the same. 
- -script=${BASH_SOURCE[0]} -if [ $script == $0 ]; then - echo "ERROR: You must source this script" - exit 2 -fi - -full_script=$(readlink -f $script) -script_name=$(basename $full_script) -script_dir=$(dirname $full_script) - -python_versions=(2 3) - -python_packages=(\ -pytest \ -pytest-timeout \ -GitPython \ -boto3 \ -awscli \ -markdown \ -) - -for python_version in ${python_versions[@]}; do - python=python$python_version - pip=pip$python_version - yum_python_package=${python/./} - yum_pip_package=$yum_python_package-pip - if [ ! -e /usr/bin/$python ]; then - if ! sudo yum -y install $yum_python_package; then - echo "error: Install of $yum_python_package failed" - set +x - return 1 - fi - fi - if [ ! -e /usr/bin/$pip ]; then - if ! sudo yum -y install $yum_pip_package; then - echo "error: Install of $yum_pip_package failed" - return 1 - fi - sudo $pip install --upgrade pip - fi - - for p in ${python_packages[@]}; do - if ! $pip show $p > /dev/null; then - echo "Installing $p" - if ! sudo $pip install $p; then - echo "error: Install of $python $p failed" - return 1 - fi - fi - done -done - -if [ ":$WORKSPACE" == ":" ]; then - export WORKSPACE=$(git rev-parse --show-toplevel) -fi - -export PYTHONPATH=$WORKSPACE/shared/lib:$PYTHONPATH - -export AWS_DEFAULT_REGION=us-east-1 diff --git a/shared/tests/bin/setup_test_runtime_sdaccel_env.sh b/shared/tests/bin/setup_test_runtime_sdaccel_env.sh index b16239f9..36774b59 100644 --- a/shared/tests/bin/setup_test_runtime_sdaccel_env.sh +++ b/shared/tests/bin/setup_test_runtime_sdaccel_env.sh @@ -27,6 +27,8 @@ full_script=$(readlink -f $script) script_name=$(basename $full_script) script_dir=$(dirname $full_script) +export LD_LIBRARY_PATH=$XILINX_SDX/lnx64/tools/opencv/:$LD_LIBRARY_PATH + if ! 
source $script_dir/setup_test_env.sh; then return 1 fi diff --git a/shared/tests/bin/setup_test_sdk_env_al2.sh b/shared/tests/bin/setup_test_runtime_vitis_env.sh similarity index 78% rename from shared/tests/bin/setup_test_sdk_env_al2.sh rename to shared/tests/bin/setup_test_runtime_vitis_env.sh index eb64b4ad..3e7cc1a5 100644 --- a/shared/tests/bin/setup_test_sdk_env_al2.sh +++ b/shared/tests/bin/setup_test_runtime_vitis_env.sh @@ -27,17 +27,16 @@ full_script=$(readlink -f $script) script_name=$(basename $full_script) script_dir=$(dirname $full_script) -if ! source $script_dir/setup_test_env_al2.sh; then +export LD_LIBRARY_PATH=$XILINX_VITIS/lnx64/tools/opencv/:$LD_LIBRARY_PATH + +if ! source $script_dir/setup_test_env.sh; then return 1 fi -if ! source $WORKSPACE/sdk_setup.sh; then +if ! source $script_dir/setup_test_xrtpatch.sh; then return 1 fi - -if [ x$1 == "xpy_bindings" ] ; then - aws s3 cp s3://aws-fpga-jenkins-testing/python_bindings_dependencies/setup.sh . - chmod 755 ./setup.sh - ./setup.sh - export PYTHONPATH=$PYTHONPATH:$SDK_DIR/apps + +if ! source $WORKSPACE/vitis_setup.sh; then + return 1 fi diff --git a/shared/tests/bin/setup_test_sdk_env.sh b/shared/tests/bin/setup_test_sdk_env.sh index 828f8f31..03f6eced 100644 --- a/shared/tests/bin/setup_test_sdk_env.sh +++ b/shared/tests/bin/setup_test_sdk_env.sh @@ -26,11 +26,21 @@ fi full_script=$(readlink -f $script) script_name=$(basename $full_script) script_dir=$(dirname $full_script) +setup_test_env_script_dir=$script_dir if ! source $script_dir/setup_test_env.sh; then - return 1 + return 1 fi if ! source $WORKSPACE/sdk_setup.sh; then - return 1 + return 1 fi + +if [ x$1 == "xpy_bindings" ] ; then + source $WORKSPACE/.virtualenvs/python3/bin/activate + aws s3 cp s3://aws-fpga-jenkins-testing/python_bindings_dependencies/setup.sh . 
+ chmod 755 ./setup.sh + ./setup.sh + export PYTHONPATH=$PYTHONPATH:$SDK_DIR/apps + source $WORKSPACE/.virtualenvs/python2/bin/activate +fi \ No newline at end of file diff --git a/shared/tests/bin/setup_test_xrtpatch.sh b/shared/tests/bin/setup_test_xrtpatch.sh index 5881917e..ccfbfa47 100644 --- a/shared/tests/bin/setup_test_xrtpatch.sh +++ b/shared/tests/bin/setup_test_xrtpatch.sh @@ -28,13 +28,6 @@ script_name=$(basename $full_script) script_dir=$(dirname $full_script) s3_ami_bucket=aws-fpga-developer-ami -s3_ami_version=1.5.0 -xrt_release_version=XRT_2018_2_XDF_RC5 -xrt_rpm_name=xrt_201802.2.1.0_7.5.1804-xrt.rpm -aws_xrt_rpm_name=xrt_201802.2.1.0_7.5.1804-aws.rpm - -xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$xrt_rpm_name -aws_xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$aws_xrt_rpm_name VIVADO_TOOL_VERSION=`vivado -version | grep Vivado | head -1 | sed 's:Vivado *::' | sed 's: .*$::' | sed 's:v::'` export VIVADO_TOOL_VERSION=${VIVADO_TOOL_VERSION:0:6} @@ -44,6 +37,14 @@ echo "VIVADO_TOOL_VERSION is $VIVADO_TOOL_VERSION" if [[ "$VIVADO_TOOL_VERSION" =~ .*2018\.2.* ]]; then echo "Xilinx Vivado version is 2018.2" + s3_ami_version=1.5.0 + xrt_release_version=XRT_2018_2_XDF_RC5 + xrt_rpm_name=xrt_201802.2.1.0_7.5.1804-xrt.rpm + aws_xrt_rpm_name=xrt_201802.2.1.0_7.5.1804-aws.rpm + + xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$xrt_rpm_name + aws_xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$aws_xrt_rpm_name + if [ -f "/opt/xilinx/xrt/include/version.h" ]; then echo "XRT installed. 
proceeding to check version compatibility" xrt_build_ver=$(grep 'xrt_build_version_hash\[\]' /opt/xilinx/xrt/include/version.h | sed 's/";//' | sed 's/^.*"//') @@ -70,6 +71,80 @@ if [[ "$VIVADO_TOOL_VERSION" =~ .*2018\.2.* ]]; then sudo yum install -y $aws_xrt_rpm_name echo " XRT patch aws rpm installed successfully" fi +elif [[ "$VIVADO_TOOL_VERSION" =~ .*2018\.3.* ]]; then + echo "Xilinx Vivado version is 2018.3" + + s3_ami_version=1.6.0 + xrt_release_version=XRT_2018_3_RC5 + xrt_rpm_name=xrt_201830.2.1.0_7.6.1810-xrt.rpm + aws_xrt_rpm_name=xrt_201830.2.1.0_7.6.1810-aws.rpm + + xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$xrt_rpm_name + aws_xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$aws_xrt_rpm_name + + if [ -f "/opt/xilinx/xrt/include/version.h" ]; then + echo "XRT installed. proceeding to check version compatibility" + xrt_build_ver=$(grep 'xrt_build_version_hash\[\]' /opt/xilinx/xrt/include/version.h | sed 's/";//' | sed 's/^.*"//') + echo "Installed XRT version hash : $xrt_build_ver" + if grep -Fxq "$xrt_build_ver" $AWS_FPGA_REPO_DIR/SDAccel/sdaccel_xrt_version.txt + then + echo "XRT version $xrt_build_ver is up-to-date." + else + echo "$xrt_build_ver is stale. upgrading XRT to" + cat $AWS_FPGA_REPO_DIR/SDAccel/sdaccel_xrt_version.txt + curl -s https://s3.amazonaws.com/$xrt_rpm_path -o $xrt_rpm_name || { echo " Error: Failed to download xrt rpm from $xrt_rpm_path"; return 2; } + curl -s https://s3.amazonaws.com/$aws_xrt_rpm_path -o $aws_xrt_rpm_name || { echo " Error: Failed to download aws xrt rpm from $aws_xrt_rpm_path"; return 2; } + sudo yum reinstall -y $xrt_rpm_name + echo " XRT patch rpm installed successfully" + sudo yum reinstall -y $aws_xrt_rpm_name + echo " XRT patch aws rpm installed successfully" + fi + else + echo "XRT not installed. 
Please install XRT" + curl -s https://s3.amazonaws.com/$xrt_rpm_path -o $xrt_rpm_name || { echo " Error: Failed to download xrt rpm from $xrt_rpm_path"; return 2; } + curl -s https://s3.amazonaws.com/$aws_xrt_rpm_path -o $aws_xrt_rpm_name || { echo " Error: Failed to download aws xrt rpm from $aws_xrt_rpm_path"; return 2; } + sudo yum reinstall -y $xrt_rpm_name + echo " XRT patch rpm installed successfully" + sudo yum install -y $aws_xrt_rpm_name + echo " XRT patch aws rpm installed successfully" + fi +elif [[ "$VIVADO_TOOL_VERSION" =~ .*2019\.1.* ]]; then + echo "Xilinx Vivado version is 2019.1" + + s3_ami_version=1.7.0 + xrt_release_version=XRT_2019_1_0_3 + xrt_rpm_name=xrt_201910.2.2.0_7.7.1908-xrt.rpm + aws_xrt_rpm_name=xrt_201910.2.2.0_7.7.1908-aws.rpm + + xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$xrt_rpm_name + aws_xrt_rpm_path=$s3_ami_bucket/$s3_ami_version/Patches/$xrt_release_version/$aws_xrt_rpm_name + + if [ -f "/opt/xilinx/xrt/include/version.h" ]; then + echo "XRT installed. proceeding to check version compatibility" + xrt_build_ver=$(grep 'xrt_build_version_hash\[\]' /opt/xilinx/xrt/include/version.h | sed 's/";//' | sed 's/^.*"//') + echo "Installed XRT version hash : $xrt_build_ver" + if grep -Fxq "$xrt_build_ver" $AWS_FPGA_REPO_DIR/SDAccel/sdaccel_xrt_version.txt + then + echo "XRT version $xrt_build_ver is up-to-date." + else + echo "$xrt_build_ver is stale. 
upgrading XRT to" + cat $AWS_FPGA_REPO_DIR/SDAccel/sdaccel_xrt_version.txt + curl -s https://s3.amazonaws.com/$xrt_rpm_path -o $xrt_rpm_name || { echo " Error: Failed to download xrt rpm from $xrt_rpm_path"; return 2; } + curl -s https://s3.amazonaws.com/$aws_xrt_rpm_path -o $aws_xrt_rpm_name || { echo " Error: Failed to download aws xrt rpm from $aws_xrt_rpm_path"; return 2; } + sudo yum reinstall -y $xrt_rpm_name + echo " XRT patch rpm installed successfully" + sudo yum reinstall -y $aws_xrt_rpm_name + echo " XRT patch aws rpm installed successfully" + fi + else + echo "XRT not installed. Please install XRT" + curl -s https://s3.amazonaws.com/$xrt_rpm_path -o $xrt_rpm_name || { echo " Error: Failed to download xrt rpm from $xrt_rpm_path"; return 2; } + curl -s https://s3.amazonaws.com/$aws_xrt_rpm_path -o $aws_xrt_rpm_name || { echo " Error: Failed to download aws xrt rpm from $aws_xrt_rpm_path"; return 2; } + sudo yum reinstall -y $xrt_rpm_name + echo " XRT patch rpm installed successfully" + sudo yum install -y $aws_xrt_rpm_name + echo " XRT patch aws rpm installed successfully" + fi else echo "Xilinx Vivado version is $VIVADO_TOOL_VERSION . 
Proceeding with base runtime version " fi diff --git a/shared/tests/test_md_links.py b/shared/tests/test_md_links.py index 1d03c111..a4c684fa 100755 --- a/shared/tests/test_md_links.py +++ b/shared/tests/test_md_links.py @@ -59,6 +59,6 @@ def test_md_links(self): cmd = self.test_dir + "/bin/check_md_links.py" cmd += " --exclude SDAccel/examples/xilinx" # This is a valid link but sometimes it 404s - cmd += " --ignore-url https://docs.pytest.org/en/latest/" + cmd += " --ignore-url https://docs.pytest.org/en/latest/ https://forums.xilinx.com/t5/SDAccel/bd-p/SDx" (rc, stdout, stderr) = self.run_cmd(cmd, echo=True) assert rc == 0 diff --git a/supported_vivado_versions.txt b/supported_vivado_versions.txt index fa85458c..261b08e1 100644 --- a/supported_vivado_versions.txt +++ b/supported_vivado_versions.txt @@ -1,8 +1,10 @@ -Vivado v2017.4 (64-bit) -Vivado v2017.4.op (64-bit) -Vivado v2018.2_AR71275_op (64-bit) -Vivado v2018.2_AR71715 (64-bit) -Vivado v2018.2.op (64-bit) -Vivado v2018.2 (64-bit) -Vivado v2018.3.op (64-bit) -Vivado v2018.3 (64-bit) +Vivado v2019.1.op (64-bit) +Vivado v2019.1 (64-bit) +Vivado v2019.1_AR73068 (64-bit) +Vivado v2019.1_AR73068_op (64-bit) +Vivado v2019.1_AR72668 (64-bit) +Vivado v2019.2 (64-bit) +Vivado v2019.2_AR73068_op (64-bit) +Vivado v2019.2_AR73068 (64-bit) +Vivado v2020.1 (64-bit) +Vivado v2020.2 (64-bit) diff --git a/vitis_runtime_setup.sh b/vitis_runtime_setup.sh new file mode 100644 index 00000000..68af3c6b --- /dev/null +++ b/vitis_runtime_setup.sh @@ -0,0 +1,206 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. 
This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. + +# Script must be sourced from a bash shell or it will not work +# When being sourced $0 will be the interactive shell and $BASH_SOURCE_ will contain the script being sourced +# When being run $0 and $_ will be the same. +script=${BASH_SOURCE[0]} +if [ $script == $0 ]; then + echo "ERROR: You must source this script" + exit 2 +fi + +full_script=$(readlink -f $script) +script_name=$(basename $full_script) +script_dir=$(dirname $full_script) +current_dir=$(pwd) + +source $script_dir/shared/bin/set_common_functions.sh +source $script_dir/shared/bin/set_common_env_vars.sh +source $VITIS_DIR/Runtime/xrt_common_functions.sh + +# Source sdk_setup.sh +info_msg "Sourcing sdk_setup.sh" +if ! source $AWS_FPGA_REPO_DIR/sdk_setup.sh; then + return 1 +fi + +if [ -z "$SDK_DIR" ]; then + err_msg "SDK_DIR environment variable is not set. Please use 'source sdk_setup.sh' from the aws-fpga directory." + return 1 +fi + +debug=0 +override=0 +function usage { + echo -e "USAGE: source [\$AWS_FPGA_REPO_DIR/]$script_name [-d|-debug] [-h|-help] [-o|-override]" +} + +function help { + info_msg "$script_name" + info_msg " " + info_msg "Checks & Sets up the runtime environment for AWS FPGA Vitis Application usage." + info_msg " " + info_msg "vitis_runtime_check.sh script will:" + info_msg " (1) install FPGA Management Tools," + info_msg " (2) check if Xilinx Runtime (XRT) is installed" + info_msg " (3) check if correct version of Xilinx Runtime (XRT) is installed," + info_msg " (4) check if the required XOCL driver is running " + info_msg " (5) source runtime setup script " + echo " " + usage +} + +function check_xdma_driver { + + if lsmod | grep -q 'xdma' ; then + err_msg "Found XDMA Driver running. 
Please remove xdma driver using below command" + err_msg " rmmod xdma" + return 1 + fi +} + +function check_edma_driver { + + if lsmod | grep -q 'edma' ; then + err_msg "Found EDMA Driver running. Please remove edma driver using below command" + err_msg " rmmod edma" + return 1 + fi +} + +function check_xocl_driver { + if lsmod | grep -q 'xocl' ; then + info_msg "Found 'xocl Driver is installed and running. ' " + else + err_msg " XOCL Driver not installed. Please install xocl driver using below instructions" + err_msg " If using 2019.2 Vitis toolset please source $AWS_FPGA_REPO_DIR/vitis_setup.sh " + return 1 + fi +} + +function check_kernel_ver { + + ins_ker_ver=$(uname -r) + info_msg "Installed kernel version : $ins_ker_ver" + if grep -Fxq "$ins_ker_ver" $AWS_FPGA_REPO_DIR/Vitis/kernel_version.txt + then + info_msg "kernel version $ins_ker_ver has been validated for this devkit." + else + warn_msg "$ins_ker_ver does not match one of recommended kernel versions" + cat $AWS_FPGA_REPO_DIR/Vitis/kernel_version.txt + warn_msg "Xilinx Runtime not validated against your installed kernel version." + fi +} + +# Process command line args +args=( "$@" ) +for (( i = 0; i < ${#args[@]}; i++ )); do + arg=${args[$i]} + case $arg in + -d|-debug) + debug=1 + ;; + -h|-help) + help + return 0 + ;; + -o|-override) + override=1 + ;; + *) + err_msg "Invalid option: $arg\n" + usage + return 1 + esac +done + + +if ! exists vivado; then + if [[ -z "${VIVADO_TOOL_VERSION}" ]]; then + err_msg " VIVADO_TOOL_VERSION ENV variable is not set." + err_msg " ENV Variable VIVADO_TOOL_VERSION needs to be set for runtime usage. " + err_msg " If AFI was generated using V2019.2 tools use the command : export VIVADO_TOOL_VERSION=2019.2 " + err_msg " Please set VIVADO_TOOL_VERSION to the correct value and re-run script." + return 1 + else + info_msg " VIVADO tools not found. Reading VIVADO_TOOL_VERSION ENV variable to determine runtime version... 
" + VIVADO_TOOL_VERSION="${VIVADO_TOOL_VERSION}" + export VIVADO_TOOL_VERSION=${VIVADO_TOOL_VERSION:0:6} + fi +else + info_msg "You are using instance with installed vivado tools. determining VIVADO version for runtime setup..." + VIVADO_TOOL_VERSION=`vivado -version | grep Vivado | head -1 | sed 's:Vivado *::' | sed 's: .*$::' | sed 's:v::'` + export VIVADO_TOOL_VERSION=${VIVADO_TOOL_VERSION:0:6} +fi +info_msg "VIVADO_TOOL_VERSION is $VIVADO_TOOL_VERSION" + + +check_kernel_ver +check_xdma_driver +check_edma_driver + +if [[ "$VIVADO_TOOL_VERSION" =~ .*2019\.2.* || "$VIVADO_TOOL_VERSION" =~ .*2020\.1.* || "$VIVADO_TOOL_VERSION" =~ .*2020\.2.* ]]; then + info_msg "Xilinx Vivado version is $VIVADO_TOOL_VERSION" + + if [ $override == 1 ]; then + info_msg "XRT check overide selected." + source /opt/xilinx/xrt/setup.sh + return 0 + fi + + if [ -f "/opt/xilinx/xrt/include/version.h" ]; then + info_msg "XRT installed. proceeding to check version compatibility" + xrt_build_ver=$VIVADO_TOOL_VERSION + xrt_build_ver+=: + xrt_build_ver+=$(grep 'xrt_build_version_hash\[\]' /opt/xilinx/xrt/include/version.h | sed 's/";//' | sed 's/^.*"//') + info_msg "Installed XRT version : $xrt_build_ver" + if grep -Fxq "$xrt_build_ver" $AWS_FPGA_REPO_DIR/Vitis/vitis_xrt_version.txt + then + info_msg "XRT version $xrt_build_ver is supported." + info_msg " Now checking XOCL driver..." 
+ check_xocl_driver + if [ -f "/opt/xilinx/xrt/setup.sh" ]; then + source /opt/xilinx/xrt/setup.sh + else + err_msg " Cannot find /opt/xilinx/xrt/setup.sh" + err_msg " Please check XRT is installed correctly" + err_msg " Please Refer to $AWS_FPGA_REPO/Vitis/doc/XRT_installation_instructions.md for XRT installation instructions" + return 1 + fi + info_msg " XRT Runtime setup Done" + else + err_msg "$xrt_build_ver does not match recommended versions" + cat $AWS_FPGA_REPO_DIR/Vitis/vitis_xrt_version.txt + err_msg "Please Refer $AWS_FPGA_REPO/Vitis/doc/XRT_installation_instructions.md for XRT installation instructions" + return 1 + fi + else + err_msg "XRT not installed. Please install XRT" + err_msg "Please Refer $AWS_FPGA_REPO/Vitis/doc/XRT_installation_instructions.md for XRT installation instructions" + return 1 + fi +else + err_msg "Xilinx Vivado version is $VIVADO_TOOL_VERSION , only 2019.2, 2020.1 or 2020.2 are supported for Vitis " + return 1 +fi + +# Setup XRT as we need it for building +setup_runtime + +info_msg "Starting MPD" +systemctl is-active --quiet mpd || sudo systemctl start mpd + +info_msg "Vitis runtime check PASSED" diff --git a/vitis_setup.sh b/vitis_setup.sh new file mode 100644 index 00000000..37848da7 --- /dev/null +++ b/vitis_setup.sh @@ -0,0 +1,292 @@ +# Amazon FPGA Hardware Development Kit +# +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use +# this file except in compliance with the License. A copy of the License is +# located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or +# implied. See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Script must be sourced from a bash shell or it will not work +# When being sourced $0 will be the interactive shell and $BASH_SOURCE_ will contain the script being sourced +# When being run $0 and $_ will be the same. +script=${BASH_SOURCE[0]} +if [ $script == $0 ]; then + echo "ERROR: You must source this script" + exit 2 +fi + +full_script=$(readlink -f $script) +script_name=$(basename $full_script) +script_dir=$(dirname $full_script) +current_dir=$(pwd) + +source $script_dir/shared/bin/set_common_functions.sh +source $script_dir/shared/bin/set_common_env_vars.sh +source $VITIS_DIR/Runtime/xrt_common_functions.sh + +# Source sdk_setup.sh +info_msg "Sourcing sdk_setup.sh" +if ! source $AWS_FPGA_REPO_DIR/sdk_setup.sh; then + return 1 +fi + +if [ -z "$SDK_DIR" ]; then + err_msg "SDK_DIR environment variable is not set. Please use 'source sdk_setup.sh' from the aws-fpga directory." + return 1 +fi + +debug=0 + +function usage { + echo -e "USAGE: source [\$AWS_FPGA_REPO_DIR/]$script_name [-d|-debug] [-h|-help]" +} + +function help { + info_msg "$script_name" + info_msg " " + info_msg "Sets up the environment for AWS FPGA Vitis tools." + info_msg " " + info_msg "vitis_setup.sh script will:" + info_msg " (1) install FPGA Management Tools," + info_msg " (2) check if Xilinx tools are available," + info_msg " (3) check if required packages are installed," + info_msg " (4) Download lastest AWS Platform," + info_msg " (5) Install Runtime drivers " + echo " " + usage +} + +function check_set_xilinx_vitis { + if [[ ":$XILINX_VITIS" == ':' ]]; then + debug_msg "XILINX_VITIS is not set" + which vitis + RET=$? + if [ $RET != 0 ]; then + debug_msg "vitis not found in path." 
+ err_msg "XILINX_VITIS variable not set and vitis not in the path" + err_msg "Please set XILINX_VITIS variable to point to your location of your Xilinx installation or add location of vitis exectuable to your PATH variable" + return $RET + else + export XILINX_VITIS=`which vitis | sed 's:/bin/vitis::'` + info_msg "Setting XILINX_VITIS to $XILINX_VITIS" + fi + else + info_msg "XILINX_VITIS is already set to $XILINX_VITIS" + fi + # get Vitis release version, i.e. "2019.2" + RELEASE_VER=$(basename $XILINX_VITIS) + RELEASE_VER=${RELEASE_VER:0:6} + export RELEASE_VER=$RELEASE_VER + echo "RELEASE_VER equals $RELEASE_VER" +} + +function check_install_packages_centos { +#TODO: Check required packages are installed or install them +#TODO: Check version of gcc is above 4.8.5 (4.6.3 does not work) + for pkg in `cat $VITIS_DIR/packages.txt`; do + if yum list installed "$pkg" >/dev/null 2>&1; then + true + else + warn_msg " $pkg not installed - please run: sudo yum install $pkg " + fi + done +} + +function check_install_packages_ubuntu { + for pkg in `cat $VITIS_DIR/packages.txt`; do + if apt -qq list "$pkg" >/dev/null 2>&1; then + true + else + warn_msg " $pkg not installed - please run: sudo apt-get install $pkg " + fi + done +} + +function check_internet { + curl --silent --head -m 30 http://www.amazon.com + RET=$? 
+ if [ $RET != 0 ]; then + err_msg "curl cannot connect to the internet using please check your internet connection or proxy settings" + err_msg "To check your connection run: curl --silent --head -m 30 http://www.amazon.com " + return $RET + else + info_msg "Internet Access OK" + fi +} + +function check_icd { + info_msg "Checking ICD is installed" + if grep -q 'libxilinxopencl.so' /etc/OpenCL/vendors/xilinx.icd; then + info_msg "Found 'libxilinxopencl.so" + else + info_msg "/etc/OpenCL/vendors/xilinx.icd does not exist or does not contain lbixilinxopencl.so creating and adding libxilinxopencl.so to it" + sudo sh -c "echo libxilinxopencl.so > /etc/OpenCL/vendors/xilinx.icd" + RET=$? + if [ $RET != 0 ]; then + err_msg "/etc/OpenCL/vendors/xilinx.icd does not exist and cannot be created, sudo permissions needed to update it." + err_msg "Run the following with sudo permissions: sudo sh -c \"echo libxilinxopencl.so > /etc/OpenCL/vendors/xilinx.icd\" " + return $RET + else + echo "Done with ICD installation" + fi + fi +} + +# Process command line args +args=( "$@" ) +for (( i = 0; i < ${#args[@]}; i++ )); do + arg=${args[$i]} + case $arg in + -d|-debug) + debug=1 + ;; + -h|-help) + help + return 0 + ;; + *) + err_msg "Invalid option: $arg\n" + usage + return 1 + esac +done + +# Check XILINX_VITIS is set +if ! check_set_xilinx_vitis; then + return 1 +fi + +info_msg " XILINX_VITIS is set to $XILINX_VITIS" +# Install patches as required. +info_msg " Checking & installing required patches" +setup_patches + + +# Update Xilinx Vitis Examples from GitHub +info_msg "Using Vitis $RELEASE_VER" +if [[ $RELEASE_VER =~ .*2019\.2.* || $RELEASE_VER =~ .*2020\.1.* || $RELEASE_VER =~ .*2020\.2.* ]]; then + info_msg "Updating Xilinx Vitis Examples $RELEASE_VER" + git submodule update --init -- Vitis/examples/xilinx_$RELEASE_VER + export VIVADO_TOOL_VER=$RELEASE_VER + if [ -e $VITIS_DIR/examples/xilinx ]; then + if [ ! 
-L $VITIS_DIR/examples/xilinx ]; then + err_msg "ERROR: Vitis/examples/xilinx is not a symbolic link. Backup any data and remove Vitis/examples/xilinx directory. The setup needs to create a symbolic link from Vitis/examples/xilinx to Vitis/examples/xilinx_$RELEASE_VER" + return 1 + fi + fi + ln -sf $VITIS_DIR/examples/xilinx_$RELEASE_VER $VITIS_DIR/examples/xilinx +else + echo " $RELEASE_VER is not supported (2019.2, 2020.1 or 2020.2 are supported).\n" + return 2 +fi + +# settings64 removal - once we put this in the AMI, we will add a check +#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$XILINX_VITIS/lib/lnx64.o + +export LD_LIBRARY_PATH=`$XILINX_VITIS/bin/ldlibpath.sh $XILINX_VITIS/lib/lnx64.o` + +# Check if internet connection is available +if ! check_internet; then + return 1 +fi + +# Check ICD is installed +if ! check_icd; then + return 1 +fi + +# Check correct packages are installed +if [ -f "/etc/redhat-release" ]; then + if ! check_install_packages_centos; then + return 1 + fi +else + if ! check_install_packages_ubuntu; then + return 1 + fi +fi + +function setup_xsa { + + if [ "$#" -ne 3 ]; then + err_msg "Illegal number of parameters sent to the setup_xsa function!" + return 1 + fi + + XSA=$1 + XSA_S3_BASE_DIR=$2 + PLATFORM_ENV_VAR_NAME=$3 + + xsa_dir=$VITIS_DIR/aws_platform/$XSA/hw/ + vitis_xsa=$xsa_dir/$XSA.xsa + vitis_xsa_s3_bucket=aws-fpga-hdk-resources + s3_vitis_xsa=$vitis_xsa_s3_bucket/Vitis/$XSA_S3_BASE_DIR/$XSA/$XSA.xsa + + # set a variable to point to the platform for build and emulation runs + export "$PLATFORM_ENV_VAR_NAME"=$VITIS_DIR/aws_platform/$XSA/$XSA.xpfm + + # Download the sha256 + if [ ! -e $xsa_dir ]; then + mkdir -p $xsa_dir || { err_msg "Failed to create $xsa_dir"; return 2; } + fi + + # Use curl instead of AWS CLI so that credentials aren't required. 
+ curl -s https://s3.amazonaws.com/$s3_vitis_xsa.sha256 -o $vitis_xsa.sha256 || { err_msg "Failed to download XSA version from $s3_vitis_xsa.sha256 -o $vitis_xsa.sha256"; return 2; } + if grep -q '