Skip to content

OCP demo configuration

Igor Chorążewicz edited this page Oct 12, 2023 · 7 revisions

Hardware

  • 2 x Intel Xeon Platinum 8470 Processor @2.0GHz
  • 52 cores/socket, hyperthreading enabled
  • 16x 64GB DDR5-4800 channels per socket, 1TB/socket in total
  • 3x 96GB CXL memory expanders per socket, 288 GB in total

CXL 2.0 memory expander:

  • EDSFF E3.S, PCIe Gen 5.0 x8 interface, 96GB
  • Bidirectional BW comparable to 1-ch DDR5 BW:
    • All read: 17.8 GB/s
    • R:W 3:1 23.6 GB/s
    • R:W 2:1 25.1 GB/s
    • R:W 1:1 27.2 GB/s
  • Latency: 250ns

OS

Multi-Tier (DRAM + CXL) CacheLib vs DRAM-only CacheLib

CacheLib version: https://github.com/intel/CacheLib/tree/b99f2b30aa7efb737d6f7ac54d9912ce42a6582c

Cachebench configuration (Multi-tier)
{
  "cache_config":
  {
    "cacheSizeMB": 32768,
    "backgroundEvictorIntervalMilSec": 4,
    "evictorThreads": 4,
    "backgroundPromoterIntervalMilSec":0,
    "promoterThreads": 0,
    "insertToFirstFreeTier": true,
    "memoryTiers" : [
      {
        "ratio": 1,
        "memBindNodes": "0"
      },
      {
        "ratio": 15,
        "memBindNodes": "2,3,4"
      }
    ],
    "allocFactor": 1.08,
    "maxAllocSize": 524288,
    "minAllocSize": 64,
    "enableChainedItem": true,
    "htBucketPower": 29,
    "moveOnSlabRelease": false,
    "poolRebalanceIntervalSec": 2,
    "poolResizeIntervalSec": 2,
    "rebalanceStrategy": "hits"
  },
  "test_config":
  {
    "opRatePerSec": 300000,
    "enableLookaside": false,
    "generator": "replay",
    "replayGeneratorConfig":
    {
        "ampFactor": 200
    },
    "repeatTraceReplay": true,
    "repeatOpCount" : true,
    "onlySetIfMiss" : false,
    "numOps": 100000000000,
    "numThreads": 24,
    "prepopulateCache": true,
    "traceFileNames": [
            "kvcache_traces_1.csv",
            "kvcache_traces_2.csv",
            "kvcache_traces_3.csv",
            "kvcache_traces_4.csv",
            "kvcache_traces_5.csv"
    ]
  }
}

Cachebench configuration (DRAM-only)
{
  "cache_config":
  {
    "cacheSizeMB": 32768,
    "allocFactor": 1.08,
    "maxAllocSize": 524288,
    "minAllocSize": 64,
    "enableChainedItem": true,
    "htBucketPower": 29,
    "moveOnSlabRelease": false,
    "poolRebalanceIntervalSec": 2,
    "poolResizeIntervalSec": 2,
    "rebalanceStrategy": "hits"
  },
  "test_config":
  {
    "opRatePerSec": 300000,
    "enableLookaside": false,
    "generator": "replay",
    "replayGeneratorConfig":
    {
        "ampFactor": 200
    },
    "repeatTraceReplay": true,
    "repeatOpCount" : true,
    "onlySetIfMiss" : false,
    "numOps": 100000000000,
    "numThreads": 24,
    "prepopulateCache": true,
    "traceFileNames": [
            "kvcache_traces_1.csv",
            "kvcache_traces_2.csv",
            "kvcache_traces_3.csv",
            "kvcache_traces_4.csv",
            "kvcache_traces_5.csv"
    ]
  }
}

Bandwidth-expanded (DRAM + CXL) CacheLib vs DRAM-only CacheLib

CacheLib version:

Cachebench configuration (DRAM 128GB + CXL 32GB)
{
  "cache_config": {
    "cacheSizeMB": 163840,
    "poolRebalanceIntervalSec": 0,
    "htBucketPower" : 24,
    "cacheDir": "/tmp/mem-tier-04",
    "memoryTiers" : [
      {
        "ratio": 1,
        "memBindNodes": "0,1,2,3,4"
      }
    ]
  },
  "test_config":
    {
      "addChainedRatio": 0.0,
      "delRatio": 0.0,
      "enableLookaside": true,
      "getRatio": 0.9911552928593673,
      "keySizeRange": [
        1,
        8,
        64
      ],
      "keySizeRangeProbability": [
        0.3,
        0.7
      ],
      "loneGetRatio": 0.008844707140632665,
      "numKeys": 8935378,
      "numOps": 3000000,
      "numThreads": 20,
      "popDistFile": "pop.json",
      "setRatio": 0.0,
      "valSizeDistFile": "sizes.json"
    }

}
Cachebench configuration (DRAM 160GB)
{
  "cache_config": {
    "cacheSizeMB": 163840,
    "poolRebalanceIntervalSec": 0,
    "htBucketPower" : 24,
    "cacheDir": "/tmp/mem-tier-01"
  },
  "test_config":
    {
      "addChainedRatio": 0.0,
      "delRatio": 0.0,
      "enableLookaside": true,
      "getRatio": 0.9911552928593673,
      "keySizeRange": [
        1,
        8,
        64
      ],
      "keySizeRangeProbability": [
        0.3,
        0.7
      ],
      "loneGetRatio": 0.008844707140632665,
      "numKeys": 8935378,
      "numOps": 3000000,
      "numThreads": 20,
      "popDistFile": "pop.json",
      "setRatio": 0.0,
      "valSizeDistFile": "sizes.json"
    }

}

CXL Hybrid Cache (CXL + NVMe) vs DRAM Hybrid Cache (DRAM + NVMe)

CacheLib version:

Cachebench configuration (DRAM 32GB + NVMe 468GB)
{
    "cache_config": {
        "cacheSizeMB": 32768,
        "htBucketPower": 24,
        "htLockPower": 10,
        "cacheDir": "/tmp/mem-tier-rp-dram",
        "allocSizes": [
            128,
            ...
            4194304
        ],
        "numPools": 1,
        "poolSizes": [
            1.0
        ],
        "nvmCacheSizeMB": 479232,
        "nvmCachePaths": [
            "/dev/nvme0n1"
        ],
        "navyBlockSize": 4096,
        "navySegmentedFifoSegmentRatio": [
            1,
            1,
            1
        ],
        "navyReqOrderShardsPower": 0,
        "navyBigHashSizePct": 0,
        "navyHitsReinsertionThreshold": 1,
        "navyProbabilityReinsertionThreshold": 0,
        "navyReaderThreads": 128,
        "navyWriterThreads": 300,
        "navyCleanRegions": 6,
        "navyNumInmemBuffers": 6,
        "navyParcelMemoryMB": 102400,
        "navyDataChecksum": false,
        "truncateItemToOriginalAllocSizeInNvm": true,
        "memoryOnlyTTL": 7200,
        "navyMaxConcurrentInserts": 1600000,
        "navyRegionSizeMB": 256,
        "printNvmCounters": true,
        "useTraceTimeStamp": true,
        "tickerSynchingSeconds": 600.0
    },
    "test_config": {
        "enableLookaside": false,
        "generator": "piecewise-replay",
        "numOps": 1000000000,
        "numThreads": 24,
        "populateItem": true,
        "prepopulateCache": false,
        "traceFileName": "rnha0c01_20230315_20230322_0.8000.csv",
        "replayGeneratorConfig": {
            "numAggregationFields": 3,
            "numExtraFields": 0,
            "statsPerAggField": {},
            "ampFactor": 10
        },
        "cachePieceSize": 65536
    }
}

Cachebench configuration (CXL 32GB + NVMe 468GB)
{
    "cache_config": {
        "cacheSizeMB": 32768,
        "htBucketPower": 24,
        "htLockPower": 10,
        "cacheDir": "/tmp/mem-tier-rp-cxl",
        "memoryTiers" : [
          {
            "ratio": 1,
            "memBindNodes": "2,3,4"
          }
        ],
        "allocSizes": [
            128,
            ...
            4194304
        ],
        "numPools": 1,
        "poolSizes": [
            1.0
        ],
        "nvmCacheSizeMB": 479232,
        "nvmCachePaths": [
            "/dev/nvme2n1"
        ],
        "navyBlockSize": 4096,
        "navySegmentedFifoSegmentRatio": [
            1,
            1,
            1
        ],
        "navyReqOrderShardsPower": 0,
        "navyBigHashSizePct": 0,
        "navyHitsReinsertionThreshold": 1,
        "navyProbabilityReinsertionThreshold": 0,
        "navyReaderThreads": 128,
        "navyWriterThreads": 300,
        "navyCleanRegions": 6,
        "navyNumInmemBuffers": 6,
        "navyParcelMemoryMB": 102400,
        "navyDataChecksum": false,
        "truncateItemToOriginalAllocSizeInNvm": true,
        "memoryOnlyTTL": 7200,
        "navyMaxConcurrentInserts": 1600000,
        "navyRegionSizeMB": 256,
        "printNvmCounters": true,
        "useTraceTimeStamp": true,
        "tickerSynchingSeconds": 600.0
    },
    "test_config": {
        "enableLookaside": false,
        "generator": "piecewise-replay",
        "numOps": 1000000000,
        "numThreads": 24,
        "populateItem": true,
        "prepopulateCache": false,
        "traceFileName": "rnha0c01_20230315_20230322_0.8000.csv",
        "replayGeneratorConfig": {
            "numAggregationFields": 3,
            "numExtraFields": 0,
            "statsPerAggField": {},
            "ampFactor": 10
        },
        "cachePieceSize": 65536
    }
}


Notices and Disclaimers

Performance varies by use, configuration and other factors. Learn more on the Performance Index site (www.Intel.com/PerformanceIndex).

Performance results are based on testing as of dates shown in configurations and may not reflect all publicly available updates. No product or component can be absolutely secure.

Your costs and results may vary.

Intel technologies may require enabled hardware, software or service activation.

© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.