Skip to content

OCP demo configuration

Igor Chorążewicz edited this page Oct 11, 2023 · 7 revisions

Hardware

  • 1 x Intel Xeon Platinum 8470 Processor @2.0GHz
  • 52 cores, hyperthreading enabled
  • 8 DDR5-4800 channels per socket, 128GB in total
  • 2 CXL memory expanders per socket, 192 GB in total

CXL2.0 memory expander:

  • EDSFF E3.S, PCIe Gen 5.0 x8 interface, 96GB
  • Bidirectional BW comparable to 1-ch DDR5 BW:
    • All read: 17.8 GB/s
    • R:W 3:1 23.6 GB/s
    • R:W 2:1 25.1 GB/s
    • R:W 1:1 27.2 GB/s
  • Latency: 250ns

OS

Multi-Tier (DRAM + CXL) CacheLib vs DRAM-only CacheLib

CacheLib version: https://github.com/intel/CacheLib/tree/b99f2b30aa7efb737d6f7ac54d9912ce42a6582c

Cachebench configuration (Multi-tier)
{
  "cache_config":
  {
    "cacheSizeMB": 32768,
    "backgroundEvictorIntervalMilSec": 4,
    "evictorThreads": 4,
    "backgroundPromoterIntervalMilSec":0,
    "promoterThreads": 0,
    "insertToFirstFreeTier": true,
    "memoryTiers" : [
      {
        "ratio": 1,
        "memBindNodes": "0"
      },
      {
        "ratio": 15,
        "memBindNodes": "2,3,4"
      }
    ],
    "allocFactor": 1.08,
    "maxAllocSize": 524288,
    "minAllocSize": 64,
    "enableChainedItem": true,
    "htBucketPower": 29,
    "moveOnSlabRelease": false,
    "poolRebalanceIntervalSec": 2,
    "poolResizeIntervalSec": 2,
    "rebalanceStrategy": "hits"
  },
  "test_config":
  {
    "opRatePerSec": 300000,
    "enableLookaside": false,
    "generator": "replay",
    "replayGeneratorConfig":
    {
        "ampFactor": 200
    },
    "repeatTraceReplay": true,
    "repeatOpCount" : true,
    "onlySetIfMiss" : false,
    "numOps": 100000000000,
    "numThreads": 24,
    "prepopulateCache": true,
    "traceFileNames": [
            "kvcache_traces_1.csv",
            "kvcache_traces_2.csv",
            "kvcache_traces_3.csv",
            "kvcache_traces_4.csv",
            "kvcache_traces_5.csv"
    ]
  }
}

Cachebench configuration (DRAM-only)
{
  "cache_config":
  {
    "cacheSizeMB": 32768,
    "allocFactor": 1.08,
    "maxAllocSize": 524288,
    "minAllocSize": 64,
    "enableChainedItem": true,
    "htBucketPower": 29,
    "moveOnSlabRelease": false,
    "poolRebalanceIntervalSec": 2,
    "poolResizeIntervalSec": 2,
    "rebalanceStrategy": "hits"
  },
  "test_config":
  {
    "opRatePerSec": 300000,
    "enableLookaside": false,
    "generator": "replay",
    "replayGeneratorConfig":
    {
        "ampFactor": 200
    },
    "repeatTraceReplay": true,
    "repeatOpCount" : true,
    "onlySetIfMiss" : false,
    "numOps": 100000000000,
    "numThreads": 24,
    "prepopulateCache": true,
    "traceFileNames": [
            "kvcache_traces_1.csv",
            "kvcache_traces_2.csv",
            "kvcache_traces_3.csv",
            "kvcache_traces_4.csv",
            "kvcache_traces_5.csv"
    ]
  }
}

Bandwidth-expanded (DRAM+CXL) CacheLib vs DRAM-only CacheLib

CacheLib version: https://github.com/facebook/CacheLib/tree/55d1e748f34c425018f94c7cdb33631d5e381ab0

Cachebench configuration (DRAM 128GB + CXL 32GB)
{
  "cache_config": {
    "cacheSizeMB": 163840,
    "poolRebalanceIntervalSec": 0,
    "htBucketPower" : 24,
    "cacheDir": "/tmp/mem-tier-04",
    "memoryTiers" : [
      {
        "ratio": 1,
        "memBindNodes": "0,1,2,3,4"
      }
    ]
  },
  "test_config":
    {
      "addChainedRatio": 0.0,
      "delRatio": 0.0,
      "enableLookaside": true,
      "getRatio": 0.9911552928593673,
      "keySizeRange": [
        1,
        8,
        64
      ],
      "keySizeRangeProbability": [
        0.3,
        0.7
      ],
      "loneGetRatio": 0.008844707140632665,
      "numKeys": 8935378,
      "numOps": 3000000,
      "numThreads": 20,
      "popDistFile": "pop.json",
      "setRatio": 0.0,
      "valSizeDistFile": "sizes.json"
    }

}
Cachebench configuration (DRAM 160GB)
{
  "cache_config": {
    "cacheSizeMB": 163840,
    "poolRebalanceIntervalSec": 0,
    "htBucketPower" : 24,
    "cacheDir": "/tmp/mem-tier-01"
  },
  "test_config":
    {
      "addChainedRatio": 0.0,
      "delRatio": 0.0,
      "enableLookaside": true,
      "getRatio": 0.9911552928593673,
      "keySizeRange": [
        1,
        8,
        64
      ],
      "keySizeRangeProbability": [
        0.3,
        0.7
      ],
      "loneGetRatio": 0.008844707140632665,
      "numKeys": 8935378,
      "numOps": 3000000,
      "numThreads": 20,
      "popDistFile": "pop.json",
      "setRatio": 0.0,
      "valSizeDistFile": "sizes.json"
    }

}

CXL Hybrid Cache (CXL+NVMe) vs DRAM Hybrid Cache (DRAM + NVMe)

CacheLib version: https://github.com/facebook/CacheLib/tree/55d1e748f34c425018f94c7cdb33631d5e381ab0 with memory allocation patch

Cachebench configuration (DRAM 32GB + NVMe 468GB)
{
    "cache_config": {
        "cacheSizeMB": 32768,
        "htBucketPower": 24,
        "htLockPower": 10,
        "cacheDir": "/tmp/mem-tier-rp-dram",
        "allocSizes": [
            128,
            ...,
            4194304
        ],
        "numPools": 1,
        "poolSizes": [
            1.0
        ],
        "nvmCacheSizeMB": 479232,
        "nvmCachePaths": [
            "/dev/nvme0n1"
        ],
        "navyBlockSize": 4096,
        "navySegmentedFifoSegmentRatio": [
            1,
            1,
            1
        ],
        "navyReqOrderShardsPower": 0,
        "navyBigHashSizePct": 0,
        "navyHitsReinsertionThreshold": 1,
        "navyProbabilityReinsertionThreshold": 0,
        "navyReaderThreads": 128,
        "navyWriterThreads": 300,
        "navyCleanRegions": 6,
        "navyNumInmemBuffers": 6,
        "navyParcelMemoryMB": 102400,
        "navyDataChecksum": false,
        "truncateItemToOriginalAllocSizeInNvm": true,
        "memoryOnlyTTL": 7200,
        "navyMaxConcurrentInserts": 1600000,
        "navyRegionSizeMB": 256,
        "printNvmCounters": true,
        "useTraceTimeStamp": true,
        "tickerSynchingSeconds": 600.0
    },
    "test_config": {
        "enableLookaside": false,
        "generator": "piecewise-replay",
        "numOps": 1000000000,
        "numThreads": 24,
        "populateItem": true,
        "prepopulateCache": false,
        "traceFileName": "rnha0c01_20230315_20230322_0.8000.csv",
        "replayGeneratorConfig": {
            "numAggregationFields": 3,
            "numExtraFields": 0,
            "statsPerAggField": {},
            "ampFactor": 10
        },
        "cachePieceSize": 65536
    }
}

Cachebench configuration (CXL 32GB + NVMe 468GB)
{
    "cache_config": {
        "cacheSizeMB": 32768,
        "htBucketPower": 24,
        "htLockPower": 10,
        "cacheDir": "/tmp/mem-tier-rp-cxl",
        "memoryTiers" : [
          {
            "ratio": 1,
            "memBindNodes": "2,3,4"
          }
        ],
        "allocSizes": [
            128,
            ...,
            4194304
        ],
        "numPools": 1,
        "poolSizes": [
            1.0
        ],
        "nvmCacheSizeMB": 479232,
        "nvmCachePaths": [
            "/dev/nvme2n1"
        ],
        "navyBlockSize": 4096,
        "navySegmentedFifoSegmentRatio": [
            1,
            1,
            1
        ],
        "navyReqOrderShardsPower": 0,
        "navyBigHashSizePct": 0,
        "navyHitsReinsertionThreshold": 1,
        "navyProbabilityReinsertionThreshold": 0,
        "navyReaderThreads": 128,
        "navyWriterThreads": 300,
        "navyCleanRegions": 6,
        "navyNumInmemBuffers": 6,
        "navyParcelMemoryMB": 102400,
        "navyDataChecksum": false,
        "truncateItemToOriginalAllocSizeInNvm": true,
        "memoryOnlyTTL": 7200,
        "navyMaxConcurrentInserts": 1600000,
        "navyRegionSizeMB": 256,
        "printNvmCounters": true,
        "useTraceTimeStamp": true,
        "tickerSynchingSeconds": 600.0
    },
    "test_config": {
        "enableLookaside": false,
        "generator": "piecewise-replay",
        "numOps": 1000000000,
        "numThreads": 24,
        "populateItem": true,
        "prepopulateCache": false,
        "traceFileName": "rnha0c01_20230315_20230322_0.8000.csv",
        "replayGeneratorConfig": {
            "numAggregationFields": 3,
            "numExtraFields": 0,
            "statsPerAggField": {},
            "ampFactor": 10
        },
        "cachePieceSize": 65536
    }
}


Clone this wiki locally