onnxruntime_run_options_config_keys.h 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. // Copyright (c) Microsoft Corporation. All rights reserved.
  2. // Licensed under the MIT License.
  3. #pragma once
  4. /*
  5. * This file defines RunOptions Config Keys and format of the Config Values.
  6. *
  7. * The Naming Convention for a RunOptions Config Key,
  8. * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
  9. * Such as "ep.cuda.use_arena"
  10. * The Config Key cannot be empty
  11. * The maximum length of the Config Key is 128
  12. *
  13. * The string format of a RunOptions Config Value is defined individually for each Config.
  14. * The maximum length of the Config Value is 1024
  15. */
  16. // Key for enabling shrinkage of user-listed device memory arenas.
  17. // Expects a semicolon-separated list of "device:device_id" pairs (colon between key and value), in the format:
  18. // "device_0:device_id_0;device_1:device_id_1"
  19. // No whitespace is allowed in the provided list string.
  20. // Currently, the only supported devices are "cpu" and "gpu" (case sensitive).
  21. // If "cpu" is included in the list, the DisableCpuMemArena() API must not be called, i.e., the CPU arena must be enabled.
  22. // Example usage: "cpu:0;gpu:0" (or) "gpu:0"
  23. // By default, the value for this key is empty, i.e., no memory arenas are shrunk.
  24. static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage";
  25. // Set to '1' to skip synchronizing execution providers with the CPU at the end of a session run.
  26. // The default value is '0'.
  27. // Taking the CUDA EP as an example, this omits triggering cudaStreamSynchronize on the compute stream.
  28. static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers";
  29. // Sets the HTP performance mode for the QNN HTP backend before the session run.
  30. // Options for the HTP performance mode: "burst", "balanced", "default", "high_performance",
  31. // "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver",
  32. // "sustained_high_performance". Defaults to "default".
  33. static const char* const kOrtRunOptionsConfigQnnPerfMode = "qnn.htp_perf_mode";
  34. // Sets the HTP performance mode for the QNN HTP backend after the session run.
  35. static const char* const kOrtRunOptionsConfigQnnPerfModePostRun = "qnn.htp_perf_mode_post_run";
  36. // Sets the RPC control latency for the QNN HTP backend.
  37. static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_control_latency";
  38. // Sets the graph annotation id for the CUDA EP. Use with enable_cuda_graph=true.
  39. // The value should be an integer. If the value is not set, the default value is 0 and
  40. // the ORT session captures only one CUDA graph before another capture is requested.
  41. // If the value is set to -1, CUDA graph capture/replay is disabled for that run.
  42. // Users are not expected to set the value to 0 explicitly, as it is reserved for internal use.
  43. static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id";