|
NodeManager: |
|
Node ID: 13be7277f830f5a8b967d2a0092091c94c7576cfebf8a5fa66025fcf |
|
Node name: 192.168.0.2 |
|
InitialConfigResources: {node:192.168.0.2: 10000, accelerator_type:A40: 10000, node:__internal_head__: 10000, memory: 844922429440000, object_store_memory: 21474836480000, CPU: 200000, GPU: 20000} |
|
ClusterTaskManager: |
|
========== Node: 13be7277f830f5a8b967d2a0092091c94c7576cfebf8a5fa66025fcf ================= |
|
Infeasible queue length: 0 |
|
Schedule queue length: 0 |
|
Dispatch queue length: 0 |
|
num_waiting_for_resource: 0 |
|
num_waiting_for_plasma_memory: 0 |
|
num_waiting_for_remote_node_resources: 0 |
|
num_worker_not_started_by_job_config_not_exist: 0 |
|
num_worker_not_started_by_registration_timeout: 0 |
|
num_tasks_waiting_for_workers: 0 |
|
num_cancelled_tasks: 0 |
|
cluster_resource_scheduler state: |
|
Local id: -2158256074887862688 Local resources: {"total":{node:__internal_head__: [10000], accelerator_type:A40: [10000], GPU: [10000, 10000], CPU: [200000], memory: [844922429440000], object_store_memory: [21474836480000], node:192.168.0.2: [10000]}}, "available": {node:__internal_head__: [10000], accelerator_type:A40: [10000], GPU: [10000, 10000], CPU: [200000], memory: [844922429440000], object_store_memory: [21474836480000], node:192.168.0.2: [10000]}}, "labels":{"ray.io/node_id":"13be7277f830f5a8b967d2a0092091c94c7576cfebf8a5fa66025fcf",} is_draining: 0 is_idle: 1 Cluster resources: node id: -2158256074887862688{"total":{object_store_memory: 21474836480000, CPU: 200000, accelerator_type:A40: 10000, GPU: 20000, node:192.168.0.2: 10000, node:__internal_head__: 10000, memory: 844922429440000}}, "available": {object_store_memory: 21474836480000, CPU: 200000, memory: 844922429440000, GPU: 20000, node:192.168.0.2: 10000, accelerator_type:A40: 10000, node:__internal_head__: 10000}}, "labels":{"ray.io/node_id":"13be7277f830f5a8b967d2a0092091c94c7576cfebf8a5fa66025fcf",}, "is_draining": 0, "draining_deadline_timestamp_ms": -1} { "placment group locations": [], "node to bundles": []} |
|
Waiting tasks size: 0 |
|
Number of executing tasks: 0 |
|
Number of pinned task arguments: 0 |
|
Number of total spilled tasks: 0 |
|
Number of spilled waiting tasks: 0 |
|
Number of spilled unschedulable tasks: 0 |
|
Resource usage { |
|
} |
|
Backlog Size per scheduling descriptor :{workerId: num backlogs}: |
|
|
|
Running tasks by scheduling class: |
|
================================================== |
|
|
|
ClusterResources: |
|
LocalObjectManager: |
|
- num pinned objects: 0 |
|
- pinned objects size: 0 |
|
- num objects pending restore: 0 |
|
- num objects pending spill: 0 |
|
- num bytes pending spill: 0 |
|
- num bytes currently spilled: 0 |
|
- cumulative spill requests: 0 |
|
- cumulative restore requests: 0 |
|
- spilled objects pending delete: 0 |
|
|
|
ObjectManager: |
|
- num local objects: 0 |
|
- num unfulfilled push requests: 0 |
|
- num object pull requests: 0 |
|
- num chunks received total: 0 |
|
- num chunks received failed (all): 0 |
|
- num chunks received failed / cancelled: 0 |
|
- num chunks received failed / plasma error: 0 |
|
Event stats: |
|
Global stats: 0 total (0 active) |
|
Queueing time: mean = -nan s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
Execution time: mean = -nan s, total = 0.000 s |
|
Event stats: |
|
PushManager: |
|
- num pushes in flight: 0 |
|
- num chunks in flight: 0 |
|
- num chunks remaining: 0 |
|
- max chunks allowed: 409 |
|
OwnershipBasedObjectDirectory: |
|
- num listeners: 0 |
|
- cumulative location updates: 0 |
|
- num location updates per second: 0.000 |
|
- num location lookups per second: 0.000 |
|
- num locations added per second: 0.000 |
|
- num locations removed per second: 0.000 |
|
BufferPool: |
|
- create buffer state map size: 0 |
|
PullManager: |
|
- num bytes available for pulled objects: 2147483648 |
|
- num bytes being pulled (all): 0 |
|
- num bytes being pulled / pinned: 0 |
|
- get request bundles: BundlePullRequestQueue{0 total, 0 active, 0 inactive, 0 unpullable} |
|
- wait request bundles: BundlePullRequestQueue{0 total, 0 active, 0 inactive, 0 unpullable} |
|
- task request bundles: BundlePullRequestQueue{0 total, 0 active, 0 inactive, 0 unpullable} |
|
- first get request bundle: N/A |
|
- first wait request bundle: N/A |
|
- first task request bundle: N/A |
|
- num objects queued: 0 |
|
- num objects actively pulled (all): 0 |
|
- num objects actively pulled / pinned: 0 |
|
- num bundles being pulled: 0 |
|
- num pull retries: 0 |
|
- max timeout seconds: 0 |
|
- max timeout request is already processed. No entry. |
|
|
|
WorkerPool: |
|
- registered jobs: 1 |
|
- process_failed_job_config_missing: 0 |
|
- process_failed_rate_limited: 0 |
|
- process_failed_pending_registration: 0 |
|
- process_failed_runtime_env_setup_failed: 0 |
|
- num PYTHON workers: 20 |
|
- num PYTHON drivers: 1 |
|
- num PYTHON pending start requests: 0 |
|
- num PYTHON pending registration requests: 0 |
|
- num object spill callbacks queued: 0 |
|
- num object restore queued: 0 |
|
- num util functions queued: 0 |
|
- num idle workers: 20 |
|
TaskDependencyManager: |
|
- task deps map size: 0 |
|
- get req map size: 0 |
|
- wait req map size: 0 |
|
- local objects map size: 0 |
|
WaitManager: |
|
- num active wait requests: 0 |
|
Subscriber: |
|
Channel WORKER_OBJECT_LOCATIONS_CHANNEL |
|
- cumulative subscribe requests: 0 |
|
- cumulative unsubscribe requests: 0 |
|
- active subscribed publishers: 0 |
|
- cumulative published messages: 0 |
|
- cumulative processed messages: 0 |
|
Channel WORKER_OBJECT_EVICTION |
|
- cumulative subscribe requests: 0 |
|
- cumulative unsubscribe requests: 0 |
|
- active subscribed publishers: 0 |
|
- cumulative published messages: 0 |
|
- cumulative processed messages: 0 |
|
Channel WORKER_REF_REMOVED_CHANNEL |
|
- cumulative subscribe requests: 0 |
|
- cumulative unsubscribe requests: 0 |
|
- active subscribed publishers: 0 |
|
- cumulative published messages: 0 |
|
- cumulative processed messages: 0 |
|
num async plasma notifications: 0 |
|
Remote node managers: |
|
Event stats: |
|
Global stats: 45735 total (35 active) |
|
Queueing time: mean = 22.619 ms, max = 123.051 s, min = 57.000 ns, total = 1034.490 s |
|
Execution time: mean = 228.250 us, total = 10.439 s |
|
Event stats: |
|
NodeManagerService.grpc_server.ReportWorkerBacklog.HandleRequestImpl - 10920 total (0 active), Execution time: mean = 38.910 us, total = 424.898 ms, Queueing time: mean = 118.335 us, max = 26.128 ms, min = 5.488 us, total = 1.292 s |
|
NodeManagerService.grpc_server.ReportWorkerBacklog - 10920 total (0 active), Execution time: mean = 555.357 us, total = 6.064 s, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
RaySyncer.OnDemandBroadcasting - 5196 total (1 active), Execution time: mean = 11.697 us, total = 60.779 ms, Queueing time: mean = 98.022 us, max = 28.199 ms, min = 12.241 us, total = 509.320 ms |
|
ObjectManager.UpdateAvailableMemory - 5196 total (0 active), Execution time: mean = 6.196 us, total = 32.192 ms, Queueing time: mean = 116.465 us, max = 706.852 us, min = 3.115 us, total = 605.153 ms |
|
NodeManager.CheckGC - 5196 total (1 active), Execution time: mean = 3.059 us, total = 15.895 ms, Queueing time: mean = 105.703 us, max = 28.206 ms, min = 6.199 us, total = 549.234 ms |
|
RayletWorkerPool.deadline_timer.kill_idle_workers - 2600 total (1 active), Execution time: mean = 19.169 us, total = 49.838 ms, Queueing time: mean = 78.963 us, max = 1.689 ms, min = 11.310 us, total = 205.304 ms |
|
MemoryMonitor.CheckIsMemoryUsageAboveThreshold - 2076 total (1 active), Execution time: mean = 460.779 us, total = 956.577 ms, Queueing time: mean = 89.006 us, max = 23.328 ms, min = 9.730 us, total = 184.776 ms |
|
NodeManager.ScheduleAndDispatchTasks - 521 total (1 active), Execution time: mean = 15.483 us, total = 8.066 ms, Queueing time: mean = 83.815 us, max = 2.235 ms, min = 5.788 us, total = 43.668 ms |
|
NodeManager.deadline_timer.spill_objects_when_over_threshold - 520 total (1 active), Execution time: mean = 2.884 us, total = 1.500 ms, Queueing time: mean = 184.971 us, max = 2.259 ms, min = 6.405 us, total = 96.185 ms |
|
NodeManager.deadline_timer.flush_free_objects - 520 total (1 active), Execution time: mean = 8.940 us, total = 4.649 ms, Queueing time: mean = 180.920 us, max = 2.264 ms, min = 8.861 us, total = 94.079 ms |
|
NodeManagerService.grpc_server.GetResourceLoad.HandleRequestImpl - 520 total (0 active), Execution time: mean = 102.406 us, total = 53.251 ms, Queueing time: mean = 117.319 us, max = 492.037 us, min = 16.203 us, total = 61.006 ms |
|
NodeManagerService.grpc_server.GetResourceLoad - 520 total (0 active), Execution time: mean = 636.948 us, total = 331.213 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ClusterResourceManager.ResetRemoteNodeView - 174 total (1 active), Execution time: mean = 8.947 us, total = 1.557 ms, Queueing time: mean = 76.749 us, max = 253.487 us, min = 16.912 us, total = 13.354 ms |
|
NodeManager.GcsCheckAlive - 104 total (1 active), Execution time: mean = 286.918 us, total = 29.839 ms, Queueing time: mean = 642.545 us, max = 2.306 ms, min = 99.479 us, total = 66.825 ms |
|
ray::rpc::NodeInfoGcsService.grpc_client.CheckAlive.OnReplyReceived - 104 total (0 active), Execution time: mean = 53.596 us, total = 5.574 ms, Queueing time: mean = 117.882 us, max = 218.783 us, min = 27.880 us, total = 12.260 ms |
|
ray::rpc::NodeInfoGcsService.grpc_client.CheckAlive - 104 total (0 active), Execution time: mean = 1.519 ms, total = 157.936 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
NodeManager.deadline_timer.record_metrics - 104 total (1 active), Execution time: mean = 552.821 us, total = 57.493 ms, Queueing time: mean = 379.650 us, max = 1.725 ms, min = 8.885 us, total = 39.484 ms |
|
ClientConnection.async_read.ProcessMessageHeader - 95 total (21 active), Execution time: mean = 8.155 us, total = 774.725 us, Queueing time: mean = 10.849 s, max = 123.051 s, min = 33.517 us, total = 1030.640 s |
|
ClientConnection.async_read.ProcessMessage - 74 total (0 active), Execution time: mean = 929.411 us, total = 68.776 ms, Queueing time: mean = 43.207 us, max = 369.478 us, min = 3.543 us, total = 3.197 ms |
|
NodeManager.deadline_timer.debug_state_dump - 52 total (1 active, 1 running), Execution time: mean = 1.791 ms, total = 93.132 ms, Queueing time: mean = 66.948 us, max = 153.123 us, min = 21.807 us, total = 3.481 ms |
|
ClientConnection.async_write.DoAsyncWrites - 22 total (0 active), Execution time: mean = 1.574 us, total = 34.627 us, Queueing time: mean = 41.270 us, max = 146.493 us, min = 11.116 us, total = 907.929 us |
|
NodeManagerService.grpc_server.GetSystemConfig.HandleRequestImpl - 21 total (0 active), Execution time: mean = 121.458 us, total = 2.551 ms, Queueing time: mean = 94.581 us, max = 235.076 us, min = 11.959 us, total = 1.986 ms |
|
ObjectManager.ObjectAdded - 21 total (0 active), Execution time: mean = 12.500 us, total = 262.503 us, Queueing time: mean = 106.328 us, max = 212.914 us, min = 9.723 us, total = 2.233 ms |
|
NodeManagerService.grpc_server.GetSystemConfig - 21 total (0 active), Execution time: mean = 758.936 us, total = 15.938 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ObjectManager.ObjectDeleted - 21 total (0 active), Execution time: mean = 19.347 us, total = 406.289 us, Queueing time: mean = 139.222 us, max = 517.588 us, min = 35.214 us, total = 2.924 ms |
|
PeriodicalRunner.RunFnPeriodically - 13 total (0 active), Execution time: mean = 197.633 us, total = 2.569 ms, Queueing time: mean = 4.308 ms, max = 15.027 ms, min = 25.785 us, total = 56.007 ms |
|
NodeManagerService.grpc_server.RequestWorkerLease - 10 total (0 active), Execution time: mean = 780.093 us, total = 7.801 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
- 10 total (0 active), Execution time: mean = 1.025 us, total = 10.254 us, Queueing time: mean = 114.099 us, max = 182.528 us, min = 27.587 us, total = 1.141 ms |
|
RaySyncer.BroadcastMessage - 10 total (0 active), Execution time: mean = 221.900 us, total = 2.219 ms, Queueing time: mean = 695.800 ns, max = 931.000 ns, min = 70.000 ns, total = 6.958 us |
|
WorkerPool.PopWorkerCallback - 10 total (0 active), Execution time: mean = 21.932 us, total = 219.320 us, Queueing time: mean = 109.091 us, max = 200.547 us, min = 20.707 us, total = 1.091 ms |
|
NodeManagerService.grpc_server.ReturnWorker.HandleRequestImpl - 10 total (0 active), Execution time: mean = 123.767 us, total = 1.238 ms, Queueing time: mean = 96.345 us, max = 137.794 us, min = 36.307 us, total = 963.446 us |
|
NodeManagerService.grpc_server.ReturnWorker - 10 total (0 active), Execution time: mean = 635.791 us, total = 6.358 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
NodeManagerService.grpc_server.RequestWorkerLease.HandleRequestImpl - 10 total (0 active), Execution time: mean = 115.576 us, total = 1.156 ms, Queueing time: mean = 115.782 us, max = 331.820 us, min = 13.069 us, total = 1.158 ms |
|
NodeManager.deadline_timer.print_event_loop_stats - 9 total (1 active), Execution time: mean = 2.059 ms, total = 18.527 ms, Queueing time: mean = 54.393 us, max = 111.886 us, min = 25.166 us, total = 489.536 us |
|
ray::rpc::InternalPubSubGcsService.grpc_client.GcsSubscriberCommandBatch - 2 total (0 active), Execution time: mean = 1.646 ms, total = 3.291 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ray::rpc::InternalPubSubGcsService.grpc_client.GcsSubscriberPoll - 2 total (1 active), Execution time: mean = 460.725 ms, total = 921.449 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
RaySyncerRegister - 2 total (0 active), Execution time: mean = 1.800 us, total = 3.599 us, Queueing time: mean = 209.500 ns, max = 362.000 ns, min = 57.000 ns, total = 419.000 ns |
|
ray::rpc::InternalPubSubGcsService.grpc_client.GcsSubscriberCommandBatch.OnReplyReceived - 2 total (0 active), Execution time: mean = 129.215 us, total = 258.430 us, Queueing time: mean = 412.096 us, max = 685.035 us, min = 139.158 us, total = 824.193 us |
|
ray::rpc::JobInfoGcsService.grpc_client.AddJob - 1 total (0 active), Execution time: mean = 1.759 ms, total = 1.759 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ray::rpc::NodeInfoGcsService.grpc_client.GetAllNodeInfo - 1 total (0 active), Execution time: mean = 1.586 ms, total = 1.586 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ray::rpc::JobInfoGcsService.grpc_client.GetAllJobInfo - 1 total (0 active), Execution time: mean = 1.546 ms, total = 1.546 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ray::rpc::NodeInfoGcsService.grpc_client.RegisterNode - 1 total (0 active), Execution time: mean = 2.263 ms, total = 2.263 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ray::rpc::NodeInfoGcsService.grpc_client.GetAllNodeInfo.OnReplyReceived - 1 total (0 active), Execution time: mean = 132.115 us, total = 132.115 us, Queueing time: mean = 203.939 us, max = 203.939 us, min = 203.939 us, total = 203.939 us |
|
ray::rpc::NodeInfoGcsService.grpc_client.RegisterNode.OnReplyReceived - 1 total (0 active), Execution time: mean = 407.271 us, total = 407.271 us, Queueing time: mean = 44.821 us, max = 44.821 us, min = 44.821 us, total = 44.821 us |
|
ray::rpc::InternalKVGcsService.grpc_client.GetInternalConfig.OnReplyReceived - 1 total (0 active), Execution time: mean = 1.026 s, total = 1.026 s, Queueing time: mean = 109.121 us, max = 109.121 us, min = 109.121 us, total = 109.121 us |
|
ray::rpc::JobInfoGcsService.grpc_client.GetAllJobInfo.OnReplyReceived - 1 total (0 active), Execution time: mean = 29.389 us, total = 29.389 us, Queueing time: mean = 150.068 us, max = 150.068 us, min = 150.068 us, total = 150.068 us |
|
Subscriber.HandlePublishedMessage_GCS_JOB_CHANNEL - 1 total (0 active), Execution time: mean = 86.446 us, total = 86.446 us, Queueing time: mean = 403.833 us, max = 403.833 us, min = 403.833 us, total = 403.833 us |
|
NodeManager.GCTaskFailureReason - 1 total (1 active), Execution time: mean = 0.000 s, total = 0.000 s, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
ray::rpc::InternalPubSubGcsService.grpc_client.GcsSubscriberPoll.OnReplyReceived - 1 total (0 active), Execution time: mean = 327.529 us, total = 327.529 us, Queueing time: mean = 115.620 us, max = 115.620 us, min = 115.620 us, total = 115.620 us |
|
ray::rpc::JobInfoGcsService.grpc_client.AddJob.OnReplyReceived - 1 total (0 active), Execution time: mean = 60.113 us, total = 60.113 us, Queueing time: mean = 447.798 us, max = 447.798 us, min = 447.798 us, total = 447.798 us |
|
ray::rpc::InternalKVGcsService.grpc_client.GetInternalConfig - 1 total (0 active), Execution time: mean = 1.740 ms, total = 1.740 ms, Queueing time: mean = 0.000 s, max = -0.000 s, min = 9223372036.855 s, total = 0.000 s |
|
DebugString() time ms: 1 |