Skip to content

Commit fb41e47

Browse files
committed
enable auto-restarts of the NRI plugin server after the container runtime is restarted
Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
1 parent b39720b commit fb41e47

1 file changed

Lines changed: 18 additions & 3 deletions

File tree

  • cmd/nvidia-ctk-installer/container/runtime/nri

cmd/nvidia-ctk-installer/container/runtime/nri/plugin.go

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,16 @@ func containerName(pod *api.PodSandbox, container *api.Container) string {
113113
return container.Name
114114
}
115115

116-
// Start starts the NRI plugin
116+
// Start initializes the NRI plugin stub and starts the NRI plugin server
117117
func (p *Plugin) Start(ctx context.Context, nriSocketPath, nriPluginIdx string) error {
118118
pluginOpts := []stub.Option{
119119
stub.WithPluginIdx(nriPluginIdx),
120120
stub.WithLogger(toNriLogger{p.logger}),
121121
stub.WithOnClose(func() {
122-
p.logger.Infof("NRI ttrpc connection to %s is down. NRI plugin stopped.", nriSocketPath)
122+
p.logger.Infof("NRI ttrpc connection to %s is down. NRI plugin stopped. Attempting to reconnect...", nriSocketPath)
123+
if err := p.start(ctx); err != nil {
124+
p.logger.Errorf("failed to restart NRI plugin: %v", err)
125+
}
123126
}),
124127
}
125128
if len(nriSocketPath) > 0 {
@@ -134,13 +137,25 @@ func (p *Plugin) Start(ctx context.Context, nriSocketPath, nriPluginIdx string)
134137
if p.stub, err = stub.New(p, pluginOpts...); err != nil {
135138
return fmt.Errorf("failed to initialise plugin at %s: %w", nriSocketPath, err)
136139
}
137-
err = p.stub.Start(ctx)
140+
err = p.start(ctx)
138141
if err != nil {
139142
return fmt.Errorf("plugin exited with error: %w", err)
140143
}
141144
return nil
142145
}
143146

147+
// start starts the NRI plugin server
148+
func (p *Plugin) start(ctx context.Context) error {
149+
if p != nil {
150+
if p.stub == nil {
151+
p.logger.Infof("NRI plugin not initialized. Skipping plugin start")
152+
} else {
153+
return p.stub.Start(ctx)
154+
}
155+
}
156+
return nil
157+
}
158+
144159
// Stop stops the NRI plugin
145160
func (p *Plugin) Stop() {
146161
if p == nil || p.stub == nil {

0 commit comments

Comments
 (0)