Skip to content

Commit

Permalink
Merge pull request #575 from elezar/add-log-tailer
Browse files Browse the repository at this point in the history
Add log tailer for MPS control logs
  • Loading branch information
elezar authored Mar 13, 2024
2 parents c9d70ac + b321869 commit 6199a40
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 4 deletions.
19 changes: 15 additions & 4 deletions cmd/mps-control-daemon/mps/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"io"
"os"
"os/exec"
"path/filepath"

"k8s.io/klog/v2"

Expand All @@ -46,6 +47,8 @@ type Daemon struct {
// root represents the root at which the files and folders controlled by the
// daemon are created. These include the log and pipe directories.
root Root
// logTailer tails the MPS control daemon logs.
logTailer *tailer
}

// NewDaemon creates an MPS daemon instance.
Expand Down Expand Up @@ -124,23 +127,31 @@ func (d *Daemon) Start() error {
}
defer statusFile.Close()

d.logTailer = newTailer(filepath.Join(logDir, "control.log"))
klog.InfoS("Starting log tailer", "resource", d.rm.Resource())
if err := d.logTailer.Start(); err != nil {
klog.ErrorS(err, "Could not start tail command on control.log; ignoring logs")
}

return nil
}

// Stop ensures that the MPS daemon is quit.
func (d *Daemon) Stop() error {
output, err := d.EchoPipeToControl("quit")
_, err := d.EchoPipeToControl("quit")
if err != nil {
return fmt.Errorf("error sending quit message: %w", err)
}
klog.InfoS("Shut down MPS", "output", output)
klog.InfoS("Stopped MPS control daemon", "resource", d.rm.Resource())

err = d.logTailer.Stop()
klog.InfoS("Stopped log tailer", "resource", d.rm.Resource(), "error", err)

if err := d.setComputeMode(computeModeDefault); err != nil {
return fmt.Errorf("error setting compute mode %v: %w", computeModeDefault, err)
}

err = os.Remove(d.startedFile())
if err != nil && err != os.ErrNotExist {
if err := os.Remove(d.startedFile()); err != nil && err != os.ErrNotExist {
return fmt.Errorf("failed to remove started file: %w", err)
}

Expand Down
69 changes: 69 additions & 0 deletions cmd/mps-control-daemon/mps/log-tailer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package mps

import (
"context"
"os"
"os/exec"
)

// tailer tails the contents of a file by running an external `tail -f`
// command whose output is forwarded to this process's stdout and stderr.
type tailer struct {
	// filename is the path of the file to tail.
	filename string
	// cmd is the running tail command. It is nil until Start succeeds and is
	// reset to nil once Stop has been called.
	cmd *exec.Cmd
	// cancel cancels the context that the tail command was started with.
	cancel context.CancelFunc
}

// newTailer creates a tailer for the specified filename.
// No command is launched until Start is called.
func newTailer(filename string) *tailer {
	return &tailer{
		filename: filename,
	}
}

// Start starts tailing the specified filename.
// The file is printed from its first line (`-n +1`) and then followed (`-f`).
// If the command cannot be started, the error is returned and the tailer is
// left unchanged so that a subsequent Stop is a no-op.
func (t *tailer) Start() error {
	ctx, cancel := context.WithCancel(context.Background())

	//nolint:gosec // G204: Subprocess launched with a potential tainted input or cmd arguments (gosec)
	cmd := exec.CommandContext(ctx, "tail", "-n", "+1", "-f", t.filename)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	if err := cmd.Start(); err != nil {
		// No process is running; release the context immediately instead of
		// keeping it alive until Stop is (maybe) called.
		cancel()
		return err
	}

	// Only record the command and its cancel function once the process is
	// actually running.
	t.cmd = cmd
	t.cancel = cancel
	return nil
}

// Stop stops the tailer.
// The associated cancel function is called, after which the command is
// waited for -- if a command was started.
//
// Stop is safe to call on a nil tailer, on a tailer that was never started,
// and more than once; in those cases it returns nil. When a running command
// is stopped, the result of waiting for it is returned as-is; since the
// command is ended by cancelling its context this is typically a non-nil
// error describing how the process was terminated.
func (t *tailer) Stop() error {
	// Guard against callers that stop a tailer that was never created.
	if t == nil {
		return nil
	}

	if t.cancel != nil {
		t.cancel()
		t.cancel = nil
	}

	if t.cmd == nil {
		return nil
	}

	// Clear the command before waiting so that a repeated Stop does not call
	// Wait on the same command twice.
	cmd := t.cmd
	t.cmd = nil
	return cmd.Wait()
}

0 comments on commit 6199a40

Please sign in to comment.