diff --git a/.github/workflows/build-msi.yaml b/.github/workflows/build-msi.yaml new file mode 100644 index 00000000..8384a92f --- /dev/null +++ b/.github/workflows/build-msi.yaml @@ -0,0 +1,46 @@ +name: Build MSI + +on: + workflow_dispatch: + inputs: + # the version is entered by hand because this workflow is triggered manually on a branch; switch to our regular git tags once we're back on the normal flow + msi_version: + description: "MSI package version (e.g., 1.0.0)" + required: true + +jobs: + build-msi: + runs-on: windows-2019 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.23.1' + + - name: Install Make + run: choco install make --yes + + - name: Install WiX CLI + run: dotnet tool install --global wix + + - name: Install WiX Extensions + run: | + wix extension add -g WixToolset.Firewall.wixext/5.0.2 + wix extension add -g WixToolset.Util.wixext/5.0.2 + + - name: Build Go binary + run: make windows + + - name: Build MSI + run: | + wix build agent.wxs -define GoBinDir="${{ github.workspace }}\bin" -define MSIProductVersion="${{ github.event.inputs.msi_version }}" -ext WixToolset.Util.wixext -ext WixToolset.Firewall.wixext -o agent-${{ github.event.inputs.msi_version }}.msi + + - name: Upload MSI artifact + uses: actions/upload-artifact@v4 + with: + name: agent-${{ github.event.inputs.msi_version }}.msi + path: agent-${{ github.event.inputs.msi_version }}.msi diff --git a/Makefile b/Makefile index de766b8c..75010082 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,12 @@ bin/viam-agent-$(PATH_VERSION)-$(LINUX_ARCH): go.* *.go */*.go */*/*.go subsyste go build -o $@ -trimpath -tags $(TAGS) -ldflags $(LDFLAGS) ./cmd/viam-agent/main.go test "$(PATH_VERSION)" != "custom" && cp $@ bin/viam-agent-stable-$(LINUX_ARCH) || true +.PHONY: windows +windows: bin/viam-agent.exe + +bin/viam-agent.exe: + GOOS=windows GOARCH=amd64 go build -o $@ -trimpath -tags $(TAGS) -ldflags $(LDFLAGS) ./cmd/viam-agent + .PHONY: 
clean clean: rm -rf bin/ diff --git a/agent.bat b/agent.bat new file mode 100644 index 00000000..904458bc --- /dev/null +++ b/agent.bat @@ -0,0 +1,16 @@ +@echo off +:: installer for agent on windows + +set root=\opt\viam +set fname=viam-agent-windows-amd64-alpha-16-6dece14.exe +mkdir %root%\cache +mkdir %root%\bin +curl https://storage.googleapis.com/packages.viam.com/temp/%fname% -o %root%\cache\%fname% +netsh advfirewall firewall add rule name="%fname%" dir=in action=allow program="c:\%root%\cache\%fname%" enable=yes +del %root%\bin\viam-agent.exe +mklink %root%\bin\viam-agent.exe %root%\cache\%fname% +:: todo: restart on error +sc create viam-agent binpath= c:%root%\bin\viam-agent.exe start= auto +sc failure viam-agent reset= 0 actions= restart/30000/restart/30000/restart/30000 +sc failureflag viam-agent 1 +sc start viam-agent diff --git a/agent.wxs b/agent.wxs new file mode 100644 index 00000000..83221432 --- /dev/null +++ b/agent.wxs @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index ccccb746..bfcc9564 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -4,12 +4,11 @@ import ( "bytes" "context" "fmt" - "io/fs" "os" - "os/exec" "os/signal" "os/user" "path/filepath" + "runtime" "strings" "sync" "syscall" @@ -19,12 +18,10 @@ import ( "github.com/nightlyone/lockfile" "github.com/pkg/errors" "github.com/viamrobotics/agent" - "github.com/viamrobotics/agent/subsystems/provisioning" - _ "github.com/viamrobotics/agent/subsystems/syscfg" "github.com/viamrobotics/agent/subsystems/viamagent" "github.com/viamrobotics/agent/subsystems/viamserver" + autils "github.com/viamrobotics/agent/utils" "go.viam.com/rdk/logging" - "go.viam.com/utils" ) var ( @@ -34,26 +31,29 @@ var ( globalLogger = logging.NewLogger("viam-agent") ) +//nolint:lll +type agentOpts struct { + Config string `default:"/etc/viam.json" description:"Path to config file" long:"config" short:"c"` + 
ProvisioningConfig string `default:"/etc/viam-provisioning.json" description:"Path to provisioning (customization) config file" long:"provisioning" short:"p"` + Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` + Fast bool `description:"Enable fast start mode" env:"VIAM_AGENT_FAST_START" long:"fast" short:"f"` + Help bool `description:"Show this help message" long:"help" short:"h"` + Version bool `description:"Show version" long:"version" short:"v"` + Install bool `description:"Install systemd service" long:"install"` + DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` +} + //nolint:gocognit -func main() { +func commonMain() { ctx, cancel := setupExitSignalHandling() + agent.GlobalCancel = cancel defer func() { cancel() activeBackgroundWorkers.Wait() }() - //nolint:lll - var opts struct { - Config string `default:"/etc/viam.json" description:"Path to config file" long:"config" short:"c"` - ProvisioningConfig string `default:"/etc/viam-provisioning.json" description:"Path to provisioning (customization) config file" long:"provisioning" short:"p"` - Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` - Fast bool `description:"Enable fast start mode" env:"VIAM_AGENT_FAST_START" long:"fast" short:"f"` - Help bool `description:"Show this help message" long:"help" short:"h"` - Version bool `description:"Show version" long:"version" short:"v"` - Install bool `description:"Install systemd service" long:"install"` - DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` - } + var opts agentOpts parser := flags.NewParser(&opts, flags.IgnoreUnknown) parser.Usage = "runs as a background service and manages updates and the process lifecycle for viam-server." 
@@ -82,7 +82,7 @@ func main() { // need to be root to go any further than this curUser, err := user.Current() exitIfError(err) - if curUser.Uid != "0" && !opts.DevMode { + if runtime.GOOS != "windows" && curUser.Uid != "0" && !opts.DevMode { //nolint:forbidigo fmt.Printf("viam-agent must be run as root (uid 0), but current user is %s (uid %s)\n", curUser.Username, curUser.Uid) return @@ -93,7 +93,7 @@ func main() { return } - if !opts.DevMode { + if !opts.DevMode && runtime.GOOS != "windows" { // confirm that we're running from a proper install if !strings.HasPrefix(os.Args[0], agent.ViamDirs["viam"]) { //nolint:forbidigo @@ -117,63 +117,16 @@ func main() { } }() - // pass the provisioning path arg to the subsystem - absProvConfigPath, err := filepath.Abs(opts.ProvisioningConfig) - exitIfError(err) - provisioning.ProvisioningConfigFilePath = absProvConfigPath - globalLogger.Infof("provisioning config file path: %s", absProvConfigPath) - - // tie the manager config to the viam-server config - absConfigPath, err := filepath.Abs(opts.Config) - exitIfError(err) - viamserver.ConfigFilePath = absConfigPath - provisioning.AppConfigFilePath = absConfigPath - globalLogger.Infof("config file path: %s", absConfigPath) + absConfigPath := setupProvisioningPaths(opts) // main manager structure manager, err := agent.NewManager(ctx, globalLogger) exitIfError(err) - err = manager.LoadConfig(absConfigPath) + loadConfigErr := manager.LoadConfig(absConfigPath) //nolint:nestif - if err != nil { - // If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here. - // Rename the file (if it exists) and wait to provision a new one. 
- if !errors.Is(err, fs.ErrNotExist) { - if err := os.Rename(absConfigPath, absConfigPath+".old"); err != nil { - // if we can't rename the file, we're up a creek, and it's fatal - globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", absConfigPath)) - globalLogger.Error("unable to continue with provisioning, exiting") - manager.CloseAll() - return - } - } - - // We manually start the provisioning service to allow the user to update it and wait. - // The user may be updating it soon, so better to loop quietly than to exit and let systemd keep restarting infinitely. - globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", absConfigPath) - - if err := manager.StartSubsystem(ctx, provisioning.SubsysName); err != nil { - if errors.Is(err, agent.ErrSubsystemDisabled) { - globalLogger.Warn("provisioning subsystem disabled, please manually update /etc/viam.json and connect to internet") - } else { - globalLogger.Error(errors.Wrapf(err, - "could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet")) - manager.CloseAll() - return - } - } - - for { - globalLogger.Warn("waiting for user provisioning") - if !utils.SelectContextOrWait(ctx, time.Second*10) { - manager.CloseAll() - return - } - if err := manager.LoadConfig(absConfigPath); err == nil { - break - } - } + if loadConfigErr != nil { + runPlatformProvisioning(ctx, manager, loadConfigErr, absConfigPath) } netAppender, err := manager.CreateNetAppender() if err != nil { @@ -199,23 +152,7 @@ func main() { // wait to be online timeoutCtx, cancel := context.WithTimeout(ctx, time.Minute) defer cancel() - for { - cmd := exec.CommandContext(timeoutCtx, "systemctl", "is-active", "network-online.target") - _, err := cmd.CombinedOutput() - - if err == nil { - break - } - - if e := (&exec.ExitError{}); !errors.As(err, &e) { - // if it's not an ExitError, that means it didn't even start, so bail out - globalLogger.Error(errors.Wrap(err, 
"running 'systemctl is-active network-online.target'")) - break - } - if !utils.SelectContextOrWait(timeoutCtx, time.Second) { - break - } - } + autils.WaitOnline(globalLogger, timeoutCtx) // Check for self-update and restart if needed. needRestart, err := manager.SelfUpdate(ctx) @@ -268,12 +205,11 @@ func setupExitSignalHandling() (context.Context, func()) { // this will eventually be handled elsewhere as a restart, not exit case syscall.SIGHUP: - // ignore SIGURG entirely, it's used for real-time scheduling notifications - case syscall.SIGURG: - // log everything else default: - globalLogger.Debugw("received unknown signal", "signal", sig) + if !ignoredSignal(sig) { + globalLogger.Debugw("received unknown signal", "signal", sig) + } } } }() @@ -282,6 +218,7 @@ func setupExitSignalHandling() (context.Context, func()) { return ctx, cancel } +// helper to log.Fatal if error is non-nil. func exitIfError(err error) { if err != nil { globalLogger.Fatal(err) diff --git a/cmd/viam-agent/main_windows.go b/cmd/viam-agent/main_windows.go new file mode 100644 index 00000000..66343d30 --- /dev/null +++ b/cmd/viam-agent/main_windows.go @@ -0,0 +1,63 @@ +package main + +import ( + "fmt" + + "github.com/viamrobotics/agent/utils" + "golang.org/x/sys/windows/svc" + "golang.org/x/sys/windows/svc/debug" + "golang.org/x/sys/windows/svc/eventlog" +) + +var elog debug.Log + +const serviceName = "viam-agent" + +type agentService struct{} + +// control loop for a windows service +func (*agentService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) { + changes <- svc.Status{State: svc.Running, Accepts: svc.AcceptStop | svc.AcceptShutdown} + for { + c := <-r + if c.Cmd == svc.Stop || c.Cmd == svc.Shutdown { + elog.Info(1, fmt.Sprintf("%s service stopping", serviceName)) + if err := utils.KillTree(-1); err != nil { + elog.Error(1, fmt.Sprintf("error killing subtree %s", err)) + } + elog.Info(1, "taskkilled") + break + } else { + 
elog.Error(1, fmt.Sprintf("unexpected control request #%d", c)) + } + } + changes <- svc.Status{State: svc.StopPending} + return +} + +func main() { + if inService, err := svc.IsWindowsService(); err != nil { + panic(err) + } else if !inService { + println("no service detected -- running as normal process") + commonMain() + return + } + + var err error + elog, err = eventlog.Open(serviceName) + if err != nil { + return + } + defer elog.Close() + + elog.Info(1, fmt.Sprintf("starting %s service", serviceName)) + go commonMain() + err = svc.Run(serviceName, &agentService{}) + if err != nil { + elog.Error(1, fmt.Sprintf("%s service failed: %v", serviceName, err)) + return + } + // todo(windows): gracefully stop. without this, RDK stays running in the background. + elog.Info(1, fmt.Sprintf("%s service stopped", serviceName)) +} diff --git a/cmd/viam-agent/subsystems_linux.go b/cmd/viam-agent/subsystems_linux.go new file mode 100644 index 00000000..d6d65170 --- /dev/null +++ b/cmd/viam-agent/subsystems_linux.go @@ -0,0 +1,87 @@ +package main + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "syscall" + "time" + + "github.com/pkg/errors" + "github.com/viamrobotics/agent" + "github.com/viamrobotics/agent/subsystems/provisioning" + // register-only. + _ "github.com/viamrobotics/agent/subsystems/syscfg" + "github.com/viamrobotics/agent/subsystems/viamserver" + "go.viam.com/utils" +) + +func main() { + commonMain() +} + +// platform-specific provisioning logic. +func runPlatformProvisioning(ctx context.Context, manager *agent.Manager, loadConfigErr error, absConfigPath string) { + // If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here. + // Rename the file (if it exists) and wait to provision a new one. 
+ if !errors.Is(loadConfigErr, fs.ErrNotExist) { + if err := os.Rename(absConfigPath, absConfigPath+".old"); err != nil { + // if we can't rename the file, we're up a creek, and it's fatal + globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", absConfigPath)) + globalLogger.Error("unable to continue with provisioning, exiting") + manager.CloseAll() + return + } + } + + // We manually start the provisioning service to allow the user to update it and wait. + // The user may be updating it soon, so better to loop quietly than to exit and let systemd keep restarting infinitely. + globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", absConfigPath) + + if err := manager.StartSubsystem(ctx, provisioning.SubsysName); err != nil { + if errors.Is(err, agent.ErrSubsystemDisabled) { + globalLogger.Warn("provisioning subsystem disabled, please manually update /etc/viam.json and connect to internet") + } else { + globalLogger.Error(errors.Wrapf(err, + "could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet")) + manager.CloseAll() + return + } + } + + for { + globalLogger.Warn("waiting for user provisioning") + if !utils.SelectContextOrWait(ctx, time.Second*10) { + manager.CloseAll() + return + } + if err := manager.LoadConfig(absConfigPath); err == nil { + break + } + } +} + +// platform-specific path setup. 
+func setupProvisioningPaths(opts agentOpts) string { + // pass the provisioning path arg to the subsystem + absProvConfigPath, err := filepath.Abs(opts.ProvisioningConfig) + exitIfError(err) + provisioning.ProvisioningConfigFilePath = absProvConfigPath + globalLogger.Infof("provisioning config file path: %s", absProvConfigPath) + + // tie the manager config to the viam-server config + absConfigPath, err := filepath.Abs(opts.Config) + exitIfError(err) + viamserver.ConfigFilePath = absConfigPath + provisioning.AppConfigFilePath = absConfigPath + globalLogger.Infof("config file path: %s", absConfigPath) + + return absConfigPath +} + +// ignoredSignal reports whether the given signal is safe to ignore on this platform. +func ignoredSignal(sig os.Signal) bool { + // ignore SIGURG entirely, it's used for real-time scheduling notifications + return sig == syscall.SIGURG +} diff --git a/cmd/viam-agent/subsystems_windows.go b/cmd/viam-agent/subsystems_windows.go new file mode 100644 index 00000000..e023f19c --- /dev/null +++ b/cmd/viam-agent/subsystems_windows.go @@ -0,0 +1,27 @@ +package main + +import ( + "context" + "os" + "path/filepath" + + "github.com/viamrobotics/agent" + "github.com/viamrobotics/agent/subsystems/viamserver" +) + +func runPlatformProvisioning(context.Context, *agent.Manager, error, string) { + globalLogger.Warn("provisioning not available on windows yet") +} + +// platform-specific path setup. 
+func setupProvisioningPaths(opts agentOpts) string { + // tie the manager config to the viam-server config + absConfigPath, err := filepath.Abs(opts.Config) + exitIfError(err) + viamserver.ConfigFilePath = absConfigPath + globalLogger.Infof("config file path: %s", absConfigPath) + + return absConfigPath +} + +func ignoredSignal(os.Signal) bool { return false } diff --git a/manager.go b/manager.go index 55fb34e6..253c9e7d 100644 --- a/manager.go +++ b/manager.go @@ -446,7 +446,7 @@ func (m *Manager) processConfig(cfg map[string]*pb.DeviceSubsystemConfig) { // GetConfig retrieves the configuration from the cloud, or returns a cached version if unable to communicate. func (m *Manager) GetConfig(ctx context.Context) (map[string]*pb.DeviceSubsystemConfig, time.Duration, error) { if m.cloudConfig == nil { - return nil, 0, errors.New("can't GetConfig until successful LoadConfig") + return nil, minimalCheckInterval, errors.New("can't GetConfig until successful LoadConfig") } timeoutCtx, cancelFunc := context.WithTimeout(ctx, defaultNetworkTimeout) defer cancelFunc() diff --git a/subsystem.go b/subsystem.go index 013a7955..2350bdb9 100644 --- a/subsystem.go +++ b/subsystem.go @@ -13,11 +13,13 @@ import ( "path" "path/filepath" "regexp" + "runtime" "sync" "syscall" "time" errw "github.com/pkg/errors" + autils "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/app/agent/v1" "go.viam.com/rdk/logging" ) @@ -247,6 +249,9 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf } updateInfo := cfg.GetUpdateInfo() + if updateInfo == nil { + return false, fmt.Errorf("updateInfo for %s is nil. 
are you on an unsupported platform?", s.name) + } // check if we already have the version given by the cloud verData, ok := s.CacheData.Versions[updateInfo.GetVersion()] @@ -340,6 +345,9 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf // symlink the extracted file to bin verData.SymlinkPath = path.Join(ViamDirs["bin"], updateInfo.GetFilename()) + if runtime.GOOS == "windows" { + verData.SymlinkPath += ".exe" + } if err = ForceSymlink(verData.UnpackedPath, verData.SymlinkPath); err != nil { return needRestart, errw.Wrap(err, "creating symlink") } @@ -438,7 +446,7 @@ func (is *InternalSubsystem) Start(ctx context.Context) error { //nolint:gosec is.cmd = exec.Command(path.Join(ViamDirs["bin"], is.name), is.cmdArgs...) is.cmd.Dir = ViamDirs["viam"] - is.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + autils.PlatformSubprocessSettings(is.cmd) is.cmd.Stdout = stdio is.cmd.Stderr = stderr @@ -522,10 +530,7 @@ func (is *InternalSubsystem) Stop(ctx context.Context) error { } is.logger.Warnf("%s refused to exit, killing", is.name) - err = syscall.Kill(-is.cmd.Process.Pid, syscall.SIGKILL) - if err != nil { - is.logger.Error(err) - } + autils.PlatformKill(is.logger, is.cmd) if is.waitForExit(ctx, StopKillTimeout) { is.logger.Infof("%s successfully killed", is.name) @@ -555,44 +560,6 @@ func (is *InternalSubsystem) waitForExit(ctx context.Context, timeout time.Durat } } -// HealthCheck sends a USR1 signal to the subsystem process, which should cause it to log "HEALTHY" to stdout. 
-func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { - is.startStopMu.Lock() - defer is.startStopMu.Unlock() - is.mu.Lock() - defer is.mu.Unlock() - if !is.running { - return errw.Errorf("%s not running", is.name) - } - - is.logger.Debugf("starting healthcheck for %s", is.name) - - checkChan, err := is.cmd.Stdout.(*MatchingLogger).AddMatcher("healthcheck", regexp.MustCompile(`HEALTHY`), true) - if err != nil { - return err - } - defer func() { - matcher, ok := is.cmd.Stdout.(*MatchingLogger) - if ok { - matcher.DeleteMatcher("healthcheck") - } - }() - - err = is.cmd.Process.Signal(syscall.SIGUSR1) - if err != nil { - is.logger.Error(err) - } - - select { - case <-time.After(time.Second * 30): - case <-ctx.Done(): - case <-checkChan: - is.logger.Debugf("healthcheck for %s is good", is.name) - return nil - } - return errw.Errorf("timeout waiting for healthcheck on %s", is.name) -} - func (is *InternalSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { jsonBytes, err := cfg.GetAttributes().MarshalJSON() if err != nil { diff --git a/subsystem_linux.go b/subsystem_linux.go new file mode 100644 index 00000000..10789fa4 --- /dev/null +++ b/subsystem_linux.go @@ -0,0 +1,48 @@ +package agent + +import ( + "context" + "regexp" + "syscall" + "time" + + errw "github.com/pkg/errors" +) + +// HealthCheck sends a USR1 signal to the subsystem process, which should cause it to log "HEALTHY" to stdout. 
+func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { + is.startStopMu.Lock() + defer is.startStopMu.Unlock() + is.mu.Lock() + defer is.mu.Unlock() + if !is.running { + return errw.Errorf("%s not running", is.name) + } + + is.logger.Debugf("starting healthcheck for %s", is.name) + + checkChan, err := is.cmd.Stdout.(*MatchingLogger).AddMatcher("healthcheck", regexp.MustCompile(`HEALTHY`), true) + if err != nil { + return err + } + defer func() { + matcher, ok := is.cmd.Stdout.(*MatchingLogger) + if ok { + matcher.DeleteMatcher("healthcheck") + } + }() + + err = is.cmd.Process.Signal(syscall.SIGUSR1) + if err != nil { + is.logger.Error(err) + } + + select { + case <-time.After(time.Second * 30): + case <-ctx.Done(): + case <-checkChan: + is.logger.Debugf("healthcheck for %s is good", is.name) + return nil + } + return errw.Errorf("timeout waiting for healthcheck on %s", is.name) +} diff --git a/subsystem_windows.go b/subsystem_windows.go new file mode 100644 index 00000000..1f199aea --- /dev/null +++ b/subsystem_windows.go @@ -0,0 +1,10 @@ +package agent + +import ( + "context" +) + +func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { + // todo: flesh this out. SIGUSR1 isn't available on windows. 
+ return nil +} diff --git a/subsystems/viamagent/viamagent.go b/subsystems/viamagent/viamagent.go index 50450359..6267f5ae 100644 --- a/subsystems/viamagent/viamagent.go +++ b/subsystems/viamagent/viamagent.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" errw "github.com/pkg/errors" @@ -40,10 +41,12 @@ var ( serviceFileContents []byte ) -type agentSubsystem struct{} +type agentSubsystem struct { + logger logging.Logger +} func NewSubsystem(ctx context.Context, logger logging.Logger, updateConf *pb.DeviceSubsystemConfig) (subsystems.Subsystem, error) { - return agent.NewAgentSubsystem(ctx, subsysName, logger, &agentSubsystem{}) + return agent.NewAgentSubsystem(ctx, subsysName, logger, &agentSubsystem{logger: logger}) } // Start does nothing (we're already running as we ARE the agent.) @@ -64,13 +67,28 @@ func (a *agentSubsystem) HealthCheck(ctx context.Context) error { // Update here handles the post-update installation of systemd files and the like. // The actual update check and download is done in the wrapper (agent.AgentSubsystem). func (a *agentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { + // todo: pass logger into this function; these are important events if !newVersion { return false, nil } expectedPath := filepath.Join(agent.ViamDirs["bin"], subsysName) + if runtime.GOOS == "windows" { + a.logger.Info("windows postinstall") + // no systemd on windows -- for now you need to double-restart. + if _, err := exec.Command(expectedPath, "--version").Output(); err != nil { + return false, errw.Wrap(err, "testing binary") + } + a.logger.Info("windows okay test binary") + if err := agent.RequestRestart(); err != nil { + return false, err + } + a.logger.Info("windows requested restart") + return true, nil + } // Run the newly updated version to install systemd and other service files. + // Note: this also restarts the daemon. 
//nolint:gosec cmd := exec.Command(expectedPath, "--install") output, err := cmd.CombinedOutput() @@ -83,6 +101,7 @@ func (a *agentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf return true, nil } +// Install installs systemd and restarts the daemon. func Install(logger logging.Logger) error { // Check for systemd cmd := exec.Command("systemctl", "--version") diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index efbba063..d435bdd6 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -7,9 +7,11 @@ import ( "encoding/json" "errors" "net/http" + "os" "os/exec" "path" "regexp" + "runtime" "strings" "sync" "sync/atomic" @@ -20,6 +22,7 @@ import ( "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems" "github.com/viamrobotics/agent/subsystems/registry" + autils "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/app/agent/v1" "go.viam.com/rdk/logging" "go.viam.com/utils" @@ -109,6 +112,12 @@ func configFromProto(logger logging.Logger, updateConf *pb.DeviceSubsystemConfig return ret } +func pathExists(path string) bool { + // todo: give the manager access to this + _, err := os.Stat(path) + return err == nil +} + func (s *viamServer) Start(ctx context.Context) error { s.startStopMu.Lock() defer s.startStopMu.Unlock() @@ -119,6 +128,16 @@ func (s *viamServer) Start(ctx context.Context) error { s.mu.Unlock() return nil } + binPath := path.Join(agent.ViamDirs["bin"], SubsysName) + if runtime.GOOS == "windows" { + binPath += ".exe" + } + if !pathExists(binPath) { + s.logger.Warnf("viam-server binary missing at %s, not starting", binPath) + // todo: nested func so unlock is deferable + s.mu.Unlock() + return nil + } if s.shouldRun { s.logger.Warnf("Restarting %s after unexpected exit", SubsysName) } else { @@ -129,9 +148,9 @@ func (s *viamServer) Start(ctx context.Context) error { stdio := agent.NewMatchingLogger(s.logger, false, false) stderr := 
agent.NewMatchingLogger(s.logger, true, false) //nolint:gosec - s.cmd = exec.Command(path.Join(agent.ViamDirs["bin"], SubsysName), "-config", ConfigFilePath) + s.cmd = exec.Command(binPath, "-config", ConfigFilePath) s.cmd.Dir = agent.ViamDirs["viam"] - s.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + autils.PlatformSubprocessSettings(s.cmd) s.cmd.Stdout = stdio s.cmd.Stderr = stderr @@ -175,6 +194,11 @@ func (s *viamServer) Start(ctx context.Context) error { close(s.exitChan) }() + timeout := globalConfig.Load().startTimeout + if runtime.GOOS == "windows" { + // otherwise pin_url update can't be tested; todo fix this + timeout = time.Second * 10 + } select { case matches := <-c: s.checkURL = matches[1] @@ -184,7 +208,7 @@ func (s *viamServer) Start(ctx context.Context) error { return nil case <-ctx.Done(): return ctx.Err() - case <-time.After(globalConfig.Load().startTimeout): + case <-time.After(timeout): return errw.New("startup timed out") case <-s.exitChan: return errw.New("startup failed") @@ -211,21 +235,27 @@ func (s *viamServer) Stop(ctx context.Context) error { s.logger.Infof("Stopping %s", SubsysName) + if runtime.GOOS == "windows" { + // note: Signal(SIGTERM) returns 'not supported on windows' error on windows + // note: this kills all subprocesses, not just RDK + if err := autils.KillTree(-1); err != nil { + return errw.Wrap(err, "stopping viam-server process tree") + } + return nil + } err := s.cmd.Process.Signal(syscall.SIGTERM) if err != nil { - s.logger.Error(err) + s.logger.Error(errw.Wrap(err, "terminating")) } if s.waitForExit(ctx, stopTermTimeout) { s.logger.Infof("%s successfully stopped", SubsysName) return nil } - s.logger.Warnf("%s refused to exit, killing", SubsysName) - err = syscall.Kill(-s.cmd.Process.Pid, syscall.SIGKILL) - if err != nil { - s.logger.Error(err) - } + + // todo: kill process tree + autils.PlatformKill(s.logger, s.cmd) if s.waitForExit(ctx, stopKillTimeout) { s.logger.Infof("%s successfully killed", SubsysName) @@ 
-264,6 +294,10 @@ func (s *viamServer) HealthCheck(ctx context.Context) (errRet error) { return errw.Errorf("%s not running", SubsysName) } if s.checkURL == "" { + if runtime.GOOS == "windows" { + // todo(windows): we hit this case on windows; debug why. note: it also can't signal the subprocess to stop. + return nil + } return errw.Errorf("can't find listening URL for %s", SubsysName) } @@ -307,6 +341,10 @@ func (s *viamServer) HealthCheck(ctx context.Context) (errRet error) { // Must be called with `s.mu` held, as `s.checkURL` and `s.checkURLAlt` are // both accessed. func (s *viamServer) isRestartAllowed(ctx context.Context) (bool, error) { + if runtime.GOOS == "windows" { + // todo(windows): this function throws 'unsupported protocol scheme', needs debugging + return true, nil + } for _, url := range []string{s.checkURL, s.checkURLAlt} { s.logger.Debugf("starting restart allowed check for %s using %s", SubsysName, url) @@ -379,7 +417,7 @@ func (s *viamServer) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, SubsysName) needRestart = true } else { - s.logger.Infof("will not restart %s version to run new version, as it has not reported"+ + s.logger.Infof("will not restart %s version to run new version, as it has not reported "+ "allowance of a restart", SubsysName) } } diff --git a/utils.go b/utils.go index 48722598..d5c511a4 100644 --- a/utils.go +++ b/utils.go @@ -14,15 +14,15 @@ import ( "net/http" "net/url" "os" + "os/exec" "path" "path/filepath" + "runtime" "strings" - "syscall" "time" errw "github.com/pkg/errors" "github.com/ulikunitz/xz" - "golang.org/x/sys/unix" "google.golang.org/protobuf/types/known/structpb" ) @@ -32,6 +32,8 @@ var ( GitRevision = "" ViamDirs = map[string]string{"viam": "/opt/viam"} + // so RequestRestart can exit the main loop. + GlobalCancel func() ) // GetVersion returns the version embedded at build time. 
@@ -51,6 +53,14 @@ func GetRevision() string { } func init() { + if runtime.GOOS == "windows" { + // note: forward slash isn't an abs path on windows, but resolves to one. + var err error + ViamDirs["viam"], err = filepath.Abs(ViamDirs["viam"]) + if err != nil { + panic(err) + } + } ViamDirs["bin"] = filepath.Join(ViamDirs["viam"], "bin") ViamDirs["cache"] = filepath.Join(ViamDirs["viam"], "cache") ViamDirs["tmp"] = filepath.Join(ViamDirs["viam"], "tmp") @@ -59,6 +69,10 @@ func init() { func InitPaths() error { uid := os.Getuid() + expectedPerms := 0o755 + if runtime.GOOS == "windows" { + expectedPerms = 0o777 + } for _, p := range ViamDirs { info, err := os.Stat(p) if err != nil { @@ -71,19 +85,14 @@ func InitPaths() error { } return errw.Wrapf(err, "checking directory %s", p) } - stat, ok := info.Sys().(*syscall.Stat_t) - if !ok { - // should be impossible on Linux - return errw.New("cannot convert to syscall.Stat_t") - } - if uid != int(stat.Uid) { - return errw.Errorf("%s is owned by UID %d but the current UID is %d", p, stat.Uid, uid) + if err := checkPathOwner(uid, info); err != nil { + return err } if !info.IsDir() { return errw.Errorf("%s should be a directory, but is not", p) } - if info.Mode().Perm() != 0o755 { - return errw.Errorf("%s should be have permission set to 0755, but has permissions %d", p, info.Mode().Perm()) + if info.Mode().Perm() != fs.FileMode(expectedPerms) { + return errw.Errorf("%s should have permission set to %#o, but has permissions %#o", p, expectedPerms, info.Mode().Perm()) } } return nil @@ -96,11 +105,15 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er return "", err } - outPath = filepath.Join(ViamDirs["cache"], path.Base(parsedURL.Path)) + parsedPath := parsedURL.Path + if runtime.GOOS == "windows" && !strings.HasSuffix(parsedPath, ".exe") { + parsedPath += ".exe" + } + outPath = filepath.Join(ViamDirs["cache"], path.Base(parsedPath)) //nolint:nestif if parsedURL.Scheme == "file" { - infd, err 
:= os.Open(parsedURL.Path) + infd, err := os.Open(parsedPath) if err != nil { return "", err } @@ -158,8 +171,16 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er if err != nil { return "", err } + closed := false defer func() { - errRet = errors.Join(errRet, out.Close(), SyncFS(out.Name())) + if !closed { + errRet = errors.Join(errRet, out.Close()) + } + if runtime.GOOS != "windows" { + // note: error is different on windows (EBADF?). + // also this has in theory already synced in the success case. + errRet = errors.Join(errRet, SyncFS(out.Name())) + } if err := os.Remove(out.Name()); err != nil && !os.IsNotExist(err) { errRet = errors.Join(errRet, err) } @@ -169,8 +190,18 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er if err != nil && !os.IsNotExist(err) { errRet = errors.Join(errRet, err) } + errRet = errors.Join(errRet, out.Close()) + closed = true errRet = errors.Join(errRet, os.Rename(out.Name(), outPath), SyncFS(outPath)) + if runtime.GOOS == "windows" { + cmd := exec.Command( + "netsh", "advfirewall", "firewall", "add", "rule", "name="+path.Base(outPath), + "dir=in", "action=allow", "program=\""+outPath+"\"", "enable=yes", + ) + cmd.Start() + errRet = errors.Join(errRet, cmd.Wait()) + } return outPath, errRet } @@ -283,18 +314,6 @@ func ForceSymlink(orig, symlink string) error { return SyncFS(symlink) } -func SyncFS(syncPath string) (errRet error) { - file, errRet := os.Open(filepath.Dir(syncPath)) - if errRet != nil { - return errw.Wrapf(errRet, "syncing fs %s", syncPath) - } - _, _, err := unix.Syscall(unix.SYS_SYNCFS, file.Fd(), 0, 0) - if err != 0 { - errRet = errw.Wrapf(err, "syncing fs %s", syncPath) - } - return errors.Join(errRet, file.Close()) -} - func WriteFileIfNew(outPath string, data []byte) (bool, error) { //nolint:gosec curFileBytes, err := os.ReadFile(outPath) diff --git a/utils/utils_linux.go b/utils/utils_linux.go new file mode 100644 index 00000000..2166f431 --- 
/dev/null +++ b/utils/utils_linux.go @@ -0,0 +1,51 @@ +package utils + +import ( + "context" + "os/exec" + "syscall" + "time" + + "github.com/pkg/errors" + "go.viam.com/rdk/logging" + "go.viam.com/utils" +) + +// PlatformSubprocessSettings sets platform-specific subprocess settings. +func PlatformSubprocessSettings(cmd *exec.Cmd) { + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} +} + +// PlatformKill does SIGKILL if available for the platform. +func PlatformKill(logger logging.Logger, cmd *exec.Cmd) { + err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + if err != nil { + logger.Error(err) + } +} + +// WaitOnline attempts to wait until the network comes up, with various bailout conditions. +func WaitOnline(logger logging.Logger, ctx context.Context) { + for { + cmd := exec.CommandContext(ctx, "systemctl", "is-active", "network-online.target") + _, err := cmd.CombinedOutput() + + if err == nil { + break + } + + if e := (&exec.ExitError{}); !errors.As(err, &e) { + // if it's not an ExitError, that means it didn't even start, so bail out + logger.Error(errors.Wrap(err, "running 'systemctl is-active network-online.target'")) + break + } + if !utils.SelectContextOrWait(ctx, time.Second) { + break + } + } +} + +// KillTree kills the process tree on windows (because other signaling doesn't work). +func KillTree(pid int) error { + return nil +} diff --git a/utils/utils_windows.go b/utils/utils_windows.go new file mode 100644 index 00000000..b0804d34 --- /dev/null +++ b/utils/utils_windows.go @@ -0,0 +1,55 @@ +package utils + +import ( + "context" + "fmt" + "os" + "os/exec" + "strconv" + "strings" + + "go.viam.com/rdk/logging" +) + +// PlatformSubprocessSettings sets platform-specific subprocess settings. +func PlatformSubprocessSettings(cmd *exec.Cmd) {} + +// PlatformKill does SIGKILL if available for the platform. 
+func PlatformKill(logger logging.Logger, cmd *exec.Cmd) {} + +func WaitOnline(logger logging.Logger, ctx context.Context) { + logger.Warn("WaitOnline not available on windows yet") +} + +// KillTree kills the process tree on windows (because other signaling doesn't work). +func KillTree(pid int) error { + if pid == -1 { + pid = os.Getpid() + } + cmd := exec.Command("WMIC.exe", "process", "where", fmt.Sprintf("ParentProcessId=%d", pid), "get", "ProcessId") + output, err := cmd.Output() + if err != nil { + return err + // elog.Error(1, fmt.Sprintf("error executing %s %s", cmd.Path, cmd.Args)) + // elog.Error(1, fmt.Sprintf("error getting child process for #%d, #%s", pid, err)) + } + lines := strings.Split(string(output), "\r\n") + for _, line := range lines[1:] { + if line == "" { + continue + } + var childPID int + _, err := fmt.Sscan(line, &childPID) + if err != nil { + // elog.Error(1, fmt.Sprintf("not a valid childProcess line %s, #%s", line, err)) + continue + } + cmd = exec.Command("taskkill", "/F", "/T", "/PID", strconv.Itoa(childPID)) + cmd.Run() + // err = cmd.Run() + // if err != nil { + // // elog.Error(1, fmt.Sprintf("error running taskkill #%s", err)) + // } + } + return nil +} diff --git a/utils_linux.go b/utils_linux.go new file mode 100644 index 00000000..0de6c01d --- /dev/null +++ b/utils_linux.go @@ -0,0 +1,41 @@ +package agent + +import ( + "errors" + "io/fs" + "os" + "path/filepath" + "syscall" + + errw "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +// platform-specific UID check. 
+func checkPathOwner(uid int, info fs.FileInfo) error { + stat, ok := info.Sys().(*syscall.Stat_t) + if !ok { + // should be impossible on Linux + return errw.New("cannot convert to syscall.Stat_t") + } + if uid != int(stat.Uid) { + return errw.Errorf("%s is owned by UID %d but the current UID is %d", info.Name(), stat.Uid, uid) + } + return nil +} + +func SyncFS(syncPath string) (errRet error) { + file, errRet := os.Open(filepath.Dir(syncPath)) + if errRet != nil { + return errw.Wrapf(errRet, "syncing fs %s", syncPath) + } + _, _, err := unix.Syscall(unix.SYS_SYNCFS, file.Fd(), 0, 0) + if err != 0 { + errRet = errw.Wrapf(err, "syncing fs %s", syncPath) + } + return errors.Join(errRet, file.Close()) +} + +func RequestRestart() error { + return nil +} diff --git a/utils_windows.go b/utils_windows.go new file mode 100644 index 00000000..ceac2c74 --- /dev/null +++ b/utils_windows.go @@ -0,0 +1,60 @@ +package agent + +import ( + "io/fs" + "os" + "syscall" + "time" + + "github.com/pkg/errors" + "github.com/viamrobotics/agent/utils" +) + +// platform-specific UID check. +func checkPathOwner(uid int, info fs.FileInfo) error { + // todo: figure this out on windows. + return nil +} + +func SyncFS(syncPath string) error { + handle, err := syscall.Open(syncPath, syscall.O_RDWR, 0) + if err != nil { + return err + } + defer syscall.CloseHandle(handle) + err = syscall.Fsync(handle) + if err != nil { + return err + } + return nil +} + +func RequestRestart() error { + // note: sc.exe doesn't have a restart command it seems. 
+ // note: this stops but doesn't start + // if _, err := exec.Command("powershell", "-command", "Restart-Service viam-agent").Output(); err != nil { + // return false, errw.Wrap(err, "restarting windows service") + // } + // if agent.GlobalCancel == nil { + // return false, errors.New("can't call globalCancel because it's nil") + // } + // agent.GlobalCancel() + // if inService, err := svc.IsWindowsService(); err != nil { + // return errw.Wrap(err, "can't request restart -- error checking whether in service") + // } else if !inService { + // return errors.New("can't request restart -- not in service") + // } + // if _, err := exec.Command("net", "stop", "viam-agent").Output(); err != nil { + // return errw.Wrap(err, "restarting windows service") + // } + if GlobalCancel == nil { + return errors.New("globalcancel is nil, can't restart") + } + GlobalCancel() + time.Sleep(5 * time.Second) // todo: rearchitect to wait for exit + if err := utils.KillTree(-1); err != nil { + return err + } + os.Exit(1) // non-zero exit code to trigger service restart; test whether this is necessary + return nil +}