加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
bugfix-also-stop-machine-when-a-machine-un.patch 6.16 KB
一键复制 编辑 原始数据 按行查看 历史
胡宇彪 提交于 2024-01-08 19:20 . update systemd to v255
From 89110c823f246d3d2c398652999826107da446bf Mon Sep 17 00:00:00 2001
From: yangbin <robin.yb@huawei.com>
Date: Tue, 7 Apr 2020 12:01:39 +0800
Subject: [PATCH] systemd-machined: Also stop machine when a machine unit is
active but the leader process is exited
When a VM machine is created in a scenario as below, it will remain in systemd-machined even though it has already been terminated by libvirtd.
1. libvirtd sends a request to systemd-machined with the leader(the PID of the vm) to create a machine.
2. systemd-machined directs the request to systemd
3. systemd constructs a scope and creates cgroup for the machine. the scope unit is then added to job queue and will be started later.
4. the leader process(the PID of the vm) is terminated by libvirtd(due some reason) before the scope is started.
5. Since the scope unit is yet not started, systemd will not destroy the scope althrough it is noticed with the signal event.
6. systemd starts the scope, and now the scope and machine is in active but no leader process exist.
7. systemd-machined will not stop and destroy the machine, and remains in system until the scope is stopped by others or the OS is restarted.
This patch fix this problem by ansering yes to stop machine in machine_check_gc
when the machine unit is active but the leader process has already exited.
Change-Id: I80e3c32832f4ecf08b6cb149735978730ce1d1c0
---
src/machine/machine.c | 37 ++++++++++++++++++++++++++++++++++++-
src/machine/machined-dbus.c | 35 +++++++++++++++++++++++++++++++++++
src/machine/machined.h | 1 +
3 files changed, 72 insertions(+), 1 deletion(-)
diff --git a/src/machine/machine.c b/src/machine/machine.c
index 44ff5c1..2519fd7 100644
--- a/src/machine/machine.c
+++ b/src/machine/machine.c
@@ -34,6 +34,7 @@
#include "tmpfile-util.h"
#include "unit-name.h"
#include "user-util.h"
+#include "cgroup-util.h"
DEFINE_TRIVIAL_CLEANUP_FUNC(Machine*, machine_free);
@@ -534,6 +535,40 @@ int machine_finalize(Machine *m) {
return 0;
}
+static bool machine_validate_unit(Machine *m) {
+ int r;
+ _cleanup_free_ char *unit = NULL;
+ _cleanup_free_ char *cgroup = NULL;
+
+ r = cg_pid_get_unit(m->leader.pid, &unit);
+ if (!r && streq(m->unit, unit))
+ return true;
+
+ if (r == -ESRCH) {
+ /* the original leader may exit and be replaced with a new leader when qemu hotreplace is performed.
+ * so we don't return true here, otherwise the vm will be added to the gc list.
+ * */
+ log_info("Machine unit is in active, but the leader process is exited. "
+ "machine: %s, leader: "PID_FMT", unit: %s.", m->name, m->leader.pid, m->unit);
+ } else if (r) {
+ log_info_errno(r, "Can not get unit from cgroup. "
+ "machine: %s, leader: "PID_FMT", unit: %s, error: %m", m->name, m->leader.pid, m->unit);
+ } else if (unit && !streq(m->unit, unit)) {
+ log_info("Machine unit name not match. "
+ "machine: %s, leader: "PID_FMT", machine unit: %s, real unit: %s", m->name, m->leader.pid, m->unit, unit);
+ }
+
+ r = manager_get_unit_cgroup_path(m->manager, m->unit, &cgroup);
+ if (!r && !isempty(cgroup) && cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup) > 0) {
+ log_info("Cgroup is empty in the machine unit. "
+ "machine: %s, leader: "PID_FMT", machine unit: %s.", m->name, m->leader.pid, m->unit);
+ /*The vm will be added to gc list only when there is no any process in the scope*/
+ return false;
+ }
+
+ return true;
+}
+
bool machine_may_gc(Machine *m, bool drop_not_started) {
assert(m);
@@ -546,7 +581,7 @@ bool machine_may_gc(Machine *m, bool drop_not_started) {
if (m->scope_job && manager_job_is_active(m->manager, m->scope_job))
return false;
- if (m->unit && manager_unit_is_active(m->manager, m->unit))
+ if (m->unit && manager_unit_is_active(m->manager, m->unit) && machine_validate_unit(m))
return false;
return true;
diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c
index 9fec047..938f42b 100644
--- a/src/machine/machined-dbus.c
+++ b/src/machine/machined-dbus.c
@@ -1514,3 +1514,38 @@ int manager_add_machine(Manager *m, const char *name, Machine **_machine) {
return 0;
}
+
+int manager_get_unit_cgroup_path(Manager *manager, const char *unit, char **cgroup) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *cgroup_path = NULL;
+ int r;
+
+ assert(manager);
+ assert(unit);
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ endswith(unit, ".scope") ? "org.freedesktop.systemd1.Scope" : "org.freedesktop.systemd1.Service",
+ "ControlGroup",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "s", &cgroup_path);
+ if (r < 0)
+ return -EINVAL;
+ *cgroup = strdup(cgroup_path);
+
+ return 0;
+}
diff --git a/src/machine/machined.h b/src/machine/machined.h
index 280c32b..6b8d98b 100644
--- a/src/machine/machined.h
+++ b/src/machine/machined.h
@@ -58,6 +58,7 @@ int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_erro
int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error);
int manager_unit_is_active(Manager *manager, const char *unit);
int manager_job_is_active(Manager *manager, const char *path);
+int manager_get_unit_cgroup_path(Manager *manager, const char *unit, char **cgroup);
#if ENABLE_NSCD
int manager_enqueue_nscd_cache_flush(Manager *m);
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化