Merge "The initial implementation of the edit monitor" into main

This commit is contained in:
Zhuoyao Zhang
2024-09-23 21:56:58 +00:00
committed by Gerrit Code Review
3 changed files with 385 additions and 0 deletions

View File

@@ -19,3 +19,26 @@ package {
default_applicable_licenses: ["Android-Apache-2.0"],
default_team: "trendy_team_adte",
}
python_library_host {
name: "edit_monitor_lib",
pkg_path: "edit_monitor",
srcs: [
"daemon_manager.py",
],
}
python_test_host {
name: "daemon_manager_test",
main: "daemon_manager_test.py",
pkg_path: "edit_monitor",
srcs: [
"daemon_manager_test.py",
],
libs: [
"edit_monitor_lib",
],
test_options: {
unit_test: true,
},
}

View File

@@ -0,0 +1,165 @@
# Copyright 2024, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import logging
import multiprocessing
import os
import pathlib
import signal
import subprocess
import tempfile
import time
DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1
def default_daemon_target():
"""Place holder for the default daemon target."""
print("default daemon target")
class DaemonManager:
"""Class to manage and monitor the daemon run as a subprocess."""
def __init__(
self,
binary_path: str,
daemon_target: callable = default_daemon_target,
daemon_args: tuple = (),
):
self.binary_path = binary_path
self.daemon_target = daemon_target
self.daemon_args = daemon_args
self.pid = os.getpid()
self.daemon_process = None
pid_file_dir = pathlib.Path(tempfile.gettempdir()).joinpath("edit_monitor")
pid_file_dir.mkdir(parents=True, exist_ok=True)
self.pid_file_path = self._get_pid_file_path(pid_file_dir)
def start(self):
"""Writes the pidfile and starts the daemon proces."""
try:
self._write_pid_to_pidfile()
self._start_daemon_process()
except Exception as e:
logging.exception("Failed to start daemon manager with error %s", e)
def stop(self):
"""Stops the daemon process and removes the pidfile."""
logging.debug("in daemon manager cleanup.")
try:
if self.daemon_process and self.daemon_process.is_alive():
self._terminate_process(self.daemon_process.pid)
self._remove_pidfile()
except Exception as e:
logging.exception("Failed to stop daemon manager with error %s", e)
def _write_pid_to_pidfile(self):
"""Creates a pidfile and writes the current pid to the file.
Raise FileExistsError if the pidfile already exists.
"""
try:
# Use the 'x' mode to open the file for exclusive creation
with open(self.pid_file_path, "x") as f:
f.write(f"{self.pid}")
except FileExistsError as e:
# This could be caused due to race condition that a user is trying
# to start two edit monitors at the same time. Or because there is
# already an existing edit monitor running and we can not kill it
# for some reason.
logging.exception("pidfile %s already exists.", self.pid_file_path)
raise e
def _start_daemon_process(self):
"""Starts a subprocess to run the daemon."""
p = multiprocessing.Process(
target=self.daemon_target, args=self.daemon_args
)
p.start()
logging.info("Start subprocess with PID %d", p.pid)
self.daemon_process = p
def _terminate_process(
self, pid: int, timeout: int = DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS
):
"""Terminates a process with given pid.
It first sends a SIGTERM to the process to allow it for proper
termination with a timeout. If the process is not terminated within
the timeout, kills it forcefully.
"""
try:
os.kill(pid, signal.SIGTERM)
if not self._wait_for_process_terminate(pid, timeout):
logging.warning(
"Process %d not terminated within timeout, try force kill", pid
)
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
logging.info("Process with PID %d not found (already terminated)", pid)
def _wait_for_process_terminate(self, pid: int, timeout: int) -> bool:
start_time = time.time()
while time.time() < start_time + timeout:
if not self._is_process_alive(pid):
return True
time.sleep(1)
logging.error("Process %d not terminated within %d seconds.", pid, timeout)
return False
def _is_process_alive(self, pid: int) -> bool:
try:
output = subprocess.check_output(
["ps", "-p", str(pid), "-o", "state="], text=True
).strip()
state = output.split()[0]
return state != "Z" # Check if the state is not 'Z' (zombie)
except subprocess.CalledProcessError:
# Process not found (already dead).
return False
except (FileNotFoundError, OSError, ValueError) as e:
logging.warning(
"Unable to check the status for process %d with error: %s.", pid, e
)
return True
def _remove_pidfile(self):
try:
os.remove(self.pid_file_path)
except FileNotFoundError:
logging.info("pid file %s already removed.", self.pid_file_path)
def _get_pid_file_path(self, pid_file_dir: pathlib.Path) -> pathlib.Path:
"""Generates the path to store the pidfile.
The file path should have the format of "/tmp/edit_monitor/xxxx.lock"
where xxxx is a hashed value based on the binary path that starts the
process.
"""
hash_object = hashlib.sha256()
hash_object.update(self.binary_path.encode("utf-8"))
pid_file_path = pid_file_dir.joinpath(hash_object.hexdigest() + ".lock")
logging.info("pid_file_path: %s", pid_file_path)
return pid_file_path

View File

@@ -0,0 +1,197 @@
# Copyright 2024, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unittests for DaemonManager."""
import logging
import multiprocessing
import os
import pathlib
import signal
import subprocess
import sys
import tempfile
import time
import unittest
from unittest import mock
from edit_monitor import daemon_manager
TEST_BINARY_FILE = '/path/to/test_binary'
TEST_PID_FILE_PATH = (
'587239c2d1050afdf54512e2d799f3b929f86b43575eb3c7b4bab105dd9bd25e.lock'
)
def example_daemon(output_file):
with open(output_file, 'w') as f:
f.write('running daemon target')
def long_running_daemon():
while True:
time.sleep(1)
class DaemonManagerTest(unittest.TestCase):
@classmethod
def setUpClass(cls):
super().setUpClass()
# Configure to print logging to stdout.
logging.basicConfig(filename=None, level=logging.DEBUG)
console = logging.StreamHandler(sys.stdout)
logging.getLogger('').addHandler(console)
def setUp(self):
super().setUp()
self.original_tempdir = tempfile.tempdir
self.working_dir = tempfile.TemporaryDirectory()
# Sets the tempdir under the working dir so any temp files created during
# tests will be cleaned.
tempfile.tempdir = self.working_dir.name
def tearDown(self):
# Cleans up any child processes left by the tests.
self._cleanup_child_processes()
self.working_dir.cleanup()
# Restores tempdir.
tempfile.tempdir = self.original_tempdir
super().tearDown()
def test_start_success(self):
damone_output_file = tempfile.NamedTemporaryFile(
dir=self.working_dir.name, delete=False
)
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE,
daemon_target=example_daemon,
daemon_args=(damone_output_file.name,),
)
dm.start()
dm.daemon_process.join()
# Verifies the expected pid file is created.
expected_pid_file_path = pathlib.Path(self.working_dir.name).joinpath(
'edit_monitor', TEST_PID_FILE_PATH
)
self.assertEqual(dm.pid_file_path, expected_pid_file_path)
self.assertTrue(expected_pid_file_path.exists())
# Verifies the daemon process is executed successfully.
with open(damone_output_file.name, 'r') as f:
contents = f.read()
self.assertEqual(contents, 'running daemon target')
def test_start_failed_to_write_pidfile(self):
pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
'edit_monitor'
)
pid_file_path_dir.mkdir(parents=True, exist_ok=True)
# Makes the directory read-only so write pidfile will fail.
os.chmod(pid_file_path_dir, 0o555)
dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
dm.start()
# Verifies no daemon process is started.
self.assertIsNone(dm.daemon_process)
def test_start_failed_to_start_daemon_process(self):
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE, daemon_target='wrong_target', daemon_args=(1)
)
dm.start()
# Verifies no daemon process is started.
self.assertIsNone(dm.daemon_process)
def test_stop_success(self):
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE, daemon_target=long_running_daemon
)
dm.start()
dm.stop()
self.assert_no_subprocess_running()
self.assertFalse(dm.pid_file_path.exists())
@mock.patch('os.kill')
def test_stop_failed_to_kill_daemon_process(self, mock_kill):
mock_kill.side_effect = OSError('Unknown OSError')
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE, daemon_target=long_running_daemon
)
dm.start()
dm.stop()
self.assertTrue(dm.daemon_process.is_alive())
self.assertTrue(dm.pid_file_path.exists())
@mock.patch('os.remove')
def test_stop_failed_to_remove_pidfile(self, mock_remove):
mock_remove.side_effect = OSError('Unknown OSError')
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE, daemon_target=long_running_daemon
)
dm.start()
dm.stop()
self.assert_no_subprocess_running()
self.assertTrue(dm.pid_file_path.exists())
def assert_no_subprocess_running(self):
child_pids = self._get_child_processes(os.getpid())
for child_pid in child_pids:
self.assertFalse(
self._is_process_alive(child_pid), f'process {child_pid} still alive'
)
def _get_child_processes(self, parent_pid):
try:
output = subprocess.check_output(
['ps', '-o', 'pid,ppid', '--no-headers'], text=True
)
child_processes = []
for line in output.splitlines():
pid, ppid = line.split()
if int(ppid) == parent_pid:
child_processes.append(int(pid))
return child_processes
except subprocess.CalledProcessError as e:
self.fail(f'failed to get child process, error: {e}')
def _is_process_alive(self, pid):
try:
output = subprocess.check_output(
['ps', '-p', str(pid), '-o', 'state='], text=True
).strip()
state = output.split()[0]
return state != 'Z' # Check if the state is not 'Z' (zombie)
except subprocess.CalledProcessError:
return False
def _cleanup_child_processes(self):
child_pids = self._get_child_processes(os.getpid())
for child_pid in child_pids:
try:
os.kill(child_pid, signal.SIGKILL)
except ProcessLookupError:
# process already terminated
pass
if __name__ == '__main__':
unittest.main()