diff --git a/tools/edit_monitor/Android.bp b/tools/edit_monitor/Android.bp index 80437c00d4..b939633817 100644 --- a/tools/edit_monitor/Android.bp +++ b/tools/edit_monitor/Android.bp @@ -19,3 +19,26 @@ package { default_applicable_licenses: ["Android-Apache-2.0"], default_team: "trendy_team_adte", } + +python_library_host { + name: "edit_monitor_lib", + pkg_path: "edit_monitor", + srcs: [ + "daemon_manager.py", + ], +} + +python_test_host { + name: "daemon_manager_test", + main: "daemon_manager_test.py", + pkg_path: "edit_monitor", + srcs: [ + "daemon_manager_test.py", + ], + libs: [ + "edit_monitor_lib", + ], + test_options: { + unit_test: true, + }, +} diff --git a/tools/edit_monitor/daemon_manager.py b/tools/edit_monitor/daemon_manager.py new file mode 100644 index 0000000000..c09c3213ae --- /dev/null +++ b/tools/edit_monitor/daemon_manager.py @@ -0,0 +1,165 @@ +# Copyright 2024, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import hashlib +import logging +import multiprocessing +import os +import pathlib +import signal +import subprocess +import tempfile +import time + + +DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1 + + +def default_daemon_target(): + """Place holder for the default daemon target.""" + print("default daemon target") + + +class DaemonManager: + """Class to manage and monitor the daemon run as a subprocess.""" + + def __init__( + self, + binary_path: str, + daemon_target: callable = default_daemon_target, + daemon_args: tuple = (), + ): + self.binary_path = binary_path + self.daemon_target = daemon_target + self.daemon_args = daemon_args + + self.pid = os.getpid() + self.daemon_process = None + + pid_file_dir = pathlib.Path(tempfile.gettempdir()).joinpath("edit_monitor") + pid_file_dir.mkdir(parents=True, exist_ok=True) + self.pid_file_path = self._get_pid_file_path(pid_file_dir) + + def start(self): + """Writes the pidfile and starts the daemon proces.""" + try: + self._write_pid_to_pidfile() + self._start_daemon_process() + except Exception as e: + logging.exception("Failed to start daemon manager with error %s", e) + + def stop(self): + """Stops the daemon process and removes the pidfile.""" + + logging.debug("in daemon manager cleanup.") + try: + if self.daemon_process and self.daemon_process.is_alive(): + self._terminate_process(self.daemon_process.pid) + self._remove_pidfile() + except Exception as e: + logging.exception("Failed to stop daemon manager with error %s", e) + + def _write_pid_to_pidfile(self): + """Creates a pidfile and writes the current pid to the file. + + Raise FileExistsError if the pidfile already exists. + """ + try: + # Use the 'x' mode to open the file for exclusive creation + with open(self.pid_file_path, "x") as f: + f.write(f"{self.pid}") + except FileExistsError as e: + # This could be caused due to race condition that a user is trying + # to start two edit monitors at the same time. Or because there is + # already an existing edit monitor running and we can not kill it + # for some reason. + logging.exception("pidfile %s already exists.", self.pid_file_path) + raise e + + def _start_daemon_process(self): + """Starts a subprocess to run the daemon.""" + p = multiprocessing.Process( + target=self.daemon_target, args=self.daemon_args + ) + p.start() + + logging.info("Start subprocess with PID %d", p.pid) + self.daemon_process = p + + def _terminate_process( + self, pid: int, timeout: int = DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS + ): + """Terminates a process with given pid. + + It first sends a SIGTERM to the process to allow it for proper + termination with a timeout. If the process is not terminated within + the timeout, kills it forcefully. + """ + try: + os.kill(pid, signal.SIGTERM) + if not self._wait_for_process_terminate(pid, timeout): + logging.warning( + "Process %d not terminated within timeout, try force kill", pid + ) + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + logging.info("Process with PID %d not found (already terminated)", pid) + + def _wait_for_process_terminate(self, pid: int, timeout: int) -> bool: + start_time = time.time() + + while time.time() < start_time + timeout: + if not self._is_process_alive(pid): + return True + time.sleep(1) + + logging.error("Process %d not terminated within %d seconds.", pid, timeout) + return False + + def _is_process_alive(self, pid: int) -> bool: + try: + output = subprocess.check_output( + ["ps", "-p", str(pid), "-o", "state="], text=True + ).strip() + state = output.split()[0] + return state != "Z" # Check if the state is not 'Z' (zombie) + except subprocess.CalledProcessError: + # Process not found (already dead). + return False + except (FileNotFoundError, OSError, ValueError) as e: + logging.warning( + "Unable to check the status for process %d with error: %s.", pid, e + ) + return True + + def _remove_pidfile(self): + try: + os.remove(self.pid_file_path) + except FileNotFoundError: + logging.info("pid file %s already removed.", self.pid_file_path) + + def _get_pid_file_path(self, pid_file_dir: pathlib.Path) -> pathlib.Path: + """Generates the path to store the pidfile. + + The file path should have the format of "/tmp/edit_monitor/xxxx.lock" + where xxxx is a hashed value based on the binary path that starts the + process. + """ + hash_object = hashlib.sha256() + hash_object.update(self.binary_path.encode("utf-8")) + pid_file_path = pid_file_dir.joinpath(hash_object.hexdigest() + ".lock") + logging.info("pid_file_path: %s", pid_file_path) + + return pid_file_path diff --git a/tools/edit_monitor/daemon_manager_test.py b/tools/edit_monitor/daemon_manager_test.py new file mode 100644 index 0000000000..5be4beee51 --- /dev/null +++ b/tools/edit_monitor/daemon_manager_test.py @@ -0,0 +1,197 @@ +# Copyright 2024, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unittests for DaemonManager.""" + +import logging +import multiprocessing +import os +import pathlib +import signal +import subprocess +import sys +import tempfile +import time +import unittest +from unittest import mock +from edit_monitor import daemon_manager + +TEST_BINARY_FILE = '/path/to/test_binary' +TEST_PID_FILE_PATH = ( + '587239c2d1050afdf54512e2d799f3b929f86b43575eb3c7b4bab105dd9bd25e.lock' +) + + +def example_daemon(output_file): + with open(output_file, 'w') as f: + f.write('running daemon target') + + +def long_running_daemon(): + while True: + time.sleep(1) + + +class DaemonManagerTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + # Configure to print logging to stdout. + logging.basicConfig(filename=None, level=logging.DEBUG) + console = logging.StreamHandler(sys.stdout) + logging.getLogger('').addHandler(console) + + def setUp(self): + super().setUp() + self.original_tempdir = tempfile.tempdir + self.working_dir = tempfile.TemporaryDirectory() + # Sets the tempdir under the working dir so any temp files created during + # tests will be cleaned. + tempfile.tempdir = self.working_dir.name + + def tearDown(self): + # Cleans up any child processes left by the tests. + self._cleanup_child_processes() + self.working_dir.cleanup() + # Restores tempdir. + tempfile.tempdir = self.original_tempdir + super().tearDown() + + def test_start_success(self): + damone_output_file = tempfile.NamedTemporaryFile( + dir=self.working_dir.name, delete=False + ) + dm = daemon_manager.DaemonManager( + TEST_BINARY_FILE, + daemon_target=example_daemon, + daemon_args=(damone_output_file.name,), + ) + dm.start() + dm.daemon_process.join() + + # Verifies the expected pid file is created. + expected_pid_file_path = pathlib.Path(self.working_dir.name).joinpath( + 'edit_monitor', TEST_PID_FILE_PATH + ) + self.assertEqual(dm.pid_file_path, expected_pid_file_path) + self.assertTrue(expected_pid_file_path.exists()) + + # Verifies the daemon process is executed successfully. + with open(damone_output_file.name, 'r') as f: + contents = f.read() + self.assertEqual(contents, 'running daemon target') + + def test_start_failed_to_write_pidfile(self): + pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath( + 'edit_monitor' + ) + pid_file_path_dir.mkdir(parents=True, exist_ok=True) + # Makes the directory read-only so write pidfile will fail. + os.chmod(pid_file_path_dir, 0o555) + + dm = daemon_manager.DaemonManager(TEST_BINARY_FILE) + dm.start() + + # Verifies no daemon process is started. + self.assertIsNone(dm.daemon_process) + + def test_start_failed_to_start_daemon_process(self): + dm = daemon_manager.DaemonManager( + TEST_BINARY_FILE, daemon_target='wrong_target', daemon_args=(1) + ) + dm.start() + + # Verifies no daemon process is started. + self.assertIsNone(dm.daemon_process) + + def test_stop_success(self): + dm = daemon_manager.DaemonManager( + TEST_BINARY_FILE, daemon_target=long_running_daemon + ) + dm.start() + dm.stop() + + self.assert_no_subprocess_running() + self.assertFalse(dm.pid_file_path.exists()) + + @mock.patch('os.kill') + def test_stop_failed_to_kill_daemon_process(self, mock_kill): + mock_kill.side_effect = OSError('Unknown OSError') + dm = daemon_manager.DaemonManager( + TEST_BINARY_FILE, daemon_target=long_running_daemon + ) + dm.start() + dm.stop() + + self.assertTrue(dm.daemon_process.is_alive()) + self.assertTrue(dm.pid_file_path.exists()) + + @mock.patch('os.remove') + def test_stop_failed_to_remove_pidfile(self, mock_remove): + mock_remove.side_effect = OSError('Unknown OSError') + + dm = daemon_manager.DaemonManager( + TEST_BINARY_FILE, daemon_target=long_running_daemon + ) + dm.start() + dm.stop() + + self.assert_no_subprocess_running() + self.assertTrue(dm.pid_file_path.exists()) + + def assert_no_subprocess_running(self): + child_pids = self._get_child_processes(os.getpid()) + for child_pid in child_pids: + self.assertFalse( + self._is_process_alive(child_pid), f'process {child_pid} still alive' + ) + + def _get_child_processes(self, parent_pid): + try: + output = subprocess.check_output( + ['ps', '-o', 'pid,ppid', '--no-headers'], text=True + ) + + child_processes = [] + for line in output.splitlines(): + pid, ppid = line.split() + if int(ppid) == parent_pid: + child_processes.append(int(pid)) + return child_processes + except subprocess.CalledProcessError as e: + self.fail(f'failed to get child process, error: {e}') + + def _is_process_alive(self, pid): + try: + output = subprocess.check_output( + ['ps', '-p', str(pid), '-o', 'state='], text=True + ).strip() + state = output.split()[0] + return state != 'Z' # Check if the state is not 'Z' (zombie) + except subprocess.CalledProcessError: + return False + + def _cleanup_child_processes(self): + child_pids = self._get_child_processes(os.getpid()) + for child_pid in child_pids: + try: + os.kill(child_pid, signal.SIGKILL) + except ProcessLookupError: + # process already terminated + pass + + +if __name__ == '__main__': + unittest.main()