minijail: Add a new option to allow a program to run as pid 1.

Add a new flag that controls whether Minijail forks off a child
when a pid namespace is requested, so that a program can be
run as pid 1 inside the new pid namespace.

BUG=chromium:350616
TEST=security_Minijail0 passes
TEST=`minijail0 -I /bin/bash`, then `echo $$` prints 1

Change-Id: Icc959b775e5fe6368c15a834e23ce3f2c119af41
Reviewed-on: https://chromium-review.googlesource.com/289440
Reviewed-by: Jorge Lucangeli Obes <jorgelo@chromium.org>
Commit-Queue: Yu-hsi Chiang <yuhsi@google.com>
Tested-by: Yu-hsi Chiang <yuhsi@google.com>
diff --git a/libminijail.c b/libminijail.c
index 78db271..2e625d6 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -95,6 +95,7 @@
 		int log_seccomp_filter:1;
 		int chroot:1;
 		int mount_tmp:1;
+		int do_init:1;
 	} flags;
 	uid_t uid;
 	gid_t gid;
@@ -122,6 +123,7 @@
 	j->flags.enter_vfs = 0;
 	j->flags.readonly = 0;
 	j->flags.pids = 0;
+	j->flags.do_init = 0;
 }
 
 /*
@@ -277,6 +279,7 @@
 	j->flags.vfs = 1;
 	j->flags.readonly = 1;
 	j->flags.pids = 1;
+	j->flags.do_init = 1;
 }
 
 void API minijail_namespace_net(struct minijail *j)
@@ -300,6 +303,15 @@
 	j->flags.ptrace = 1;
 }
 
+void API minijail_run_as_init(struct minijail *j)
+{
+	/*
+	 * Since the jailed program will become 'init' in the new PID namespace,
+	 * Minijail does not need to fork an 'init' process.
+	 */
+	j->flags.do_init = 0;
+}
+
 int API minijail_enter_chroot(struct minijail *j, const char *dir)
 {
 	if (j->chrootdir)
@@ -1037,6 +1049,7 @@
 	int ret;
 	/* We need to remember this across the minijail_preexec() call. */
 	int pid_namespace = j->flags.pids;
+	int do_init = j->flags.do_init;
 
 	oldenv = getenv(kLdPreloadEnvVar);
 	if (oldenv) {
@@ -1232,12 +1245,13 @@
 	/* Jail this process and its descendants... */
 	minijail_enter(j);
 
-	if (pid_namespace) {
+	if (pid_namespace && do_init) {
 		/*
 		 * pid namespace: this process will become init inside the new
-		 * namespace, so fork off a child to actually run the program
-		 * (we don't want all programs we might exec to have to know
-		 * how to be init).
+		 * namespace. We don't want all programs we might exec to have
+		 * to know how to be init, so normally (|do_init == 1|) we fork
+		 * off a child to actually run the program. If |do_init == 0|,
+		 * we let the program keep pid 1 and be init.
 		 *
 		 * If we're multithreaded, we'll probably deadlock here. See
 		 * WARNING above.
@@ -1250,7 +1264,7 @@
 	}
 
 	/*
-	 * If we aren't pid-namespaced:
+	 * If we aren't pid-namespaced, or the jailed program asked to be init:
 	 *   calling process
 	 *   -> execve()-ing process
 	 * If we are:
@@ -1266,6 +1280,7 @@
 {
 	pid_t child_pid;
 	int pid_namespace = j->flags.pids;
+	int do_init = j->flags.do_init;
 
 	if (j->flags.caps)
 		die("caps not supported with static targets");
@@ -1291,12 +1306,13 @@
 	j->flags.pids = 0;
 	minijail_enter(j);
 
-	if (pid_namespace) {
+	if (pid_namespace && do_init) {
 		/*
 		 * pid namespace: this process will become init inside the new
-		 * namespace, so fork off a child to actually run the program
-		 * (we don't want all programs we might exec to have to know
-		 * how to be init).
+		 * namespace. We don't want all programs we might exec to have
+		 * to know how to be init, so normally (|do_init == 1|) we fork
+		 * off a child to actually run the program. If |do_init == 0|,
+		 * we let the program keep pid 1 and be init.
 		 *
 		 * If we're multithreaded, we'll probably deadlock here. See
 		 * WARNING above.
diff --git a/libminijail.h b/libminijail.h
index 6738a32..324731f 100644
--- a/libminijail.h
+++ b/libminijail.h
@@ -56,6 +56,7 @@
  */
 void minijail_namespace_pids(struct minijail *j);
 void minijail_remount_readonly(struct minijail *j);
+void minijail_run_as_init(struct minijail *j);
 void minijail_inherit_usergroups(struct minijail *j);
 void minijail_disable_ptrace(struct minijail *j);
 
diff --git a/minijail0.c b/minijail0.c
index 22b828c..bb7d631 100644
--- a/minijail0.c
+++ b/minijail0.c
@@ -90,6 +90,7 @@
 	       "  -H:         seccomp filter help message\n"
 	       "  -i:         exit immediately after fork (do not act as init)\n"
 	       "              Not compatible with -p\n"
+	       "  -I:         run <program> as init (pid 1) inside a new pid namespace (implies -p)\n"
 	       "  -L:         report blocked syscalls to syslog when using seccomp filter.\n"
 	       "              Forces the following syscalls to be allowed:\n"
 	       "                  ", progn);
@@ -129,7 +130,7 @@
 	const char *filter_path;
 	if (argc > 1 && argv[1][0] != '-')
 		return 1;
-	while ((opt = getopt(argc, argv, "u:g:sS:c:C:b:V:vrGhHinpLet")) != -1) {
+	while ((opt = getopt(argc, argv, "u:g:sS:c:C:b:V:vrGhHinpLetI")) != -1) {
 		switch (opt) {
 		case 'u':
 			set_user(j, optarg);
@@ -200,6 +201,10 @@
 		case 'H':
 			seccomp_filter_usage(argv[0]);
 			exit(1);
+		case 'I':
+			minijail_namespace_pids(j);
+			minijail_run_as_init(j);
+			break;
 		default:
 			usage(argv[0]);
 			exit(1);