logupload/client/log_uploader.c - vendor/google/platform - Git at Google

 #include <ctype.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 #include <inttypes.h>
 #include <openssl/md5.h>
 #include <openssl/hmac.h>
 #include <sys/types.h>
 #include <sys/stat.h>

 #include "log_uploader.h"
 #include "utils.h"

 int logmark_once(const char* output_path, const char* version_path,
     const char* ntp_sync_path) {
   char buf[1024];
   char version[64];
   struct tm timeinfo;
   struct timespec curr_time;
   if (read_file_as_string(version_path, version, sizeof(version)) < 0) {
     strcpy(version, "unknown-version");
   }
   rstrip(version);
   memset(&timeinfo, 0, sizeof(timeinfo));
   memset(&curr_time, 0, sizeof(curr_time));
   clock_gettime(CLOCK_REALTIME, &curr_time);
   localtime_r(&curr_time.tv_sec, &timeinfo);
   snprintf(buf, sizeof(buf), "<7>T: %s %d.%03d %02d/%02d %02d:%02d:%02d "
       "ntp=%d\n",
       version, (int)curr_time.tv_sec, (int)(curr_time.tv_nsec/1000000L),
       timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min,
       timeinfo.tm_sec, path_exists(ntp_sync_path));
   int rv = write_to_file(output_path, buf);
   if (rv < 0) {
     perror(output_path);
     return -1;
   }
   return 0;
 }

 char* parse_and_consume_log_data(struct log_parse_params* params) {
   unsigned long log_buffer_size = params->total_read;
   int wrote_start_marker = 0;
   char *last_start_marker, *last_start_before_end_marker;
   int check_for_markers = (params->last_log_counter == 0);
   struct line_data parsed_line;
   memset(&parsed_line, 0, sizeof(parsed_line));
   ssize_t num_read;
   if (check_for_markers)
     last_start_marker = last_start_before_end_marker = NULL;
   params->total_read = 0;

   while (1) {
     // Make sure we have room in our main buffer for another line if we
     // don't then just stop reading now. This should never really happen,
     // but we should protect against it anyways.
     // Use 2x so that we can also log data about missing entries if we
     // need to do that as well.
     if (params->total_read + 2*params->line_buffer_size >= log_buffer_size) {
       break;
     }
     if (!params->last_line_valid) {
       // Read the next line from /dev/kmsg
       num_read = params->read_log_data(params->line_buffer,
           params->line_buffer_size, params->user_data);
     } else {
       // We have our start marker in the line buffer from the last run
       // of the loop, just leave it there and we'll parse it.
       params->last_line_valid = 0;
       num_read = 0;
     }
     if (num_read < 0) {
       if (errno == EAGAIN) {
         if (!wrote_start_marker) {
           // Write out the start marker.
           if (write_to_file(params->dev_kmsg_path, LOG_MARKER_START_LINE) < 0) {
             perror("start marker");
             return NULL;
           }
           if (logmark_once(params->dev_kmsg_path, params->version_path,
                 params->ntp_synced_path) < 0) {
             fprintf(stderr, "failed to execute logmark-once properly\n");
             return NULL;
           }
           wrote_start_marker = 1;
           continue;
         } else {
           // We have finished reading everything, exit the loop.
           // But this should never really happen because we should find
           // out start marker instead and exit that way.
           break;
         }
       } else if (errno == EPIPE) {
         // This means the last message we read is no longer there,
         // it will then reset us to the beginning so just continue on
         // and we will log that we lost messages. This also will happen
         // the first time /dev/kmsg is opened.
         continue;
       } else if (errno == EINVAL) {
         // This should never happen because we use an 8192 size buf which is
         // the same size as the kernel uses for the temp buf to copy the result
         // to. So if this happens...that likely means there is kernel
         // corruption.
         perror("kernel memory possibly corrupted, devkmsg_read");
         continue;
       } else {
         perror("/dev/kmsg");
         return NULL;
       }
     }

     num_read = suppress_mac_addresses(params->line_buffer, num_read);

     // Parse the data on the line to get the extra information
     if (parse_line_data(params->line_buffer, &parsed_line)) {
       // We don't want to be fatal if we fail to parse a line for some
       // reason because we'd then repeatedly fail on that line when we
       // were restarted. Instead just use it the way it is.
       memcpy(params->log_buffer + params->total_read, params->line_buffer,
           num_read);
       params->total_read += num_read;
     } else {
       if (wrote_start_marker && strstr(parsed_line.text, LOG_MARKER_START)) {
         // We just read back in our start marker that we wrote out. Leave
         // it in the buffer and stop reading at this point.
         params->last_line_valid = 1;
         break;
       }
       // Successfully parsed a log line, check to see if we're at the right
       // spot relative to the counter.
       int time_sec = (int) (parsed_line.ts_nsec / 1000000);
       int time_usec = (int) (parsed_line.ts_nsec % 1000000);
       if (params->last_log_counter > 0) {
         if (parsed_line.seq > params->last_log_counter + 1) {
           // Put this in what we upload so when viewing it then we
           // know there was loss and its interleaved at the correct spot.
           params->total_read += snprintf(params->log_buffer +
               params->total_read, log_buffer_size - params->total_read,
               "<7>[%5d.%06d] WARNING: missed %" PRIu64 " log entries\n",
               time_sec, time_usec,
               parsed_line.seq - params->last_log_counter - 1);
         } else if (!params->config->upload_all &&
             parsed_line.seq <= params->last_log_counter) {
           // Skip this entry because we've already read it and uploaded
           // it. This'll happen if we crash and are restarted.
           continue;
         }
       }
       // If we don't have our tracking working then we need to look at the
       // markers in the log to avoid uploading tons of duplicate data.
       // This should only happen after a reboot.
       if (check_for_markers) {
         if (strstr(parsed_line.text, LOG_MARKER_END) && last_start_marker) {
           last_start_before_end_marker = last_start_marker;
           last_start_marker = NULL;
         } else if (strstr(parsed_line.text, LOG_MARKER_START)) {
           last_start_marker = params->log_buffer + params->total_read;
         }
       }

       params->total_read += snprintf(params->log_buffer +
           params->total_read, log_buffer_size - params->total_read,
           "<%d>[%5d.%06d] %s", parsed_line.level, time_sec, time_usec,
           parsed_line.text);
       params->last_log_counter = parsed_line.seq;
     }
   }
   params->log_buffer[params->total_read] = '\0';

   if (check_for_markers && last_start_before_end_marker) {
       // We have duplicate data from an upload (should have been before
       // a reboot which just occurred). Only upload what's been there
       // since the last start marker before the last end marker. We will
       // still upload duplicate data, but it would only be what was logged
       // during the timeframe of us writing the start marker and consuming
       // all the log data which should be a very short timeframe.
       int dupe_size = last_start_before_end_marker - params->log_buffer;
       params->total_read -= dupe_size;
       return last_start_before_end_marker;
   }
   return params->log_buffer;
 }

 const char SOFT[] = "AEIOUY" "V";
 const char HARD[] = "BCDFGHJKLMNPQRSTVWXYZ" "AEIOU";
 const char *consensus_key_file = "/tmp/waveguide/consensus_key";
 #define CONSENSUS_KEY_LEN 16
 uint8_t consensus_key[CONSENSUS_KEY_LEN];
 #define MAC_ADDR_LEN 17

 void default_consensus_key()
 {
   int fd;

   if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
     ssize_t siz = sizeof(consensus_key);
     if (read(fd, consensus_key, siz) != siz) {
       /* https://xkcd.com/221/ */
       memset(consensus_key, time(NULL), siz);
     }
     close(fd);
   }
 }

 /* Read the waveguide consensus_key, if any */
 static void get_consensus_key()
 {
   static ino_t ino = 0;
   static time_t mtime = 0;
   struct stat statbuf;
   int fd;

   if (stat(consensus_key_file, &statbuf)) {
     if ((statbuf.st_ino == ino) && (statbuf.st_mtime == mtime)) {
       return;
     }
   }

   fd = open(consensus_key_file, O_RDONLY);
   if (fd >= 0) {
     uint8_t new_key[sizeof(consensus_key)];
     if (read(fd, new_key, sizeof(new_key)) == sizeof(new_key)) {
       memcpy(consensus_key, new_key, sizeof(consensus_key));
       ino = statbuf.st_ino;
       mtime = statbuf.st_mtime;
     }
     close(fd);
   }
 }

 /* Given a value from 0..4095, encode it as a cons+vowel+cons sequence. */
 static void trigraph(int num, char *out)
 {
   int ns = sizeof(SOFT) - 1;
   int nh = sizeof(HARD) - 1;
   int c1, c2, c3;

   c3 = num % nh;
   c2 = (num / nh) % ns;
   c1 = num / nh / ns;
   out[0] = HARD[c1];
   out[1] = SOFT[c2];
   out[2] = HARD[c3];
 }

 static int hex_chr_to_int(char hex) {
   switch(hex) {
     case '0' ... '9':
       return hex - '0';
     case 'a' ... 'f':
       return hex - 'a' + 10;
     case 'A' ... 'F':
       return hex - 'A' + 10;
   }

   return 0;
 }

 /*
  * Convert a string of the form "00:11:22:33:44:55" to
  * a binary array 001122334455.
  */
 static void get_binary_mac(const char *mac, uint8_t *out) {
   int i;
   for (i = 0; i < MAC_ADDR_LEN; i += 3) {
     *out = (hex_chr_to_int(mac[i]) << 4) | hex_chr_to_int(mac[i+1]);
     out++;
   }
 }

 static const char *get_anonid_for_mac(const char *mac, char *out) {
   unsigned char digest[EVP_MAX_MD_SIZE];
   unsigned int digest_len = sizeof(digest);
   uint8_t macbin[6];
   uint32_t num;

   get_consensus_key();
   get_binary_mac(mac, macbin);
   HMAC(EVP_md5(), consensus_key, sizeof(consensus_key), macbin, sizeof(macbin),
       digest, &digest_len);
   num = (digest[0] << 16) | (digest[1] << 8) | digest[2];
   trigraph((num >> 12) & 0x0fff, out);
   trigraph((num      ) & 0x0fff, out + 3);

   return out;
 }

 static ssize_t anonymize_mac_address(char *s, ssize_t len) {
   char anonid[6];
   ssize_t offset = MAC_ADDR_LEN - sizeof(anonid);

   get_anonid_for_mac(s, anonid);
   memcpy(s, anonid, sizeof(anonid));
   s += sizeof(anonid);
   len -= offset;
   memmove(s, s + offset, len);
   return offset;
 }

 static int is_mac_addr(const char *s, char sep) {
   if ((s[2] == sep) && (s[5] == sep) && (s[8] == sep) &&
       (s[11] == sep) && (s[14] == sep) &&
       isxdigit(s[0]) && isxdigit(s[1]) &&
       isxdigit(s[3]) && isxdigit(s[4]) &&
       isxdigit(s[6]) && isxdigit(s[7]) &&
       isxdigit(s[9]) && isxdigit(s[10]) &&
       isxdigit(s[12]) && isxdigit(s[13]) &&
       isxdigit(s[15]) && isxdigit(s[16])) {
     return 1;
   }

   return 0;
 }

 /*
  * search for text patterns which look like MAC addresses,
  * and anonymize them.
  * Ex: f8:8f:ca:00:00:01 to PEEVJB
  */
 unsigned long suppress_mac_addresses(char *line, ssize_t len) {
   char *s = line;
   unsigned long new_len = len;
   ssize_t reduce;

   while (len >= MAC_ADDR_LEN) {
     if (is_mac_addr(s, ':') || is_mac_addr(s, '-') || is_mac_addr(s, '_')) {
       reduce = anonymize_mac_address(s, len);
       len -= reduce;
       new_len -= reduce;
     } else {
       s += 1;
       len -= 1;
     }
   }

   return new_len;
 }
	#include <ctype.h>
	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <time.h>
	#include <unistd.h>
	#include <inttypes.h>
	#include <openssl/md5.h>
	#include <openssl/hmac.h>
	#include <sys/types.h>
	#include <sys/stat.h>

	#include "log_uploader.h"
	#include "utils.h"

	int logmark_once(const char* output_path, const char* version_path,
	const char* ntp_sync_path) {
	char buf[1024];
	char version[64];
	struct tm timeinfo;
	struct timespec curr_time;
	if (read_file_as_string(version_path, version, sizeof(version)) < 0) {
	strcpy(version, "unknown-version");
	}
	rstrip(version);
	memset(&timeinfo, 0, sizeof(timeinfo));
	memset(&curr_time, 0, sizeof(curr_time));
	clock_gettime(CLOCK_REALTIME, &curr_time);
	localtime_r(&curr_time.tv_sec, &timeinfo);
	snprintf(buf, sizeof(buf), "<7>T: %s %d.%03d %02d/%02d %02d:%02d:%02d "
	"ntp=%d\n",
	version, (int)curr_time.tv_sec, (int)(curr_time.tv_nsec/1000000L),
	timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min,
	timeinfo.tm_sec, path_exists(ntp_sync_path));
	int rv = write_to_file(output_path, buf);
	if (rv < 0) {
	perror(output_path);
	return -1;
	}
	return 0;
	}

	char* parse_and_consume_log_data(struct log_parse_params* params) {
	unsigned long log_buffer_size = params->total_read;
	int wrote_start_marker = 0;
	char last_start_marker, last_start_before_end_marker;
	int check_for_markers = (params->last_log_counter == 0);
	struct line_data parsed_line;
	memset(&parsed_line, 0, sizeof(parsed_line));
	ssize_t num_read;
	if (check_for_markers)
	last_start_marker = last_start_before_end_marker = NULL;
	params->total_read = 0;

	while (1) {
	// Make sure we have room in our main buffer for another line if we
	// don't then just stop reading now. This should never really happen,
	// but we should protect against it anyways.
	// Use 2x so that we can also log data about missing entries if we
	// need to do that as well.
	if (params->total_read + 2*params->line_buffer_size >= log_buffer_size) {
	break;
	}
	if (!params->last_line_valid) {
	// Read the next line from /dev/kmsg
	num_read = params->read_log_data(params->line_buffer,
	params->line_buffer_size, params->user_data);
	} else {
	// We have our start marker in the line buffer from the last run
	// of the loop, just leave it there and we'll parse it.
	params->last_line_valid = 0;
	num_read = 0;
	}
	if (num_read < 0) {
	if (errno == EAGAIN) {
	if (!wrote_start_marker) {
	// Write out the start marker.
	if (write_to_file(params->dev_kmsg_path, LOG_MARKER_START_LINE) < 0) {
	perror("start marker");
	return NULL;
	}
	if (logmark_once(params->dev_kmsg_path, params->version_path,
	params->ntp_synced_path) < 0) {
	fprintf(stderr, "failed to execute logmark-once properly\n");
	return NULL;
	}
	wrote_start_marker = 1;
	continue;
	} else {
	// We have finished reading everything, exit the loop.
	// But this should never really happen because we should find
	// out start marker instead and exit that way.
	break;
	}
	} else if (errno == EPIPE) {
	// This means the last message we read is no longer there,
	// it will then reset us to the beginning so just continue on
	// and we will log that we lost messages. This also will happen
	// the first time /dev/kmsg is opened.
	continue;
	} else if (errno == EINVAL) {
	// This should never happen because we use an 8192 size buf which is
	// the same size as the kernel uses for the temp buf to copy the result
	// to. So if this happens...that likely means there is kernel
	// corruption.
	perror("kernel memory possibly corrupted, devkmsg_read");
	continue;
	} else {
	perror("/dev/kmsg");
	return NULL;
	}
	}

	num_read = suppress_mac_addresses(params->line_buffer, num_read);

	// Parse the data on the line to get the extra information
	if (parse_line_data(params->line_buffer, &parsed_line)) {
	// We don't want to be fatal if we fail to parse a line for some
	// reason because we'd then repeatedly fail on that line when we
	// were restarted. Instead just use it the way it is.
	memcpy(params->log_buffer + params->total_read, params->line_buffer,
	num_read);
	params->total_read += num_read;
	} else {
	if (wrote_start_marker && strstr(parsed_line.text, LOG_MARKER_START)) {
	// We just read back in our start marker that we wrote out. Leave
	// it in the buffer and stop reading at this point.
	params->last_line_valid = 1;
	break;
	}
	// Successfully parsed a log line, check to see if we're at the right
	// spot relative to the counter.
	int time_sec = (int) (parsed_line.ts_nsec / 1000000);
	int time_usec = (int) (parsed_line.ts_nsec % 1000000);
	if (params->last_log_counter > 0) {
	if (parsed_line.seq > params->last_log_counter + 1) {
	// Put this in what we upload so when viewing it then we
	// know there was loss and its interleaved at the correct spot.
	params->total_read += snprintf(params->log_buffer +
	params->total_read, log_buffer_size - params->total_read,
	"<7>[%5d.%06d] WARNING: missed %" PRIu64 " log entries\n",
	time_sec, time_usec,
	parsed_line.seq - params->last_log_counter - 1);
	} else if (!params->config->upload_all &&
	parsed_line.seq <= params->last_log_counter) {
	// Skip this entry because we've already read it and uploaded
	// it. This'll happen if we crash and are restarted.
	continue;
	}
	}
	// If we don't have our tracking working then we need to look at the
	// markers in the log to avoid uploading tons of duplicate data.
	// This should only happen after a reboot.
	if (check_for_markers) {
	if (strstr(parsed_line.text, LOG_MARKER_END) && last_start_marker) {
	last_start_before_end_marker = last_start_marker;
	last_start_marker = NULL;
	} else if (strstr(parsed_line.text, LOG_MARKER_START)) {
	last_start_marker = params->log_buffer + params->total_read;
	}
	}

	params->total_read += snprintf(params->log_buffer +
	params->total_read, log_buffer_size - params->total_read,
	"<%d>[%5d.%06d] %s", parsed_line.level, time_sec, time_usec,
	parsed_line.text);
	params->last_log_counter = parsed_line.seq;
	}
	}
	params->log_buffer[params->total_read] = '\0';

	if (check_for_markers && last_start_before_end_marker) {
	// We have duplicate data from an upload (should have been before
	// a reboot which just occurred). Only upload what's been there
	// since the last start marker before the last end marker. We will
	// still upload duplicate data, but it would only be what was logged
	// during the timeframe of us writing the start marker and consuming
	// all the log data which should be a very short timeframe.
	int dupe_size = last_start_before_end_marker - params->log_buffer;
	params->total_read -= dupe_size;
	return last_start_before_end_marker;
	}
	return params->log_buffer;
	}

	const char SOFT[] = "AEIOUY" "V";
	const char HARD[] = "BCDFGHJKLMNPQRSTVWXYZ" "AEIOU";
	const char *consensus_key_file = "/tmp/waveguide/consensus_key";
	#define CONSENSUS_KEY_LEN 16
	uint8_t consensus_key[CONSENSUS_KEY_LEN];
	#define MAC_ADDR_LEN 17

	void default_consensus_key()
	{
	int fd;

	if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
	ssize_t siz = sizeof(consensus_key);
	if (read(fd, consensus_key, siz) != siz) {
	/* https://xkcd.com/221/ */
	memset(consensus_key, time(NULL), siz);
	}
	close(fd);
	}
	}

	/* Read the waveguide consensus_key, if any */
	static void get_consensus_key()
	{
	static ino_t ino = 0;
	static time_t mtime = 0;
	struct stat statbuf;
	int fd;

	if (stat(consensus_key_file, &statbuf)) {
	if ((statbuf.st_ino == ino) && (statbuf.st_mtime == mtime)) {
	return;
	}
	}

	fd = open(consensus_key_file, O_RDONLY);
	if (fd >= 0) {
	uint8_t new_key[sizeof(consensus_key)];
	if (read(fd, new_key, sizeof(new_key)) == sizeof(new_key)) {
	memcpy(consensus_key, new_key, sizeof(consensus_key));
	ino = statbuf.st_ino;
	mtime = statbuf.st_mtime;
	}
	close(fd);
	}
	}

	/* Given a value from 0..4095, encode it as a cons+vowel+cons sequence. */
	static void trigraph(int num, char *out)
	{
	int ns = sizeof(SOFT) - 1;
	int nh = sizeof(HARD) - 1;
	int c1, c2, c3;

	c3 = num % nh;
	c2 = (num / nh) % ns;
	c1 = num / nh / ns;
	out[0] = HARD[c1];
	out[1] = SOFT[c2];
	out[2] = HARD[c3];
	}

	static int hex_chr_to_int(char hex) {
	switch(hex) {
	case '0' ... '9':
	return hex - '0';
	case 'a' ... 'f':
	return hex - 'a' + 10;
	case 'A' ... 'F':
	return hex - 'A' + 10;
	}

	return 0;
	}

	/*
	* Convert a string of the form "00:11:22:33:44:55" to
	* a binary array 001122334455.
	*/
	static void get_binary_mac(const char mac, uint8_t out) {
	int i;
	for (i = 0; i < MAC_ADDR_LEN; i += 3) {
	*out = (hex_chr_to_int(mac[i]) << 4) \| hex_chr_to_int(mac[i+1]);
	out++;
	}
	}

	static const char get_anonid_for_mac(const char mac, char *out) {
	unsigned char digest[EVP_MAX_MD_SIZE];
	unsigned int digest_len = sizeof(digest);
	uint8_t macbin[6];
	uint32_t num;

	get_consensus_key();
	get_binary_mac(mac, macbin);
	HMAC(EVP_md5(), consensus_key, sizeof(consensus_key), macbin, sizeof(macbin),
	digest, &digest_len);
	num = (digest[0] << 16) \| (digest[1] << 8) \| digest[2];
	trigraph((num >> 12) & 0x0fff, out);
	trigraph((num ) & 0x0fff, out + 3);

	return out;
	}

	static ssize_t anonymize_mac_address(char *s, ssize_t len) {
	char anonid[6];
	ssize_t offset = MAC_ADDR_LEN - sizeof(anonid);

	get_anonid_for_mac(s, anonid);
	memcpy(s, anonid, sizeof(anonid));
	s += sizeof(anonid);
	len -= offset;
	memmove(s, s + offset, len);
	return offset;
	}

	static int is_mac_addr(const char *s, char sep) {
	if ((s[2] == sep) && (s[5] == sep) && (s[8] == sep) &&
	(s[11] == sep) && (s[14] == sep) &&
	isxdigit(s[0]) && isxdigit(s[1]) &&
	isxdigit(s[3]) && isxdigit(s[4]) &&
	isxdigit(s[6]) && isxdigit(s[7]) &&
	isxdigit(s[9]) && isxdigit(s[10]) &&
	isxdigit(s[12]) && isxdigit(s[13]) &&
	isxdigit(s[15]) && isxdigit(s[16])) {
	return 1;
	}

	return 0;
	}

	/*
	* search for text patterns which look like MAC addresses,
	* and anonymize them.
	* Ex: f8:8f:ca:00:00:01 to PEEVJB
	*/
	unsigned long suppress_mac_addresses(char *line, ssize_t len) {
	char *s = line;
	unsigned long new_len = len;
	ssize_t reduce;

	while (len >= MAC_ADDR_LEN) {
	if (is_mac_addr(s, ':') \|\| is_mac_addr(s, '-') \|\| is_mac_addr(s, '_')) {
	reduce = anonymize_mac_address(s, len);
	len -= reduce;
	new_len -= reduce;
	} else {
	s += 1;
	len -= 1;
	}
	}

	return new_len;
	}