blob: 8aa09908e60bb012859315f10e480ea50d75a55c [file] [log] [blame]
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <inttypes.h>
#include <openssl/md5.h>
#include <openssl/hmac.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "log_uploader.h"
#include "utils.h"
int logmark_once(const char* output_path, const char* version_path,
const char* ntp_sync_path) {
char buf[1024];
char version[64];
struct tm timeinfo;
struct timespec curr_time;
if (read_file_as_string(version_path, version, sizeof(version)) < 0) {
strcpy(version, "unknown-version");
}
rstrip(version);
memset(&timeinfo, 0, sizeof(timeinfo));
memset(&curr_time, 0, sizeof(curr_time));
clock_gettime(CLOCK_REALTIME, &curr_time);
localtime_r(&curr_time.tv_sec, &timeinfo);
snprintf(buf, sizeof(buf), "<7>T: %s %d.%03d %02d/%02d %02d:%02d:%02d "
"ntp=%d\n",
version, (int)curr_time.tv_sec, (int)(curr_time.tv_nsec/1000000L),
timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min,
timeinfo.tm_sec, path_exists(ntp_sync_path));
int rv = write_to_file(output_path, buf);
if (rv < 0) {
perror(output_path);
return -1;
}
return 0;
}
char* parse_and_consume_log_data(struct log_parse_params* params) {
unsigned long log_buffer_size = params->total_read;
int wrote_start_marker = 0;
char *last_start_marker, *last_start_before_end_marker;
int check_for_markers = (params->last_log_counter == 0);
struct line_data parsed_line;
memset(&parsed_line, 0, sizeof(parsed_line));
ssize_t num_read;
if (check_for_markers)
last_start_marker = last_start_before_end_marker = NULL;
params->total_read = 0;
while (1) {
// Make sure we have room in our main buffer for another line if we
// don't then just stop reading now. This should never really happen,
// but we should protect against it anyways.
// Use 2x so that we can also log data about missing entries if we
// need to do that as well.
if (params->total_read + 2*params->line_buffer_size >= log_buffer_size) {
break;
}
if (!params->last_line_valid) {
// Read the next line from /dev/kmsg
num_read = params->read_log_data(params->line_buffer,
params->line_buffer_size, params->user_data);
} else {
// We have our start marker in the line buffer from the last run
// of the loop, just leave it there and we'll parse it.
params->last_line_valid = 0;
num_read = 0;
}
if (num_read < 0) {
if (errno == EAGAIN) {
if (!wrote_start_marker) {
// Write out the start marker.
if (write_to_file(params->dev_kmsg_path, LOG_MARKER_START_LINE) < 0) {
perror("start marker");
return NULL;
}
if (logmark_once(params->dev_kmsg_path, params->version_path,
params->ntp_synced_path) < 0) {
fprintf(stderr, "failed to execute logmark-once properly\n");
return NULL;
}
wrote_start_marker = 1;
continue;
} else {
// We have finished reading everything, exit the loop.
// But this should never really happen because we should find
// out start marker instead and exit that way.
break;
}
} else if (errno == EPIPE) {
// This means the last message we read is no longer there,
// it will then reset us to the beginning so just continue on
// and we will log that we lost messages. This also will happen
// the first time /dev/kmsg is opened.
continue;
} else if (errno == EINVAL) {
// This should never happen because we use an 8192 size buf which is
// the same size as the kernel uses for the temp buf to copy the result
// to. So if this happens...that likely means there is kernel
// corruption.
perror("kernel memory possibly corrupted, devkmsg_read");
continue;
} else {
perror("/dev/kmsg");
return NULL;
}
}
num_read = suppress_mac_addresses(params->line_buffer, num_read);
// Parse the data on the line to get the extra information
if (parse_line_data(params->line_buffer, &parsed_line)) {
// We don't want to be fatal if we fail to parse a line for some
// reason because we'd then repeatedly fail on that line when we
// were restarted. Instead just use it the way it is.
memcpy(params->log_buffer + params->total_read, params->line_buffer,
num_read);
params->total_read += num_read;
} else {
if (wrote_start_marker && strstr(parsed_line.text, LOG_MARKER_START)) {
// We just read back in our start marker that we wrote out. Leave
// it in the buffer and stop reading at this point.
params->last_line_valid = 1;
break;
}
// Successfully parsed a log line, check to see if we're at the right
// spot relative to the counter.
int time_sec = (int) (parsed_line.ts_nsec / 1000000);
int time_usec = (int) (parsed_line.ts_nsec % 1000000);
if (params->last_log_counter > 0) {
if (parsed_line.seq > params->last_log_counter + 1) {
// Put this in what we upload so when viewing it then we
// know there was loss and its interleaved at the correct spot.
params->total_read += snprintf(params->log_buffer +
params->total_read, log_buffer_size - params->total_read,
"<7>[%5d.%06d] WARNING: missed %" PRIu64 " log entries\n",
time_sec, time_usec,
parsed_line.seq - params->last_log_counter - 1);
} else if (!params->config->upload_all &&
parsed_line.seq <= params->last_log_counter) {
// Skip this entry because we've already read it and uploaded
// it. This'll happen if we crash and are restarted.
continue;
}
}
// If we don't have our tracking working then we need to look at the
// markers in the log to avoid uploading tons of duplicate data.
// This should only happen after a reboot.
if (check_for_markers) {
if (strstr(parsed_line.text, LOG_MARKER_END) && last_start_marker) {
last_start_before_end_marker = last_start_marker;
last_start_marker = NULL;
} else if (strstr(parsed_line.text, LOG_MARKER_START)) {
last_start_marker = params->log_buffer + params->total_read;
}
}
params->total_read += snprintf(params->log_buffer +
params->total_read, log_buffer_size - params->total_read,
"<%d>[%5d.%06d] %s", parsed_line.level, time_sec, time_usec,
parsed_line.text);
params->last_log_counter = parsed_line.seq;
}
}
params->log_buffer[params->total_read] = '\0';
if (check_for_markers && last_start_before_end_marker) {
// We have duplicate data from an upload (should have been before
// a reboot which just occurred). Only upload what's been there
// since the last start marker before the last end marker. We will
// still upload duplicate data, but it would only be what was logged
// during the timeframe of us writing the start marker and consuming
// all the log data which should be a very short timeframe.
int dupe_size = last_start_before_end_marker - params->log_buffer;
params->total_read -= dupe_size;
return last_start_before_end_marker;
}
return params->log_buffer;
}
const char SOFT[] = "AEIOUY" "V";
const char HARD[] = "BCDFGHJKLMNPQRSTVWXYZ" "AEIOU";
const char *consensus_key_file = "/tmp/waveguide/consensus_key";
#define CONSENSUS_KEY_LEN 16
uint8_t consensus_key[CONSENSUS_KEY_LEN];
#define MAC_ADDR_LEN 17
void default_consensus_key()
{
int fd;
if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
ssize_t siz = sizeof(consensus_key);
if (read(fd, consensus_key, siz) != siz) {
/* https://xkcd.com/221/ */
memset(consensus_key, time(NULL), siz);
}
close(fd);
}
}
/* Read the waveguide consensus_key, if any */
static void get_consensus_key()
{
static ino_t ino = 0;
static time_t mtime = 0;
struct stat statbuf;
int fd;
if (stat(consensus_key_file, &statbuf)) {
if ((statbuf.st_ino == ino) && (statbuf.st_mtime == mtime)) {
return;
}
}
fd = open(consensus_key_file, O_RDONLY);
if (fd >= 0) {
uint8_t new_key[sizeof(consensus_key)];
if (read(fd, new_key, sizeof(new_key)) == sizeof(new_key)) {
memcpy(consensus_key, new_key, sizeof(consensus_key));
ino = statbuf.st_ino;
mtime = statbuf.st_mtime;
}
close(fd);
}
}
/* Given a value from 0..4095, encode it as a cons+vowel+cons sequence. */
static void trigraph(int num, char *out)
{
int ns = sizeof(SOFT) - 1;
int nh = sizeof(HARD) - 1;
int c1, c2, c3;
c3 = num % nh;
c2 = (num / nh) % ns;
c1 = num / nh / ns;
out[0] = HARD[c1];
out[1] = SOFT[c2];
out[2] = HARD[c3];
}
static int hex_chr_to_int(char hex) {
switch(hex) {
case '0' ... '9':
return hex - '0';
case 'a' ... 'f':
return hex - 'a' + 10;
case 'A' ... 'F':
return hex - 'A' + 10;
}
return 0;
}
/*
* Convert a string of the form "00:11:22:33:44:55" to
* a binary array 001122334455.
*/
static void get_binary_mac(const char *mac, uint8_t *out) {
int i;
for (i = 0; i < MAC_ADDR_LEN; i += 3) {
*out = (hex_chr_to_int(mac[i]) << 4) | hex_chr_to_int(mac[i+1]);
out++;
}
}
static const char *get_anonid_for_mac(const char *mac, char *out) {
unsigned char digest[EVP_MAX_MD_SIZE];
unsigned int digest_len = sizeof(digest);
uint8_t macbin[6];
uint32_t num;
get_consensus_key();
get_binary_mac(mac, macbin);
HMAC(EVP_md5(), consensus_key, sizeof(consensus_key), macbin, sizeof(macbin),
digest, &digest_len);
num = (digest[0] << 16) | (digest[1] << 8) | digest[2];
trigraph((num >> 12) & 0x0fff, out);
trigraph((num ) & 0x0fff, out + 3);
return out;
}
static ssize_t anonymize_mac_address(char *s, ssize_t len) {
char anonid[6];
ssize_t offset = MAC_ADDR_LEN - sizeof(anonid);
get_anonid_for_mac(s, anonid);
memcpy(s, anonid, sizeof(anonid));
s += sizeof(anonid);
len -= offset;
memmove(s, s + offset, len);
return offset;
}
static int is_mac_addr(const char *s, char sep) {
if ((s[2] == sep) && (s[5] == sep) && (s[8] == sep) &&
(s[11] == sep) && (s[14] == sep) &&
isxdigit(s[0]) && isxdigit(s[1]) &&
isxdigit(s[3]) && isxdigit(s[4]) &&
isxdigit(s[6]) && isxdigit(s[7]) &&
isxdigit(s[9]) && isxdigit(s[10]) &&
isxdigit(s[12]) && isxdigit(s[13]) &&
isxdigit(s[15]) && isxdigit(s[16])) {
return 1;
}
return 0;
}
/*
* search for text patterns which look like MAC addresses,
* and anonymize them.
* Ex: f8:8f:ca:00:00:01 to PEEVJB
*/
unsigned long suppress_mac_addresses(char *line, ssize_t len) {
char *s = line;
unsigned long new_len = len;
ssize_t reduce;
while (len >= MAC_ADDR_LEN) {
if (is_mac_addr(s, ':') || is_mac_addr(s, '-') || is_mac_addr(s, '_')) {
reduce = anonymize_mac_address(s, len);
len -= reduce;
new_len -= reduce;
} else {
s += 1;
len -= 1;
}
}
return new_len;
}