#!/usr/bin/awk -f -

#ident	"@(#)smail/util:RELEASE-3_2_0_115:logsumm.awk,v 1.8 2003/06/17 19:07:56 woods Exp"
#
#	logsumm.awk - guts of logsumm, in AWK
#
# This script expects the following variables to be set on the command-line:
#
#	ERRORS		- whether to collect error info for chkerr (0 or 1)
#	TMPDIR		- where to store the error info
#	VERBOSE		- whether to also report per-list DNSBL and RH-DNSBL
#			  match counts (0 or 1; optional)
#
# Normally this script is invoked by the logsumm shell wrapper script.
#
# WARNING: the following code assumes SMAIL_LOG_STYLE=2.
#
# Note:  We essentially ignore continued log lines (eg. those which have SMTP
# error responses appended to them).
#
#
# Start-up: choose the chkerr output file names and zero every counter
# before any log record is read.
#
BEGIN {
	# the chkerr files get a leading dot only when they live under TMPDIR
	chkerr_prefix = (TMPDIR != "") ? (TMPDIR "/.") : "";
	mxerr_file = chkerr_prefix "chkerr.mxerrs";
	relayerr_file = chkerr_prefix "chkerr.relays";

	# connection counters
	connections = refused = 0;

	# message disposition counters
	received = delivered = deferred = 0;
	completed = returned = failed = 0;

	# message size statistics
	size = max_size = min_size = 0;
	rcvd_bytes = 0;
	rcvd_ssq = 0.0;			# float: sum of (values^2), a big number

	# Create each array by assigning a placeholder element with an empty
	# key; the END rule deletes these placeholders before reporting.
	rejected_type[""] = 0;
	received_type[""] = 0;
	rcvd_bytes_type[""] = 0;
	dnsbl[""] = 0;
	delivered_type[""] = 0;
}
#
# Remember the timestamp (fields 1 and 2) of the first record and of the
# most recently seen record.
#
{
	if (NR == 1) {
		# strip the colon that trails field 2
		sub(/:$/, "", $2);
		start_time = $1 " " $2;
	}
	# Any record might turn out to be the last one.  The trailing colon is
	# deliberately left in place here and removed once in the END rule, so
	# that sub() is not called on every record.
	last_time = $1 " " $2;
}
# Every "remote connection" record counts as one incoming SMTP connection.
$4 == "remote" {
	if ($5 == "connection")
		connections += 1;
}
#
# Refusals at greeting time ("remote HELO:" / "remote EHLO:" records).
#
# Field 6 selects the case:
#   "rejected:"  - counted under the "bad-greeting" type
#   "refusing"   - counted under "DNSBL" when field 12 is "DNSBL:", or under
#                  whatever field 13 holds when field 12 is "in"; any other
#                  field 12 bumps only the overall refused total
#
$4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:") {
	if ($6 == "rejected:") {
		refused++;
		rejected_type["bad-greeting"]++;
	} else if ($6 == "refusing") {
		refused++;
		if ($12 == "in") {
			rejected_type[$13]++;
		} else if ($12 == "DNSBL:") {
			rejected_type["DNSBL"]++;
			if (VERBOSE) {
				# drop a leading dotted quad (and the dot after it)
				# from field 13 and tally what remains per list
				sub(/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\./, "", $13);
				dnsbl[$13]++;
			}
		}
	}
}
#
# "remote MAIL ... matched" records: a refusal attributed to an RH-DNSBL.
#
# In verbose mode field 7 is split on "@", "<", ">" and "=", and the
# next-to-last field has (length of the third piece + 1) leading characters
# removed before being tallied in rhdnsbl[].
# NOTE(review): presumably the third piece is the sender's domain and the
# remainder is the list name -- confirm against the log format.
#
$4 == "remote" && $5 == "MAIL" && $13 == "matched" {
	refused += 1;
	rejected_type["RH-DNSBL"] += 1;
	if (VERBOSE) {
		split($7, addr, /@|<|>|=/);
		rh_start = length(addr[3]) + 2;
		rhdnsbl[substr($(NF-1), rh_start)]++;
	}
}
#
# MX error collection for chkerr (only when ERRORS is set).
#
# Each of the three rules below appends one line of three columns to
# mxerr_file, taken from fixed field positions of the matching record.
# Apostrophes are stripped from the fields first.
#
# The apostrophe is matched with /'/ and not /\'/: in gawk "\'" is the
# end-of-buffer anchor, so the escaped form matches only an empty string
# and would strip nothing.
#

# "DNS MX error:" records: fields 9, 12 and 17 (minus a trailing ")").
ERRORS && $5 == "DNS" && $6 == "MX" && $7 == "error:" {
	gsub(/'/, "", $9);
	gsub(/'/, "", $12);
	sub(/\)$/, "", $17);
	gsub(/'/, "", $17);
	printf("%-26s %-26s %s\n", $9, $12, $17) >> mxerr_file;
}

# "Failed ... DNS error: MX ... CNAME." records: fields 14 and 19; the
# third column is not available and is written as "(unknown)".
ERRORS && $5 == "Failed" && $10 == "DNS" && $11 == "error:" && $12 == "MX" && $(NF) == "CNAME." {
	gsub(/'/, "", $14);
	gsub(/'/, "", $19);
	printf("%-26s %-26s %s\n", $14, $19, "(unknown)") >> mxerr_file;
}

# "Failed ... DNS error: No ..." records: field 19 (minus surrounding
# parentheses) and the last field; third column is "(unknown)".
ERRORS && $5 == "Failed" && $10 == "DNS" && $11 == "error:" && $12 == "No" {
	sub(/^\(/, "", $19);
	sub(/\)$/, "", $19);
	gsub(/'/, "", $(NF));
	printf("%-26s %-26s %s\n", $19, $(NF), "(unknown)") >> mxerr_file;
}
#
# Relay error collection for chkerr (only when ERRORS is set): when the
# words "security violation:" sit 10 and 9 fields before the last field,
# append the field 6 before the last one to relayerr_file.
#
ERRORS && NF >= 10 {
	if ($(NF-10) == "security" && $(NF-9) == "violation:")
		printf("%s\n", $(NF-6)) >> relayerr_file;
}
#
# "Received" records: count the message and accumulate size statistics,
# both overall and per origin type ("PROGRAM/PROTOCOL").
#
# The PROGRAM:, PROTOCOL: and SIZE: attributes are looked for in fields 6
# onwards.  A missing PROTOCOL: is reported as "STDIN".
#
$5 == "Received" {
	received++;
	program = "";
	protocol = "";
	# Reset the size for every record so that a record lacking a SIZE:
	# field contributes zero bytes instead of silently re-using the size
	# of the previously received message.
	size = 0;
	have_size = 0;
	for (fn = 6; fn <= NF; fn++) {
		if (substr($fn, 1, 8) == "PROGRAM:")
			program = substr($fn, 9);
		if (substr($fn, 1, 9) == "PROTOCOL:")
			protocol = substr($fn, 10);
		if (substr($fn, 1, 5) == "SIZE:") {
			size = substr($fn, 6) + 0;	# "+ 0" forces a number
			have_size = 1;
		}
	}
	if (protocol == "")
		protocol = "STDIN";
	type = program "/" protocol;
	rcvd_bytes += size;
	rcvd_ssq += size * size;
	# only a record that actually carried a size may move the extremes
	if (have_size) {
		if (size > max_size)
			max_size = size;
		# min_size == 0 doubles as "no minimum recorded yet"
		if (min_size == 0 || (min_size > 0 && size < min_size))
			min_size = size;
	}
	rcvd_bytes_type[type] += size;
	received_type[type]++;
}
#
# "Delivered" records: count the delivery overall and per transport.  The
# transport name is whatever follows "TRANSPORT:" in fields 6 onwards; if
# several fields carry it, the last one wins.
#
$5 == "Delivered" {
	delivered += 1;
	type = "";
	fn = 6;
	while (fn <= NF) {
		if (index($fn, "TRANSPORT:") == 1)
			type = substr($fn, 11);
		fn++;
	}
	delivered_type[type]++;
}
#
# Simple per-disposition counters, keyed on field 5.
#
{
	if ($5 == "Failed")
		failed += 1;
	else if ($5 == "Returned")
		returned += 1;
	else if ($5 == "Deferred")
		deferred += 1;
	else if ($5 == "Completed.")
		completed += 1;
}

#
# Return a run of "=" characters as wide as the decimal rendering of
# total (at most 16 characters), used to underline a column of counts.
#
function underline(total) {
	return substr("================", 1, length(sprintf("%d", total)));
}

#
# Print the summary report: timestamps of the first and last records,
# followed by connection, rejection, reception, size and delivery totals,
# each preceded by a per-type breakdown when there is anything to break
# down.  With VERBOSE set, per-list DNSBL and RH-DNSBL match counts follow,
# sorted in descending numeric order by sort(1).
#
END {
	# delete array initialisation elements
	delete delivered_type[""];
	delete dnsbl[""];
	delete rcvd_bytes_type[""];
	delete received_type[""];
	delete rejected_type[""];

	printf("First record timestamp: %s\n", start_time);
	# clobber the trailing colon now instead of on every assignment above
	sub(/:$/, "", last_time);
	printf("Last record timestamp:  %s\n", last_time);
	print "";
	printf("%16d  Total log entries processed.\n", NR);
	print "";
	printf("%16d  Total incoming SMTP connections.\n", connections);
	print "";

	if (refused > 0) {
		for (type in rejected_type) {
			printf("%16d  connections rejected by %s\n", rejected_type[type], type);
		}
		printf("%16s\n", underline(refused));
	}
	printf("%16d  Total SMTP connections rejected.\n", refused);
	print "";

	if (received > 0) {
		for (type in received_type) {
			printf("%16d  messages received by %s\n", received_type[type], type);
		}
		printf("%16s\n", underline(received));
	}
	printf("%16d  Total messages received.\n", received);
	print "";

	if (rcvd_bytes > 0) {
		for (type in rcvd_bytes_type) {
			printf("%16d  bytes received by %s\n", rcvd_bytes_type[type], type);
		}
		printf("%16s\n", underline(rcvd_bytes));
	}
	printf("%16d  Total bytes received.\n", rcvd_bytes);
	# a sample standard deviation needs at least two messages
	if (received > 1 && rcvd_bytes > 0) {
		rcvd_mean = rcvd_bytes / received;
		#
		# one of these forms is less likely to overflow -- probably the
		# second one
		#
#		rcvd_variance = ((received * rcvd_ssq) - (rcvd_bytes * rcvd_bytes)) / (received * (received - 1));
		rcvd_variance = ((rcvd_ssq / received) - (rcvd_mean * rcvd_mean)) * received / (received - 1);
		# Rounding can leave a tiny negative value here (e.g. when all
		# messages are the same size); clamp so sqrt() stays defined.
		if (rcvd_variance < 0)
			rcvd_variance = 0;
		rcvd_stddev = sqrt(rcvd_variance);
		printf("%16s  (min_size=%d, max_size=%d\n", "", min_size, max_size);
		printf("%16s   mean=%.1f, stddev=%.1f)\n", "", rcvd_mean, rcvd_stddev);
	}
	print "";
	if (delivered > 0) {
		for (type in delivered_type) {
			printf("%16d  messages delivered by transport: %s\n", delivered_type[type], type);
		}
		printf("%16s\n", underline(delivered));
	}
	printf("%16d  Total messages delivered.\n", delivered);
	print "";

	printf("%16d  Total delivery attempts deferred for retry.\n", deferred);
	printf("%16d  Total message deliveries failed.\n", failed);
	printf("%16d  Total messages returned.\n", returned);
	print "";
	printf("%16d  Total messages `completed'.\n", completed);
	if (VERBOSE) {
		print "";
		for (domain in dnsbl) {
			printf("%16d  matches in DNSBL %s\n", dnsbl[domain], domain) | "sort -rn";
		}
		# close the pipe so sort emits its output before the next section
		close("sort -rn");
		print "";
		for (domain in rhdnsbl) {
			printf("%16d  matches in RH-DNSBL %s\n", rhdnsbl[domain], domain) | "sort -rn";
		}
		close("sort -rn");
	}
}
