#!/bin/sh
# Download (curl) a GNU Mailman mailing list archive
# use with ggm

# adapted from AGPL-3.0-only licensed code at https://code.librehq.com/ots/ots-tools/-/blob/main/search-mailman-archive

# testing mailing list archives:
# https://lists.zx2c4.com/pipermail/cgit/
# https://mailman.nginx.org/pipermail/nginx/
# only year:
# https://dianne.skoll.ca/pipermail/remind-fans/

# misc urls:
# https://wiki.list.org/DOC/How%20do%20I%20make%20the%20archives%20searchable
# https://martin-thoma.com/how-to-analyze-mailman-archives/

# Usage: <script> <pipermail-archive-url>
#
# Fetches the archive index page at the given URL, extracts the links to
# the per-period text archives (YYYY-Month.txt[.gz] or YYYY.txt[.gz]),
# downloads them in parallel into ./<last-url-component>_mail_archives and
# decompresses the gzipped ones.
#
# Exit status: 0 on success, 1 if the index or any archive could not be
# fetched/decompressed, 2 on usage error.

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
	printf 'usage: %s <pipermail-archive-url>\n' "${0##*/}" >&2
	exit 2
fi

# Drop one trailing slash so "$url/$month" never contains "//".
url="${1%/}"
dir="$(basename "$url")_mail_archives"

# -p so that re-running against an existing directory is not an error.
mkdir -p "$dir" || exit 1

# Fetch the index page. -f makes HTTP errors fail instead of handing an
# error page to the parser; -sS hides the progress meter but keeps errors.
index=$(curl -fsSL "$url/") || {
	printf 'error: could not fetch index %s/\n' "$url" >&2
	exit 1
}

# Then parse it to get links to all the archive files for individual
# periods.
# only tested with monthly and yearly archives, not tested with quarterly
#
# The file name is cut out of the matched href itself (RSTART/RLENGTH)
# rather than taken as "first quoted string on the line", so an unrelated
# attribute earlier on the line cannot end up being used as a path.
# 6 = length of 'href="', 9 = that plus the trailing '">['.
# The pattern avoids interval expressions and POSIX classes, which some
# older awk implementations do not support.
months=$(printf '%s\n' "$index" | awk '
	match($0, /href="[0-9][0-9][0-9][0-9](-[A-Za-z0-9]+)?\.txt(\.gz)?">\[/) {
		print substr($0, RSTART + 6, RLENGTH - 9)
	}')

if [ -z "$months" ]; then
	printf 'error: no archive links found at %s/\n' "$url" >&2
	exit 1
fi

# Word splitting of $months is intentional and safe: every name consists
# only of characters from [0-9A-Za-z.-] as enforced by the pattern above.
pids=
for month in $months; do
	{
		if curl -fsSL -o "$dir/$month" "$url/$month"; then
			echo "Fetched $month..."
			case $month in
				# -f: overwrite the .txt left behind by an earlier run
				*.gz) gunzip -f "$dir/$month" ;;
			esac
		else
			printf 'error: failed to fetch %s\n' "$month" >&2
			# do not leave a truncated download behind
			rm -f -- "$dir/$month"
			exit 1
		fi
	} &
	pids="$pids $!"
done

# Wait for each job individually; a bare `wait` would always return 0
# and hide failed downloads.
status=0
for pid in $pids; do
	wait "$pid" || status=1
done
exit "$status"