diff options
| author | Xiao Pan <gky44px1999@gmail.com> | 2024-04-01 03:51:18 -0700 | 
|---|---|---|
| committer | Xiao Pan <gky44px1999@gmail.com> | 2024-04-01 03:51:18 -0700 | 
| commit | abe0f2a59a4dfb64003d759d406ee5a21cf142bd (patch) | |
| tree | f9a18a4367223aed9e3ee97bc2f1cc8cf2f92c76 /sh/cgm | |
| parent | acc9d82db44135df1c7fe642241fe0bf39cd5efe (diff) | |
curl and grep GNU Mailman mailing list archive scripts
cgm steals AGPL-3.0-only licensed code from https://code.librehq.com/ots/ots-tools/-/blob/main/search-mailman-archive
Diffstat (limited to 'sh/cgm')
| -rwxr-xr-x | sh/cgm | 34 | 
1 files changed, 34 insertions, 0 deletions
#!/bin/sh
# cgm - Curl Gnu Mailman Mailing list archive
# use with ggm
#
# Usage: cgm <pipermail-archive-url>
#
# Fetches the archive index page at the given URL, extracts the links to the
# per-period text archives (NNNN.txt, NNNN-Name.txt, optionally .gz), downloads
# them in parallel into "<basename of url>_mail_archives/" and gunzips the
# compressed ones.
#
# Exit status: 0 if every archive was fetched (and decompressed) successfully,
# 1 if the index or any archive failed, 2 on usage error.

# Adapted from AGPL-3.0-only licensed code at
# https://code.librehq.com/ots/ots-tools/-/blob/main/search-mailman-archive

# testing mailing list archives:
# https://lists.zx2c4.com/pipermail/cgit/
# https://mailman.nginx.org/pipermail/nginx/
# only year:
# https://dianne.skoll.ca/pipermail/remind-fans/

if [ "$#" -ne 1 ] || [ -z "$1" ]; then
	printf 'Usage: %s <pipermail-archive-url>\n' "${0##*/}" >&2
	exit 2
fi

# Drop one trailing slash so that "$url/$name" never contains "//".
url="${1%/}"
dir="$(basename "$url")_mail_archives"

# -p: re-running against an already existing directory must not fail.
mkdir -p "$dir" || exit 1

# Download one archive file (name in $1) into $dir and decompress it if it is
# gzipped. Returns non-zero if the download or the decompression fails.
fetch_archive() {
	# -f: treat HTTP errors as failures instead of saving the error page.
	if ! curl -fsSL -o "$dir/$1" "$url/$1"; then
		printf 'cgm: failed to fetch %s\n' "$1" >&2
		return 1
	fi
	echo "Fetched $1..."
	case $1 in
		# -f: overwrite a .txt left over from a previous run.
		*.gz) gunzip -f "$dir/$1" ;;
	esac
}

if ! index=$(curl -fsSL "$url/"); then
	printf 'cgm: cannot fetch archive index %s/\n' "$url" >&2
	exit 1
fi

# Then parse it to get links to all the (possibly gzipped) archive files for
# individual months.
#
# only tested with monthly and yearly archives, not tested with quarterly
#
# The name is cut out of the matched text itself (skipping the 6 characters of
# 'href="' and the 3 characters of '">['), so it is always the href that
# matched, even when another quoted attribute precedes it on the line.
# The four digits are spelled out rather than written as {4,4} because some
# awk implementations do not support interval expressions.
months=$(printf '%s\n' "$index" | awk '
	match($0, /href="[0-9][0-9][0-9][0-9](-[[:alnum:]]+)?\.txt(\.gz)?">\[/) {
		print substr($0, RSTART + 6, RLENGTH - 9)
	}')

if [ -z "$months" ]; then
	printf 'cgm: no archive links found at %s/\n' "$url" >&2
	exit 1
fi

# Start one background download per archive and remember the PIDs so that the
# individual exit statuses can be collected afterwards. The here-document
# keeps the loop in the current shell, which is required for wait to see the
# jobs.
pids=
while IFS= read -r month; do
	fetch_archive "$month" &
	pids="$pids $!"
done <<EOF
$months
EOF

status=0
# $pids is a space-separated list of numeric PIDs; splitting is intended.
for pid in $pids; do
	wait "$pid" || status=1
done

exit "$status"
