#!/bin/sh
# cgm - Curl Gnu Mailman mailing list archive.
# Use with ggm.
#
# Usage: cgm ARCHIVE_URL
#
# Fetches the pipermail index page at ARCHIVE_URL, downloads every monthly or
# yearly text archive linked from it into ./<basename of URL>_mail_archives/
# (in parallel, one background job per archive) and gunzips the .gz ones.
#
# Exit status: 0 if every archive was fetched (and unpacked), 1 if the index
# or any archive failed, 2 on usage error.
#
# Provenance: added as sh/cgm by Xiao Pan on Mon, 1 Apr 2024 in commit
# abe0f2a59a4dfb64003d759d406ee5a21cf142bd ("curl and grep GNU Mailman
# mailing list archive scripts").
# Adapted from AGPL-3.0-only licensed code at
# https://code.librehq.com/ots/ots-tools/-/blob/main/search-mailman-archive
#
# Mailing list archives used for testing:
#   https://lists.zx2c4.com/pipermail/cgit/
#   https://mailman.nginx.org/pipermail/nginx/
# Yearly archives only:
#   https://dianne.skoll.ca/pipermail/remind-fans/
# Only tested with monthly and yearly archives, not with quarterly ones.

if [ -z "${1:-}" ]; then
  printf 'Usage: %s ARCHIVE_URL\n' "${0##*/}" >&2
  exit 2
fi

# Drop one trailing slash so "$url/$month" never contains "//".
url="${1%/}"
dir="$(basename "$url")_mail_archives"

# -p: re-running against an existing download directory is not an error.
mkdir -p "$dir" || exit 1

# Fetch the index on its own (not inside a pipeline) so a failed download is
# detected; -f makes curl fail on HTTP errors instead of returning the
# server's error page.
index=$(curl -f -L "$url/") || {
  printf 'cgm: cannot fetch archive index %s\n' "$url/" >&2
  exit 1
}

# Parse the index to get the links to the (possibly gzipped) archive files.
# The file name is cut out of the matched href itself rather than taken from
# the first quoted string on the line, so nothing but a name of the form
# YYYY[-Period].txt[.gz] can ever reach "curl -o".
# The year is spelled as four digit classes because some awk implementations
# do not support {4,4} interval expressions.
months=$(printf '%s\n' "$index" | awk '
  match($0, /href="[0-9][0-9][0-9][0-9](-[[:alnum:]]+)?\.txt(\.gz)?">\[/) {
    # Skip the 6 characters of href=" and drop the 3 trailing characters ">[
    print substr($0, RSTART + 6, RLENGTH - 9)
  }')

if [ -z "$months" ]; then
  printf 'cgm: no archive links found at %s\n' "$url/" >&2
  exit 1
fi

pids=
# $months is deliberately unquoted: one name per word, and the names matched
# above cannot contain whitespace or glob characters.
for month in $months; do
  {
    if curl -f -sS -L -o "$dir/$month" "$url/$month"; then
      echo "Fetched $month..."
      case $month in
        # -f: overwrite an already unpacked file from a previous run.
        *.gz) gunzip -f "$dir/$month" ;;
      esac
    else
      printf 'cgm: failed to fetch %s\n' "$url/$month" >&2
      exit 1 # leaves only this background subshell
    fi
  } &
  pids="$pids $!"
done

# A bare "wait" always returns 0; wait on each job to learn about failures.
status=0
for pid in $pids; do
  wait "$pid" || status=1
done
exit "$status"