summaryrefslogtreecommitdiff
path: root/sh/cgm
blob: 6342393f79ec220e780e5390ff7ab62fe581fb0c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/sh
# Curl Gnu Mailman Mailing list archive
# use with ggm

# steal AGPL-3.0-only licensed code from https://code.librehq.com/ots/ots-tools/-/blob/main/search-mailman-archive

# testing mailing list archives:
# https://lists.zx2c4.com/pipermail/cgit/
# https://mailman.nginx.org/pipermail/nginx/
# only year:
# https://dianne.skoll.ca/pipermail/remind-fans/

# misc urls:
# https://wiki.list.org/DOC/How%20do%20I%20make%20the%20archives%20searchable
# https://martin-thoma.com/how-to-analyze-mailman-archives/

# maybe need "${1%/}"
url="$1"
dir="$(basename "$1")_mail_archives"

mkdir "$dir"

# Then parse it to get links to all the gzipped archive files for
# individual months.

# only tested with monthly and yearly archives, not tested with quarterly
for month in $(curl -L "$url" \
	| awk -F'"' '/href="[0-9]{4,4}(-[[:alnum:]]+)?\.txt(\.gz)?">\[/{print $2}'); do
	{
		curl -sS -L -o "$dir/$month" "$url/$month"
		echo "Fetched $month..."
		if [ "${month##*.}" = gz ]; then
			gunzip "$dir/$month"
		fi
	} &
done

wait