blob: 7ffdd46ed6e37789a52c9fbb9f47a047d9088816 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
#!/bin/sh
# Download a GNU Mailman (pipermail) mailing list archive with curl
# use with ggm
# adapted from AGPL-3.0-only licensed code at https://code.librehq.com/ots/ots-tools/-/blob/main/search-mailman-archive
# testing mailing list archives:
# https://lists.zx2c4.com/pipermail/cgit/
# https://mailman.nginx.org/pipermail/nginx/
# only year:
# https://dianne.skoll.ca/pipermail/remind-fans/
# misc urls:
# https://wiki.list.org/DOC/How%20do%20I%20make%20the%20archives%20searchable
# https://martin-thoma.com/how-to-analyze-mailman-archives/
# Usage: <this-script> PIPERMAIL_URL
#
# Fetches the pipermail index page at PIPERMAIL_URL, extracts the links to the
# per-period text archives (YYYY.txt[.gz] or YYYY-<name>.txt[.gz]), downloads
# each of them in parallel into ./<last-url-component>_mail_archives/ and
# decompresses the gzipped ones.
# Only tested with monthly and yearly archives, not tested with quarterly.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s <pipermail-archive-url>\n' "${0##*/}" >&2
  exit 2
fi

# Drop one trailing slash so that "$url/$month" never contains "//".
url="${1%/}"
dir="$(basename "$url")_mail_archives"

# -p: an already existing directory (e.g. on a re-run) is not an error.
mkdir -p "$dir" || exit 1

# Fetch the index first and check the result: -f makes curl fail on HTTP
# errors instead of handing an error page to the parser.
if ! index="$(curl -fsSL "$url/")"; then
  printf 'Could not fetch archive index: %s/\n' "$url" >&2
  exit 1
fi

# Print exactly the href value that the pattern matched (not just the first
# quoted string on the line). The pattern avoids {n,m} intervals and POSIX
# character classes, which some older awk implementations do not support.
# RSTART+6 skips 'href="' and RLENGTH-9 also drops the trailing '">['.
months="$(printf '%s\n' "$index" | awk '
  match($0, /href="[0-9][0-9][0-9][0-9](-[A-Za-z0-9]+)?\.txt(\.gz)?">\[/) {
    print substr($0, RSTART + 6, RLENGTH - 9)
  }')"

if [ -z "$months" ]; then
  printf 'No archive links found at %s/\n' "$url" >&2
  exit 1
fi

# Read names line by line from a here-document: no word splitting or globbing,
# and the loop runs in the current shell so the final "wait" sees every job.
while IFS= read -r month; do
  [ -n "$month" ] || continue
  {
    if curl -fsSL -o "$dir/$month" "$url/$month"; then
      echo "Fetched $month..."
      case "$month" in
        # -f: overwrite a previously extracted file when re-running.
        *.gz) gunzip -f "$dir/$month" ;;
      esac
    else
      printf 'Failed to fetch %s\n' "$month" >&2
      # Do not leave a partial download behind.
      rm -f "$dir/$month"
    fi
  } &
done <<EOF
$months
EOF

# Barrier: block until all background downloads have finished.
wait
|