#!/bin/bash
# gigaclone: mirror the file index of profile.gigaset.net.
# Downloads the server access logs, extracts working URLs into ./urllist,
# rebuilds a browsable directory tree and pushes the results to GitHub.

# Recursive helper mode, invoked by treegen() below as:
#   bash "$dir/mkurllist" treegen "<tree root>" "<directory>"
# Generates an index.html listing for a single directory of the tree.
# Handled before the tmp setup so the recursive calls don't touch $tmp.
if [ "$1" == "treegen" ]; then
	shift
	a=$(echo "$2" | sed "s|$1||g")
	cd "$2"
	tree -a -T "profile.gigaset.net" -C -H http://profile.gigaset.net"$a" -L 1 | sed 's/net\/\.\//net\//g;s/class=\"DIR\" href=\"http\:\/\/profile\.gigaset\.net/class=\"DIR\" href=\"http\:\/\/daniil\.it\/gigaclone\/tree/g;s/<\/a>/<\/a>\n/g;s/<\/a>\n\n/<\/a>\n/g' >index.html
	cd "$OLDPWD"
	exit
fi

dir="$PWD"
tmp="$dir/tmp"
rm -rf "$tmp"
mkdir -p "$tmp"
cd "$tmp"

# Extract request URLs with successful status codes (200/206/301/302)
# from the downloaded access logs and collect them in $tmp/tmp.
extractlog() {
	tar -xzf "$tmp/logs.tgz"
	cd log
	for extractfile in ./*; do
		echo "Working... for $extractfile"
		sed -i '/200\|206\|302\|301/!d;s/\sHTTP.*//g;s/.*\s/http:\/\/profile\.gigaset\.net/g;/http:\/\/profile\.gigaset\.net\"\"/d;s/?.*//g;s/\.net\/device/\.net\/chagall/g;s/^\.$//g' "$extractfile"
		echo "Remove duplicates for $extractfile"
		awk '!seen[$0]++' "$extractfile" >>"$tmp/tmp"
	done
	md5sum "$tmp/logs.tgz" >"$dir/.md5sum"
	rm "$tmp/logs.tgz"
	rdupes "$tmp/tmp"
}

# Merge the given file(s) into $dir/urllist, normalise slashes, drop
# duplicates and keep only URLs that still resolve (curl, following redirects).
rdupes() {
	if [ "$1" != "" ]; then
		echo "Remove all duplicates for $*"
		awk '!seen[$0]++' $* "$dir/urllist" >"$dir/final"
		mv "$dir/final" "$dir/urllist"
		rm $*
	fi
	sed -i 's/\/\//\//g;s/\/$//g;s/http:\//http:\/\//g;s/http:\/\/\//http:\/\//g' "$dir/urllist"
	while read -r dl; do
		a=$(curl -w "%{url_effective}\n" -L -f -s -I -S "$dl" -o /dev/null)
		[ $? = "0" ] && echo "$a" >>"$dir/final"
	done <"$dir/urllist"
	mv "$dir/final" "$dir/urllist"
	awk '!seen[$0]++' "$dir/urllist" >"$dir/final"
	mv "$dir/final" "$dir/urllist"
}

# Scan firmware images (.bin, smaller than 50 MiB) referenced in urllist for
# embedded absolute URLs and for relative .bin references next to the image.
extractbin() {
	echo "Extracting urls..."
	cd "$tmp"
	grep "\.bin" "$dir/urllist" | while read -r currenturl; do
		if [ $(wget -S --spider "$currenturl" 2>&1 | sed '/Length/!d;s/Length\: //g;s/\s.*//g;s/\s//g;s/[^0-9]*//g' | tr -d "\n") -lt 52428800 ]; then
			file=$(wget -qO- "$currenturl" | strings)
			echo "$file" | sed '/http:\/\//!d;/profile.gigaset.net\|update.gigaset.net/!d;s/.*http:\/\//http:\/\//g;s/update\.gigaset/profile\.gigaset/g' >>"$tmp/tmp"
			baseurl=$(dirname "$currenturl")/
			url=$(echo "$file" | sed '/http/d;/\.bin/!d')
			for e in $url; do echo "$baseurl$e" >>"$tmp/tmp"; done
		fi
	done
	rdupes "$tmp/tmp"
}

# Recreate the directory layout of the mirrored URLs as empty files, then
# generate an index.html per directory via the "treegen" helper mode above.
treegen() {
	echo "Creating tree..."
	cd "$dir"
	rm -rf tree
	mkdir tree
	cd tree
	sed 's/http:\/\/profile.gigaset.net\///g;s/^\/*//g' "$dir/urllist" | while read -r f; do
		# Percent-decode the path (gawk one-liner).
		f=$(echo "$f" | awk -niord '{printf RT?$0chr("0x"substr(RT,2)):$0}' RS=%..)
		mkdir -p "$(dirname "$f")" &>/dev/null
		touch "$f"
	done
	find "$PWD" -type d -exec bash "$dir/mkurllist" treegen "$PWD" "{}" \;
	find . -not -iname "index.html" -type f -a -not -name "." -exec rm "{}" \;
}

# Main: refresh the URL list only when logs.tgz changed, rebuild the tree and
# push the updated list (master) and tree (gh-pages) to GitHub.
{
	wget http://profile.gigaset.net/logs.tgz -qO "$tmp/logs.tgz"
	[ "$(md5sum "$tmp/logs.tgz")" != "$(cat "$dir/.md5sum")" ] && {
		extractlog
		extractbin
	}
	cd "$dir"
	sort urllist >final
	mv final urllist
	treegen
	cd "$dir"
	rm -rf "$tmp"
	mkdir "$tmp"
	git clone https://$GH_TOKEN@github.com/danog/gigaclone.git "$tmp/git"
	cd "$tmp/git"
	rm -r tree
	cp -a "$dir"/* .
	rm -rf tmp gigaclone.log
	git add -A
	git commit -m "Updated url list"
	git push origin master &>/dev/null
	git checkout gh-pages
	rm -r tree
	cp -a "$dir/tree" .
	git add -A
	git commit -m "Updated dir tree"
	git push origin gh-pages &>/dev/null
	cd "$dir"
	echo "Clean up."
	rm -rf "$tmp"
} &>"$dir/gigaclone.log"