diff options
author | Markus Koch <markus@notsyncing.net> | 2020-04-30 18:29:53 +0200 |
---|---|---|
committer | Markus Koch <markus@notsyncing.net> | 2020-04-30 18:29:53 +0200 |
commit | 3d0afc953ee0db0dc1d950724a0f9cd342e15361 (patch) | |
tree | 0574a3c79f3b1db853c7424224a6c342e7fa470a /scripts/geojson/get_rendered_meta.sh | |
parent | 9bdb4095a7b465d8332c3dfe60da66805e877ca5 (diff) | |
download | lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.tar.gz lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.tar.bz2 lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.zip |
Extract categories, description, and thumbnail from rendered HTML pages
Diffstat (limited to 'scripts/geojson/get_rendered_meta.sh')
-rwxr-xr-x | scripts/geojson/get_rendered_meta.sh | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/scripts/geojson/get_rendered_meta.sh b/scripts/geojson/get_rendered_meta.sh new file mode 100755 index 0000000..120645c --- /dev/null +++ b/scripts/geojson/get_rendered_meta.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +BASE="https://wiki.linux-forks.de" + +mode="find_description" + +thumbnail="" +categories="" + +data=`curl -s "$1"` + +temp=`echo "$data" | sed -n 's/<p>\(.\+\).*/\1/p' | head -n1` +if [ "$temp" != "" ]; then + temp=`echo "$temp" | sed "s#href=\"#href=\"$BASE#g" | sed 's/"/\\\\"/g' | sed 's/\t//g'` + description="$temp" + mode="find_infobox"; +fi + +IFS=$'>'; +for line in $data; do + if [ "$mode" == "find_infobox" ]; then + if [ "`echo \"$line\" | grep 'infobox'`" != "" ]; then + mode="image"; + fi + elif [ "$mode" == "image" ]; then + temp=`echo "$line" | sed -n 's/.*img.*src="\([^"]\+\).*/\1/p'`; + if [ "$temp" != "" ]; then + thumbnail="$BASE$temp" + mode="find_cat" + fi + elif [ "$mode" == "find_cat" ]; then + if [ "`echo \"$line\" | grep 'mw-normal-catlinks'`" != "" ]; then + mode="cat"; + fi + elif [ "$mode" == "cat" ]; then + temp=`echo "$line" | sed -n 's/.*title="Category:\([^"]\+\).*/\1/pg' | grep -v 'page does not exist'` + if [ "$temp" != "" ]; then + if [ "$categories" != "" ]; then + categories="$categories," + fi + categories="$categories\"$temp\"" + fi + fi +done +IFS=" "; + +echo "\"categories\": [$categories]," +echo "\"image\": \"$thumbnail\"," +echo "\"description\": \"$description\"" |