summary | refs | log | tree | commit | diff
path: root/scripts/geojson/get_rendered_meta.sh
diff options
context:
space:
mode:
author: Markus Koch <markus@notsyncing.net> 2020-04-30 18:29:53 +0200
committer: Markus Koch <markus@notsyncing.net> 2020-04-30 18:29:53 +0200
commit: 3d0afc953ee0db0dc1d950724a0f9cd342e15361 (patch)
tree: 0574a3c79f3b1db853c7424224a6c342e7fa470a /scripts/geojson/get_rendered_meta.sh
parent: 9bdb4095a7b465d8332c3dfe60da66805e877ca5 (diff)
download: lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.tar.gz
lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.tar.bz2
lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.zip
Extract categories, description, and thumbnail from rendered HTML pages
Diffstat (limited to 'scripts/geojson/get_rendered_meta.sh')
-rwxr-xr-x scripts/geojson/get_rendered_meta.sh 49
1 files changed, 49 insertions, 0 deletions
diff --git a/scripts/geojson/get_rendered_meta.sh b/scripts/geojson/get_rendered_meta.sh
new file mode 100755
index 0000000..120645c
--- /dev/null
+++ b/scripts/geojson/get_rendered_meta.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+BASE="https://wiki.linux-forks.de"
+
+mode="find_description"
+
+thumbnail=""
+categories=""
+
+data=`curl -s "$1"`
+
+temp=`echo "$data" | sed -n 's/<p>\(.\+\).*/\1/p' | head -n1`
+if [ "$temp" != "" ]; then
+ temp=`echo "$temp" | sed "s#href=\"#href=\"$BASE#g" | sed 's/"/\\\\"/g' | sed 's/\t//g'`
+ description="$temp"
+ mode="find_infobox";
+fi
+
+IFS=$'>';
+for line in $data; do
+ if [ "$mode" == "find_infobox" ]; then
+ if [ "`echo \"$line\" | grep 'infobox'`" != "" ]; then
+ mode="image";
+ fi
+ elif [ "$mode" == "image" ]; then
+ temp=`echo "$line" | sed -n 's/.*img.*src="\([^"]\+\).*/\1/p'`;
+ if [ "$temp" != "" ]; then
+ thumbnail="$BASE$temp"
+ mode="find_cat"
+ fi
+ elif [ "$mode" == "find_cat" ]; then
+ if [ "`echo \"$line\" | grep 'mw-normal-catlinks'`" != "" ]; then
+ mode="cat";
+ fi
+ elif [ "$mode" == "cat" ]; then
+ temp=`echo "$line" | sed -n 's/.*title="Category:\([^"]\+\).*/\1/pg' | grep -v 'page does not exist'`
+ if [ "$temp" != "" ]; then
+ if [ "$categories" != "" ]; then
+ categories="$categories,"
+ fi
+ categories="$categories\"$temp\""
+ fi
+ fi
+done
+IFS=" ";
+
+echo "\"categories\": [$categories],"
+echo "\"image\": \"$thumbnail\","
+echo "\"description\": \"$description\""