diff options
author | Markus Koch <markus@notsyncing.net> | 2020-04-30 18:29:53 +0200 |
---|---|---|
committer | Markus Koch <markus@notsyncing.net> | 2020-04-30 18:29:53 +0200 |
commit | 3d0afc953ee0db0dc1d950724a0f9cd342e15361 (patch) | |
tree | 0574a3c79f3b1db853c7424224a6c342e7fa470a /scripts/geojson | |
parent | 9bdb4095a7b465d8332c3dfe60da66805e877ca5 (diff) | |
download | lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.tar.gz lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.tar.bz2 lifomapserver-3d0afc953ee0db0dc1d950724a0f9cd342e15361.zip |
Extract categories, description, and thumbnail from rendered HTML pages
Diffstat (limited to 'scripts/geojson')
-rwxr-xr-x | scripts/geojson/fetch_all_points.sh | 19 | ||||
-rwxr-xr-x | scripts/geojson/get_rendered_meta.sh | 49 |
2 files changed, 52 insertions, 16 deletions
```diff
diff --git a/scripts/geojson/fetch_all_points.sh b/scripts/geojson/fetch_all_points.sh
index 81c46e0..d1ec504 100755
--- a/scripts/geojson/fetch_all_points.sh
+++ b/scripts/geojson/fetch_all_points.sh
@@ -22,22 +22,9 @@ for line in $data; do
 	fi
 	if [[ "$title" != "" && "$coords" != "" ]]; then
 		echo "{\"type\": \"Feature\", \"properties\": {\"name\": \"$title\","
-		echo -n "\"categories\": ["
-		urltitle=`echo "$title" | sed 's/ /%20/g'`
-		cjson=`curl -s "https://wiki.linux-forks.de/mediawiki/api.php?action=query&format=json&titles=$urltitle&prop=categories" | json_reformat -m`
-		categories=`echo "$cjson" | sed -n 's/.*\("categories":[^]]\+\).*/\1/p'`;
-		IFS='}'
-		fc="true";
-		for entry in $categories; do
-			if [[ "$fc" != "true" ]]; then
-				echo -n ",";
-			fi
-			category=`echo "$entry" | sed -n 's/.*Category:\([^"]\+\).*/\1/p'`
-			echo -n "\"$category\"";
-			fc="false";
-		done;
-		IFS=$'\n'
-		echo "]},";
+		urltitle=`echo "$title" | sed 's/ /_/g'`
+		./get_rendered_meta.sh "https://wiki.linux-forks.de/mediawiki/index.php/$urltitle"
+		echo "},";
 		echo "\"geometry\": {\"type\": \"Point\", \"coordinates\": [$coords]}},";
 		title="";
 		coords="";
diff --git a/scripts/geojson/get_rendered_meta.sh b/scripts/geojson/get_rendered_meta.sh
new file mode 100755
index 0000000..120645c
--- /dev/null
+++ b/scripts/geojson/get_rendered_meta.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+BASE="https://wiki.linux-forks.de"
+
+mode="find_description"
+
+thumbnail=""
+categories=""
+
+data=`curl -s "$1"`
+
+temp=`echo "$data" | sed -n 's/<p>\(.\+\).*/\1/p' | head -n1`
+if [ "$temp" != "" ]; then
+	temp=`echo "$temp" | sed "s#href=\"#href=\"$BASE#g" | sed 's/"/\\\\"/g' | sed 's/\t//g'`
+	description="$temp"
+	mode="find_infobox";
+fi
+
+IFS=$'>';
+for line in $data; do
+	if [ "$mode" == "find_infobox" ]; then
+		if [ "`echo \"$line\" | grep 'infobox'`" != "" ]; then
+			mode="image";
+		fi
+	elif [ "$mode" == "image" ]; then
+		temp=`echo "$line" | sed -n 's/.*img.*src="\([^"]\+\).*/\1/p'`;
+		if [ "$temp" != "" ]; then
+			thumbnail="$BASE$temp"
+			mode="find_cat"
+		fi
+	elif [ "$mode" == "find_cat" ]; then
+		if [ "`echo \"$line\" | grep 'mw-normal-catlinks'`" != "" ]; then
+			mode="cat";
+		fi
+	elif [ "$mode" == "cat" ]; then
+		temp=`echo "$line" | sed -n 's/.*title="Category:\([^"]\+\).*/\1/pg' | grep -v 'page does not exist'`
+		if [ "$temp" != "" ]; then
+			if [ "$categories" != "" ]; then
+				categories="$categories,"
+			fi
+			categories="$categories\"$temp\""
+		fi
+	fi
+done
+IFS=" ";
+
+echo "\"categories\": [$categories],"
+echo "\"image\": \"$thumbnail\","
+echo "\"description\": \"$description\""
```