Create script for downloading pokemon images

The python script scraps a bulbapedia page searching do the pokemon images and their names.
2015-06-10 23:43:08 -03:00
commit 3fe6993974
2 changed files with 2220 additions and 0 deletions
--- a/scrap_pokemon.py
+++ b/scrap_pokemon.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python3
+
+import re
+import sys
+import html
+import urllib.request
+
+# Load the pokemon sprites page.
+with open('scrapped-data/bulbapedia.html', 'r') as page:
+	html_page = "".join(page.readlines());
+
+# Find all pokemon name and image urls.
+image_regex = r'<img alt="(.*)" src="(http://cdn.bulbagarden.net/upload/.*\.png)" width="40" height="40" />'
+all_pokemon = re.findall(image_regex, html_page)
+
+# Save the image of each pokemon.
+for pokemon in all_pokemon:
+	# Unpack the tuple data.
+	name, image_url = pokemon
+
+	# Clean HTML escape sequences in the name
+	name = html.unescape(name)
+
+	# Set file path for the image.
+	file_path = './scrapped-data/' + name + '.png'
+
+	# Tell the user what we are doing here.
+	print('Downloading "{}" image to "{}"...'.format(name, file_path))
+
+	# Download the image.
+	with open(file_path, 'wb') as pokemon_file:
+		with urllib.request.urlopen(image_url) as pokemon_image:
+			pokemon_file.write(pokemon_image.read())
--- a/scrapped-data/bulbapedia.html
+++ b/scrapped-data/bulbapedia.html