Comic Scrape

So last night I decided I wanted to archive some web comics I used to read religiously, in a format I could later massage into something easy to read, so I can catch up on a few years of missed material without clicking Next a ton of times. Thus was born cscrape!

#!/usr/bin/env python3
import os

import requests
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET

def make_xml_file(comic_image, comic_date, comic_description):
    # Write one comic's metadata out as a small XML file named after its date
    root = ET.Element("root")
    doc = ET.SubElement(root, "comic")

    ET.SubElement(doc, "image").text = comic_image
    ET.SubElement(doc, "date").text = comic_date
    ET.SubElement(doc, "description").text = comic_description

    tree = ET.ElementTree(root)
    filename = 'comics/'+comic_date+".xml"
    tree.write(filename)

start_url = 'url_goes_here'

# Make sure the output directory exists before the crawl starts
os.makedirs('comics', exist_ok=True)

url = start_url

# Walk the archive one page at a time, following rel="next" links
while True:
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')

    # Pull the comic's metadata out of the page's OpenGraph <meta> tags
    for link in soup.find_all('meta'):
        if link.get('property') == 'og:title':
            comic_title = link.get('content')
        elif link.get('property') == 'og:image':
            comic_image = link.get('content')
        elif link.get('property') == 'og:description':
            comic_description = link.get('content')
        elif link.get('property') == 'og:site_name':
            comic_name = link.get('content')
        elif link.get('property') == 'og:article:published_time':
            comic_date = link.get('content')
    # The image filename is the last path segment of the og:image URL
    filename_split = comic_image.split('/')
    comic_filename = filename_split[-1]
    make_xml_file(comic_filename, comic_date, comic_description)
    # Download the image itself (from r_img, not the page response r)
    r_img = requests.get(comic_image)
    with open('comics/'+comic_filename, "wb") as fh:
        fh.write(r_img.content)

    # Follow the rel="next" link if there is one; otherwise we've reached the latest comic
    next_link = soup.find(rel='next')
    if next_link:
        url = next_link.get('href')
        print(url)
    else:
        break
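
For reference, each XML file make_xml_file writes comes out looking roughly like this (the values here are made up for illustration; ElementTree writes the whole thing on one line with no declaration, which is fine since it only needs to be machine-readable):

<root><comic><image>comic-2013-05-12.png</image><date>2013-05-12</date><description>Alt text goes here</description></comic></root>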

It spits out the images and an accompanying XML file in the comics/ directory, so that later I can write something else to process them into an easy-to-digest format for viewing on, say, a tablet or phone, for comfortable reading while kicked back in my recliner. Unfortunately, right now it's fairly specific to one particular comic, but I hope to make the code flexible enough to work with any comic, and perhaps roll in the functionality to process the scraped data into easy-to-read formats.
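
As a rough sketch of where that processing step is headed, here's the kind of follow-up script I have in mind: it walks the comics/ directory the scraper fills, sorts the XML files by their date-based filenames, and glues everything into a single HTML page that scrolls nicely on a tablet. The script name reader.py and the output name archive.html are placeholders, and it assumes the XML layout shown above.

#!/usr/bin/env python3
# reader.py -- hypothetical follow-up script: bundle the scraped comics into one HTML page
import glob
import html
import xml.etree.ElementTree as ET

pages = []
# The XML filenames are the publish dates, so a plain sort gives reading order
for xml_file in sorted(glob.glob('comics/*.xml')):
    comic = ET.parse(xml_file).getroot().find('comic')
    date = comic.findtext('date')
    image = comic.findtext('image')
    description = comic.findtext('description') or ''
    pages.append('<h2>%s</h2><img src="%s" alt="%s">'
                 % (html.escape(date), html.escape(image), html.escape(description)))

# archive.html lives next to the images, so the relative src attributes just work
with open('comics/archive.html', 'w') as fh:
    fh.write('<!DOCTYPE html>\n<html><body>\n%s\n</body></html>\n' % '\n'.join(pages))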