From d8250fa7bf384a2c8ebcde6dcee72a0b579a9200 Mon Sep 17 00:00:00 2001
From: David Lynch <kemayo@gmail.com>
Date: Fri, 14 Aug 2015 01:03:04 -0500
Subject: [PATCH] Handle some threadmarks better

---
 sites/spacebattles.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sites/spacebattles.py b/sites/spacebattles.py
index 1c83ddf..3f1a298 100644
--- a/sites/spacebattles.py
+++ b/sites/spacebattles.py
@@ -36,13 +36,19 @@ def extract(url, fetch):
         print("Extracting chapter from", href)
         match = re.match(r'posts/(\d+)/?', href)
         if not match:
-            print("Unparseable threadmark href", href)
-            return
-        postid = match.group(1)
+            match = re.match(r'.+#post-(\d+)$', href)
+            if not match:
+                print("Unparseable threadmark href", href)
+        chapter_postid = match and match.group(1)
         chapter_page = fetch(base + href)
         chapter_soup = BeautifulSoup(chapter_page, 'html5lib')
 
-        post = chapter_soup.find('li', id='post-'+postid).find('blockquote', class_='messageText')
+        if chapter_postid:
+            post = chapter_soup.find('li', id='post-'+chapter_postid)
+        else:
+            # just the first one in the thread, then
+            post = chapter_soup.find('li', class_='message')
+        post = post.find('blockquote', class_='messageText')
         post.name = 'div'
 
         chapters.append((str(mark.a.string), post.prettify()))