Update adapter_wuxiaworldco.py

Strip non-numeric characters from the chapter number before parsing it. Decimal points are preserved and the number is parsed as a float, so a title such as 'chapter 10.5 - xxx' is still sorted into the correct place.
This commit is contained in:
teffalump 2020-08-17 15:50:27 -07:00 committed by GitHub
parent a7b71b94fd
commit b73093584f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -102,19 +102,18 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter):
# Sort and deduplicate chapters (some stories in incorrect order and/or duplicates)
chapters_data = []
numbers_regex = re.compile('[^0-9\.]') # Everything except decimal and numbers
for ch in chapters:
chapter_title = ch.p.get_text()
chapter_url = ch['href']
if chapter_title.startswith('Chapter'):
try:
number = int(chapter_title.split()[1])
except:
continue
target_number = chapter_title.split()[1]
else:
try:
number = int(chapter_title.split()[0])
except:
continue
target_number = chapter_title.split()[0]
try:
number = float(re.sub(numbers_regex, '', target_number))
except:
continue # Cannot parse chapter number
chapters_data.append((number, chapter_title, chapter_url))
chapters_data.sort(key=lambda ch: ch[0])