mirror of
https://github.com/kemayo/leech
synced 2025-12-06 16:33:16 +01:00
Helper for extracting form data from a soup
This commit is contained in:
parent
e9f704716a
commit
f57db3e1a8
1 changed file with 37 additions and 0 deletions
|
|
@@ -161,6 +161,43 @@ class Site:
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
return BeautifulSoup(page.text, method)
|
return BeautifulSoup(page.text, method)
|
||||||
|
|
||||||
|
def _form_in_soup(self, soup):
    """Return the form element for ``soup``.

    If ``soup`` is itself a ``form`` tag it is returned unchanged;
    otherwise the result of ``soup.find('form')`` is returned.
    """
    return soup if soup.name == 'form' else soup.find('form')
|
||||||
|
|
||||||
|
def _form_data(self, soup):
    """Collect the values a form would submit, plus its action and method.

    ``soup`` is passed to ``self._form_in_soup`` to locate the form.

    Returns a ``(data, action, method)`` tuple:

    * ``data`` -- dict mapping control names to values, gathered from the
      form's ``input``, ``select`` and ``textarea`` descendants. Controls
      without a ``name`` are skipped; a later control overwrites an
      earlier one with the same name.
    * ``action`` -- the form's ``action`` attribute, or ``None`` if absent.
    * ``method`` -- the form's ``method`` attribute lower-cased,
      defaulting to ``'get'``.

    When no form is found the result is ``({}, '', '')``.
    """
    data = {}
    form = self._form_in_soup(soup)
    if form is None:
        return data, '', ''

    for tag in form.find_all('input'):
        name = tag.attrs.get('name')
        if not name:
            continue
        # The ``type`` keyword is case-insensitive in HTML.
        itype = tag.attrs.get('type', 'text').lower()
        # Boolean attributes are normally written bare (``<input checked>``)
        # and are then typically stored with an empty-string value, so test
        # for presence rather than truthiness.
        if itype in ('checkbox', 'radio') and 'checked' not in tag.attrs:
            continue
        data[name] = tag.attrs.get('value', '')

    for select in form.find_all('select'):
        # todo: multiple
        name = select.attrs.get('name')
        if not name:
            continue
        data[name] = ''
        for option in select.find_all('option'):
            value = option.attrs.get('value', '')
            # ``selected`` is a boolean attribute too: presence is what counts.
            if value and 'selected' in option.attrs:
                data[name] = value

    for textarea in form.find_all('textarea'):
        name = textarea.attrs.get('name')
        if not name:
            continue
        # A textarea has no ``value`` attribute; its value is its text content.
        data[name] = textarea.get_text()

    return data, form.attrs.get('action'), form.attrs.get('method', 'get').lower()
|
||||||
|
|
||||||
def _new_tag(self, *args, **kw):
    """Build a tag from an empty html5lib soup.

    All positional and keyword arguments are forwarded to ``new_tag``.
    """
    return BeautifulSoup("", 'html5lib').new_tag(*args, **kw)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue