Python3 compatible mode.

This commit is contained in:
László Velinszky 2020-08-04 18:58:36 -07:00
parent b273ad32eb
commit da84071de2

View file

@ -16,25 +16,39 @@ import requests
import subprocess import subprocess
import json import json
import argparse import argparse
import cookielib try:
import ConfigParser import http.cookiejar as cookielib
from configparser import RawConfigParser
from urllib.parse import quote_plus
except ImportError:
import cookielib
from ConfigParser import RawConfigParser
from urllib import quote_plus
try:
input = raw_input
except NameError:
pass
import os import os
import urllib
import math import math
import urllib2
import string import string
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from thready import threaded
reload(sys) #from thready import threaded
sys.setdefaultencoding('utf-8')
try:
reload(sys)
sys.setdefaultencoding('utf-8')
except Exception:
pass
""" Setup Argument Parameters """ """ Setup Argument Parameters """
parser = argparse.ArgumentParser(description='Discovery LinkedIn') parser = argparse.ArgumentParser(description='Discovery LinkedIn')
parser.add_argument('-u', '--keywords', help='Keywords to search') parser.add_argument('-u', '--keywords', help='Keywords to search')
parser.add_argument('-o', '--output', help='Output file (do not include extentions)') parser.add_argument('-o', '--output', help='Output file (do not include extentions)')
args = parser.parse_args() args = parser.parse_args()
config = ConfigParser.RawConfigParser() config = RawConfigParser()
config.read('LinkedInt.cfg') config.read('LinkedInt.cfg')
api_key = config.get('API_KEYS', 'hunter') api_key = config.get('API_KEYS', 'hunter')
username = config.get('CREDS', 'linkedin_username') username = config.get('CREDS', 'linkedin_username')
@ -58,11 +72,11 @@ def login():
} }
rv = s.post(URL + '/checkpoint/lg/login-submit', data=postdata) rv = s.post(URL + '/checkpoint/lg/login-submit', data=postdata)
try: try:
cookie = requests.utils.dict_from_cookiejar(s.cookies) cookie = requests.utils.dict_from_cookiejar(s.cookies)
cookie = cookie['li_at'] cookie = cookie['li_at']
except: except:
print "[!] Cannot log in" print("[!] Cannot log in")
sys.exit(0) sys.exit(0)
return cookie return cookie
def get_search(): def get_search():
@ -109,46 +123,46 @@ def get_search():
# Do we want to automatically get the company ID? # Do we want to automatically get the company ID?
if bCompany: if bCompany:
if bAuto: if bAuto:
# Automatic # Automatic
# Grab from the URL # Grab from the URL
companyID = 0 companyID = 0
url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query={}".format(search)
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'} headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
cookies['JSESSIONID'] = 'ajax:0397788525211216808' cookies['JSESSIONID'] = 'ajax:0397788525211216808'
r = requests.get(url, cookies=cookies, headers=headers) r = requests.get(url, cookies=cookies, headers=headers)
content = json.loads(r.text) content = json.loads(r.text)
firstID = 0 firstID = 0
for i in range(0,len(content['elements'])): for i in range(0,len(content['elements'])):
try: try:
companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id'] companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
if firstID == 0: if firstID == 0:
firstID = companyID firstID = companyID
print "[Notice] Found company ID: %s" % companyID print("[Notice] Found company ID: {}".format(companyID))
except: except:
continue continue
companyID = firstID companyID = firstID
if companyID == 0: if companyID == 0:
print "[WARNING] No valid company ID found in auto, please restart and find your own" print("[WARNING] No valid company ID found in auto, please restart and find your own")
else: else:
# Don't auto, use the specified ID # Don't auto, use the specified ID
companyID = bSpecific companyID = bSpecific
print print()
print "[*] Using company ID: %s" % companyID print("[*] Using company ID: {}".format(companyID))
# Fetch the initial page to get results/page counts # Fetch the initial page to get results/page counts
if bCompany == False: if bCompany == False:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords={}&origin=OTHER&q=guided&start=0".format(search)
else: else:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID) url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->{})&origin=OTHER&q=guided&start=0".format(companyID)
print url print(url)
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'} headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
cookies['JSESSIONID'] = 'ajax:0397788525211216808' cookies['JSESSIONID'] = 'ajax:0397788525211216808'
#print url
r = requests.get(url, cookies=cookies, headers=headers) r = requests.get(url, cookies=cookies, headers=headers)
content = json.loads(r.text) content = json.loads(r.text)
data_total = content['elements'][0]['total'] data_total = content['elements'][0]['total']
@ -157,34 +171,34 @@ def get_search():
pages = int(math.ceil(data_total / 40.0)) pages = int(math.ceil(data_total / 40.0))
if pages == 0: if pages == 0:
pages = 1 pages = 1
if data_total % 40 == 0: if data_total % 40 == 0:
# Because we count 0... Subtract a page if there are no left over results on the last page # Because we count 0... Subtract a page if there are no left over results on the last page
pages = pages - 1 pages = pages - 1
if pages == 0: if pages == 0:
print "[!] Try to use quotes in the search name" print("[!] Try to use quotes in the search name")
sys.exit(0) sys.exit(0)
print "[*] %i Results Found" % data_total print("[*] {} Results Found".format(data_total))
if data_total > 1000: if data_total > 1000:
pages = 25 pages = 25
print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data" print("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
print "[*] Fetching %i Pages" % pages print("[*] Fetching {} Pages".format(pages))
print print()
for p in range(pages): for p in range(pages):
# Request results for each page using the start offset # Request results for each page using the start offset
if bCompany == False: if bCompany == False:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40) url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords={}&origin=OTHER&q=guided&start={}".format(search, p*40)
else: else:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40) url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->{})&origin=OTHER&q=guided&start={}".format(companyID, p*40)
#print url
r = requests.get(url, cookies=cookies, headers=headers) r = requests.get(url, cookies=cookies, headers=headers)
content = r.text.encode('UTF-8') content = r.text.encode('UTF-8')
content = json.loads(content) content = json.loads(content)
print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements'])) print("[*] Fetching page {} with {} results".format((p),len(content['elements'][0]['elements'])))
for c in content['elements'][0]['elements']: for c in content['elements'][0]['elements']:
if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False: if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
try: try:
@ -193,13 +207,13 @@ def get_search():
data_industry = "" data_industry = ""
data_firstname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['firstName'] data_firstname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['firstName']
data_lastname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['lastName'] data_lastname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['lastName']
data_slug = "https://www.linkedin.com/in/%s" % c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['publicIdentifier'] data_slug = "https://www.linkedin.com/in/{}".format(c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['publicIdentifier'])
data_occupation = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['occupation'] data_occupation = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['occupation']
data_location = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['location'] data_location = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['location']
try: try:
data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment']) data_picture = "{}{}".format(c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
except: except:
print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation) print("[*] No picture found for {} {}, {}".format(data_firstname, data_lastname, data_occupation))
data_picture = "" data_picture = ""
# in case the last name is multi-part, we will split it down # in case the last name is multi-part, we will split it down
@ -257,68 +271,68 @@ def get_search():
else: else:
user = '{}{}{}'.format(fname[0], mname[0], lname) user = '{}{}{}'.format(fname[0], mname[0], lname)
if prefix == 'lastfirst': if prefix == 'lastfirst':
user = '{}{}'.format(lname, fname) user = '{}{}'.format(lname, fname)
email = '{}@{}'.format(user, suffix) email = '{}@{}'.format(user, suffix)
body += "<tr>" \ body += "<tr>" \
"<td><a href=\"%s\"><img src=\"%s\" width=200 height=200></a></td>" \ "<td><a href=\"{}\"><img src=\"{}\" width=200 height=200></a></td>" \
"<td><a href=\"%s\">%s</a></td>" \ "<td><a href=\"{}\">{}</a></td>" \
"<td>%s</td>" \ "<td>{}</td>" \
"<td>%s</td>" \ "<td>{}</td>" \
"<td>%s</td>" \ "<td>{}</td>" \
"<a>" % (data_slug, data_picture, data_slug, name, email, data_occupation, data_location) "<a>".format(data_slug, data_picture, data_slug, name, email, data_occupation, data_location)
csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";"))) csv.append('"{}","{}","{}","{}","{}", "{}"'.format(data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";")))
foot = "</table></center>" foot = "</table></center>"
f = open('{}.html'.format(outfile), 'wb') f = open('{}.html'.format(outfile), 'w')
f.write(css) f.write(css)
f.write(header) f.write(header)
f.write(body) f.write(body)
f.write(foot) f.write(foot)
f.close() f.close()
f = open('{}.csv'.format(outfile), 'wb') f = open('{}.csv'.format(outfile), 'w')
f.writelines('\n'.join(csv)) f.writelines('\n'.join(csv))
f.close() f.close()
else: else:
print "[!] Headless profile found. Skipping" print("[!] Headless profile found. Skipping")
print print()
def banner():
    """Print the contents of ``banner.txt`` followed by the credit lines.

    The file is opened by relative path, so it is looked up in the current
    working directory. Its text is printed wrapped in ANSI colour escape
    sequences, then three fixed credit lines are printed.

    Raises:
        OSError: if ``banner.txt`` cannot be opened (not handled here).
    """
    # Only the read needs the file handle; close it before printing.
    with open('banner.txt', 'r') as f:
        data = f.read()

    print("\033[1;31m{}\033[0;0m".format(data))
    # NOTE: this line sets a colour without resetting it; the next line's
    # own escape sequence replaces it.
    print("\033[1;34mProviding you with Linkedin Intelligence")
    print("\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m")
    print("\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m")
def authenticate():
    """Log in via ``login()`` and return the cookie dict for later requests.

    Returns:
        dict: ``{'li_at': <session value returned by login()>}``.

    Exits:
        Calls ``sys.exit`` with a message when ``login()`` returns an empty
        value, or when any ``Exception`` is raised while logging in.
    """
    try:
        session = login()
        # The raw session value is echoed before the labelled line below,
        # as in the original code.
        print(session)
        if len(session) == 0:
            # SystemExit is not a subclass of Exception, so this exit is not
            # caught by the handler below.
            sys.exit("[!] Unable to login to LinkedIn.com")
        print("[*] Obtained new session: {}".format(session))
        cookies = dict(li_at=session)
    except Exception as e:
        sys.exit("[!] Could not authenticate to linkedin. {}".format(e))
    return cookies
if __name__ == '__main__': if __name__ == '__main__':
banner() banner()
# Prompt user for data variables # Prompt user for data variables
search = args.keywords if args.keywords!=None else raw_input("[*] Enter search Keywords (use quotes for more precise results)\n") search = args.keywords if args.keywords!=None else input("[*] Enter search Keywords (use quotes for more precise results)\n")
print print()
outfile = args.output if args.output!=None else raw_input("[*] Enter filename for output (exclude file extension)\n") outfile = args.output if args.output!=None else input("[*] Enter filename for output (exclude file extension)\n")
print print()
while True: while True:
bCompany = raw_input("[*] Filter by Company? (Y/N): \n") bCompany = input("[*] Filter by Company? (Y/N): \n")
if bCompany.lower() == "y" or bCompany.lower() == "n": if bCompany.lower() == "y" or bCompany.lower() == "n":
break break
else: else:
print "[!] Incorrect choice" print("[!] Incorrect choice")
if bCompany.lower() == "y": if bCompany.lower() == "y":
bCompany = True bCompany = True
@ -330,87 +344,84 @@ if __name__ == '__main__':
prefix = "" prefix = ""
suffix = "" suffix = ""
print print()
if bCompany: if bCompany:
while True: while True:
bSpecific = raw_input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n") bSpecific = input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
if bSpecific != "": if bSpecific != "":
bAuto = False bAuto = False
if bSpecific != 0: if bSpecific != 0:
try: try:
int(bSpecific) int(bSpecific)
break break
except: except:
print "[!] Incorrect choice, the ID either has to be a number or blank" print("[!] Incorrect choice, the ID either has to be a number or blank")
else:
else: print("[!] Incorrect choice, the ID either has to be a number or blank")
print "[!] Incorrect choice, the ID either has to be a number or blank" else:
else: bAuto = True
bAuto = True break
break print()
print
while True: while True:
suffix = raw_input("[*] Enter e-mail domain suffix (eg. contoso.com): \n") suffix = input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
suffix = suffix.lower() suffix = suffix.lower()
if "." in suffix: if "." in suffix:
break break
else: else:
print "[!] Incorrect e-mail? There's no dot" print("[!] Incorrect e-mail? There's no dot")
print print()
while True: while True:
prefix = raw_input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst): \n") prefix = input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst): \n")
prefix = prefix.lower() prefix = prefix.lower()
print print()
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst": if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
break break
elif prefix == "auto": elif prefix == "auto":
#if auto prefix then we want to use hunter IO to find it. #if auto prefix then we want to use hunter IO to find it.
print "[*] Automatically using Hunter IO to determine best Prefix" print("[*] Automatically using Hunter IO to determine best Prefix")
url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix url = "https://hunter.io/trial/v2/domain-search?offset=0&domain={}&format=json".format(suffix)
r = requests.get(url) r = requests.get(url)
content = json.loads(r.text) content = json.loads(r.text)
if "status" in content: if "status" in content:
print "[!] Rate limited by Hunter IO trial" print("[!] Rate limited by Hunter IO trial")
url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key) url = "https://api.hunter.io/v2/domain-search?domain={}&api_key={}".format(suffix, api_key)
#print url
r = requests.get(url) r = requests.get(url)
content = json.loads(r.text) content = json.loads(r.text)
if "status" in content: if "status" in content:
print "[!] Rate limited by Hunter IO Key" print("[!] Rate limited by Hunter IO Key")
continue continue
#print content #print content
prefix = content['data']['pattern'] prefix = content['data']['pattern']
print "[!] %s" % prefix print("[!] {}".format(prefix))
if prefix: if prefix:
prefix = prefix.replace("{","").replace("}", "") prefix = prefix.replace("{","").replace("}", "")
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst": if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
print "[+] Found %s prefix" % prefix print("[+] Found {} prefix".format(prefix))
break break
else: else:
print "[!] Automatic prefix search failed, please insert a manual choice" print("[!] Automatic prefix search failed, please insert a manual choice")
continue continue
else: else:
print "[!] Automatic prefix search failed, please insert a manual choice" print("[!] Automatic prefix search failed, please insert a manual choice")
continue continue
else: else:
print "[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)" print("[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)")
print print()
# URL Encode for the querystring # URL Encode for the querystring
search = urllib.quote_plus(search) search = quote_plus(search)
cookies = authenticate() cookies = authenticate()
# Initialize Scraping # Initialize Scraping
get_search() get_search()
print "[+] Complete" print("[+] Complete")