[REFACTORING] Python3 compatibility added

This commit is contained in:
Harshil Shah 2020-11-12 04:12:50 +05:30
parent b273ad32eb
commit b25737d458
2 changed files with 76 additions and 100 deletions

View file

@@ -16,25 +16,23 @@ import requests
import subprocess import subprocess
import json import json
import argparse import argparse
import cookielib import http.cookiejar
import ConfigParser import configparser
import os import os
import urllib import urllib
import math import math
import urllib2 from urllib.request import urlopen
import string import string
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from thready import threaded import urllib.parse
reload(sys)
sys.setdefaultencoding('utf-8')
""" Setup Argument Parameters """ """ Setup Argument Parameters """
parser = argparse.ArgumentParser(description='Discovery LinkedIn') parser = argparse.ArgumentParser(description='Discovery LinkedIn')
parser.add_argument('-u', '--keywords', help='Keywords to search') parser.add_argument('-u', '--keywords', help='Keywords to search')
parser.add_argument('-o', '--output', help='Output file (do not include extentions)') parser.add_argument('-o', '--output', help='Output file (do not include extentions)')
args = parser.parse_args() args = parser.parse_args()
config = ConfigParser.RawConfigParser() config = configparser.RawConfigParser()
config.read('LinkedInt.cfg') config.read('LinkedInt.cfg')
api_key = config.get('API_KEYS', 'hunter') api_key = config.get('API_KEYS', 'hunter')
username = config.get('CREDS', 'linkedin_username') username = config.get('CREDS', 'linkedin_username')
@@ -61,7 +59,7 @@ def login():
cookie = requests.utils.dict_from_cookiejar(s.cookies) cookie = requests.utils.dict_from_cookiejar(s.cookies)
cookie = cookie['li_at'] cookie = cookie['li_at']
except: except:
print "[!] Cannot log in" print("[!] Cannot log in")
sys.exit(0) sys.exit(0)
return cookie return cookie
@@ -106,12 +104,8 @@ def get_search():
</tr> </tr>
""" """
# Do we want to automatically get the company ID?
if bCompany: if bCompany:
if bAuto: if bAuto:
# Automatic
# Grab from the URL
companyID = 0 companyID = 0
url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'} headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
@@ -124,67 +118,60 @@ def get_search():
companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id'] companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
if firstID == 0: if firstID == 0:
firstID = companyID firstID = companyID
print "[Notice] Found company ID: %s" % companyID print("[Notice] Found company ID: %s" % companyID)
except: except:
continue continue
companyID = firstID companyID = firstID
if companyID == 0: if companyID == 0:
print "[WARNING] No valid company ID found in auto, please restart and find your own" print("[WARNING] No valid company ID found in auto, please restart and find your own")
else: else:
# Don't auto, use the specified ID
companyID = bSpecific companyID = bSpecific
print print("")
print "[*] Using company ID: %s" % companyID print("[*] Using company ID: %s" % companyID)
# Fetch the initial page to get results/page counts
if bCompany == False: if bCompany == False:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search
else: else:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID) url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID)
print url print(url)
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'} headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
cookies['JSESSIONID'] = 'ajax:0397788525211216808' cookies['JSESSIONID'] = 'ajax:0397788525211216808'
#print url
r = requests.get(url, cookies=cookies, headers=headers) r = requests.get(url, cookies=cookies, headers=headers)
content = json.loads(r.text) content = json.loads(r.text)
data_total = content['elements'][0]['total'] data_total = content['elements'][0]['total']
# Calculate pages off final results at 40 results/page
pages = int(math.ceil(data_total / 40.0)) pages = int(math.ceil(data_total / 40.0))
if pages == 0: if pages == 0:
pages = 1 pages = 1
if data_total % 40 == 0: if data_total % 40 == 0:
# Becuase we count 0... Subtract a page if there are no left over results on the last page
pages = pages - 1 pages = pages - 1
if pages == 0: if pages == 0:
print "[!] Try to use quotes in the search name" print("[!] Try to use quotes in the search name")
sys.exit(0) sys.exit(0)
print "[*] %i Results Found" % data_total print("[*] %i Results Found" % data_total)
if data_total > 1000: if data_total > 1000:
pages = 25 pages = 25
print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data" print("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
print "[*] Fetching %i Pages" % pages print("[*] Fetching %i Pages" % pages)
print print("")
for p in range(pages): for p in range(pages):
# Request results for each page using the start offset
if bCompany == False: if bCompany == False:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40) url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40)
else: else:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40) url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40)
#print url
r = requests.get(url, cookies=cookies, headers=headers) r = requests.get(url, cookies=cookies, headers=headers)
content = r.text.encode('UTF-8') content = r.text.encode('UTF-8')
content = json.loads(content) content = json.loads(content)
print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements'])) print("[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements'])))
for c in content['elements'][0]['elements']: for c in content['elements'][0]['elements']:
if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False: if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
try: try:
@@ -199,10 +186,10 @@ def get_search():
try: try:
data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment']) data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
except: except:
print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation) print("[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation))
data_picture = "" data_picture = ""
# incase the last name is multi part, we will split it down
parts = data_lastname.split() parts = data_lastname.split()
@@ -231,10 +218,9 @@ def get_search():
lname = re.sub('[^A-Za-z]+', '', lname) lname = re.sub('[^A-Za-z]+', '', lname)
if len(fname) == 0 or len(lname) == 0: if len(fname) == 0 or len(lname) == 0:
# invalid user, let's move on, this person has a weird name
continue continue
#come here
if prefix == 'full': if prefix == 'full':
user = '{}{}{}'.format(fname, mname, lname) user = '{}{}{}'.format(fname, mname, lname)
@@ -251,6 +237,8 @@ def get_search():
user = '{}{}'.format(fname,lname[0]) user = '{}{}'.format(fname,lname[0])
if prefix == 'first.last': if prefix == 'first.last':
user = '{}.{}'.format(fname, lname) user = '{}.{}'.format(fname, lname)
if prefix == 'first_last':
user = '{}_{}'.format(fname, lname)
if prefix == 'fmlast': if prefix == 'fmlast':
if len(mname) == 0: if len(mname) == 0:
user = '{}{}{}'.format(fname[0], mname, lname) user = '{}{}{}'.format(fname[0], mname, lname)
@@ -258,6 +246,9 @@ def get_search():
user = '{}{}{}'.format(fname[0], mname[0], lname) user = '{}{}{}'.format(fname[0], mname[0], lname)
if prefix == 'lastfirst': if prefix == 'lastfirst':
user = '{}{}'.format(lname, fname) user = '{}{}'.format(lname, fname)
if prefix == 'first':
user = '{}'.format(fname)
email = '{}@{}'.format(user, suffix) email = '{}@{}'.format(user, suffix)
@@ -272,53 +263,47 @@ def get_search():
csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";"))) csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";")))
foot = "</table></center>" foot = "</table></center>"
f = open('{}.html'.format(outfile), 'wb') f = open('{}.html'.format(outfile), 'wb')
f.write(css) f.write(css.encode())
f.write(header) f1=open('only_email.csv','a')
f.write(body) f1.write((email+"\n"))
f.write(foot) f1.close()
f.write(header.encode())
f.write(body.encode())
f.write(foot.encode())
f.close() f.close()
f = open('{}.csv'.format(outfile), 'wb') f = open('{}.csv'.format(outfile), 'wb')
f.writelines('\n'.join(csv)) #newcsv='\n'.join(csv)
#f.writelines(newcsv.encode())
for x in csv:
f.write(x.join('\n').encode())
f.close() f.close()
else: else:
print "[!] Headless profile found. Skipping" print("[!] Headless profile found. Skipping")
print print("")
def banner():
with open('banner.txt', 'r') as f:
data = f.read()
print "\033[1;31m%s\033[0;0m" % data
print "\033[1;34mProviding you with Linkedin Intelligence"
print "\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m"
print "\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m"
def authenticate(): def authenticate():
try: try:
a = login() a = login()
print a print(a)
session = a session = a
if len(session) == 0: if len(session) == 0:
sys.exit("[!] Unable to login to LinkedIn.com") sys.exit("[!] Unable to login to LinkedIn.com")
print "[*] Obtained new session: %s" % session print("[*] Obtained new session: %s" % session)
cookies = dict(li_at=session) cookies = dict(li_at=session)
except Exception, e: except Exception:
sys.exit("[!] Could not authenticate to linkedin. %s" % e) sys.exit("[!] Could not authenticate to linkedin. %s" % e)
return cookies return cookies
if __name__ == '__main__': if __name__ == '__main__':
banner() search = args.keywords if args.keywords!=None else input("[*] Enter search Keywords (use quotes for more precise results)\n")
# Prompt user for data variables print("")
search = args.keywords if args.keywords!=None else raw_input("[*] Enter search Keywords (use quotes for more precise results)\n") outfile = args.output if args.output!=None else input("[*] Enter filename for output (exclude file extension)\n")
print print("")
outfile = args.output if args.output!=None else raw_input("[*] Enter filename for output (exclude file extension)\n")
print
while True: while True:
bCompany = raw_input("[*] Filter by Company? (Y/N): \n") bCompany = input("[*] Filter by Company? (Y/N): \n")
if bCompany.lower() == "y" or bCompany.lower() == "n": if bCompany.lower() == "y" or bCompany.lower() == "n":
break break
else: else:
print "[!] Incorrect choice" print("[!] Incorrect choice")
if bCompany.lower() == "y": if bCompany.lower() == "y":
bCompany = True bCompany = True
@@ -330,11 +315,11 @@ if __name__ == '__main__':
prefix = "" prefix = ""
suffix = "" suffix = ""
print print("")
if bCompany: if bCompany:
while True: while True:
bSpecific = raw_input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n") bSpecific = input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
if bSpecific != "": if bSpecific != "":
bAuto = False bAuto = False
if bSpecific != 0: if bSpecific != 0:
@@ -342,75 +327,67 @@ if __name__ == '__main__':
int(bSpecific) int(bSpecific)
break break
except: except:
print "[!] Incorrect choice, the ID either has to be a number or blank" print("[!] Incorrect choice, the ID either has to be a number or blank")
else: else:
print "[!] Incorrect choice, the ID either has to be a number or blank" print("[!] Incorrect choice, the ID either has to be a number or blank")
else: else:
bAuto = True bAuto = True
break break
print print("")
while True: while True:
suffix = raw_input("[*] Enter e-mail domain suffix (eg. contoso.com): \n") suffix = input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
suffix = suffix.lower() suffix = suffix.lower()
if "." in suffix: if "." in suffix:
break break
else: else:
print "[!] Incorrect e-mail? There's no dot" print("[!] Incorrect e-mail? There's no dot")
print print("")
while True: while True:
prefix = raw_input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst): \n") prefix = input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst,first): \n")
prefix = prefix.lower() prefix = prefix.lower()
print print("")
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst": if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first_last"or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
break break
elif prefix == "auto": elif prefix == "auto":
#if auto prefix then we want to use hunter IO to find it. print("[*] Automatically using Hunter IO to determine best Prefix")
print "[*] Automatically using Hunter IO to determine best Prefix"
url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix
r = requests.get(url) r = requests.get(url)
content = json.loads(r.text) content = json.loads(r.text)
if "status" in content: if "status" in content:
print "[!] Rate limited by Hunter IO trial" print("[!] Rate limited by Hunter IO trial")
url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key) url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key)
#print url
r = requests.get(url) r = requests.get(url)
content = json.loads(r.text) content = json.loads(r.text)
if "status" in content: if "status" in content:
print "[!] Rate limited by Hunter IO Key" print("[!] Rate limited by Hunter IO Key")
continue continue
#print content
prefix = content['data']['pattern'] prefix = content['data']['pattern']
print "[!] %s" % prefix print("[!] %s" % prefix)
if prefix: if prefix:
prefix = prefix.replace("{","").replace("}", "") prefix = prefix.replace("{","").replace("}", "")
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst": if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
print "[+] Found %s prefix" % prefix print("[+] Found %s prefix" % prefix)
break break
else: else:
print "[!] Automatic prefix search failed, please insert a manual choice" print("[!] Automatic prefix search failed, please insert a manual choice")
continue continue
else: else:
print "[!] Automatic prefix search failed, please insert a manual choice" print("[!] Automatic prefix search failed, please insert a manual choice")
continue continue
else: else:
print "[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)" print("[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)")
print print("")
search = urllib.parse.quote_plus(search)
# URL Encode for the querystring
search = urllib.quote_plus(search)
cookies = authenticate() cookies = authenticate()
# Initialize Scraping
get_search() get_search()
print "[+] Complete" print("[+] Complete")

View file

@@ -2,7 +2,6 @@ beautifulsoup4==4.6.0
certifi==2018.1.18 certifi==2018.1.18
chardet==3.0.4 chardet==3.0.4
idna==2.6 idna==2.6
pkg-resources==0.0.0
requests==2.18.4 requests==2.18.4
thready==0.1.5 thready==0.1.5
urllib3==1.22 urllib3==1.22