Merge pull request #17 from thatonesecguy/refactoringBranch

[REFACTORING] Python3 compatibility added
This commit is contained in:
Vincent Yiu 2021-03-10 21:40:05 +08:00 committed by GitHub
commit 9f4b08e3e4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 76 additions and 100 deletions

View file

@ -16,25 +16,23 @@ import requests
import subprocess
import json
import argparse
import cookielib
import ConfigParser
import http.cookiejar
import configparser
import os
import urllib
import math
import urllib2
from urllib.request import urlopen
import string
from bs4 import BeautifulSoup
from thready import threaded
import urllib.parse
reload(sys)
sys.setdefaultencoding('utf-8')
""" Setup Argument Parameters """
parser = argparse.ArgumentParser(description='Discovery LinkedIn')
parser.add_argument('-u', '--keywords', help='Keywords to search')
parser.add_argument('-o', '--output', help='Output file (do not include extentions)')
args = parser.parse_args()
config = ConfigParser.RawConfigParser()
config = configparser.RawConfigParser()
config.read('LinkedInt.cfg')
api_key = config.get('API_KEYS', 'hunter')
username = config.get('CREDS', 'linkedin_username')
@ -58,11 +56,11 @@ def login():
}
rv = s.post(URL + '/checkpoint/lg/login-submit', data=postdata)
try:
cookie = requests.utils.dict_from_cookiejar(s.cookies)
cookie = requests.utils.dict_from_cookiejar(s.cookies)
cookie = cookie['li_at']
except:
print "[!] Cannot log in"
sys.exit(0)
print("[!] Cannot log in")
sys.exit(0)
return cookie
def get_search():
@ -106,12 +104,8 @@ def get_search():
</tr>
"""
# Do we want to automatically get the company ID?
if bCompany:
if bAuto:
# Automatic
# Grab from the URL
companyID = 0
url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
@ -124,67 +118,60 @@ def get_search():
companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
if firstID == 0:
firstID = companyID
print "[Notice] Found company ID: %s" % companyID
print("[Notice] Found company ID: %s" % companyID)
except:
continue
companyID = firstID
if companyID == 0:
print "[WARNING] No valid company ID found in auto, please restart and find your own"
print("[WARNING] No valid company ID found in auto, please restart and find your own")
else:
# Don't auto, use the specified ID
companyID = bSpecific
print
print("")
print "[*] Using company ID: %s" % companyID
print("[*] Using company ID: %s" % companyID)
# Fetch the initial page to get results/page counts
if bCompany == False:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search
else:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID)
print url
print(url)
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
cookies['JSESSIONID'] = 'ajax:0397788525211216808'
#print url
r = requests.get(url, cookies=cookies, headers=headers)
content = json.loads(r.text)
data_total = content['elements'][0]['total']
# Calculate pages off final results at 40 results/page
pages = int(math.ceil(data_total / 40.0))
if pages == 0:
pages = 1
if data_total % 40 == 0:
# Because pages are counted from 0, subtract a page when the last page would have no leftover results
pages = pages - 1
if pages == 0:
print "[!] Try to use quotes in the search name"
print("[!] Try to use quotes in the search name")
sys.exit(0)
print "[*] %i Results Found" % data_total
print("[*] %i Results Found" % data_total)
if data_total > 1000:
pages = 25
print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data"
print "[*] Fetching %i Pages" % pages
print
print("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
print("[*] Fetching %i Pages" % pages)
print("")
for p in range(pages):
# Request results for each page using the start offset
if bCompany == False:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40)
else:
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40)
#print url
r = requests.get(url, cookies=cookies, headers=headers)
content = r.text.encode('UTF-8')
content = json.loads(content)
print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements']))
print("[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements'])))
for c in content['elements'][0]['elements']:
if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
try:
@ -199,10 +186,10 @@ def get_search():
try:
data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
except:
print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation)
print("[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation))
data_picture = ""
# incase the last name is multi part, we will split it down
parts = data_lastname.split()
@ -231,10 +218,9 @@ def get_search():
lname = re.sub('[^A-Za-z]+', '', lname)
if len(fname) == 0 or len(lname) == 0:
# invalid user, let's move on, this person has a weird name
continue
#come here
if prefix == 'full':
user = '{}{}{}'.format(fname, mname, lname)
@ -251,6 +237,8 @@ def get_search():
user = '{}{}'.format(fname,lname[0])
if prefix == 'first.last':
user = '{}.{}'.format(fname, lname)
if prefix == 'first_last':
user = '{}_{}'.format(fname, lname)
if prefix == 'fmlast':
if len(mname) == 0:
user = '{}{}{}'.format(fname[0], mname, lname)
@ -258,6 +246,9 @@ def get_search():
user = '{}{}{}'.format(fname[0], mname[0], lname)
if prefix == 'lastfirst':
user = '{}{}'.format(lname, fname)
if prefix == 'first':
user = '{}'.format(fname)
email = '{}@{}'.format(user, suffix)
@ -272,53 +263,47 @@ def get_search():
csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";")))
foot = "</table></center>"
f = open('{}.html'.format(outfile), 'wb')
f.write(css)
f.write(header)
f.write(body)
f.write(foot)
f.write(css.encode())
f1=open('only_email.csv','a')
f1.write((email+"\n"))
f1.close()
f.write(header.encode())
f.write(body.encode())
f.write(foot.encode())
f.close()
f = open('{}.csv'.format(outfile), 'wb')
f.writelines('\n'.join(csv))
#newcsv='\n'.join(csv)
#f.writelines(newcsv.encode())
for x in csv:
f.write(x.join('\n').encode())
f.close()
else:
print "[!] Headless profile found. Skipping"
print
def banner():
    """Print the startup banner and credit lines.

    Reads the banner text from ``banner.txt`` in the current working
    directory and prints it wrapped in ANSI colour escape sequences,
    followed by three fixed tagline/credit lines.

    Raises:
        OSError: if ``banner.txt`` cannot be opened or read.
    """
    with open('banner.txt', 'r') as f:
        data = f.read()
    # print() calls (not Python 2 print statements) so this runs on Python 3.
    print("\033[1;31m%s\033[0;0m" % data)
    print("\033[1;34mProviding you with Linkedin Intelligence")
    print("\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m")
    print("\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m")
print("[!] Headless profile found. Skipping")
print("")
def authenticate():
    """Log in via login() and return the cookies used for later requests.

    Returns:
        dict: a single-entry dict ``{'li_at': <value returned by login()>}``.

    Raises:
        SystemExit: if login() returns an empty value, or if any other
            exception is raised while authenticating (the exception text is
            included in the exit message).
    """
    try:
        session = login()
        print(session)
        if len(session) == 0:
            # SystemExit is not an Exception subclass, so it is not caught
            # by the handler below and keeps this specific message.
            sys.exit("[!] Unable to login to LinkedIn.com")
        print("[*] Obtained new session: %s" % session)
        cookies = dict(li_at=session)
    except Exception as e:
        # Bind the exception: the message below interpolates it, and without
        # "as e" a failure here would surface as a NameError instead.
        sys.exit("[!] Could not authenticate to linkedin. %s" % e)
    return cookies
if __name__ == '__main__':
banner()
# Prompt user for data variables
search = args.keywords if args.keywords!=None else raw_input("[*] Enter search Keywords (use quotes for more precise results)\n")
print
outfile = args.output if args.output!=None else raw_input("[*] Enter filename for output (exclude file extension)\n")
print
search = args.keywords if args.keywords!=None else input("[*] Enter search Keywords (use quotes for more precise results)\n")
print("")
outfile = args.output if args.output!=None else input("[*] Enter filename for output (exclude file extension)\n")
print("")
while True:
bCompany = raw_input("[*] Filter by Company? (Y/N): \n")
bCompany = input("[*] Filter by Company? (Y/N): \n")
if bCompany.lower() == "y" or bCompany.lower() == "n":
break
else:
print "[!] Incorrect choice"
print("[!] Incorrect choice")
if bCompany.lower() == "y":
bCompany = True
@ -330,11 +315,11 @@ if __name__ == '__main__':
prefix = ""
suffix = ""
print
print("")
if bCompany:
while True:
bSpecific = raw_input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
bSpecific = input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
if bSpecific != "":
bAuto = False
if bSpecific != 0:
@ -342,75 +327,67 @@ if __name__ == '__main__':
int(bSpecific)
break
except:
print "[!] Incorrect choice, the ID either has to be a number or blank"
print("[!] Incorrect choice, the ID either has to be a number or blank")
else:
print "[!] Incorrect choice, the ID either has to be a number or blank"
print("[!] Incorrect choice, the ID either has to be a number or blank")
else:
bAuto = True
break
print
print("")
while True:
suffix = raw_input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
suffix = input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
suffix = suffix.lower()
if "." in suffix:
break
else:
print "[!] Incorrect e-mail? There's no dot"
print("[!] Incorrect e-mail? There's no dot")
print
print("")
while True:
prefix = raw_input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst): \n")
prefix = input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst,first): \n")
prefix = prefix.lower()
print
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
print("")
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first_last"or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
break
elif prefix == "auto":
#if auto prefix then we want to use hunter IO to find it.
print "[*] Automatically using Hunter IO to determine best Prefix"
print("[*] Automatically using Hunter IO to determine best Prefix")
url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix
r = requests.get(url)
content = json.loads(r.text)
if "status" in content:
print "[!] Rate limited by Hunter IO trial"
print("[!] Rate limited by Hunter IO trial")
url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key)
#print url
r = requests.get(url)
content = json.loads(r.text)
if "status" in content:
print "[!] Rate limited by Hunter IO Key"
print("[!] Rate limited by Hunter IO Key")
continue
#print content
prefix = content['data']['pattern']
print "[!] %s" % prefix
print("[!] %s" % prefix)
if prefix:
prefix = prefix.replace("{","").replace("}", "")
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
print "[+] Found %s prefix" % prefix
print("[+] Found %s prefix" % prefix)
break
else:
print "[!] Automatic prefix search failed, please insert a manual choice"
print("[!] Automatic prefix search failed, please insert a manual choice")
continue
else:
print "[!] Automatic prefix search failed, please insert a manual choice"
print("[!] Automatic prefix search failed, please insert a manual choice")
continue
else:
print "[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)"
print("[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)")
print
print("")
# URL Encode for the querystring
search = urllib.quote_plus(search)
search = urllib.parse.quote_plus(search)
cookies = authenticate()
# Initialize Scraping
get_search()
print "[+] Complete"
print("[+] Complete")

View file

@ -2,7 +2,6 @@ beautifulsoup4==4.6.0
certifi==2018.1.18
chardet==3.0.4
idna==2.6
pkg-resources==0.0.0
requests==2.18.4
thready==0.1.5
urllib3==1.22
urllib3==1.22