mirror of
https://github.com/vysecurity/LinkedInt.git
synced 2025-12-06 08:52:29 +01:00
Merge pull request #17 from thatonesecguy/refactoringBranch
[REFACTORING] Python3 compatibility added
This commit is contained in:
commit
9f4b08e3e4
2 changed files with 76 additions and 100 deletions
161
LinkedInt.py
161
LinkedInt.py
|
|
@ -16,25 +16,23 @@ import requests
|
|||
import subprocess
|
||||
import json
|
||||
import argparse
|
||||
import cookielib
|
||||
import ConfigParser
|
||||
import http.cookiejar
|
||||
import configparser
|
||||
import os
|
||||
import urllib
|
||||
import math
|
||||
import urllib2
|
||||
from urllib.request import urlopen
|
||||
import string
|
||||
from bs4 import BeautifulSoup
|
||||
from thready import threaded
|
||||
import urllib.parse
|
||||
|
||||
reload(sys)
|
||||
sys.setdefaultencoding('utf-8')
|
||||
|
||||
""" Setup Argument Parameters """
|
||||
parser = argparse.ArgumentParser(description='Discovery LinkedIn')
|
||||
parser.add_argument('-u', '--keywords', help='Keywords to search')
|
||||
parser.add_argument('-o', '--output', help='Output file (do not include extentions)')
|
||||
args = parser.parse_args()
|
||||
config = ConfigParser.RawConfigParser()
|
||||
config = configparser.RawConfigParser()
|
||||
config.read('LinkedInt.cfg')
|
||||
api_key = config.get('API_KEYS', 'hunter')
|
||||
username = config.get('CREDS', 'linkedin_username')
|
||||
|
|
@ -61,7 +59,7 @@ def login():
|
|||
cookie = requests.utils.dict_from_cookiejar(s.cookies)
|
||||
cookie = cookie['li_at']
|
||||
except:
|
||||
print "[!] Cannot log in"
|
||||
print("[!] Cannot log in")
|
||||
sys.exit(0)
|
||||
return cookie
|
||||
|
||||
|
|
@ -106,12 +104,8 @@ def get_search():
|
|||
</tr>
|
||||
"""
|
||||
|
||||
# Do we want to automatically get the company ID?
|
||||
|
||||
if bCompany:
|
||||
if bAuto:
|
||||
# Automatic
|
||||
# Grab from the URL
|
||||
companyID = 0
|
||||
url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search
|
||||
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
|
||||
|
|
@ -124,67 +118,60 @@ def get_search():
|
|||
companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
|
||||
if firstID == 0:
|
||||
firstID = companyID
|
||||
print "[Notice] Found company ID: %s" % companyID
|
||||
print("[Notice] Found company ID: %s" % companyID)
|
||||
except:
|
||||
continue
|
||||
companyID = firstID
|
||||
if companyID == 0:
|
||||
print "[WARNING] No valid company ID found in auto, please restart and find your own"
|
||||
print("[WARNING] No valid company ID found in auto, please restart and find your own")
|
||||
else:
|
||||
# Don't auto, use the specified ID
|
||||
companyID = bSpecific
|
||||
|
||||
print
|
||||
print("")
|
||||
|
||||
print "[*] Using company ID: %s" % companyID
|
||||
print("[*] Using company ID: %s" % companyID)
|
||||
|
||||
# Fetch the initial page to get results/page counts
|
||||
if bCompany == False:
|
||||
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search
|
||||
else:
|
||||
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID)
|
||||
|
||||
print url
|
||||
print(url)
|
||||
|
||||
headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
|
||||
cookies['JSESSIONID'] = 'ajax:0397788525211216808'
|
||||
#print url
|
||||
r = requests.get(url, cookies=cookies, headers=headers)
|
||||
content = json.loads(r.text)
|
||||
data_total = content['elements'][0]['total']
|
||||
|
||||
# Calculate pages off final results at 40 results/page
|
||||
pages = int(math.ceil(data_total / 40.0))
|
||||
|
||||
if pages == 0:
|
||||
pages = 1
|
||||
|
||||
if data_total % 40 == 0:
|
||||
# Because we count 0... Subtract a page if there are no leftover results on the last page
|
||||
pages = pages - 1
|
||||
|
||||
if pages == 0:
|
||||
print "[!] Try to use quotes in the search name"
|
||||
print("[!] Try to use quotes in the search name")
|
||||
sys.exit(0)
|
||||
|
||||
print "[*] %i Results Found" % data_total
|
||||
print("[*] %i Results Found" % data_total)
|
||||
if data_total > 1000:
|
||||
pages = 25
|
||||
print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data"
|
||||
print "[*] Fetching %i Pages" % pages
|
||||
print
|
||||
print("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
|
||||
print("[*] Fetching %i Pages" % pages)
|
||||
print("")
|
||||
|
||||
for p in range(pages):
|
||||
# Request results for each page using the start offset
|
||||
if bCompany == False:
|
||||
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40)
|
||||
else:
|
||||
url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40)
|
||||
#print url
|
||||
r = requests.get(url, cookies=cookies, headers=headers)
|
||||
content = r.text.encode('UTF-8')
|
||||
content = json.loads(content)
|
||||
print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements']))
|
||||
print("[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements'])))
|
||||
for c in content['elements'][0]['elements']:
|
||||
if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
|
||||
try:
|
||||
|
|
@ -199,10 +186,10 @@ def get_search():
|
|||
try:
|
||||
data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
|
||||
except:
|
||||
print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation)
|
||||
print("[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation))
|
||||
data_picture = ""
|
||||
|
||||
# In case the last name has multiple parts, we will split it up
|
||||
|
||||
|
||||
parts = data_lastname.split()
|
||||
|
||||
|
|
@ -231,10 +218,9 @@ def get_search():
|
|||
lname = re.sub('[^A-Za-z]+', '', lname)
|
||||
|
||||
if len(fname) == 0 or len(lname) == 0:
|
||||
# invalid user, let's move on, this person has a weird name
|
||||
continue
|
||||
|
||||
#come here
|
||||
|
||||
|
||||
if prefix == 'full':
|
||||
user = '{}{}{}'.format(fname, mname, lname)
|
||||
|
|
@ -251,6 +237,8 @@ def get_search():
|
|||
user = '{}{}'.format(fname,lname[0])
|
||||
if prefix == 'first.last':
|
||||
user = '{}.{}'.format(fname, lname)
|
||||
if prefix == 'first_last':
|
||||
user = '{}_{}'.format(fname, lname)
|
||||
if prefix == 'fmlast':
|
||||
if len(mname) == 0:
|
||||
user = '{}{}{}'.format(fname[0], mname, lname)
|
||||
|
|
@ -258,6 +246,9 @@ def get_search():
|
|||
user = '{}{}{}'.format(fname[0], mname[0], lname)
|
||||
if prefix == 'lastfirst':
|
||||
user = '{}{}'.format(lname, fname)
|
||||
if prefix == 'first':
|
||||
user = '{}'.format(fname)
|
||||
|
||||
|
||||
email = '{}@{}'.format(user, suffix)
|
||||
|
||||
|
|
@ -272,53 +263,47 @@ def get_search():
|
|||
csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";")))
|
||||
foot = "</table></center>"
|
||||
f = open('{}.html'.format(outfile), 'wb')
|
||||
f.write(css)
|
||||
f.write(header)
|
||||
f.write(body)
|
||||
f.write(foot)
|
||||
f.write(css.encode())
|
||||
f1=open('only_email.csv','a')
|
||||
f1.write((email+"\n"))
|
||||
f1.close()
|
||||
f.write(header.encode())
|
||||
f.write(body.encode())
|
||||
f.write(foot.encode())
|
||||
f.close()
|
||||
f = open('{}.csv'.format(outfile), 'wb')
|
||||
f.writelines('\n'.join(csv))
|
||||
#newcsv='\n'.join(csv)
|
||||
#f.writelines(newcsv.encode())
|
||||
for x in csv:
|
||||
f.write(x.join('\n').encode())
|
||||
f.close()
|
||||
else:
|
||||
print "[!] Headless profile found. Skipping"
|
||||
print
|
||||
|
||||
def banner():
|
||||
with open('banner.txt', 'r') as f:
|
||||
data = f.read()
|
||||
|
||||
print "\033[1;31m%s\033[0;0m" % data
|
||||
print "\033[1;34mProviding you with Linkedin Intelligence"
|
||||
print "\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m"
|
||||
print "\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m"
|
||||
|
||||
print("[!] Headless profile found. Skipping")
|
||||
print("")
|
||||
def authenticate():
|
||||
try:
|
||||
a = login()
|
||||
print a
|
||||
print(a)
|
||||
session = a
|
||||
if len(session) == 0:
|
||||
sys.exit("[!] Unable to login to LinkedIn.com")
|
||||
print "[*] Obtained new session: %s" % session
|
||||
print("[*] Obtained new session: %s" % session)
|
||||
cookies = dict(li_at=session)
|
||||
except Exception, e:
|
||||
except Exception:
|
||||
sys.exit("[!] Could not authenticate to linkedin. %s" % e)
|
||||
return cookies
|
||||
|
||||
if __name__ == '__main__':
|
||||
banner()
|
||||
# Prompt user for data variables
|
||||
search = args.keywords if args.keywords!=None else raw_input("[*] Enter search Keywords (use quotes for more precise results)\n")
|
||||
print
|
||||
outfile = args.output if args.output!=None else raw_input("[*] Enter filename for output (exclude file extension)\n")
|
||||
print
|
||||
search = args.keywords if args.keywords!=None else input("[*] Enter search Keywords (use quotes for more precise results)\n")
|
||||
print("")
|
||||
outfile = args.output if args.output!=None else input("[*] Enter filename for output (exclude file extension)\n")
|
||||
print("")
|
||||
while True:
|
||||
bCompany = raw_input("[*] Filter by Company? (Y/N): \n")
|
||||
bCompany = input("[*] Filter by Company? (Y/N): \n")
|
||||
if bCompany.lower() == "y" or bCompany.lower() == "n":
|
||||
break
|
||||
else:
|
||||
print "[!] Incorrect choice"
|
||||
print("[!] Incorrect choice")
|
||||
|
||||
if bCompany.lower() == "y":
|
||||
bCompany = True
|
||||
|
|
@ -330,11 +315,11 @@ if __name__ == '__main__':
|
|||
prefix = ""
|
||||
suffix = ""
|
||||
|
||||
print
|
||||
print("")
|
||||
|
||||
if bCompany:
|
||||
while True:
|
||||
bSpecific = raw_input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
|
||||
bSpecific = input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
|
||||
if bSpecific != "":
|
||||
bAuto = False
|
||||
if bSpecific != 0:
|
||||
|
|
@ -342,75 +327,67 @@ if __name__ == '__main__':
|
|||
int(bSpecific)
|
||||
break
|
||||
except:
|
||||
print "[!] Incorrect choice, the ID either has to be a number or blank"
|
||||
print("[!] Incorrect choice, the ID either has to be a number or blank")
|
||||
|
||||
else:
|
||||
print "[!] Incorrect choice, the ID either has to be a number or blank"
|
||||
print("[!] Incorrect choice, the ID either has to be a number or blank")
|
||||
else:
|
||||
bAuto = True
|
||||
break
|
||||
|
||||
print
|
||||
print("")
|
||||
|
||||
|
||||
while True:
|
||||
suffix = raw_input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
|
||||
suffix = input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
|
||||
suffix = suffix.lower()
|
||||
if "." in suffix:
|
||||
break
|
||||
else:
|
||||
print "[!] Incorrect e-mail? There's no dot"
|
||||
print("[!] Incorrect e-mail? There's no dot")
|
||||
|
||||
print
|
||||
print("")
|
||||
|
||||
while True:
|
||||
prefix = raw_input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst): \n")
|
||||
prefix = input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst,first): \n")
|
||||
prefix = prefix.lower()
|
||||
print
|
||||
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
|
||||
print("")
|
||||
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first_last"or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
|
||||
break
|
||||
elif prefix == "auto":
|
||||
#if auto prefix then we want to use hunter IO to find it.
|
||||
print "[*] Automatically using Hunter IO to determine best Prefix"
|
||||
print("[*] Automatically using Hunter IO to determine best Prefix")
|
||||
url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix
|
||||
r = requests.get(url)
|
||||
content = json.loads(r.text)
|
||||
if "status" in content:
|
||||
print "[!] Rate limited by Hunter IO trial"
|
||||
print("[!] Rate limited by Hunter IO trial")
|
||||
url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key)
|
||||
#print url
|
||||
r = requests.get(url)
|
||||
content = json.loads(r.text)
|
||||
if "status" in content:
|
||||
print "[!] Rate limited by Hunter IO Key"
|
||||
print("[!] Rate limited by Hunter IO Key")
|
||||
continue
|
||||
#print content
|
||||
prefix = content['data']['pattern']
|
||||
print "[!] %s" % prefix
|
||||
print("[!] %s" % prefix)
|
||||
if prefix:
|
||||
prefix = prefix.replace("{","").replace("}", "")
|
||||
if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
|
||||
print "[+] Found %s prefix" % prefix
|
||||
print("[+] Found %s prefix" % prefix)
|
||||
break
|
||||
else:
|
||||
print "[!] Automatic prefix search failed, please insert a manual choice"
|
||||
print("[!] Automatic prefix search failed, please insert a manual choice")
|
||||
continue
|
||||
else:
|
||||
print "[!] Automatic prefix search failed, please insert a manual choice"
|
||||
print("[!] Automatic prefix search failed, please insert a manual choice")
|
||||
continue
|
||||
else:
|
||||
print "[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)"
|
||||
print("[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)")
|
||||
|
||||
print
|
||||
print("")
|
||||
|
||||
|
||||
|
||||
# URL Encode for the querystring
|
||||
search = urllib.quote_plus(search)
|
||||
search = urllib.parse.quote_plus(search)
|
||||
cookies = authenticate()
|
||||
|
||||
|
||||
# Initialize Scraping
|
||||
get_search()
|
||||
|
||||
print "[+] Complete"
|
||||
print("[+] Complete")
|
||||
|
|
@ -2,7 +2,6 @@ beautifulsoup4==4.6.0
|
|||
certifi==2018.1.18
|
||||
chardet==3.0.4
|
||||
idna==2.6
|
||||
pkg-resources==0.0.0
|
||||
requests==2.18.4
|
||||
thready==0.1.5
|
||||
urllib3==1.22
|
||||
Loading…
Reference in a new issue