From b25737d4581334dab6bf15ba72040557cf29fb55 Mon Sep 17 00:00:00 2001 From: Harshil Shah Date: Thu, 12 Nov 2020 04:12:50 +0530 Subject: [PATCH] [REFACTORING] Python3 compatibility added --- LinkedInt.py | 173 ++++++++++++++++++++--------------------------- requirements.txt | 3 +- 2 files changed, 76 insertions(+), 100 deletions(-) diff --git a/LinkedInt.py b/LinkedInt.py index 7198659..2690326 100644 --- a/LinkedInt.py +++ b/LinkedInt.py @@ -16,25 +16,23 @@ import requests import subprocess import json import argparse -import cookielib -import ConfigParser +import http.cookiejar +import configparser import os import urllib import math -import urllib2 +from urllib.request import urlopen import string from bs4 import BeautifulSoup -from thready import threaded +import urllib.parse -reload(sys) -sys.setdefaultencoding('utf-8') """ Setup Argument Parameters """ parser = argparse.ArgumentParser(description='Discovery LinkedIn') parser.add_argument('-u', '--keywords', help='Keywords to search') parser.add_argument('-o', '--output', help='Output file (do not include extentions)') args = parser.parse_args() -config = ConfigParser.RawConfigParser() +config = configparser.RawConfigParser() config.read('LinkedInt.cfg') api_key = config.get('API_KEYS', 'hunter') username = config.get('CREDS', 'linkedin_username') @@ -58,11 +56,11 @@ def login(): } rv = s.post(URL + '/checkpoint/lg/login-submit', data=postdata) try: - cookie = requests.utils.dict_from_cookiejar(s.cookies) + cookie = requests.utils.dict_from_cookiejar(s.cookies) cookie = cookie['li_at'] except: - print "[!] Cannot log in" - sys.exit(0) + print("[!] Cannot log in") + sys.exit(0) return cookie def get_search(): @@ -106,12 +104,8 @@ def get_search(): """ - # Do we want to automatically get the company ID? 
-
     if bCompany:
         if bAuto:
-            # Automatic
-            # Grab from the URL
             companyID = 0
             url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search
             headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
@@ -124,67 +118,60 @@ def get_search():
                     companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
                     if firstID == 0:
                         firstID = companyID
-                    print "[Notice] Found company ID: %s" % companyID
+                    print("[Notice] Found company ID: %s" % companyID)
                 except:
                     continue
             companyID = firstID
 
             if companyID == 0:
-                print "[WARNING] No valid company ID found in auto, please restart and find your own"
+                print("[WARNING] No valid company ID found in auto, please restart and find your own")
         else:
-            # Don't auto, use the specified ID
             companyID = bSpecific
 
-    print
+    print("")
 
-    print "[*] Using company ID: %s" % companyID
+    print("[*] Using company ID: %s" % companyID)
 
-    # Fetch the initial page to get results/page counts
     if bCompany == False:
         url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search
     else:
         url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID)
 
-    print url
+    print(url)
 
     headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
     cookies['JSESSIONID'] = 'ajax:0397788525211216808'
-    #print url
     r = requests.get(url, cookies=cookies, headers=headers)
     content = json.loads(r.text)
 
     data_total = content['elements'][0]['total']
 
-    # Calculate pages off final results at 40 results/page
     pages = int(math.ceil(data_total / 40.0))
 
     if pages == 0:
         pages = 1
 
     if data_total % 40 == 0:
-        # Becuase we count 0... Subtract a page if there are no left over results on the last page
         pages = pages - 1
 
     if pages == 0:
-        print "[!] Try to use quotes in the search name"
+        print("[!] Try to use quotes in the search name")
         sys.exit(0)
 
-    print "[*] %i Results Found" % data_total
+    print("[*] %i Results Found" % data_total)
 
     if data_total > 1000:
         pages = 25
-        print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data"
-    print "[*] Fetching %i Pages" % pages
-    print
+        print("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
+    print("[*] Fetching %i Pages" % pages)
+    print("")
 
     for p in range(pages):
-        # Request results for each page using the start offset
         if bCompany == False:
             url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40)
         else:
             url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40)
-        #print url
 
         r = requests.get(url, cookies=cookies, headers=headers)
         content = r.text.encode('UTF-8')
         content = json.loads(content)
-        print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements']))
+        print("[*] Fetching page %i with %i results" % (p, len(content['elements'][0]['elements'])))
         for c in content['elements'][0]['elements']:
             if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
                 try:
@@ -199,10 +186,10 @@ def get_search():
                     try:
                         data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
                     except:
-                        print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation)
+                        print("[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation))
                         data_picture = ""
 
-                    # incase the last name is multi part, we will split it down
+
                     parts = data_lastname.split()
@@ -231,10 +218,9 @@ def get_search():
                     lname = re.sub('[^A-Za-z]+', '', lname)
 
                     if len(fname) == 0 or len(lname) == 0:
-                        # invalid user, let's move on, this person has a weird name
                         continue
-                    #come here
+
                     if prefix == 'full':
                         user = '{}{}{}'.format(fname, mname, lname)
@@ -251,6 +237,8 @@ def get_search():
                         user = '{}{}'.format(fname,lname[0])
                     if prefix == 'first.last':
                         user = '{}.{}'.format(fname, lname)
+                    if prefix == 'first_last':
+                        user = '{}_{}'.format(fname, lname)
                     if prefix == 'fmlast':
                         if len(mname) == 0:
                             user = '{}{}{}'.format(fname[0], mname, lname)
@@ -258,6 +246,9 @@ def get_search():
                             user = '{}{}{}'.format(fname[0], mname[0], lname)
                     if prefix == 'lastfirst':
                         user = '{}{}'.format(lname, fname)
+                    if prefix == 'first':
+                        user = '{}'.format(fname)
+
                     email = '{}@{}'.format(user, suffix)
@@ -272,53 +263,47 @@ def get_search():
                     csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";")))
                     foot = ""
                     f = open('{}.html'.format(outfile), 'wb')
-                    f.write(css)
-                    f.write(header)
-                    f.write(body)
-                    f.write(foot)
+                    f.write(css.encode())
+                    # Append each generated address to a flat e-mail list
+                    f1 = open('only_email.csv', 'a')
+                    f1.write(email + "\n")
+                    f1.close()
+                    f.write(header.encode())
+                    f.write(body.encode())
+                    f.write(foot.encode())
                     f.close()
                     f = open('{}.csv'.format(outfile), 'wb')
-                    f.writelines('\n'.join(csv))
+                    # Write the accumulated rows once, newline-separated, as bytes
+                    f.write('\n'.join(csv).encode())
                     f.close()
                 else:
-                    print "[!] Headless profile found. Skipping"
Skipping" - print - -def banner(): - with open('banner.txt', 'r') as f: - data = f.read() - - print "\033[1;31m%s\033[0;0m" % data - print "\033[1;34mProviding you with Linkedin Intelligence" - print "\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m" - print "\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m" - + print("[!] Headless profile found. Skipping") + print("") def authenticate(): try: a = login() - print a - session = a - if len(session) == 0: - sys.exit("[!] Unable to login to LinkedIn.com") - print "[*] Obtained new session: %s" % session - cookies = dict(li_at=session) - except Exception, e: + print(a) + session = a + if len(session) == 0: + sys.exit("[!] Unable to login to LinkedIn.com") + print("[*] Obtained new session: %s" % session) + cookies = dict(li_at=session) + except Exception: sys.exit("[!] Could not authenticate to linkedin. %s" % e) return cookies if __name__ == '__main__': - banner() - # Prompt user for data variables - search = args.keywords if args.keywords!=None else raw_input("[*] Enter search Keywords (use quotes for more precise results)\n") - print - outfile = args.output if args.output!=None else raw_input("[*] Enter filename for output (exclude file extension)\n") - print + search = args.keywords if args.keywords!=None else input("[*] Enter search Keywords (use quotes for more precise results)\n") + print("") + outfile = args.output if args.output!=None else input("[*] Enter filename for output (exclude file extension)\n") + print("") while True: - bCompany = raw_input("[*] Filter by Company? (Y/N): \n") + bCompany = input("[*] Filter by Company? (Y/N): \n") if bCompany.lower() == "y" or bCompany.lower() == "n": break else: - print "[!] Incorrect choice" + print("[!] Incorrect choice") if bCompany.lower() == "y": bCompany = True @@ -330,11 +315,11 @@ if __name__ == '__main__': prefix = "" suffix = "" - print + print("") if bCompany: while True: - bSpecific = raw_input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n") + bSpecific = input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n") if bSpecific != "": bAuto = False if bSpecific != 0: @@ -342,75 +327,67 @@ if __name__ == '__main__': int(bSpecific) break except: - print "[!] Incorrect choice, the ID either has to be a number or blank" + print("[!] Incorrect choice, the ID either has to be a number or blank") else: - print "[!] Incorrect choice, the ID either has to be a number or blank" + print("[!] Incorrect choice, the ID either has to be a number or blank") else: bAuto = True break - print + print("") while True: - suffix = raw_input("[*] Enter e-mail domain suffix (eg. contoso.com): \n") + suffix = input("[*] Enter e-mail domain suffix (eg. contoso.com): \n") suffix = suffix.lower() if "." in suffix: break else: - print "[!] Incorrect e-mail? There's no dot" + print("[!] Incorrect e-mail? 
 
-    print
+    print("")
 
    while True:
-        prefix = raw_input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst): \n")
+        prefix = input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,first_last,fmlast,lastfirst,first): \n")
        prefix = prefix.lower()
-        print
-        if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
+        print("")
+        if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix == "first" or prefix == "first_last" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
            break
        elif prefix == "auto":
-            #if auto prefix then we want to use hunter IO to find it.
-            print "[*] Automatically using Hunter IO to determine best Prefix"
+            print("[*] Automatically using Hunter IO to determine best Prefix")
            url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix
            r = requests.get(url)
            content = json.loads(r.text)
            if "status" in content:
-                print "[!] Rate limited by Hunter IO trial"
+                print("[!] Rate limited by Hunter IO trial")
                url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key)
-                #print url
                r = requests.get(url)
                content = json.loads(r.text)
                if "status" in content:
-                    print "[!] Rate limited by Hunter IO Key"
+                    print("[!] Rate limited by Hunter IO Key")
                    continue
-            #print content
            prefix = content['data']['pattern']
-            print "[!] %s" % prefix
+            print("[!] %s" % prefix)
 
            if prefix:
                prefix = prefix.replace("{","").replace("}", "")
                if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
-                    print "[+] Found %s prefix" % prefix
+                    print("[+] Found %s prefix" % prefix)
                    break
                else:
-                    print "[!] Automatic prefix search failed, please insert a manual choice"
+                    print("[!] Automatic prefix search failed, please insert a manual choice")
                    continue
            else:
-                print "[!] Automatic prefix search failed, please insert a manual choice"
+                print("[!] Automatic prefix search failed, please insert a manual choice")
                continue
        else:
-            print "[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)"
+            print("[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,first_last,fmlast,lastfirst,first)")
 
-    print
+    print("")
 
-
-
-    # URL Encode for the querystring
-    search = urllib.quote_plus(search)
+    search = urllib.parse.quote_plus(search)
 
    cookies = authenticate()
-
-    # Initialize Scraping
    get_search()
 
-    print "[+] Complete"
+    print("[+] Complete")
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c08d8c0..bad0719 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,5 @@ beautifulsoup4==4.6.0
 certifi==2018.1.18
 chardet==3.0.4
 idna==2.6
-pkg-resources==0.0.0
 requests==2.18.4
-thready==0.1.5
-urllib3==1.22
+urllib3==1.22
\ No newline at end of file
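--
Reviewer note: a minimal, self-contained sketch of the Python 3 idioms this patch leans on, for checking the migration pattern in isolation: print as a function, except-as binding, urllib.parse, and writing text with an explicit encoding instead of calling .encode() on every write. The rows and outfile values below are illustrative stand-ins, not data from LinkedInt.

    import urllib.parse

    # Illustrative stand-ins; LinkedInt builds these from scraped results.
    rows = ['"Jane","Doe","Jane Doe","jane.doe@contoso.com","Engineer","City"']
    outfile = "results"

    # Text mode with an explicit encoding replaces open(..., 'wb') plus
    # per-write .encode() calls.
    with open('{}.csv'.format(outfile), 'w', encoding='utf-8') as f:
        f.write('\n'.join(rows) + '\n')

    # Python 3 binds the exception object with 'as';
    # 'except Exception, e' is a SyntaxError in Python 3.
    try:
        int("not a number")
    except Exception as e:
        print("[!] Could not parse: %s" % e)

    # urllib.parse.quote_plus replaces Python 2's urllib.quote_plus.
    print(urllib.parse.quote_plus('"red team" operator'))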