diff --git a/LinkedInt.py b/LinkedInt.py
index 7198659..4b2ba5c 100644
--- a/LinkedInt.py
+++ b/LinkedInt.py
@@ -16,25 +16,39 @@ import requests
 import subprocess
 import json
 import argparse
-import cookielib
-import ConfigParser
+try:
+    import http.cookiejar as cookielib
+    from configparser import RawConfigParser
+    from urllib.parse import quote_plus
+except ImportError:
+    import cookielib
+    from ConfigParser import RawConfigParser
+    from urllib import quote_plus
+
+try:
+    input = raw_input
+except NameError:
+    pass
+
 import os
-import urllib
 import math
-import urllib2
 import string
 from bs4 import BeautifulSoup
-from thready import threaded
-reload(sys)
-sys.setdefaultencoding('utf-8')
+#from thready import threaded
+
+try:
+    reload(sys)
+    sys.setdefaultencoding('utf-8')
+except Exception:
+    pass
 
 """ Setup Argument Parameters """
 parser = argparse.ArgumentParser(description='Discovery LinkedIn')
 parser.add_argument('-u', '--keywords', help='Keywords to search')
 parser.add_argument('-o', '--output', help='Output file (do not include extentions)')
 args = parser.parse_args()
 
-config = ConfigParser.RawConfigParser()
+config = RawConfigParser()
 config.read('LinkedInt.cfg')
 api_key = config.get('API_KEYS', 'hunter')
 username = config.get('CREDS', 'linkedin_username')
@@ -58,11 +72,11 @@ def login():
     }
     rv = s.post(URL + '/checkpoint/lg/login-submit', data=postdata)
     try:
-        cookie = requests.utils.dict_from_cookiejar(s.cookies)
+        cookie = requests.utils.dict_from_cookiejar(s.cookies)
         cookie = cookie['li_at']
     except:
-        print "[!] Cannot log in"
-        sys.exit(0)
+        print("[!] Cannot log in")
+        sys.exit(0)
     return cookie
 
 def get_search():
@@ -109,46 +123,46 @@ def get_search():
 
     # Do we want to automatically get the company ID?
     if bCompany:
-        if bAuto:
-            # Automatic
-            # Grab from the URL
-            companyID = 0
-            url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search
-            headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
-            cookies['JSESSIONID'] = 'ajax:0397788525211216808'
-            r = requests.get(url, cookies=cookies, headers=headers)
-            content = json.loads(r.text)
-            firstID = 0
-            for i in range(0,len(content['elements'])):
-                try:
-                    companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
-                    if firstID == 0:
-                        firstID = companyID
-                    print "[Notice] Found company ID: %s" % companyID
-                except:
-                    continue
-            companyID = firstID
-            if companyID == 0:
-                print "[WARNING] No valid company ID found in auto, please restart and find your own"
-        else:
-            # Don't auto, use the specified ID
-            companyID = bSpecific
+        if bAuto:
+            # Automatic
+            # Grab from the URL
+            companyID = 0
+            url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query={}".format(search)
+            headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
+            cookies['JSESSIONID'] = 'ajax:0397788525211216808'
+            r = requests.get(url, cookies=cookies, headers=headers)
+            content = json.loads(r.text)
+            firstID = 0
+            for i in range(0,len(content['elements'])):
+                try:
+                    companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
+                    if firstID == 0:
+                        firstID = companyID
+                    print("[Notice] Found company ID: {}".format(companyID))
+                except:
+                    continue
+            companyID = firstID
+            if companyID == 0:
+                print("[WARNING] No valid company ID found in auto, please restart and find your own")
+        else:
+            # Don't auto, use the specified ID
+            companyID = bSpecific
 
-        print
-
-        print "[*] Using company ID: %s" % companyID
+        print()
+
+        print("[*] Using company ID: {}".format(companyID))
 
-    # Fetch the initial page to get results/page counts
+    # Fetch the initial page to get results/page counts
     if bCompany == False:
-        url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search
+        url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords={}&origin=OTHER&q=guided&start=0".format(search)
     else:
-        url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID)
+        url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->{})&origin=OTHER&q=guided&start=0".format(companyID)
 
-    print url
+    print(url)
 
     headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
     cookies['JSESSIONID'] = 'ajax:0397788525211216808'
-    #print url
+
     r = requests.get(url, cookies=cookies, headers=headers)
     content = json.loads(r.text)
     data_total = content['elements'][0]['total']
@@ -157,34 +171,34 @@ def get_search():
     pages = int(math.ceil(data_total / 40.0))
 
     if pages == 0:
-        pages = 1
+        pages = 1
 
     if data_total % 40 == 0:
         # Becuase we count 0... Subtract a page if there are no left over results on the last page
        pages = pages - 1
 
     if pages == 0:
-        print "[!] Try to use quotes in the search name"
-        sys.exit(0)
+        print("[!] Try to use quotes in the search name")
+        sys.exit(0)
 
-    print "[*] %i Results Found" % data_total
+    print("[*] {} Results Found".format(data_total))
     if data_total > 1000:
         pages = 25
-        print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data"
-    print "[*] Fetching %i Pages" % pages
-    print
+        print("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
+    print("[*] Fetching {} Pages".format(pages))
+    print()
 
     for p in range(pages):
         # Request results for each page using the start offset
         if bCompany == False:
-            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40)
+            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords={}&origin=OTHER&q=guided&start={}".format(search, p*40)
         else:
-            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40)
-        #print url
+            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->{})&origin=OTHER&q=guided&start={}".format(companyID, p*40)
+
         r = requests.get(url, cookies=cookies, headers=headers)
         content = r.text.encode('UTF-8')
         content = json.loads(content)
-        print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements']))
+        print("[*] Fetching page {} with {} results".format((p),len(content['elements'][0]['elements'])))
         for c in content['elements'][0]['elements']:
             if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
                 try:
@@ -193,13 +207,13 @@ def get_search():
                     data_industry = ""
                     data_firstname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['firstName']
                     data_lastname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['lastName']
-                    data_slug = "https://www.linkedin.com/in/%s" % c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['publicIdentifier']
+                    data_slug = "https://www.linkedin.com/in/{}".format(c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['publicIdentifier'])
                     data_occupation = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['occupation']
                     data_location = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['location']
                     try:
-                        data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
+                        data_picture = "{}{}".format(c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
                     except:
-                        print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation)
+                        print("[*] No picture found for {} {}, {}".format(data_firstname, data_lastname, data_occupation))
                         data_picture = ""
 
                     # incase the last name is multi part, we will split it down
@@ -257,68 +271,68 @@ def get_search():
                     else:
                         user = '{}{}{}'.format(fname[0], mname[0], lname)
                     if prefix == 'lastfirst':
-                        user = '{}{}'.format(lname, fname)
+                        user = '{}{}'.format(lname, fname)
 
                     email = '{}@{}'.format(user, suffix)
 
                     body += "