Merge pull request #17 from thatonesecguy/refactoringBranch

[REFACTORING] Python3 compatibility added
2025-12-06 08:52:29 +01:00 · 2021-03-10 21:40:05 +08:00 · 2021-03-10 21:40:05 +08:00 · 9f4b08e3e4
commit 9f4b08e3e4
parent b273ad32eb b25737d458
2 changed files with 76 additions and 100 deletions
--- a/LinkedInt.py
+++ b/LinkedInt.py
@ -16,25 +16,23 @@ import requests
 import subprocess
 import json
 import argparse
-import cookielib
-import ConfigParser
+import http.cookiejar
+import configparser
 import os
 import urllib
 import math
-import urllib2
+from urllib.request import urlopen
 import string
 from bs4 import BeautifulSoup
-from thready import threaded
+import urllib.parse

-reload(sys)
-sys.setdefaultencoding('utf-8')

 """ Setup Argument Parameters """
 parser = argparse.ArgumentParser(description='Discovery LinkedIn')
 parser.add_argument('-u', '--keywords', help='Keywords to search')
 parser.add_argument('-o', '--output', help='Output file (do not include extentions)')
 args = parser.parse_args()
-config = ConfigParser.RawConfigParser()
+config = configparser.RawConfigParser()
 config.read('LinkedInt.cfg')
 api_key = config.get('API_KEYS', 'hunter')
 username = config.get('CREDS', 'linkedin_username')
@ -61,7 +59,7 @@ def login():
        cookie = requests.utils.dict_from_cookiejar(s.cookies)
        cookie = cookie['li_at']
    except:
-        print "[!] Cannot log in"
+        print("[!] Cannot log in")
        sys.exit(0)
    return cookie

@ -106,12 +104,8 @@ def get_search():
             </tr>
             """

-    # Do we want to automatically get the company ID?
-
    if bCompany:
 	    if bAuto:
-	        # Automatic
-	        # Grab from the URL 
 	        companyID = 0
 	        url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search
 	        headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
@ -124,67 +118,60 @@ def get_search():
 	        		companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
 	        		if firstID == 0:
 	        			firstID = companyID
-	        		print "[Notice] Found company ID: %s" % companyID
+	        		print("[Notice] Found company ID: %s" % companyID)
 	        	except:
 	        		continue
 	        companyID = firstID
 	        if companyID == 0:
-	        	print "[WARNING] No valid company ID found in auto, please restart and find your own"
+	        	print("[WARNING] No valid company ID found in auto, please restart and find your own")
 	    else:
-	        # Don't auto, use the specified ID
 	        companyID = bSpecific

-	    print
+	    print("")
 	    
-	    print "[*] Using company ID: %s" % companyID
+	    print("[*] Using company ID: %s" % companyID)

-	# Fetch the initial page to get results/page counts
    if bCompany == False:
        url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search
    else:
        url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID)
    
-    print url
+    print(url)
    
    headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
    cookies['JSESSIONID'] = 'ajax:0397788525211216808'
-    #print url
    r = requests.get(url, cookies=cookies, headers=headers)
    content = json.loads(r.text)
    data_total = content['elements'][0]['total']

-    # Calculate pages off final results at 40 results/page
    pages = int(math.ceil(data_total / 40.0))

    if pages == 0:
    	pages = 1

    if data_total % 40 == 0:
-        # Becuase we count 0... Subtract a page if there are no left over results on the last page
        pages = pages - 1 

    if pages == 0: 
-    	print "[!] Try to use quotes in the search name"
+    	print("[!] Try to use quotes in the search name")
    	sys.exit(0)
    
-    print "[*] %i Results Found" % data_total
+    print("[*] %i Results Found" % data_total)
    if data_total > 1000:
        pages = 25
-        print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data"
-    print "[*] Fetching %i Pages" % pages
-    print
+        print("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
+    print("[*] Fetching %i Pages" % pages)
+    print("")

    for p in range(pages):
-        # Request results for each page using the start offset
        if bCompany == False:
            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40)
        else:
            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40)
-        #print url
        r = requests.get(url, cookies=cookies, headers=headers)
        content = r.text.encode('UTF-8')
        content = json.loads(content)
-        print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements']))
+        print("[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements'])))
        for c in content['elements'][0]['elements']:
            if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
                try:
@ -199,10 +186,10 @@ def get_search():
                try:
                    data_picture = "%s%s" % (c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['rootUrl'],c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.common.VectorImage']['artifacts'][2]['fileIdentifyingUrlPathSegment'])
                except:
-                    print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation)
+                    print("[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation))
                    data_picture = ""

-                # incase the last name is multi part, we will split it down
+               

                parts = data_lastname.split()

@ -231,10 +218,9 @@ def get_search():
                lname = re.sub('[^A-Za-z]+', '', lname)

                if len(fname) == 0 or len(lname) == 0:
-                    # invalid user, let's move on, this person has a weird name
                    continue

-                    #come here
+              

                if prefix == 'full':
                    user = '{}{}{}'.format(fname, mname, lname)
@ -251,6 +237,8 @@ def get_search():
                    user = '{}{}'.format(fname,lname[0])
                if prefix == 'first.last':
                    user = '{}.{}'.format(fname, lname)
+                if prefix == 'first_last':
+                    user = '{}_{}'.format(fname, lname)    
                if prefix == 'fmlast':
                    if len(mname) == 0:
                        user = '{}{}{}'.format(fname[0], mname, lname)
@ -258,6 +246,9 @@ def get_search():
                        user = '{}{}{}'.format(fname[0], mname[0], lname)
                if prefix == 'lastfirst':
                	user = '{}{}'.format(lname, fname)
+                if prefix == 'first':
+                	user = '{}'.format(fname)
+	

                email = '{}@{}'.format(user, suffix)

@ -272,53 +263,47 @@ def get_search():
                csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";")))
                foot = "</table></center>"
                f = open('{}.html'.format(outfile), 'wb')
-                f.write(css)
-                f.write(header)
-                f.write(body)
-                f.write(foot)
+                f.write(css.encode())
+                f1=open('only_email.csv','a')
+                f1.write((email+"\n"))
+                f1.close()
+                f.write(header.encode())
+                f.write(body.encode())
+                f.write(foot.encode())
                f.close()
                f = open('{}.csv'.format(outfile), 'wb')
-                f.writelines('\n'.join(csv))
+                # Join the rows with newlines ('\n'.join(csv), not x.join('\n'),
+                # which yields only '\n') and encode once: the file is opened 'wb'.
+                newcsv = '\n'.join(csv)
+                f.write(newcsv.encode())
                f.close()
            else:
-                print "[!] Headless profile found. Skipping"
-        print
-
-def banner():
-        with open('banner.txt', 'r') as f:
-            data = f.read()
-
-            print "\033[1;31m%s\033[0;0m" % data
-            print "\033[1;34mProviding you with Linkedin Intelligence"
-            print "\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m"
-            print "\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m"
-
+                print("[!] Headless profile found. Skipping")
+        print("")
 def authenticate():
    try:
    	a = login()
-    	print a
+    	print(a)
    	session = a
    	if len(session) == 0:
    		sys.exit("[!] Unable to login to LinkedIn.com")
-        print "[*] Obtained new session: %s" % session
+    	print("[*] Obtained new session: %s" % session)
    	cookies = dict(li_at=session)
-    except Exception, e:
+    except Exception as e:  # bind the exception: the sys.exit message below formats e
        sys.exit("[!] Could not authenticate to linkedin. %s" % e)
    return cookies

 if __name__ == '__main__':
-    banner()
-    # Prompt user for data variables
-    search = args.keywords if args.keywords!=None else raw_input("[*] Enter search Keywords (use quotes for more precise results)\n")
-    print 
-    outfile = args.output if args.output!=None else raw_input("[*] Enter filename for output (exclude file extension)\n")
-    print 
+    search = args.keywords if args.keywords!=None else input("[*] Enter search Keywords (use quotes for more precise results)\n")
+    print("")
+    outfile = args.output if args.output!=None else input("[*] Enter filename for output (exclude file extension)\n")
+    print("") 
    while True:
-        bCompany = raw_input("[*] Filter by Company? (Y/N): \n")
+        bCompany = input("[*] Filter by Company? (Y/N): \n")
        if bCompany.lower() == "y" or bCompany.lower() == "n":
            break
        else:
-            print "[!] Incorrect choice"
+            print("[!] Incorrect choice")

    if bCompany.lower() == "y":
        bCompany = True
@ -330,11 +315,11 @@ if __name__ == '__main__':
    prefix = ""
    suffix = ""

-    print
+    print("")

    if bCompany:
 	    while True:
-	        bSpecific = raw_input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
+	        bSpecific = input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
 	        if bSpecific != "":
 	            bAuto = False
 	            if bSpecific != 0:
@ -342,75 +327,67 @@ if __name__ == '__main__':
 	                    int(bSpecific)
 	                    break
 	                except:
-	                    print "[!] Incorrect choice, the ID either has to be a number or blank"
+	                    print("[!] Incorrect choice, the ID either has to be a number or blank")
 	                
 	            else:
-	                print "[!] Incorrect choice, the ID either has to be a number or blank"
+	                print("[!] Incorrect choice, the ID either has to be a number or blank")
 	        else:
 	            bAuto = True
 	            break

-    print
+    print("")

    
    while True:
-        suffix = raw_input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
+        suffix = input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
        suffix = suffix.lower()
        if "." in suffix:
            break
        else:
-            print "[!] Incorrect e-mail? There's no dot"
+            print("[!] Incorrect e-mail? There's no dot")

-    print
+    print("")

    while True:
-        prefix = raw_input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst): \n")
+        prefix = input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast,lastfirst,first): \n")
        prefix = prefix.lower()
-        print
-        if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
+        print("")
+        if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first_last"or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
            break
        elif prefix == "auto":
-            #if auto prefix then we want to use hunter IO to find it.
-            print "[*] Automatically using Hunter IO to determine best Prefix"
+            print("[*] Automatically using Hunter IO to determine best Prefix")
            url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix
            r = requests.get(url)
            content = json.loads(r.text)
            if "status" in content:
-                print "[!] Rate limited by Hunter IO trial"
+                print("[!] Rate limited by Hunter IO trial")
                url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key)
-                #print url
                r = requests.get(url)
                content = json.loads(r.text)
                if "status" in content:
-                    print "[!] Rate limited by Hunter IO Key"
+                    print("[!] Rate limited by Hunter IO Key")
                    continue
-            #print content
            prefix = content['data']['pattern']
-            print "[!] %s" % prefix
+            print("[!] %s" % prefix)
            if prefix:
                prefix = prefix.replace("{","").replace("}", "")
                if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix == "firstl" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
-                    print "[+] Found %s prefix" % prefix
+                    print("[+] Found %s prefix" % prefix)
                    break
                else:
-                    print "[!] Automatic prefix search failed, please insert a manual choice"
+                    print("[!] Automatic prefix search failed, please insert a manual choice")
                    continue
            else:
-                print "[!] Automatic prefix search failed, please insert a manual choice"
+                print("[!] Automatic prefix search failed, please insert a manual choice")
                continue
        else:
-            print "[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)"
+            print("[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,firstl,first.last,fmlast)")

-    print 
+    print("")

-
-    
-    # URL Encode for the querystring
-    search = urllib.quote_plus(search)
+    search = urllib.parse.quote_plus(search)
    cookies = authenticate()
  
-    
-    # Initialize Scraping
    get_search()

-    print "[+] Complete"
+    print("[+] Complete")
--- a/requirements.txt
+++ b/requirements.txt
@ -2,7 +2,6 @@ beautifulsoup4==4.6.0
 certifi==2018.1.18
 chardet==3.0.4
 idna==2.6
-pkg-resources==0.0.0
 requests==2.18.4
 thready==0.1.5
 urllib3==1.22