package com.ey.gst.dao.service; import java.io.IOException; import java.net.SocketTimeoutException; import java.security.cert.X509Certificate; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.Map; import java.util.HashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import org.jsoup.HttpStatusException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.openqa.selenium.By; import org.openqa.selenium.WebElement; public class WebFinal { public static boolean visited =false; public static boolean pageAccess =true; public static Set titleHeaders = new HashSet<>(); public static List> details =new ArrayList<>(); public static int count=1; public static void crawler(String url) throws IOException { disableCertificateValidation(); try { Document doc = Jsoup.connect(url) .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") .timeout(3600).get(); Elements ele=doc.select("div.jsx-4d407376001b01ad.resultbox_info"); for(Element e:ele.select("div.jsx-4d407376001b01ad.resultbox_textbox")) { //String result =e.attr("h2").text(); //String result =e.text(); Map dataMap=new HashMap<>(); Element ratingElement=e.selectFirst("div.jsx-4d407376001b01ad.resultbox_totalrate"); Element h2Element = e.selectFirst("h2.resultbox_title"); Element addressElement = e.selectFirst("div.jsx-4d407376001b01ad.resultbox_address"); //Element openElement=e.selectFirst("jsx-4d407376001b01ad.resultbox_activity"); if (h2Element != null) { String title = h2Element.attr("title"); String ratings= ratingElement.text(); String address =addressElement.text(); //String openHours=openElement.text(); dataMap.put("title", title); dataMap.put("ratings", ratings); dataMap.put("address", address); //dataMap.put("openHours", openHours); if(!titleHeaders.contains(title)) { System.out.println("Title " + count++ +" Ratings : " + ratings +" -- "+ title); System.out.println("Address : " +address ); } titleHeaders.add(title); //System.out.println("Title: " + title); } //System.out.println(e); } }catch(HttpStatusException se) { System.out.println("Access denied in the web page"); pageAccess=false; return; } catch(SocketTimeoutException se) { System.out.println("Unable to fetch more data.. "); pageAccess=false; return ; } catch(NullPointerException ne) { return; } catch(Exception e) { pageAccess=false; e.printStackTrace(); return; } } public static void main(String[] args) throws IOException { String location ="Kolkata"; String search= "Marriage Function Halls"; // IIT Coaching Classes Marriage Function Halls search = search.replace(" ", "-"); int maxTitles=100; String url="https://www.justdial.com/"+location+"/"+search+"/"; while(titleHeaders.size()<=maxTitles ) { if(pageAccess) { crawler(url); }else { break; } } // for(String s:titleHeaders) { // System.out.println(s); // } System.out.println("==========================================================================================="); //"+details.size() +" System.out.println("Printing the complete data .............." +titleHeaders.size()+" .............. ..................... "); System.out.println("==========================================================================================="); } private static void disableCertificateValidation() { try { TrustManager[] trustAllCertificates = new TrustManager[] { new X509TrustManager() { public X509Certificate[] getAcceptedIssuers() { return null; } public void checkClientTrusted(X509Certificate[] certs, String authType) { } public void checkServerTrusted(X509Certificate[] certs, String authType) { } } }; SSLContext sslContext = SSLContext.getInstance("TLS"); sslContext.init(null, trustAllCertificates, new java.security.SecureRandom()); HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory()); HttpsURLConnection.setDefaultHostnameVerifier((hostname, session) -> true); } catch (Exception e) { e.printStackTrace(); } } }