A simple URL validator
The URL validator code below is just a sample; it can be used for demos but is not suitable for production -
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.UnknownHostException;
import javax.net.ssl.SSLHandshakeException;
/**
 * Demo URL validator: opens an HTTP connection to a URL through an HTTP proxy and prints a
 * human-readable diagnosis of the outcome to standard output.
 *
 * <p>This is sample code for demonstration purposes, not production use.
 *
 * @author nitin_agrawal
 */
public class ValidateURL {

    /** URL checked when no command-line argument is supplied. */
    private static final String DEFAULT_URL = "http://www.nitinagrawal.com/";

    /** Proxy host; provide the correct proxy address when working behind a firewall or proxy. */
    private static final String PROXY_HOST = "proxy.com";

    /** Proxy port. */
    private static final int PROXY_PORT = 8080;

    /**
     * Connect/read timeout in milliseconds. The timeout setters take milliseconds, so a value
     * of 1 would make practically every real connection attempt time out.
     */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Tries to open the URL through the configured proxy and prints the result.
     *
     * @param args optional; {@code args[0]} is the URL to check, otherwise {@link #DEFAULT_URL}
     *     is used
     */
    public static void main(String[] args) {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        HttpURLConnection connection = null;
        try {
            // Parse the URL first so a malformed URL is reported before any proxy host lookup.
            URL target = new URL(url);
            Proxy proxy =
                    new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_HOST, PROXY_PORT));
            connection = (HttpURLConnection) target.openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.connect();
            // Obtaining the response stream is the actual check; the content is not needed.
            try (InputStream ignored = connection.getInputStream()) {
                // nothing to read
            }
            System.out.println("URL is correct & accessible........");
        } catch (UnknownHostException uhe) {
            System.out.println("Given proxy is incorrect...........");
        } catch (MalformedURLException mue) {
            System.out.println("URL is not correct.......");
        } catch (ConnectException ce) {
            System.out.println("Check if system is behind some proxy.......");
        } catch (SocketException se) {
            System.err.println("Socket error: " + se);
            System.out.println("URL is not correct.......");
        } catch (IllegalArgumentException iae) {
            // Match on the URL scheme (case-insensitively) to pick the message.
            if (url.regionMatches(true, 0, "http:", 0, 5)) {
                System.out.println("Used protocol http is not allowed. Check if the system is behind some proxy/firewall......");
            } else if (url.regionMatches(true, 0, "https:", 0, 6)) {
                System.out.println("Used protocol https is not allowed. Check if the system is behind some proxy/firewall......");
            } else {
                System.out.println("URL or proxy settings are not valid: " + iae.getMessage());
            }
        } catch (FileNotFoundException fe) {
            System.out.println("Page for given URL doesn't exist......");
        } catch (SSLHandshakeException sse) {
            System.out.println("Server doen't support https, please use http instead...........");
        } catch (SocketTimeoutException ste) {
            System.out.println("Check if system is behind some proxy/firewall..........");
        } catch (IOException e) {
            System.out.println(describe(e));
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Maps a generic I/O failure to a message, based on a status code found in its text. */
    private static String describe(IOException e) {
        String msg = e.getMessage();
        // getMessage() may be null, so guard before searching it.
        if (msg != null && msg.contains("503")) {
            return "Either server is down or server is not available...........";
        }
        if (msg != null && msg.contains("403")) {
            return "Access denied by the used proxy, please check...........";
        }
        return "Unable to access the URL: " + e;
    }
}
===============================================================================================
One way is suggested above, but when it comes to URL validation, I have the following questions -
a) If we are receiving a URL to consume, then presumably we have an internet connection.
b) What purpose is served by just checking the format of a URL if no such URL exists?
So I don't see the point of having various APIs or regular expressions to verify the format of URLs; checking a URL offline needs a lot of testing & effort. My point is, why not just try to ping the URL directly to check its validity?
I tried the piece of code below to verify the URL -
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that checks a URL by sending an HTTP {@code HEAD} request and printing a verdict based
 * on the response status code.
 */
public class Ping {

    /** URL checked when no command-line argument is supplied. */
    private static final String DEFAULT_URL = "https://stackoverflow.com/questions///";

    /** Connect/read timeout in milliseconds, so an unresponsive server cannot hang the check. */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Sends a {@code HEAD} request to the URL and prints whether it looks correct.
     *
     * @param args optional; {@code args[0]} is the URL to check, otherwise {@link #DEFAULT_URL}
     *     is used
     * @throws MalformedURLException declared for compatibility; failures are reported on
     *     standard output instead
     * @throws IOException declared for compatibility; failures are reported on standard output
     *     instead
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        // This block not working as expression is not correct.
        /*
         * Pattern pattern = Pattern.compile(
         * "^(http:\\/\\/|https:\\/\\/)?(www.)?([a-zA-Z0-9]+).[a-zA-Z0-9]*.[a-z]{3}.?([a-z]+)?$"
         * ); Matcher matcher = pattern.matcher(url); if(!matcher.find())
         * System.out.println("URL not correct");
         */
        HttpURLConnection connection = null;
        int responseCode;
        try {
            connection = (HttpURLConnection) new URL(url).openConnection();
            connection.setRequestMethod("HEAD");
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            responseCode = connection.getResponseCode();
        } catch (IOException | RuntimeException e) {
            // Covers malformed URLs, non-HTTP URLs (failed cast) and any network failure.
            System.out.println("URL not correct");
            return;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }

        // NOTE(review): HttpURLConnection normally follows same-protocol redirects itself, so
        // a redirect status is presumably only seen when the redirect is not followed - confirm.
        if (responseCode == HttpURLConnection.HTTP_OK) {
            System.out.println("URL is correct");
        } else if (isRedirect(responseCode)) {
            System.out.println("URL is redirected");
        } else if (responseCode / 100 == 4) {
            System.out.println("URL not correct");
        } else {
            // Previously such codes produced no output at all.
            System.out.println("Unexpected response code " + responseCode + " for URL");
        }
    }

    /** Returns whether the status code is one of the HTTP redirect codes. */
    private static boolean isRedirect(int code) {
        return code == HttpURLConnection.HTTP_MOVED_PERM
                || code == HttpURLConnection.HTTP_MOVED_TEMP
                || code == HttpURLConnection.HTTP_SEE_OTHER
                || code == 307
                || code == 308;
    }
}
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.UnknownHostException;
import javax.net.ssl.SSLHandshakeException;
/**
 * Demo URL validator: opens an HTTP connection to a URL through an HTTP proxy and prints a
 * human-readable diagnosis of the outcome to standard output.
 *
 * <p>This is sample code for demonstration purposes, not production use.
 *
 * @author nitin_agrawal
 */
public class ValidateURL {

    /** URL checked when no command-line argument is supplied. */
    private static final String DEFAULT_URL = "http://www.nitinagrawal.com/";

    /** Proxy host; provide the correct proxy address when working behind a firewall or proxy. */
    private static final String PROXY_HOST = "proxy.com";

    /** Proxy port. */
    private static final int PROXY_PORT = 8080;

    /**
     * Connect/read timeout in milliseconds. The timeout setters take milliseconds, so a value
     * of 1 would make practically every real connection attempt time out.
     */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Tries to open the URL through the configured proxy and prints the result.
     *
     * @param args optional; {@code args[0]} is the URL to check, otherwise {@link #DEFAULT_URL}
     *     is used
     */
    public static void main(String[] args) {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        HttpURLConnection connection = null;
        try {
            // Parse the URL first so a malformed URL is reported before any proxy host lookup.
            URL target = new URL(url);
            Proxy proxy =
                    new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_HOST, PROXY_PORT));
            connection = (HttpURLConnection) target.openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.connect();
            // Obtaining the response stream is the actual check; the content is not needed.
            try (InputStream ignored = connection.getInputStream()) {
                // nothing to read
            }
            System.out.println("URL is correct & accessible........");
        } catch (UnknownHostException uhe) {
            System.out.println("Given proxy is incorrect...........");
        } catch (MalformedURLException mue) {
            System.out.println("URL is not correct.......");
        } catch (ConnectException ce) {
            System.out.println("Check if system is behind some proxy.......");
        } catch (SocketException se) {
            System.err.println("Socket error: " + se);
            System.out.println("URL is not correct.......");
        } catch (IllegalArgumentException iae) {
            // Match on the URL scheme (case-insensitively) to pick the message.
            if (url.regionMatches(true, 0, "http:", 0, 5)) {
                System.out.println("Used protocol http is not allowed. Check if the system is behind some proxy/firewall......");
            } else if (url.regionMatches(true, 0, "https:", 0, 6)) {
                System.out.println("Used protocol https is not allowed. Check if the system is behind some proxy/firewall......");
            } else {
                System.out.println("URL or proxy settings are not valid: " + iae.getMessage());
            }
        } catch (FileNotFoundException fe) {
            System.out.println("Page for given URL doesn't exist......");
        } catch (SSLHandshakeException sse) {
            System.out.println("Server doen't support https, please use http instead...........");
        } catch (SocketTimeoutException ste) {
            System.out.println("Check if system is behind some proxy/firewall..........");
        } catch (IOException e) {
            System.out.println(describe(e));
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Maps a generic I/O failure to a message, based on a status code found in its text. */
    private static String describe(IOException e) {
        String msg = e.getMessage();
        // getMessage() may be null, so guard before searching it.
        if (msg != null && msg.contains("503")) {
            return "Either server is down or server is not available...........";
        }
        if (msg != null && msg.contains("403")) {
            return "Access denied by the used proxy, please check...........";
        }
        return "Unable to access the URL: " + e;
    }
}
===============================================================================================
One way is suggested above, but when it comes to URL validation, I have the following questions -
a) If we are receiving a URL to consume, then presumably we have an internet connection.
b) What purpose is served by just checking the format of a URL if no such URL exists?
So I don't see the point of having various APIs or regular expressions to verify the format of URLs; checking a URL offline needs a lot of testing & effort. My point is, why not just try to ping the URL directly to check its validity?
I tried the piece of code below to verify the URL -
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that checks a URL by sending an HTTP {@code HEAD} request and printing a verdict based
 * on the response status code.
 */
public class Ping {

    /** URL checked when no command-line argument is supplied. */
    private static final String DEFAULT_URL = "https://stackoverflow.com/questions///";

    /** Connect/read timeout in milliseconds, so an unresponsive server cannot hang the check. */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Sends a {@code HEAD} request to the URL and prints whether it looks correct.
     *
     * @param args optional; {@code args[0]} is the URL to check, otherwise {@link #DEFAULT_URL}
     *     is used
     * @throws MalformedURLException declared for compatibility; failures are reported on
     *     standard output instead
     * @throws IOException declared for compatibility; failures are reported on standard output
     *     instead
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        // This block not working as expression is not correct.
        /*
         * Pattern pattern = Pattern.compile(
         * "^(http:\\/\\/|https:\\/\\/)?(www.)?([a-zA-Z0-9]+).[a-zA-Z0-9]*.[a-z]{3}.?([a-z]+)?$"
         * ); Matcher matcher = pattern.matcher(url); if(!matcher.find())
         * System.out.println("URL not correct");
         */
        HttpURLConnection connection = null;
        int responseCode;
        try {
            connection = (HttpURLConnection) new URL(url).openConnection();
            connection.setRequestMethod("HEAD");
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            responseCode = connection.getResponseCode();
        } catch (IOException | RuntimeException e) {
            // Covers malformed URLs, non-HTTP URLs (failed cast) and any network failure.
            System.out.println("URL not correct");
            return;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }

        // NOTE(review): HttpURLConnection normally follows same-protocol redirects itself, so
        // a redirect status is presumably only seen when the redirect is not followed - confirm.
        if (responseCode == HttpURLConnection.HTTP_OK) {
            System.out.println("URL is correct");
        } else if (isRedirect(responseCode)) {
            System.out.println("URL is redirected");
        } else if (responseCode / 100 == 4) {
            System.out.println("URL not correct");
        } else {
            // Previously such codes produced no output at all.
            System.out.println("Unexpected response code " + responseCode + " for URL");
        }
    }

    /** Returns whether the status code is one of the HTTP redirect codes. */
    private static boolean isRedirect(int code) {
        return code == HttpURLConnection.HTTP_MOVED_PERM
                || code == HttpURLConnection.HTTP_MOVED_TEMP
                || code == HttpURLConnection.HTTP_SEE_OTHER
                || code == 307
                || code == 308;
    }
}