1.获取页面流
package com.cnki.base.utils;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Minimal helper for issuing HTTP GET requests.
 *
 * @author SongBin on 2018/11/1.
 */
public class HttpUtil {

    /** Browser-like User-Agent header value sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36";

    /** Maximum time, in milliseconds, to wait while establishing the connection. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10_000;

    /** Maximum time, in milliseconds, to wait for data once connected. */
    private static final int READ_TIMEOUT_MILLIS = 30_000;

    /**
     * Sends a GET request to the given URL and returns the response body stream.
     *
     * <p>The caller owns the returned stream and is responsible for closing it.
     *
     * @param urlstr absolute HTTP or HTTPS URL to request
     * @return the stream obtained from the connection's {@code getInputStream()}
     * @throws IOException if the URL is malformed, does not use HTTP(S), or the
     *                     request fails or times out
     */
    public static InputStream doGet(String urlstr) throws IOException {
        URL url = new URL(urlstr);
        java.net.URLConnection raw = url.openConnection();
        // Reject other protocols (file:, jar:, ...) with the declared exception type
        // instead of letting the cast below fail with a ClassCastException.
        if (!(raw instanceof HttpURLConnection)) {
            throw new IOException("Not an HTTP(S) URL: " + urlstr);
        }
        HttpURLConnection conn = (HttpURLConnection) raw;
        // Without explicit timeouts an unresponsive server would block the caller forever.
        conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        conn.setReadTimeout(READ_TIMEOUT_MILLIS);
        conn.setRequestProperty("User-Agent", USER_AGENT);
        return conn.getInputStream();
    }
}
2.将页面流转为字符串
package com.cnki.base.utils;
import java.io.IOException;
import java.io.InputStream;
/**
 * Converts a response {@link InputStream} into a {@link String}.
 *
 * @author SongBin on 2018/11/1.
 */
public class StreamUtil {

    /** Size, in bytes, of the scratch buffer used for each read. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Reads the stream to its end and decodes the bytes with the given charset.
     *
     * <p>All bytes are collected before decoding. Decoding each chunk on its own
     * would corrupt any multi-byte character that happens to be split across two
     * reads. The stream is not closed by this method.
     *
     * @param is      stream to drain
     * @param charset name of the charset used to decode the bytes, e.g. {@code "UTF-8"}
     * @return the decoded content; an empty string if the stream has no data
     * @throws IOException if reading fails or the charset name is not supported
     */
    public static String inputStreamToString(InputStream is, String charset) throws IOException {
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[BUFFER_SIZE];
        int read;
        while ((read = is.read(chunk)) != -1) {
            collected.write(chunk, 0, read);
        }
        // Decode once, over the complete byte sequence.
        return collected.toString(charset);
    }
}
3.刷流量方法
package com.cnki.base.utils;
import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Crawls the paginated index of a blog, collects every article link and requests
 * each article once in order to increase its page-view count.
 *
 * @author SongBin on 2018/11/1.
 */
public class AddCsdnBlogPV {

    /** Root URL of the blog; note the trailing slash. */
    private static final String MY_BLOG_URL = "http://www.songbin.top/";

    /*
     * Matches the "next page" anchor of an index page, e.g. an anchor that starts with
     * <a href="index?ord=newest&pn=2" pageNo="2">
     */
    private static final Pattern NEXT_PAGE_ANCHOR = Pattern.compile(
            "<a href=\"index\\?ord=newest&pn=[0-9]{1,10}\" pageNo=\"[0-9]{1,10}\"> <i class=\"fa fa-angle-right\"></i> </a>");

    /** Extracts the relative href out of a matched "next page" anchor. */
    private static final Pattern NEXT_PAGE_HREF = Pattern.compile("index\\?ord=newest&pn=[0-9]{1,10}");

    /** Matches the site-relative path of an article. */
    private static final Pattern ARTICLE_PATH = Pattern.compile("/view/[0-9]{1,10}");

    /** URLs of all index (article list) pages discovered so far. */
    private final Set<String> blogListPageUrls = new TreeSet<>();

    /** Site-relative paths of all articles discovered so far. */
    private final Set<String> blogUrls = new TreeSet<>();

    /**
     * Discovers all articles and issues one GET request per article.
     *
     * @throws IOException if any page or article cannot be fetched
     */
    public void visitBlog() throws IOException {
        addBlogUrl();
        for (String blogUrl : blogUrls) {
            // Article paths start with '/', and the root URL ends with one.
            String articleUrl = joinUrl(MY_BLOG_URL, blogUrl);
            // try-with-resources closes the stream even if printing fails and tolerates null.
            try (InputStream is = HttpUtil.doGet(articleUrl)) {
                if (is != null) {
                    System.out.println(articleUrl + "访问成功");
                }
            }
        }
    }

    /**
     * Loads every index page URL and then every article path found on those pages.
     *
     * @throws IOException if an index page cannot be fetched
     */
    public void addBlogUrl() throws IOException {
        blogListPageUrls.add(MY_BLOG_URL);
        addBlogListPageUrl(MY_BLOG_URL, blogListPageUrls);
        for (String listUrl : blogListPageUrls) {
            addBlogUrl(listUrl, blogUrls);
        }
    }

    /**
     * Follows the "next page" link starting at {@code pageUrl} and records every
     * index page URL reached that way.
     *
     * <p>Implemented as a loop rather than recursion, and stops as soon as a
     * "next" link points at a page already fetched during this call, so a
     * cyclic link cannot cause endless crawling or a stack overflow.
     *
     * @param pageUrl      index page to start from
     * @param pagelistUrls set that receives the discovered index page URLs
     * @throws IOException if an index page cannot be fetched
     */
    public void addBlogListPageUrl(String pageUrl, Set<String> pagelistUrls) throws IOException {
        Set<String> fetched = new TreeSet<>();
        String current = pageUrl;
        while (true) {
            String html = fetchPage(current);
            fetched.add(current);
            String next = findNextPageUrl(html);
            if (next == null) {
                return;
            }
            pagelistUrls.add(next);
            System.out.println("成功添加博客列表页面地址:" + next);
            if (fetched.contains(next)) {
                return;
            }
            current = next;
        }
    }

    /**
     * Collects the article paths linked from one index page.
     *
     * @param blogListURL URL of the index page to scan
     * @param artlUrls    set that receives the article paths that were found
     * @throws IOException if the index page cannot be fetched
     */
    public void addBlogUrl(String blogListURL, Set<String> artlUrls) throws IOException {
        Matcher matcher = ARTICLE_PATH.matcher(fetchPage(blogListURL));
        while (matcher.find()) {
            String path = matcher.group(0);
            System.out.println("成功添加博客地址:" + path);
            artlUrls.add(path);
        }
    }

    /** Downloads a page as UTF-8 text, always closing the underlying stream. */
    private static String fetchPage(String url) throws IOException {
        try (InputStream is = HttpUtil.doGet(url)) {
            return StreamUtil.inputStreamToString(is, "UTF-8");
        }
    }

    /** Returns the absolute URL of the next index page, or null when the page has none. */
    private static String findNextPageUrl(String html) {
        Matcher anchor = NEXT_PAGE_ANCHOR.matcher(html);
        if (!anchor.find()) {
            return null;
        }
        Matcher href = NEXT_PAGE_HREF.matcher(anchor.group(0));
        return href.find() ? MY_BLOG_URL + href.group(0) : null;
    }

    /** Concatenates base and path without producing a double slash at the seam. */
    private static String joinUrl(String base, String path) {
        if (base.endsWith("/") && path.startsWith("/")) {
            return base + path.substring(1);
        }
        return base + path;
    }
}
4.测试类
package com.cnki.utilstest;
import com.cnki.base.utils.AddCsdnBlogPV;
import java.io.IOException;
/**
* @author SongBin on 2018/11/1.
*/
public class TestUtils {
public static void main(String[] args){
AddCsdnBlogPV addBlogPv = new AddCsdnBlogPV();
try {
addBlogPv.visitBlog();
} catch (IOException e) {
e.printStackTrace();
}
}
}
更多内容请访问:IT源点
注意:本文归作者所有,未经作者允许,不得转载