package org.springblade.mdm.utils;
|
|
import org.apache.tika.Tika;
|
import org.mozilla.universalchardet.UniversalDetector;
|
import org.springblade.core.tool.utils.Charsets;
|
|
import java.io.*;
|
import java.nio.charset.Charset;
|
import java.nio.charset.StandardCharsets;
|
import java.util.ArrayList;
|
import java.util.List;
|
|
public class FileContentUtil {
|
|
/**
|
* 判断是否文本类型
|
* @param inputStream
|
* @return
|
*/
|
public static boolean isTextFile(InputStream inputStream) {
|
Tika tika = new Tika();
|
try {
|
String mimeType = tika.detect(inputStream);
|
//String mimeType = tika.detect(file);
|
return mimeType.startsWith("text/")
|
|| mimeType.equals("application/xml")
|
|| mimeType.equals("application/json");
|
} catch (IOException e) {
|
e.printStackTrace();
|
return false;
|
}
|
}
|
|
/**
|
* 在文本的输入流中插入一行文字
|
* @param ins 输入流
|
* @param lineIndexToInsert 插入的位置0based
|
* @param textToInsert 插入的文本
|
* @return 插入文本后的输入流
|
* @throws IOException 异常
|
*/
|
public static InputStream insertLine(InputStream ins, int lineIndexToInsert,String textToInsert) throws IOException {
|
byte[] bytes = ins.readAllBytes();
|
ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes);
|
Charset charset = Charsets.charset(detectFromInputStream(byteStream));
|
|
byteStream.reset();//重置,使继续可用
|
return insertLine(byteStream, lineIndexToInsert,textToInsert, charset);
|
}
|
public static InputStream insertLine(InputStream ins, int lineIndexToInsert,String textToInsert,Charset charset) throws IOException {
|
List<String> lines = new ArrayList<>();
|
try (
|
InputStreamReader r = new InputStreamReader(ins,charset);//, charset
|
BufferedReader reader = new BufferedReader(r);) {
|
|
String line;
|
while ((line = reader.readLine()) != null) {
|
lines.add(line);
|
}
|
}
|
System.out.println("line="+lines);
|
// 插入新行
|
if (lines.size() < lineIndexToInsert) {
|
lines.add(textToInsert);
|
} else {
|
lines.add(lineIndexToInsert, textToInsert);
|
}
|
|
return convert(lines,charset);
|
}
|
|
public static InputStream convert(List<String> lines,Charset charset) {
|
// 使用系统行分隔符连接所有行
|
String content = String.join(System.lineSeparator(), lines);
|
// 转换为UTF-8字节流
|
return new ByteArrayInputStream(content.getBytes(charset));
|
}
|
|
public static String detect(byte[] content) {
|
UniversalDetector detector = new UniversalDetector(null);
|
//开始给一部分数据,让学习一下啊,官方建议是1000个byte左右(当然这1000个byte你得包含中文之类的)
|
detector.handleData(content, 0, content.length);
|
//识别结束必须调用这个方法
|
detector.dataEnd();
|
//神奇的时刻就在这个方法了,返回字符集编码。
|
return detector.getDetectedCharset();
|
}
|
|
public static String detectFromInputStream(InputStream inputStream) {
|
UniversalDetector detector = new UniversalDetector(null);
|
//开始给一部分数据,让学习一下啊,官方建议是1000个byte左右(当然这1000个byte你得包含中文之类的)
|
byte[] buffer = new byte[2048];
|
try {
|
int actRead = inputStream.read(buffer);
|
|
detector.handleData(buffer, 0, actRead);
|
|
//识别结束必须调用这个方法
|
detector.dataEnd();
|
} catch (IOException e) {
|
throw new RuntimeException(e);
|
}
|
//神奇的时刻就在这个方法了,返回字符集编码。
|
return detector.getDetectedCharset();
|
}
|
|
/**
|
* 读取文件某行
|
* @param inputStream
|
* @param lineIndex 行索引 0based
|
* @return 行文本
|
* @throws IOException
|
*/
|
public static String readLineAt(InputStream inputStream,int lineIndex) throws IOException {
|
String lineText = "";
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
|
// 跳过第一行
|
int current = 0;
|
while(current<=lineIndex && lineText!=null) {
|
lineText = reader.readLine();
|
current++;
|
}
|
}
|
return lineText;
|
}
|
|
/**
|
* 从输入流获取文本
|
* @param ins 束流
|
* @return 文本字符
|
* @throws IOException
|
*/
|
public static String getContentFromStream(InputStream ins) throws IOException {
|
byte[] bytes = ins.readAllBytes();
|
ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes);
|
Charset charset = Charsets.charset(detectFromInputStream(byteStream));
|
byteStream.reset();//重置,使继续可用
|
|
return new String(bytes, charset);
|
}
|
}
|