package org.springblade.mdm.utils;
|
|
import org.apache.tika.Tika;
|
import org.apache.tika.config.TikaConfig;
|
import org.apache.tika.detect.CompositeDetector;
|
import org.apache.tika.detect.DefaultDetector;
|
import org.apache.tika.detect.Detector;
|
import org.apache.tika.detect.TextDetector;
|
import org.apache.tika.mime.MimeType;
|
import org.apache.tika.mime.MimeTypeException;
|
import org.apache.tika.mime.MimeTypes;
|
import org.mozilla.universalchardet.UniversalDetector;
|
import org.springblade.core.tool.utils.Charsets;
|
|
import java.io.*;
|
import java.nio.charset.Charset;
|
import java.nio.charset.StandardCharsets;
|
import java.nio.file.Files;
|
import java.nio.file.Path;
|
import java.util.ArrayList;
|
import java.util.List;
|
|
public class FileContentUtil {
|
|
/**
|
* 判断是否文本类型
|
* @param inputStream
|
* @return
|
*/
|
public static boolean isTextFile(InputStream inputStream) {
|
MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes();
|
Tika tika = new Tika(mimeTypes);
|
|
try {
|
String mimeType = tika.detect(inputStream);
|
//String mimeType = tika.detect(file);
|
return mimeType.startsWith("text/")
|
|| mimeType.equals("application/xml")
|
|| mimeType.equals("application/json");
|
} catch (IOException e) {
|
e.printStackTrace();
|
return false;
|
}
|
}
|
|
/**
|
* 在文本的输入流中插入一行文字
|
* @param ins 输入流
|
* @param lineIndexToInsert 插入的位置0based
|
* @param textToInsert 插入的文本
|
* @return 插入文本后的输入流
|
* @throws IOException 异常
|
*/
|
public static InputStream insertLine(InputStream ins, int lineIndexToInsert,String textToInsert) throws IOException {
|
byte[] bytes = ins.readAllBytes();
|
ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes);
|
Charset charset = Charsets.charset(detectFromInputStream(byteStream));
|
|
byteStream.reset();//重置,使继续可用
|
return insertLine(byteStream, lineIndexToInsert,textToInsert, charset);
|
}
|
public static InputStream insertLine(InputStream ins, int lineIndexToInsert,String textToInsert,Charset charset) throws IOException {
|
List<String> lines = new ArrayList<>();
|
try (
|
InputStreamReader r = new InputStreamReader(ins,charset);//, charset
|
BufferedReader reader = new BufferedReader(r);) {
|
|
String line;
|
while ((line = reader.readLine()) != null) {
|
lines.add(line);
|
}
|
}
|
System.out.println("line="+lines);
|
// 插入新行
|
if (lines.size() < lineIndexToInsert) {
|
lines.add(textToInsert);
|
} else {
|
lines.add(lineIndexToInsert, textToInsert);
|
}
|
|
return convert(lines,charset);
|
}
|
|
public static InputStream convert(List<String> lines,Charset charset) {
|
// 使用系统行分隔符连接所有行
|
String content = String.join(System.lineSeparator(), lines);
|
// 转换为UTF-8字节流
|
return new ByteArrayInputStream(content.getBytes(charset));
|
}
|
|
public static String detect(byte[] content) {
|
UniversalDetector detector = new UniversalDetector(null);
|
//开始给一部分数据,让学习一下啊,官方建议是1000个byte左右(当然这1000个byte你得包含中文之类的)
|
detector.handleData(content, 0, content.length);
|
//识别结束必须调用这个方法
|
detector.dataEnd();
|
//神奇的时刻就在这个方法了,返回字符集编码。
|
return detector.getDetectedCharset();
|
}
|
|
public static String detectFromInputStream(InputStream inputStream) {
|
UniversalDetector detector = new UniversalDetector(null);
|
//开始给一部分数据,让学习一下啊,官方建议是1000个byte左右(当然这1000个byte你得包含中文之类的)
|
byte[] buffer = new byte[2048];
|
try {
|
int actRead = inputStream.read(buffer);
|
detector.handleData(buffer, 0, actRead);
|
//识别结束必须调用这个方法
|
detector.dataEnd();
|
detector.reset();
|
} catch (IOException e) {
|
throw new RuntimeException(e);
|
}
|
//神奇的时刻就在这个方法了,返回字符集编码。
|
return detector.getDetectedCharset();
|
}
|
|
/**
|
* 读取文件某行
|
* @param inputStream
|
* @param lineIndex 行索引 0based
|
* @return 行文本
|
* @throws IOException
|
*/
|
public static String readLineAt(InputStream inputStream,int lineIndex) throws IOException {
|
byte[] bytes = inputStream.readAllBytes();
|
ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes);
|
Charset charset = Charsets.charset(detectFromInputStream(byteStream));
|
byteStream.reset();//重置,使继续可用
|
|
String lineText = "";
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(byteStream,charset))) {
|
// 跳过第一行
|
int current = 0;
|
while(current<=lineIndex && lineText!=null) {
|
lineText = reader.readLine();
|
current++;
|
}
|
}
|
return lineText;
|
}
|
|
/**
|
* 从输入流获取文本
|
* @param ins 束流
|
* @return 文本字符
|
* @throws IOException
|
*/
|
public static String getContentFromStream(InputStream ins) throws IOException {
|
byte[] bytes = ins.readAllBytes();
|
ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes);
|
Charset charset = Charsets.charset(detectFromInputStream(byteStream));
|
byteStream.reset();//重置,使继续可用
|
|
return new String(bytes, charset);
|
}
|
|
/**
|
* 替换文件中的某一行
|
* @param ins 输入流
|
* @param replaceWith 替换的文本
|
* @param lineIndex 行索引 0 based
|
*/
|
public static InputStream replaceAtLine(InputStream ins, int lineIndex,String replaceWith) throws IOException {
|
byte[] bytes = ins.readAllBytes();
|
ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes);
|
Charset charset = Charsets.charset(detectFromInputStream(byteStream));
|
byteStream.reset();
|
|
Path tempFile = Files.createTempFile("temppro"+System.currentTimeMillis(), ".tmp");
|
|
try (InputStreamReader isr = new InputStreamReader(byteStream, charset);
|
BufferedReader reader = new BufferedReader(isr);
|
BufferedWriter writer = Files.newBufferedWriter(tempFile,charset)) {//
|
|
int currentLine = 0;
|
String line;
|
|
while ((line = reader.readLine()) != null) {
|
// 如果是目标行,写入新内容
|
if (currentLine == lineIndex) {
|
writer.write(replaceWith);
|
} else {
|
writer.write(line);
|
}
|
writer.newLine();
|
currentLine++;
|
}
|
|
}
|
return new ByteArrayInputStream(Files.newInputStream(tempFile).readAllBytes());
|
}
|
|
public static List<String> readFirstNLines(InputStream inputStream, int n) throws IOException {
|
List<String> lines = new ArrayList<>();
|
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
|
String line;
|
int count = 0;
|
|
while ((line = reader.readLine()) != null && count < n) {
|
lines.add(line);
|
count++;
|
}
|
}
|
|
return lines;
|
}
|
}
|