package org.springblade.mdm.utils; import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.CompositeDetector; import org.apache.tika.detect.DefaultDetector; import org.apache.tika.detect.Detector; import org.apache.tika.detect.TextDetector; import org.apache.tika.mime.MimeType; import org.apache.tika.mime.MimeTypeException; import org.apache.tika.mime.MimeTypes; import org.mozilla.universalchardet.UniversalDetector; import org.springblade.core.tool.utils.Charsets; import java.io.*; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; public class FileContentUtil { /** * 判断是否文本类型 * @param inputStream * @return */ public static boolean isTextFile(InputStream inputStream) { MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); Tika tika = new Tika(mimeTypes); try { String mimeType = tika.detect(inputStream); //String mimeType = tika.detect(file); return mimeType.startsWith("text/") || mimeType.equals("application/xml") || mimeType.equals("application/json"); } catch (IOException e) { e.printStackTrace(); return false; } } /** * 在文本的输入流中插入一行文字 * @param ins 输入流 * @param lineIndexToInsert 插入的位置0based * @param textToInsert 插入的文本 * @return 插入文本后的输入流 * @throws IOException 异常 */ public static InputStream insertLine(InputStream ins, int lineIndexToInsert,String textToInsert) throws IOException { byte[] bytes = ins.readAllBytes(); ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes); Charset charset = Charsets.charset(detectFromInputStream(byteStream)); byteStream.reset();//重置,使继续可用 return insertLine(byteStream, lineIndexToInsert,textToInsert, charset); } public static InputStream insertLine(InputStream ins, int lineIndexToInsert,String textToInsert,Charset charset) throws IOException { List lines = new ArrayList<>(); try ( InputStreamReader r = new InputStreamReader(ins,charset);//, charset BufferedReader reader = new BufferedReader(r);) { String line; while ((line = reader.readLine()) != null) { lines.add(line); } } System.out.println("line="+lines); // 插入新行 if (lines.size() < lineIndexToInsert) { lines.add(textToInsert); } else { lines.add(lineIndexToInsert, textToInsert); } return convert(lines,charset); } public static InputStream convert(List lines,Charset charset) { // 使用系统行分隔符连接所有行 String content = String.join(System.lineSeparator(), lines); // 转换为UTF-8字节流 return new ByteArrayInputStream(content.getBytes(charset)); } public static String detect(byte[] content) { UniversalDetector detector = new UniversalDetector(null); //开始给一部分数据,让学习一下啊,官方建议是1000个byte左右(当然这1000个byte你得包含中文之类的) detector.handleData(content, 0, content.length); //识别结束必须调用这个方法 detector.dataEnd(); //神奇的时刻就在这个方法了,返回字符集编码。 return detector.getDetectedCharset(); } public static String detectFromInputStream(InputStream inputStream) { UniversalDetector detector = new UniversalDetector(null); //开始给一部分数据,让学习一下啊,官方建议是1000个byte左右(当然这1000个byte你得包含中文之类的) byte[] buffer = new byte[2048]; try { int actRead = inputStream.read(buffer); detector.handleData(buffer, 0, actRead); //识别结束必须调用这个方法 detector.dataEnd(); detector.reset(); } catch (IOException e) { throw new RuntimeException(e); } //神奇的时刻就在这个方法了,返回字符集编码。 return detector.getDetectedCharset(); } /** * 读取文件某行 * @param inputStream * @param lineIndex 行索引 0based * @return 行文本 * @throws IOException */ public static String readLineAt(InputStream inputStream,int lineIndex) throws IOException { byte[] bytes = inputStream.readAllBytes(); ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes); Charset charset = Charsets.charset(detectFromInputStream(byteStream)); byteStream.reset();//重置,使继续可用 String lineText = ""; try (BufferedReader reader = new BufferedReader(new InputStreamReader(byteStream,charset))) { // 跳过第一行 int current = 0; while(current<=lineIndex && lineText!=null) { lineText = reader.readLine(); current++; } } return lineText; } /** * 从输入流获取文本 * @param ins 束流 * @return 文本字符 * @throws IOException */ public static String getContentFromStream(InputStream ins) throws IOException { byte[] bytes = ins.readAllBytes(); ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes); Charset charset = Charsets.charset(detectFromInputStream(byteStream)); byteStream.reset();//重置,使继续可用 return new String(bytes, charset); } /** * 替换文件中的某一行 * @param ins 输入流 * @param replaceWith 替换的文本 * @param lineIndex 行索引 0 based */ public static InputStream replaceAtLine(InputStream ins, int lineIndex,String replaceWith) throws IOException { byte[] bytes = ins.readAllBytes(); ByteArrayInputStream byteStream = new ByteArrayInputStream(bytes); Charset charset = Charsets.charset(detectFromInputStream(byteStream)); byteStream.reset(); Path tempFile = Files.createTempFile("temppro"+System.currentTimeMillis(), ".tmp"); try (InputStreamReader isr = new InputStreamReader(byteStream, charset); BufferedReader reader = new BufferedReader(isr); BufferedWriter writer = Files.newBufferedWriter(tempFile,charset)) {// int currentLine = 0; String line; while ((line = reader.readLine()) != null) { // 如果是目标行,写入新内容 if (currentLine == lineIndex) { writer.write(replaceWith); } else { writer.write(line); } writer.newLine(); currentLine++; } } return new ByteArrayInputStream(Files.newInputStream(tempFile).readAllBytes()); } public static List readFirstNLines(InputStream inputStream, int n) throws IOException { List lines = new ArrayList<>(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { String line; int count = 0; while ((line = reader.readLine()) != null && count < n) { lines.add(line); count++; } } return lines; } }