Some time ago we needed to load a large batch of data. The DBA provided CSV files, but each file was several GB in size; loading one directly into the database was very slow and could run out of memory. To work around this, I wrote a small program that quickly splits a large file into smaller ones.
import org.apache.log4j.LogManager;import org.apache.log4j.Logger; import java.io.*;import java.util.*;import java.util.concurrent.*; public class FileSplitUtil { private final static Logger log = LogManager.getLogger(FileSplitUtil.class); private static final long originFileSize = 1024 * 1024 * 100;// 100M private static final int blockFileSize = 1024 * 1024 * 64;// To prevent Chinese garbled, you must take 2 to the N power/** * CVS file separator */ private static final char cvsSeparator = '^'; public static void main(String args[]){ long start = System.currentTimeMillis(); try { String fileName = "D://csvtest//aa.csv"; File sourceFile = new File(fileName); if (sourceFile.length() >= originFileSize) { String cvsFileName = fileName.replaceAll("////", "/"); FileSplitUtil fileSplitUtil = new FileSplitUtil(); List<String> parts=fileSplitUtil.splitBySize(cvsFileName, blockFileSize); for(String part:parts){ System.out.println("partName is:"+part); } } System.out.println("Total file length"+sourceFile.length()+", time to split the file:" + (System.currentTimeMillis() - start) + "ms."); }catch (Exception e){ log.info(e.getStackTrace()); } } /** * Split file* * @param fileName The complete file name to be split* @param byteSize Split by how many bytes* @return List of split file names*/ public List<String> splitBySize(String fileName, int byteSize) throws IOException, InterruptedException { List<String> parts = new ArrayList<String>(); File file = new File(fileName); int count = (int) Math.ceil(file.length() / (double) byteSize); int countLen = (count + "").length(); RandomAccessFile raf = new RandomAccessFile(fileName, "r"); long totalLen = raf.length(); CountDownLatch latch = new CountDownLatch(count); for (int i = 0; i < count; i++) { String partFileName = file.getPath() + "." 
+ leftPad((i + 1) + "", countLen, '0') + ".cvs"; int readSize=byteSize; long startPos=(long)i * byteSize; long nextPos=(long)(i+1) * byteSize; if(nextPos>totalLen){ readSize= (int) (totalLen-startPos); } new SplitRunnable(readSize, startPos, partFileName, file, latch).run(); parts.add(partFileName); } latch.await();//Waiting for all files to be written//The lines may be cut during cutting, and all split files may be processed, mergeRow(parts); return parts; } /** * Segment processing Runnable * * @author supeidong */ private class SplitRunnable implements Runnable { int byteSize; String partFileName; File originFile; long startPos; CountDownLatch latch; public SplitRunnable(int byteSize, long startPos, String partFileName, File originFile, CountDownLatch latch) { this.startPos = startPos; this.byteSize = byteSize; this.partFileName = partFileName; this.originFile = originFile; this.latch = latch; } public void run() { RandomAccessFile rFile; OutputStream os; try { rFile = new RandomAccessFile(originFile, "r"); byte[] b = new byte[byteSize]; rFile.seek(startPos);// Move the pointer to the beginning of each "segment" int s = rFile.read(b); os = new FileOutputStream(partFileName); os.write(b, 0, s); os.flush(); os.close(); latch.countDown(); } catch (IOException e) { log.error(e.getMessage()); latch.countDown(); } } } /** * Merge cut lines* * @param parts */ private void mergeRow(List<String> parts) { List<PartFile> partsFiles = new ArrayList<PartFile>(); try { //Assemble the split table object for (int i=0;i<parts.size();i++) { String partFileName=parts.get(i); File splitFileTemp = new File(partFileName); if (splitFileTemp.exists()) { PartFile partFile = new PartFile(); BufferedReader reader=new BufferedReader(new InputStreamReader(new FileInputStream(splitFileTemp),"gbk")); String firstRow = reader.readLine(); String secondRow = reader.readLine(); String endRow = readLastLine(partFileName); partFile.setPartFileName(partFileName); partFile.setFirstRow(firstRow); 
partFile.setEndRow(endRow); if(i>=1){ String prePartFile=parts.get(i - 1); String preEndRow = readLastLine(prePartFile); partFile.setFirstIsFull(getCharCount(firstRow+preEndRow)>getCharCount(secondRow)); } partFiles.add(partFile); reader.close(); } } //Write the lines that need to be merged for (int i = 0; i < partFiles.size() - 1; i++) { PartFile partFile = partFiles.get(i); PartFile partFileNext = partFiles.get(i + 1); StringBuilder sb = new StringBuilder(); if (partFileNext.getFirstIsFull()) { sb.append("/r/n"); sb.append(partFileNext.getFirstRow()); } else { sb.append(partFileNext.getFirstRow()); } writeLastLine(partFile.getPartFileName(),sb.toString()); } } catch (Exception e) { log.error(e.getMessage()); } } /** * Get the number of times a character appears* @param s * @return */ private int getCharCount(String s) { int count = 0; for (int i = 0; i < s.length(); i++) { if (s.charAt(i) == cvsSeparator) { count++; } } return count; } /** * Use BufferedInputStream to read the number of file lines* * @param filename * @return */ public int getFileRow(String filename) throws IOException { InputStream is = new BufferedInputStream(new FileInputStream(filename)); byte[] c = new byte[1024]; int count = 0; int readChars = 0; while ((readChars = is.read(c)) != -1) { for (int i = 0; i < readChars; ++i) { if (c[i] == '/n') ++count; } } is.close(); return count; } /** * Read the last line of data* @param filename * @return * @throws IOException */ private String readLastLine(String filename) throws IOException { // Use RandomAccessFile , Find the last line of data from behind RandomAccessFile raf = new RandomAccessFile(filename, "r"); long len = raf.length(); String lastLine = ""; if(len!=0L) { long pos = len - 1; while (pos > 0) { pos--; raf.seek(pos); if (raf.readByte() == '/n') { lastLine = raf.readLine(); lastLine=new String(lastLine.getBytes("8859_1"), "gbk"); break; } } } } raf.close(); return lastLine; } /** * Modify the last line of data* @param fileName * @param 
lastString * @return * @throws IOException */ private void writeLastLine(String fileName,String lastString){ try { // Open a random access file stream and read and write according to the read and write method RandomAccessFile randomFile = new RandomAccessFile(fileName, "rw"); // File length, number of bytes long fileLength = randomFile.length(); // Move the write file pointer to the end of the file. randomFile.seek(fileLength); //Gbk must be added here, otherwise garbled randomFile.write(lastString.getBytes("gbk")); randomFile.close(); } catch (IOException e) { log.error(e.getMessage()); } } /** * Left fill* * @param str * @param length * @param ch * @return */ public static String leftPad(String str, int length, char ch) { if (str.length() >= length) { return str; } char[] chs = new char[length]; Arrays.fill(chs, ch); char[] src = str.toCharArray(); System.arraycopy(src, 0, chs, length - src.length, src.length); return new String(chs); } /** * Merge file line internal class*/ class PartFile { private String partFileName; private String firstRow; private String endRow; private boolean firstIsFull; public String getPartFileName() { return partFileName; } public void setPartFileName(String partFileName) { this.partFileName = partFileName; } public String getFirstRow() { return firstRow; } public void setFirstRow(String firstRow) { this.firstRow = firstRow; } public String getEndRow() { return endRow; } public void setEndRow(String endRow) { this.endRow = endRow; } public boolean getFirstIsFull() { return firstIsFull; } public void setFirstIsFull(boolean firstIsFull) { this.firstIsFull = firstIsFull; } } }The above is all about this article, I hope it will be helpful for everyone to learn Java programming.