記錄英語單詞時,想把英語和中文翻譯分別對齊,有些人寫代碼喜歡把變數按這種方式對齊。在網上沒搜到相關方法,於是自己試著寫代碼去實現,原本以為很簡單,寫的時候才發現有不少問題。先看效果: 普通的 對齊前: 對齊後: 發揮點創意 對齊前: 對齊後: 實現 實現的思路比較簡單,讀取文本文件,按正則分割,找出 ...
記錄英語單詞時,想把英語和中文翻譯分別對齊,有些人寫代碼喜歡把變數按這種方式對齊。在網上沒搜到相關方法,於是自己試著寫代碼去實現,原本以為很簡單,寫的時候才發現有不少問題。先看效果:
普通的
對齊前:
對齊後:
發揮點創意
對齊前:
對齊後:
實現
實現的思路比較簡單,讀取文本文件,按正則分割,找出最長的部分,補齊空格,輸出。
看起來相當簡單,花了一個多小時,就寫出來了,馬上運行,發現輸出一團糟,去數每個部分的字元數,個數是一樣的,網上一搜,原來跟字體有關係,好吧,那換個等寬的字體。換好字體後有些地方已經對齊了,有些地方還是沒對齊,發現是中文的問題,中文寬度與英文寬度不相同,於是首先根據正則去判斷字元是中文還是英文,然後自己實現計算字元長度的方法,在判斷中文字元上折騰了許久,因為標點符號等等都要考慮進去,反正是來來回回試了好久,對Unicode編碼範圍不熟悉,沒辦法。終於,好像都搞定了,反覆測試,突然發現第一行的對齊少了一個空格。Debug後發現第一行的最開始有一個奇怪的字元"\uFEFF",上網一搜,原來是Unicode的BOM(Byte Order Mark,位元組順序標記),有些編輯器儲存UTF-8文件時會把它加在文件開頭,於是直接把它去掉了……
反正是遇到了各種各樣的問題,越到後面心裡越沒底了,與字元集相關的問題實在是太頭疼了,而且我根本就沒去處理編碼的問題:程式讀寫文件時用的是JVM的預設編碼(在IDE中運行時通常就是IDE設定的編碼),所以文本的編碼需要和它保持一致,否則就會產生亂碼。我也就這樣算了,以下是Java代碼實現。
源碼
因為看過《重構》和《代碼整潔之道》,寫代碼時時刻想著要寫乾凈點,擴展性強點,經過反覆修改,最終自己覺得還行吧,當然,肯定有不少值得改進的地方,現在就這樣吧。
App.java
package textalign;
import java.io.IOException;
/**
 * Command-line entry point: aligns the blocks of one text file and prints how long it took.
 *
 * @author tingl
 * @version 2017/9/27
 */
public class App {
    /**
     * Aligns the file named by the first argument, or a built-in default path when no
     * argument is given, then prints the elapsed wall-clock time in milliseconds.
     * An {@link IOException} from the alignment is reported via its stack trace and
     * does not stop the elapsed time from being printed.
     *
     * @param args optional; {@code args[0]} is the path of the file to align
     */
    public static void main(String[] args) {
        final long startedAt = System.currentTimeMillis();

        // The no-arg constructor keeps the default split rule. A custom split regex can be
        // passed instead, e.g. ",|。|,|[.]|( {2,})|\t| +".
        final TextAlign aligner = new TextAlign();
        final String target = args.length > 0 ? args[0] : "C:\\Users\\tingl\\Desktop\\Test2.txt";

        try {
            aligner.align(target);
        } catch (IOException e) {
            e.printStackTrace();
        }

        final long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println(elapsedMillis);
    }
}
TextAlign.java
package textalign;
import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Pads the blocks of every line of a text file with spaces so that blocks with the same
 * index start at the same display column in a monospaced font. Characters matched by
 * {@link #CHINESE_CHARACTER_PATTERN} are counted as two columns wide.
 *
 * @author tingl
 * @version 2017/9/27
 */
public class TextAlign {
    /** Number of spaces placed after the widest block of each column. */
    private static final int SEPARATE_SPACE_AMOUNT = 4;
    /** Regex for the characters treated as double-width (three Unicode ranges). */
    private static final String CHINESE_CHARACTER = "[\u4e00-\u9fa5]|[\uFE30-\uFFA0]|[\u3000-\u303F]";
    private static final Pattern CHINESE_CHARACTER_PATTERN = Pattern.compile(CHINESE_CHARACTER);

    /** Reads the source file, hands out its lines, and writes them back out. */
    private final TextAlignFileUtil textAlignFileUtil;
    /** One entry per input line; each entry holds that line's blocks. */
    private List<String[]> textLines;
    /** Display width of the widest block seen at each block index. */
    private int[] longestBlockLengths;

    /** Creates an aligner that splits lines with the file utility's default regex. */
    public TextAlign() {
        this.textAlignFileUtil = new TextAlignFileUtil();
    }

    /**
     * Creates an aligner that splits lines with a custom regex.
     *
     * @param spiltRegex regular expression used to split each line into blocks
     */
    public TextAlign(String spiltRegex) {
        this.textAlignFileUtil = new TextAlignFileUtil(spiltRegex);
    }

    /**
     * Reads the file, pads its blocks in place, and writes the result through the file utility.
     *
     * @param filePath path of the text file to align
     * @throws IOException if reading or writing fails
     */
    public void align(String filePath) throws IOException {
        this.textLines = textAlignFileUtil.readToList(filePath);
        initLongestBlockLengths();
        fillTextLinesBySpaces();
        // The blocks were padded in place, so the utility writes the padded list it handed out.
        textAlignFileUtil.write();
    }

    /** Sizes the width table to the longest line, then records the widest block per index. */
    private void initLongestBlockLengths() {
        int columnCount = 0;
        for (String[] row : textLines) {
            columnCount = Math.max(columnCount, row.length);
        }
        longestBlockLengths = new int[columnCount];

        for (String[] row : textLines) {
            // Lines with fewer than two blocks are never padded, so they do not affect widths.
            if (row.length >= 2) {
                for (int col = 0; col < row.length; col++) {
                    int width = stringLengthFitWidth(row[col]);
                    longestBlockLengths[col] = Math.max(longestBlockLengths[col], width);
                }
            }
        }
    }

    /**
     * Computes the display width of a string: one column per character, plus one extra
     * column for every character matched by the double-width pattern.
     *
     * @param s the text to measure
     * @return the width in columns
     */
    private int stringLengthFitWidth(String s) {
        boolean containsWide = CHINESE_CHARACTER_PATTERN.matcher(s).find();
        if (!containsWide) {
            return s.length();
        }
        int width = 0;
        for (String unit : s.split("")) {
            width += CHINESE_CHARACTER_PATTERN.matcher(unit).find() ? 2 : 1;
        }
        return width;
    }

    /** Appends padding to every block except the last one of each line. */
    private void fillTextLinesBySpaces() {
        for (String[] row : textLines) {
            int lastIndex = row.length - 1;
            for (int col = 0; col < lastIndex; col++) {
                int padding = longestBlockLengths[col]
                        - stringLengthFitWidth(row[col])
                        + SEPARATE_SPACE_AMOUNT;
                row[col] = row[col] + spaces(padding);
            }
        }
    }

    /**
     * Builds a run of spaces.
     *
     * @param spaceAmount number of spaces; zero or less yields an empty string
     * @return the padding string
     */
    private String spaces(int spaceAmount) {
        StringBuilder padding = new StringBuilder();
        for (int remaining = spaceAmount; remaining > 0; remaining--) {
            padding.append(" ");
        }
        return padding.toString();
    }
}
TextAlignFileUtil.java
package textalign;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * File helper for the text aligner: reads a text file into per-line arrays of blocks,
 * picks a non-clashing output path next to the source file, and writes the blocks back.
 *
 * <p>Files are read and written with the platform default charset, so the source file
 * must be stored in that charset.
 *
 * @author tingl
 * @version 2017/9/27
 */
class TextAlignFileUtil {
    /** Suffix inserted before the extension of the output file name. */
    private static final String FILENAME_POSTFIX = "_aligned";
    /** Byte order mark (U+FEFF) that some editors put at the start of a file. */
    private static final String BOM = "\uFEFF";

    /** Regex that separates blocks; the default is two or more spaces, or a tab. */
    private String spiltRegex = "( {2,})|\t";
    /** Lines of the most recently read file; the same list is returned to the caller. */
    private List<String[]> textLines;
    /** Output path chosen by the most recent read. */
    private String outPath;

    /** Creates a utility that uses the default split regex. */
    TextAlignFileUtil() {
    }

    /**
     * Creates a utility that uses a custom split regex.
     *
     * @param spiltRegex regular expression used to split each line into blocks
     */
    TextAlignFileUtil(String spiltRegex) {
        this.spiltRegex = spiltRegex;
    }

    /**
     * Reads the file at {@code path} and splits every line into trimmed, non-empty blocks.
     * Also decides the output path that a later {@link #write()} will use.
     *
     * @param path path of the source file
     * @return one array of blocks per line; a blank line yields an empty array, an empty
     *         file yields an empty list
     * @throws IOException if the file cannot be read
     */
    List<String[]> readToList(String path) throws IOException {
        return readToList(new File(path));
    }

    private List<String[]> readToList(File file) throws IOException {
        getOutPath(file.getAbsolutePath());
        List<String[]> lines = new ArrayList<>();
        // try-with-resources closes the reader even when reading fails part-way.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line = reader.readLine();
            if (line != null) {
                // Strip the BOM from the raw first line, before splitting, so that it can
                // neither distort the first block's width nor survive as an empty block.
                line = removeBomHead(line);
            }
            while (line != null) {
                lines.add(removeEmptyAndTrim(line.split(spiltRegex)));
                line = reader.readLine();
            }
        }
        textLines = lines;
        return textLines;
    }

    /**
     * Chooses the output path: the source path with the postfix inserted before the
     * extension. The postfix is applied repeatedly until the path does not exist yet.
     */
    private void getOutPath(String srcPath) {
        String candidate = srcPath;
        do {
            candidate = appendPostfix(candidate);
        } while (new File(candidate).exists());
        outPath = candidate;
    }

    /** Inserts the postfix before the file extension, or appends it when there is none. */
    private static String appendPostfix(String path) {
        int nameStart = path.lastIndexOf(File.separatorChar) + 1;
        int dotPosition = path.lastIndexOf('.');
        // A dot before the file name belongs to a directory, not to an extension.
        if (dotPosition < nameStart) {
            return path + FILENAME_POSTFIX;
        }
        return path.substring(0, dotPosition) + FILENAME_POSTFIX + path.substring(dotPosition);
    }

    /** Trims every block and drops the ones that end up empty. */
    private String[] removeEmptyAndTrim(String[] src) {
        List<String> kept = new ArrayList<>(src.length);
        for (String block : src) {
            String trimmed = block.trim();
            if (!trimmed.isEmpty()) {
                kept.add(trimmed);
            }
        }
        return kept.toArray(new String[0]);
    }

    /** Returns {@code line} without a leading byte order mark. */
    private static String removeBomHead(String line) {
        return line.startsWith(BOM) ? line.substring(BOM.length()) : line;
    }

    /**
     * Writes the lines of the most recent read to the chosen output path, one line per
     * entry, with the blocks of a line concatenated without any separator.
     *
     * @throws IOException if the output file cannot be written
     * @throws IllegalStateException if no file has been read yet
     */
    void write() throws IOException {
        if (outPath == null || textLines == null) {
            throw new IllegalStateException("readToList must be called before write");
        }
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outPath))) {
            for (String[] blocks : textLines) {
                writer.write(getLine(blocks));
                writer.newLine();
            }
        }
    }

    /** Concatenates the blocks of one line. */
    private String getLine(String[] blocks) {
        return String.join("", blocks);
    }
}