從類似如下的文本文件中讀取出所有的姓名,並列印出重覆的姓名和重覆的次數,並按重覆次數排序 1,張三,28 2,李四,35 3,張三,28 4,王五,35 5,張三,28 6,李四,35 7,趙六,28 8,田七,35 上面代碼讀取的是docx的word文件,直接讀取會出現亂碼,因word中不僅有文本 ...
從類似如下的文本文件中讀取出所有的姓名,並列印出重覆的姓名和重覆的次數,並按重覆次數排序
1,張三,28
2,李四,35
3,張三,28
4,王五,35
5,張三,28
6,李四,35
7,趙六,28
8,田七,35
package com.swift;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Reads comma-separated records of the form {@code id,name,age} from a .docx
 * file, prints every name found, then prints each distinct name with the number
 * of times it occurs, ordered by occurrence count (highest first).
 */
public class IO_Sort_Select {

    /**
     * Program entry point.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the source document cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        File file_s = new File("e:\\neck\\data.docx");
        File dir = new File("e:\\neck");
        // The previous check "!dir.exists() && dir.isDirectory()" could never be
        // true, because isDirectory() is false for a path that does not exist.
        if (!dir.exists()) {
            System.out.println("目錄不存在,即將創建...");
            dir.mkdirs();
        }

        String text = readFromDocx(file_s);
        // The text extracted from the .docx is split into rows on '\n'.
        List<String> list = extractNames(text, "\\n");
        printList(list);

        Map<String, Integer> map = countNames(list);
        List<Entry<String, Integer>> listMap = sortByCountDescending(map);
        for (Entry<String, Integer> entry : listMap) {
            System.out.println("重覆的姓名是 :" + entry.getKey() + " 重覆的次數是:" + entry.getValue());
        }
    }

    /**
     * Collects the second comma-separated field (the name) of every row.
     * Rows with fewer than two fields are skipped.
     *
     * @param text the full document text
     * @param rowSeparatorRegex regular expression used to split the text into rows
     * @return the names in document order, duplicates included
     */
    private static List<String> extractNames(String text, String rowSeparatorRegex) {
        List<String> names = new ArrayList<String>();
        for (String row : text.split(rowSeparatorRegex)) {
            String[] columns = row.split("\\,");
            if (columns.length > 1) {
                names.add(columns[1]);
            }
        }
        return names;
    }

    /**
     * Counts how many times each name occurs.
     *
     * @param names the names to count, duplicates included
     * @return a map from name to occurrence count, with keys in natural order
     */
    private static Map<String, Integer> countNames(List<String> names) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        for (String name : names) {
            Integer seen = counts.get(name);
            counts.put(name, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Orders the entries by count, highest first; entries with equal counts are
     * ordered by name.
     *
     * @param counts map from name to occurrence count
     * @return a new list holding the sorted entries
     */
    private static List<Entry<String, Integer>> sortByCountDescending(Map<String, Integer> counts) {
        List<Entry<String, Integer>> entries =
                new ArrayList<Entry<String, Integer>>(counts.entrySet());
        Collections.sort(entries, new Comparator<Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> arg0, Entry<String, Integer> arg1) {
                // compareTo instead of subtraction, which can overflow.
                int byCount = arg1.getValue().compareTo(arg0.getValue());
                return byCount != 0 ? byCount : arg0.getKey().compareTo(arg1.getKey());
            }
        });
        return entries;
    }

    /**
     * Prints each element of the list on its own line.
     *
     * @param list the strings to print
     */
    private static void printList(List<String> list) {
        for (String str : list) {
            System.out.println(str);
        }
    }

    /**
     * Extracts the text of a .docx file using Apache POI.
     *
     * @param file the .docx file to read
     * @return the text returned by {@code XWPFWordExtractor.getText()}
     * @throws IOException if the file cannot be opened or parsed
     */
    public static String readFromDocx(File file) throws IOException {
        FileInputStream fis = new FileInputStream(file);
        try {
            XWPFDocument docx = new XWPFDocument(fis);
            XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
            return extractor.getText();
        } finally {
            // Always release the file handle, even when parsing fails.
            // NOTE(review): depending on the POI version the document/extractor
            // may be Closeable as well - consider closing them too.
            fis.close();
        }
    }
}
上面的代碼讀取的是 docx 格式的 Word 文件。若直接當作純文本讀取會出現亂碼,因為 docx 文件中不僅有文本,還有壓縮過的格式屬性等其他內容,所以要使用 Apache POI 的 jar 包進行解析。
解析的jar包如下圖
這個應該比較全面了,也可以解析excel操作或寫入表格等
下載地址:
鏈接: https://pan.baidu.com/s/1htYPKLA 密碼: e36e
下面是讀取 doc 的方法
package com.swift;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Reads comma-separated records of the form {@code id,name,age} from a legacy
 * .doc file, prints every name found, then prints each distinct name with the
 * number of times it occurs, ordered by occurrence count (highest first).
 */
public class IO_Sort_Select {

    /**
     * Program entry point.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the source document cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        File file_s = new File("e:\\neck\\data.doc");
        File dir = new File("e:\\neck");
        // The previous check "!dir.exists() && dir.isDirectory()" could never be
        // true, because isDirectory() is false for a path that does not exist.
        if (!dir.exists()) {
            System.out.println("目錄不存在,即將創建...");
            dir.mkdirs();
        }

        String text = readFromDoc(file_s);
        // Rows of the .doc text are split on '\r' here, not '\n'.
        List<String> list = extractNames(text, "\\r");
        printList(list);

        Map<String, Integer> map = countNames(list);
        List<Entry<String, Integer>> listMap = sortByCountDescending(map);
        for (Entry<String, Integer> entry : listMap) {
            System.out.println("重覆的姓名是 :" + entry.getKey() + " 重覆的次數是:" + entry.getValue());
        }
    }

    /**
     * Collects the second comma-separated field (the name) of every row.
     * Rows with fewer than two fields are skipped.
     *
     * @param text the full document text
     * @param rowSeparatorRegex regular expression used to split the text into rows
     * @return the names in document order, duplicates included
     */
    private static List<String> extractNames(String text, String rowSeparatorRegex) {
        List<String> names = new ArrayList<String>();
        for (String row : text.split(rowSeparatorRegex)) {
            String[] columns = row.split("\\,");
            if (columns.length > 1) {
                names.add(columns[1]);
            }
        }
        return names;
    }

    /**
     * Counts how many times each name occurs.
     *
     * @param names the names to count, duplicates included
     * @return a map from name to occurrence count, with keys in natural order
     */
    private static Map<String, Integer> countNames(List<String> names) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        for (String name : names) {
            Integer seen = counts.get(name);
            counts.put(name, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Orders the entries by count, highest first; entries with equal counts are
     * ordered by name.
     *
     * @param counts map from name to occurrence count
     * @return a new list holding the sorted entries
     */
    private static List<Entry<String, Integer>> sortByCountDescending(Map<String, Integer> counts) {
        List<Entry<String, Integer>> entries =
                new ArrayList<Entry<String, Integer>>(counts.entrySet());
        Collections.sort(entries, new Comparator<Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> arg0, Entry<String, Integer> arg1) {
                // compareTo instead of subtraction, which can overflow.
                int byCount = arg1.getValue().compareTo(arg0.getValue());
                return byCount != 0 ? byCount : arg0.getKey().compareTo(arg1.getKey());
            }
        });
        return entries;
    }

    /**
     * Prints each element of the list on its own line.
     *
     * @param list the strings to print
     */
    private static void printList(List<String> list) {
        for (String str : list) {
            System.out.println(str);
        }
    }

    /**
     * Extracts the text of a .docx file using Apache POI.
     *
     * @param file the .docx file to read
     * @return the text returned by {@code XWPFWordExtractor.getText()}
     * @throws IOException if the file cannot be opened or parsed
     */
    public static String readFromDocx(File file) throws IOException {
        FileInputStream fis = new FileInputStream(file);
        try {
            XWPFDocument docx = new XWPFDocument(fis);
            XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
            return extractor.getText();
        } finally {
            // Always release the file handle, even when parsing fails.
            fis.close();
        }
    }

    /**
     * Extracts the text of a legacy .doc file using Apache POI.
     *
     * @param file the .doc file to read
     * @return the text returned by {@code HWPFDocument.getDocumentText()}
     * @throws IOException if the file cannot be opened or parsed
     */
    public static String readFromDoc(File file) throws IOException {
        FileInputStream fis = new FileInputStream(file);
        try {
            HWPFDocument doc = new HWPFDocument(fis);
            return doc.getDocumentText();
        } finally {
            // Always release the file handle, even when parsing fails.
            // NOTE(review): depending on the POI version the document may be
            // Closeable as well - consider closing it too.
            fis.close();
        }
    }
}
doc 讀取出來的文本不能用 "\\n" 進行行分割,要用 "\\r" 才行,否則只能分割出 1 行。這點要注意,不然會覺得程式莫名其妙地不按自己的思路走。