Java 字符串词频统计实例代码

Java教程 2025-08-08

代码如下:

package com.gpdi.action;

import java.util.ArrayList;

import java.util.Collections;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

public class WordsStatistics {

class Obj {

int count ;

Obj(int count){

this.count = count;

}

}

public List< WordCount > statistics(String word) {

List< WordCount > rs = new ArrayList< WordCount >();

Map < String,Obj > map = new HashMap< String,Obj >();

if(word == null ) {

return null;

}

word = word.toLowerCase();

word = word.replaceAll("'s", "");

word = word.replaceAll(",", "");

word = word.replaceAll("-", "");

word = word.replaceAll("//.", "");

word = word.replaceAll("'", "");

word = word.replaceAll(":", "");

word = word.replaceAll("!", "");

word = word.replaceAll("/n", "");

String [] wordArray = word.split(" ");

for(String simpleWord : wordArray) {

simpleWord = simpleWord.trim();

if (simpleWord != null && !simpleWord.equalsIgnoreCase("")) {

Obj cnt = map.get(simpleWord);

if ( cnt!= null ) {

cnt.count++;

}else {

map.put(simpleWord, new Obj(1));

}

}

}

for(String key : map.keySet()) {

WordCount wd = new WordCount(key,map.get(key).count);

rs.add(wd);

}

Collections.sort(rs, new java.util.Comparator< WordCount >(){

@Override

public int compare(WordCount o1, WordCount o2) {

int result = 0 ;

if (o1.getCount()  > o2.getCount() ) {

result = -1;

}else if (o1.getCount() <  o2.getCount()) {

result = 1;

}else {

int strRs = o1.getWord().compareToIgnoreCase(o2.getWord());

if ( strRs  > 0 ) {

result = 1;

}else {

result = -1 ;

}

}

return result;

}

});

return rs;

}

public static void main(String args[]) {

String word = "Pinterest is might be aa ab aa ab marketer's dream - ths site is largely used to curate products " ;

WordsStatistics s = new WordsStatistics();

List< WordCount > rs = s.statistics(word);

for(WordCount word1 : rs) {

System.out.println(word1.getWord()+"*"+word1.getCount());

}

}

}