[java]分词&词云

发布于 2019-06-02  27 次阅读


作业中需要用到词云,就尝试了下,还是比较简单的

分词和词频统计采用了KUMO,前端显示采用vue-wordcloud

maven配置

    <dependency>
      <groupId>com.kennycason</groupId>
      <artifactId>kumo-core</artifactId>
      <version>1.13</version>
    </dependency>
    <dependency>
      <groupId>com.kennycason</groupId>
      <artifactId>kumo-tokenizers</artifactId>
      <version>1.12</version>
    </dependency>

样例

该样例是根据论文的摘要和关键字生成词云

后端部分

package com.likole.aihw.utils;

import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
import com.kennycason.kumo.nlp.tokenizer.WhiteSpaceWordTokenizer;

import java.util.ArrayList;
import java.util.List;

/**
 * Builds word-frequency data for a word cloud from a paper's abstract and keywords.
 *
 * @author likole
 */
public class WordCloudUtils {

    /** Upper bound on the number of word frequencies requested from the analyzer. */
    private static final int MAX_WORD_FREQUENCIES = 600;

    /** Minimum word length handed to the analyzer. */
    private static final int MIN_WORD_LENGTH = 4;

    /** Constant added to every keyword's repetition count. */
    private static final int KEYWORD_BASE_WEIGHT = 2;

    /**
     * Computes word frequencies over the abstract plus a weighted copy of the keywords.
     *
     * <p>The keyword at index {@code i} of a list of size {@code n} is added
     * {@code n - i + 2} times, so earlier keywords count more than later ones.
     *
     * @param abstractt the abstract text; {@code null} is treated as "no abstract"
     * @param keywords  the keywords in priority order; {@code null} is treated as empty,
     *                  and {@code null} entries are skipped
     * @return the frequencies produced by the analyzer for the collected texts
     */
    public static List<WordFrequency> frequence(String abstractt, List<String> keywords){
        FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
        frequencyAnalyzer.setWordFrequenciesToReturn(MAX_WORD_FREQUENCIES);
        frequencyAnalyzer.setMinWordLength(MIN_WORD_LENGTH);
        frequencyAnalyzer.setWordTokenizer(new WhiteSpaceWordTokenizer());

        List<String> texts = new ArrayList<>();
        // A null text would be passed straight to the tokenizer, so leave it out.
        if (abstractt != null) {
            texts.add(abstractt);
        }
        if (keywords != null) {
            int total = keywords.size();
            for (int i = 0; i < total; i++) {
                String keyword = keywords.get(i);
                if (keyword == null) {
                    continue;
                }
                // The weight is still derived from the original index, so skipping a
                // null entry does not shift the weights of the remaining keywords.
                int copies = total - i + KEYWORD_BASE_WEIGHT;
                for (int j = 0; j < copies; j++) {
                    texts.add(keyword);
                }
            }
        }
        return frequencyAnalyzer.load(texts);
    }
}
    /**
     * Returns one article together with the data for its word cloud.
     *
     * <p>The stored JSON strings for author names and keywords are decoded into lists
     * (empty lists when the stored value is {@code null}), then the abstract and keywords
     * are turned into name/value pairs for the front end.
     *
     * @param wos identifier used to fetch the article
     * @return a map with {@code code}, {@code article} and {@code wc} on success, or a map
     *         with a non-zero {@code code} and a {@code msg} when no article is found
     */
    @At("/detail")
    public Object getDetail(@Param("wos") String wos) {
        NutMap re = new NutMap();
        Article article = dao.fetch(Article.class, wos);
        if (article == null) {
            // Without this guard an unknown id fails with a NullPointerException below.
            // NOTE(review): the code value 1 is a placeholder; align with the project's error convention.
            return re.setv("code", 1).setv("msg", "article not found");
        }
        //author names
        if (article.getAuthorFullname() == null) {
            article.setAuthorFullNames(new ArrayList<String>());
        } else {
            article.setAuthorFullNames((List<String>) Json.fromJson(article.getAuthorFullname()));
        }
        //keywords
        if (article.getKeyword() == null) {
            article.setKeywords(new ArrayList<String>());
        } else {
            article.setKeywords((List<String>) Json.fromJson(article.getKeyword()));
        }
        //wordcloud: map each frequency to the name/value pair returned under "wc"
        List<WordFrequency> wordFrequencies= WordCloudUtils.frequence(article.getAbstractt(),article.getKeywords());
        List<WordCloudDto> wordCloudDtos=new ArrayList<>(wordFrequencies.size());
        for (WordFrequency wordFrequency:wordFrequencies){
            WordCloudDto wordCloudDto=new WordCloudDto();
            wordCloudDto.setName(wordFrequency.getWord());
            wordCloudDto.setValue(wordFrequency.getFrequency());
            wordCloudDtos.add(wordCloudDto);
        }
        return re.setv("code", 0).setv("article", article).setv("wc",wordCloudDtos);
    }
package com.likole.aihw.dto;

/**
 * Mutable name/value pair; judging by the class name it carries one word-cloud entry.
 *
 * @author likole
 */
public class WordCloudDto {

    /** Numeric value of the entry; {@code 0} until set. */
    private int value;

    /** Label of the entry; {@code null} until set. */
    private String name;

    /**
     * @return the current label, possibly {@code null}
     */
    public String getName() {
        return this.name;
    }

    /**
     * @return the current numeric value
     */
    public int getValue() {
        return this.value;
    }

    /**
     * @param name the label to store
     */
    public void setName(final String name) {
        this.name = name;
    }

    /**
     * @param value the numeric value to store
     */
    public void setValue(final int value) {
        this.value = value;
    }
}

前端部分

template部分

              <wordcloud
                :data="wc"
                nameKey="name"
                valueKey="value"
                :showTooltip="true">
              </wordcloud>

js部分

import wordcloud from 'vue-wordcloud'，然后在 components 中添加 wordcloud，并在 data 中定义 wc: []

        let self = this
        // Fetch the article's basic information and its word-cloud data.
        // The id is URL-encoded so that reserved characters in it (e.g. '&', '#', '+')
        // cannot break or alter the query string.
        axios.get('http://183.175.12.164:8082/article/detail?wos=' + encodeURIComponent(wos))
          .then(function (response) {
            console.log(response)
            // `self` is used because `this` is not the component inside a plain function callback.
            self.article = response.data.article
            self.wc = response.data.wc
          })
          .catch(function (error) {
            console.log(error)
          })

直接生成词云图片样例

package preprocess.spider.article;

import com.kennycason.kumo.CollisionMode;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.bg.CircleBackground;
import com.kennycason.kumo.font.KumoFont;
import com.kennycason.kumo.font.scale.SqrtFontScalar;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
import com.kennycason.kumo.palette.LinearGradientColorPalette;

import java.awt.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Renders a word-cloud PNG from a paper's abstract and keywords.
 *
 * @author likole
 */
public class WordCloud {

    /**
     * Generates the word cloud from the abstract alone.
     *
     * @param wos       identifier used as the output file name
     * @param abstractt the abstract text; {@code null} is treated as "no abstract"
     */
    public static void generate(String wos,String abstractt){
        generate(wos, abstractt, new ArrayList<String>());
    }

    /**
     * Generates the word cloud and writes it to
     * {@code src/main/webapp/static/wordcloud/<wos>.png}.
     *
     * @param wos       identifier used as the output file name
     * @param abstractt the abstract text; {@code null} is treated as "no abstract"
     * @param keywords  keywords in priority order; {@code null} is treated as empty,
     *                  matching {@code WordCloudUtils.frequence}
     */
    public static void generate(String wos,String abstractt, List<String> keywords){
        FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
        frequencyAnalyzer.setWordFrequenciesToReturn(300);
        frequencyAnalyzer.setMinWordLength(4);
        // No tokenizer is set here, so the analyzer's default one is used.
        final List<WordFrequency> wordFrequencyList = frequencyAnalyzer.load(weightedTexts(abstractt, keywords));

        Dimension dimension = new Dimension(600,600);
        com.kennycason.kumo.WordCloud wordCloud = new com.kennycason.kumo.WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
        wordCloud.setPadding(2);
        // Font.ITALIC has the value 2, which is what the literal style argument used to be.
        java.awt.Font font = new java.awt.Font("STSong-Light", java.awt.Font.ITALIC, 20);
        wordCloud.setColorPalette(new LinearGradientColorPalette(Color.RED, Color.BLUE, Color.GREEN, 30, 30));
        wordCloud.setKumoFont(new KumoFont(font));
        wordCloud.setBackgroundColor(new Color(255,255,255));
        wordCloud.setBackground(new CircleBackground(255));
        wordCloud.setFontScalar(new SqrtFontScalar(12, 45));
        wordCloud.build(wordFrequencyList);
        wordCloud.writeToFile("src/main/webapp/static/wordcloud/"+wos+".png");
    }

    /**
     * Collects the texts to analyse: the abstract once, then the keyword at index
     * {@code i} of {@code n} keywords repeated {@code n - i + 2} times, so earlier
     * keywords count more. Null inputs and null keyword entries are skipped.
     */
    private static List<String> weightedTexts(String abstractt, List<String> keywords) {
        List<String> texts = new ArrayList<>();
        if (abstractt != null) {
            texts.add(abstractt);
        }
        if (keywords == null) {
            return texts;
        }
        int total = keywords.size();
        for (int i = 0; i < total; i++) {
            String keyword = keywords.get(i);
            if (keyword == null) {
                continue;
            }
            int copies = total - i + 2;
            for (int j = 0; j < copies; j++) {
                texts.add(keyword);
            }
        }
        return texts;
    }
}