Rで解析：Shinyで習作「PubMedクラウド」

Shinyの習作です。「easyPubMed」パッケージなどを利用して論文タイトルからワードクラウドを作成する例です。

Shinyアプリの概要はネットで多く紹介されているので省略します。以下で紹介するコマンドをコピーし、RStudioで「ui.R」と「server.R」を作成した後、メニューの「Run App」をクリックすると動作します。

RStudio version 1.0.136、Windows 10のR version 3.3.2で動作を確認しています。

コマンドの紹介

詳細はコマンド、各パッケージのヘルプを確認してください。

ui.Rの内容

# ui.R
# Page layout for the PubMed word-cloud app, built from fluid (resizable)
# rows. The pieces are named first and assembled at the end; the value of
# the final shinyUI() call is the UI definition.

# Row 1: plot area bound to output "WordCloud", spanning the full width.
cloud_row <- fluidRow(
  plotOutput("WordCloud", width = "100%")
)

# Row 2, column 1 (width 2): query text, number of papers to fetch (entered
# as text, default "100"), and the button with input id "GoWordCloud".
search_controls <- column(
  2,
  textInput("QueryWord", "クエリ", "クエリを入力"),
  textInput("GetPaper", "取得論文数", "100"),
  actionButton("GoWordCloud", "ワードクラウドを作成")
)

# Row 2, column 2 (width 10): slider with input id "CountWord",
# ranging from 1 to 20 in steps of 1, initially 5.
threshold_control <- column(
  10,
  sliderInput(
    "CountWord", "単語出現数",
    min = 1, max = 20, value = 5, step = 1
  )
)

shinyUI(
  fluidPage(
    # Page title
    titlePanel("PubMedでWordCloud"),
    cloud_row,
    fluidRow(search_controls, threshold_control)
  )
)

server.Rの内容

# server.R
# Load the packages used by the server, installing any that are missing.
# require() only returns FALSE when a package is absent, so after
# install.packages() the package still has to be attached explicitly;
# without the library() call the first run after a fresh install fails
# with "could not find function" errors.
for (pkg in c("easyPubMed", "tm", "wordcloud")) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}
# tcltk is distributed with R itself, and no tcltk function is called in the
# code shown here, so a failure to load it is deliberately left non-fatal.
if (!require("tcltk")) {
  install.packages("tcltk")
}

# Default text of the "QueryWord" box in ui.R. It is placeholder text and
# must never be sent to PubMed as a real query.
query_placeholder <- "クエリを入力"

# Words removed from the counts in addition to tm's English stop words.
# removeWords() is case-sensitive and runs before DocumentTermMatrix()
# lower-cases the terms, so capitalised stop words such as "The" survive
# the stop-word step and are filtered here instead.
extra_stop_words <- c("the", "this", "can", "thus", "these")

# Count how often each word occurs across a set of article titles.
#
# titles: character vector, one article title per element.
# Returns a data frame with columns `word` (character) and `freq` (numeric),
# one row per distinct term; zero rows when no term is left.
count_title_words <- function(titles) {
  # Text-mining settings; adjust to taste.
  # VectorSource makes one document per title and does not depend on the
  # data-frame column layout that DataframeSource expects.
  corpus <- Corpus(VectorSource(titles))
  corpus <- tm_map(corpus, stripWhitespace)    # collapse whitespace
  corpus <- tm_map(corpus, removeNumbers)      # drop digits
  corpus <- tm_map(corpus, removePunctuation)  # drop punctuation
  corpus <- tm_map(corpus, removeWords, stopwords("english"))  # and, or, ...
  term_matrix <- DocumentTermMatrix(corpus)

  # Column sums give the total number of occurrences of every term.
  totals <- colSums(as.matrix(term_matrix))
  counts <- data.frame(
    word = as.character(names(totals)),
    freq = as.numeric(totals),
    stringsAsFactors = FALSE
  )
  counts[!(counts$word %in% extra_stop_words), , drop = FALSE]
}

shinyServer(function(input, output) {

  # Word counts for the current query. eventReactive() re-runs only when the
  # "GoWordCloud" button is pressed and caches the result in between, so
  # moving the slider redraws the cloud without downloading from PubMed again.
  # Inputs read inside the handler are isolated automatically.
  word_counts <- eventReactive(input$GoWordCloud, {
    query <- trimws(input$QueryWord)
    # The paper count comes from a textInput, so it arrives as a string.
    n_papers <- suppressWarnings(as.integer(input$GetPaper))

    # Silently show nothing for an empty or placeholder query, or for a
    # paper count that is not a positive integer.
    req(nzchar(query), query != query_placeholder)
    req(!is.na(n_papers), n_papers > 0L)

    # Fetch the search results.
    pubmed_ids <- get_pubmed_ids(query)
    pubmed_xml <- fetch_pubmed_data(pubmed_ids,
                                    retmax = n_papers, format = "xml")

    # Extract the article titles.
    # NOTE(review): xpathApply()/xmlValue() are XML-package functions and
    # assume fetch_pubmed_data() returns a parsed XML document - confirm
    # against the installed easyPubMed version.
    titles <- unlist(xpathApply(pubmed_xml, "//ArticleTitle", xmlValue))
    req(length(titles) > 0)

    count_title_words(titles)
  })

  output$WordCloud <- renderPlot({
    counts <- word_counts()
    # Keep the words that occur at least as often as the slider value.
    frequent <- counts[counts$freq >= input$CountWord, , drop = FALSE]
    # Nothing to draw: leave the plot area empty instead of raising an error.
    req(nrow(frequent) > 0)
    # Plot
    wordcloud(frequent$word, frequent$freq, scale = c(8, .1),
              random.order = FALSE, rot.per = .10,
              colors = brewer.pal(8, "Dark2"))
  })
})