#h/t http://emelineliu.com/2016/01/10/bible1/

library(ggplot2)
library(tm)
library(wordcloud)

# Directory holding the Gospel texts that make up the corpus.
cname <- "C:/Users/George/Google Drive/R Templates/Gospels corpus"
corpus_dir <- cname

# Words removed from every document before terms are counted.
stop_words <- c("a", "the", "an", "that", "and")

# Build a tm corpus from `path`.
#   path: either a directory (every file in it becomes a document) or a
#         single text file (the file becomes the only document).
# DirSource() expects a directory, so a single file is read with
# readLines() and wrapped in a VectorSource instead.
read_corpus <- function(path) {
  if (dir.exists(path)) {
    return(Corpus(DirSource(path)))
  }
  text <- paste(readLines(path, warn = FALSE), collapse = "\n")
  Corpus(VectorSource(text))
}

# Normalise every document in `corpus` and return the cleaned corpus:
# lower-case, drop numbers and punctuation, collapse whitespace, then remove
# `stop_words`.
# tolower() is a base function that returns a bare character vector, so it is
# wrapped in content_transformer() to keep the documents as tm text
# documents. This replaces the tm_map(corpus, PlainTextDocument) workaround
# that used to follow the cleaning steps.
clean_corpus <- function(corpus, stop_words) {
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, stripWhitespace)
  tm_map(corpus, removeWords, stop_words)
}

# Return the `n` most frequent terms of `m`, a term-by-document count matrix
# (terms in rows), as a named numeric vector sorted most frequent first.
# head() is used rather than [1:n] so that a vocabulary smaller than `n` is
# not padded with NA entries.
top_terms <- function(m, n = 100) {
  head(sort(rowSums(m), decreasing = TRUE), n)
}

# Draw a word cloud for a named frequency vector such as the result of
# top_terms(); the most frequent words are placed first.
draw_word_cloud <- function(word_freq) {
  wordcloud(words = names(word_freq), freq = word_freq,
            random.order = FALSE, scale = c(5, .1),
            colors = brewer.pal(10, "PRGn"))
}

# Whole corpus: one word cloud over all documents ----
corpus <- clean_corpus(read_corpus(cname), stop_words)
tdm1 <- TermDocumentMatrix(corpus)
m1 <- as.matrix(tdm1)
word.freq <- top_terms(m1)
draw_word_cloud(word.freq)

# Per document: one frequency table and word cloud per entry ----
# NOTE(review): setwd() is kept only in case later code relies on the working
# directory; the loop below builds full paths and does not depend on it.
setwd(corpus_dir)
files <- list.files(corpus_dir)

# seq_along() makes the loop a no-op when the directory is empty, whereas
# 1:length(files) would iterate over c(1, 0).
for (i in seq_along(files)) {
  cname <- files[i]
  corpus <- clean_corpus(read_corpus(file.path(corpus_dir, cname)), stop_words)
  tdm1 <- TermDocumentMatrix(corpus)
  m1 <- as.matrix(tdm1)
  word.freq <- top_terms(m1)

  print(word.freq)
  draw_word_cloud(word.freq)
}

# List the entries that were processed.
for (file_name in files) {
  print(file_name)
}

# Both objects hold the results for the last entry processed by the loop
# (or for the whole corpus if the directory listing was empty).
print(word.freq)
inspect(tdm1)