library(ggplot2)
library(tm)
library(wordcloud)
# ---- Configuration ----------------------------------------------------------
# Directory whose text files make up the corpus.
cname <- "C:/Users/George/Google Drive/R Templates/Gospels corpus"
# Hand-picked words dropped from every document before counting.
stop_words <- c("a", "the", "an", "that", "and")
# How many of the most frequent terms are printed / plotted.
top_n <- 100L

# ---- Helpers ----------------------------------------------------------------

# Apply the cleaning steps, in order: lower-case, drop numbers, drop
# punctuation, collapse whitespace, remove `stop_words`.
# `tolower` is a base function, so it is wrapped in content_transformer();
# that keeps the documents as tm documents and makes the former
# `tm_map(corpus, PlainTextDocument)` repair step unnecessary.
clean_corpus <- function(corpus, stop_words) {
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, stripWhitespace)
  tm_map(corpus, removeWords, stop_words)
}

# Return the (at most) `n` most frequent terms of a term-document matrix as a
# named numeric vector sorted by decreasing total count.
# head() is used instead of `[1:n]` so that a matrix with fewer than `n` terms
# does not get padded with NA entries.
top_terms <- function(tdm, n) {
  totals <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)
  head(totals, n)
}

# Draw a word cloud for a named frequency vector; skips (with a warning) when
# there is nothing to draw. Returns its input invisibly.
plot_cloud <- function(freq) {
  if (length(freq) == 0L) {
    warning("No terms left to plot; skipping word cloud.", call. = FALSE)
    return(invisible(freq))
  }
  wordcloud(
    words = names(freq), freq = freq,
    random.order = FALSE, scale = c(5, .1), colors = brewer.pal(10, "PRGn")
  )
  invisible(freq)
}

# ---- Whole corpus -----------------------------------------------------------
# One corpus built from the files in `cname`, cleaned once and reused below.
corpus <- clean_corpus(Corpus(DirSource(cname)), stop_words)
tdm1 <- TermDocumentMatrix(corpus)
word.freq <- top_terms(tdm1, top_n)
plot_cloud(word.freq)

# ---- One word cloud per document --------------------------------------------
# Nothing in this section needs the working directory any more; the call is
# kept so that any later code relying on it still behaves the same.
setwd(cname)
files <- list.files(cname)

# DirSource() expects a directory, so feeding it the individual file names
# returned by list.files() cannot work. Instead, reuse the cleaned corpus and
# build a term-document matrix from each document in turn.
# NOTE(review): this covers the files directly inside `cname`; subdirectories
# are not descended into -- confirm that matches the intended layout.
for (i in seq_len(length(corpus))) {
  tdm1 <- TermDocumentMatrix(corpus[i])
  word.freq <- top_terms(tdm1, top_n)
  print(word.freq)
  plot_cloud(word.freq)
}
# ---- Final report -----------------------------------------------------------
# List every entry found in the corpus directory. Iterating over the elements
# themselves (rather than 1:length(files)) prints nothing when `files` is
# empty instead of printing NA and character(0).
for (file_name in files) {
  print(file_name)
}

# Show the most recently computed term frequencies and the matching
# term-document matrix.
print(word.freq)
inspect(tdm1)