First, I wondered how many people have been given my name, and how common other names are.
A word cloud turned out to be the best way to visualize the most popular names.
The histogram shows the yearly counts for my name, Mark.
# national popular baby names
# url : http://www.ssa.gov/oact/babynames/limits.html
# national data .zip
# Locate the per-year data files and load them into one data frame, `fox`,
# with columns name, gender, cnt and year.
#
# The directory is held in a variable instead of calling setwd(), so running
# the script does not change the session's working directory.
data_dir <- "C:/Users/Mark/Downloads/names"

# Anchored, escaped pattern: a bare ".txt" regex treats "." as a wildcard and
# matches anywhere in the name, so only true *.txt files are kept here and
# the PDF readme needs no separate exclusion.
files <- list.files(data_dir, pattern = "\\.txt$")
if (length(files) == 0) {
  stop("No .txt files found in ", data_dir, call. = FALSE)
}

# Read a single file and tag every row with the year taken from characters
# 4-7 of the file name (kept as a character string).
read_year_file <- function(file_name) {
  rows <- read.csv(
    file.path(data_dir, file_name),
    header = FALSE,
    col.names = c("name", "gender", "cnt"),
    # Explicit types: without them a gender column holding only "F" would be
    # guessed as logical FALSE.
    colClasses = c("character", "character", "integer")
  )
  rows$year <- substr(file_name, 4, 7)
  rows
}

# Read all files first and bind once; rbind() inside a loop recopies the
# accumulated data frame on every iteration.
fox <- do.call(rbind, lapply(files, read_year_file))
# Total count per name, summed over every row of `fox`.
library(sqldf)
koala <- sqldf(
  "select name,sum(cnt) as cnt from fox group by name"
)

# Word cloud of at most 50 names, plotted in decreasing order of frequency.
library(wordcloud)
wordcloud(
  words = as.character(koala$name),
  freq = as.integer(koala$cnt),
  scale = c(5, 0.5),
  max.words = 50,
  random.order = FALSE,
  rot.per = 0.35,
  use.r.layout = FALSE,
  colors = brewer.pal(8, "Dark2")
)
# Per-year totals for the name 'Mark', drawn as one vertical line per year
# (type = "h").
koala <- sqldf(
  "select year,sum(cnt) as cnt from fox where name = 'Mark' group by year"
)
plot(
  koala,
  type = "h",
  ylab = "Baby Name (Mark)s Count",
  col = "Purple"
)
0 개의 댓글:
댓글 쓰기