# Session setup.
# Keep strings as character vectors and avoid scientific notation in output.
options(stringsAsFactors = FALSE, scipen = 99)
# Start from an empty workspace, then run the garbage collector.
rm(list = ls())
gc()
# getwd() reports the current working directory.
getwd()
# setwd(dir) changes the working directory; it needs a path argument,
# e.g. setwd("/path/to/project").
# Keyboard shortcut: Control+L
# List the files in the Excel data directory.
# NOTE(review): the directory names contain "(shù)" / "(jù)" sequences that
# look like a text-conversion artifact -- confirm the real path on disk.
filenames <- list.files("/Users/yuyin/Downloads/數(shù)據(jù)/Excel數(shù)據(jù)")
# Alternative (the one the original note recommends): switch into the directory
# first, then list it without a path argument.
#   setwd("/Users/yuyin/Downloads/數(shù)據(jù)/Excel數(shù)據(jù)")
#   filenames <- dir()
# CSV input/output.
# library() alone is enough; the extra require() call loaded the same package.
library(data.table)
# Read a CSV that has no header row; fread() then names the columns V1, V2, ...
da <- fread("/Users/yuyin/Downloads/train_all_weekday.csv", header = FALSE)
# Read a GBK-encoded CSV file with base R.
u <- read.csv("JData_User.csv", fileEncoding = "gbk", header = TRUE)
# Write `out` as comma-separated text without row names, header or quoting.
# NOTE(review): `out` is not created in this snippet -- it must exist already.
write.table(out, file = "/Users/yuyin/Downloads/2.csv", sep = ",",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
# Excel input/output with the xlsx package.
library("xlsx")
# Read the first sheet of the workbook.
# The variable name `t` is kept from the original note; it coincides with
# base::t().
t <- read.xlsx("吉林2014.xlsx", sheetIndex = 1)
# Write the data back out as a new workbook.
write.xlsx(t, file = "./s.xlsx")
# Query a data frame with SQL through sqldf.
library(sqldf)
# Keep V1, V2 and V6 for rows whose V2 lies in [20161004, 20161017], ordered by
# V1 and then V2. `da` must already exist as a data frame.
re <- sqldf("select V1,V2,V6 from da where V2>=20161004 and V2<=20161017 order by V1,V2")
# Line chart with recharts: V2 (as character) on the x side, V6 as the value.
library(recharts)
# NOTE(review): `tmp` is not created in this snippet -- it must exist already
# and contain columns V2 and V6.
echartr(tmp, as.character(tmp$V2), V6, type = "line")
# Descriptive statistics for `ck` (a numeric vector that must already exist).
# Quartiles, using the default probabilities.
quantile(ck)
# Custom quantiles.
quantile(ck, probs = c(0.85, 0.95))
# median(ck) returns the median and mean(ck) the mean.
# Row count of a data frame. `data.frame` is a placeholder for your own object;
# passed literally it is the constructor function, for which nrow() is NULL.
nrow(data.frame)
# String concatenation. Y, m, d, path and name must already exist.
# Method 1: base paste() with an empty separator.
paste(Y, "/", m, "/", d, sep = "")
# Method 2: stringr::str_c(). The package has to be loaded here, because the
# str_replace_all() call below needs it as well.
library(stringr)
pout <- str_c(path, name, collapse = "")
# Replace every "/" in `name` with "_".
name <- str_replace_all(name, "/", "_")
# Remove duplicates from `tt`.
tt <- unique(tt)
# Combine by rows.
rbind(t1, t2)
# Combine by columns.
cbind(t1, t2)
# Sort `x` by bad_comment_rate in ascending order.
x <- x[order(x$bad_comment_rate, decreasing = FALSE), ]
# Random number generation. `n` (the number of draws) must already exist.
# Uniform distribution on [min, max].
runif(n, min = 0, max = 1)
# Normal distribution with the given mean and standard deviation.
rnorm(n, mean = 0, sd = 1)
# Draw one random value from 0, 1, ..., 100 by sampling.
sample(seq(0, 100, by = 1), 1, replace = TRUE)
# Min-max scaling to [0, 1]. The two statements are alternative forms of the
# same formula; run as-is, the second assignment overwrites the first.
# A constant input makes the denominator zero, so the result is NaN.
# Form 1: first column of `data`.
b1 <- (data[, 1] - min(data[, 1])) / (max(data[, 1]) - min(data[, 1]))
# Form 2: a plain vector `d`.
b1 <- (d - min(d)) / (max(d) - min(d))
# Parse a date string and re-format it.
dateChar <- "2014-04-06"
dtV <- as.POSIXct(dateChar, format = "%Y-%m-%d")
# Alternative: dtV <- as.Date(dateChar, format = "%Y-%m-%d")
format(dtV, "%Y/%m/%d %H:%M:%S")
# Convert to "2014/4/6": the numeric round trip strips the leading zeros from
# month and day.
Y <- format(dtV, "%Y")
m <- as.character(as.numeric(format(dtV, "%m")))
d <- as.character(as.numeric(format(dtV, "%d")))
dt <- paste(Y, m, d, sep = "/")
# Differences between two timestamps.
d <- c("2013-12-05 18:43:00", "2013-08-23 22:29:00")
# difftime() converts the strings itself and picks the unit automatically.
difftime(d[2], d[1])
# Parse once, then ask for the difference in seconds explicitly.
stamps <- strptime(d, "%Y-%m-%d %H:%M:%S")
difftime(stamps[2], stamps[1], units = "secs")
# dplyr quick reference. The original note describes this as much faster than
# sqldf and suited to processing large data sets.
library(dplyr)

# Convert the data to a tibble (the note cites faster processing).
# as_tibble() replaces tbl_df(), which dplyr 1.0.0 deprecated.
iris <- as_tibble(iris)

# Column selection: select() corresponds to SQL SELECT; prefix a column with
# `-` to drop it.
select(iris, Sepal.Length, Sepal.Width)
select(iris, -Species)

# Compute on the data and add the result as a new column. The note likens
# mutate() to an aliased SQL expression ("count(a) as t1").
mutate(iris, t1 = Sepal.Length * 2)

# Summary helpers:
#   n()                           number of rows
#   n_distinct(x)                 number of unique values in x
#   first(x), last(x), nth(x, n)  value at that position, like base x[1],
#                                 x[length(x)] and x[n]

# Filtering: filter() corresponds to SQL WHERE.
filter(iris, Sepal.Length > 5, Sepal.Width < 4)
filter(
  iris,
  Sepal.Length > 5 & Sepal.Width < 4 &
    (Species == "setosa" | Species == "versicolor")
)

# Sorting: arrange() corresponds to SQL ORDER BY.
arrange(iris, Sepal.Length)
arrange(iris, desc(Sepal.Length))

# Grouping and summarising with group_by().
group_by(iris, Species)
group_by(iris, Species, Petal.Width) %>%
  summarise(c1 = n(), c2 = n_distinct(Species))

# Aggregation with summarise().
summarise(iris, c1 = n(), c2 = mean(Sepal.Length))

# Chaining several steps with the %>% operator.
filter(iris, Sepal.Length > 5, Sepal.Width < 4) %>%
  summarise(c1 = n(), c2 = mean(Sepal.Length))

# Sampling: sample_n() / sample_frac().
sample_n(iris, 20)

# Joins and set operations. `a` and `b` must already exist and share the key
# column x1.
left_join(a, b, by = "x1")
right_join(a, b, by = "x1")
# Keep only the matching records.
inner_join(a, b, by = "x1")
# Keep all records. dplyr has no outer_join(); the full outer join is
# full_join().
full_join(a, b, by = "x1")
# Records of a that have a match in b.
semi_join(a, b, by = "x1")
# Records of a that have no match in b.
anti_join(a, b, by = "x1")

# Row-wise set operations. `x` and `y` must already exist.
# Intersection of x and y.
intersect(x, y)
# Union of x and y.
union(x, y)
# Rows that are in x but not in y.
setdiff(x, y)

# Combine by columns. `y` and `z` must already exist.
bind_cols(y, z)
# Combine by rows.
bind_rows(y, z)
# Search for functions related to "onehot" with the sos package.
library(sos)
findFn("onehot")
# See the sos package help for details.
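# Contact customer service
# NOTE(review): this looks like leftover web-page footer text rather than part
# of the R notes -- confirm before removing it.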