>help.start() # 全体のヘルプを出す場合
>help(rnorm) # function のヘルプを参照する場合、ただ help()に入れればよい
> wordlist <- c("able", "ability", "abuse") > wordlist [1] "able" "ability" "abuse"
> library(survival) 要求されたパッケージ splines をロード中です 次のパッケージを付け加えます: 'survival' The following object(s) are masked from package:ISwR : lung
> detach("package:survival")
> library()
> bnclemma <- read.table("c:/temp/lemma.num", header=T) header=T で1行目をヘッダとして読み込む
> lm <- edit(bnclemma)
> fix(bnclemma)
>dd <- data.frame() >fix(dd)
> ls() [1] "bmi" "d" "energy" "exp.lean" "exp.obese" "fpain" [7] "h" "height" "hh" "intake.post" "intake.pre" "l" [13] "mylist" "oops" "pain" "thue2" "thuesen" "weight" [19] "x" "y" "ylim"
>save.image()
> x <- seq(-4,4,0.1) -4 から 4 まで 0.1 ずつの間隔で sequence を作って x に格納
> plot(x, dnorm(x),type="l") X 軸に x を、Y 軸に各 x における標準正規分布の確率密度 dnorm(x) をとって正規曲線を描く type="l" は line(線でつなぐ)の意味 > plot(x, dnorm(x),type="h") 同じプロットを type="h" にすると histogram のように、それぞれの x の位置から縦棒が伸びる形式になる
> curve(dnorm(x), from=-4, to=4) これでも同じカーブを描くことができる
> 1-pnorm(160,mean=132,sd=13) [1] 0.01562612
> 1-pbinom(15,size=20,prob=.5) [1] 0.005908966
> x <- rnorm(50) > x [1] 1.11403983 -1.15165895 0.80461867 1.71557037 0.03169234 0.18838381 -0.80416122 [8] -0.10692887 -0.96407973 1.16309160 0.61545475 0.09261578 -0.76893852 -0.78379193 [15] 0.12446743 0.77148385 1.19391658 -0.78081663 -0.83306264 0.35587091 -0.76966258 [22] -0.15209363 -0.49532705 -0.52829549 0.64414085 1.59186102 -0.07530280 -0.35851963 [29] 1.55049255 0.21063906 0.22529564 0.34462737 0.19350551 -0.65114617 -1.49958077 [36] 1.15710520 -0.69795906 -1.12325445 -2.08717916 0.01182514 0.13297142 1.40239832 [43] 1.04982555 0.46580345 0.55212419 -0.53356749 -0.31117847 0.64688955 -1.44102550 [50] -0.06345214
> mean(x) [1] 0.02739456
> library(ISwR) #ISwR というライブラリを読み込む(データのため) > data(juul) #data juul を読み込む > juul age menarche sex igf1 tanner testvol 1 NA NA NA 90 NA NA 2 NA NA NA 88 NA NA 3 NA NA NA 164 NA NA 4 NA NA NA 166 NA NA 5 NA NA NA 131 NA NA
> attach(juul) #こうすると juul$…を省略できる > mean(igf1) [1] NA #欠損値(NA)が含まれているため平均値を計算できず、エラーではなく結果として NA が返っている > mean(igf1,na.rm=T) #"na.rm=T" は NA (not available) を remove する、つまり欠損値を除いて計算する、という指定 [1] 340.168
> sd(x) [1] 0.876461
> var(x) [1] 0.768184
> median(x) [1] 0.06215406
> quantile(x) 0% 25% 50% 75% 100% -2.08717916 -0.68625583 0.06215406 0.63696932 1.71557037
> pvec <- seq(0,1,0.1) pvec という変数に 0 から 1 までを 0.1 区切りに格納 > pvec [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 > quantile(x,pvec) 0% 10% 20% 30% 40% 50% 60% -2.08717916 -0.97999720 -0.77189339 -0.52987709 -0.12499478 0.06215406 0.20035893 70% 80% 90% 100% 0.49169967 0.77811082 1.16617410 1.71557037
> summary(igf1)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 25.0 202.2 313.5 340.2 462.8 915.0 321.0
> summary(juul) age menarche sex igf1 tanner Min. : 0.170 Min. : 1.000 Min. :1.000 Min. : 25.0 Min. : 1.000 1st Qu.: 9.053 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:202.2 1st Qu.: 1.000 Median :12.560 Median : 1.000 Median :2.000 Median :313.5 Median : 2.000 Mean :15.095 Mean : 1.476 Mean :1.534 Mean :340.2 Mean : 2.640 3rd Qu.:16.855 3rd Qu.: 2.000 3rd Qu.:2.000 3rd Qu.:462.8 3rd Qu.: 5.000 Max. :83.000 Max. : 2.000 Max. :2.000 Max. :915.0 Max. : 5.000 NA's : 5.000 NA's :635.000 NA's :5.000 NA's :321.0 NA's :240.000 testvol Min. : 1.000 1st Qu.: 1.000 Median : 3.000 Mean : 7.896 3rd Qu.: 15.000 Max. : 30.000 NA's :859.000
> x <- rnorm(100) # normal density of pseudo-random numbers > x [1] -0.23852918 0.80007985 2.59488947 1.92574080 0.04232929 -1.06697286 -1.28712104 [8] -0.07618785 -1.59706302 0.82031697 -1.20817887 1.46262997 -1.29586205 -0.76564449 [15] 0.75213738 -0.06597683 0.23352029 -0.16270841 -1.64600217 0.75705638 0.46523458 [22] -0.12607458 0.09170498 -0.10259290 -0.45916474 0.25041968 -0.51401120 -0.23879437 [29] 1.45689028 0.64155579 -0.33764720 -1.25446597 0.70214707 0.44216508 -0.04646707 [36] -0.26736349 -0.97858879 1.45124404 -1.22135682 0.08365315 -0.26954490 -0.22825442 [43] -0.45558029 1.61702489 -1.51530741 1.12869612 1.03321808 1.67374952 -0.46476407 [50] -0.73521276 -0.12948352 -0.09378984 0.93129898 0.77798970 0.54310765 0.52949773 [57] 0.25044045 1.01388932 0.82988086 -1.59370935 -0.60922676 1.53202118 0.19678465 [64] -0.38521575 1.30834830 1.20246317 -0.03461022 -0.60884186 0.89151459 0.48934389 [71] 0.99960792 0.87056316 1.17321910 0.02667156 0.52161582 -0.09861786 -0.63170346 [78] -0.85243182 -0.87548459 0.20321376 -1.46930772 1.31395373 -1.09770929 -0.19700751 [85] -0.68914995 1.06938790 1.29973824 0.65570765 0.71533448 -2.32653615 0.06533285 [92] -0.25530870 0.46701640 1.27102185 1.73760250 1.31901418 1.08818204 -1.94065162 [99] -0.32908069 -0.49896537
> hist(x, freq=F) # draw a histogram of x; freq=F puts densities (not counts) on the y-axis, so the total area is 1 > curve(dnorm(x),add=T) # draw the standard normal density curve; add=T overlays it on the existing plot
> h <- hist(x, plot=F) > ylim <- range(0, h$density, dnorm(0)) > hist(x, freq=F, ylim=ylim) > curve(dnorm(x), add=T)
> data(energy) > energy expend stature 1 9.21 obese 2 7.53 lean 3 7.48 lean 4 8.08 lean 5 8.09 lean 6 10.15 lean 7 8.40 lean 8 10.88 lean 9 6.13 lean 10 7.90 lean 11 11.51 obese 12 12.79 obese 13 7.05 lean 14 11.85 obese 15 9.97 obese 16 7.48 lean 17 8.79 obese 18 9.69 obese 19 9.68 obese 20 7.58 lean 21 9.19 obese 22 8.11 lean
> exp.lean <- energy$expend[energy$stature=="lean"]
> exp.obese <- energy$expend[energy$stature=="obese"] > exp.lean [1] 7.53 7.48 8.08 8.09 10.15 8.40 10.88 6.13 7.90 7.05 7.48 7.58 8.11 > exp.obese [1] 9.21 11.51 12.79 11.85 9.97 8.79 9.69 9.68 9.19
> l <-split(energy$expend, energy$stature) > > l $lean [1] 7.53 7.48 8.08 8.09 10.15 8.40 10.88 6.13 7.90 7.05 7.48 7.58 8.11
$obese [1] 9.21 11.51 12.79 11.85 9.97 8.79 9.69 9.68 9.19
(例)c:\temp にある lemma.num を bnclemma というデータフレームにする: > bnclemma <- read.table("c:/temp/lemma.num", header=T) header=T で1行目をヘッダとして読み込む
> lm <- edit(bnclemma)
> lm[10,3] [1] to
> lm[10,4] [1] prep Levels: a adv conj det infinitive-marker interjection modal n prep pron v ※このように因子(factor)型の変数では、アウトプットの最後にその変数がとりうる水準(Levels)が列挙される
> lm$word[1:4] [1] the be of and 5464 Levels: a abandon abbey ability able abnormal abolish abolition abortion about ... zone
> lm$pos[1] [1] det Levels: a adv conj det infinitive-marker interjection modal n prep pron v
> length(lm$word) [1] 6318
> lm$word[lm$rank < 100] [1] the be of and a in to have it to for [12] i that you he on with do at by not this [23] but from they his that she or which as we an [34] say will would can if their go what there all get [45] her make who as out up see know time take them [56] some could so him year into its then think my come [67] than more about now last your me no other give just [78] should these people also well any only new very when may [89] way look like use her such how because when as good
> lm$word[lm$rank < 100 & lm$pos == "n"] [1] time year people way ※ [ ] で条件設定し、& で2つの条件を組み合わせている ※ logical operator としては、&, |(=or), !(=not) が使える
> lm$word[lm$rank < 100 & (lm$pos == "n"|lm$pos == "a")] [1] time year last other people new way good ※ logical operator | が使われていることに注意
> lm[lm$rank <10,] rank freq word pos 1 1 6187267 the det 2 2 4239632 be v 3 3 3093444 of prep 4 4 2687863 and conj 5 5 2186369 a det 6 6 1924315 in prep 7 7 1620850 to infinitive-marker 8 8 1375636 have v 9 9 1090186 it pron
※10位まで取り出すならば、<11 とする ※[]の中で条件を付けるが、最後の ,] とカンマをつけるところが重要。カンマの前が行の条件、カンマの後ろ(空欄)が「全ての列」を意味する。
> lm[(lm$rank <101 & lm$pos == "v"),] rank freq word pos 2 2 4239632 be v 8 8 1375636 have v 18 18 559596 do v 34 34 333518 say v 40 40 249540 go v 44 44 220940 get v 46 46 217268 make v 51 51 191661 see v 52 52 185534 know v 54 54 179220 take v 64 64 153881 think v 66 66 151871 come v 76 76 131417 give v 90 90 111058 look v 92 92 108820 use v 100 100 98899 find v
> lm100 <- lm[(lm$rank <101),] # lm100 というデータフレームに100以内を格納 > lm100
rank freq word pos 1 1 6187267 the det 2 2 4239632 be v 3 3 3093444 of prep 4 4 2687863 and conj 5 5 2186369 a det 6 6 1924315 in prep 7 7 1620850 to infinitive-marker 8 8 1375636 have v 9 9 1090186 it pron 10 10 1039323 to prep 11 11 887877 for prep 12 12 884599 i pron 13 13 760399 that conj 14 14 695498 you pron 15 15 681255 he pron 16 16 680739 on prep 17 17 675027 with prep 18 18 559596 do v 19 19 534162 at prep 20 20 517171 by prep ※出力はこんな感じ
プロットの関数は plot() > plot(lm100$rank, log(lm100$freq), type="n") ※ランキングを横軸、頻度の自然対数を縦軸にとる。 ※type="n" は点や線を描かず、軸と枠だけを用意する、という意味。後で単語を載せるためここでは点を出力しない。 > text(lm100$rank, log(lm100$freq), as.character(lm100$word), cex=0.5) ※text() は、指定した x 座標と y 座標が交差する点に、as.character() で文字列に変換したラベルを描画する関数 ※cex= で文字サイズ(標準を 1 とした倍率)を指定する
> vlm <- lm100[lm100$pos == "v",] #動詞を100位以内から抽出 vlm に格納 > vlm rank freq word pos 2 2 4239632 be v 8 8 1375636 have v 18 18 559596 do v 34 34 333518 say v 40 40 249540 go v 44 44 220940 get v 46 46 217268 make v 51 51 191661 see v 52 52 185534 know v 54 54 179220 take v 64 64 153881 think v 66 66 151871 come v 76 76 131417 give v 90 90 111058 look v 92 92 108820 use v 100 100 98899 find v > plot(vlm$rank, log(vlm$freq), type="n") #横軸ランク、縦軸に頻度の対数をとる、type="n" でプロットの記号を消しておく > text(vlm$rank, log(vlm$freq), as.character(vlm$word), cex=0.9) ※vlm$rank と vlm$freq の交差する点に vlm$word から取ってきた文字列を配置するというコマンド ※cex=0.9 で少し大きめに