Bevezetés az R-be 2.0 - Feladatgyűjtemény

Táblázatok

1. feladat. Táblázatok.
Határozzuk meg a MASS csomag survey adattáblájában a kategorikus változók gyakorisági eloszlását.

# Load the `survey` data frame shipped with the MASS package.
data("survey", package = "MASS")
# Show the structure of every column; the ones reported as Factor are the
# categorical variables tabulated below.
str(survey)
'data.frame':    237 obs. of  12 variables:
 $ Sex   : Factor w/ 2 levels "Female","Male": 1 2 2 2 2 1 2 1 2 2 ...
 $ Wr.Hnd: num  18.5 19.5 18 18.8 20 18 17.7 17 20 18.5 ...
 $ NW.Hnd: num  18 20.5 13.3 18.9 20 17.7 17.7 17.3 19.5 18.5 ...
 $ W.Hnd : Factor w/ 2 levels "Left","Right": 2 1 2 2 2 2 2 2 2 2 ...
 $ Fold  : Factor w/ 3 levels "L on R","Neither",..: 3 3 1 3 2 1 1 3 3 3 ...
 $ Pulse : int  92 104 87 NA 35 64 83 74 72 90 ...
 $ Clap  : Factor w/ 3 levels "Left","Neither",..: 1 1 2 2 3 3 3 3 3 3 ...
 $ Exer  : Factor w/ 3 levels "Gyakran","Soha",..: 2 1 1 1 2 2 3 3 2 2 ...
 $ Smoke : Factor w/ 4 levels "Heavy","Never",..: 2 4 3 2 2 2 2 2 2 2 ...
 $ Height: num  173 178 NA 160 165 ...
 $ M.I   : Factor w/ 2 levels "Imperial","Metric": 2 1 NA 2 2 1 1 2 2 2 ...
 $ Age   : num  18.2 17.6 16.9 20.3 23.7 ...

A klasszikus table()

# One-way frequency table of Sex; useNA = "ifany" adds an <NA> cell only when
# the variable actually contains missing values.
table(survey$Sex, useNA = "ifany")

Female   Male   <NA> 
   118    118      1
# Counts per level of W.Hnd, with an <NA> cell if any value is missing.
table(survey$W.Hnd, useNA = "ifany")

 Left Right  <NA> 
   18   218     1
# Counts per level of Fold; no <NA> cell is printed when nothing is missing.
table(survey$Fold, useNA = "ifany")

 L on R Neither  R on L 
     99      18     120
# Counts per level of Clap, with an <NA> cell if any value is missing.
table(survey$Clap, useNA = "ifany")

   Left Neither   Right    <NA> 
     39      50     147       1
# Counts per level of Exer; no <NA> cell is printed when nothing is missing.
table(survey$Exer, useNA = "ifany")

Gyakran    Soha    Néha 
     24      98     115
# Counts per level of Smoke, with an <NA> cell if any value is missing.
table(survey$Smoke, useNA = "ifany")

Heavy Never Occas Regul  <NA> 
   11   189    19    17     1
# Counts per level of M.I, with an <NA> cell if any value is missing.
table(survey$M.I, useNA = "ifany")

Imperial   Metric     <NA> 
      68      141       28

A freq() függvény a descr csomagból

library(descr)
# Namespace-qualified call: prettyR, used later in this file, also provides a
# freq(). plot = FALSE asks for the table without the accompanying plot; FALSE
# is spelled out because the shorthand F is an ordinary, reassignable variable.
descr::freq(survey$Sex, plot = FALSE)
survey$Sex 
       Frequency  Percent Valid Percent
Female       118  49.7890            50
Male         118  49.7890            50
NA's           1   0.4219              
Total        237 100.0000           100
# Counts and percentages for W.Hnd, table only (FALSE rather than the
# reassignable shorthand F).
descr::freq(survey$W.Hnd, plot = FALSE)
survey$W.Hnd 
      Frequency  Percent Valid Percent
Left         18   7.5949         7.627
Right       218  91.9831        92.373
NA's          1   0.4219              
Total       237 100.0000       100.000
# Counts and percentages for Fold, table only (FALSE rather than the
# reassignable shorthand F).
descr::freq(survey$Fold, plot = FALSE)
survey$Fold 
        Frequency Percent
L on R         99  41.772
Neither        18   7.595
R on L        120  50.633
Total         237 100.000
# Counts and percentages for Clap, table only (FALSE rather than the
# reassignable shorthand F).
descr::freq(survey$Clap, plot = FALSE)
survey$Clap 
        Frequency  Percent Valid Percent
Left           39  16.4557         16.53
Neither        50  21.0970         21.19
Right         147  62.0253         62.29
NA's            1   0.4219              
Total         237 100.0000        100.00
# Counts and percentages for Exer, table only (FALSE rather than the
# reassignable shorthand F).
descr::freq(survey$Exer, plot = FALSE)
survey$Exer 
        Frequency Percent
Gyakran        24   10.13
Soha           98   41.35
Néha          115   48.52
Total         237  100.00
# Counts and percentages for Smoke, table only (FALSE rather than the
# reassignable shorthand F).
descr::freq(survey$Smoke, plot = FALSE)
survey$Smoke 
      Frequency  Percent Valid Percent
Heavy        11   4.6414         4.661
Never       189  79.7468        80.085
Occas        19   8.0169         8.051
Regul        17   7.1730         7.203
NA's          1   0.4219              
Total       237 100.0000       100.000
# Counts and percentages for M.I, table only (FALSE rather than the
# reassignable shorthand F).
descr::freq(survey$M.I, plot = FALSE)
survey$M.I 
         Frequency Percent Valid Percent
Imperial        68   28.69         32.54
Metric         141   59.49         67.46
NA's            28   11.81              
Total          237  100.00        100.00

A freq() függvény a prettyR csomagból

library(prettyR)
# decr.order = FALSE keeps the factor's level order instead of sorting the
# table by decreasing frequency; FALSE is spelled out because the shorthand F
# is an ordinary, reassignable variable.
prettyR::freq(survey$Sex, decr.order = FALSE)

Frequencies for survey$Sex 
     Female   Male     NA
        118    118      1
%      49.8   49.8    0.4 
%!NA     50     50
# Frequencies for W.Hnd in level order (FALSE rather than the reassignable
# shorthand F).
prettyR::freq(survey$W.Hnd, decr.order = FALSE)

Frequencies for survey$W.Hnd 
      Left Right    NA
        18   218     1
%      7.6    92   0.4 
%!NA   7.6  92.4
# Frequencies for Fold in level order (FALSE rather than the reassignable
# shorthand F).
prettyR::freq(survey$Fold, decr.order = FALSE)

Frequencies for survey$Fold 
      L on R Neither  R on L      NA
          99      18     120       0
%       41.8     7.6    50.6       0 
%!NA    41.8     7.6    50.6
# Frequencies for Clap in level order (FALSE rather than the reassignable
# shorthand F).
prettyR::freq(survey$Clap, decr.order = FALSE)

Frequencies for survey$Clap 
        Left Neither   Right      NA
          39      50     147       1
%       16.5    21.1      62     0.4 
%!NA    16.5    21.2    62.3
# Frequencies for Exer in level order (FALSE rather than the reassignable
# shorthand F).
prettyR::freq(survey$Exer, decr.order = FALSE)

Frequencies for survey$Exer 
     Gyakran    Soha    Néha      NA
          24      98     115       0
%       10.1    41.4    48.5       0 
%!NA    10.1    41.4    48.5
# Frequencies for Smoke in level order (FALSE rather than the reassignable
# shorthand F).
prettyR::freq(survey$Smoke, decr.order = FALSE)

Frequencies for survey$Smoke 
     Heavy Never Occas Regul    NA
        11   189    19    17     1
%      4.6  79.7     8   7.2   0.4 
%!NA   4.7  80.1   8.1   7.2
# Frequencies for M.I in level order (FALSE rather than the reassignable
# shorthand F).
prettyR::freq(survey$M.I, decr.order = FALSE)

Frequencies for survey$M.I 
     Imperial   Metric       NA
           68      141       28
%        28.7     59.5     11.8 
%!NA     32.5     67.5

A Freq() függvény a DescTools csomagból

library(DescTools)
# Freq() lists, per level, the count, the proportion (0-1 scale) and their
# cumulative values; useNA = "ifany" adds an <NA> row only when the variable
# has missing values.
DescTools::Freq(survey$Sex, useNA = "ifany")
   level freq  perc cumfreq cumperc
1 Female  118 0.498     118   0.498
2   Male  118 0.498     236   0.996
3   <NA>    1 0.004     237   1.000
# Counts, proportions and cumulative values for W.Hnd, with an <NA> row if
# any value is missing.
DescTools::Freq(survey$W.Hnd, useNA = "ifany")
  level freq  perc cumfreq cumperc
1  Left   18 0.076      18   0.076
2 Right  218 0.920     236   0.996
3  <NA>    1 0.004     237   1.000
# Counts, proportions and cumulative values for Fold; no <NA> row appears
# when nothing is missing.
DescTools::Freq(survey$Fold, useNA = "ifany")
    level freq  perc cumfreq cumperc
1  L on R   99 0.418      99   0.418
2 Neither   18 0.076     117   0.494
3  R on L  120 0.506     237   1.000
# Counts, proportions and cumulative values for Clap, with an <NA> row if
# any value is missing.
DescTools::Freq(survey$Clap, useNA = "ifany")
    level freq  perc cumfreq cumperc
1    Left   39 0.165      39   0.165
2 Neither   50 0.211      89   0.376
3   Right  147 0.620     236   0.996
4    <NA>    1 0.004     237   1.000
# Counts, proportions and cumulative values for Exer; no <NA> row appears
# when nothing is missing.
DescTools::Freq(survey$Exer, useNA = "ifany")
    level freq  perc cumfreq cumperc
1 Gyakran   24 0.101      24   0.101
2    Soha   98 0.414     122   0.515
3    Néha  115 0.485     237   1.000
# Counts, proportions and cumulative values for Smoke, with an <NA> row if
# any value is missing.
DescTools::Freq(survey$Smoke, useNA = "ifany")
  level freq  perc cumfreq cumperc
1 Heavy   11 0.046      11   0.046
2 Never  189 0.797     200   0.844
3 Occas   19 0.080     219   0.924
4 Regul   17 0.072     236   0.996
5  <NA>    1 0.004     237   1.000
# Counts, proportions and cumulative values for M.I, with an <NA> row if
# any value is missing.
DescTools::Freq(survey$M.I, useNA = "ifany")
     level freq  perc cumfreq cumperc
1 Imperial   68 0.287      68   0.287
2   Metric  141 0.595     209   0.882
3     <NA>   28 0.118     237   1.000