Issue
I have data.frame df1:
# Example input: one row per En_ID, four "Nor" sample columns followed by
# five "Tor" sample columns (10 rows x 10 columns in total).
df1 <- data.frame(
  En_ID = c(
    "KNT00000000003", "KNT00000000005", "KNT00000000419", "KNT00000000457",
    "KNT00000000460", "KNT00000000938", "KNT00000000971", "KNT00000001036",
    "KNT00000001084", "KNT00000001167"
  ),
  Nor1 = c(
    -0.834165161710272, 1.02199443531549,
    -0.558658947885705, -0.390114219973209,
    -1.23551839713296, 3.11429434221998,
    0.283932163407262, -1.16908518620064,
    -0.597054772455507, -0.593624543273255
  ),
  Nor2 = c(
    -1.18531035488942, 0.423719727339646,
    -1.23261719368372, 0.0855281133529292,
    -1.52366830232278, 3.36692586561211,
    1.00323690950956, -0.000211248816114964,
    -4.74738483548391, -0.318176231083024
  ),
  Nor3 = c(
    -0.262659255267546, 1.3962481061442,
    -0.548673555705647, -0.0149651083306594,
    -1.45458689193089, 2.54126941463459,
    1.17711308509307, -1.19425284921181,
    1.17788731755683, -0.367897054652365
  ),
  Nor4 = c(
    -0.840752912305256, 0.536548846040064,
    -0.277409459604357, -0.241073614962264,
    -0.875313153342293, 1.61789645804321,
    0.412287101096504, -1.11846661523232,
    -2.6274528854429, -0.760452698231182
  ),
  Tor1 = c(
    -0.968784779247286, -0.502809694119192,
    -0.231526399163731, -0.530038395734114,
    -0.706006018337411, 3.58264357077653,
    -0.127521010699219, 0.270523387217103,
    1.68335644352003, -0.314902131571829
  ),
  Tor2 = c(
    -0.481754175843152, -0.440784040523259,
    -0.532975340622715, -0.182089795101371,
    -0.564807490336052, 1.74119896504534,
    -0.96169805631325, -0.721782763145306,
    -0.433459827401695, -0.727495835245995
  ),
  Tor3 = c(
    -0.889343429110847, 1.07937149728343,
    -0.215144871523998, -0.92234350748557,
    -0.832108253417702, 2.02456082994848,
    -0.0434322861759954, -0.523126561938426,
    -0.556984056084809, -0.740331742513503
  ),
  Tor4 = c(
    -0.858141567384178, 1.87728717064375,
    -0.381047638414538, -0.613568289061259,
    -1.92838339196505, 2.23393705735665,
    0.635389543483408, -0.466053620529111,
    -1.50483745357134, -1.33400859143521
  ),
  Tor5 = c(
    -0.486388736112514, 0.789390852922639,
    -0.869434195504952, -0.70405854858187,
    -1.16488184095428, 2.91497178849082,
    -2.10331904053714, -0.571130459068143,
    -0.219526004620518, -0.301435496557957
  )
)
How can I run a row-wise Wilcoxon test (wilcox.test) and Fisher's exact test (fisher.test), comparing the Nor1, Nor2, Nor3, and Nor4 columns with the Tor1, Tor2, Tor3, Tor4, and Tor5 columns of each row? I would then like to append the p-values from both tests as the last two columns, resulting in df2:
# Desired output: the same ten measurement columns as the input, followed by
# one p-value column per test. The p-values are placeholders that only
# illustrate the layout; they are kept inside [0, 1] because a p-value can
# never be negative.
# check.names = FALSE keeps the hyphenated column names exactly as written
# (the default would rewrite them to "p.value.wilcox" / "p.value.fisher").
df2 <- data.frame(
  En_ID = c(
    "KNT00000000003", "KNT00000000005", "KNT00000000419", "KNT00000000457",
    "KNT00000000460", "KNT00000000938", "KNT00000000971", "KNT00000001036",
    "KNT00000001084", "KNT00000001167"
  ),
  Nor1 = c(
    -0.834165161710272, 1.02199443531549, -0.558658947885705,
    -0.390114219973209, -1.23551839713296, 3.11429434221998,
    0.283932163407262, -1.16908518620064, -0.597054772455507,
    -0.593624543273255
  ),
  Nor2 = c(
    -1.18531035488942, 0.423719727339646, -1.23261719368372,
    0.0855281133529292, -1.52366830232278, 3.36692586561211,
    1.00323690950956, -0.000211248816114964, -4.74738483548391,
    -0.318176231083024
  ),
  Nor3 = c(
    -0.262659255267546, 1.3962481061442, -0.548673555705647,
    -0.0149651083306594, -1.45458689193089, 2.54126941463459,
    1.17711308509307, -1.19425284921181, 1.17788731755683,
    -0.367897054652365
  ),
  Nor4 = c(
    -0.840752912305256, 0.536548846040064, -0.277409459604357,
    -0.241073614962264, -0.875313153342293, 1.61789645804321,
    0.412287101096504, -1.11846661523232, -2.6274528854429,
    -0.760452698231182
  ),
  Tor1 = c(
    -0.968784779247286, -0.502809694119192, -0.231526399163731,
    -0.530038395734114, -0.706006018337411, 3.58264357077653,
    -0.127521010699219, 0.270523387217103, 1.68335644352003,
    -0.314902131571829
  ),
  Tor2 = c(
    -0.481754175843152, -0.440784040523259, -0.532975340622715,
    -0.182089795101371, -0.564807490336052, 1.74119896504534,
    -0.96169805631325, -0.721782763145306, -0.433459827401695,
    -0.727495835245995
  ),
  Tor3 = c(
    -0.889343429110847, 1.07937149728343, -0.215144871523998,
    -0.92234350748557, -0.832108253417702, 2.02456082994848,
    -0.0434322861759954, -0.523126561938426, -0.556984056084809,
    -0.740331742513503
  ),
  # Tor4 appears once; the original listing repeated it after Tor5, which
  # produced an unintended extra "Tor4.1" column.
  Tor4 = c(
    -0.858141567384178, 1.87728717064375, -0.381047638414538,
    -0.613568289061259, -1.92838339196505, 2.23393705735665,
    0.635389543483408, -0.466053620529111, -1.50483745357134,
    -1.33400859143521
  ),
  Tor5 = c(
    -0.486388736112514, 0.789390852922639, -0.869434195504952,
    -0.70405854858187, -1.16488184095428, 2.91497178849082,
    -2.10331904053714, -0.571130459068143, -0.219526004620518,
    -0.301435496557957
  ),
  `p-value-wilcox` = c(0.8, 0.3, 0.7, 0.8, 0.9, 0.8, 0.7, 0.5, 0.7, 0.9),
  `p-value-fisher` = c(0.1, 0.7, 0.3, 0.1, 0.5, 0.3, 0.9, 0.2, 0.9, 0.4),
  check.names = FALSE
)
Here I am using dummy p-values to outline the desired output. The real data have >200 columns, and the two groups (Nor and Tor) have unequal sample sizes.
I found some examples on Stack Overflow, as mentioned below, and tried to replicate them, but failed miserably.
Please help me.
Solution
We may use `dapply` from the collapse package, which should be faster than a base R row-wise `apply`:
library(collapse)

# Row-wise comparison of the "Nor" samples against the "Tor" samples.
#
# The two groups are located by column-name prefix rather than by fixed
# position, so the same code works when the groups have different sizes
# (here 4 vs 5) or when the data has many more columns.
#
# NOTE: the result table printed with the original answer came from a 4-vs-4
# comparison (the last Tor column was dropped), so its p-values differ from
# what this version returns.
nor_idx <- grep("^Nor", names(df1))
tor_idx <- grep("^Tor", names(df1))
stopifnot(length(nor_idx) > 0L, length(tor_idx) > 0L)

n_nor <- length(nor_idx)
# Group label for every value of a row, in the order the columns are passed.
grp <- factor(rep(c("Nor", "Tor"), times = c(n_nor, length(tor_idx))))

# Compute both p-values for a single row.
#
# x: numeric vector holding the Nor values first, then the Tor values.
# Returns a named numeric vector c(wilcox = , fisher = ).
row_pvalues <- function(x) {
  nor <- x[seq_len(n_nor)]
  tor <- x[-seq_len(n_nor)]
  # fisher.test() needs categorical data: calling it on two continuous
  # vectors treats every distinct value as its own category (p-value is then
  # always 1) and requires both vectors to have the same length. Here each
  # value is instead classified as above / not above the row median and the
  # resulting 2 x 2 table (group x above-median) is tested.
  above <- factor(x > median(x), levels = c(FALSE, TRUE))
  c(
    wilcox = wilcox.test(nor, tor)$p.value,
    fisher = fisher.test(table(grp, above))$p.value
  )
}

cbind(
  df1,
  dapply(df1[c(nor_idx, tor_idx)], MARGIN = 1, FUN = row_pvalues)
)
En_ID Nor1 Nor2 Nor3 Nor4 Tor1 Tor2 Tor3 Tor4 Tor5 wilcox fisher
1 KNT00000000003 -0.8341652 -1.1853103549 -0.26265926 -0.8407529 -0.9687848 -0.4817542 -0.88934343 -0.8581416 -0.4863887 0.6857143 1
2 KNT00000000005 1.0219944 0.4237197273 1.39624811 0.5365488 -0.5028097 -0.4407840 1.07937150 1.8772872 0.7893909 0.8857143 1
3 KNT00000000419 -0.5586589 -1.2326171937 -0.54867356 -0.2774095 -0.2315264 -0.5329753 -0.21514487 -0.3810476 -0.8694342 0.1142857 1
4 KNT00000000457 -0.3901142 0.0855281134 -0.01496511 -0.2410736 -0.5300384 -0.1820898 -0.92234351 -0.6135683 -0.7040585 0.1142857 1
5 KNT00000000460 -1.2355184 -1.5236683023 -1.45458689 -0.8753132 -0.7060060 -0.5648075 -0.83210825 -1.9283834 -1.1648818 0.3428571 1
6 KNT00000000938 3.1142943 3.3669258656 2.54126941 1.6178965 3.5826436 1.7411990 2.02456083 2.2339371 2.9149718 0.8857143 1
7 KNT00000000971 0.2839322 1.0032369095 1.17711309 0.4122871 -0.1275210 -0.9616981 -0.04343229 0.6353895 -2.1033190 0.1142857 1
8 KNT00000001036 -1.1690852 -0.0002112488 -1.19425285 -1.1184666 0.2705234 -0.7217828 -0.52312656 -0.4660536 -0.5711305 0.2000000 1
9 KNT00000001084 -0.5970548 -4.7473848355 1.17788732 -2.6274529 1.6833564 -0.4334598 -0.55698406 -1.5048375 -0.2195260 0.3428571 1
10 KNT00000001167 -0.5936245 -0.3181762311 -0.36789705 -0.7604527 -0.3149021 -0.7274958 -0.74033174 -1.3340086 -0.3014355 0.6857143 1
Answered By - akrun Answer Checked By - Marie Seifert (PHPFixing Admin)
0 Comments:
Post a Comment
Note: Only a member of this blog may post a comment.