genecards.org
* https://www.genecards.org/cgi-bin/carddisp.pl?gene=APOE&keywords=APOE
GO:NNNNNNN
, e.g. GO:0019319
.gmt
- Gene Set File Format.gmt
- Gene Matrix Transposelibrary('GSEABase') hallmarks_gmt <- getGmt(con='h.all.v7.5.1.symbols.gmt') hallmarks_gmt ## GeneSetCollection ## names: HALLMARK_TNFA_SIGNALING_VIA_NFKB, HALLMARK_HYPOXIA, ..., HALLMARK_PANCREAS_BETA_CELLS (50 total) ## unique identifiers: JUNB, CXCL2, ..., SRP14 (4383 total) ## types in collection: ## geneIdType: NullIdentifier (1 total) ## collectionType: NullCollection (1 total)
GO:0019319
)contingency_table <- matrix(c(13, 987, 23, 8977), 2, 2) fisher_results <- fisher.test(contingency_table, alternative='greater') fisher_results ## ## Fisher's Exact Test for Count Data ## ## data: contingency_table ## p-value = 2.382e-05 ## alternative hypothesis: true odds ratio is greater than 1 ## 95 percent confidence interval: ## 2.685749 Inf ## sample estimates: ## odds ratio ## 5.139308
tibble( x = seq(-4,4,by=0.1), `Probability Density`=dnorm(x,0,1) ) %>% ggplot(aes(x=x,y=`Probability Density`)) + geom_line() + labs(title="Probability Density Function for a Normal Distribution")
\[ P(X = x|\mu,\sigma) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{(x-\mu)^2}{2\sigma^2}} \]
\[ P(X = x|\mu,\sigma) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{(x-\mu)^2}{2\sigma^2}} \]
tibble( x = seq(-4,4,by=0.1), PDF=dnorm(x,0,1), CDF=pnorm(x,0,1) ) %>% ggplot() + geom_line(aes(x=x,y=PDF,color="PDF")) + geom_line(aes(x=x,y=CDF,color="CDF"),linetype="dashed")
norm
for normal distributiond
, p
, q
, and r
, e.g.dnorm(x, mean=0, sd=1)
- PDF of the normal distributionpnorm(q, mean=0, sd=1)
- CDF of the normal distributionqnorm(p, mean=0, sd=1)
- inverse CDF; accepts probabilities between 0 and 1 and returns the corresponding quantiles (values of the distribution) rnorm(n, mean=0, sd=1)
- generate n
samples from a normal distributionDistribution | Probability Density Function |
---|---|
Normal | dnorm(x,mean,sd) |
t Distribution | dt(x,df) |
Poisson | dpois(n,lambda) |
Binomial | dbinom(x, size, prob) |
Negative Binomial | dnbinom(x, size, prob, mu) |
Exponential | dexp(x, rate) |
\(\chi^2\) | dchisq(x, df) |
library(statip) # NB: not a base R distribution # dbern(x, prob, log = FALSE) # qbern(p, prob, lower.tail = TRUE, log.p = FALSE) # pbern(q, prob, lower.tail = TRUE, log.p = FALSE) # rbern(n, prob) rbern(10, 0.5)
rbern(10, 0.5)
## [1] 0 1 1 1 0 1 0 0 0 0
\[ Pr(X = x|n,p) = {n \choose x} p^x (1-p) ^{(n-x)} \]
# dbinom(x, size, prob, log = FALSE) # pbinom(q, size, prob, lower.tail = TRUE, log.p = FALSE) # qbinom(p, size, prob, lower.tail = TRUE, log.p = FALSE) # rbinom(n, size, prob) rbinom(10, 10, 0.5)
rbinom(10, 10, 0.5)
## [1] 5 5 4 5 5 7 5 6 5 3
mean(rbinom(1000, 10, 0.5))
## [1] 5
\[ Pr(X = x|p) = (1-p)^x p \]
# dgeom(x, prob, log = FALSE) # pgeom(q, prob, lower.tail = TRUE, log.p = FALSE) # qgeom(p, prob, lower.tail = TRUE, log.p = FALSE) # rgeom(n, prob) rgeom(10, 0.5)
rgeom(10, 0.5)
## [1] 2 0 1 2 0 2 0 3 0 2
\[ Pr(X=k|\lambda) = \frac {\lambda^k e^{-\lambda}} {k!} \]
# dpois(x, lambda, log = FALSE) # ppois(q, lambda, lower.tail = TRUE, log.p = FALSE) # qpois(p, lambda, lower.tail = TRUE, log.p = FALSE) # rpois(n, lambda) rpois(10, 5)
rpois(10, 5)
## [1] 3 5 6 5 3 7 3 2 5 8
mean(rpois(1000, 5))
## [1] 4.935
\[ Pr(X = x|r,p) = {x+r-1 \choose r-1} p^r {(1-p)}^x \]
# dnbinom(x, size, prob, mu, log = FALSE) # pnbinom(q, size, prob, mu, lower.tail = TRUE, log.p = FALSE) # qnbinom(p, size, prob, mu, lower.tail = TRUE, log.p = FALSE) # rnbinom(n, size, prob, mu) rnbinom(10, 10, 0.5)
# dnbinom(x, size, prob, mu, log = FALSE) # pnbinom(q, size, prob, mu, lower.tail = TRUE, log.p = FALSE) # qnbinom(p, size, prob, mu, lower.tail = TRUE, log.p = FALSE) # rnbinom(n, size, prob, mu) rnbinom(10, 10, 0.5)
## [1] 18 9 7 6 9 9 3 13 12 4
\[ P(X=x|a,b) = \begin{cases} \frac{1}{b-a} & \text{for}\; a \le x \le b, \\ 0 & \text{otherwise} \end{cases} \]
dunif(x, min = 0, max = 1, log = FALSE) punif(q, min = 0, max = 1, lower.tail = TRUE, log.p = FALSE) qunif(p, min = 0, max = 1, lower.tail = TRUE, log.p = FALSE) runif(n, min = 0, max = 1) runif(10, min=0, max=10)
runif(10)
## [1] 0.09269301 0.04769325 0.49162694 0.06384177 0.74975727 0.06286865 ## [7] 0.59144210 0.85150450 0.80188802 0.49818460
mean(runif(1000))
## [1] 0.495675
\[ P(X = x|\mu,\sigma) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{(x-\mu)^2}{2\sigma^2}} \]
# dnorm(x, mean = 0, sd = 1, log = FALSE) # pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE) # qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE) # rnorm(n, mean = 0, sd = 1) rnorm(10, mean=10, sd=10)
rnorm(10, mean=10, sd=10)
## [1] 16.5110731 4.9263930 24.7133656 26.2699361 3.0473640 14.1095811 ## [7] 9.6754010 29.0091827 -13.9826178 0.4802505
mean(rnorm(1000, mean=10, sd=10))
## [1] 10.22659
\[ P(X=x|k) = \begin{cases} \frac{ x^{\frac{k}{2}-1}e^{-\frac{x}{2}} }{ 2^{\frac{k}{2}}\,\Gamma\left ( \frac{k}{2} \right ) }, & x>0;\\ 0, & \text{otherwise} \end{cases} \]
# dchisq(x, df, ncp = 0, log = FALSE) # pchisq(q, df, ncp = 0, lower.tail = TRUE, log.p = FALSE) # qchisq(p, df, ncp = 0, lower.tail = TRUE, log.p = FALSE) # rchisq(n, df, ncp = 0) rchisq(10, 10)
rchisq(10, 10)
## [1] 14.078925 10.028708 5.111300 16.071434 5.637748 10.010057 10.421410 ## [8] 6.069114 7.993634 14.850696
mean(rchisq(1000, 10))
## [1] 9.620238
p*
theoretical distribution functionsecdf(d$Value)
## Empirical CDF ## Call: ecdf(d$Value) ## x[1:3000] = -6.6546, -6.494, -6.2441, ..., 26.064, 30.29
plot(ecdf(d$Value))