\documentstyle[11pt,psfig]{article}
\setlength{\parskip}{.05in}
\setlength{\textwidth}{6.20in}
\setlength{\textheight}{8.25in}
\setlength{\evensidemargin}{-.2in}
\setlength{\oddsidemargin}{-.2in}
\setlength{\topmargin}{-.25in}
\def\squarebox#1{\hbox to #1{\hfill\vbox to #1{\vfill}}}
\newcommand{\finish}{\centerline{\rule{14cm}{1.5mm}}}
\newcommand{\qed}{\hspace*{\fill}
\vbox{\hrule\hbox{\vrule\squarebox{.667em}\vrule}\hrule}\smallskip}
\newenvironment{proof}{\noindent {\bf Proof:} \hspace{.677em}}%
{\qed}
\newenvironment{proofr}[1]{\noindent {\bf Proof of
{#1}:}\hspace{.677em}}%
{\qed}
\vspace{-.5in}
\title{ Introduction
to Probability\\
}
\author{
{\bf Nader H.~Bshouty
}\\
{\small Department of
Computer Science}\\
{\small Technion 32000}\\
{\small
{\small {\tt e-mail:~
}
\newcommand{\singlespacing}%
{\renewcommand{\baselinestretch}{1.0}\small\normalsize}
\newcommand{\comment}[1]{}
%Macro to draw pretty boxes around things
\newlength{\boxwidth}
\setlength{\boxwidth}{\textwidth}
\addtolength{\boxwidth}{-27 pt}
\newcommand{\boxer}[1]{\begin{center}
\fbox{\fbox{
\begin{minipage}{\boxwidth}
\vspace*{.2in}
% Copyright (C) 1988, all rights reserved.
\maketitle
\label{style}
#1
\vspace*{.2in}
\end{minipage}}}
\end{center}}
\def\cjfigures{norm}
\def\cjfigure#1#2#3{ % psfig command , caption , label
{
\begin{figure}[htb]
\centerline{\psfig{#1}}
\caption{#2}
\label{#3}
\end{figure}
}
}
\def\dir{/home/profs/bshouty/SUBMITTED/23-MonotoneTheory/OTHER/}
\newcommand{\unset}[2]{#1_{#2 \leftarrow *}}
\newcommand{\proj}[3]{#1 | (#2 \!\leftarrow\! #3)}
\newcommand{\unary}[2]{#1_{#2}}
\newcommand{\fA}{\unary{f}{A}}
\newcommand{\fB}{\unary{f}{B}}
\newcommand{\fC}{\unary{f}{C}}
\newcommand{\fD}{\unary{f}{D}}
\newcommand{\fAx}{\unary{f}{A}(x)}
\newcommand{\fBx}{\unary{f}{B}(x)}
\newcommand{\error}{\mbox{ERROR}}
\newcommand{\twobytwo}[4]{\left( \! \begin{array}{cc} #1
& #2 \\ #3 & #4
\end{array} \! \right)}
\newcommand{\twobyone}[2]{\left(\!\begin{array}{c} #1 \\ #2
\end{array}\!\right)}
\newcommand{\onebytwo}[2]{( \! \begin{array}{cc} #1 & #2
\end{array} \! )}
\newcommand{\assign}[3]{#1_{#2 \leftarrow #3}}
\newcommand{\assignpair}[5]{#1_{#2 \leftarrow #3,#4
\leftarrow #5}}
\newcommand{\op}{\triangleright}
\newcommand{\M}{{\cal M}}
\newcommand{\D}{{\cal D}}
\newcommand{\F}{{\cal F}}
\newcommand{\T}{{\cal T}}
\newcommand{\bB}{\mbox{\bf B}}
\newcommand{\bE}{\mbox{\bf E}}
\newcommand{\bT}{\mbox{\bf T}}
\newcommand{\bY}{\mbox{\bf Y}}
\newcommand{\bN}{\mbox{\bf N}}
\newcommand{\bL}{\mbox{\bf L}}
\newcommand{\bM}{\mbox{\bf M}}
\newcommand{\bH}{\mbox{\bf H}}
\newcommand{\bO}{\mbox{\bf O}}
\newcommand{\OR}{{\vee}}
\newcommand{\bor}{\bigvee}
\newcommand{\AND}{{\wedge}}
\newcommand{\band}{\bigwedge}
\newcommand{\imply}{\Rightarrow}
\newcommand{\equ}{\Leftrightarrow}
\newcommand{\eqdef}{\stackrel{\Delta}{=}}
\newtheorem{lemmasection}{Lemma}[section]
\newtheorem{lemma}{Lemma}[subsection]
\newtheorem{Rule}{Rule}
\newtheorem{theoremsection}{Theorem}[section]
\newtheorem{theorem}{Theorem}[subsection]
\newtheorem{corolarysection}{Corolary}[section]
\newtheorem{corolary}{Corolary}[subsection]
\newtheorem{definitionsection}{Definition}[section]
\newtheorem{definition}{Definition}[subsection]
\newtheorem{exercise}{Exercise}[subsection]
\newtheorem{example}{Example}
\begin{document}
%\tableofcontents
\date{}
\bibliographystyle{alpha}
\maketitle
\vspace{-.2in}
\thispagestyle{empty}
\pagenumbering{arabic}
\setlength{\unitlength}{.6mm}
%\begin{abstract}
%Here we give few inequalities we usually use in COLT.
%\end{abstract}
\medskip
\section{Combinatorics}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Simple Rules In Combinatorics}
The {\bf rule of sum} says that the number of ways to choose
an element from one of two disjoints sets is the sum of
the cardinalities of the sets.
That is, if $A$ and $B$ are two finite sets with no members
in common, then $|A\cup B|=|A|+|B|$.
The {\bf rule of product} says that the number of ways to
choose
an ordered pair is the number of ways to choose the first
element from $A$ and the second element from $B$.
That is, if $A$ and $B$ are two finite sets, then
$|A\times B|=|A|\cdot |B|$.
\subsection{Simple Counting}
\noindent
{\bf Strings}
A {\bf string} over a set $S$ is a sequence of elements of
$S$.
For example, there are 8 binary strings of length 3:
000,001,010,011,100,101,110,111.
A $k$-string over a set $S$ can be viewed as an element
of the Cartesian product $S^k$ of $k$-tuples; thus, there
are
$|S|^k$ strings of length $k$.
\noindent
{\bf Permutations}
A {\bf permutation} of a finite set $S$ is an ordered
sequence
of all the elements of $S$, with each element appearing
exactly once. For example, if $S=\{a,b,c\}$, there are 6
permutations of $S$:
abc,acb,bac,bca,cab,cba.
There are $n!$ permutations of a set of $n$ elements.
A {\bf $k$-permutation} of $S$ is an ordered sequence of $k$
of $S$, with no element appearing more than once in the
sequence.
The number of $k$-permutations of an $n$-set is:
$$n(n-1)(n-2)\cdots (n-k+1)=\frac {n!}{(n-k)!}$$
\noindent
{\bf Combinations}
A {\bf $k$-combination} of an $n$-set $S$ is simply a $k$-subset
of $S$. There are six 2-combinations of the 4-set
$\{a,b,c,d\}$:
$ab,ac,ad,bc,bd,cd$.
The number of $k$-combinations of an $n$-set.
$$\frac{n!}{k!(n-k)!}$$
\subsection{Equalities}
\noindent
{\bf Binomial coefficients}
We use the notation ${n\choose k}$ (read "$n$ choose
$k$")
to denote the number of $k$-combinations of an $n$-set.
We have $${n\choose k}=\frac {n!}{k!(n-k)}.$$
This formula is symmetric in $k$ and $n-k$:
$${n\choose k}={n\choose n-k}.$$
These numbers are also known as {\bf binomial coefficients},
due to
their appearance in the {\bf binomial expansion}:
$$(x+y)^n=\sum_{k=0}^n {n\choose k}x^k y^{n-k}$$
A special case: $$2^n=\sum_{k=0}^n {n\choose k}.$$
We also have
$${n\choose k}=\frac{n}{k} {n-1\choose k-1}$$
$${n\choose k}=\frac{n}{n-k} {n-1\choose k}$$
$${n\choose k}={n-1\choose k}+{n-1\choose k-1}$$
\subsection{Inequalities}
\noindent
{\bf Binomial bounds}
For $1\le k\le n$, we have the lower bound:
$${n\choose k}\ge \left(\frac {n}{k}\right)^k.$$
The upper bound:
$${n\choose k}\le \left(\frac {en}{k}\right)^k.$$
For all $0\le k\le n$ (see \ref{nklnnkknmknmk}),
$${n\choose k}\le \frac{n^n}{k^k(n-k)^{n-k}},$$
where for convenience we assume that $0^0=1$.
For $k=\lambda n$, where
$0\le \lambda \le 1$, this bound can be rewritten as (see
R\ref{nclnnHl}):
$${n\choose \lambda n}\le 2^{nH(\lambda)},$$
where $H(\lambda)=-\lambda lg \lambda-(1-\lambda)lg(1-\lambda)$
is the
{\bf entropy function} and where,
for convenience, we assume that $0\lg 0=0$,
so that $H(0)=H(1)=0$.
For $0\le \lambda\le \frac{1}{2}$ we have (see
R\ref{slnnciltnHl})
$$\sum_{0}^{\lambda n}{n\choose i}\le 2^{nH(\lambda)}.$$
For any $n\ge 0,j\ge 0,k\ge 0$, and $j+k\le n$,
$${n\choose j+k}\le {n\choose j}{n-j\choose k}.$$
The Stirling's approximation is
$$\log n!=\left(n+\frac{1}{2}\right)\log n-n-\frac{1}{2}\log(2\pi)+o(1),$$
$${2n\choose n}=\frac{2^{2n}}{\sqrt{\pi n}}
(1+O(1/n)).$$
\section{Probability}
\subsection{Introduction}
We define probability in terms of a {\bf sample space}
$S$, which is a set whose
elements are called {\bf elementary events}.
For flipping two distinguishable coins, we can view the
sample space
$S={HH,HT,TH,TT}$.
An {\bf event} is a subset of the sample space $S$.
The event $S$ is called the
{\bf certain event}, and the event $\O$ is called
the {\bf null event}.
We say that two events $A$ and
$B$ are {\bf mutually} exclusive if $A\cap B=\O$.
\noindent
{\bf Axioms of probability}
A {\bf probability distribution} $\Pr \{\}$ on a sample
space $S$ is a mapping from
events of $S$ to real numbers such that the
following probability axioms are
satisfied:
\begin{enumerate}
\item $\Pr [A]\ge 0$ for any event $A$.
\item $\Pr [S]=1$.
\item $\Pr [A\cup B]=\Pr [A]+\Pr [B]$ for
any two mutually exclusive events $A$ and $B$.
\end{enumerate}
We call $\Pr [A]$ the {\bf probability} of the event $A$.
Results:
$$\Pr [\O ]=0,$$ if $A\subseteq B$, then $\Pr [A]\le \Pr [B]$.
For $\overline{A}$ (the {\bf complement} of $A$),
$$\Pr [\overline{A}]=1-\Pr [A].$$ For
any two events $A$ and $B$,
\begin{eqnarray*}
\Pr [A\cup B]&=&\Pr [A]+\Pr [B]-\Pr [A\cap B]\\
&\le& \Pr [A]+\Pr [B]\\
\end{eqnarray*}
\noindent
{\bf Discrete probability distributions}
A probability distribution is {\bf discrete}
if it is defined over a finite or
or countably infinite sample space.Then for any event
$A$,$$\Pr
[A]=\sum_{s\in A} \Pr [s].$$
If $S$ is finite and every elementary event $s\in S$ has
probability
$\Pr[s]=1/|S|$
then we have the {\bf uniform probability distribution} on
$S$.
A {\bf fair coin} is
one which the probability of obtaining a head is the same as
the probability of obtaining a tail, that is, 1/2.
\subsection{Conditional probability and Independence}
Conditional probability formalizes the notion of having
prior partial
knowledge of the
outcome of an experiment. The conditional probability of an
event $A$
given that another
event $B$ occurs is defined to be:
$$\Pr [A|B]=\frac{\Pr[A\cap B]}{\Pr [B]}$$
Whenever $\Pr [B]\ne 0$,
(we read ``$\Pr [A|B]$'' as "the probability of
$A$ given $B$.")
Two events are {\bf independent} if,
$\Pr [A\cap B]=\Pr[A] \Pr [B]$, which
is equivalent, if
$\Pr [B]\ne 0$, to the condition $\Pr [A|B]= \Pr [A]$.
A collection $A_1,A_2,...,A_n$ of events
is said to be {\bf pairwise independent}
if $\Pr [A_i\cap A_j]=\Pr [A_i]\Pr [A_j]$.
For all $1\le i<j\le n$.
We say that they are ({\bf mutually}) {\bf independent} if
every $k$-subset $A_{i_1},A_{i_2},...,A_{i_k}$
of collection, where $2\le k\le n$ and $1\le
i_1<i_2<\cdots <i_k\le n$, satisfies:
$$\Pr[A_{i_1} \cap A_{i_2} \cap \cdots \cap A_{i_k} ]=\Pr [A_{i_1}
]
\Pr [A_{i_2} ]\cdots \Pr [A_{i_k} ]$$
\noindent
{\bf Bayes's Theorem}
$$\Pr [A|B]=\frac{\Pr[A] \Pr[B|A]}{\Pr [B]}.$$
From Bayes's theorem: $$\Pr[A|B]=\frac {\Pr [A] \Pr [B|A]}
{\Pr
[A] \Pr [B|A]+\Pr [\overline{A}] \Pr [B|\overline{A}]}$$
\noindent
{\bf Boole's inequality:} For any finite or countably
infinite sequence of events
$A_1 ,A_2 ,...,$
$$\Pr [A_1 \cup A_2 \cup \cdots] \le \Pr [A_1]+\Pr [A_2]+ \cdots.$$
For collection of events $A_1 ,A_2 ,...,A_n$,
\begin{eqnarray*}
\Pr [A_1 \cap A_2 \cap \cdots\cap A_n]&=&\Pr [A_1]\cdot
\Pr
[A_2|A_1]\cdot \Pr [A_3|A_1\cap A_2 ]\cdots\\
&&\Pr [A_n|A_1 \cap A_2 \cap \cdots \cap A_{n-1}]\\
\end{eqnarray*}
\subsection {Discrete Random Variables and Expectation}
A ({\bf discrete}) {\bf random variable} $X$ is a function
from a finite or
countably infinite sample space $S$ to the real numbers.
For random variable $X$ and a real number $x$, we define the
event $X=x$ to be
$[s\in S:X(s)=x]$; thus, $\Pr [X=x]=\sum_{[s\in S:X(s)=x]}
\Pr [s]$.
The function: $f(x)=\Pr [X=x]$, is the {\bf probability
density function} of
the random variable $X$.
It is common for several random variables to be defined on
the same sample
space . If $X$ and $Y$ are random variables, the function:
$f(x,y)= \Pr [X=x$ and $Y=y]$
Is the {\bf joint probability density function} of $X$ and
$Y$. For a fixed value
$y$, $\Pr [Y=y]=\sum_x \Pr [X=x$ and $Y=y]$.
We define two random variables $X$ and $Y$ to be {\bf
independent} if for all $x$
and $y$, the events $X=x$ and $Y=y$ are independent or,
equivalently, if for
all $x$ and $y$, we have $\Pr [X=x$ and $Y=y]=\Pr[X=x]\Pr[Y=y]$.
\noindent
{\bf Expected value of a random variable}
The {\bf expected value} (or, synonymously, {\bf
expectation} or {\bf mean}) of a discrete
random variable $X$ is $$E [X]=\sum_x x\Pr [X=x].$$
Sometimes the expectation of $X$ is denoted by $\mu_x$ or , when
the
random is apparent from context, simply by $\mu$.
The expectations of the sum of two random variables is the
sum of their
expectations, that is, $$E [X+Y]=E [X]+[Y].$$
If $X$ is any random variable, any function $g(x)$ defines a
new random
variable $g(X)$. If the expectation of $g(X)$ is defined,
then
$$E [g(X)]=\sum_x g(x) \Pr [X=x].$$
Letting $g(x)=ax$, we have for any constant $a$, $$E [aX]=aE[X].$$
$$E [aX+Y]=aE [X]+E [Y].$$
When two random variables $X$ and $Y$ are independent and
each has a defined
expectation, $E [XY]=E [X] E [Y].$
In general, when $n$ random variable $X_1,X_2,...,X_n$ are
mutually
independent, $E [X_1 X_2 \cdots X_n]=E [X_1]E [X_2]\cdots E[X_n].$
When a random variable $X$ takes on values from the natural
numbers
$N=\{0,1,2,...\}$, there is a nice formula for its
expectation:
$$E[X]=\sum_{i=1}^\infty \Pr [X\ge i].$$
\noindent
{\bf Variance and standard deviation}
The {\bf variance} of a random variable $X$ with mean $E [X]$
is:
\begin{eqnarray*}
Var [X]&=&E [(X-E [X])^2]\\
&=&E [X^2]-E^2 [X]\\
\end{eqnarray*}
We have
$$E [X^2]=Var [X]+E^2 [X]$$
$$Var [aX]=a^2 Var [X]$$
When $X$ and $Y$ are independent random variables, $Var [X+Y]=Var
[X]+Var[Y].$
In general, if $n$ random variables $X_1,X_2,...,X_n$ are
pairwise independent,
then $$Var [\sum_{i=1}^n X_i]=\sum_{i=1}^n Var [X_i].$$
The {\bf standard deviation} of a random variable $X$ is the
positive square root
of the variance of $X$.
\noindent
{\bf Markov's inequality:} (see Rule \ref{Markov})
For a nonnegative random variable $X$
$$\Pr [X\ge t]\le \frac{E [X]}{t}$$
for all $t>0$ and
$$\Pr[X\ge \lambda E[X]]\le \frac{1}{\lambda}.$$
See more results in \cite{G} Appendix A.
\noindent
{\bf Chebychev's inequality:}
$$\Pr[|X-E[X]|\ge k]\le \frac{Var[X]}{k^2}.$$
\noindent
{\bf Bienayme-Chebyschev}
Let $X_1,\ldots,X_m$ be pairwise independent random
variables such that $\bE[X_i]=\mu$ and $Var[X_i]=\sigma^2$. Then
$$\Pr\left[\left|\frac{\sum_{i=1}^m X_i}{m}-\mu\right|
\ge \lambda \right]
\le \frac{\sigma^2}{\lambda^2 m}.$$
See the proof in \cite{G} Appendix A.
\subsection{Geometric and Binomial distributions}
A coin flip is an instance of a {\bf Bernoulli trail}, which
is defined as an
experiment with only two possible outcomes: {\bf success},
which occurs with
probability $p$, and {\bf failure}, which occurs with
probability $q=1-p$.
The trails are mutually independent ok. each has the same
probability
$p$ for success.
\noindent
{\bf The geometric distribution}
Suppose we have a sequence of Bernoulli trails, each with a
probability
$p$ of success and a probability $q=1-p$ of failure.
Let the random variable $X$ be the number of trails needed
to obtain a success.
$$\Pr [X=k]=q^{k-1} p$$
A probability distribution satisfying this is said to be a
{\bf geometric distribution}.
$$E [X]=1/p$$
$$Var [X]=q/p^2$$
\noindent
{\bf The binomial distribution}
Define the random variable $X$ to be the number of success
in $n$ trails.
$$\Pr [X=x]={n\choose k}p^k q^{n-k}$$
A probability distribution satisfying this is said to be a
{\bf binomial distribution} .
Define: $$b(k;n,p)={n\choose k}p^k (1-p)^{n-k}$$
We have: $E [X]=np$ and $Var [X]=npq$.
The binomial distribution $b(k;n,p)$ increases as $k$ runs
from 0 to $n$
until it reaches the mean $np$, and then it decreases.
Let $n\ge0$, let $0<p<1$, let $q=1-p$, and let $0\le
k\le n$.
Then, $$b(k;n,p)\le \left(\frac{np}{k}\right)^k \left(\frac{nq}{n-k}\right)^{n-k}.$$
For $0\le k\le n$, $$b(k;n,1/2)\le 2^{nH(k/n)-n}.$$
\noindent
{\bf The tails of the binomial distribution}
Let $X_1,X_2,\ldots,X_n$ be independent random variables
such that
$X_i\in \{0,1\}$ and $E[X_i]=p$.
Then this is equivalent to
a sequence of $n$ Bernoulli trails , where success occurs
with probability $p$. Let $X$ be the random variable
denoting the
total number of successes. Then for $0\le k\le n$, the
probability
of at least $k$ success is:
$$ \Pr \left[\sum_{i=1}^n X_i\ge k\right]=\sum_{i=k}^n b(i;n,p)
\le {n\choose k}p^k. $$
For $np<k<n$, the probability of more than $k$
successes is:
$$\Pr \left[\sum_{i=1}^nX_i>k\right]=\sum_{i=k+1}^n b(i;n,p)
<\frac {(n-k)p}{k-np}b(k;n,p),$$
and
$$
\Pr \left[\frac{\sum_{i=1}^nX_i}{n}-p\ge \lambda\right]
\le\left(\frac{pe}{\lambda}\right)^{\lambda n}.
$$
$$ \Pr \left[\sum_{i=1}^nX_i\le k\right]=\sum_{i=1}^k b(i;n,p)
\le{n\choose k}(1-p)^{n-k}.
$$
For $0<k<np$, the probability of fewer than $k$
success is:
$$\Pr \left[\sum_{i=1}^nX_i<k\right]=\sum_{i=0}^{k-1} b(i;n,p)
<\frac {kq}{np-k} b(k;n,p).$$
{\bf Chernoff}
Let $X_1,\ldots,X_m$ be independent random
variables such that $X_i\in \{0,1\}$ and $\bE[X_i]=p$ Then
$$\Pr\left[\frac{\sum_{i=1}^n X_i}{n}-p
\ge \lambda p \right]
\le e^{-\lambda^2np/3}.$$
$$\Pr\left[p-\frac{\sum_{i=1}^n X_i}{n}
\ge \lambda p\right] \le e^{-\lambda^2np/2}.$$
See another bound in \cite{G} Appendix A.
{\bf Hoeffding}
Let $X_1,\ldots,X_m$ be independent random
variables such that $X_i\in [a,b]$ and $\bE[X_i]=p$.
Then
$$\Pr\left[\left|\frac{\sum_{i=1}^n X_i}{n}-p\right|\ge
\lambda \right]
\le 2e^{-2\lambda^2n/(b-a)^2}.$$
{\bf Other Results.}
Consider independent random variables $X_1,\ldots,X_n$
(a sequence of $n$ Bernoulli trails)
where $E[X_i]=p_i$ (where in the $i$th trail, for
$i=1,2,...,n$, success occurs with probability $p_i$ and
failure occurs with
probability $q_i=1-p_i$).
Let $\mu =E\left[\sum_{i=1}^n X_i\right]$. Then for
$r>\mu$,
$$\Pr \left [\sum_{i=1}^n X_i-\mu \ge r\right]\le \left(\frac{\mu
e}{r}\right)^r$$
$$\Pr [X-\mu\ge r]\le e^{-r^2/2n}$$
\section{Examples}
\begin{example}
{\bf Random Halving}: A game where you randomly uniformly
choose
an integer $1\le i_1\le n$ and then $1\le i_2\le i_1$ until
you
get $1$. Show that the expected number of steps is
$1+\sum_{i=1}^{n-1} 1/i$.
\end{example}
{\bf Solution:} Let $X_n$ be the expected number of steps
then
$X_1=1$ and
$$E[X_n]=1+E_{1\le i\le n}[E[X_i]].$$
Now
$$
\frac{n+1}{n} E[X_{n+1}]-E[X_n]=
\frac{1}{n}+\frac{1}{n}E[X_{n+1}]$$ and therefore
$E[X_{n+1}]=E[X_{n}]+(1/n)$.
{\bf Acknowledgement:} To Vivian Bshouty for helping me with
this
survey.
\begin{thebibliography}{10}
\bibitem[AS]{AS}
N. Alon and J. H. Spencer.
\newblock The Probabilistic Method.
\bibitem[CLR]{CLR}
T. Cormen, C. E. Leiserson and R. L. Rivest.
Introduction to Algorithms.
\bibitem[G]{G}
O. Goldreich.
Modern Cryptography, Probabilistic Proofs and Pseudo-randomness.
\bibitem[MR]{MR}
R. Motwani and P. Raghavan.
Randomized Algorithms.
\end{thebibliography}
\newpage
\section{Proofs}
\begin{Rule}
{\bf Markov's inequality:}
$$\Pr [X\ge t]\le \frac{E [X]}{t}$$
for all $t>0$.
\label{Markov}
{\bf Proof.} We have
$$E[X]=\sum_x\Pr[X=x]\cdot x\ge \sum_{x\ge t}\Pr[X=x]x
\ge \Pr[X\ge t] t.\Box$$
\end{Rule}
\begin{Rule}\label{nklnnkknmknmk}
We have
$${n\choose k}\le \frac{n^n}{k^k(n-k)^{n-k}}.$$
\end{Rule}
{\bf Proof.} We have
$$n^n=(k+(n-k))^n\ge {n\choose k}k^k (n-k)^{n-k}.\Box$$
\begin{Rule}\label{nclnnHl}
For $0\le \lambda \le 1$ we have
$${n\choose \lambda n}\le 2^{nH(\lambda)},$$
where $H(\lambda)=-\lambda lg \lambda-(1-\lambda)lg(1-\lambda)$.
\end{Rule}
{\bf Proof.} Use Rule \ref{nklnnkknmknmk}.$\Box$
\begin{Rule}\label{slnnciltnHl}
For $0\le \lambda\le \frac{1}{2}$ we have
$$\sum_{0}^{\lambda n}{n\choose i}\le 2^{nH(\lambda)}.$$
\end{Rule}
{\bf Proof.} We have
\begin{eqnarray*}
1&=&(\lambda+(1-\lambda))^n\ge
\sum_{i=1}^{\lambda n}{n\choose i}\lambda^i(1-\lambda)^{n-i}
=\sum_{i=1}^{\lambda n}{n\choose i}(1-\lambda)^n\left(
\frac{\lambda}{1-\lambda}\right)^{\lambda i}\\
&\le& \sum_{i=1}^{\lambda n}{n\choose
i}(1-\lambda)^n\left(\frac{\lambda}{1-\lambda}\right)^{\lambda
n}
=2^{-n H(\lambda)}\sum_{0}^{\lambda n}{n\choose i}.\Box
\end{eqnarray*}
\end{document}