%\documentclass[landscape]{article}
\documentclass{article}
\usepackage{amssymb}
\usepackage{amsmath}
%\usepackage{slide-article}
\usepackage{slide-article-tom}
%\usepackage{slide-article-tom-land}
\ifx\pdfoutput\undefined
\usepackage[dvips]{graphicx}
\else
\usepackage[pdftex]{graphicx}
%% \usepackage{type1cm}
%% \usepackage{color}
\pdfcompresslevel9
\fi
% \usepackage{epsfig}
%\usepackage{graphicx}
%\usepackage{graphics}
\usepackage{hyperref}
%\definecolor{Emerald}{cmyk}{1,0,0.50,0}
\hypersetup{colorlinks,
linkcolor=blue,
%pdfpagemode=FullScreen
pdfpagemode=None
}
%\usepackage{hyper}
%\usepackage{hthtml}
%\def\hyperref#1#2#3#4{\hturl{#1}}
%\def\@linkcolor{red}
%\def\@linkcolor{blue}
%\def\@anchorcolor{black}
%\def\@citecolor{green}
%\def\@filecolor{cyan}
%\def\@urlcolor{magenta}
%\def\@menucolor{red}
%\def\@pagecolor{red}
\def\pagedone{\newpage}
\def\tthdump#1{#1}
\tthdump{\def\sectionhead#1{\begin{center}{\LARGE\hypertarget{#1}
{#1}\hyperlink{Our general topics:}{\hfil$\leftarrow$}}\end{center}}}
%%tth:\def\sectionhead#1{{\LARGE#1\hypertarget{#1}{#1}}
%%tth: \special{html: Top}}
\tthdump{\def\exercises#1{\begin{center}{\LARGE Exercises: \hypertarget{#1.ex}
{#1}\hyperlink{Our general topics:}{\hfil$\leftarrow$}}\end{center}}}
%%tth:\def\exercises#1{{\LARGE Exercises: #1\hypertarget{#1.ex}{#1.ex}}
%%tth: \special{html: Top}}
\tthdump{\def\quotesection#1{\begin{center}{\LARGE\hypertarget{#1}
{#1}\hyperlink{The quotes}{\hfil$\twoheadleftarrow$}}\end{center}}}
%%tth:\def\quotesection#1{{\LARGE#1\hypertarget{#1}{#1}}
%%tth: \special{html: <-}}
%%tth:\def\makehyperlink#1{\special{html: }{\large#1}\special{html: }}
%%tth:\def\makehyperlinkex#1{\ \ \special{html: }{\large (ex)}\special{html: }}
%%tth:\def\binom#1#2{\left(\begin{array}{c}#1\\#2\end{array}\right)}
\tthdump{\def\httimes{\times}}
%%tth:\def\httimes{x\ }
\tthdump{\def\mybigtriangledown{\bigtriangledown}}
%%tth:\def\mybigtriangledown{\mathrm{Del}}
\tthdump{\def\htvec#1{\vec{#1}}}
%%tth:\def\htvec#1{{\bf #1}}
\def\myspan{\mathrm{span}}
\def\matrix{\mathrm{matrix}}
\tthdump{\def\myreal{\mathbb{R}}}
%%tth:\def\myreal{\mathbf{R}}
%%\def\myreal{\Re}
\tthdump{\def\mycomplex{\mathbb{C}}}
%%tth:\def\mycomplex{\mathbf{C}}
\tthdump{\def\myfield{\mathbb{F}}}
%%tth:\def\myfield{\mathbf{F}}
\tthdump{\def\myquaternions{\mathbb{H}}}
%%tth:\def\myquaternions{\mathbf{H}}
\tthdump{\def\myrationals{\mathbb{Q}}}
%%tth:\def\myrationals{\mathbf{Q}}
\tthdump{\def\myintegers{\mathbb{Z}}}
%%tth:\def\myintegers{\mathbf{Z}}
%\def\dim\mathrm{dim}
\def\im{\mathrm{im}}
\def\deriv{\mathrm{D}}
\def\trace{\mathrm{trace}}
\def\detm{\mathrm{det}_m}
\def\detv{\mathrm{det}_v}
\def\perm{\mathrm{perm}}
\def\sign{\mathrm{sign}}
% kets and bras etc.
\def\ket#1{|{#1}\rangle}
\def\bra#1{\langle{#1}|}
\def\braket#1#2{\langle{#1}|{#2}\rangle}
%\def\sectionhead#1{\begin{center}{\LARGE #1}\end{center}}
%\def\sectionhead#1{\section{#1}}
% defines a 2 element column vector.
\def\col#1#2{\left(\begin{array}{c}#1\\#2\end{array}\right)}
\def\tcol#1#2{(#1, #2)^T}
\begin{document}
\raggedright
%%tth:\special{html: }
\pagestyle{myfooters}
%\pagestyle{plain}
\thispagestyle{empty}
%%tth:\special{html: A brief survey of linear algebra}
%Slide 1
\title{{\LARGE\bf A brief survey of linear algebra}\newline \newline \newline}
\author{Tom Carter
\newline
\newline
\newline
\tthdump{\href
{http://cogs.csustan.edu/\~tom/linear-algebra}{http://cogs.csustan.edu/\~\ tom/linear-algebra}}
%%tth:\href{http://cogs.csustan.edu/~tom/linear-algebra}{http://cogs.csustan.edu/\~tom/linear-algebra}
\vfill
Santa Fe Institute\newline
Complex Systems Summer School
\newline
}
\date{June, 2001}
\maketitle
%Slide 2
\sectionhead{Our general topics:}
%%tth:\begin{itemize}
%%tth:\item
\tthdump{\hyperlink{Why linear algebra}
{\ $\circledcirc$ Why linear algebra\newline}}
%%tth:\makehyperlink{Why linear algebra}
%%tth:\item
%%% \tthdump{\hyperlink{Vector spaces}
%%% {$\circledcirc$ Vector spaces\newline}}
%%% %%tth:\makehyperlink{Vector spaces}
\tthdump{\hyperlink{Vector spaces}
{$\circledcirc$ Vector spaces}}
\tthdump{\hyperlink{Vector spaces.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Vector spaces}
%%tth:\makehyperlinkex{Vector spaces.ex}
%%tth:\item
\tthdump{\hyperlink{Examples of vector spaces}
{$\circledcirc$ Examples of vector spaces}}
\tthdump{\hyperlink{Examples of vector spaces.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Examples of vector spaces}
%%tth:\makehyperlinkex{Examples of vector spaces.ex}
%%tth:\item
\tthdump{\hyperlink{Subspaces}
{$\circledcirc$ Subspaces}}
\tthdump{\hyperlink{Subspaces.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Subspaces}
%%tth:\makehyperlinkex{Subspaces.ex}
%%tth:\item
\tthdump{\hyperlink{Linear dependence and independence}
{$\circledcirc$ Linear dependence and independence}}
\tthdump{\hyperlink{Linear dependence and independence.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Linear dependence and independence}
%%tth:\makehyperlinkex{Linear dependence and independence.ex}
%%tth:\item
\tthdump{\hyperlink{Span of a set of vectors}
{$\circledcirc$ Span of a set of vectors}}
\tthdump{\hyperlink{Span of a set of vectors.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Span of a set of vectors}
%%tth:\makehyperlinkex{Span of a set of vectors.ex}
%%tth:\item
\tthdump{\hyperlink{Basis for a vector space}
{$\circledcirc$ Basis for a vector space}}
\tthdump{\hyperlink{Basis for a vector space.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Basis for a vector space}
%%tth:\makehyperlinkex{Basis for a vector space.ex}
%%tth:\item
\tthdump{\hyperlink{Linear transformations}
{$\circledcirc$ Linear transformations}}
\tthdump{\hyperlink{Linear transformations.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Linear transformations}
%%tth:\makehyperlinkex{Linear transformations.ex}
%%tth:\item
\tthdump{\hyperlink{Morphisms -- mono, epi, and iso}
{$\circledcirc$ Morphisms -- mono, epi, and iso}}
\tthdump{\hyperlink{Morphisms -- mono, epi, and iso.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Morphisms -- mono, epi, and iso}
%%tth:\makehyperlinkex{Morphisms -- mono, epi, and iso.ex}
%%tth:\item
\tthdump{\hyperlink{Linear operators}
{$\circledcirc$ Linear operators}}
\tthdump{\hyperlink{Linear operators.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Linear operators}
%%tth:\makehyperlinkex{Linear operators.ex}
%%tth:\item
\tthdump{\hyperlink{Normed linear spaces}
{$\circledcirc$ Normed linear spaces}}
\tthdump{\hyperlink{Normed linear spaces.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Normed linear spaces}
%%tth:\makehyperlinkex{Normed linear spaces.ex}
%%tth:\item
\tthdump{\hyperlink{Eigenvectors and eigenvalues}
{$\circledcirc$ Eigenvectors and eigenvalues}}
\tthdump{\hyperlink{Eigenvectors and eigenvalues.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Eigenvectors and eigenvalues}
%%tth:\makehyperlinkex{Eigenvectors and eigenvalues.ex}
%%tth:\item
\tthdump{\hyperlink{Change of basis}
{$\circledcirc$ Change of basis}}
\tthdump{\hyperlink{Change of basis.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Change of basis}
%%tth:\makehyperlinkex{Change of basis.ex}
%%tth:\item
\tthdump{\hyperlink{Trace and determinant}
{$\circledcirc$ Trace and determinant}}
\tthdump{\hyperlink{Trace and determinant.ex}
{ (ex)\newline}}
%%tth:\makehyperlink{Trace and determinant}
%%tth:\makehyperlinkex{Trace and determinant.ex}
%% %%tth:\item
%% \tthdump{\hyperlink{Differential manifolds}
%% {$\circledcirc$ Differential manifolds}}
%% \tthdump{\hyperlink{Differential manifolds.ex}
%% { (ex)\newline}}
%% %%tth:\makehyperlink{Differential manifolds}
%% %%tth:\makehyperlinkex{Differential manifolds.ex}
%%tth:\item
\tthdump{\hyperlink{References}
{$\circledcirc$ References\newline}}
%%tth:\makehyperlink{References}
%%tth:\end{itemize}
(ex): exercises.
\pagedone
% \quotesection{The quotes}
% %%tth:\begin{itemize}
%
% %%tth:\item
% \tthdump{\hyperlink{Science, wisdom, and counting}
% {\ $\circledcirc$ Science, wisdom, and counting\newline}}
% %%tth:\makehyperlink{Science, wisdom, and counting}
% %%tth:\item
% \tthdump{\hyperlink{Being different -- or random}
% {$\circledcirc$ Being different -- or random\newline}}
% %%tth:\makehyperlink{Being different -- or random}
% %%tth:\item
% \tthdump{\hyperlink{Surprise, information, and miracles}
% {$\circledcirc$ Surprise, information, and miracles\newline}}
% %%tth:\makehyperlink{Surprise, information, and miracles}
% %%tth:\item
% \tthdump{\hyperlink{Information (and hope)}
% {$\circledcirc$ Information (and hope)\newline}}
% %%tth:\makehyperlink{Information (and hope)}
% %%tth:\item
% \tthdump{\hyperlink{H (or S) for Entropy}
% {$\circledcirc$ H (or S) for Entropy\newline}}
% %%tth:\makehyperlink{H (or S) for Entropy}
% %%tth:\item
% \tthdump{\hyperlink{Thermodynamics}
% {$\circledcirc$ Thermodynamics\newline}}
% %%tth:\makehyperlink{Thermodynamics}
% %%tth:\item
% \tthdump{\hyperlink{Language, and putting things together}
% {$\circledcirc$ Language, and putting things together\newline}}
% %%tth:\makehyperlink{Language, and putting things together}
% %%tth:\item
% \tthdump{\hyperlink{Tools}
% {$\circledcirc$ Tools}}
% %%tth:\makehyperlink{Tools}
% %%tth:\end{itemize}
% %\thepage
% \pagedone
%
% %Slide 3
%
% \quotesection{Science, wisdom, and counting}
% %%tth:\begin{quote}
% ``Science is organized knowledge. Wisdom is organized life.''
%
% - Immanuel Kant
%
% ``My own suspicion is that the universe is not only stranger than we suppose, but stranger than we can s% uppose.''
%
% - John Haldane
%
% ``Not everything that can be counted counts, and not everything that counts can be counted.''
%
% - Albert Einstein (1879-1955)
%
%
% ``The laws of probability, so true in general, so fallacious in particular .''
%
% - Edward Gibbon
%
% %%tth:\end{quote}
%
% \pagedone
\sectionhead{Why linear algebra}
\begin{itemize}
\item Linear models of phenomena are pervasive throughout science. The techniques
of linear algebra provide tools which are applicable in a wide variety of contexts.
Beyond that, linear algebra courses are often the transition from lower division
mathematics courses such as calculus, probability/statistics, and elementary
differential equations, which typically focus on specific problem solving techniques,
to the more theoretical axiomatic and proof oriented upper division mathematics
courses.
I am going to stay with a generally abstract, axiomatic presentation of the
basics of linear algebra. (But I'll also try to provide some practical advice along
the way \ldots :-)
\end{itemize}
\pagedone
\sectionhead{Vector spaces}
\begin{itemize}
\item The first thing we need is a field $\myfield$ of coefficients for our vector
space. The most frequently used fields are the real numbers $\myreal$ and
the complex numbers $\mycomplex$.
A {\bf field} $\myfield = (\myfield, +, *, -, \ ^{-1}, 0, 1)$ is a mathematical
object consisting of a set of elements $(\myfield)$, together with two binary
operations $(+, *)$, two unary operations $(-, \ ^{-1})$, and two distinguished
elements $0$ and $1$ of $\myfield$, which satisfy the fundamental properties:
\begin{enumerate}
\item $\myfield$ is closed under the four operations:
\begin{eqnarray*}
+ & : & \myfield \httimes \myfield \to \myfield \\
* & : & \myfield \httimes \myfield \to \myfield \\
- & : & \myfield \to \myfield \\
^{-1} & : & \myfield' \to \myfield'\ \ \ \
\myfield' = \{a \in \myfield | a \neq 0\}
\end{eqnarray*}
Of course, we usually write $a + b, a * b, -a$, and $a^{-1}$ instead of
$+(a,b), *(a, b), -(a)$, and $\ ^{-1}(a)$.
\pagedone
\item `$+$' and `$*$' are commutative and associative, and satisfy the
distributive property. That is, for $a, b, c \in \myfield$:
\begin{eqnarray*}
a + b & = & b + a \\
a * b & = & b * a \\
(a + b) + c & = & a + (b + c)\\
(a * b) * c & = & a * (b * c) \\
a * (b + c) & = & a * b + a * c
\end{eqnarray*}
\item $0$ is the identity element and `$-$' is the inverse for addition.
$1$ is the identity element and `$a^{-1}$' is the inverse for multiplication.
That is, for $a \in \myfield$:
\begin{eqnarray*}
a + 0 & = & a\\
a + (-a) & = & 0\\
a * 1 & = & a\\
a * (a^{-1}) & = & 1\ \ \ \ \ (a \neq 0)
\end{eqnarray*}
\pagedone
\item Although we won't need it for most of linear algebra, I'll mention that
$\myreal$ and $\mycomplex$ are both complete (Cauchy sequences
have limits), and $\myreal$ is fully ordered ($a < b$ or $b < a$ or
$a = b$ for all $a, b \in \myreal$).
\item As needed, we will identify $\myreal$ as a subfield of $\mycomplex$,
and we will typically write elements of $\mycomplex$ as $a + bi$, where $a$
and $b$ are real and $i^2 = -1$.
\item Although $\myreal$ and $\mycomplex$ are the most frequently used
coefficient fields, there are many other fields such as $\myrationals$, the
rationals, and the finite fields $\myintegers/p\myintegers$ for
$p$ a prime.
\end{enumerate}
\end{itemize}
\pagedone
\begin{itemize}
\item If we leave out the requirement that $a * b = b * a$, we get what are called
{\em skew} fields. An important example of a skew field is $\myquaternions$,
the quaternions (also called the hamiltonians) which contains additional square
roots of $-1$, $i^2 = j^2 = k^2 = -1$, and $ij = k$, $jk = i$, $ki = j$, but $ij = -ji$,
$jk = -kj$, and $ki = -ik$. We typically write quaternions either in the form
$a + bi + cj + dk$ with $a, b, c, d \in \myreal$, or \newline
$\alpha + \beta j$ with $\alpha, \beta \in \mycomplex$.
\end{itemize}
\pagedone
\begin{itemize}
\item We are then ready for the definition. A {\bf vector space}
$V = (V, \myfield, +, -, *, \htvec{0})$
over a field $\myfield$ is a set $V$ of elements (called vectors) together
with a distinguished element $\htvec{0}$,
two binary operations, and a unary operation:
\begin{eqnarray*}
+ & : & V \httimes V \to V\\
- & : & V \to V\\
* & : & \myfield \httimes V \to V
\end{eqnarray*}
For $u, v, w \in V$, and $a, b \in \myfield$, these operations satisfy
the properties:
\begin{eqnarray*}
v + w & = & w + v\\
(u + v) + w & = & u + (v + w)\\
v + \htvec{0} & = & v\\
v + (-v) & = & \htvec{0}\\
1 * v & = & v\\
a * (u + v) & = & (a * u) + (a * v)\\
(a + b) * v & = & (a * v) + (b * v)\\
(a * b) * v & = & a * (b * v)
\end{eqnarray*}
The elements of the field $\myfield$ are called the {\bf scalars}, or
{\em coefficients} of the vector space.
\pagedone
\item From the basic properties listed above, we can prove a variety of additional
properties, such as:
\begin{eqnarray*}
0 * v & = & \htvec{0}\\
a * \htvec{0} & = & \htvec{0}\\
(-1) * v & = & -v
\end{eqnarray*}
We can also prove that the additive identity $\htvec{0}$ is unique, as is the
additive inverse $-v$.
\item We will usually simplify the notation, and write $av$ instead of $a * v$.
Furthermore, although it is important to distinguish the scalar $0$ from the
vector $\htvec{0}$, we will typically write the vector in the simple form $0$.
%\pagedone
\item If $\myfield = \myreal$, we call $V$ a real vector space, and
typically write the scalars as $a, b, c$.
If $\myfield = \mycomplex$, we call $V$ a complex vector space, and
often write the scalars as $\alpha, \beta, \gamma$.
\end{itemize}
\pagedone
\exercises{Vector spaces}
\begin{enumerate}
\item Using the basic properties listed above, prove the additional
properties:
\begin{enumerate}
\item $0 * v = \htvec{0}$
\item $a * \htvec{0} = \htvec{0}$
\item $(-1) * v = -v$
\item $-(-v) = v$
\item For $a \in \myfield$ and $v \in \mathbf{V}$,
$av = \htvec{0}$ iff $a = 0$ or $v = \htvec{0}$.
\end{enumerate}
\item Prove that the additive identity $\htvec{0}$ is unique.
\item Prove that the additive inverse $-v$ is unique.
\end{enumerate}
\pagedone
\sectionhead{Examples of vector spaces}
\begin{itemize}
\item The first main example of a real vector space is $\myreal^n$, the Cartesian
product of $n$ copies of the real line. An element of $\myreal^n$ looks like
$(a_1, a_2, \ldots, a_n)$. When we add two vectors, we get
$$(a_1, a_2, \ldots, a_n) + (b_1, b_2, \ldots, b_n)$$
$$\ \ \ = (a_1+b_1, a_2+b_2, \ldots, a_n + b_n).$$
When we multiply by a scalar $a \in \myreal$, we get
$$a * (a_1, a_2, \ldots, a_n) = (aa_1, aa_2, \ldots, aa_n).$$
\item Similary, we have the complex vector space $\mycomplex^n$, with vector
addition and scalar multiplication defined the same way.
\item Note that we can also think of $\mycomplex^n$ as a real vector space, if we
restrict the scalars to $\myreal$.
\item Let $\myfield^\infty = \{(a_0, a_1, \ldots)\}$, with \\
$(a_0, a_1, \ldots) + (b_0, b_1, \ldots)$ \\$\ \ \ \ \ \ \ = (a_0 + b_0, a_1 + b_1, \ldots)$,\\
and $a * (a_0, a_1, \ldots) = (a*a_0, a*a_1, \ldots)$. \\
This is a vector space over $\myfield$.
\item For a scalar field $\myfield$, we can define $\myfield[x]$ to be
the set of all polynomials with formal variable $x$, and coefficients in $\myfield$.
This is a vector space over $\myfield$, the coefficient field. Each polynomial
is a vector. Vector addition is just polynomial adddition, and scalar multiplication
just multiplies each coefficient in the polynomial by the scalar:
$$ a * (a_0 + a_1x^1 + \ldots a_nx^n)$$
$$\ \ \ \ = aa_0 + aa_1x^1 + \ldots aa_nx^n.$$
\pagedone
\item For a scalar field $\myfield$, let $\myfield^n[x]$ be the set of all
polynomials of degree $\le n$ with coefficients in $\myfield$. We can also
let $\myfield^\infty[x]$ be the vector space of (formal) power series over
$\myfield$. These are also vector spaces over $\myfield$.
\item Let $C^0(\myreal)$ be the set of all continuous functions with domain
and range the real numbers. That is:\\
$C^0(\myreal) = \{f:\myreal \to \myreal\ |\ f\ \mathrm{is\ continuous}\}$.\\
We can define an addition operation on this set. To specify the sum of two functions, we
must specify the sum at each point in the domain. If $f, g \in C^0(\myreal)$,
we define $f + g$ for each $x$ by
$$(f + g)(x) = f(x) + g(x).$$
We define scalar multiplication pointwise also: $(a*f)(x) = a*(f(x))$.
$C^0(\myreal)$ thus becomes a real vector space, where each continuous
function is a vector in the space.
\pagedone
\item Let $C^n(\myreal)$ be the set of all continuous real functions whose
first $n$ derivatives are also continuous, and define addition and scalar multiplication
pointwise. Similarly, we can define $C^\infty(\myreal)$ as functions with
all derivatives continuous. Again we get real vector spaces.
\item Let $C^0[0,1]$ be the set of all continuous functions with domain the
closed interval $[0,1]$, and range $\myreal$. We can also define $C^n[0,1]$
to be those functions with first $n$ derivatives continuous, and $C^\infty[0,1]$
with all derivatives continuous. We can also generalize to subsets of $\myreal$
other than the interval $[0,1]$. With pointwise addition and scalar
multiplication, these are each real vector spaces.
\item We get can get similar complex vector spaces if we use $\mycomplex$ instead of
$\myreal$ in examples like those above.
\end{itemize}
\pagedone
\exercises{Examples of vector spaces}
\begin{enumerate}
\item Show that each of the examples listed in this section is a vector space.
\item Consider the set of points in $\myreal^2$ of the forms $(x, 0)$,
$x \in \myreal$, and $(0, y)$, $y \in \myreal$ (i.e., the union of
the X-axis and the Y-axis). Show that with the
usual vector addition in $\myreal^2$, this is not a vector space over
$\myreal$.
\item Consider the set $\myreal^2$ with usual vector addition:
$$(x_1, y_1) + (x_2, y_2) = (x_1 + x_2, y_1 + y_2),$$ but with
``scalar multiplication'' given by $$a * (x, y) = (ax, y)$$ for
$a \in \myreal$.
Show that this is not a vector space over $\myreal$.
\end{enumerate}
\pagedone
\sectionhead{Subspaces}
\begin{itemize}
\item If $V$ is a vector space over $\myfield$, a subset $U \subset V$ is called
a vector {\bf subspace} of $V$ if $U$ is a vector space over $\myfield$
using the same vector addition and scalar multiplication as in $V$. Often, in
context, we will just call $U$ a subspace of $V$.
\item Most often, given a subset $U \subset V$, we will just let $U$ inherit the vector
addition and scalar multiplication operations from $V$. Hence, most of the vector space
properties will come for free. What may not come for free, however, is whether
the subset $U$ is closed under the operations.
\pagedone
Thus, when we inherit the operations from $V$, we will have that
$$ + : U \httimes U \to V,$$
and
$$ * : \myfield \httimes U \to V,$$
whereas, what we need is
$$ + : U \httimes U \to U,$$
and
$$ * : \myfield \httimes U \to U.$$
In order for $U$ to be a vector subspace, we must be sure that doing the operations
will leave us in the subset $U$.
\pagedone
\item Examples of subspaces:
\begin{enumerate}
\item $\{\htvec{0}\}$ is a subspace of $V$ for any vector space $V$.
\item If $m \le n$, then $\myfield^m$ (identified with
$\{(a_1, a_2, \ldots, a_m, 0, \ldots, 0)\}$) is a subspace of $\myfield^n$.
\item If $m \le n$, then $\myfield^m[x]$ is a subspace of $\myfield^n[x]$,
and a subspace of $\myfield[x]$.
\item If $m \le n$, then $C^n(\myreal)$ and $C^\infty(\myreal)$ are
subspaces of $C^m(\myreal)$ .
\item In $\myreal^2$, let $U = \{(x, y)\ |\ y = 3x\}$. Then
$U$ is a subspace.
\item More generally, in $\myreal^n$, fix $a_1, a_2, \ldots, a_n$, and let
$U = \{(x_1, x_2, \ldots, x_n)\ |$
\ \ \ \ \ $a_1x_1 + a_2x_2 + \ldots + a_nx_n = 0\}$.\\
Then $U$ is a subspace.
\item If $U_1$ and $U_2$ are subspaces, so is $U_1 \cap U_2$.
\end{enumerate}
\item Examples which are not subspaces:
\begin{enumerate}
\item In $\myreal^2$, let $U = \{(x, y)\ |\ x = 0\ or\ y = 0\}$ (i.e., $U$ is the
union of the $x$-axis and the $y$-axis). Then $U$ is not a subspace, since
the two vectors $(1,0)$ and $(0,1)$ are in $U$, but $(1,0) + (0,1) = (1,1)$
is not in $U$.
\item In $\myreal^2$, let $U = \{(x, y)\ |\ x$ and $y$ are both
rational numbers$\}$. Then $U$ is not a subspace, since $(1, 1)$ is an element
of $U$, but $\sqrt{2}*(1, 1) = (\sqrt{2}, \sqrt{2})$ is not an element of $U$.
\item In $\myreal^n$, fix $a_1, a_2, \ldots, a_n$, and let
$U = \{(x_1, x_2, \ldots, x_n)\ |\ a_1x_1 + a_2x_2 + \ldots + a_nx_n = 2\}$.
Then $U$ is not a subspace, since, for example, $\htvec{0}$ is not an
element of $U$.
\item In general, if $U_1$ and $U_2$ are subspaces, $U_1 \cup U_2$ is
not a subspace (except in special cases, such as when $U_1 \subset U_2$).
\end{enumerate}
\item If $U_1$ and $U_2$ are both subspaces of a vector space $V$, we can
define the subspace which is the {\bf sum} of the two subspaces by:
$$U_1 + U_2 = \{u_1 + u_2\ |\ u_1 \in U_1\ \mathrm{and}\ u_2 \in U_2\}.$$
\item If in addition $U_1 \cap U_2 = \{0\}$, then each element $u \in U_1 + U_2$
can be written in a unique way as $u = u_1 + u_2$. In this case, we call the
sum of $U_1$ and $U_2$ a {\bf direct sum}, and write it
$U_1 \oplus U_2$.
\item These ideas generalize in a straightforward way to
$U_1+\cdots + U_n$ and $U_1\oplus \cdots \oplus U_n$
for a finite number of subspaces, and
$U_1+U_2+\cdots\ $ and $U_1\oplus U_2\oplus\cdots $
for countably many subspaces.
\end{itemize}
\pagedone
\exercises{Subspaces}
\begin{enumerate}
\item Show that each of the examples identified in this section as a subspace actually
is a vector subspace.
\item Which of the following are vector subspaces of $\myreal^3$?
\begin{enumerate}
\item $\{(x_1, x_2, x_3) \in \myreal^3\ |\ 3x_1 + 2x_2 - x_3 = 0\}$
\item $\{(x_1, x_2, x_3) \in \myreal^3\ |\ 3x_1 + 2x_2 - x_3 = 4\}$
\item $\{(x_1, x_2, x_3) \in \myreal^3\ |\ x_1x_2x_3 = 0\}$
\end{enumerate}
\item Suppose that $U$ is a vector subspace of $V$, and $V$ is a vector
subspace of $W$. Show that $U$ is a vector subspace of $W$.
\item Show that the intersection of any collection of vector subspaces of
$V$ is a vector subspace of $V$.
\item Let
$$l^2 = \{(a_i) \in \myreal^\infty\ | \sum_{i=0}^{\infty} |a_i|^2 < \infty\}.$$
Show that $l^2$ is a vector subspace of $\myreal^\infty$.
\item Let
$$L^2 = \{f \in C^0(\myreal)\ | \int_{-\infty}^{\infty} |f(x)|^2 dx < \infty\}.$$
Show that $L^2$ is a vector subspace of $C^0(\myreal)$.
\end{enumerate}
\pagedone
\sectionhead{Linear dependence and independence}
From here on, we'll assume that $U$ and $V$ are vector spaces over
a field $\myfield$.
\begin{itemize}
\item Suppose that $v_1, v_2, \ldots, v_n$ are vectors in $V$, and that for some
$a_2, a_3, \ldots, a_n \in \myfield$,
$$v_1 = a_2v_2 + a_3v_3 + \ldots + a_nv_n.$$
Then we say that $v_1$ is linearly dependent on $v_2, \ldots, v_n$. Note that
if we move $v_1$ to the other side, and let $a_1 = -1$, we have
$a_1v_1 + a_2v_2 + \ldots + a_nv_n = 0$,
and not all of the $a_i$ are zero (in particular, $a_1 \ne 0$).
This motivates a general definition: a set $S$ of vectors in
a vector space $V$ is called {\bf linearly dependent} if, for some
$n > 0$, and distinct $v_1, v_2, \ldots v_n \in S$, there exist
$a_1, a_2, \ldots, a_n \in \myfield$, not all 0, with
$$a_1v_1 + a_2v_2 + \ldots + a_nv_n = 0.$$
\pagedone
\item The flip side of this definition is the following: a set $S$ of vectors in
a vector space $V$ is called {\bf linearly independent} if, given distinct
$v_1, v_2, \ldots v_n \in S$, the only way for $a_1, a_2, \ldots, a_n \in \myfield$
to give
$$a_1v_1 + a_2v_2 + \ldots + a_nv_n = 0$$
is if every one of the $a_i$ are zero.
A useful way to think about this is that a set $S$ of vectors
is linearly independent if no individual one of the vectors is linearly dependent
on a finite number of the rest of them.
\item Some examples:
\begin{enumerate}
\item In $\myreal^2$, the sets of vectors $\{(1,0),(0,1)\}$, $\{(1,2),(1,3)\}$,
$\{(1,0)\}$ and $\{(2,2),(3,2)\}$ are each linearly independent sets.
\item In $\myreal^2$, none of the sets of vectors $\{(1,0),(2,0)\}$,
$\{(1,0),(0,1), (1,1)\}$, $\{(0,0),(0,1)\}$, $\{(0,0)\}$,
$\{(1,2),(2,3),(3,4),(4,1)\}$, nor $\{(1,2),(3,4),(2,1)\}$ is a linearly
independent set.
\item In any vector space, if a set of vectors contains the 0 vector, the set is
not linearly independent.
\item In $C^0(\myreal)$, if $f_1(1) = f_2(2) = \ldots = f_n(n) = 1$,
but $f_i(j) = 0$ for $i \ne j$, then the set
of functions $\{f_1, f_2, \ldots, f_n\}$ is linearly independent.
\item In $C^\infty(\myreal)$, the set of functions
$\{\cos(nx)\ |\ n = 0, 1, 2, \ldots\}$ is a linearly independent set.
\item The empty set, $\{\}$, is a linearly independent set.
\end{enumerate}
\pagedone
\item Mathematics is an extremely precise language. These two sentences do not
mean the same thing:
\begin{enumerate}
\item The set $\{v_1, v_2, \ldots, v_n\}$ of vectors is linearly independent.
\item $\{v_1, v_2, \ldots, v_n\}$ is a set of linearly independent vectors.
\end{enumerate}
One of the hardest parts of doing mathematics is developing your mathematical
intuition. It is tempting to imagine that intuition is what you have before you
know anything, but that is nonsense. Intuition is just the automatic part of your
knowledge, derived from your past experience. Becoming better at mathematics
involves learning new mathematics, and then integrating that new knowledge
into your intuition. Doing that takes care, precision, and {\bf lots of practice}!
\end{itemize}
\pagedone
\exercises{Linear dependence and independence}
\begin{enumerate}
\item Verify the statements in each of the six examples in this section.
\item Suppose that $P_0, P_1, \ldots , P_n$ are polynomials in $\myfield^n[x]$,
and $P_i(1) = 0$ for $i = 0, 1, \ldots ,n$. Show that
$\{P_0 , P_1, \ldots , P_n\}$ is not linearly independent in $\myfield^n[x]$.
\item In $C^\infty(\myreal)$, show that each of these sets of functions
is linearly independent:
\begin{enumerate}
\item $\{x^n\ |\ n = 0, 1, 2, \ldots\}$.
\item $\{\sin(nx)\ |\ n = 0, 1, 2, \ldots\}$.
\item $\{e^{nx}\ |\ n = 0, 1, 2, \ldots\}$.
\end{enumerate}
\end{enumerate}
\pagedone
\sectionhead{Span of a set of vectors}
\begin{itemize}
\item If $S$ is a set of vectors in the vector space $V$ over
$\myfield$, we define the {\bf span} of the set $S$ by:
$$\myspan(S) = \{a_1v_1 + a_2v_2 + \ldots + a_nv_n\ |$$
$$\ \ \ \ \ \ \ \ \ \ a_i \in \myfield, v_i \in S, n > 0\}.$$
This says that $\myspan(S)$ is the set of all vectors that can be written as finite linear
combinations of the vectors in S.
\item Examples:
\begin{enumerate}
\item In $\myreal^1$, $\myspan(\{(1)\})$ is all of $\myreal^1$.
\item In $\myreal^3$, $\myspan(\{(1,0,0), (0,1,0)\})$ is the $x-y$ plane.
Similarly, $\myspan(\{(1,0,0), (0,0,1)\})$ is the $x-z$ plane,
$\myspan(\{(0,1,0), (0,0,1)\})$ is the $y-z$ plane, and
$\myspan(\{(1,0,0), (0,1,0), (0,0,1)\})$ is all of $\myreal^3$.
\pagedone
\item Still in $\myreal^3$, $\myspan(\{(1,2,0), (2,1,0)\})$ is the $x-y$ plane,
$\myspan(\{(1,0,0)\})$ is the $x$ axis, $\myspan(\{(1,0,0), (1,1,0), (1,1,1)\})$ is
all of $\myreal^3$, $\myspan(\{(1,2,3), (1,1,0), (1,1,1), (2,1,4)\})$ is
all of $\myreal^3$, and $\myspan(\{(1,2,0), (1,1,0), (3,1,0), (2,1,0)\})$ is
the $x-y$ plane.
\item In $\myfield^n[x]$, $\myspan(\{1, x^1, x^2, \ldots, x^n\}) = \myfield^n[x]$.
\item In $\myfield[x]$, $\myspan(\{1, x^1, x^2, \ldots\}) = \myfield[x]$.
\item In $C^0(\myreal)$, $\myspan(\{f_1(x) = 1, f_2(x) = x\})$ is the set of all
linear functions, $y = mx + b$.
\item For any set $S$, $\myspan(S)$ is a subspace.
\item In particular,
$\myspan(S)$ is sometimes defined to be the smallest subspace of $V$
containing $S$, or the intersection of all
subspaces of $V$ that contain $S$.
\end{enumerate}
\end{itemize}
\pagedone
\exercises{Span of a set of vectors}
\begin{enumerate}
\item Verify the statements in each of the eight examples in this section.
\item Show that if $\myspan(\{v_0, v_1, \ldots , v_n\}) = V$, then
$\myspan(\{v_0 - v_1, v_1 - v_2, \ldots ,v_{n-1} - v_n, v_n\}) = V$.
\end{enumerate}
\pagedone
\sectionhead{Basis for a vector space}
\begin{itemize}
\item Suppose $S$ is an ordered set (or {\em list}) of vectors in a vector space $V$.
Suppose further that:
\begin{enumerate}
\item $S$ is linearly independent, and
\item $\myspan(S) = V$.
\end{enumerate}
Then we call $S$ a {\bf basis} for the vector space $V$.
\item Given a basis $S$ for the vector space $V$,
every vector $v \in V$, $v \ne 0$, can be written in a unique way as:
$$v = a_1v_1 + a_2v_2 + \ldots + a_nv_n$$
with $a_1, a_2, \ldots, a_n \in \myfield,\ a_i \ne 0$, and
$v_1, v_2, \ldots, v_n \in S$. For uniqueness, we take the $v_i$ to be
distinct, and to be in the order they appear in $S$. We are writing each $v$
as a finite linear combination of the basis vectors.
\pagedone
\item Examples:
\begin{enumerate}
\item In $\myfield^3$, the ordered set $S = ((1,0,0),(0,1,0),(0,0,1))$ is a
basis (often called the {\bf standard basis}). This generalizes in the obvious
way to $\myfield^n$.
\item In $\myfield^3$, the ordered set $S = ((1,0,0),(1,1,0),(1,1,1))$ is a basis.
So is $((1,2,3),(2,1,3),(1,2,2))$.
\item In general, in $\myfield^n$ any linearly independent ordered
set $S = (v_1, v_2, \ldots, v_n)$ of size $n$ is a basis.
\item $(1, x, x^2, x^3, \ldots)$ is a basis for $\myfield[x]$.
\item If $S$ is a linearly independent ordered set, then $S$ is a basis for $\myspan(S)$.
\end{enumerate}
\pagedone
\item If a vector space $V$ has a finite basis $S = (v_1, v_2, \ldots, v_n)$,
we say that $V$ is finite dimensional, and we define the {\bf dimension} of $V$ by:
$$ \dim(V) = n.$$
We define $\dim(\{\htvec{0}\}) = 0$.
Note that if a vector space $V$ has a finite basis of size $n$, then every basis
for $V$ contains $n$ vectors, and thus the definition makes sense.
For example, $\dim(\myfield^n) = n$ for any field $\myfield$ and $n > 0$.
We also have that $\dim(\myfield^n[x]) = n + 1$.
\pagedone
\item If a vector space $V$ has a finite or countably infinite basis $S$, then
we can uniquely represent each vector $v$ with respect to $S$ by a list of
the form:\\
$v = (b_1, b_2, \ldots, b_n)$ in the finite case, or\\
$v = (b_1, b_2, \ldots)$ in the infinite case.
Each $b_i$ is either the non-zero coefficient corresponding with the $i$th element
of $S$ from the unique representation
described above, or $0$ if the basis element does not appear there. In the
infinite case, only finitely many of the $b_i$ are non-zero.
We represent $0$ by $(0, 0, \ldots, 0)$ in the finite case, and
by $(0, 0, \ldots)$ in the infinite case.
\item In these cases, we also have, for $v_i \in S$, that
$$V = \myspan(v_1)\oplus\cdots\oplus\myspan(v_n)$$
in the finite case, and
$$V = \myspan(v_1)\oplus\myspan(v_2)\oplus\cdots$$
in the countably infinite case.
\end{itemize}
\pagedone
\exercises{Basis for a vector space}
\begin{enumerate}
\item Verify the statements in each of the five examples in this section.
\item Let $U$ be the subspace of $\myreal^6$ given by
$U = \{(x_1, x_2, x_3, x_4, x_5, x_6) \in \myreal^6\ |\ $
$2x_1 + 3x_3 = 0$ and $x_2 = 4x_5\}$.
Find a basis for $U$.
\item Show that if $U_1$ and $U_2$ are subspaces of a finite dimensional vector space,
then $\dim(U_1 + U_2) = \dim(U_1) + \dim(U_2) - \dim(U_1 \cap U_2)$.
\end{enumerate}
\pagedone
\sectionhead{Linear transformations}
\begin{itemize}
\item If $U$ and $V$ are vector spaces over $\myfield$, then a function $T : U \to V$
is called a {\bf linear transformation} or {\bf linear mapping} if
$$T(a_1u_1 + a_2u_2) = a_1T(u_1) + a_2T(u_2)$$
for all $a_1, a_2 \in \myfield$ and $u_1, u_2 \in U$.
An equivalent pair
of conditions is that $T(u_1 + u_2) = T(u_1) + T(u_2)$, and $T(au) = aT(u)$.
\item For a linear transformation $T : U \to V$, we call $U$ the
{\bf domain} and $V$ the {\bf codomain} (or sometimes {\bf range})
of $T$. We also define
the {\bf kernel} (or {\bf null space}) of $T$ by:
$$\ker(T) = \{u \in U\ |\ T(u) = 0\}.$$
Further, we define the {\bf image} (or sometimes {\bf range} -- be careful here!)
of $T$ by:
$$\im(T) = \{v \in V\ |\ v = T(u)\ \mathrm{for\ some}\ u \in U\}.$$
\pagedone
\item For a linear transformation $T : U \to V$ we have the nice properties:
\begin{enumerate}
\item $\ker(T)$ is a subspace of $U$, and $\im(T)$ is a subspace of $V$.
(ex)
\item If $U$ is finite dimensional, then
$$\dim(U) = \dim(\ker(T)) + \dim(\im(T)).$$
(ex)
\item If $U$ is finite dimensional with basis $(u_1, u_2, \ldots, u_n)$,
and $V$ is finite dimensional with basis $(v_1, v_2, \ldots, v_m)$,
then $T$ is determined by its effect on the basis elements $u_i$.
There exist $a_{ij}$, $1 \le i \le m$, $1 \le j \le n$ with:
$$T(u_j) = a_{1j}v_1 + a_{2j}v_2 + \ldots + a_{mj}v_m,$$
and in general, if $u = \sum_jb_ju_j$, then
$$T(u) = \sum_{i=1}^m\sum_{j=1}^na_{ij}b_jv_i.$$
\pagedone
\item Thus, given particular bases for $U$ and $V$, we can represent the
linear transformation $T$ by the matrix
$$[T] =
\left[\begin{array}{cccc}
a_{11} & a_{12} & \ldots & a_{1n}\\
a_{21} & a_{22} & \ldots & a_{2n}\\
\vdots & \vdots & \vdots & \vdots \\
a_{m1} & a_{m2} & \ldots & a_{mn}
\end{array}\right]
$$
or $[T]_{ij} = a_{ij}$.
\item If we represent the vector $u \in U$ by the column matrix
$[b_1, b_2, \ldots, b_n]^t$, then we have
\begin{eqnarray*}
[T(u)] & = & [T][u] \\
& = &
\left[\begin{array}{cccc}
a_{11} & a_{12} & \ldots & a_{1n}\\
a_{21} & a_{22} & \ldots & a_{2n}\\
\vdots & \vdots & \vdots & \vdots \\
a_{m1} & a_{m2} & \ldots & a_{mn}
\end{array}\right]
\left[\begin{array}{c}
b_1\\
b_2\\
\vdots\\
b_n
\end{array}\right] \\
& & \\
& & \\
& = &
\left[\begin{array}{c}
a_{11}b_1 + a_{12}b_2 + \ldots + a_{1n}b_n\\
a_{21}b_1 + a_{22}b_2 + \ldots + a_{2n}b_n\\
\vdots\\
a_{m1}b_1 + a_{m2}b_2 + \ldots + a_{mn}b_n
\end{array}\right]
\end{eqnarray*}
\pagedone
\item Sometimes it is more convenient to use the Einstein summation
conventions, where we would write:
$$T(u)_i = T_i^ju_j$$
with implied summation over the repeated upper and lower indices.
\item If $T_1 : U \to V$ and $T_2 : U \to V$ are two linear transformations,
we can define the sum of the two transformations as
$$(T_1 + T_2)(u) = T_1(u) + T_2(u),$$
and we can define scalar multiplication of a linear transformation $T$ by $a$ as
$$(a * T)(u) = a * (T(u)).$$
We can thus define $L(U, V)$ to be the space of all
linear transformations from $U$ to $V$. We define the {\bf zero}
transformation $0 : U \to V$ by $0(u) = \htvec{0}$.
With these definitions, $L(U, V)$ is a vector space. (ex)
\pagedone
\item Given particular bases for $U$ and $V$, the matrix representation of
$T_1 + T_2$ is given by: $[T_1 + T_2] = [T_1] + [T_2]$.
\item If $U$ and $V$ are finite dimensional, then $L(U, V)$ is also finite
dimensional, and
$$\dim(L(U, V)) = \dim(U)*\dim(V).$$
(ex)
\item If $T : U \to V$ and $S : V \to W$ are linear transformations,
then the composition $S \circ T : U \to W$ is also a linear transformation.
(ex)
Recall that $(S \circ T)(u) = S(T(u))$.
Given particular bases for $U$, $V$, and $W$, the matrix representation of
$S \circ T$ is the matrix product $[S \circ T] = [S][T]$. We usually abbreviate
$S \circ T$ as $ST$.
It is worth noting that unless $W \subseteq U$, it doesn't even make sense to
talk about $T \circ S$.
\pagedone
\item The matrix of the composition of two linear transformations is given by
the product of the two matrices, given by:
$$ [ST] = [S][T] = $$
$$
\left[\begin{array}{cccc}
a_{11} & a_{12} & \ldots & a_{1n}\\
a_{21} & a_{22} & \ldots & a_{2n}\\
\vdots & \vdots & \vdots & \vdots \\
a_{m1} & a_{m2} & \ldots & a_{mn}
\end{array}\right]
\left[\begin{array}{cccc}
b_{11} & b_{12} & \ldots & b_{1p}\\
b_{21} & b_{22} & \ldots & b_{2p}\\
\vdots & \vdots & \vdots & \vdots \\
b_{n1} & b_{n2} & \ldots & b_{np}
\end{array}\right]
$$
$$ = $$
$$
\left[\begin{array}{cccc}
\sum_{i=1}^na_{1i}b_{i1} & \sum_{i=1}^na_{1i}b_{i2} & \ldots &
\sum_{i=1}^na_{1i}b_{ip}\\
\sum_{i=1}^na_{2i}b_{i1} & \sum_{i=1}^na_{2i}b_{i2} & \ldots &
\sum_{i=1}^na_{2i}b_{ip}\\
\vdots & \vdots & \vdots & \vdots \\
\sum_{i=1}^na_{mi}b_{i1} & \sum_{i=1}^na_{mi}b_{i2} & \ldots &
\sum_{i=1}^na_{mi}b_{ip}
\end{array}\right]
$$
\item We also have the distributive property: $$S(T_1 + T_2) =
(ST_1) + (ST_2).$$
The matrix representation for this is:
$$[S][T_1 + T_2] = [S][T_1] + [S][T_2].$$
\end{enumerate}
\pagedone
\item Some examples of linear transformations:
\begin{enumerate}
\item The function $T : \myreal^2 \to \myreal$ given by
$T((x, y)) = x + y$ is linear.
\item The function $T : \myreal^2 \to \myreal^3$ given by
$T((x, y)) = (x + 2y, x - 2y, x - y)$ is linear.
\item Given $a_1, a_2, \ldots, a_n \in \myfield$, and $V$ a finite
dimensional vector space over $\myfield$ with basis
$(v_1, v_2, \ldots, v_n)$, the function $T : V \to \myfield$ given by
$T(b_1v_1 + \ldots + b_nv_n) = a_1b_1 + \ldots + a_nb_n$
is linear.
In general, for a vector space $V$, a linear transformation
$T : V \to \myfield$ is called a {\bf linear functional}. The
study of these transformations is called {\em functional analysis}.
\pagedone
\item The function $D : \myfield^n[x] \to \myfield^{n-1}[x]$
given by\\
$D(a_nx^n + \ldots + a_1x + a_0)$\\
$\ \ \ \ \ = na_nx^{n-1} + \ldots + 2a_2x + a_1$\\
is linear. This linear transformation is called the derivative \ldots
\item Similarly, we have the derivative transformation
$\deriv : \myfield^\infty[x] \to \myfield^\infty[x]$ given by
$$\deriv(\sum_{n=0}^\infty a_nx^n) = \sum_{n=1}^\infty n*a_nx^{n-1}.$$
\item The shift map $S : \myfield^\infty \to \myfield^\infty$ is
given by: $S((a_0, a_1, \ldots)) = (a_1, a_2, \ldots)$.
\item The difference map $\Delta: \myfield^\infty \to \myfield^\infty$ is
given by:
$$\Delta((a_0, a_1, \ldots)) = (a_1 - a_0, a_2 - a_1, \ldots),$$
or, abbreviating $(a_0, a_1, \ldots)$ by $(a_n)$,
$$\Delta((a_n)) = (a_{n+1} - a_n).$$
\end{enumerate}
\end{itemize}
\pagedone
\exercises{Linear transformations}
\begin{enumerate}
\item Verify each of the statements marked (ex) in this section.
\item Verify that each of the 7 examples actually are linear transformations.
\item Show that the usual calculus derivative $\frac{d\ }{dx} : C^\infty(\myreal)
\to C^\infty(\myreal)$
given by:
$$\frac{d\ f(x)}{dx} = \lim_{h\to 0}\frac{f(x+h) - f(x)}{h}$$
is a linear transformation.
\item Is the usual calculus indefinite integral
$$\int f(x)dx$$
a linear transformation? Why or why not? What about the definite integral?
\end{enumerate}
\pagedone
\sectionhead{Morphisms -- mono, epi, and iso}
\begin{itemize}
\item In algebra, we call a function that preserves structure a {\bf morphism}.
In our current context, a linear transformation preserves the linear structure
of a vector space in the sense that $T(u_1 + u_2) = T(u_1) + T(u_2)$ and
$T(au) = aT(u)$. Thus, linear transformations are morphisms in the
category of vector spaces.
\item We will be particularly interested in morphisms that have additional
properties. Specifically, we are likely to look for morphisms $T : U \to V$
which have one or more of the properties:
\pagedone
\begin{enumerate}
\item One-to-one. Such transformations are also called {\bf injective},
or {\bf monomorphisms}. These transformations have the property
that if $u_1 \ne u_2$, then $T(u_1) \ne T(u_2)$. This says that
different things get sent different places -- that is, no two things
get sent to the same place.
Monomorphisms are nice because the subspace $\im(T) \subset V$ looks
just like $U$.
\item Onto. Such transformations are also called {\bf surjective},
or {\bf epimorphisms}. These transformations have the property
that for every $v \in V$, there exists some $u \in U$ with
$T(u) = v$. This says that every element of $V$ is hit by some
element of $U$. Another way to say this is that $\im(T) = V$.
Epimorphisms are nice, because the algebraic properties of
$V$ will be reflected back in $U$.
\pagedone
\item Both one-to-one and onto. Such transformations are also called
{\bf bijective}. A bijective function $f : X \to Y$ has an inverse
$f^{-1} : Y \to X$ with the properties that for all $x \in X$ and
all $y \in Y$, $f^{-1}(f(x)) = x$ and $f(f^{-1}(y)) = y$.
A bijective morphism whose inverse also preserves algebraic structure is
called an {\bf isomorphism}. In linear algebra, we have the nice
property that if a linear transformation is bijective, then its inverse
is also linear, and thus it is an isomorphism.
%% A transform that has no inverse is often called $\mathbf singular$.
%% and one that has an inverse is often called $\mathbf invertible$
%% or $\mathbf nonsingular$.
\item If there is an isomorphism $T : U \to V$, we say that the spaces
$U$ and $V$ are {\bf isomorphic}. If two spaces are isomorphic, then
they share all relevant properties. Thus, two isomorphic vector spaces
are indistinguishable as vector spaces except for a renaming of the elements.
\end{enumerate}
\pagedone
\item We have the nice fact that a linear transformation $T : U \to V$ is
a monomorphism (is one-to-one) if and only if $\ker(T) = \{0\}$.
Pf.: Suppose $T$ is a monomorphism. We know that for every
linear transformation, $T(0) = 0$. Then, since $T$ is a monomorphism,
we know that if $T(u) = 0 = T(0)$, it must be that $u = 0$. Thus $\ker(T) = \{0\}$.
On the other hand, suppose that $\ker(T) = \{0\}$. Then, if
$T(u_1) = T(u_2)$, we will have $0 = T(u_1) - T(u_2) = T(u_1 - u_2)$.
But this means that $u_1 - u_2 \in \ker(T)$ and hence, since we
are assuming that $\ker(T) = \{0\}$, we must have $u_1 - u_2 = 0$,
or $u_1 = u_2$. By the contrapositive, this means that if
$u_1 \ne u_2$, then $T(u_1) \ne T(u_2)$. Q.E.D.
(I had to do at least one proof,\\
didn't I? :-)
\pagedone
\item An important example of an isomorphism is the identity
transformation $I_U : U \to U$ given by $I_U(u) = u$. (ex)
\item If $T : U \to V$ is a monomorphism, then there is a linear
transformation $S_1 : \im(T) \to U$ with $S_1(T(u)) = (S_1T)(u) = u$
for all $u \in U$. This says that $S_1T = I_U$. $S_1$ is called a left
(partial) inverse of $T$. (ex)
\item If $T : U \to V$ is an epimorphism, then there is a linear
transformation $S_2 : V \to U$ with $T(S_2(v)) = (TS_2)(v) = v$
for all $v \in V$. This says that $TS_2 = I_V$. $S_2$ is called a right
(partial) inverse of $T$. (ex)
\pagedone
\item If $T : U \to V$ is an isomorphism, then since $T$
is an epimorphism, both $S_1$ and $S_2$ (as above) exist.
Also, $\im(T) = V$. We therefore have that for all $v \in V$,
$$S_1(v) = S_1((TS_2)(v)) = (S_1T)(S_2(v)) = S_2(v).$$
Thus $S_1 = S_2$. In this case, the (two-sided) inverse of $T$
exists, and we have $T^{-1} = S_1 = S_2$.
\item If $U$ and $V$ are finite dimensional vector spaces over $\myfield$
with $\dim(U) = \dim(V)$, then $U$ and $V$ are isomorphic. (ex)
(Big) hint for proof: Let $(u_1, u_2, \ldots, u_n)$ and $(v_1, v_2, \ldots, v_n)$
be bases for $U$ and $V$ respectively. Define $T : U \to V$ by
$T(u_i) = v_i$ for $1 \le i \le n$, and extend by linearity. Make sense of
the phrase ``extend by linearity,'' and then show that $T$ is an isomorphism.
\pagedone
\item We also have the nice fact that if $\dim(U) = \dim(V) = n,\ U,\ V$ over
$\myfield$, and
$T : U \to V$ is linear, then the following are equivalent: (ex)
\begin{enumerate}
\item $T$ is a monomorphism.
\item $T$ is an epimorphism.
\item $T$ is an isomorphism.
\end{enumerate}
This means, for example, that such a $T$ is onto if and only $\ker(T) = {0}$.
\item If $T : U \to V$ and $S : V \to W$, then (ex)
\begin{enumerate}
\item If both $S$ and $T$ are monomorphisms, then so is $ST$.
\item If $ST$ is a monomorphism, then so is $T$.
\item If both $S$ and $T$ are epimorphisms, then so is $ST$.
\item If $ST$ is an epimorphism, then so is $S$.
\end{enumerate}
\pagedone
\item If $T : V \to V$ and $S : V \to V$, and $ST = TS$, then (ex)
\begin{enumerate}
\item Both $S$ and $T$ are monomorphisms if and only if $ST$
is a monomorphism.
\item Both $S$ and $T$ are epimorphisms if and only if $ST$
is an epimorphism.
\end{enumerate}
\item Let $\matrix(\myfield, n, m)$ be the space of all $n$ by $m$ matrices with
entries from $\myfield$. We use ordinary entry by entry matrix addition,
where $(A + B)_{ij} = A_{ij} + B_{ij}$, scalar multiplication, where
$(aA)_{ij} = aA_{ij}$, and let $(0)_{ij} = 0$. Then $\matrix(\myfield, n, m)$
is an $n*m$-dimensional vector space over $\myfield$.
If $\dim(U) = m$ and $\dim(V) = n$, then we can define
$$\mathrm{mat} : L(U, V) \to \matrix(\myfield, n, m)$$
by
$\mathrm{mat}(T) = [T]$.
This transformation is an isomorphism. (ex)
\end{itemize}
\pagedone
\exercises{Morphisms -- mono, epi, and iso}
\begin{enumerate}
\item Verify each of the statements marked (ex) in this section.
\item Show that if a function has an inverse, then the inverse is unique.
\item Consider the function $T : \myfield^n[x] \to \myfield^{n+1}$
given by
$$T(a_0 + a_1x + \ldots + a_nx^n) = (a_0, \ldots , a_n).$$
Show that this function is an isomorphism.
\item Consider the function $T : \mycomplex \to \matrix(\myreal, 2, 2)$
given by
\begin{eqnarray*}
T(a + bi) & = &
\left[\begin{array}{cc}
a & -b \\
b & a
\end{array}\right]
\end{eqnarray*}
\begin{enumerate}
\item Show that if we consider $\mycomplex$ as a real vector
space, this function is a monomorphism.
\item Show that this function also respects complex multiplication,
that is, $$T((a + bi)(c + di)) = T(a + bi) T(c + di).$$
\end{enumerate}
\item More generally, consider the function $T : \matrix(\mycomplex, n, n)
\to \matrix(\myreal, 2n, 2n)$
given by
$$
T\left(\left[\begin{array}{ccc}
a_{11} + b_{11}i & \cdots & a_{1n} + b_{1n}i \\
\vdots & & \vdots \\
a_{n1} + b_{n1}i & \cdots & a_{nn} + b_{nn}i
\end{array}\right]\right)
$$
$$\ \ \ \ \ \ = \ \left[\begin{array}{ccccc}
a_{11} & -b_{11} & \cdots & a_{1n} & -b_{1n} \\
b_{11} & a_{11} & \cdots & b_{1n} & a_{1n} \\
\vdots & \vdots & & \vdots & \vdots \\
a_{n1} & -b_{n1} & \cdots & a_{nn} & -b_{nn} \\
b_{n1} & a_{n1} & \cdots & b_{nn} & a_{nn} \\
\end{array}\right]
$$
\begin{enumerate}
\item Show that if we consider $\matrix(\mycomplex, n, n)$ as a real vector
space, this function is a monomorphism.
\item Show that this function also respects matrix multiplication,
that is, $$T(A B) = T(A) T(B).$$
\end{enumerate}
\item What the heck. Let $\myquaternions$ be the quaternions (described above).
Consider the function $T : \matrix(\myquaternions, n, n)
\to \matrix(\mycomplex, 2n, 2n)$ given by
$$
T\left(\left[\begin{array}{ccc}
\alpha_{11} + \beta_{11}j & \cdots & \alpha_{1n} + \beta_{1n}j \\
\vdots & & \vdots \\
\alpha_{n1} + \beta_{n1}j & \cdots & \alpha_{nn} + \beta_{nn}j
\end{array}\right]\right)
$$
$$\ \ \ \ \ \ = \ \left[\begin{array}{ccccc}
\alpha_{11} & -\overline{\beta_{11}} & \cdots & \alpha_{1n}
& -\overline{\beta_{1n}} \\
\beta_{11} & \alpha_{11} & \cdots & \beta_{1n} & \alpha_{1n} \\
\vdots & \vdots & & \vdots & \vdots \\
\alpha_{n1} & -\overline{\beta_{n1}} & \cdots & \alpha_{nn}
& -\overline{\beta_{nn}} \\
\beta_{n1} & \alpha_{n1} & \cdots & \beta_{nn} & \alpha_{nn} \\
\end{array}\right]
$$
\begin{enumerate}
\item Show that if we consider $\matrix(\myquaternions, n, n)$ as a
complex vector space, this function is a monomorphism.
\item Show that this function also respects matrix multiplication,
that is, $$T(A B) = T(A) T(B).$$
\end{enumerate}
Thus, if we denote by $\mathbf{O}(n)$, $\mathbf{U}(n)$,
and $\mathbf{Sp}(n)$ the distance preserving linear operators on $\myreal^n$,
$\mycomplex^n$, and $\myquaternions^n$ respectively (called the
orthogonal, unitary, and symplectic groups), then we have the monomorphisms:
$$\cdots \to \mathbf{O}(n) \to \mathbf{Sp}(n) \to \mathbf{U}(2n) \to \mathbf{O}(4n)$$
$$\ \ \ \ \to \mathbf{Sp}(4n) \to \mathbf{U}(8n) \to \mathbf{O}(16n) \to \cdots$$
\end{enumerate}
\pagedone
\sectionhead{Linear operators}
\begin{itemize}
\item If $T : V \to V$ is a linear transformation, we call $T$ a {\bf linear
operator} on $V$. Note that if $S$ and $T$ are operators on $V$,
then so is $ST$. We can abbreviate $L(V, V)$ as $L(V)$.
$L(V)$ has the algebraic structure of a {\bf ring} with identity. A ring
is similar to a field (as defined above), except without the requirements
that multiplication be commutative and that there be multiplicative inverses
for non-zero elements. The identity element is $I_V$.
$L(V)$ is a non-commutative ring, since in general
$ST \ne TS$. This is reflected in the fact that matrix multiplication is
non-commutative. Only in very special cases is it true that $[S][T] = [T][S]$
(for example, if both $[S]$ and $[T]$ are diagonal matrices, with
$[S]_{ij} = 0$ for $i \ne j$, and $[T]_{ij} = 0$ for $i \ne j$).
\pagedone
\item Note that for operators, it makes sense to talk about $T \circ T$, and we
can thus define $T^n = T \circ T \circ \ldots \circ T$ ($n$ times). We also
define $T^0 = I$, the identity operator.
\item Thus, if we have a polynomial $P(x) = a_nx^n + a_{n-1}x^{n-1} + \ldots
+ a_1x + a_0$ from $\myfield[x]$, we can talk about the polynomial in $T$:
$$P(T) = a_nT^n + a_{n-1}T^{n-1} + \ldots + a_1T + a_0.$$
(The $a_0$ term stands for $a_0I$.)
This is an operator on $V$ which acts on vectors as:
$$P(T)(v) = a_nT^n (v) + \ldots + a_1T(v) + a_0v.$$
\item In fact, we can generalize this to power series. If
$$p(x) = \sum_{n=0}^\infty a_nx^n,$$
then we can define
$$p(T) = \sum_{n=0}^\infty a_nT^n.$$
\pagedone
\item This means, for example, that we can define the exponential of an operator:
$$\exp(T) = \sum_{n=0}^\infty \frac{1}{n!}T^n.$$
\item We can even define the cosine or sine of an operator, etc.:
$$\cos(T) = \sum_{n=0}^\infty \frac{(-1)^n}{(2n)!}T^{2n},$$
$$\sin(T) = \sum_{n=0}^\infty \frac{(-1)^n}{(2n+1)!}T^{2n+1}.$$
\item We can talk about square roots of an operator, saying that
$S$ is a square root of $T$ if $ S^2 = T$. It is unlikely
that $S$ is unique.
\item We can also talk about logarithms of an operator, saying that
$S$ is a logarithm of $T$ if $\exp(S) = T$. Again, it is unlikely
that $S$ is unique.
\pagedone
\item Examples of linear operators:
\begin{enumerate}
\item A first important example begins like this:
suppose we have a system of $m$ linear equations in $n$ variables
\begin{eqnarray*}
a_{11}x_1 + a_{12}x_2 + \ldots + a_{1n}x_n & = & b_1\\
a_{21}x_1 + a_{22}x_2 + \ldots + a_{2n}x_n & = & 2_1\\
\vdots \ \ \ \ \ \ \ \ \ \ \ & & \vdots\\
a_{m1}x_1 + a_{m2}x_2 + \ldots + a_{mn}x_n & = & b_m.
\end{eqnarray*}
If we let $A : \myfield^n \to \myfield^m$
be the linear transformation with $[A]_{ij} = a_{ij}$, $\mathbf{x}$ be the
vector $(x_1, x_2, \ldots, x_n)^t$, and $\mathbf{b}$ the
vector $(b_1, b_2, \ldots, b_m)^t$, then we can rewrite the equation
in the form:
$$A\mathbf{x} = \mathbf{b}.$$
We then have several possibilities.
\pagedone
\begin{enumerate}
\item The first case is when $m = n$. There are then two possibilities:
\begin{enumerate}
\item If $A$ is an isomorphism, then $A^{-1}$ exists, and we can solve
the equation for $\mathbf{x}$:
$$\mathbf{x} = A^{-1}A\mathbf{x} = A^{-1}\mathbf{b}.$$
\item If $A$ is not an isomorphism, then in particular $A$ is not a an
epimorphism, and thus it is possible that $\mathbf{b} \notin \im(A)$.
In this case, there are no solutions to the equation.
On the other hand, if $\mathbf{b} \in \im(A)$ there is at least one
solution $\mathbf{x}_0$
with $A\mathbf{x}_0 = \mathbf{b}$.
Note, though, that we also know $A$ is not a monomorphism, and hence
$\dim(\ker(A)) \ge 1$. Then, if $\mathbf{z} \in \ker(A)$, we have
$A(\mathbf{x}_0 + \mathbf{z}) = A\mathbf{x}_0 + A\mathbf{z}
= A\mathbf{x}_0 + 0 = A\mathbf{x}_0 = \mathbf{b}$,
and so $\mathbf{x}_0 + \mathbf{z}$ is another solution. Furthermore,
if $\mathbf{y}$ is another solution with $A\mathbf{y} = \mathbf{b}$, then
$A(\mathbf{x}_0 - \mathbf{y}) = A\mathbf{x}_0 -A\mathbf{y}
= \mathbf{b} - \mathbf{b} = 0$.
This means $\mathbf{x}_0 - \mathbf{y} \in \ker(A)$, and so
$\mathbf{y} = \mathbf{x}_0 + \mathbf{z}$ for some $\mathbf{z} \in \ker(A)$.
Thus, if $\mathbf{x}_0$
is a particular solution, then every solution is of the form
$\mathbf{x}_0 + \mathbf{z}$ for some $\mathbf{z} \in \ker(A)$.
The space of solutions is then a translation of the kernel of A, of
the form $\mathbf{x}_0 + \ker(A)$. We then only need to find one particular
solution.
In this case, we have broken the problem down into two parts:
first, we solve $A\mathbf{x} = 0$ (called the {\em homogeneous} equation),
then we find a single solution $A\mathbf{x}_0 = \mathbf{b}$. For
$\myfield = \myreal$ or $\mycomplex$, there will be
infinitely many solutions.
\end{enumerate}
\pagedone
\item The second case is when $m < n$. In this case, $A$ cannot be
a monomorphism, and things are then similar to the second part
of the previous case. If $A$ is an epimorphism, there is sure to be
at least one solution. Again, we solve the homogeneous case, and
then find one particular solution. If $A$ is not an epimorphism, there
may not be any solutions. Otherwise, as above, in general we
will have infinitely many solutions.
\item If $m > n$, then $A$ cannot be an epimorphism, and hence there may
not be any solutions. If $A$ is a monomorphism, if there is a solution,
there will be just one. If $A$ is not a monomorphism, if there is
one solution, there will be infinitely many, as above.
\item In all three cases, we can start by trying to find the inverse
(or left partial inverse) of $A$. If $A$ is a monomorphism, we can
expect to be successful, and to be able to find the unique solution
if it exists. If $A$ is not a monomorphism, we solve the homogeneous
equation, and then look for a single particular solution.
\item Question: How can we tell if $A$ is a monomorphism? How
can we find $A^{-1}$, or at least $S_1$, the left partial inverse
of $A$?
\end{enumerate}
\pagedone
\item A second important example is the derivative operator
$$\deriv : C^\infty(\myreal) \to C^\infty(\myreal).$$
Note that this is a linear operator, since
$\deriv(f + g) = \deriv(f) + \deriv(g),\ \mathrm{and}$
$\deriv(af) = a\deriv(f)$.
Note that $\ker(\deriv) = \myspan(\{1\})$, the one-dimensional space consisting of all
constant functions. If we collapse $\ker(\deriv)$ down to nothing (in technical
terms, form the {\em quotient space} \ldots) then we can think of $\deriv$
as an isomorphism (on the quotient space). $\deriv$ has an inverse,
$$\int : C^\infty(\myreal) \to C^\infty(\myreal).$$
We then have (a variant of) the {\bf fundamental theorem of calculus}:
$$\deriv\int(f) = \int\deriv(f) = f$$
(on the reduced space -- that is, up to a constant).
\pagedone
\item We can then consider the general linear ordinary differential operators with
constant coefficients.
These are operators in $\myreal[\deriv]$, that is operators of the form:
$$P(\deriv) = a_n\deriv^n + \ldots + a_1\deriv + a_0.$$
These operators act on a function $f \in C^\infty(\myreal)$ as
$$P(\deriv)(f) = a_n\deriv^n(f) + \ldots + a_1\deriv(f) + a_0f.$$
We then have the general $n$th order linear ordinary differential
equation with constant coefficients:
$$P(\deriv)(f) = g.$$
We can work on solving these equations with an approach similar to the
method for systems of linear equations.
\pagedone
We first note that if the function $f_0$ is a solution to this equation, and
$z = z(x) \in \ker(P(\deriv))$, then $f_0 + z$ is also a solution, and if $f_1$
is another solution, then $P(\deriv)(f_0 - f_1) = P(\deriv)(f_0) - P(\deriv)(f_1)
= g - g = 0$. Thus, all solutions are of the form $f_0 + z$ where $f_0$ is
some particular solution, and $z \in \ker(P(\deriv))$.
We thus separate the problem into two parts. First we solve the associated
{\em homogeneous} equation:
$$P(\deriv)(f) = 0$$
to find $\ker(P(\deriv))$, and then we look for a single particular solution
to the original equation.
In general, we have that $\dim(\ker(P(\deriv))) = n$, the degree of $P$. This
is not an entirely obvious fact, but it is not counterintuitive \ldots
\pagedone
Hence what we need to do is find $n$ functions which form a basis for
$\ker(P(\deriv))$. What we need, then, are $n$ linearly independent functions
each of which is a solution to the homogeneous equation.
In theory (:-) this is not too hard.
We note first that for the first-order case, we have the solution:
\begin{eqnarray*}
(\deriv - r)(e^{rx}) & = & \deriv(e^{rx}) - re^{rx}\\
& = & re^{rx} - re^{rx}\\
& = & 0,
\end{eqnarray*}
and, in the $k$th order extension of this,
$$(\deriv - r)^k(x^{k-1}e^{rx}) = 0.$$
We also have that the set of functions $A = \{x^je^{r_ix}\ |\ 0 \le j \le k,
1 \le i \le m\}$\\
is linearly independent. From this we see how to solve equations of the form:
$$\prod_i(\deriv - r_i)^{k_i}(f) = 0.$$
\pagedone
Now, consider the operator
$$\deriv^2 - 2s\deriv + s^2 + t^2$$
for $s, t \in {\mathrm R}$.
We have that
$$(\deriv^2 -2s\deriv + s^2 + t^2)^k(x^{k-1}e^{sx}\cos(tx)) = 0,$$
$$(\deriv^2 -2s\deriv + s^2 + t^2)^k(x^{k-1}e^{sx}\sin(tx)) = 0.$$
We can think of this as $\alpha = s + ti$, and that we are working with
$$\deriv^2 - 2s\deriv + s^2 + t^2 = (\deriv - \alpha)(\deriv - \bar\alpha).$$
We have that the set of functions
$$B = \{x^je^{s_kx}\cos(t_kx), x^me^{s_nx}\sin(t_nx)\}$$
is also linearly independent.
From this we see how to solve equations of the form:
$$\prod_i(\deriv^2 -2s_i\deriv + s_i^2 + t_i^2)^{k_i}(f) = 0.$$
\pagedone
We can now put together all the pieces to solve the homogeneous equation
$P(\deriv) = 0$. We use the fact that any polynomial over $\myreal$
can be completely factored as
$$P(\deriv) = \prod_i(\deriv - r_i)^{k_i}\prod_j(\deriv -\alpha_j)^{k_j}
(\deriv - \bar\alpha_j)^{k_j}$$
with $r_i \in {\mathrm R}$ and $\alpha_j \in {\mathrm C}$.
To solve the inhomogeneous equation, we need only to find one particular solution of
$P(\deriv)(f) = g$.
This is just the bare beginnings of techniques for solving differential
equations, but it gives the flavor of some relatively powerful methods, and the
role that linear algebra plays. I haven't even mentioned the issues of initial
values/boundary conditions. For much more on these topics, look at a book
such as {\em Elementary Differential Equations with Linear Algebra,} by
Finney and Ostberg.
\pagedone
\item These approaches generalize to linear ordinary differential operators
with non-constant coefficients:
$$a_n(x)\deriv^n + a_{n-1}(x)\deriv^{n-1} + \cdots + a_0(x),$$
to systems of linear ordinary differential operators, and on to
linear partial differential operators and systems of linear partial
differential operators. But I think for now someone else will have to
write that up \ldots :-)
\item We can develop a similar approach to solving linear discrete difference equations.
For example, the difference equation
$(a_{n+2} - a_{n+1} - a_{n}) = (0)$
has as a solution the Fibonnacci sequence $(a_n) = (1, 1, 2, 3, 5, 8, \ldots)$.
We could work with the discrete version $\Delta$ of the differential operator
$D$, where $\Delta f(x) = (f(x + 1) - f(x))/1 = f(x + 1) - f(x)$. In the discrete
case, we can't let $h$ go to zero.
We would then have $\Delta((a_n)) = (a_{n+1} - a_n)$.
Our example difference equation would be
$$(\Delta^2 + \Delta -1)((a_n)) = (0).$$
Often, however, it is more convenient to work with the discrete increment
operator $\mathbf{E}$, with $\mathbf{E}(f(x)) = f(x + 1)$, or
$\mathbf{E}((a_n)) = (a_{n+1})$. Both $\Delta$ and $\mathbf{E}$ are
linear operators. Our example equation is then
$\mathbf{E}^2((a_n)) = \mathbf{E}((a_n)) + (a_n)$, or
$$(\mathbf{E}^2 - \mathbf{E} - 1)((a_n)) = (0).$$
In a general form, we have a polynomial $P(x)$ for an equation
of the form $P(\mathbf{E})((a_n)) = (0)$.
We can then find a general solution by using the facts that
$$(\mathbf{E} - \alpha)^k((n^j\alpha^n)) = (0)$$
for $0 \le j < k$, and
$$\{(n^j\alpha^n)\ |\ 0 \le j < k\}$$
is a linearly independent set, etc.
\item Of course, all of these are the easy cases. The hard ones are the
nonlinear equations \ldots
\item At times, this reminds me of a comment made by my Ph.D. thesis
advisor, after I had finished the proof of my main result for all odd
primes. He said there were three ways to think about it:
\begin{enumerate}
\item I had handled infinitely many cases, and omitted only one, the
even prime 2.
\item I had done half the cases. I had handled all the odd primes, but
none of the even ones.
\item I had dealt with all the uninteresting cases, but not the single
interesting case :-)
\end{enumerate}
\end{enumerate}
\end{itemize}
\pagedone
\exercises{Linear operators}
\begin{enumerate}
\item Find a linear operator $S$ such that
\begin{eqnarray*}
S^2 & = &
\left[\begin{array}{cc}
-1 & 0 \\
0 & -1
\end{array}\right]
\end{eqnarray*}
\item If $S$ is the operator
\begin{eqnarray*}
S & = &
\left[\begin{array}{cc}
0 & -\frac{\pi}{4} \\
\frac{\pi}{4} & 0
\end{array}\right],
\end{eqnarray*}
what is $\exp(S)$?
\item Solve the system of equations:
$$\begin{array}{ccccccc}
3x_1 & + & x_2 & - & 2x_3 & = & 3\\
x_1 & - & 2x_2 & - & x_3 & = & 1\\
2x_1 & - & 4x_2 & - & x_3 & = & 3
\end{array}
$$
\pagedone
\item Solve the differential equation:
$$\frac{d^5f}{dx^5} - 7\frac{d^4f}{dx^4} + 23\frac{d^3f}{dx^3}
- 45\frac{d^2f}{dx^2} + 48\frac{df}{dx} -20f$$
$$\ \ \ \ \ = 26\cos(x) - 18\sin(x)$$
Hint: $$x^5 - 7x^4 + 23x^3 - 45x^2 + 48x - 20$$
$$\ \ \ \ \ = (x - 1)(x - 2)^2(x^2 - 2x + 5)$$
\item Find the general solution to the difference equation
$$(\mathbf{E}^2 - \mathbf{E} - 1)((a_n)) = (0)$$
\item Find the general solution to the difference equation
$$(\mathbf{E}^4 + 2\mathbf{E}^2 + 1)((a_n)) = (0)$$
\pagedone
\item Verify that
\begin{enumerate}
\item $\deriv$, the derivative, is a linear operator.
\item $(\deriv - r)^k(x^{k-1}e^{rx}) = 0$
\begin{eqnarray*}
(\deriv^2 -2s\deriv + s^2 + t^2)^k(x^{k-1}e^{sx}\cos(tx)) & = & 0\\
(\deriv^2 -2s\deriv + s^2 + t^2)^k(x^{k-1}e^{sx}\sin(tx)) & = & 0
\end{eqnarray*}
\item $\mathbf{E}$, the discrete increment, is a linear operator.
\item $(\mathbf{E} - \alpha)^k((n^j\alpha^n)) = (0)$ for $0 \le j < k$.
\end{enumerate}
\item What happens in nonlinear cases? Sometimes they are manageable,
sometimes not.
\begin{enumerate}
\item Solve the differential equation
$$\deriv f = b * f * (1 - f).$$
\item What can be said about the difference equation
$$\mathbf{E}((a_n)) = (b * a_n * (1 - a_n))?$$
Note: this is often called the logistics equation.
\end{enumerate}
\end{enumerate}
\pagedone
\sectionhead{Normed linear spaces}
\begin{itemize}
\item A {\bf norm} on a real or complex vector space $V$ is a function
$$||\ \cdot\ || : V \to \myreal$$
with the properties, for $v, v_1, v_2 \in V$,
\begin{enumerate}
\item $||v|| \ge 0$
\item $||v|| = 0$ if and only if $u = \htvec{0}$
\item $||av|| = |a|\ ||v||$
\item $||v_1 + v_2|| \le ||v_1|| + ||v_2||$.
\end{enumerate}
A space with such an associated function is called a {\bf normed linear space}.
\pagedone
\item Using the norm, we can define a {\bf topology} on the space, and can
then talk about continuity of functions or operators, limits of sequences, etc.
I won't go into this much here beyond a few examples, but good places to look
are books on Hilbert Spaces and/or functional analysis. There are a few books
indicated in the references.
\item Using the norm, we can define a {\bf metric} on $V$ by:
$d(v_1, v_2) = ||v_1 - v_2||$. Metrics have the properties:
\begin{enumerate}
\item $d(v_1, v_2) \ge 0$
\item $d(v_1, v_2) = 0$ iff $v_1 = v_2$
\item $d(v_1, v_2) = d(v_2, v_1)$
\item $d(v_1, v_2) \le d(v_1, v_3) + d(v_3, v_2)$\\
(the triangle inequality).
\end{enumerate}
\pagedone
\item Some examples:
\begin{enumerate}
\item On $\myreal^n$ or $\mycomplex^n$, we have the norm
$$||(a_1, a_2, \ldots, a_n)||_2
= (|a_1|^2 + \ldots + |a_n|^2)^{1/2}.$$
This is usually called the {\bf Euclidean} norm.
We can also think of this in terms of the {\bf inner product} given by\\
$\left<(a_1, a_2, \ldots, a_n), (b_1, b_2, \ldots, b_n)\right>$\\
\ \ \ \ \ $= a_1\bar b_1 + a_2\bar b_2 + \dots + a_n\bar b_n$.
We then have $||v||_2 = \left^{1/2}$.
\item We can generalize, for $p > 0$, to
$$||(a_1, a_2, \ldots, a_n)||_p = (|a_1|^p + \ldots + |a_n|^p)^{1/p},$$
and
$$||(a_1, a_2, \ldots, a_n)||_\infty = \max(|a_1|, \ldots, |a_n|).$$
These are called the $p$-norms and the $\infty$-norm (or max-norm).
\pagedone
A fun little exercise is to draw the circle of radius 1 in $\myreal^2$
for each of these norms:
$$circle_p(1) = \{(x, y) \in \myreal^2\ |\ ||(x, y)||_p = 1\}$$
for $p > 0$, or for $p = \infty$.
One of these constitutes a ``proof'' that a square is a circle :-)
\item We can generalize this to $\myreal^\infty$ or $\mycomplex^\infty$:
\begin{eqnarray*}
||(a_1, a_2, \ldots)||_p & = & \left(\sum_{n=1}^\infty |a_n|^p\right)^{1/p},\\
||(a_1, a_2, \ldots)||_\infty & = & \sup_n(|a_n|).
\end{eqnarray*}
For this to make sense, we will have to limit ourselves to the subspaces where
the sum converges:
\begin{eqnarray*}
l^p & = & \{(a_1, a_2, \ldots)\ |\ \left(\sum_{n=1}^\infty |a_n|^p\right)^{1/p}
< \infty\},\\
l^\infty & = & \{(a_1, a_2, \ldots)\ |\ \sup_n(|a_n|) < \infty\}.
\end{eqnarray*}
\pagedone
\item On $C^0(\myreal)$ or $C^0(\mycomplex)$, we can define
$$||f||_p = \left(\int|f|^p\right)^{1/p}$$
$$||f||_\infty = \sup_x(|f(x)|).$$
Again, we limit ourselves to the subspaces where these are $< \infty$, and
call the corresponding spaces $L^p$ or $L^\infty$.
\item We can think each of these Euclidean norms, $||\ \cdot\ ||_2$, as coming
from an inner product:
\begin{eqnarray*}
||(a_1, a_2, \ldots)||_2 & = & \left<(a_1, \ldots), (a_1, \ldots)\right>^{1/2}\\
& = & \left(\sum_{n = 1}^\infty a_n\bar a_n\right)^{1/2}\\
||f||_2 & = & \left^{1/2}\\
& = & \left(\int f\bar f\right)^{1/2}.
\end{eqnarray*}
When a complex Euclidean normed linear space is {\bf complete}
(that is, every Cauchy sequence converges),
it is called a {\bf Hilbert space}.
\end{enumerate}
\end{itemize}
\pagedone
\exercises{Normed linear spaces}
\begin{enumerate}
\item Verify that each of the examples actually are norms.
\item In $\myreal^2$, draw the unit circles
\begin{enumerate}
\item $circle_1(1) = \{(x, y) \in \myreal^2\ |\ ||(x, y)||_1 = 1\}$
\item \begin{center}$circle_{3/2}(1) =
\{(x, y) \in \myreal^2\ |\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ $\newline
$||(x, y)||_{3/2} = 1\}$\end{center}
\item $circle_2(1) = \{(x, y) \in \myreal^2\ |\ ||(x, y)||_2 = 1\}$
\item $circle_3(1) = \{(x, y) \in \myreal^2\ |\ ||(x, y)||_3 = 1\}$
\item \begin{center}$circle_\infty(1) =
\{(x, y) \in \myreal^2\ |\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ $\newline
$||(x, y)||_\infty = 1\}$\end{center}
\end{enumerate}
\pagedone
\item Suppose that $V$ is a real or complex vector space. An inner
product on $V$ is a conjugate-bilinear function on $V$:
$$ < \cdot , \cdot > : V \mathrm{x} V \to \myfield$$
where, for all $v_1, v_2, v \in V$, and $\alpha \in \myfield$,
\begin{eqnarray*}
< v , v > & \ge & 0\\
< v , v > & = & 0\ \mathrm{iff}\ v = 0\\
< v_1 + v_2 , v > & = & < v_1 , v > + < v_2 , v >\\
< v_1 , v_2 > & = & \overline{< v_2 , v_1 >}\\
< \alpha v_1 , v_2 > & = & \alpha < v_1 , v_2 >.
\end{eqnarray*}
Show that for an inner product,
\begin{eqnarray*}
< v , v_1 + v_2 > & = & < v , v_1 > + < v , v_2 >\\
< v_1 , \alpha v_2 > & = & \overline{\alpha} < v_1 , v_2 >.
\end{eqnarray*}
Show that for a finite dimensional real or complex vector space $V$
with basis $(v_1, v_2, \ldots , v_n)$, the function $f : V \mathrm{x} V \to \myfield$
given by
$$f\left(\sum_i a_i v_i, \sum_i b_i v_i\right) = \sum_i a_i \overline{b_i}$$
is an inner product.
\pagedone
\item Recall that a linear functional on a vector space $V$ is a linear map
$ f : V \to \myfield$.
For a finite dimensional real or complex inner product space $V$, define
the $\mathbf{dual\ space}$ of $V$ to be the space
$$ V^* = \{ f : V \to \myfield \ | \ f\ \mathrm{is\ a\ linear\ functional}\}$$
Show that $V^*$ is a vector space over $\myfield$, and that
$V$ and $V^*$ are isomorphic to each other.
Hint: Show that every $f \in V^*$ corresponds with a function
of the form
$$< v_f , \cdot > : V \to \myfield$$
for some $v_f \in V$.
\end{enumerate}
\pagedone
\sectionhead{Eigenvectors and eigenvalues}
\begin{itemize}
\item Suppose $T$ is a linear operator on $V$. Then $\lambda \in \myfield$
is called an {\bf eigenvalue} or {\bf characteristic value} of $T$ if there
exists $v \ne 0$ in $V$ with $T(v) = \lambda v$. In this case we call $v$ an
{\bf eigenvector} or {\bf characteristic vector} of $T$.
\item An equivalent definition is that $\lambda$ is an eigenvalue of $T$
if and only if $(T - \lambda I)v = 0$ for some $v \ne 0$. This follows from the fact
that $0 = (T - \lambda I)v = T(v) - \lambda v$
if and only if $T(v) = \lambda v$. Note that this also means
that $\ker(T - \lambda I) \ne \{0\}$. Thus, $\lambda$ is an eigenvalue if
and only if $(T - \lambda I)$ is not a
monomorphism. In the finite dimensional case, this is equivalent to
$(T - \lambda I)$ not being invertible.
\pagedone
\item Now suppose that $\lambda_1, \dots, \lambda_k$ are distinct eigenvalues of $T$
(i.e., $\lambda_i \ne \lambda_j$ for $i \ne j$), with
associated eigenvectors $v_1, \ldots, v_k$. Then $\{v_1, \ldots, v_k\}$ is a
linearly independent set.
\item This means in particular that if $T \in L(V)$ with $\dim(V) = n$,
and $T$ has $n$ distinct eigenvalues $\lambda_i$ with associated
eigenvectors $v_i$, then the set $S = (v_1, \ldots, v_n)$ is a basis
for $V$. Furthermore, the matrix representation of $T$ with respect
to the basis $S$ is
$$[T] =
\left[\begin{array}{cccc}
\lambda_1 & 0 & \ldots & 0\\
0 & \lambda_2 & \ldots & 0\\
\vdots & \vdots & \vdots & \vdots \\
0 & 0 & \ldots & \lambda_n
\end{array}\right]
$$
or $[T]_{ii} = \lambda_i,\ [T]_{ij} = 0$ for $i \ne j$.
This is also sometimes written as
$[T] = \mathrm{diag}(\lambda_1, \ldots, \lambda_n)$.
Eigenvalues and eigenvectors can thus give us a very simple representation for $T$.
\pagedone
\item Not all linear operators have eigenvalues (or eigenvectors). For
example, the linear operator $T_\theta : \myreal^2 \to \myreal^2$
given by
$$[T_\theta] =
\left[\begin{array}{cc}
\cos(\theta) & -\sin(\theta)\\
\sin(\theta) & \cos(\theta)
\end{array}\right]
$$
is a (counter-clockwise) rotation around the origin through the angle $\theta$.
If $\theta$ is not an integral multiple of $\pi$, then $T_\theta$ has no
eigenvalues.
\item More generally, consider a non-zero linear operator
$T : \myreal^2 \to \myreal^2$. If $0 \ne u \in \myreal^2$,
then there must be a linear combination of $\{u, T(u), T^2(u)\}$ with
$$0 = T^2(u) + aT(u) + b(u) = P_u(T)(u).$$
If $P_u(T)$ factors into linear factors $P_u(T) = (T - \lambda_1I)(T - \lambda_2I)$
with $\lambda_1, \lambda_2 \in \myreal$, then $\lambda_1$ and $\lambda_2$
are eigenvalues of $T$. On the other hand, if $a^2 - 4b < 0$, then $T$ has no
eigenvalues (it is a general rotation in $\myreal^2)$.
\pagedone
\item On the other hand, every non-zero linear operator
$T : \mycomplex^n \to \mycomplex^n$ has an eigenvalue. We can see this
from the fact that, for any $u \ne 0$ in $\mycomplex^n$, the set of vectors
$\{u, T(u), T^2(u), \ldots, T^n(u)\}$ must be a linearly dependent set, and
hence there are $a_0, a_1, \ldots, a_n$ not all zero with
$$0 = \sum_{i=0}^na_iT^i(u) = P_u(T)(u).$$
We know that we can factor the polynomial $P_u(T)$ over $\mycomplex$
into a product of linear factors
$$P_u(T) = \prod_j(T - z_jI)^{k_j}(T).$$
This linear operator is not a monomorphism (since $u \in \ker(P_u(T))$).
It is a product of commutative factors, and hence at least one of the
factors $(T - z_jI)$ is not a monomorphism. This says then that at least
one of the $z_j$ is an eigenvalue of $T$. We can proceed to find $n$
eigenvalues and eigenvectors.
\pagedone
\item For for every non-zero linear operator
$T : \mycomplex^n \to \mycomplex^n$, there is a polynomial
$P_T(T) = \sum_ia_iT^i$ of degree $n$, called the {\bf characteristic polynomial} of $T$,
with:
\begin{enumerate}
\item $P_T(T) = 0$. That is, $P_T(T)(u) = 0$ for all $u \in \mycomplex^n$.
\item The linear factors of $P_T(T)$ are each of the form $(T - \lambda_jI)$
for some eigenvalue $\lambda_j$. Also, $a_n = 1$. That is,
$$P_T(T) = \prod_j(T - \lambda_jI)^{k_j}(T).$$
\item For each eigenvalue $\lambda_j$, there is an eigenvector $u_j$.
If $k_j > 1$, there is a linearly independent set of $k_j$ eigenvectors
for $\lambda_j$. The set of $n$ eigenvectors $\{u_i\}$ is a linearly
independent set, and thus is a basis for $\mycomplex^n$. As above,
the matrix representation of $T$ with respect to this basis
is $[T] = \mathrm{diag}(\lambda_1, \ldots, \lambda_n)$ (with repetitions
as necessary).
\end{enumerate}
\pagedone
\item A non-zero linear operator $T : \myreal^n \to \myreal^n$
also has a characteristic polynomial of degree n. The big difference for the
real case is that the polynomial will factor into a combination of linear and quadratic
factors. There is then a basis consisting partially of eigenvectors and
partially of pairs of basis vectors for two-dimensional subspaces on which
$T$ is a rotation. The matrix of $T$ with respect to this basis then looks
like $[T] = \mathrm{diag}(\lambda_1, \ldots, \lambda_k,
A_{k+1},\ldots,A_{(n-k)/2})$
where each $A_i$ is a two-by-two matrix without eigenvalues.
\item The set of eigenvalues of an operator is sometimes called the
{\bf spectrum} of the operator. In cases like $\mycomplex^n$ where
the eigenvectors form a basis for the space, using such as basis is
sometimes known as the {\bf spectral decomposition} of the operator.
\pagedone
\item Just briefly, let's glance at differential operators. $\deriv : C^\infty(\myreal)
\to C^\infty(\myreal)$ has uncountably many eigenvalues. Each real number
$a \in \myreal$ is an eigenvalue since
$$\deriv(e^{ax}) = ae^{ax}.$$
The corresponding eigenvectors are of course $f_a(x) = e^{ax}$.
The differential operator $\deriv^2$ also has uncountably many eigenvalues and
eigenvectors since for $a > 0$,
\begin{eqnarray*}
\deriv^2(\cos(\sqrt{a}x)) & = & -a\cos(\sqrt{a}x),\\
\deriv^2(\sin(\sqrt{a}x)) & = & -a\sin(\sqrt{a}x),\ \mathrm{and}\\
\deriv^2(e^{\sqrt{a}x}) & = & ae^{\sqrt{a}x}.
\end{eqnarray*}
\pagedone
\item In this context, here's an example which puts many of these
pieces together. In quantum mechanics, a typical expression of
Schr\"odinger's equation looks like
$$\left[- \frac{\hbar^2}{2m_e}\mybigtriangledown^2+V(x,y,z)
- i\hbar\frac{\partial}{\partial t}\right]\Psi = 0.$$
This example is for an electron (with mass $m_e$) in a potential field $V(x,y,z)$.
The general solution of this operator equation is
$$\Psi(x,y,z,t)=\sum_{n=0}^\infty c_n\Psi_n(x,y,z)\exp\left(\frac{-iE_nt}{\hbar}\right)$$
where $\Psi_n(x,y,z)$ is an eigenfunction solution of the associated time
independent Schr\"odinger equation, with $E_n$ the corresponding eigenvalue.
The inner product, giving a time dependent probability, looks like
$$P(t) = \int\Psi\bar\Psi dv.$$
\end{itemize}
\pagedone
\exercises{Eigenvectors and eigenvalues}
\begin{enumerate}
\item Show that if $v$ is an eigenvector of $T$ corresponding with the
eigenvalue $\lambda$, and $\alpha \in \myfield\ (\alpha \ne 0)$,
then $\alpha v$ is also an eigenvector of $T$ corresponding with
$\lambda$. In particular, eigenvectors are not unique.
\item Define $T \in L(\myfield^2)$ by $T((x, y)) = (y, x)$. Find all the
eigenvalues and eigenvectors of $T$.
\item Define $T \in L(\myfield^3)$ by $T((x, y, z)) = (-y, 0, 2z)$. Find all the
eigenvalues and eigenvectors of $T$.
\item Define $T \in L(\myfield^\infty)$ by $T((a_1, a_2, \ldots)) = (a_2, a_3, \ldots)$
(i.e., $T$ is the left shift operator). Find all the
eigenvalues and eigenvectors of $T$.
\item Suppose $T \in L(V)$ is invertible. Show that $\lambda \ne 0$ is
an eigenvalue of $T$ if and only if $\lambda^{-1}$ is an eigenvalue of $T^{-1}$.
\item Suppose $S, T \in L(V)$. Show that $ST$ and $TS$ have the same eigenvalues.
\item Give an example of an operator whose matrix has all zeros on the diagonal with
respect to some basis, but which is invertible. Give an example of an operator
whose matrix has all diagonal elements non-zero with respect to some basis,
but which is singular (i.e., has no inverse).
\item Show that if $S, T \in L(V)$, $S$ is invertible, and $P(x) \in \myfield[x]$,
then $P(S^{-1}TS) = S^{-1}P(T)S$.
\item Suppose that $V$ is a finite dimensional normed complex vector space, and
$T$ is an isometry of $V$ (i.e., $||T(v)|| = ||v||$ for all $v \in V$).
Show that every eigenvalue $\lambda$ of $T$ has $|\lambda| = 1$.
Hint: show that there is a basis for $V$ consisting of eigenvectors
$e_i$, with $||e_i|| = 1$ for $1 \le i \le n$.
\item Let $V$ be a complex vector space, and $T \in L(V)$. Let $P(x) \in
\mycomplex[x]$. Show that $\alpha \in \mycomplex$ is an eigenvalue
of $P(T)$ if and only if $\alpha = P(\lambda)$ for some eigenvalue
$\lambda$ of $T$.
\item Is the preceding true for real vector spaces? Why not? (Note: this
is another example of why
$\mycomplex$ is so much nicer a place to work than $\myreal$ \ldots)
Is the preceding true if we replace $\mycomplex[x]$ with $\mycomplex^\infty[x]$,
power series? If so, show it. If not, give a counterexample.
\end{enumerate}
\pagedone
\sectionhead{Change of basis}
\begin{itemize}
\item Recall that the matrix associated with a linear transformation depends on
the particular bases we use. In the case of a linear operator $T : V \to V$,
it is typical to use the same base for $V$ as domain and as codomain. However,
as we have seen, a basis consisting of eigenvectors is particularly convenient,
and hence it is useful to know how to change bases.
If $V$ is finite dimensional, with two bases
$S_1 = (u_1, \ldots, u_n)$ and $S_2 = (v_1, \ldots, v_n)$,
we can consider the matrices of $T$ with respect to
the mixed bases. We can indicate this by the various symbols
$[T]^{S_1}, [T]^{S_2}, [T]^{S_1S_2}$, and $[T]^{S_2S_1}$, where,
for example
\begin{eqnarray*}
T(u_i) & = & \sum_j[T]_{ji}^{S_1}u_j,\ \mathrm{and}\\
T(u_i) & = & \sum_j[T]_{ji}^{S_1S_2}v_j.
\end{eqnarray*}
\pagedone
\item We can look in particular at the matrix representations for the identity
operator $I$. For ease of notation, let $[B] = [I]^{S_1S_2}$, the matrix
representation for $I$ using $S_1$ for the domain and $S_2$ for the codomain.
In particular, we then have
$$u_i = \sum_j[B]_{ji}v_j = B_i^{j}v_j.$$
We also have that the matrix inverse $[B]^{-1} = [I]^{S_2S_1}$ which takes
us in the opposite direction. Putting the pieces together, we then have
$$[B][T]^{S_1} = [T]^{S_2}[B].$$
That is, doing the operator $T$ in the basis $S_1$ and then converting to the
basis $S_2$ is the same as converting bases first, and then doing $T$ in the
second base.
Another way to say this is:
$$[T]^{S_1} = [B]^{-1}[T]^{S_2}[B]$$
or
$$[T]^{S_2} = [B][T]^{S_1}[B]^{-1}.$$
\end{itemize}
\pagedone
\exercises{Change of basis}
\begin{enumerate}
\item Show that if $A$ and $B$ are operators on a finite dimensional vector
space with $AB = I$, the identity operator, then $BA = I$, and so
$B = A^{-1}$.
\item Suppose that $T$ is an operator that has the same matrix with
respect to every basis. Show that $T$ must be some multiple of
the identity operator $I$.
\end{enumerate}
\pagedone
\sectionhead{Trace and determinant}
\begin{itemize}
\item Let $V$ be a finite dimensional vector space over $\myreal$ or $\mycomplex$,
and $T : V \to V$ an operator on $V$. Let $P_T(T) = \sum_{i=0}^n a_iT^i$ be the
characteristic polynomial of $T$. We can then define the {\bf trace} of $T$
by $\trace(T) = a_{n-1}$, the coefficient of $T^{n-1}$.
We can also define the trace of an $n \mathrm{x} n$ real or complex matrix $A$
by $\trace_m(A) = \sum_{i=1}^{n}A_{ii}$, the sum of the diagonal elements of $A$.
These definitions are consistent, in the sense that
$\trace(T) = \trace_m([T])$, where $[T]$ is the matrix of $T$ with respect
to some basis for $V$. An exercise will be to show that it doesn't matter
what basis we use (they all come out the same). Since these definitions
are consistent, we will ordinarily write them both the same way, as $\trace()$.
\item For $S,T : V \to V$, and $a, b \in \myfield$, the trace has the nice properties:
\begin{enumerate}
\item $\trace(aS + bT) = a\ \trace(S) + b\ \trace(T)$. This says that
the trace is a linear functional on $L(V)$.
\item In the complex case, we have
$$\trace(T) = \sum_i\lambda_i$$
where the $\lambda_i$ are the eigenvalues of $T$.
\item $\trace(ST) = \trace(TS)$.
\end{enumerate}
\item From these we can derive nice facts like:
If we define the {\bf commutator} of $S$ and $T$ by $[S, T] = ST - TS$,
then we always have $[S, T] \ne I$, the identity operator.
\pagedone
\item We can define the {\bf determinant} of an operator $T$
by $\det(T) = (-1)^n a_0$, where $a_0$ is the constant term in the
characteristic polynomial.
\item The determinant has the nice properties:
\begin{enumerate}
\item In the complex case, we have
$$\det(T) = \prod_i\lambda_i$$
where the $\lambda_i$ are the eigenvalues of $T$.
\item $\det(T) \ne 0$ iff $\ker(T) = \{0\}$ iff $T$ is invertible.
\item $\det(ST) = \det(S)\det(T) = \det(TS)$.
\item $P_T(z) = \det(zI - T)$. That is, the characteristic polynomial
is the determinant of the generalized operator $(zI -T)$, where
we think of $z$ as a complex variable.
\pagedone
\item We can define the determinant of a square matrix $M = [a_{ij}]$ by
$$\detm(M) = \sum_{p \in \perm(n)}
\sign(p)\prod_ia_{p(i)i}$$
where $\perm(n)$ is the set of all permutations of the numbers
$(1, 2, \ldots, n)$, $\sign(p)$ is the sign of the permutation, and
$p(i)$ is the $i$th number in the permutation $p$.
We then have:
$$\detm([T]) = \det(T).$$
As for the trace, since the two definitions are consistent,
we will typically denote both determinants by the same
symbol $\det()$.
Showing that these two definitions are consistent is a fair
amount of work (just looking at the formula for $\detm()$
should give you some idea). You can look it up if you are
interested.
\item There is another definition of determinant. Suppose $V$
is a real or complex $n$-dimensional vector space. Then there
is an alternating multi-linear functional
$$\detv : V^n \to \myfield$$
with the properties, for all $v_1, v_2, \ldots, v_n, v_{i_1}, v_{i_2} \in V$ and
$a, b \in \myfield$:
\begin{enumerate}
\item $\detv(v_1, \ldots, v_i, \ldots, v_j, \ldots, v_n) =
- \detv(v_1, \ldots, v_j, \ldots, v_i, \ldots, v_n)$
(alternating)
\item $\detv(v_1, \ldots, av_{i_1} + bv_{i_2}, \ldots, v_n) =
a\detv(v_1, \ldots, v_{i_1}, \ldots, v_n) +
b\detv(v_1, \ldots, v_{i_2}, \ldots, v_n)$
(multi-linear)
\item $\detv(e_1, \ldots, e_n) = 1$ for a particular
basis $\{e_1, \ldots, e_n\}$ for $V$.
(uniqueness)
\end{enumerate}
We then have $\detm(A) = \detv(A_1, \ldots, A_n)$,
where $A_k$ is the kth column of the matrix $A$, considered
as a vector.
We have that $\det(T) = \detm([T]) = \detv([T]_1, \ldots, [T]_n)$.
The fact that there are three different versions of the same thing
suggests that many people have worked on this topic, and that
this topic occurs in a variety of contexts \ldots
\item Recall the discussion of norms on vector spaces. The Euclidean
norms ($|| v ||_2$), which can be thought of as coming from
an inner product ($< v_1, v_2 >$), have a nice relationship with
the determinant:
$$||T(v)||_2 = || v ||_2\ \mathrm{for\ all}\ v \in U$$
$$\mathrm{iff}\ |\det(T)| = 1.$$
\item An operator which preserves the Euclidean norm ($||T(v)||_2 = ||v||_2)$
is called an {\bf isometry}. When the scalar field is $\myreal$, these
are called {\bf orthogonal} operators. In the case of $\mycomplex$, they
are called {\bf unitary} operators. In the case of the quaternions, they
are called {\bf symplectic} operators.
\item The isometries on a normed linear space have the properties:
\begin{enumerate}
\item If $T$ is an isometry, then $\det(T) \ne 0$, and hence $T$
has an inverse, $T^{-1}$, which is also an isometry. Of course,
$I$, the identity operator, is an isometry.
\item If $T_1$ and $T_2$ are isometries, then $T_1T_2$ is also
an isometry.
\item This means that the set of isometries forms a {\bf group}. In the three
cases described above, these are called the {\bf orthogonal group}, the
{\bf unitary group}, and the {\bf symplectic group}.
\end{enumerate}
\pagedone
\item Another way to characterize the isometries (in the finite
dimensional case, but also with appropriate generalizations in
infinite dimensional cases) is that an operator is an isometry if
1.) its inverse exists, and 2.) the matrix of its inverse is given by the
conjugate transpose. That is, if
$$[T^{-1}]_{ij} = \overline{[T]_{ji}}.$$
\item The determinant also plays an important role in change of
variables formulas (in $\myreal^n$, for example).
Suppose $\Omega \subset \myreal^n$ and $\sigma : \Omega
\to \myreal^n$. We will think of $\sigma$ as a (local)
coordinate system on $\Omega$, or as a change of variables.
The {\bf derivative} of $\sigma$ at $\mathbf{x}$ is the unique
operator $T$ (if it exists) satisfying:
$$\lim_{y\to 0}\frac{||\sigma(x + y) - \sigma(x) - Ty||}
{||y||} = 0.$$
If this operator $T$ exists, $\sigma$ is called {\bf differentiable},
and $T$ is denoted by $\sigma'(x)$. Note that $\sigma'(x) \in L(\myreal^n)$.
We can write $\sigma(x) = (\sigma_1(x), \ldots, \sigma_n(x))$, and we
can denote the partial derivative of $\sigma_j$ with respect to the
$k$th coordinate by $\deriv_k\sigma_j(x)$.
If $\sigma$ is differentiable at $x$, then the matrix of $\sigma'(x)$ is
given by:
$$[\sigma'(x)] =
\left[\begin{array}{cccc}
\deriv_1\sigma_1(x) & \ldots & \deriv_n\sigma_1(x)\\
\vdots & & \vdots \\
\deriv_1\sigma_n(x) & \ldots & \deriv_n\sigma_n(x)
\end{array}\right]
$$
If we assume that $\Omega$ is a reasonable set (e.g., open, or
measurable) and $f : \Omega \to \myreal$ is also reasonable
(e.g., continuous, or measurable), then we have the change of
variables formula:
$$\int_{\sigma(\Omega)}f(y)dy
= \int_{\Omega}f(\sigma(x))|\det(\sigma'(x))|dx.$$
\end{enumerate}
% \item There are many other applications of these ideas, but I think I need to stop now \ldots
\end{itemize}
\pagedone
\exercises{Trace and determinant}
\begin{enumerate}
\item Given two real or complex $n \mathrm{x} n$ matrices $A$ and $B$,
show that $\trace_m(AB) = \trace_m(BA)$.
\item Given an operator $T$ on a real or complex finite dimensional
vector space $V$, show that $\trace_m([T])$ and $\detm([T])$ are
independent of the basis used for the matrix representation $[T]$.
Hint: use the change of basis formula $[T]^{S_1} = [B]^{-1}[T]^{S_2}[B]$,
the previous problem, and $\detm(AB) = \detm(BA)$.
\item Show that $\trace$ is linear, that is, $\trace(aS + bT)
= a\trace(S) + b\trace(T)$.
\item Show that $\trace(T) = \sum_{i=1}^{n}[T]_{ii}$.
\item For two operators $S$ and $T$, show that $[S, T] \ne I$. Recall
that $[S,T] = ST - TS$.
\item Show by example that in general, $\trace(ST) \ne \trace(S)\trace(T)$.
In particular, find an operator $T$ on a real vector space $V$ with
$\trace(T^2) < 0$.
\item Suppose $A$ is an $n \mathrm{x} n$ real matrix, $S \in L(\myreal^n)$ has
matrix representation $A$ with respect to some basis, and $T \in L(\mycomplex^n)$
has matrix representation $A$ with respect to some basis. Show that
$\trace(S) = \trace(T)$ and $\det(S) = \det(T)$.
\item Show that if $T$ is an isometry on a finite dimensional normed vector
space, then $|\det(T)| = 1$.
\item Show that if $T$ is an operator on a complex vector space of
dimension $n$, and $\alpha \in \mycomplex$, then
$\det(\alpha T) = \alpha^n\det(T).$
\end{enumerate}
\pagedone
%% \sectionhead{Differential manifolds}
%% \begin{itemize}
%%
%% \item Let's do just a little work on differential manifolds. An $n$-dimensional
%% manifold is a topological space $M$ together with a set of coordinate
%% mappings $\{f\}$. Each of the coordinate mappings is a continuous
%% one-to-one map from an open subset of $M$ onto an open subset of $\myreal^n$.
%% We will assume that together the domains of the coordinate maps cover $M$.
%% We will also assume that the coordinate maps are differentiable at each
%% point of $M$.
%%
%% For each point $m \in M$, we can attach a copy of $\myreal^n$
%%
%% \end{itemize}
%% \exercises{Differential manifolds}
%% \begin{enumerate}
%%
%% \item
%%
%% \end{enumerate}
\footnotesize
\bibliographystyle{plain}
\tthdump{\hypertarget{References}{}\hyperlink{Our general topics:}{\hfil To top $\leftarrow$}}
%%tth:{\special{html: Top}}
\begin{thebibliography}{12}
%%tth:{\special{html: }}
\bibitem{axler}
Axler, Sheldon,
{\em Linear Algebra Done Right,} second edition,
Springer-verlag, New York, 1997.
\bibitem{finney}
Finney, Ross L., and Ostberg, Donald R.,
{\em Elementary Differential Equations with Linear Algebra},
Addison-Wesley, 1976.
\bibitem{hoffman}
Hoffman, Kenneth, and Kunze, Ray,
{\em Linear Algebra,} second edition,
Engineering/Science/Mathematics, 1971.
\bibitem{naimark}
Naimark, M. A.,
{\em Normed Algebras,}
Wolters-Noordhoff, Groningen, the Netherlands, 1974.
\bibitem{noble}
Noble, Ben, and Daniel, James W.,
{\em Applied Linear Algebra,} third edition,
Engineering/Science/Mathematics, 1988.
\bibitem{prugovecki}
Prugove\v{c}ki, Eduard,
{\em Quantum Mechanics in Hilbert Space,} second edition,
Academic Press, New York, 1981.
\bibitem{rudin}
Rudin, Walter,
{\em Functional Analysis,}
McGraw-Hill, New York, 1973.
\bibitem{strang}
Strang, Gilbert,
{\em Linear Algebra and its Applications,}
Academic Press, New York, 1976.
\end{thebibliography}
\tthdump{\hyperlink{Our general topics:}{\hfil To top $\leftarrow$}}
%%tth:{\special{html: Back to top of file}}
%
% \sectionhead{On-line references}
%
%
% Some of the references listed above are available on line. They are listed again here for easy access:
%
% %\bibitem{abrams2}
% Abrams D S and Lloyd S,
% Non-Linear Quantum Mechanics implies Polynomial Time
% solution for NP-complete and $\#$P problems,
% %in {\it LANL e-print} quant-ph/9801041, http://xxx.lanl.gov (1998)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9801041}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9801041}
% {http://xxx.lanl.gov/abs/quant-ph/9801041}
%
%
% %\bibitem{aharonov5}
% Aharonov D, Beckman D, Chuang I and Nielsen M,
% What Makes Quantum Computers Powerful?
% \hyperref{http://wwwcas.phys.unm.edu/\~mnielsen/science.html}{}{}
% %\hyperURL{http}{wwwcas.phys.unm.edu/~mnielsen}{science.html}
% {http://wwwcas.phys.unm.edu/\~mnielsen/science.html}
% %
% %
%
% %\bibitem{decoherence2}
% Chuang I L, Laflamme R and Paz J P,
% Effects of Loss and Decoherence on a Simple Quantum Computer,
% %in {\it LANL e-print} quant-ph/9602018, http://xxx.lanl.gov (1996)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9602018}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9602018}
% {http://xxx.lanl.gov/abs/quant-ph/9602018}
%
% %\bibitem{grover2}
% Grover L K,
% A framework for fast quantum mechanical algorithms,
% %in {\it LANL e-print} quant-ph/9711043, http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9711043}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9711043}
% {http://xxx.lanl.gov/abs/quant-ph/9711043}
%
% %\bibitem{grover4}
% Grover L K,
% A fast quantum mechanical algorithm for estimating the median,
% %in {\it LANL e-print} quant-ph/9607024, http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9607024}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9607024}
% {http://xxx.lanl.gov/abs/quant-ph/9607024}
%
%
% %\bibitem{knill4}
% Knill E, Laflamme R and Zurek W H 1997
% Resilient quantum computation: error models and thresholds
% %in {\it LANL e-print} quant-ph/9702058, http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9702058}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9702058}
% {http://xxx.lanl.gov/abs/quant-ph/9702058}
%
% \pagedone
%
%
% %\bibitem{preskill2}
% Preskill J 1997
% Fault tolerant quantum computation,
% %in {\it LANL e-print} quant-ph/9712048, http://xxx.lanl.gov (1997),
% to appear in {\it Introduction to Quantum
% Computation}, edited by H.-K. Lo, S. Popescu, and T. P. Spiller
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9712048}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9712048}
% {http://xxx.lanl.gov/abs/quant-ph/9712048}
%
% %\bibitem{preskill3}
% Preskill J, Kitaev A, Course notes for Physics 229, Fall 1998, Caltech Univ.,
% \hyperref{http://www.theory.caltech.edu/people/preskill/ph229}{}{}
% %\hyperURL{http}{www.theory.caltech.edu/people/preskill}{ph229}
% {http://www.theory.caltech.edu/people/preskill/ph229}
%
%
% %\bibitem{rieffel}
% Rieffel E, Polak W
% An Introduction to Quantum Computing for Non-Physicists
% %{\it LANL e-print} quant-ph/9809016, http://xxx.lanl.gov (1998),
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9809016}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9809016}
% {http://xxx.lanl.gov/abs/quant-ph/9809016}
%
% %\bibitem{Steane-97}
% Steane A,
% Quantum Computation, Reports on Progress in Physics 61 (1998) 117,
% %(preprint in {\it LANL e-print} quant-ph/9708022, http://xxx.lanl.gov)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9708022}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9708022}
% {http://xxx.lanl.gov/abs/quant-ph/9708022}
%
% %\bibitem{zalka2}
% Zalka C,
% Grover's quantum searching algorithm is optimal,
% %in {\it LANL e-print} quant-ph/9711070http://xxx.lanl.gov (1997)
% \hyperref{http://xxx.lanl.gov/abs/quant-ph/9711070}{}{}
% %\hyperURL{http}{xxx.lanl.gov/abs/quant-ph}{9711070}
% {http://xxx.lanl.gov/abs/quant-ph/9711070}
%
\end{document}