第18章 概率潜在语义分析 习题18.1   证明生成模型与共现模型是等价的。 解答: 解答思路: 生成模型的定义 共现模型的定义 证明生成模型与共现模型是等价的 解答步骤: 第1步:生成模型   根据书中第18.1.2节的生成模型的定义:   假设有单词集合$W=\{ w_1, w_2, \cdots, w_M\}$,其中$M$是单词个数;文本(指标)集合$D=\{ d_1, d_2, \cdots, d_N \}$,其中$N$是文本个数;话题集合$Z=\{z_1,z_2,\cdots, z_K\}$,其中$K$是预先设定的话题个数。随机变量$w$取值于单词集合;随机变量$d$取值于文本集合,随机变量$z$取值于话题集合。
证明生成模型与共现模型是等价的。
解答:
解答思路:
解答步骤:
第1步:生成模型
根据书中第18.1.2节的生成模型的定义:
假设有单词集合W=\{ w_1, w_2, \cdots, w_M\},其中M是单词个数;文本(指标)集合D=\{ d_1, d_2, \cdots, d_N \},其中N是文本个数;话题集合Z=\{z_1,z_2,\cdots, z_K\},其中K是预先设定的话题个数。随机变量w取值于单词集合;随机变量d取值于文本集合,随机变量z取值于话题集合。概率分布P(d)、条件概率分布P(z|d)、条件概率分布P(w|z)皆属于多项分布,其中P(d)表示生成文本d的概率,P(z|d)表示文本d生成话题z的概率,P(w|z)表示话题z生成单词w的概率。
每个单词-文本对(w,d)的生成概率由以下公式决定:
\begin{aligned}
P(w,d)
&= P(d)P(w|d) \\
&= P(d) \sum_z P(w,z|d) \\
&= P(d) \sum_z P(z|d)P(w|z)
\end{aligned} \tag{1}
P(w,z|d) = P(z|d) P(w|z)
P(w,d) = \sum_{z \in Z} P(z) P(w|z) P(d|z) \tag{2}
P(w,d|z) = P(w|z) P(d|z)
\begin{aligned}
P(w,d)
&= P(d) \sum_z P(z|d)P(w|z) \\
&= \sum_z P(w|z)P(z|d)P(d) \\
&= \sum_z P(w,z|d)P(d) \\
&= \sum_z P(w,d,z) \\
&= \sum_z P(z)P(w,d|z) \\
&= \sum_z P(z)P(w|z)P(d|z)
\end{aligned}
P(T) = \prod_{(w,d)} P(w,d)^{n(w,d)} \tag{1}
P(w, d) = \sum_{z \in Z} P(z)P(w|z)P(d|z) \tag{2}
\begin{aligned}
\log P(T) &= \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) \log P(w_i, d_j) \\
&= \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) \log \left[ \sum_{k=1}^K P(z_k) P(w_i | z_k) P(d_j | z_k) \right]
\end{aligned}
\begin{aligned}
& \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) \log \left[ \sum_{k=1}^K P(z_k) P(w_i | z_k) P(d_j | z_k) \right] \\
\geqslant & \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) \sum_{k=1}^K \log \Big[ P(z_k) P(w_i | z_k) P(d_j | z_k) \Big]
\end{aligned}
\begin{aligned}
Q
&= E_Z[\log P(T) |z] \\
&= E_Z \left\{ \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) \sum_{k=1}^K \log \Big[ P(z_k) P(w_i | z_k) P(d_j | z_k) \Big] \right\} \\
&= \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) E_Z \left\{ \sum_{k=1}^K \log \Big[ P(z_k) P(w_i | z_k) P(d_j | z_k) \Big] \right\} \\
&= \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) \sum_{k=1}^K P(z_k | w_i, d_j) \log \Big[ P(z_k) P(w_i | z_k) P(d_j | z_k) \Big]
\end{aligned}
P(z_k | w_i, d_j) = \frac{P(z_k) P(w_i | z_k) P(d_j | z_k)}{\displaystyle \sum_{k=1}^K P(z_k) P(w_i | z_k) P(d_j | z_k)}
\begin{array}{ll}
\displaystyle \sum_{k=1}^K P(z_k) = 1 \\
\displaystyle \sum_{i=1}^M P(w_i | z_k) = 1, \quad k=1,2,\cdots,K \\
\displaystyle \sum_{j=1}^N P(d_j | z_k) = 1, \quad k=1,2,\cdots,K
\end{array}
\Lambda = Q + \alpha \Big(1 - \sum_{k=1}^K P(z_k) \Big) + \sum_{k=1}^K \beta_k \Big( 1- \sum_{i=1}^M P(w_i | z_k) \Big) + \sum_{k=1}^K \gamma_k \Big( 1 - \sum_{j=1}^N P(d_j | z_k) \Big)
\begin{array}{lll}
\displaystyle \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) P(z_k | w_i, d_j) - \alpha P(z_k) = 0,& k = 1,2,\cdots, K \\
\displaystyle \sum_{j=1}^N n(w_i, d_j) P(z_k | w_i, d_j) - \beta_k P(w_i | z_k) = 0, & i=1,2, \cdots, M; & k = 1,2,\cdots, K \\
\displaystyle \sum_{i=1}^M n(w_i, d_j) P(z_k | w_i, d_j) - \gamma_k P(d_j | z_k) = 0, & j=1,2, \cdots, N; & k = 1,2,\cdots, K
\end{array}
\begin{array}{ll}
\displaystyle P(z_k) = \frac{\displaystyle \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) P(z_k | w_i, d_j)}{\displaystyle \sum_{j=1}^N n(d_j)} \\
\displaystyle P(w_i | z_k) = \frac{\displaystyle \sum_{j=1}^N n(w_i, d_j) P(z_k | w_i, d_j)}{\displaystyle \sum_{m=1}^M \sum_{j=1}^N n(w_m, d_j) P(z_k | w_m, d_j)} \\
\displaystyle P(d_j | z_k) = \frac{\displaystyle \sum_{i=1}^M n(w_i, d_j) P(z_k | w_i, d_j)}{\displaystyle \sum_{i=1}^M \sum_{l=1}^N n(w_i, d_l) P(z_k | w_i, d_l)}
\end{array}
P(z_k | w_i, d_j) = \frac{P(z_k) P(w_i | z_k) P(d_j | z_k)}{\displaystyle \sum_{k=1}^K P(z_k) P(w_i | z_k) P(d_j | z_k)}
\begin{array}{ll}
\displaystyle P(z_k) = \frac{\displaystyle \sum_{i=1}^M \sum_{j=1}^N n(w_i, d_j) P(z_k | w_i, d_j)}{\displaystyle \sum_{j=1}^N n(d_j)} \\
\displaystyle P(w_i | z_k) = \frac{\displaystyle \sum_{j=1}^N n(w_i, d_j) P(z_k | w_i, d_j)}{\displaystyle \sum_{m=1}^M \sum_{j=1}^N n(w_m, d_j) P(z_k | w_m, d_j)} \\
\displaystyle P(d_j | z_k) = \frac{\displaystyle \sum_{i=1}^M n(w_i, d_j) P(z_k | w_i, d_j)}{\displaystyle \sum_{i=1}^M \sum_{l=1}^N n(w_i, d_l) P(z_k | w_i, d_l)}
\end{array}
P(z_k | w_i, d_j) = \frac{P(w_i | z_k) P(z_k | d_j)}{\displaystyle \sum_{k=1}^K P(w_i | z_k) P(z_k | d_j)}
\begin{array}{ll}
\displaystyle P(w_i | z_k) = \frac{\displaystyle \sum_{j=1}^N n(w_i, d_j) P(z_k | w_i, d_j)}{\displaystyle \sum_{m=1}^M \sum_{j=1}^N n(w_m, d_j) P(z_k | w_m, d_j)} \\
\displaystyle P(z_k | d_j) = \frac{\displaystyle \sum_{i=1}^M n(w_i, d_j) P(z_k | w_i, d_j)}{n(d_j)}
\end{array}