CF755G PolandBall and Many Other Balls

有一排$n(n\le 10^9)$个球,定义一个组可以只包含一个球或者包含两个相邻的球。每个球至多只能分到一个组中,对每个$i\in[1,k]$($k<2^{15}$)求从这些球中取出恰好$i$组的方案数(对$998244353$取模)。

设$dp[i][j]$为从前$i$个球中选出$j$组的方案数(不要求每个球都被选入某个组)。

显然,第$i$个球要么不选,要么单独成组,要么与第$i-1$个球一起成组:

$$dp[i][j]=dp[i-1][j]+dp[i-1][j-1]+dp[i-2][j-1]$$

把$dp[i]$看成一个关于$x$的多项式($x^j$的系数为$dp[i][j]$),即$dp[i]=(x+1)dp[i-1]+xdp[i-2]$

倍增FFT

递推式已经得出来。考虑倍增式。
显然

  • 两堆不影响$dp[i+j][k]=\sum_{a+b=k}dp[i][b]*dp[j][a]$
  • 两堆中有$1+1$,$dp[i+j][k+1]=\sum_{a+b=k}dp[i-1][b]*dp[j-1][a]$

维护三个$(dp[x-2],dp[x-1],dp[x])\rightarrow(dp[x-1+x-1],dp[x-1+x],dp[x+x])$
类似快速幂的方法(遇到$1$往末尾$+1$,即递推,都有$<<1$)

特征方程

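把$x$看成常数,$dp[i]=(x+1)dp[i-1]+xdp[i-2]$就是一个二阶常系数线性递推,其特征方程为$\lambda^2=(x+1)\lambda+x$,即

$$\lambda_{1,2}=\frac{1+x\pm\sqrt{x^2+6x+1}}{2},\qquad dp[n]=\frac{\lambda_1^{n+1}-\lambda_2^{n+1}}{\sqrt{x^2+6x+1}}$$

  • 注意$x^2+6x+1$的常数项为$1$,可以直接进行多项式开根;而$\lambda_2$的常数项为$0$,所以$\lambda_2^{n+1}\equiv 0\pmod{x^{n+1}}$,只需用多项式快速幂(简单版,即$\ln$加$\exp$)求出$\lambda_1^{n+1}$,再乘上$\sqrt{x^2+6x+1}$的逆即可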
代码

#include <bits/stdc++.h>
using namespace std;
// 64-bit integer alias.
typedef long long ll;
// Shorthand macros for pair<int, int> and make_pair.
#define pii pair<int, int>
#define mk make_pair
// Capacity of every global polynomial / scratch array declared in this file.
const int N = 1e6 + 10;

// P is the modulus used for all arithmetic; gi is the base from which NTT
// derives its roots of unity (gi^((P-1)/len)).
const int P = 998244353, gi = 3;
// pi is not referenced by any code visible in this file.
const double pi = acos(-1.0);
// Modular addition: returns x + y, reduced by a single subtraction of mo when
// the sum reaches mo. Both operands are expected to already lie in [0, mo).
int inc(int x, int y, int mo)
{
    const int sum = x + y;
    return sum >= mo ? sum - mo : sum;
}

// Modular subtraction: returns x - y, lifted by a single addition of mo when
// the difference is negative. Both operands are expected to lie in [0, mo).
int del(int x, int y, int mo)
{
    const int diff = x - y;
    return diff < 0 ? diff + mo : diff;
}
// Reads the next decimal integer from stdin.
//
// Every non-digit character before the number is skipped; if any of the
// skipped characters is '-', the result is negated. The first character after
// the digit run is consumed as well.
//
// Returns the parsed value, or 0 when the input ends before any digit is seen.
// The character is kept in an int so that EOF stays distinguishable: with a
// plain char the skip loop could never terminate on exhausted input.
int read()
{
    int value = 0;
    int sign = 1;
    int ch = getchar();

    // Skip everything up to the first digit, remembering a minus sign.
    while (ch != EOF && (ch < '0' || ch > '9'))
    {
        if (ch == '-')
            sign = -1;
        ch = getchar();
    }

    // Accumulate the digit run.
    while (ch >= '0' && ch <= '9')
    {
        value = value * 10 + (ch - '0');
        ch = getchar();
    }
    return value * sign;
}

// Binary exponentiation: returns a^x modulo mo.
//   a  : base
//   x  : exponent, consumed bit by bit from the least significant end
//   mo : modulus
// Products are widened to 64 bits before the reduction.
int qpow(int a, int x, int mo)
{
    int result = 1;
    for (; x; x >>= 1)
    {
        if (x & 1)
            result = 1ll * result * a % mo;
        a = 1ll * a * a % mo;
    }
    return result;
}
// Bit-reversal permutation table read by NTT. NTTX, Finv, Sqrt and EXP each
// refill its first `transform length` entries before calling NTT.
int rev[N];
// In-place iterative number-theoretic transform of A[0..n) modulo P.
//   A   : array of n values, transformed in place
//   n   : transform length (expected to be a power of two); rev[0..n) must
//         already hold the bit-reversal permutation for this length
//   inv : 1 selects the forward transform; any other value uses the inverse
//         root of unity, and -1 additionally scales every entry by n^{-1}
void NTT(int *A, int n, int inv)
{
    // Reorder the input into bit-reversed order, swapping each pair once.
    for (int idx = 0; idx < n; ++idx)
    {
        if (idx < rev[idx])
            swap(A[idx], A[rev[idx]]);
    }

    // Butterfly passes over blocks of size 2, 4, 8, ...
    for (int half = 1; half < n; half <<= 1)
    {
        const int span = half << 1;
        int root = qpow(gi, (P - 1) / span, P);
        if (inv != 1)
            root = qpow(root, P - 2, P);

        for (int base = 0; base < n; base += span)
        {
            int w = 1;
            for (int j = 0; j < half; ++j)
            {
                const int lo = A[base + j];
                const int hi = 1ll * w * A[base + j + half] % P;
                A[base + j] = inc(lo, hi, P);
                A[base + j + half] = del(lo, hi, P);
                w = 1ll * w * root % P;
            }
        }
    }

    // The inverse transform still needs the 1/n normalisation.
    if (inv == -1)
    {
        const int scale = qpow(n, P - 2, P);
        for (int idx = 0; idx < n; ++idx)
            A[idx] = 1ll * A[idx] * scale % P;
    }
}
// Multiplies two polynomials modulo P and stores the product in a.
//   a, n : first factor and its coefficient count; receives the product
//   b, m : second factor and its coefficient count
// The transform length is the smallest power of two that is >= n + m; both
// arrays are transformed over that whole length, so their entries past n / m
// must be zero for the product to be meaningful.
// Side effects: refills rev for this length and leaves b in its transformed
// (point-value) form.
void NTTX(int *a, int n, int *b, int m)
{
    int size = 1;
    int logSize = 0;
    for (; size < n + m; size <<= 1)
        ++logSize;

    for (int i = 0; i < size; ++i)
        rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (logSize - 1));

    NTT(a, size, 1);
    NTT(b, size, 1);
    for (int i = 0; i < size; ++i)
        a[i] = 1ll * a[i] * b[i] % P;
    NTT(a, size, -1);
}
// Modular inverse of 2; assigned at the start of Sqrt and EXP.
int Inv2;
// Scratch buffers used inside Finv (copies of the input and of the current
// inverse); Finv zeroes the part it touched before returning.
int C[N], D[N];

// Power-series inverse by Newton iteration: fills b[0..n) with the
// coefficients of 1/a modulo x^n.
//   a : input series; a[0] must be invertible modulo P. Each pass reads
//       a[0..len), and len can grow past n (it stays below 2n).
//   b : output; each pass overwrites b[0..2*len), so b needs room for the
//       largest transform. Entries from n up to that length are zero on return.
//   n : number of coefficients wanted
// Side effects: refills rev; uses C and D as scratch and zeroes them on exit.
void Finv(int *a, int *b, int n)
{
    b[0] = qpow(a[0], P - 2, P);

    int len = 1;
    int logSize = 0;
    while (len < (n << 1))
    {
        const int size = len << 1;
        ++logSize;

        // Low `len` coefficients of the input and of the current inverse.
        for (int i = 0; i < len; ++i)
        {
            C[i] = a[i];
            D[i] = b[i];
        }
        for (int i = 0; i < size; ++i)
            rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (logSize - 1));

        NTT(C, size, 1);
        NTT(D, size, 1);
        // Newton step in point-value form: b <- b * (2 - a * b).
        for (int i = 0; i < size; ++i)
        {
            const long long ab = 1ll * C[i] * D[i] % P;
            b[i] = ((2ll - ab) * D[i] % P + P) % P;
        }
        NTT(b, size, -1);

        // Only the low `len` coefficients are kept from this pass.
        for (int i = len; i < size; ++i)
            b[i] = 0;

        len = size;
    }

    // Leave the shared scratch clean and truncate the result to n terms.
    for (int i = 0; i < len; ++i)
        C[i] = D[i] = 0;
    for (int i = n; i < len; ++i)
        b[i] = 0;
}
// Scratch buffers used inside Sqrt: E holds a copy of the input, F the inverse
// of the current root. Sqrt zeroes the part it touched before returning.
int E[N], F[N];

// Power-series square root by Newton iteration: fills b[0..n) with the
// coefficients of sqrt(a) modulo x^n.
//   a : input series. The iteration is seeded with b[0] = 1, so it presumably
//       expects a[0] == 1 -- confirm against callers.
//   b : output; each pass zeroes b[len..2*len), so b needs room for the
//       largest transform. Entries from n up to that length are zero on return.
//   n : number of coefficients wanted
// Side effects: assigns Inv2, refills rev, calls Finv (which uses C and D),
// and uses E and F as scratch, zeroing them on exit.
void Sqrt(int *a, int *b, int n)
{
    Inv2 = qpow(2, P - 2, P);
    b[0] = 1;

    int len = 1;
    int logSize = 0;
    while (len < (n << 1))
    {
        const int size = len << 1;
        ++logSize;

        for (int i = 0; i < len; ++i)
            E[i] = a[i];
        // F <- 1 / b (mod x^len). Finv refills rev, so it is rebuilt below.
        Finv(b, F, len);

        for (int i = 0; i < size; ++i)
            rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (logSize - 1));
        NTT(E, size, 1);
        NTT(F, size, 1);
        for (int i = 0; i < size; ++i)
            E[i] = 1ll * E[i] * F[i] % P;
        NTT(E, size, -1);

        // Newton step: b <- (b + a / b) / 2, keeping the low `len` terms.
        for (int i = 0; i < len; ++i)
        {
            const long long sum = (b[i] + E[i]) % P;
            b[i] = sum * Inv2 % P;
        }
        for (int i = len; i < size; ++i)
            b[i] = 0;

        len = size;
    }

    // Leave the shared scratch clean and truncate the result to n terms.
    for (int i = 0; i < len; ++i)
        E[i] = F[i] = 0;
    for (int i = n; i < len; ++i)
        b[i] = 0;
}

int aa[N], ia[N]; // scratch for Ln: aa holds the derivative of a, ia holds the inverse of a
// Power-series logarithm: fills res[0..n) with the integral of a'/a modulo
// x^n, using constant term 0.
//   a   : input series; a[0] must be invertible modulo P. Because only a'/a
//         is used, scaling a by a constant does not change the result.
//   res : output, n coefficients
//   n   : number of coefficients wanted; nothing is done for n <= 0
// Side effects: overwrites the aa / ia scratch arrays and, through Finv and
// NTTX, rev, C and D.
void Ln(int *a, int *res, int n)
{
    if (n <= 0)
        return;

    // NTTX transforms over the smallest power of two >= 2n, so the scratch
    // must be clean over that whole range; clearing only [0, 2n) could leave
    // stale data from an earlier call when n is not a power of two.
    int size = 1;
    while (size < (n << 1))
        size <<= 1;
    for (int i = 0; i < size; i++)
        aa[i] = ia[i] = 0;

    // aa <- a'
    for (int i = 1; i < n; i++)
        aa[i - 1] = 1ll * i * a[i] % P;
    // ia <- 1 / a, then aa <- a' / a
    Finv(a, ia, n);
    NTTX(aa, n, ia, n);

    // Integrate term by term. The constant term is written explicitly so the
    // result does not depend on what res[0] held before the call.
    res[0] = 0;
    for (int i = 1; i < n; i++)
        res[i] = 1ll * aa[i - 1] * qpow(i, P - 2, P) % P;
}
// Scratch buffers used inside EXP: G copies the current result, H copies the
// input, M receives Ln(G). EXP zeroes the part it touched before returning.
int G[N], H[N], M[N];
// Power-series exponential by Newton iteration: fills b[0..n) using the update
// b <- b * (1 - ln(b) + a), doubling the number of valid terms each pass.
//   a : input series. The iteration is seeded with b[0] = 1, so it presumably
//       expects a[0] == 0 -- confirm against callers.
//   b : output; each pass overwrites b[0..2*len), so b needs room for the
//       largest transform. Entries from n up to that length are zero on return.
//   n : number of coefficients wanted
// Side effects: assigns Inv2, refills rev, calls Ln (which overwrites aa / ia
// and uses Finv and NTTX), and uses G, H, M as scratch, zeroing them on exit.
void EXP(int *a, int *b, int n)
{
// Assigned here but not read anywhere inside this function.
Inv2 = qpow(2, P - 2, P);
b[0] = 1;
int bit = 0;
int len;

for (len = 1; len < (n << 1); len <<= 1)
{
// ML is the transform length for this pass; bit is log2(ML).
int ML = len << 1;
bit++;
// Start every pass from clean scratch over the whole transform length.
for (int i = 0; i < len << 1; i++)
H[i] = G[i] = M[i] = 0;
// H <- low len coefficients of the input.
for (int i = 0; i < len; i++)
H[i] = a[i];
// G <- low len coefficients of the current result.
for (int i = 0; i < len; i++)
G[i] = b[i];
Ln(G, M, len); // M(x) = ln(G(x)), first len coefficients
// Ln refilled rev for its own transforms, so rebuild it for length ML.
for (int i = 0; i < ML; i++)
rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
NTT(G, ML, 1), NTT(H, ML, 1), NTT(M, ML, 1);
// Newton step in point-value form: b <- G * (1 - M + H); the +P keeps the
// bracket non-negative before the reduction.
for (int i = 0; i < ML; i++)
b[i] = 1LL * G[i] * (1ll - M[i] + H[i] + P) % P;
NTT(b, ML, -1);
// Only the low len coefficients are kept from this pass.
for (int i = len; i < ML; i++)
b[i] = 0;
}
// Leave the shared scratch clean and truncate the result to n terms.
for (int i = 0; i < len; i++)
G[i] = H[i] = M[i] = 0;
for (int i = n; i < len; i++)
b[i] = 0;
}
// Scratch buffer used inside Fpow: holds Ln(f), then k * Ln(f).
int c[N];
// Power-series power via logarithm and exponential: g <- exp(k * Ln(f)),
// truncated to n coefficients.
//   f : base series, passed to Ln
//   k : exponent; each logarithm coefficient is multiplied by it modulo P
//   n : number of coefficients wanted
//   g : output, written by EXP
// Side effects: overwrites the global scratch c plus everything Ln and EXP touch.
void Fpow(int *f, int k, int n, int *g)
{
    Ln(f, c, n);

    int idx = 0;
    while (idx < n)
    {
        c[idx] = 1ll * c[idx] * k % P;
        ++idx;
    }

    EXP(c, g, n);
}
// Working polynomials for main.
int f[N], g[N], ff[N], gg[N];
// Reads n and m, then prints m numbers: coefficients x^1..x^k (k = min(n, m))
// of  (1 + x + S)^(n+1) / S  with S = sqrt(1 + 6x + x^2), raised through
// Fpow, followed by zeros for the indices k+1..m.
//
// The square root is computed once and reused for both the base of the power
// and the denominator; the original computed the identical root twice.
int main()
{
    const int n = read();
    const int m = read();
    const int k = min(n, m);
    const int terms = k + 1; // coefficients x^0 .. x^k are needed

    // ff <- S = sqrt(1 + 6x + x^2)
    f[0] = 1, f[1] = 6, f[2] = 1;
    Sqrt(f, ff, terms);
    memset(f, 0, sizeof(f));

    // g <- 1 / S, taken before ff is modified below.
    Finv(ff, g, terms);

    // ff <- 1 + x + S. Its constant term is 2, but Fpow goes through Ln, which
    // only uses ff'/ff and is therefore unaffected by a constant factor, so no
    // explicit halving is performed.
    ff[0]++, ff[1]++;
    Fpow(ff, n + 1, terms, f);

    // f <- f * (1 / S)
    NTTX(f, terms, g, terms);

    for (int i = 1; i <= k; ++i)
        printf("%d ", f[i]);
    for (int i = k + 1; i <= m; ++i)
        printf("0 ");
    return 0;
}