1const MAX_J: usize = 64;
18
19#[derive(Debug, Clone, Copy)]
21pub struct Config {
22 pub bits_per_sample: u32,
24 pub block_size: u32,
26 pub ref_interval: u32,
28 pub preprocessor: bool,
30}
31
32#[derive(Debug, thiserror::Error)]
34pub enum Error {
35 #[error("Invalid configuration parameters")]
37 InvalidConfig,
38 #[error("Output buffer too small")]
40 OutputFull,
41 #[error("Input data truncated or malformed")]
43 Truncated,
44}
45
46impl Config {
47 fn id_len(&self) -> u32 {
49 match self.bits_per_sample {
50 1..=2 => 1,
51 3..=4 => 2,
52 5..=8 => 3,
53 9..=16 => 4,
54 17..=32 => 5,
55 _ => 0,
56 }
57 }
58
59 fn validate(&self) -> Result<(), Error> {
60 let n = self.bits_per_sample;
61 if n == 0 || n > 32 {
62 return Err(Error::InvalidConfig);
63 }
64 if !matches!(self.block_size, 8 | 16 | 32 | 64) {
65 return Err(Error::InvalidConfig);
66 }
67 if self.ref_interval > 4096 {
68 return Err(Error::InvalidConfig);
69 }
70 Ok(())
71 }
72
73 fn x_max(&self) -> u64 {
74 (1u64 << self.bits_per_sample) - 1
75 }
76}
77
78struct BitWriter<'a> {
81 buf: &'a mut [u8],
82 pos: usize,
83 bit: u32,
84}
85
86impl<'a> BitWriter<'a> {
87 fn new(buf: &'a mut [u8]) -> Self {
88 if !buf.is_empty() {
89 buf[0] = 0;
90 }
91 Self { buf, pos: 0, bit: 8 }
92 }
93
94 fn write(&mut self, value: u64, n_bits: u32) {
95 let mut rem = n_bits;
96 while rem > 0 {
97 let w = rem.min(self.bit);
98 let shift = self.bit - w;
99 let bits = ((value >> (rem - w)) & ((1u64 << w) - 1)) as u8;
100 self.buf[self.pos] |= bits << shift;
101 self.bit -= w;
102 rem -= w;
103 if self.bit == 0 {
104 self.pos += 1;
105 if self.pos < self.buf.len() {
106 self.buf[self.pos] = 0;
107 }
108 self.bit = 8;
109 }
110 }
111 }
112
113 fn write_fs(&mut self, m: u64) {
114 let mut zeros = m;
116 while zeros > 0 {
117 let w = (zeros as u32).min(self.bit);
118 self.bit -= w;
119 zeros -= w as u64;
120 if self.bit == 0 {
121 self.pos += 1;
122 if self.pos < self.buf.len() {
123 self.buf[self.pos] = 0;
124 }
125 self.bit = 8;
126 }
127 }
128 self.write(1, 1);
130 }
131
132 fn bytes_written(&self) -> usize {
133 if self.bit == 8 { self.pos } else { self.pos + 1 }
134 }
135}
136
137struct BitReader<'a> {
140 buf: &'a [u8],
141 pos: usize,
142 bit: u32,
143}
144
145impl<'a> BitReader<'a> {
146 fn new(buf: &'a [u8]) -> Self {
147 Self { buf, pos: 0, bit: 8 }
148 }
149
150 fn read(&mut self, n_bits: u32) -> Result<u64, Error> {
151 let mut result = 0u64;
152 let mut rem = n_bits;
153 while rem > 0 {
154 if self.pos >= self.buf.len() {
155 return Err(Error::Truncated);
156 }
157 let w = rem.min(self.bit);
158 let shift = self.bit - w;
159 let mask = ((1u32 << w) - 1) as u8;
160 let bits = (self.buf[self.pos] >> shift) & mask;
161 result = (result << w) | bits as u64;
162 self.bit -= w;
163 rem -= w;
164 if self.bit == 0 {
165 self.pos += 1;
166 self.bit = 8;
167 }
168 }
169 Ok(result)
170 }
171
172 fn read_fs(&mut self) -> Result<u64, Error> {
173 let mut m = 0u64;
174 loop {
175 if self.read(1)? == 1 {
176 return Ok(m);
177 }
178 m += 1;
179 }
180 }
181}
182
183fn map_error(delta: i64, x_hat: u64, x_max: u64) -> u64 {
187 let theta = x_hat.min(x_max - x_hat) as i64;
188 if delta >= 0 && delta <= theta {
189 2 * delta as u64
190 } else if delta < 0 && (-delta) <= theta {
191 (2 * (-delta)) as u64 - 1
192 } else {
193 theta as u64 + delta.unsigned_abs()
194 }
195}
196
197fn unmap_error(d: u64, x_hat: u64, x_max: u64) -> i64 {
199 let theta = (x_hat).min(x_max - x_hat) as i64;
200 if (d as i64) <= 2 * theta {
201 if d % 2 == 0 {
202 (d / 2) as i64
203 } else {
204 -(((d + 1) / 2) as i64)
205 }
206 } else if x_hat as i64 <= (x_max - x_hat) as i64 {
207 d as i64 - theta
208 } else {
209 theta - d as i64
210 }
211}
212
213fn preprocess_block(
217 samples: &[u32],
218 mapped: &mut [u64; MAX_J],
219 is_ref: bool,
220 prev: u32,
221 x_max: u64,
222) -> usize {
223 let j = samples.len();
224 let start = if is_ref { 1 } else { 0 };
225 let mut prev_val = if is_ref { samples[0] } else { prev };
226
227 for i in start..j {
228 let x = samples[i] as u64;
229 let x_hat = prev_val as u64;
230 let delta = x as i64 - x_hat as i64;
231 mapped[i - start] = map_error(delta, x_hat, x_max);
232 prev_val = samples[i];
233 }
234 j - start
235}
236
237fn cost_split(data: &[u64], k: u32) -> u64 {
240 let mut total = 0u64;
241 for &d in data {
242 total += (d >> k) + 1;
243 }
244 total + k as u64 * data.len() as u64
245}
246
247fn cost_second_ext(data: &[u64]) -> Option<u64> {
248 if data.len() % 2 != 0 {
249 return None;
250 }
251 let mut total = 0u64;
252 for pair in data.chunks_exact(2) {
253 let sum = pair[0] + pair[1];
254 let gamma = sum * (sum + 1) / 2 + pair[1];
255 total += gamma + 1;
256 }
257 Some(total)
258}
259
260fn cost_no_comp(data_len: usize, n: u32) -> u64 {
261 data_len as u64 * n as u64
262}
263
264#[derive(Debug, Clone, Copy, PartialEq, Eq)]
267enum CodingOption {
268 ZeroBlock,
269 SecondExtension,
270 Split(u32),
271 NoCompression,
272}
273
274pub fn compress(
282 cfg: &Config,
283 samples: &[u32],
284 output: &mut [u8],
285) -> Result<usize, Error> {
286 cfg.validate()?;
287
288 let n = cfg.bits_per_sample;
289 let j = cfg.block_size as usize;
290 let id_len = cfg.id_len();
291 let x_max = cfg.x_max();
292
293 if samples.is_empty() {
294 return Ok(0);
295 }
296 if samples.len() % j != 0 {
297 return Err(Error::InvalidConfig);
298 }
299
300 let num_blocks = samples.len() / j;
301 let mut w = BitWriter::new(output);
302 let mut prev_sample: u32 = 0;
303 let mut zero_run: u32 = 0;
304 let mut zero_ref: Option<u32> = None;
305 let mut seg_remaining: u32 = seg_size(cfg, 0);
306
307 for blk in 0..num_blocks {
308 let block = &samples[blk * j..(blk + 1) * j];
309 let is_ref = cfg.preprocessor
310 && cfg.ref_interval > 0
311 && blk % cfg.ref_interval as usize == 0;
312
313 let mut mapped = [0u64; MAX_J];
315 let data_len = if cfg.preprocessor {
316 preprocess_block(block, &mut mapped, is_ref, prev_sample, x_max)
317 } else {
318 for (i, &s) in block.iter().enumerate() {
319 mapped[i] = s as u64;
320 }
321 j
322 };
323 let data = &mapped[..data_len];
324
325 prev_sample = block[j - 1];
327
328 let max_k = ((1u32 << id_len) - 3).min(n.saturating_sub(2));
330 let option = select_option(data, n, id_len, max_k);
331
332 if let CodingOption::ZeroBlock = option {
333 if zero_run == 0 {
334 zero_ref = if is_ref { Some(block[0]) } else { None };
335 }
336 zero_run += 1;
337 seg_remaining -= 1;
338
339 let at_seg_end = seg_remaining == 0;
340 let at_input_end = blk == num_blocks - 1;
341
342 if at_seg_end || at_input_end {
343 flush_zero_blocks(
344 &mut w, id_len, n, zero_ref, zero_run,
345 at_seg_end && zero_run >= 5,
346 );
347 zero_run = 0;
348 zero_ref = None;
349 if at_seg_end {
350 seg_remaining = seg_size(cfg, blk + 1);
351 }
352 }
353 continue;
354 }
355
356 if zero_run > 0 {
358 flush_zero_blocks(
359 &mut w, id_len, n, zero_ref, zero_run, false,
360 );
361 zero_run = 0;
362 zero_ref = None;
363 }
364
365 seg_remaining -= 1;
366 if seg_remaining == 0 {
367 seg_remaining = seg_size(cfg, blk + 1);
368 }
369
370 match option {
372 CodingOption::SecondExtension => {
373 w.write(1, id_len + 1);
374 }
375 CodingOption::Split(k) => {
376 w.write(k as u64 + 1, id_len);
377 }
378 CodingOption::NoCompression => {
379 w.write((1u64 << id_len) - 1, id_len);
380 }
381 CodingOption::ZeroBlock => unreachable!(),
382 }
383
384 if is_ref {
386 w.write(block[0] as u64, n);
387 }
388
389 encode_data(&mut w, data, n, option);
391 }
392
393 Ok(w.bytes_written())
394}
395
396fn seg_size(cfg: &Config, block_idx: usize) -> u32 {
397 if cfg.ref_interval == 0 {
398 64
399 } else {
400 let blocks_in_interval = cfg.ref_interval;
401 let pos_in_interval = block_idx as u32 % blocks_in_interval;
402 let remaining = blocks_in_interval - pos_in_interval;
403 remaining.min(64)
404 }
405}
406
407fn flush_zero_blocks(
408 w: &mut BitWriter,
409 id_len: u32,
410 n: u32,
411 ref_sample: Option<u32>,
412 count: u32,
413 use_ros: bool,
414) {
415 w.write(0, id_len + 1);
417 if let Some(r) = ref_sample {
419 w.write(r as u64, n);
420 }
421 if use_ros && count >= 5 {
423 w.write_fs(4); } else if count <= 4 {
425 w.write_fs(count as u64 - 1);
426 } else {
427 w.write_fs(count as u64);
428 }
429}
430
431fn select_option(
432 data: &[u64],
433 n: u32,
434 id_len: u32,
435 max_k: u32,
436) -> CodingOption {
437 if data.iter().all(|&d| d == 0) {
438 return CodingOption::ZeroBlock;
439 }
440
441 let mut best_cost = u64::MAX;
442 let mut best = CodingOption::NoCompression;
443
444 let nc_cost = cost_no_comp(data.len(), n) + id_len as u64;
446 if nc_cost <= best_cost {
447 best_cost = nc_cost;
448 best = CodingOption::NoCompression;
449 }
450
451 if let Some(se_cost) = cost_second_ext(data) {
453 let total = se_cost + (id_len + 1) as u64;
454 if total < best_cost {
455 best_cost = total;
456 best = CodingOption::SecondExtension;
457 }
458 }
459
460 for k in 0..=max_k {
462 let total = cost_split(data, k) + id_len as u64;
463 if total < best_cost {
464 best_cost = total;
465 best = CodingOption::Split(k);
466 }
467 }
468
469 best
470}
471
472fn encode_data(
473 w: &mut BitWriter,
474 data: &[u64],
475 n: u32,
476 option: CodingOption,
477) {
478 match option {
479 CodingOption::Split(k) => {
480 for &d in data {
482 w.write_fs(d >> k);
483 }
484 if k > 0 {
486 let mask = (1u64 << k) - 1;
487 for &d in data {
488 w.write(d & mask, k);
489 }
490 }
491 }
492 CodingOption::SecondExtension => {
493 for pair in data.chunks(2) {
496 let d1 = pair[0];
497 let d2 = if pair.len() > 1 { pair[1] } else { 0 };
498 let sum = d1 + d2;
499 let gamma = sum * (sum + 1) / 2 + d2;
500 w.write_fs(gamma);
501 }
502 }
503 CodingOption::NoCompression => {
504 for &d in data {
505 w.write(d, n);
506 }
507 }
508 CodingOption::ZeroBlock => {}
509 }
510}
511
512pub fn decompress(
518 cfg: &Config,
519 input: &[u8],
520 output: &mut [u32],
521) -> Result<usize, Error> {
522 cfg.validate()?;
523
524 let n = cfg.bits_per_sample;
525 let j = cfg.block_size as usize;
526 let id_len = cfg.id_len();
527 let x_max = cfg.x_max();
528 let no_comp_id = (1u64 << id_len) - 1;
529
530 let mut r = BitReader::new(input);
531 let mut out_pos = 0usize;
532 let mut blk_idx = 0usize;
533 let mut prev_sample: u32 = 0;
534 let mut seg_remaining: u32 = seg_size(cfg, 0);
535
536 while out_pos + j <= output.len() {
537 let is_ref = cfg.preprocessor
538 && cfg.ref_interval > 0
539 && blk_idx % cfg.ref_interval as usize == 0;
540
541 let id_val = r.read(id_len)?;
543
544 if id_val == 0 {
545 let extra = r.read(1)?;
547 if extra == 0 {
548 let ref_val = if is_ref {
550 r.read(n)? as u32
551 } else {
552 0
553 };
554
555 let fs_val = r.read_fs()?;
556 let count = decode_zero_count(
557 fs_val, seg_remaining,
558 );
559
560 for i in 0..count as usize {
561 let blk_start = out_pos;
562 let this_ref = is_ref && i == 0;
563
564 if this_ref {
565 output[blk_start] = ref_val;
566 } else if cfg.preprocessor {
567 output[blk_start] = prev_sample;
569 } else {
570 output[blk_start] = 0;
571 }
572
573 for s in 1..j {
575 if cfg.preprocessor {
576 output[blk_start + s] =
578 output[blk_start + s - 1];
579 } else {
580 output[blk_start + s] = 0;
581 }
582 }
583
584 if this_ref && cfg.preprocessor {
586 for s in 1..j {
589 output[blk_start + s] =
590 output[blk_start + s - 1];
591 }
592 }
593
594 prev_sample = output[blk_start + j - 1];
595 out_pos += j;
596 blk_idx += 1;
597 seg_remaining -= 1;
598 if seg_remaining == 0 {
599 seg_remaining = seg_size(cfg, blk_idx);
600 }
601 }
602 continue;
603 } else {
604 decode_single_block(
606 &mut r, output, &mut out_pos, &mut prev_sample,
607 j, n, x_max, is_ref, cfg.preprocessor,
608 CodingOption::SecondExtension,
609 )?;
610 }
611 } else if id_val == no_comp_id {
612 decode_single_block(
613 &mut r, output, &mut out_pos, &mut prev_sample,
614 j, n, x_max, is_ref, cfg.preprocessor,
615 CodingOption::NoCompression,
616 )?;
617 } else {
618 let k = id_val as u32 - 1;
619 decode_single_block(
620 &mut r, output, &mut out_pos, &mut prev_sample,
621 j, n, x_max, is_ref, cfg.preprocessor,
622 CodingOption::Split(k),
623 )?;
624 }
625
626 blk_idx += 1;
627 seg_remaining -= 1;
628 if seg_remaining == 0 {
629 seg_remaining = seg_size(cfg, blk_idx);
630 }
631 }
632
633 Ok(out_pos)
634}
635
636fn decode_zero_count(fs_val: u64, _seg_remaining: u32) -> u32 {
637 if fs_val <= 3 {
642 fs_val as u32 + 1
643 } else if fs_val == 4 {
644 _seg_remaining
646 } else {
647 fs_val as u32
648 }
649}
650
651fn decode_single_block(
652 r: &mut BitReader,
653 output: &mut [u32],
654 out_pos: &mut usize,
655 prev_sample: &mut u32,
656 j: usize,
657 n: u32,
658 x_max: u64,
659 is_ref: bool,
660 preprocess: bool,
661 option: CodingOption,
662) -> Result<(), Error> {
663 let blk_start = *out_pos;
664
665 let ref_val = if is_ref {
667 r.read(n)? as u32
668 } else {
669 0
670 };
671
672 let data_len = if is_ref { j - 1 } else { j };
673 let mut mapped = [0u64; MAX_J];
674
675 match option {
676 CodingOption::Split(k) => {
677 for i in 0..data_len {
679 mapped[i] = r.read_fs()? << k;
680 }
681 if k > 0 {
683 for i in 0..data_len {
684 mapped[i] |= r.read(k)?;
685 }
686 }
687 }
688 CodingOption::SecondExtension => {
689 let se_data_len = if is_ref { j } else { data_len };
691 let n_pairs = se_data_len / 2;
692 let mut se_mapped = [0u64; MAX_J];
693 let se_start = if is_ref { 1 } else { 0 };
694
695 if is_ref {
697 se_mapped[0] = 0;
698 }
699
700 for p in 0..n_pairs {
702 let gamma = r.read_fs()?;
703 let (d1, d2) = decode_second_ext_pair(gamma);
704 se_mapped[se_start + 2 * p] = d1;
705 se_mapped[se_start + 2 * p + 1] = d2;
706 }
707
708 let copy_start = if is_ref { 1 } else { 0 };
710 for i in 0..data_len {
711 mapped[i] = se_mapped[copy_start + i];
712 }
713 }
714 CodingOption::NoCompression => {
715 for i in 0..data_len {
716 mapped[i] = r.read(n)?;
717 }
718 }
719 CodingOption::ZeroBlock => unreachable!(),
720 }
721
722 if is_ref {
724 output[blk_start] = ref_val;
725 }
726
727 let data_start = if is_ref { blk_start + 1 } else { blk_start };
728
729 if preprocess {
730 let mut prev = if is_ref {
731 ref_val
732 } else {
733 *prev_sample
734 };
735
736 for i in 0..data_len {
737 let delta = unmap_error(mapped[i], prev as u64, x_max);
738 let sample = (prev as i64 + delta) as u32;
739 output[data_start + i] = sample;
740 prev = sample;
741 }
742 } else {
743 for i in 0..data_len {
744 output[data_start + i] = mapped[i] as u32;
745 }
746 }
747
748 *prev_sample = output[blk_start + j - 1];
749 *out_pos += j;
750 Ok(())
751}
752
753fn decode_second_ext_pair(gamma: u64) -> (u64, u64) {
754 let mut beta = 0u64;
757 let mut threshold = 0u64;
758 loop {
759 let next = threshold + beta + 1;
760 if gamma < next {
761 break;
762 }
763 threshold = next;
764 beta += 1;
765 }
766 let d2 = gamma - threshold;
767 let d1 = beta - d2;
768 (d1, d2)
769}
770
771#[cfg(test)]
772mod tests {
773 use super::*;
774
775 fn roundtrip(
776 n: u32, j: u32, r: u32, preprocess: bool, samples: &[u32],
777 ) {
778 let cfg = Config {
779 bits_per_sample: n,
780 block_size: j,
781 ref_interval: r,
782 preprocessor: preprocess,
783 };
784 let mut compressed = [0u8; 4096];
785 let len = compress(&cfg, samples, &mut compressed)
786 .expect("compress");
787
788 let mut decompressed = [0u32; 256];
789 let dec_len = decompress(
790 &cfg,
791 &compressed[..len],
792 &mut decompressed[..samples.len()],
793 )
794 .expect("decompress");
795
796 assert_eq!(dec_len, samples.len());
797 assert_eq!(&decompressed[..dec_len], samples);
798 }
799
800 #[test]
801 fn roundtrip_constant() {
802 let samples = [100u32; 16];
803 roundtrip(8, 16, 1, true, &samples);
804 }
805
806 #[test]
807 fn roundtrip_ramp() {
808 let mut samples = [0u32; 16];
809 for i in 0..16 {
810 samples[i] = i as u32;
811 }
812 roundtrip(8, 16, 1, true, &samples);
813 }
814
815 #[test]
816 fn roundtrip_no_preprocess() {
817 let mut samples = [0u32; 16];
818 for i in 0..16 {
819 samples[i] = i as u32 * 3;
820 }
821 roundtrip(8, 16, 0, false, &samples);
822 }
823
824 #[test]
825 fn roundtrip_16bit() {
826 let mut samples = [0u32; 32];
827 for i in 0..32 {
828 samples[i] = 1000 + i as u32 * 10;
829 }
830 roundtrip(16, 32, 1, true, &samples);
831 }
832
833 #[test]
834 fn roundtrip_all_zeros() {
835 let samples = [0u32; 64];
836 roundtrip(8, 16, 1, true, &samples);
837 }
838
839 #[test]
840 fn roundtrip_random_like() {
841 let mut samples = [0u32; 64];
842 let mut v = 42u32;
843 for s in samples.iter_mut() {
844 v = v.wrapping_mul(1103515245).wrapping_add(12345);
845 *s = (v >> 16) & 0xFF;
846 }
847 roundtrip(8, 16, 1, true, &samples);
848 }
849
850 #[test]
851 fn roundtrip_small_block() {
852 let mut samples = [0u32; 8];
853 for i in 0..8 {
854 samples[i] = i as u32;
855 }
856 roundtrip(8, 8, 1, true, &samples);
857 }
858
859 #[test]
860 fn second_ext_pair_decode() {
861 assert_eq!(decode_second_ext_pair(0), (0, 0));
863 assert_eq!(decode_second_ext_pair(1), (1, 0));
865 assert_eq!(decode_second_ext_pair(2), (0, 1));
867 assert_eq!(decode_second_ext_pair(3), (2, 0));
869 assert_eq!(decode_second_ext_pair(4), (1, 1));
871 assert_eq!(decode_second_ext_pair(5), (0, 2));
873 }
874
875 #[test]
876 fn mapper_roundtrip() {
877 for x_hat in [0u64, 50, 100, 127, 200, 255] {
878 for x in 0..=255u64 {
879 let delta = x as i64 - x_hat as i64;
880 let mapped = map_error(delta, x_hat, 255);
881 let recovered = unmap_error(mapped, x_hat, 255);
882 assert_eq!(
883 recovered, delta,
884 "x_hat={x_hat}, x={x}, mapped={mapped}"
885 );
886 }
887 }
888 }
889
890 #[test]
891 fn fs_write_read() {
892 let mut buf = [0u8; 32];
893 let mut w = BitWriter::new(&mut buf);
894 w.write_fs(0);
895 w.write_fs(1);
896 w.write_fs(5);
897 w.write_fs(0);
898 let len = w.bytes_written();
899
900 let mut r = BitReader::new(&buf[..len]);
901 assert_eq!(r.read_fs().unwrap(), 0);
902 assert_eq!(r.read_fs().unwrap(), 1);
903 assert_eq!(r.read_fs().unwrap(), 5);
904 assert_eq!(r.read_fs().unwrap(), 0);
905 }
906
907 #[test]
908 fn id_len_values() {
909 let c = |n| Config {
910 bits_per_sample: n,
911 block_size: 16,
912 ref_interval: 0,
913 preprocessor: false,
914 };
915 assert_eq!(c(1).id_len(), 1);
916 assert_eq!(c(2).id_len(), 1);
917 assert_eq!(c(3).id_len(), 2);
918 assert_eq!(c(4).id_len(), 2);
919 assert_eq!(c(8).id_len(), 3);
920 assert_eq!(c(16).id_len(), 4);
921 assert_eq!(c(32).id_len(), 5);
922 }
923}