ugBASIC 1.18
An isomorphic BASIC language compiler for retrocomputers
Loading...
Searching...
No Matches
msc1.c
Go to the documentation of this file.
1/*****************************************************************************
2 * ugBASIC - an isomorphic BASIC language compiler for retrocomputers *
3 *****************************************************************************
4 * Copyright 2021-2026 Marco Spedaletti (asimov@mclink.it)
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *----------------------------------------------------------------------------
18 * Concesso in licenza secondo i termini della Licenza Apache, versione 2.0
19 * (la "Licenza"); è proibito usare questo file se non in conformità alla
20 * Licenza. Una copia della Licenza è disponibile all'indirizzo:
21 *
22 * http://www.apache.org/licenses/LICENSE-2.0
23 *
24 * Se non richiesto dalla legislazione vigente o concordato per iscritto,
25 * il software distribuito nei termini della Licenza è distribuito
26 * "COSÌ COM'È", SENZA GARANZIE O CONDIZIONI DI ALCUN TIPO, esplicite o
27 * implicite. Consultare la Licenza per il testo specifico che regola le
28 * autorizzazioni e le limitazioni previste dalla medesima.
29 ****************************************************************************/
30
31/****************************************************************************
32 * INCLUDE SECTION
33 ****************************************************************************/
34
35#include "msc1.h"
36
44
57
58typedef struct _MSC1Sequences {
59
61
62 int count;
63
65
66/****************************************************************************
67 * CODE SECTION
68 ****************************************************************************/
69
70void msc1_dump( MSC1Sequences * _sequences, int _count ) {
71
72 printf( "%d SEQUENCES\n", _sequences->count );
73 MSC1Sequence * actual = _sequences->first;
74
75 while( actual ) {
76 printf( " %2.2x %2.2x %2.2x %2.2x = %d offsets\n",
77 (unsigned char)(actual->value[0]), (unsigned char)(actual->value[1]),
78 (unsigned char)(actual->value[2]), (unsigned char)(actual->value[3]),
79 actual->count );
80 actual = actual->next;
81 --_count;
82 if ( !_count) break;
83 }
84
85}
86
87MSC1Sequence * msc1_find_sequence( MSC1Sequences * _sequences, MemoryBlock * _literal, int _limit ) {
88
89 // If sequences are presente, we look if the same
90 // sequence has been already stored inside the
91 // structure.
92 MSC1Sequence * actual = NULL;
93
94 if ( _sequences->first ) {
95 // Give a look to find if the same (sub)sequence
96 // has been stored before. Stop as soon as we
97 // find it.
98 actual = _sequences->first;
99 while( actual ) {
100 // printf("%2.2x%2.2x%2.2x%2.2x == %2.2x%2.2x%2.2x%2.2x ?\n",
101 // _literal[0], _literal[1], _literal[2], _literal[3],
102 // actual->value[0], actual->value[1], actual->value[2], actual->value[3]
103 // );
104
105 if ( memcmp( _literal, actual->value, 4 ) == 0 ) {
106 // printf( " --> ok!\n");
107 break;
108 }
109 actual = actual->next;
110
111 if ( _limit ) {
112 --_limit;
113 if ( !_limit) {
114 return NULL;
115 }
116 }
117
118 }
119
120 }
121
122 return actual;
123
124}
125
127
128 // Initialize the list of sequences:
129 // SEQUENCES = <empty>
130 MSC1Sequences * sequences = malloc( sizeof( MSC1Sequences ) );
131 memset( sequences, 0, sizeof( MSC1Sequences ) );
132
133 // We explore the input buffer in order to identify every
134 // sequences of 4 chars. We skip by just 1 char, in order
135 // to detect every subsequences:
136 //
137 // INPUT: A B C D E F G ->
138 // -> sequence: ABCD
139 // -> sequence: BCDE
140 // -> sequence: CDEF
141 // -> sequence: DEFG
142 //
143
144 int i = 0;
145 for( i=0; i<=(_size-4); ++i ) {
146
147 // The position is stored inside this structure.
148 MSC1SequenceValue * sequenceValue = malloc( sizeof( MSC1SequenceValue ) );
149 memset( sequenceValue, 0, sizeof( MSC1SequenceValue ) );
150 sequenceValue->offset = _input+i;
151
152 // This is the (sub)sequence to check.
153 MemoryBlock literal[4];
154 memcpy( literal, sequenceValue->offset, 4 );
155
156 // If sequences are presente, we look if the same
157 // sequence has been already stored inside the
158 // structure.
159 MSC1Sequence * actual = msc1_find_sequence( sequences, literal, 0 );
160
161 // (Sub)sequence has been found! So we must
162 // register the position inside the input buffer.
163 if ( actual ) {
164
165 // We put it at the end of the already present
166 // structures.
167 if ( ! actual->first ) {
168 actual->first = sequenceValue;
169 } else {
170 MSC1SequenceValue * actualValue = actual->first;
171 while( actualValue ) {
172 if ( ! actualValue->next ) {
173 actualValue->next = sequenceValue;
174 break;
175 }
176 actualValue = actualValue->next;
177 }
178 }
179
180 ++actual->count;
181
182 } else {
183
184 // (Sub)sequence has NOT been found! So we must
185 // register the position inside the input buffer
186 // as the first one.
187 actual = malloc( sizeof( MSC1Sequence ) );
188 memset( actual, 0, sizeof( MSC1Sequence ) );
189 actual->length = 4;
190 memcpy( actual->value, literal, 4 );
191 actual->first = sequenceValue;
192 actual->count = 1;
193
194 if ( ! sequences->first ) {
195 sequences->first = actual;
196 sequences->count = 1;
197 } else {
198 actual->next = sequences->first;
199 sequences->first = actual;
200 ++sequences->count;
201 }
202
203 }
204
205 }
206
207 return sequences;
208
209}
210
211/* compares two sequences according to their use count */
212static int sequence_comparison(const void *_a, const void *_b) {
213
214 const struct _MSC1Sequence *a = _a;
215 const struct _MSC1Sequence *b = _b;
216
217 int diff = a->count - b->count;
218
219 return diff < 0;
220
221}
222
223void msc1_sort( MSC1Sequences * _sequences ) {
224
225 // msc1_dump( _sequences, 10 );
226
227 MSC1Sequence * sequences = malloc( sizeof( MSC1Sequence ) * _sequences->count );
228
229 memset( sequences, 0, sizeof( MSC1Sequence ) * _sequences->count );
230
231 MSC1Sequence * current = _sequences->first;
232 for( int i=0; i<_sequences->count && current; ++i ) {
233 memcpy( &sequences[i], current, sizeof( MSC1Sequence ) );
234 current = current->next;
235 }
236
237 qsort(sequences, _sequences->count, sizeof( MSC1Sequence ), sequence_comparison);
238
239 for( int i=0; i<( _sequences->count - 1 ); ++i ) {
240 sequences[i].next = &sequences[i+1];
241 }
242
243 _sequences->first = &sequences[0];
244
245 // msc1_dump( _sequences, 10 );
246
247 // // Loop until no swap will be done.
248 // int done = 0;
249 // while( ! done ) {
250
251 // done = 1;
252
253 // // Previous and actual node examinated.
254 // MSC1Sequence * previous = NULL;
255 // MSC1Sequence * actual = _sequences->first;
256
257 // // Until all nodes will be explored...
258 // while( actual ) {
259
260 // // No comparison is possibile if next node is missing.
261 // if ( ! actual->next ) {
262 // break;
263 // }
264
265 // // If the next node has a greater count, we have to
266 // // move it on the upper part of the list.
267 // if ( actual->count < actual->next->count ) {
268
269 // // We have do differentiate if we are swapping the
270 // // first node or any other node.
271 // MSC1Sequence * swapped = NULL;
272
273 // // Any other node...
274 // if ( previous ) {
275 // swapped = actual->next->next;
276 // previous->next = actual->next;
277 // previous->next->next = actual;
278 // actual->next = swapped;
279 // // First node...
280 // } else {
281 // swapped = actual->next->next;
282 // _sequences->first = actual->next;
283 // _sequences->first->next = actual;
284 // actual->next = swapped;
285 // }
286 // done = 0;
287 // break;
288 // } else {
289 // // Go ahead with the next node...
290 // previous = actual;
291 // actual = actual->next;
292 // }
293
294 // }
295
296 // }
297
298}
299
300MSC1Compressor * msc1_create( int _maximum_repeated_sequences ) {
301
302 MSC1Compressor * msc1 = malloc( sizeof( MSC1Compressor ) );
303
304 memset( msc1, 0, sizeof( MSC1Compressor ) );
305
306 msc1->maximumRepeatedSequences = _maximum_repeated_sequences;
307
308 return msc1;
309
310}
311
312void msc1_echo_state( MSC1CompressorState _state, int _read, int _write, int _repeats, int _iliteral, char * _literal, char * _pointer, char *_wpointer, MSC1Sequence * _actual, MemoryBlock * _output ) {
313
314 switch( _state ) {
315
316 // STARTING STATE
317 case MSC1_CS_READY:
318 printf( "MSC1_CS_READY");
319 break;
320
321 // STORING THE LITERAL
322 case MSC1_CS_STORE:
323 printf( "MSC1_CS_STORE");
324 break;
325
326 // MOVE THE READ POINTER FORWARD BY 1 CHARACTER
327 case MSC1_CS_MOVE1:
328 printf( "MSC1_CS_MOVE1");
329 break;
330
331 // EMIT LITERALS INSIDE THE STAGE AREA
332 case MSC1_CS_LITERAL:
333 printf( "MSC1_CS_LITERAL");
334 break;
335
336 // EMIT LITERAL FOR DUPLICATED PATTERN
338 printf( "MSC1_CS_LITERAL_DUPE");
339 break;
340
341 // MOVE FORWARD BY 4 CHARACTERS
342 case MSC1_CS_MOVE4:
343 printf( "MSC1_CS_READY\n");
344 break;
345
346 // CHECK IF PATTERN IS STILL DUPLICATED
348 printf( "MSC1_CS_DUPE_STORE");
349 break;
350
351 // MOVE FORWARD BY 4 CHARACTERS, AND COUNT THE REPETITIONS
353 printf( "MSC1_CS_DUPE_MOVE4");
354 break;
355
356 // EMIT THE DUPLICATION COMMAND
357 case MSC1_CS_DUPES:
358 printf( "MSC1_CS_DUPES");
359 break;
360
362 printf( "MSC1_CS_END_OF_BLOCK");
363 break;
364 }
365
366 printf(" rp=%4.4x wp=%4.4x il=%d rep=%d\n", _read, _write, _iliteral, _repeats );
367 printf(" LT=");
368 for( int i=0; i<_iliteral; ++i ) {
369 printf( "%2.2x ", (unsigned char)(_literal[i]) );
370 }
371 int used = 0;
372 if ( _actual && _actual->used && _output ) {
373 used = _actual->used - _output;
374 }
375 printf(" seq=%p[%2.2x%2.2x%2.2x%2.2x] used=%4.4x\n", _actual, (_actual)?(_actual->value[0]):0, (_actual)?(_actual->value[1]):0, (_actual)?(_actual->value[2]):0, (_actual)?(_actual->value[3]):0, used );
376 printf(" RP=...%2.2x%2.2x%2.2x%2.2x\n", (unsigned char)(*(_pointer-3)), (unsigned char)(*(_pointer-2)), (unsigned char)(*(_pointer-1)), (unsigned char)(*(_pointer)) );
377 printf(" WP=...%2.2x%2.2x%2.2x%2.2x\n", (unsigned char)(*(_wpointer-4)), (unsigned char)(*(_wpointer-3)), (unsigned char)(*(_wpointer-2)), (unsigned char)(*(_wpointer-1)) );
378
379}
380
381MemoryBlock * msc1_compress( MSC1Compressor * _msc1, MemoryBlock * _input, int _size, int * _output_size ) {
382
383 // Read pointer, move to the start of the area to compress.
384 MemoryBlock * pointer = _input, * endPointer = pointer + _size;
385
386 // Write pointer to a cleared area.
387 MemoryBlock * output = malloc( 10 * _size ), * wpointer = output;
388 memset( output, 0, _size );
389
390 // Extract all 4 characters sequences from the input,
391 // and sort them in order to have the most frequent first.
392 MSC1Sequences * sequences = msc1_generate_sequences( _input, _size );
393 msc1_sort( sequences );
394 // msc1_dump( sequences, 10 );
395
396 MSC1Sequence * actual = NULL;
397 MemoryBlock literal[128];
398 memset( literal, 0, 128 );
399 int iliteral = 0;
400 int repeats = 0;
401
402 // Loop until the ASF is finished.
403 while( _msc1->state != MSC1_CS_END_OF_BLOCK ) {
404
405 // printf("--- --- --- --- --- --- --- --- --- ---\n");
406 // msc1_echo_state( _msc1->state, (pointer-_input), (wpointer-output), repeats, iliteral, &literal[0], pointer, wpointer, actual, output );
407 // printf("\n");
408
409 switch( _msc1->state ) {
410
411 // STARTING STATE
412 case MSC1_CS_READY:
413
414 // First of all, if we reach the end of memory
415 // to be compressed, we pass into the final
416 // state and the compression will end.
417 if ( pointer == endPointer ) {
419 break;
420 }
421
422 // Else, if no frequent pattern has been reached,
423 // we can start to store the literal from the
424 // input.
425 else if ( ! actual ) {
426 iliteral = 0;
427 memset( literal, 0, 128 );
428 _msc1->state = MSC1_CS_STORE;
429 }
430
431 // // Else if the frequent pattern has been reached
432 // // but not emitted yet on the output, then we
433 // // can write out it.
434 // else if ( ! actual->used ) {
435 // _msc1->state = MSC1_CS_LITERAL_DUPE;
436 // }
437
438 // Finally if the frequent pattern has been reached
439 // and it has been emitted (before) then we can
440 // count and emit the repetitions.
441 else {
442 _msc1->state = MSC1_CS_DUPE_STORE;
443 }
444 break;
445
446 // STORING THE LITERAL
447 case MSC1_CS_STORE:
448
449 // Copy the literal pointed by the read pointer
450 // into the stage area, to be able to be used
451 // to detect frequent patterns.
452 literal[iliteral] = *pointer;
453
454 // Move forward by 1 byte.
455 ++iliteral;
456 ++pointer;
457
458 // If the end of the memory is reached,
459 // we must emit the literal up to now.
460 if ( pointer == endPointer ) {
461 _msc1->state = MSC1_CS_LITERAL;
462 break;
463 }
464
465 // If we reach the limit of the stage area,
466 // we need to emit the literals and start
467 // over in collecting characters.
468 if ( iliteral == 127 ) {
469 _msc1->state = MSC1_CS_LITERAL;
470 break;
471 }
472
473 if ( iliteral > 3 ) {
474 // Since there are at least 4 characters, we can
475 // look if the last 4 characters belongs to the
476 // more frequent special sequences.
477 actual = msc1_find_sequence( sequences, &literal[iliteral-4], _msc1->maximumRepeatedSequences );
478
479 // If the sequence cannot be found, we can go on
480 // and try to continue to store the literals into
481 // the stage area.
482 if ( ! actual ) {
483 _msc1->state = MSC1_CS_MOVE1;
484 break;
485 }
486
487 // Otherwise, we can skip the sequence and go on in
488 // emitting literals from stage area.
489 else {
490 _msc1->state = MSC1_CS_LITERAL;
491 break;
492 }
493 } else {
494 _msc1->state = MSC1_CS_MOVE1;
495 break;
496 }
497
498 break;
499
500 case MSC1_CS_MOVE1:
501
502 _msc1->state = MSC1_CS_STORE;
503 break;
504
505 // EMIT LITERALS INSIDE THE STAGE AREA
506 case MSC1_CS_LITERAL:
507
508 // If we are emitting the literal from stage area
509 // because we find out a frequent pattern...
510 if ( actual ) {
511
512 // If the pattern is distant more than 1023
513 // bytes from the output byte, we must
514 // regenerate the pattern.
515 if( actual->used && ( wpointer - actual->used ) > 1000 ) {
516 actual->used = NULL;
517 }
518
519 if ( ! actual->used ) {
520 // printf(" rp=%4.4x wp=%4.4x EMIT: LITERAL %d [ ", (unsigned int)(pointer - _input), (unsigned int)(wpointer - output), iliteral);
521 // for( int i=0; i<iliteral-4; ++i ) {
522 // printf("%2.2x ", literal[i]);
523 // }
524 // printf("] [ ");
525 // for( int i=iliteral-4; i<iliteral; ++i ) {
526 // printf("%2.2x ", literal[i]);
527 // }
528 // printf("]\n");
529
530 // Write the number of symbols and,
531 // following, the literals themselves.
532 *wpointer = iliteral;
533 ++wpointer;
534 memcpy( wpointer, literal, iliteral );
535 wpointer += ( iliteral );
536
537 actual->used = ( wpointer - 4 );
538
539 repeats = 0;
540
541 } else {
542
543 if ( iliteral > 4 ) {
544
545 // printf(" rp=%4.4x wp=%4.4x EMIT: LITERAL %d [ ", (unsigned int)(pointer - _input), (unsigned int)(wpointer - output), iliteral-4);
546 // for( int i=0; i<iliteral-4; ++i ) {
547 // printf("%2.2x ", literal[i]);
548 // }
549 // printf("] < ");
550 // for( int i=iliteral-4; i<iliteral; ++i ) {
551 // printf("%2.2x ", literal[i]);
552 // }
553 // printf(">\n");
554
555 // Write the number of symbols and,
556 // following, the literals themselves.
557 *wpointer = iliteral-4;
558 ++wpointer;
559 memcpy( wpointer, literal, iliteral-4 );
560 wpointer += ( iliteral - 4 );
561
562 }
563
564 repeats = 1;
565
566 }
567
568 }
569
570 // ... else we are emitting literals because
571 // the size of stage area has been filled up.
572 else {
573
574 // printf(" rp=%4.4x wp=%4.4x EMIT: LITERAL %d [ ", (unsigned int)(pointer - _input), (unsigned int)(wpointer - output), iliteral);
575 // for( int i=0; i<iliteral; ++i ) {
576 // printf("%2.2x ", literal[i]);
577 // }
578 // printf("]\n");
579
580 // Write the number of symbols and,
581 // following, the literats themselves.
582 *wpointer = iliteral;
583 ++wpointer;
584 memcpy( wpointer, literal, iliteral );
585 wpointer += iliteral;
586
587 }
588
589 // Move to the initial state.
590 _msc1->state = MSC1_CS_READY;
591 break;
592
593 // // EMIT LITERAL FOR DUPLICATED PATTERN
594 // case MSC1_CS_LITERAL_DUPE:
595
596 // // Write the number of symbols and,
597 // // following, the literats themselves.
598 // *wpointer = 4;
599 // ++wpointer;
600 // memcpy( wpointer, &literal[iliteral - 4], 4 );
601
602 // // Register the last pattern pointer,
603 // // to be used to calculate offset.
604 // actual->used = wpointer;
605
606 // wpointer += 4;
607
608 // // Move to the next state
609 // _msc1->state = MSC1_CS_DUPE_MOVE4;
610 // break;
611
612 // MOVE FORWARD BY 4 CHARACTERS
613 case MSC1_CS_MOVE4:
614
615 pointer += 4;
616 _msc1->state = MSC1_CS_READY;
617 break;
618
619 // CHECK IF PATTERN IS STILL DUPLICATED
621
622 // If the input memory block is finished,
623 // we must emit the new literal.
624 if ( ( pointer + 4 ) >= endPointer ) {
625 _msc1->state = MSC1_CS_DUPES;
626 break;
627 }
628
629 // If the pattern is duplicated, we can move forward
630 // by 4 characters (and increment the repetitions).
631 if ( memcmp( pointer, &literal[iliteral-4], 4 ) == 0 ) {
632
633 // Increment the repetitions.
634 ++repeats;
635
636 _msc1->state = MSC1_CS_DUPE_MOVE4;
637 break;
638 }
639
640 // The repetition is ended, we can emit the
641 // duplicate definition.
642 else {
643
644 _msc1->state = MSC1_CS_DUPES;
645 break;
646 }
647
648 break;
649
650 // MOVE FORWARD BY 4 CHARACTERS, AND COUNT THE REPETITIONS
652
653 // Move forward by 4 characters.
654 pointer += 4;
655
656 // If the input memory block is finished,
657 // we must return back by 4 characters and
658 // emit the duplication command.
659 if ( pointer >= endPointer ) {
660 pointer -= 4;
661 _msc1->state = MSC1_CS_DUPES;
662 break;
663 }
664
665 // If there are 32 repetitions, we must
666 // emit the duplication command.
667 if ( repeats == 32 ) {
668 _msc1->state = MSC1_CS_DUPES;
669 break;
670 }
671
672 // Continue to count the repetition.
673 _msc1->state = MSC1_CS_DUPE_STORE;
674 break;
675
676 // EMIT THE DUPLICATION COMMAND
677 case MSC1_CS_DUPES:
678
679 if ( repeats > 0 ) {
680 if ( wpointer - actual->used + 2 > 1000 ) {
681
682 // printf(" rp=%4.4x wp=%4.4x EMIT: REPETITION %d [[ ", (unsigned int)(pointer - _input), (unsigned int)(wpointer - output), 4);
683 // for( int i=0; i<4; ++i ) {
684 // printf("%2.2x ", actual->value[i]);
685 // }
686 // printf("]]\n");
687
688 --repeats;
689 *wpointer = 4;
690 ++wpointer;
691 memcpy( wpointer, actual->value, 4 );
692 wpointer += ( 4 );
693 actual->used = ( wpointer - 4 );
694 }
695 }
696
697 // If there is at least one repetition,
698 // we can emit the relative command.
699 if ( repeats > 0 ) {
700
701 MemoryBlock token1 = 0x80 | ( ( repeats & 0x1f ) << 2 ) | ( ( ( wpointer - actual->used + 2 ) >> 8 ) & 0x03 );
702 MemoryBlock token2 = ( ( ( wpointer - actual->used + 2 ) ) & 0xff );
703
704 // int offset = wpointer - actual->used + 2;
705 // printf(" rp=%4.4x wp=%4.4x EMIT: DUPES %d from %4.4x [%2.2x %2.2x %2.2x %2.2x]\n", (unsigned int)(pointer - _input), (unsigned int)(wpointer - output), repeats, offset,
706 // (unsigned char)(*(wpointer-offset+2)), (unsigned char)(*(wpointer-offset+3)), (unsigned char)(*(wpointer-offset+4)), (unsigned char)(*(wpointer-offset+5)) );
707
708 *wpointer++ = token1;
709 *wpointer++ = token2;
710
711 }
712 if ( repeats == 32 ) {
713 repeats = 0;
714 _msc1->state = MSC1_CS_DUPE_STORE;
715 } else {
716 repeats = 0;
717 actual = NULL;
718 _msc1->state = MSC1_CS_READY;
719 }
720 break;
722 break;
723 }
724
725 // msc1_echo_state( _msc1->state, (pointer-_input), (wpointer-output), repeats, iliteral, &literal[0], pointer, wpointer, actual, output );
726 // printf("\n");
727 // printf("--- --- --- --- --- --- --- --- --- ---\n");
728
729 }
730
731 *wpointer = 0x00;
732 ++wpointer;
733
734 *_output_size = (wpointer - output);
735
736 return output;
737
738}
739
740void msc1_free( MSC1Compressor * _msc1 ) {
741
742 free( _msc1 );
743
744}
745
746MemoryBlock * msc1_uncompress( MSC1Compressor * _msc1, MemoryBlock * _input, int _size, int * _output_size ) {
747
748 // We allocate (precautiously) a memory block of ten
749 // times the input buffer.
750 *_output_size = 200 * _size;
751 MemoryBlock * output = malloc( *_output_size );
752
753 // This is the currently token examinated from
754 // the input stream.
755 MemoryBlock token;
756
757 // Read pointer .
758 MemoryBlock * pointer = _input;
759
760 // Write pointer.
761 MemoryBlock * wpointer = output;
762
763 // Loop through the entire input stream.
764 do {
765
766 // Take the current token from the input stream
767 // and move to the next element of the stream.
768 token = *pointer;
769 ++pointer;
770
771 // A token of zero (0) means "end of block".
772 // Nothing must be done.
773 if (token == 0) {
774 // printf( "rp=%4.4x - EOB\n", (unsigned int)(pointer - _input) );
775 }
776
777 // If the upper bit of the token is clear,
778 // it means that there is a literal block
779 // to emit on the output stream.
780 else if ((token & 0x80) == 0x00) {
781
782 // Take the number of literals (1...127),
783 // and copy from the pointer to the output.
784 int count = token & 0x7f;
785
786 // Check if (re)allocation is needed and,
787 // in that case, we reallocate the memory
788 // block with a double size.
789 if ( ((wpointer-output)+count) > *_output_size ) {
790 *_output_size = (*_output_size)<<1;
791 int reallocOffset = wpointer - output;
792 // printf( "reallocating %d bytes\n", *_output_size );
793 output = realloc( output, *_output_size );
794 wpointer = output + reallocOffset;
795 }
796
797 // printf( "rp=%4.4x - LITERAL %d characters\n", (unsigned int)(pointer - _input), count );
798 // printf( " ... %4.4x %2.2x %2.2x %2.2x %2.2x > %4.4x %2.2x %2.2x %2.2x %2.2x \n",
799 // (unsigned int)(pointer-_input), (pointer)[0], (pointer)[1], (pointer)[2], (pointer)[3],
800 // (unsigned int)(wpointer-output), (wpointer)[0], (wpointer)[1], (wpointer)[2], (wpointer)[3] );
801 // printf(" [ ");
802 // for(int i=0; i<count; ++i ) {
803 // printf("%2.2x ", (pointer)[i] );
804 // }
805 // printf(" ]\n");
806
807 memcpy(wpointer, pointer, count);
808 wpointer += count;
809 pointer += count;
810
811 }
812
813 // If the most significant bit is setted,
814 // we must output the repetition on the
815 // output stream.
816 else if ((token & 0x80) == 0x80) {
817
818 // Take the number of repetitions.
819 int repetitions = (token & 0x7f) >> 2;
820
821 // Extract the offset.
822 MemoryBlock tmp = *pointer;
823 ++pointer;
824 int offset = tmp | ((token & 0x03) << 8);
825
826 // If repetitions is zero then repetitions
827 // will be 32 times.
828 if (repetitions == 0) repetitions = 32;
829
830 // printf( "rp=%4.4x - DUPES %d times from %4.4x\n", (unsigned int)(pointer-_input), repetitions, offset );
831 // printf( " ... %4.4x %2.2x %2.2x %2.2x %2.2x > %4.4x %2.2x %2.2x %2.2x %2.2x \n",
832 // (unsigned int)(pointer-_input-offset), (pointer-offset)[0], (pointer-offset)[1], (pointer-offset)[2], (pointer-offset)[3],
833 // (unsigned int)(wpointer-output), (wpointer)[0], (wpointer)[1], (wpointer)[2], (wpointer)[3] );
834
835 MemoryBlock * sourcePointer = pointer - offset;
836
837 // Repeat the sequence from input stream
838 // to the output stream.
839 while( repetitions ) {
840 for( int j=0; j<4; ++j ) {
841
842 *wpointer = *(sourcePointer+j);
843
844 ++wpointer;
845
846 // Check if (re)allocation is needed and,
847 // in that case, we reallocate the memory
848 // block with a double size.
849 if ( (wpointer-output) > *_output_size ) {
850 *_output_size = (*_output_size)<<1;
851 int reallocOffset = wpointer - output;
852 // printf( "reallocating %d bytes\n", *_output_size );
853 output = realloc( output, *_output_size );
854 wpointer = output + reallocOffset;
855 }
856 }
857 --repetitions;
858 }
859
860 }
861 } while (token);
862
863 *_output_size = (wpointer - output);
864
865 return output;
866}
int offset
Definition _optimizer.c:681
void msc1_sort(MSC1Sequences *_sequences)
Definition msc1.c:223
struct _MSC1Sequence MSC1Sequence
MSC1Compressor * msc1_create(int _maximum_repeated_sequences)
Definition msc1.c:300
MSC1Sequence * msc1_find_sequence(MSC1Sequences *_sequences, MemoryBlock *_literal, int _limit)
Definition msc1.c:87
void msc1_echo_state(MSC1CompressorState _state, int _read, int _write, int _repeats, int _iliteral, char *_literal, char *_pointer, char *_wpointer, MSC1Sequence *_actual, MemoryBlock *_output)
Definition msc1.c:312
struct _MSC1SequenceValue MSC1SequenceValue
void msc1_free(MSC1Compressor *_msc1)
Definition msc1.c:740
MemoryBlock * msc1_uncompress(MSC1Compressor *_msc1, MemoryBlock *_input, int _size, int *_output_size)
Definition msc1.c:746
void msc1_dump(MSC1Sequences *_sequences, int _count)
Definition msc1.c:70
MSC1Sequences * msc1_generate_sequences(MemoryBlock *_input, int _size)
Definition msc1.c:126
struct _MSC1Sequences MSC1Sequences
MemoryBlock * msc1_compress(MSC1Compressor *_msc1, MemoryBlock *_input, int _size, int *_output_size)
Definition msc1.c:381
@ MSC1_CS_MOVE4
Definition msc1.h:66
@ MSC1_CS_DUPE_STORE
Definition msc1.h:69
@ MSC1_CS_DUPE_MOVE4
Definition msc1.h:72
@ MSC1_CS_LITERAL_DUPE
Definition msc1.h:63
@ MSC1_CS_DUPES
Definition msc1.h:75
@ MSC1_CS_END_OF_BLOCK
Definition msc1.h:78
@ MSC1_CS_MOVE1
Definition msc1.h:57
@ MSC1_CS_READY
Definition msc1.h:51
@ MSC1_CS_LITERAL
Definition msc1.h:60
@ MSC1_CS_STORE
Definition msc1.h:54
unsigned char MemoryBlock
Definition msc1.h:90
enum _MSC1CompressorState MSC1CompressorState
struct _MSC1Compressor MSC1Compressor
MSC1CompressorState state
Definition msc1.h:84
int maximumRepeatedSequences
Definition msc1.h:86
struct _MSC1Sequence * next
Definition msc1.c:54
MemoryBlock * used
Definition msc1.c:52
MSC1SequenceValue * first
Definition msc1.c:50
int length
Definition msc1.c:48
int count
Definition msc1.c:51
MemoryBlock value[4]
Definition msc1.c:47
MemoryBlock * offset
Definition msc1.c:39
struct _MSC1SequenceValue * next
Definition msc1.c:41
MSC1Sequence * first
Definition msc1.c:60
int count
Definition msc1.c:62
void * malloc(YYSIZE_T)
void free(void *)