Group sequences of repeating literals into one write

Add a new "dumpn" pseudo-opcode that dumps N copies of the same value
to the stage buffer. It's basically the same as "dump" but much faster
when dumping the same value a lot of times, for example, when writing
initial values for large arrays.

On my Linux system, the time it took to dump a 100,000,000-cell array went
down from 20 to 8 seconds in the Release configuration, i.e. 2.5 times faster!

I haven't profiled further yet: the Visual Studio 2017 profiler is broken
and gprof won't output anything (I'm probably doing it wrong). I might try
valgrind later.
This commit is contained in:
Zeex 2017-10-22 12:23:33 +06:00
parent 092b07464b
commit a99e649a6b
5 changed files with 68 additions and 30 deletions

View File

@ -327,7 +327,11 @@ char *pc_readasm(void *handle, char *string, int maxchars)
*/ */
void *pc_openbin(char *filename) void *pc_openbin(char *filename)
{ {
return fopen(filename,"wb"); FILE *fbin;
fbin=fopen(filename,"wb");
setvbuf(fbin,NULL,_IOFBF,1UL<<20);
return fbin;
} }
void pc_closebin(void *handle,int deletefile) void pc_closebin(void *handle,int deletefile)

View File

@ -639,6 +639,7 @@ SC_FUNC void ffabort(int reason);
SC_FUNC void ffbounds(cell size); SC_FUNC void ffbounds(cell size);
SC_FUNC void jumplabel(int number); SC_FUNC void jumplabel(int number);
SC_FUNC void defstorage(void); SC_FUNC void defstorage(void);
SC_FUNC void defcompactstorage(void);
SC_FUNC void getfrm(void); SC_FUNC void getfrm(void);
SC_FUNC void modstk(int delta); SC_FUNC void modstk(int delta);
SC_FUNC void setstk(cell value); SC_FUNC void setstk(cell value);

View File

@ -414,7 +414,11 @@ char *pc_readasm(void *handle, char *string, int maxchars)
*/ */
void *pc_openbin(char *filename) void *pc_openbin(char *filename)
{ {
return fopen(filename,"wb"); FILE *fbin;
fbin=fopen(filename,"wb");
setvbuf(fbin,NULL,_IOFBF,1UL<<20);
return fbin;
} }
void pc_closebin(void *handle,int deletefile) void pc_closebin(void *handle,int deletefile)
@ -1716,29 +1720,42 @@ static void parse(void)
*/ */
static void dumplits(void) static void dumplits(void)
{ {
int j,k; int i,j;
static const int row_len=16;
if (sc_status==statSKIP) if (sc_status==statSKIP)
return; return;
k=0; i=0;
while (k<litidx){ while (i<litidx) {
/* should be in the data segment */ /* should be in the data segment */
assert(curseg==2); assert(curseg==2);
defstorage(); j=i+1;
j=16; /* 16 values per line */ while (j<litidx && litq[j]==litq[i])
while (j && k<litidx){ j++;
outval(litq[k], FALSE); if (j-i>=row_len-1) {
int count=j-i;
defcompactstorage();
outval(litq[i],FALSE);
stgwrite(" "); stgwrite(" ");
k++; outval(count,TRUE);
j--; i+=count;
if (j==0 || k>=litidx) } else {
stgwrite("\n"); /* force a newline after 10 dumps */ defstorage();
/* Note: stgwrite() buffers a line until it is complete. It recognizes j=row_len; /* 16 values per line */
* the end of line as a sequence of "\n\0", so something like "\n\t" while (j && i<litidx){
* so should not be passed to stgwrite(). outval(litq[i],FALSE);
*/ stgwrite(" ");
} /* while */ i++;
j--;
if (j==0 || i>=litidx)
stgwrite("\n"); /* force a newline after 10 dumps */
/* Note: stgwrite() buffers a line until it is complete. It recognizes
* the end of line as a sequence of "\n\0", so something like "\n\t"
* so should not be passed to stgwrite().
*/
} /* while */
} /* if */
} /* while */ } /* while */
} }
@ -1748,20 +1765,13 @@ static void dumplits(void)
*/ */
static void dumpzero(int count) static void dumpzero(int count)
{ {
int i;
if (sc_status==statSKIP || count<=0) if (sc_status==statSKIP || count<=0)
return; return;
assert(curseg==2); assert(curseg==2);
defstorage(); defcompactstorage();
i=0; outval(0, FALSE);
while (count-- > 0) { stgwrite(" ");
outval(0, FALSE); outval(count, TRUE);
i=(i+1) % 16;
stgwrite((i==0 || count==0) ? "\n" : " ");
if (i==0 && count>0)
defstorage();
} /* while */
} }
static void aligndata(int numbytes) static void aligndata(int numbytes)

View File

@ -832,6 +832,15 @@ SC_FUNC void defstorage(void)
stgwrite("dump "); stgwrite("dump ");
} }
/*
* Same as defstorage() but for repeating values.
*/
SC_FUNC void defcompactstorage()
{
stgwrite("dumpn ");
}
/* /*
* Copies frame address to primary register * Copies frame address to primary register
*/ */

View File

@ -382,7 +382,7 @@ static cell OPHANDLER_CALL parm5(FILE *fbin,char *params,cell opcode)
static cell OPHANDLER_CALL do_dump(FILE *fbin,char *params,cell opcode) static cell OPHANDLER_CALL do_dump(FILE *fbin,char *params,cell opcode)
{ {
ucell p; ucell p;
int num = 0; int num=0;
while (*params!='\0') { while (*params!='\0') {
p=getparam(params,&params); p=getparam(params,&params);
@ -395,6 +395,19 @@ static cell OPHANDLER_CALL do_dump(FILE *fbin,char *params,cell opcode)
return num*sizeof(cell); return num*sizeof(cell);
} }
static cell OPHANDLER_CALL do_dumpn(FILE *fbin,char *params,cell opcode)
{
ucell value,num,i;
value=getparam(params,&params);
num=getparam(params,NULL);
if (fbin!=NULL) {
for (i=0; i<num; i++)
write_encoded(fbin,&value,1);
} /* if */
return num*sizeof(cell);
}
static cell OPHANDLER_CALL do_call(FILE *fbin,char *params,cell opcode) static cell OPHANDLER_CALL do_call(FILE *fbin,char *params,cell opcode)
{ {
char name[sNAMEMAX+2]; /* +1 for a possible leading dot */ char name[sNAMEMAX+2]; /* +1 for a possible leading dot */
@ -521,6 +534,7 @@ static OPCODE opcodelist[] = {
{112, "dec.pri", sIN_CSEG, parm0 }, {112, "dec.pri", sIN_CSEG, parm0 },
{115, "dec.s", sIN_CSEG, parm1 }, {115, "dec.s", sIN_CSEG, parm1 },
{ 0, "dump", sIN_DSEG, do_dump }, { 0, "dump", sIN_DSEG, do_dump },
{ 0, "dumpn", sIN_DSEG, do_dumpn },
{ 95, "eq", sIN_CSEG, parm0 }, { 95, "eq", sIN_CSEG, parm0 },
{106, "eq.c.alt", sIN_CSEG, parm1 }, {106, "eq.c.alt", sIN_CSEG, parm1 },
{105, "eq.c.pri", sIN_CSEG, parm1 }, {105, "eq.c.pri", sIN_CSEG, parm1 },