blog.darkstar.work - a simple url encoder/decoder

 a simple url encoder/decoder
 http://blog.darkstar.work

Labels

Wirtschaft (152) Pressefreiheit (131) Österreich (123) IT (98) code (62) Staatsschulden (37) EZB (27) Pensionssystem (16)

2022-11-29

flex - fast lexical analyzer generator sample

The original posting can be found here: http://blog.darkstar.work/2012/05/flex-fast-lexical-analyzer-generator.html

Who remembers flex, the fast lexical analyzer generator?

Here is a short sample. I wrote it under GNU/Linux and ported it to Win32 using GnuWin32 flex and getoptwin:

%option noyywrap

%{
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "getopt.h"
#include <string.h>
#define MAXLEN 1024

/*
 * Scanner state shared with the driver code after the second "%%".
 *   mode % 16 selects what part of a match is printed:
 *     0 = whole match, 1 = from the first separator ('@' or '/') on,
 *     2 = text after the separator, 4 = reversed dot segments,
 *     8 = text after the last '.'
 *   mode >= 16 suppresses URL output (only e-mail addresses are printed).
 * i, j, idx and len are kept only for source compatibility; the actions
 * below no longer use them.
 */
int i, j, idx, len, mode = 0;
char tmps[MAXLEN], reverse[MAXLEN];

/*
 * Copy src into the global result buffer tmps, truncating input that does
 * not fit (yytext has no upper length bound, tmps has MAXLEN bytes).
 * memmove is used because src may alias one of the global buffers.
 */
static void set_result(const char *src)
{
    size_t n = strlen(src);

    if (n > MAXLEN - 1)
        n = MAXLEN - 1;
    memmove(tmps, src, n);
    tmps[n] = '\0';
}

/*
 * Write the '.'-separated segments of src to dst in reverse order.
 * Every segment is followed by a '.', so "mail.example.com" becomes
 * "com.example.mail." (the trailing dot is the historical output format
 * of this tool and is kept on purpose).
 * The output is one character longer than the input; src is truncated
 * when dst (dstsize bytes) cannot hold that plus the terminating NUL.
 */
static void reverse_segments(const char *src, char *dst, size_t dstsize)
{
    size_t n = strlen(src);
    size_t out = 0;
    size_t end;

    if (dstsize < 2)
    {
        if (dstsize == 1)
            dst[0] = '\0';
        return;
    }
    if (n > dstsize - 2)
        n = dstsize - 2;

    end = n;                       /* one past the current segment */
    for (;;)
    {
        size_t start = end;

        while (start > 0 && src[start - 1] != '.')
            start--;
        memcpy(dst + out, src + start, end - start);
        out += end - start;
        dst[out++] = '.';
        if (start == 0)
            break;
        end = start - 1;           /* step over the separating '.' */
    }
    dst[out] = '\0';
}

%}
    /* One IPv4 octet, 0-255, without leading zeros. */
SEGA  25[0-5]
SEGB  2[0-4][0-9]
SEGC  1[0-9]{2}
SEGD  [1-9]?[0-9]
SEG   {SEGA}|{SEGB}|{SEGC}|{SEGD}
IP    {SEG}"."{SEG}"."{SEG}"."{SEG}

    /* Quotes are literal inside [...], so they must not be used there. */
HOSTDOMAINSEGMENT [0-9a-zA-Z_-]+"."
TOPLEVELDOMAIN [a-zA-Z]{2,7}
HOSTNAME {HOSTDOMAINSEGMENT}+{TOPLEVELDOMAIN}
USER [0-9A-Za-z_.-]+

EMAIL1 {USER}"@"{HOSTNAME}
EMAIL2 {USER}"@"{IP}

URIPROTOCOL [a-zA-Z]{2,10}"://"
URISUFFIX [^ \t\n\r\"@,><(){}]
URL1   {URIPROTOCOL}{HOSTNAME}{URISUFFIX}*
URL2   {URIPROTOCOL}{IP}{URISUFFIX}*

%%
<<EOF>> {
        /* Normal end of input: let yylex() return to its caller. */
        yyterminate();
    }

{EMAIL1} |
{EMAIL2} {
        /* E-mail address: print the part selected by mode % 16. */
        const char *at = strchr(yytext, '@');

        if (at != NULL)
        {
            switch (mode % 16)
            {
                case 1:
                    set_result(at);
                    break;
                case 2:
                    set_result(at + 1);
                    break;
                case 4:
                    reverse_segments(at + 1, reverse, sizeof reverse);
                    set_result(reverse);
                    break;
                case 8:
                {
                    const char *dot = strrchr(yytext, '.');
                    set_result(dot != NULL ? dot + 1 : yytext);
                    break;
                }
                default:
                    set_result(yytext);
                    break;
            }
            (void) printf("%s\n", tmps);
        }
    }

{URL1} |
{URL2} {
        /* URL: printed only while URL output is enabled (mode < 16). */
        if (mode < 16)
        {
            const char *first_slash = strchr(yytext, '/');
            const char *last_slash = strrchr(yytext, '/');
            const char *last_dot = strrchr(yytext, '.');
            /* Text after the last '/', or the whole match without one. */
            const char *tail = (last_slash != NULL) ? last_slash + 1 : yytext;

            switch (mode % 16)
            {
                case 1:
                    set_result(first_slash != NULL ? first_slash : yytext);
                    break;
                case 2:
                    set_result(tail);
                    break;
                case 4:
                    reverse_segments(tail, reverse, sizeof reverse);
                    set_result(reverse);
                    break;
                case 8:
                    set_result(last_dot != NULL ? last_dot + 1 : yytext);
                    break;
                default:
                    set_result(yytext);
                    break;
            }
            (void) printf("%s\n", tmps);
        }
    }

^[\n;] { /* ignore a newline or ';' at the start of a line */ ; }

[\r\n]+ { /* ignore line breaks */ ; }

. { /* ignore every other character */ ; }

%%
/* Error hook of the scanner: ends the process with exit status 1. */
void yyerror()
{
    exit(1);
}

/*
 * Print the command-line help to stdout and terminate with exit status 0.
 *
 * cmd: program name shown in the synopsis line.
 *
 * The synopsis lists exactly the options handled by the option parser;
 * the scanner always reads from stdin.
 */
void usage(const char *cmd)
{
    (void) printf("Usage: %s [-h] [-a] [-u] [-t] [-n] [-r]\n", cmd);
    (void) printf("\t simple email address and uri lexer reads from stdin \n");
    (void) printf("\t -h, --help    \t print this help text and exit \n");
    (void) printf("\t -a, --noat    \t print only hostname of email address (all chars right of \'@\') \n");
    (void) printf("\t -u, --nouser  \t print email without username \n");
    (void) printf("\t -t, --top     \t prints domain toplevel only, when using option -a | -u \n");
    (void) printf("\t -n, --nouris \t print only email address and not uris\n");
    (void) printf("\t -r, --reverse \t reverse the hostdomain / ip address segments\n");

    exit(0);
}

/*
 * Program entry point: translate the command-line options into the global
 * `mode`, then run the scanner over stdin.
 *
 * The low bits of `mode` hold the output selection (1 = -u, 2 = -a,
 * 4 = -r, 8 = -t); the value 16 is the "no URIs" flag set by -n.
 * A selection option replaces the previous selection but keeps the -n
 * flag, so the result does not depend on the order of the options.
 */
int _tmain(int argc, TCHAR** argv)
{
    static struct option long_options[] =
    {
        {_T("help"),    ARG_NONE, 0, _T('h')},
        {_T("noat"),    ARG_NONE, 0, _T('a')},
        {_T("nouser"),  ARG_NONE, 0, _T('u')},
        {_T("top"),     ARG_NONE, 0, _T('t')},
        {_T("nouris"),  ARG_NONE, 0, _T('n')},
        {_T("reverse"), ARG_NONE, 0, _T('r')},
        { ARG_NULL,     ARG_NULL, ARG_NULL, ARG_NULL}
    };
    int c;

    for (;;)
    {
        int option_index = 0;

        /* No option takes an argument, hence no ':' in the option string. */
        c = getopt_long(argc, argv, _T("hautnr"), long_options, &option_index);
        if (c == -1)
            break;
        switch (c) // Handle options
        {
            case 0: // If this option set a flag, do nothing else now.
                    if (long_options[option_index].flag != 0)
                        break;
                    _tprintf (_T("option %s"), long_options[option_index].name);
                    if (optarg)
                        _tprintf (_T(" with arg %s"), optarg);
                    _tprintf (_T("\n"));
                    break;
            case _T('u'): mode = (mode & 16) | 1; break;
            case _T('a'): mode = (mode & 16) | 2; break;
            case _T('r'): mode = (mode & 16) | 4; break;
            case _T('h'): usage(argv[0]); break;
            case _T('t'): mode = (mode & 16) | 8; break;
            case _T('n'): mode |= 16; break;
            case '?': break; // getopt_long already printed an error message.
            default: abort();
        }
    }
    (void) fflush(stdout);
    yyin = stdin;
    yylex();
    exit(0);
}

Originally posted in 2011 here: http://www.area23.at/he/security/flex

2022-11-15

Could too many function pointers (delegates) inside many huge loops result in code that cannot be optimized?

We have this simple C program, modified from a Stack Overflow example, called funcptrtest.c:

#include <stdio.h>
/*   C code for program funcptrtest.c 
  => https://pastebin.com/rUtXfgSG 
*/
/* Reference value that fun() compares its argument against. */
const int b = 23;

/* Compares a with the file-level constant b and prints exactly one line to
   stdout saying whether a is lesser than, equal to, or greater than b. */
void fun(int a)
{
    if (a < b) {
        printf("Value of a (%d) is lesser then value of b (%d)\n", a, b);
        return;
    }
    if (a == b) {
        printf("Value of a (%d) is equal value of b (%d)\n", a, b);
        return;
    }
    /* Only a > b is left at this point. */
    printf("Value of a (%d) is greater than value of b (%d)\n", a, b);
}

/* Entry point (prototype style, not K&R): calls fun() through a function
   pointer, first once with b and then for i = 1, 12, 23, ..., 100. */
int main(int argc, char **argv)
{
    /* A function name converts to a pointer to the function, so `fun` and
       `&fun` initialise fun_ptr identically. */
    void (*fun_ptr)(int) = fun;
    int i;

    /* One call with the constant b. */
    fun_ptr(b);

    /* Then one indirect call per loop step. */
    i = 1;
    while (i < 101) {
        fun_ptr(i);
        i += 11;
    }

    return 0;
}

We now compile it with the GNU C compiler, using the gcc option -S to generate an assembler file:

gcc -S funcptrtest.c -o funcptrtest.asm

Result will be something like this:

# Read-only data of funcptrtest.c: the constant b and the three printf
# format strings used by fun().
.file   "funcptrtest.c"
        .text
        .globl  b
        .section        .rodata
        .align 4
        .type   b, @object
        .size   b, 4
# const int b = 23
b:
        .long   23
        .align 8
# Format string for the a < b branch.
.LC0:
        .string "Value of a (%d) is lesser then value of b (%d)\n"
        .align 8
# Format string for the a == b branch.
.LC1:
        .string "Value of a (%d) is equal value of b (%d)\n"
        .align 8
# Format string for the a > b branch.
.LC2:
        .string "Value of a (%d) is greater than value of b (%d)\n"
        .text
        # void fun(int a): compares a with the constant 23 (b) and calls
        # printf with .LC0 (a < b), .LC1 (a == b) or .LC2 (a > b).
        .globl  fun
        .type   fun, @function
fun:
.LFB0:
        .cfi_startproc
        endbr64
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        subq    $16, %rsp
        # a is stored at -4(%rbp)
        movl    %edi, -4(%rbp)
        # if (a < b)
        movl    $23, %eax
        cmpl    %eax, -4(%rbp)
        jge     .L2
        movl    $23, %edx
        movl    -4(%rbp), %eax
        movl    %eax, %esi
        leaq    .LC0(%rip), %rax
        movq    %rax, %rdi
        movl    $0, %eax
        call    printf@PLT
        jmp     .L5
.L2:
        # else if (a == b)
        movl    $23, %eax
        cmpl    %eax, -4(%rbp)
        jne     .L4
        movl    $23, %edx
        movl    -4(%rbp), %eax
        movl    %eax, %esi
        leaq    .LC1(%rip), %rax
        movq    %rax, %rdi
        movl    $0, %eax
        call    printf@PLT
        jmp     .L5
.L4:
        # else if (a > b)
        movl    $23, %eax
        cmpl    %eax, -4(%rbp)
        jle     .L5
        movl    $23, %edx
        movl    -4(%rbp), %eax
        movl    %eax, %esi
        leaq    .LC2(%rip), %rax
        movq    %rax, %rdi
        movl    $0, %eax
        call    printf@PLT
.L5:
        nop
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE0:
        .size   fun, .-fun
        # int main(int argc, char **argv): stores the address of fun in a
        # stack slot and calls it indirectly, once with 23 and then in a loop.
        .globl  main
        .type   main, @function
main:
.LFB1:
        .cfi_startproc
        endbr64
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        subq    $32, %rsp
        # Save the two incoming arguments (argc, argv) on the stack.
        movl    %edi, -20(%rbp)
        movq    %rsi, -32(%rbp)
        # i = 0, kept at -12(%rbp)
        movl    $0, -12(%rbp)
        # fun_ptr = &fun, kept at -8(%rbp)
        leaq    fun(%rip), %rax
        movq    %rax, -8(%rbp)
        # (*fun_ptr)(b): argument 23, indirect call through %rax
        movl    $23, %edx
        movq    -8(%rbp), %rax
        movl    %edx, %edi
        call    *%rax
        # i = 1, then jump to the loop condition
        movl    $1, -12(%rbp)
        jmp     .L7
.L8:
        # Loop body: (*fun_ptr)(i), indirect call through %rdx
        movl    -12(%rbp), %eax
        movq    -8(%rbp), %rdx
        movl    %eax, %edi
        call    *%rdx
        # i += 11
        addl    $11, -12(%rbp)
.L7:
        # Loop condition: continue while i <= 100 (i.e. i < 101)
        cmpl    $100, -12(%rbp)
        jle     .L8
        # return 0
        movl    $0, %eax
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE1:
        .size   main, .-main
        # Compiler identification string and note sections emitted by gcc;
        # none of this is generated from statements of the C program.
        .ident  "GCC: (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0"
        .section        .note.GNU-stack,"",@progbits
        # NOTE(review): the layout below looks like an ELF note record
        # (name size, descriptor size, type, name "GNU", descriptor) --
        # confirm against the GNU property note documentation.
        .section        .note.gnu.property,"a"
        .align 8
        # Sizes are computed as differences between the local labels below.
        .long   1f - 0f
        .long   4f - 1f
        .long   5
0:
        .string "GNU"
1:
        .align 8
        .long   0xc0000002
        .long   3f - 2f
2:
        .long   0x3
3:
        .align 8
4:

Let's see whether a simple loop is faster than a call through a function pointer (delegate):

/* program looptest.c */
#include <stdio.h>

/* Reference value that the loop in main() compares against. */
const int b = 23;

/* Entry point of looptest.c: runs the three-way comparison against b
   inline, for i = 1, 12, 23, ... while i < 4194304, printing one line per
   step. The single comparison before the loop stays disabled. */
int main(int argc, char **argv)
{
    int a = 23;
    int i;

    i = 1;
    while (i < 4194304) {
        a = i;
        if (a > b) {
            printf("Value of a (%d) is greater than value of b (%d)\n", a, b);
        } else if (a == b) {
            printf("Value of a (%d) is equal value of b (%d)\n", a, b);
        } else {
            /* Only a < b is left at this point. */
            printf("Value of a (%d) is lesser then value of b (%d)\n", a, b);
        }
        i += 11;
    }

    return 0;
}

/* program funcptrtest.c */
#include <stdio.h>

/* Reference value that fun() compares its argument against. */
const int b = 23;

/* Compares a with the file-level constant b and prints exactly one line to
   stdout saying whether a is lesser than, equal to, or greater than b. */
void fun(int a)
{
    if (a < b) {
        printf("Value of a (%d) is lesser then value of b (%d)\n", a, b);
        return;
    }
    if (a == b) {
        printf("Value of a (%d) is equal value of b (%d)\n", a, b);
        return;
    }
    /* Only a > b is left at this point. */
    printf("Value of a (%d) is greater than value of b (%d)\n", a, b);
}

/* Entry point of the timing variant of funcptrtest.c: calls fun() through
   a function pointer for i = 1, 12, 23, ... while i < 4194304. The single
   call with b before the loop stays disabled. */
int main(int argc, char **argv)
{
    /* A function name converts to a pointer to the function, so `fun` and
       `&fun` initialise fun_ptr identically. */
    void (*fun_ptr)(int) = fun;
    int i;

    i = 1;
    while (i < 4194304) {
        fun_ptr(i);
        i += 11;
    }

    return 0;
}

Well, the result is not so clear-cut; of course, our simple looptest.asm contains less assembler code.

compile options

But the difference in execution time is not that large; we will have to measure it in many scenarios.

[To be continued ...]