SUMMARY(2): Wordviewer for DU?

From: Bernt Christandl <beb_at_mpe.mpg.de>
Date: Fri, 19 Dec 1997 11:07:44 +0100

Hello managers,

it happened again...
-> answers to my original question: 2
-> reactions on my summary (up to now): 15
   Thank you all again!
  (Christian Wilson: my mail to you didn't make it through the net... :<)

So here comes an updated summary with the best at the end:
the sourcecode of catdoc.c from Victor B. Wagner sent to me from
two different sites.

(And please note: after lunch i will leave my desk until 7-jan-1998!)


My original question was:
>does such a beast exist: a viewer for MS Word documents under DU-4.0B ?


Answers are "No, but you can do/use the following...":

-> if you have a PC nearby you can use ftp and the original MS Word
   OR you may want to use an emulator of some kind on your unix machine
   (Softwindows from Insignia from the CAMPUS CD's - Licence needed,
    Wabi for Solaris (free) and few others,
    applixware - see http://www.applix.com/appware/oa/wordgraf.htm to start)
   OR for Xterminals there exist things like WinCenter...

   All in all "IF Word, THEN have a PC" is a good idea for whatever solution
   you prefer.

-> the only other possibilities i've learned about are the command "strings"
   or the utility "catdoc" (source attached).
   
   Andre Delafontaine <andre.delafontaine_at_echostar.com> said:
> Depending on what you want to actually see in your word document, you
> can use a program called "catdoc" (source attached) that is able to
> extract the text from your word document. It uses the same principle as
> strings _file_ | more, but works much better.

   This should be sufficient to view attachments to email.


Thank you all very much again for your time and bandwidth
and have some fine days, weeks over Christmas and the New Year!

Bernt Christandl

----------------------------------------------------------------------
- Bernt Christandl / Max Planck Institut - Extraterrestrische Physik -
- D-85740 Garching / Phone: +49/89/3299-3342 / Fax: +49/89/3299-3569 -
- email: beb_at_mpe.mpg.de -
----------------------------------------------------------------------


.......<snip>............<snip>............<snip>..........

/* catdoc.c version 0.3 */
/* I am not the author of this utility, Victor B. Wagner is. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Line length limit used by format(): once the pending output line grows
   beyond this many characters it is wrapped. */
#define TEXT_WIDTH 72
/* With LATIN1 defined, recode_char() passes characters through unchanged and
   the code page translation tables are not compiled. */
#define LATIN1
/* enable this define, if you don't want cyrillic code page translations */

/* Bytes of the Word text stream that are replaced by a string instead of
   being copied through.  The array is parallel to ascii_specs and TeX_specs:
   the byte at index i is replaced by entry i of the selected map.
   The list ends with a 0 byte so that it can be searched as a C string;
   the terminator itself has no entry in the maps. */
unsigned char specs[]={7, /* tab columns separator - handled specially*/
                       '\n',/* hook to handle end of line in tables */
                       0x1E,/* non-breaking hyphen */
                       0x1F,/* soft hyphen */
                       0x85,/* ellipsis (three dots) */
                       0x91,/* opening single quote */
                       0x92,/* closing single quote */
                       0x93,/* opening double quote */
                       0x94,/* closing double quote */
                       0x96,/* en-dash in Windows-1252 */
                       0x97,/* em-dash in Windows-1252 */
                       0x99,/* Trade Mark sign */
                       0xA0,/* non-breaking space */
                       0xA9,/* Copyright sign */
                       0xAE,/* Registered sign */
                       0xAB,/* opening << quote*/
                       0xBB,/* closing >> quote*/
 /* The rest is translated into itself unless TeX mode is selected */
                       '%','$','_','{','}','\\',
                       0    /* terminator for string searches, not a mapped byte */
                    };
                         
/* Plain-text replacements, one per entry of specs and in the same order. */
char *ascii_specs[]={
  "\t",   /* 7: table column separator */
  "\n",   /* '\n': end of a table row */
  "-",    /* 0x1E */
  "",     /* 0x1F: soft hyphen is dropped */
  "...",  /* 0x85 */
  "`",    /* 0x91 */
  "'",    /* 0x92 */
  "``",   /* 0x93 */
  "''",   /* 0x94 */
  "-",    /* 0x96 */
  "-",    /* 0x97 */
  "tm",   /* 0x99 */
  " ",    /* 0xA0 */
  "(c)",  /* 0xA9 */
  "(R)",  /* 0xAE */
  "\"",   /* 0xAB */
  "\"",   /* 0xBB */
  /* the remaining characters stand for themselves in plain text */
  "%",
  "$",
  "_",
  "{",
  "}",
  "\\"
};
/* TeX replacements, one per entry of specs and in the same order.
   The table must have exactly as many entries as specs has mapped bytes (23);
   a missing entry shifts every later replacement and lets the last byte
   index past the end of the array. */
char *TeX_specs[]={"\t&","\\\\\n","-","\\-","\\dots{}","`","'","``","''",
"--",  /* 0x96 is the en-dash in Windows-1252 */
"---", /* 0x97 is the em-dash in Windows-1252 */
"${}^{\\scriptscriptstyle\\mathrm{TM}}$",/* this is my idea about tm sign*/
"~",
"{\\copyright}",
"(R)",/* to be replaced with correct command */
"<",">",
"\\%","\\$",
"\\_", /* underscore: was missing, which misaligned the entries below */
"$\\{$","$\\}$","$\\backslash$",};

/* Character recoding.
   recode_char(x) converts one input byte to the output character set.
   Without LATIN1 it is a lookup in a 256-entry table indexed by the input
   byte; which table is compiled depends on whether the `unix` macro is
   defined.  With LATIN1 it yields its argument unchanged. */
#ifndef LATIN1
#ifdef unix
/* Table used when `unix` is defined.  Besides the upper-half translation,
   both tables map 0x0B to 0x0D, 0x1E to 0x2D ('-') and 0x1F to 0x20 (' '). */
unsigned char table[256]={
/* Windows cyrillic code page to KOI-8 */
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0D,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x2D,0x20,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
0x80,0x81,0x82,0xAA,0x8F,0x90,0xA9,0x93,0x84,0x92,0x91,0x94,0x83,0x95,0x99,0x8B,
0x98,0x60,0x27,0x22,0x22,0x9A,0x2D,0x2D,0x9E,0xA6,0x87,0xB0,0x8D,0x97,0x86,0xA2,
0x20,0xA7,0xA5,0x88,0xA4,0x8E,0x96,0x85,0xB3,0xA1,0x9F,0x22,0xAB,0xAC,0xAD,0xAE,
0xAF,0xB2,0xB1,'i',0xB5,0xB6,0xB7,0xB8,0xA3,0xB9,0xBA,0x22,0xBC,0xBD,0xBE,0x9B,
0xE1,0xE2,0xF7,0xE7,0xE4,0xE5,0xF6,0xFA,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xF0,
0xF2,0xF3,0xF4,0xF5,0xE6,0xE8,0xE3,0xFE,0xFB,0xFD,0xFF,0xF9,0xF8,0xFC,0xE0,0xF1,
0xC1,0xC2,0xD7,0xC7,0xC4,0xC5,0xD6,0xDA,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xD0,
0xD2,0xD3,0xD4,0xD5,0xC6,0xC8,0xC3,0xDE,0xDB,0xDD,0xDF,0xD9,0xD8,0xDC,0xC0,0xD1};
#else
/* Table used when `unix` is not defined.
   NOTE(review): the original gives no description of the target encoding;
   presumably Windows Cyrillic to a DOS Cyrillic code page - confirm. */
unsigned char table[256]={
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef};
#endif
/* Table-driven recoding: x is used directly as the table index. */
#define recode_char(x) table[x]
#else
/* LATIN1 build: no recoding, the byte is passed through as is. */
#define recode_char(x) x
#endif
/* Return the output string for input byte c.
 *
 * map - replacement table parallel to specs (ascii_specs or TeX_specs)
 * c   - input byte value
 *
 * If c is one of the bytes listed in specs, the entry of map at the same
 * index is returned.  Otherwise the result is a one-character string holding
 * recode_char(c); that string lives in a static buffer and is overwritten by
 * the next call, so the caller must consume it before calling again.
 */
char *map_char(char **map,int c)
{
 static char buffer[2]="a";
 unsigned char *ptr=NULL;

 /* Search by explicit length instead of relying on a terminator in specs,
    and never let a zero byte match: it has no entry in the maps. */
 if ((unsigned char)c!=0)
   ptr=memchr(specs,c,sizeof specs);
 if (ptr)
   return map[ptr-specs];

 buffer[0]=recode_char(c);
 return buffer;
}
/* Print one paragraph to stdout, translating characters and wrapping lines.
 *
 * buf - NUL-terminated paragraph text as collected from the Word file
 * map - replacement table passed on to map_char()
 *
 * Each byte is translated with map_char() and appended to the pending line.
 * Whenever the pending line exceeds TEXT_WIDTH characters it is broken at its
 * last space, or after exactly TEXT_WIDTH characters if it contains no space.
 * Two consecutive bytes with value 7 end a table row: the pending line is
 * printed followed by the map's replacement for '\n'.
 * The paragraph is finished with an empty line.
 *
 * The pending line holds at most TEXT_WIDTH characters before an append, so
 * the buffer has room for replacements of up to
 * sizeof outstring - 1 - TEXT_WIDTH characters; the built-in maps stay well
 * below that.
 */
void format(char *buf,char **map)
{
  char outstring[128]="";
  const unsigned char *sp=(const unsigned char *)buf;
  char *dp;
  size_t len=0;      /* current length of outstring */
  int after_sep=0;   /* nonzero if the previous byte was a cell separator (7) */

  while (*sp)
  { if (*sp==7&&after_sep)
    { /* second separator in a row: end of table row */
      printf("%s%s",outstring,map_char(map,'\n'));
      outstring[0]=0;
      len=0;
      after_sep=0;
      sp++;
      continue;
    }

    strcpy(outstring+len,map_char(map,*sp));
    len+=strlen(outstring+len);

    /* Keep wrapping until the remainder fits; a single pass could leave a
       long space-free tail that later overflows the buffer. */
    while (len>TEXT_WIDTH)
    { dp=strrchr(outstring,' ');
      if (dp)
      { *(dp++)=0;
        printf("%s\n",outstring);
        len=strlen(dp);
        /* source and destination overlap, so strcpy must not be used */
        memmove(outstring,dp,len+1);
      }
      else
      { printf("%.*s\n",TEXT_WIDTH,outstring);
        len-=TEXT_WIDTH;
        memmove(outstring,outstring+TEXT_WIDTH,len+1);
      }
    }
    after_sep=*(sp++)==7;
  }

  if (outstring[0]==0) putc('\n',stdout);
  else printf("%s\n\n",outstring);
}
/* Write the usage summary to stdout and terminate the program with exit
   status 2.  This function does not return. */
void help(void)
{
  static const char usage[]=
    "catdoc - exctract text from MS-Word files and catenate it to stdout\n"
    "Copyright (c) by Victor B. Wagner, 1996\n"
    "Usage catdoc [-ast] files ...\n"
    "\t-a - converts non-standard printable chars into readable form (default)\n"
    "\t-t - converts them into TeX control sequences\n"
    "\t-s - exits with code 1 if MSWordDoc signature not found before\n"
    "\t\tfirst printable paragraph\n\n"
    "All options affects only files, specified AFTER them\n";

  fputs(usage,stdout);
  exit(2);
}

/* Paragraph buffer: do_file() collects the bytes of the current paragraph
   here and passes the finished, NUL-terminated text to format(). */
char buf[8192];
/* Extract the text paragraphs from one Word file and print them.
 *
 * f           - stream to read; it is read until EOF or a read error
 * map         - replacement table handed to format()
 * search_sign - if nonzero, the string "MSWordDoc" (terminated by a zero
 *               byte) must have been seen before the first paragraph is
 *               printed; otherwise the program exits with status 1
 *
 * Bytes are collected in buf until a '\r' arrives.  Any byte that is neither
 * printable nor one of the accepted control codes discards what has been
 * collected so far, so only runs of text ending in '\r' are printed.
 * A run that would not fit into buf is printed in pieces instead of being
 * written past the end of the buffer.
 */
void do_file(FILE *f,char **map,int search_sign)
{
  const int last=(int)sizeof buf-1; /* highest valid index in buf */
  int ok=!search_sign;
  int bufptr;
  int c=0;

  /* Loop on the value returned by getc rather than on feof(): after a read
     error getc keeps returning EOF while feof() stays false. */
  while (c!=EOF)
  { bufptr=-1;
    do {
      c=getc(f);
      /* Special printable symbols 7- table separator \r - paragraph end
         0x1E - short defis */
      if ((c<=255&&c>=32)||c==7||c=='\t'||c=='\r'||c==0x1E)
        buf[++bufptr]=c;
      else if (c==0x0b)
        buf[++bufptr]='\r';
      else
      { if (!c)
        { /* a zero byte ends a candidate signature string */
          buf[++bufptr]=0;
          if (!strcmp(buf,"MSWordDoc")) ok=1;
        }
        if (c!=2) bufptr=-1;/* \002 is Word's footnote mark */
      }

      /* Buffer is full and the paragraph is not finished: emit what we have
         so the next byte (plus a terminator) is guaranteed to fit. */
      if (c!='\r'&&bufptr>=last-1)
      { if (!ok) exit(1);
        buf[bufptr+1]=0;
        format(buf,map);
        bufptr=-1;
      }
    } while (c!='\r'&&c!=EOF);

    if (bufptr>0&&buf[bufptr]=='\r')
    { if (!ok) exit(1);
      buf[bufptr]=0; /* drop the trailing '\r' */
      format(buf,map);
    }
  }
}
  
/* Command line driver.
 *
 * Arguments are processed left to right:
 *   -s    require the MSWordDoc signature in the files that follow
 *   -t    use TeX replacements for the files that follow
 *   -a    use plain-text replacements for the files that follow (default)
 *   -     read standard input (allowed once)
 *   other arguments starting with '-' are rejected via help()
 *   anything else is taken as a file name
 *
 * Returns 0 on success; exits with status 2 on usage errors or when a file
 * cannot be opened.
 */
int main(int argc,char **argv)
{ int search_sign =0; /* Must program exit with exit code 1 if MSWordDoc
                         signature is not found? */
  char **sequences=ascii_specs;/* pointer to array of character sequences
                            to represent special characters of Word */
  int i,stdin_processed=0;

  if (argc<2) help();

  for(i=1;i<argc;i++)
  { const char *arg=argv[i];

    if (!strcmp(arg,"-s"))
    { search_sign=1;
    }
    else if (!strcmp(arg,"-t"))
    { sequences=TeX_specs;
    }
    else if (!strcmp(arg,"-a"))
    { sequences=ascii_specs;
    }
    else if (!strcmp(arg,"-"))
    { if (stdin_processed)
      { fprintf(stderr,"Cannot process standard input twice a row\n");
        exit (2);
      }
      do_file(stdin,sequences,search_sign);
      stdin_processed=1;
    }
    else if (arg[0]=='-')
    { fprintf(stderr,"Invalid option %s\n",arg);
      help();
    }
    else
    { /* Word documents are binary data: text mode would translate line
         endings or stop at a Ctrl-Z on systems that distinguish the modes. */
      FILE *f=fopen(arg,"rb");
      if(!f) {fprintf(stderr,"Cannot open file %s\n",arg);exit(2);}
      do_file(f,sequences,search_sign);
      fclose(f);
    }
  }
  return 0;
}
Received on Fri Dec 19 1997 - 11:08:00 NZDT

This archive was generated by hypermail 2.4.0 : Wed Nov 08 2023 - 11:53:37 NZDT