#
# Encode.pl -- Perl routines to Encode and Decode binary strings.
#
# Written by Clint Goss, Jan 1997

package Encode;

# Perl Modules
use strict;      # Restrict unsafe variables, references, barewords
use warnings;    # Report questionable constructs at compile and run time

# Characters that have special meaning to an HTML parser, mapped onto the
# character entity references which represent them literally.
my %HTML_ENTITY_FOR = (
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
);

# Encode an arbitrary binary string into a VERY limited set of characters.
#
# The output has only the characters: % + - . a-z A-Z 0-9
#
# This routine is useful for encoding many strings which can have embedded
# characters which are 'meta-characters' in some 'larger context':
#
#   - Passing a string as an http URL, a Cookie, or a Location: reference.
#
#   - Inserting a string into a data file which uses some 'delimiter'
#     character such as TAB or @.
#
#   - Passing a string on a command line (through a shell) to another
#     executable.
#
# The following transformations are done:
#
#       Input           Output
#
#       a-z A-Z 0-9     No change in these characters
#       - .             No change in these characters
#       (space)         +
#       all other       %HH where HH is 2 upper case characters 0-9, A-F
#
# Argument: $arg -- the string to encode, expected to be a string of bytes.
#           A character with an ordinal above 0xFF would be written with
#           more than two hex digits, which &Encode::dec cannot reverse.
# Returns:  the encoded string.  An undefined argument is returned as is.

sub enc {
    my ($arg) = @_;

    return $arg unless defined $arg;

    # Escape every character outside the safe set in a single pass.
    # Literal '%' and '+' are deliberately NOT in the safe set, so they
    # become %25 and %2B and can never be confused with the escapes and
    # the space markers this routine generates itself.  Spaces are left
    # alone here and mapped onto '+' afterwards.
    $arg =~ s/([^a-zA-Z0-9 .-])/sprintf ('%%%02X', ord ($1))/ge;
    $arg =~ tr/ /+/;

    return $arg;
}

# Decode a string into an arbitrary binary string.  This routine undoes the
# work of &Encode::enc as well as the encodings done by all known web
# browsers.
#
# Plus characters are converted to spaces.  Embedded %HH hexadecimal
# 'macros' are converted into the single byte they represent.
#
# Argument: $arg -- the encoded string.
# Returns:  the decoded string.  An undefined argument is returned as is.

sub dec {
    my ($arg) = @_;

    return $arg unless defined $arg;

    # Plusses first: a literal plus arrives as %2B and is restored below,
    # after this translation, so it is not turned into a space.
    $arg =~ tr/+/ /;

    # Convert %HH from a hex number to the byte it stands for.  Only a '%'
    # followed by exactly two hex digits is an escape; anything else
    # (e.g. the '%' in "100% sure") is left untouched rather than being
    # replaced by a NUL byte.  chr () yields the unsigned byte 0x00-0xFF.
    $arg =~ s/%([0-9A-Fa-f]{2})/chr (hex ($1))/ge;

    return $arg;
}

# Encode an arbitrary string for output to HTML.  Basically, we replace
# anything which might be interpreted as a meta-character by the browser
# with the equivalent character entity:
#
#       &   ->  &amp;
#       "   ->  &quot;
#       <   ->  &lt;
#       >   ->  &gt;
#
# Argument: $arg -- the text to be shown literally in an HTML page.
# Returns:  the escaped text.  An undefined argument is returned as is.

sub encodeHtml {
    my ($arg) = @_;

    return $arg unless defined $arg;

    # A single pass over the input, so the '&' of an entity generated
    # here is never escaped a second time.
    $arg =~ s/([&"<>])/$HTML_ENTITY_FOR{$1}/g;

    return $arg;
}

# Encode the non-printable characters in an arbitrary string.
# Only characters between space (0x20) and ~ (0x7E) are considered OK.
#
# Note that '%' is itself in the OK range and is passed through unchanged,
# so the output is meant for display/logging; it cannot always be decoded
# back to the original unambiguously.
#
# Argument: $arg -- the string to make printable.
# Returns:  the string with every other character replaced by %HH.
#           An undefined argument is returned as is.

sub encodeNonPrinting {
    my ($arg) = @_;

    return $arg unless defined $arg;

    # Convert everything which might be dangerous to a common hex format.
    $arg =~ s/([^ -~])/sprintf ('%%%02X', ord ($1))/ge;

    return $arg;
}

# Make the argument a valid file name on this system.
# "Dangerous" characters are encoded (see &Encode::enc) and the length is
# TRUNCATED to the max file name length for this system.
#
# The truncation is applied to the ENCODED name, so it may cut a trailing
# %HH escape short; the result is still made up of safe characters only.
#
# Argument: $arg -- the proposed file name.
# Returns:  the encoded name, at most 120 characters long.

sub sanitizeFileName {
    my ($arg) = @_;

    # How long can a filename be?  Good question!
    #
    # I'd have expected the limits.h file on any Unix system to have
    # this number, but SunOS 5.4 just refers you to pathconf().
    # So ... I tried some file creations of ordinary files and
    # came up with ...
    my $maxFileNameLen = 120;

    my $argE = enc ($arg);

    return $argE unless defined $argE;

    return length ($argE) > $maxFileNameLen
        ? substr ($argE, 0, $maxFileNameLen)
        : $argE;
}

1;    # return true

__END__

=head1 NAME

Encode.pl - Perl routines to encode and decode binary strings

=head1 SYNOPSIS

    # Access to modules (see the NOTES section for setup)
    BEGIN {
        ...
        push (@INC, ...location of your library directory...);
    }
    require 'Encode.pl';

    # Make a string safe for a URL, a Cookie or a delimited data file,
    # and get the original string back again.
    my ($encoded) = Encode::enc ("a b&c");          # "a+b%26c"
    my ($decoded) = Encode::dec ($encoded);         # "a b&c"

    # Escape text which is to be shown literally in an HTML page.
    my ($html) = Encode::encodeHtml ('"Tom & Jerry"');

    # Build a file name out of an arbitrary string.
    my ($fileName) = Encode::sanitizeFileName ("reports/1997 Q1");

=head1 DESCRIPTION

This Perl5 library implements a set of routines which encode and decode
binary strings using a very limited subset of ASCII characters.
These can be used to encode strings for use on the HTTP protocol.

=head2 Functions

=over 4

=item enc ($arg)

Encode an arbitrary binary string into a VERY limited set of characters.

The output has only the characters: % + - .
a-z A-Z 0-9

This routine is useful for encoding many strings which can have embedded
characters which are 'meta-characters' in some 'larger context':

  - Passing a string as an http URL, a Cookie, or a Location: reference.

  - Inserting a string into a data file which uses some 'delimiter'
    character such as TAB or @.

  - Passing a string on a command line (through a shell) to another
    executable.

The following transformations are done:

    Input           Output

    a-z A-Z 0-9     No change in these characters
    - .             No change in these characters
    (space)         +
    all other       %HH where HH is 2 upper case characters 0-9, A-F

=item dec ($arg)

Decode a string into an arbitrary binary string.  This routine undoes the
work of &Encode::enc as well as the encodings done by all known web browsers.

Plus characters are converted to spaces.  Also, embedded %HH hexadecimal
'macros' are converted into their equivalent binary representation.

=item encodeHtml ($arg)

Encode an arbitrary string for output to HTML.  Basically, we remove
anything which might be interpreted as a meta-character by the browser.
For example, encodeHtml () maps "<" onto "&lt;"

=item encodeNonPrinting ($arg)

Encode the non-printable characters in an arbitrary string.
Only characters between space (%20) and ~ (%7E) are considered OK.

=item sanitizeFileName ($arg)

Make the argument a valid file name on this system.
"Dangerous" characters are encoded and the length is TRUNCATED to the
max file name length for this system.

How long can a filename be?  Good question!

I'd have expected the limits.h file on any Unix system to have
this number, but SunOS 5.4 just refers you to pathconf().
So ... I tried some file creations of ordinary files and
came up with 120 characters.

=back

=head1 WARNING

None

=head1 SEE ALSO

G(3), GBasic(3)

=head1 NOTES

This module has not yet been set up for "installation" in your local Perl.

=head1 AUTHOR

Clint Goss, Feb 1997

=cut