File : src/aws-url.adb
------------------------------------------------------------------------------
-- Ada Web Server --
-- --
-- Copyright (C) 2000-2004 --
-- ACT-Europe --
-- --
-- Authors: Dmitriy Anisimkov - Pascal Obry --
-- --
-- This library is free software; you can redistribute it and/or modify --
-- it under the terms of the GNU General Public License as published by --
-- the Free Software Foundation; either version 2 of the License, or (at --
-- your option) any later version. --
-- --
-- This library is distributed in the hope that it will be useful, but --
-- WITHOUT ANY WARRANTY; without even the implied warranty of --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --
-- General Public License for more details. --
-- --
-- You should have received a copy of the GNU General Public License --
-- along with this library; if not, write to the Free Software Foundation, --
-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. --
-- --
-- As a special exception, if other files instantiate generics from this --
-- unit, or you link this unit with other files to produce an executable, --
-- this unit does not by itself cause the resulting executable to be --
-- covered by the GNU General Public License. This exception does not --
-- however invalidate any other reasons why the executable file might be --
-- covered by the GNU Public License. --
------------------------------------------------------------------------------
-- $Id: aws-url.adb,v 1.36 2004/03/17 20:05:36 obry Exp $
with Ada.Characters.Handling;
with Ada.Strings.Fixed;
with Ada.Strings.Maps;
with AWS.Messages;
with AWS.Utils;
with AWS.URL.Raise_URL_Error;
package body AWS.URL is
use Ada;
subtype Escape_Code is String (1 .. 2);
Not_Escaped : constant Escape_Code := " ";
function Code (C : in Character) return Escape_Code;
pragma Inline (Code);
-- Returns hexadecimal code for character C
subtype ASCII_7 is Character range Character'First .. Character'Val (127);
type ASCII_7_Set is array (ASCII_7) of Escape_Code;
function Build_Hex_Escape return ASCII_7_Set;
-- Returns the table with pre-computed encoding for 7bits characters
function Normalize (Path : in Unbounded_String) return Unbounded_String;
-- Returns Path with all possible occurences of parent and current
-- directories removed. Does not raise exception.
--------------
-- Abs_Path --
--------------
function Abs_Path
(URL : in Object;
Encode : in Boolean := False)
return String
is
Result : constant String
:= To_String (URL.Path & URL.File);
begin
if Encode then
return AWS.URL.Encode (Result);
else
return Result;
end if;
end Abs_Path;
----------------------
-- Build_Hex_Escape --
----------------------
function Build_Hex_Escape return ASCII_7_Set is
Result : ASCII_7_Set;
begin
for C in Character'Val (0) .. Character'Val (127) loop
if Strings.Maps.Is_In (C, Default_Encoding_Set) then
Result (C) := Code (C);
else
Result (C) := Not_Escaped;
end if;
end loop;
return Result;
end Build_Hex_Escape;
----------
-- Code --
----------
function Code (C : in Character) return Escape_Code is
begin
return Utils.Hex (Character'Pos (C));
end Code;
Hex_Escape : constant ASCII_7_Set := Build_Hex_Escape;
-- Limit Hex_Escape to 7bits ASCII characters only. Other ISO-8859-1 are
-- handled separately in Encode function. Space character is not processed
-- specifically, contrary to what is done in AWS.URL.
------------
-- Decode --
------------
function Decode (Str : in String) return String is
Res : String (1 .. Str'Length);
K : Natural := 0;
I : Positive := Str'First;
begin
if Str = "" then
return "";
end if;
loop
K := K + 1;
if Str (I) = '%'
and then I + 2 <= Str'Last
and then Characters.Handling.Is_Hexadecimal_Digit (Str (I + 1))
and then Characters.Handling.Is_Hexadecimal_Digit (Str (I + 2))
then
Res (K) := Character'Val (Utils.Hex_Value (Str (I + 1 .. I + 2)));
I := I + 2;
elsif Str (I) = '+' then
Res (K) := ' ';
else
Res (K) := Str (I);
end if;
I := I + 1;
exit when I > Str'Last;
end loop;
return Res (1 .. K);
end Decode;
------------
-- Encode --
------------
function Encode
(Str : in String;
Encoding_Set : in Strings.Maps.Character_Set := Default_Encoding_Set)
return String
is
C_128 : constant Character := Character'Val (128);
Res : String (1 .. Str'Length * 3);
K : Natural := 0;
begin
for I in Str'Range loop
if Strings.Maps.Is_In (Str (I), Encoding_Set) then
-- This character must be encoded
K := K + 1;
Res (K) := '%';
K := K + 1;
if Str (I) < C_128 then
-- We keep a table for characters lower than 128 for efficiency
Res (K .. K + 1) := Hex_Escape (Str (I));
else
Res (K .. K + 1) := Code (Str (I));
end if;
K := K + 1;
else
K := K + 1;
Res (K) := Str (I);
end if;
end loop;
return Res (1 .. K);
end Encode;
----------
-- File --
----------
function File
(URL : in Object;
Encode : in Boolean := False)
return String is
begin
if Encode then
return AWS.URL.Encode (To_String (URL.File));
else
return To_String (URL.File);
end if;
end File;
----------
-- Host --
----------
function Host (URL : in Object) return String is
begin
return To_String (URL.Host);
end Host;
--------------
-- Is_Valid --
--------------
function Is_Valid (URL : in Object) return Boolean is
begin
return URL.Status = Valid;
end Is_Valid;
---------------
-- Normalize --
---------------
function Normalize (Path : in Unbounded_String) return Unbounded_String is
URL_Path : Unbounded_String := Path;
K : Natural;
P : Natural;
begin
-- Checks for current directory and removes all occurences
-- Look for starting ./
if Length (URL_Path) >= 2 and then Slice (URL_Path, 1, 2) = "./" then
Delete (URL_Path, 1, 1);
end if;
-- Look for all /./ references
loop
K := Index (URL_Path, "/./");
exit when K = 0;
Delete (URL_Path, K, K + 1);
end loop;
-- Checks for parent directory
loop
K := Index (URL_Path, "/../");
exit when K = 0;
-- Look for previous directory, which should be removed
P := Strings.Fixed.Index
(Slice (URL_Path, 1, K - 1), "/", Strings.Backward);
exit when P = 0;
Delete (URL_Path, P, K + 2);
end loop;
return URL_Path;
end Normalize;
procedure Normalize (URL : in out Object) is
begin
URL.Path := URL.N_Path;
if URL.Status = Wrong then
Raise_URL_Error
(To_String (URL.Path),
"Reference Web root parent directory");
end if;
end Normalize;
----------------
-- Parameters --
----------------
function Parameters
(URL : in Object;
Encode : in Boolean := False)
return String is
begin
if Encode then
return AWS.URL.Encode (To_String (URL.Params));
else
return To_String (URL.Params);
end if;
end Parameters;
-----------
-- Parse --
-----------
function Parse
(URL : in String;
Check_Validity : in Boolean := True;
Normalize : in Boolean := False)
return Object
is
HTTP_Token : constant String := "http://";
HTTPS_Token : constant String := "https://";
L_URL : constant String
:= Strings.Fixed.Translate (URL, Strings.Maps.To_Mapping ("\", "/"));
P : Natural;
O : Object;
procedure Parse (URL : in String; Protocol_Specified : in Boolean);
-- Parse URL, the URL must not contain the HTTP_Token prefix.
-- Protocol_Specified is set to True when the protocol (http:// or
-- https:// prefix) was specified. This is used to raise ambiguity
-- while parsing the URL. See comment below.
-----------
-- Parse --
-----------
procedure Parse (URL : in String; Protocol_Specified : in Boolean) is
function "+"
(S : in String)
return Unbounded_String
renames To_Unbounded_String;
procedure Parse_Path_File (Start : in Positive);
-- Parse Path and File URL information starting at position Start in
-- URL.
I1, I2, I3 : Natural;
F : Positive;
---------------------
-- Parse_Path_File --
---------------------
procedure Parse_Path_File (Start : in Positive) is
PF : constant String := URL (Start .. URL'Last);
I3 : constant Natural
:= Strings.Fixed.Index (PF, "/", Strings.Backward);
begin
if I3 = 0 then
-- No '/' so this is certainly a single file. As a special
-- exception we check for current and parent directories
-- which must be part of the path.
declare
File : constant String := URL (Start .. URL'Last);
begin
if File = ".." or else File = "." then
O.Path := +File;
O.File := +"";
else
O.Path := +"";
O.File := +File;
end if;
end;
else
-- Check that after the last '/' we have not a current or
-- parent directories which must be part of the path.
declare
File : constant String := URL (I3 + 1 .. URL'Last);
begin
if File = ".." or else File = "." then
O.Path := +URL (Start .. URL'Last);
O.File := +"";
else
O.Path := +URL (Start .. I3);
O.File := +File;
end if;
end;
end if;
end Parse_Path_File;
User_Password : Boolean := False;
begin
I1 := Strings.Fixed.Index (URL, ":");
I2 := Strings.Fixed.Index (URL, "/");
I3 := Strings.Fixed.Index (URL, "@");
-- Check for [user:pawwsord@]
if I1 /= 0 and then I3 /= 0 and then I1 < I3 then
-- We have [user:password@]
O.User := +URL (URL'First .. I1 - 1);
O.Password := +URL (I1 + 1 .. I3 - 1);
F := I3 + 1;
-- Check if there is another ':' specified
I1 := Strings.Fixed.Index (URL (F .. URL'Last), ":");
User_Password := True;
else
F := URL'First;
end if;
if I1 = 0
and then not User_Password
and then not Protocol_Specified
then
-- No ':', there is no port specified and no host since we did
-- not have a [user:password@] parsed and there was no protocol
-- specified. Let's just parse the data as a path information.
--
-- There is ambiguity here, the data could be either:
--
-- some_host_name/some_path
-- or
-- relative_path/some_more_path
--
-- As per explanations above we take the second choice.
O.Host := +"";
Parse_Path_File (URL'First);
elsif I1 = 0 then
-- In this case we have not port specified but a [user:password@]
-- was found, we expect the first string to be the hostname.
if I2 = 0 then
-- No path information, case [user:password@host]
O.Host := +URL (F .. URL'Last);
O.Path := +"/";
else
-- A path, case [user:password@host/path]
O.Host := +URL (F .. I2 - 1);
Parse_Path_File (I2);
end if;
else
-- Here we have a port specified [host:port]
O.Host := +URL (F .. I1 - 1);
if I2 = 0 then
-- No path, we have [host:port]
if Utils.Is_Number (URL (I1 + 1 .. URL'Last)) then
O.Port := Positive'Value (URL (I1 + 1 .. URL'Last));
else
Raise_URL_Error (AWS.URL.Parse.URL, "Port is not valid");
end if;
O.Path := +"/";
else
-- Here we have a complete URL [host:port/path]
if Utils.Is_Number (URL (I1 + 1 .. I2 - 1)) then
O.Port := Positive'Value (URL (I1 + 1 .. I2 - 1));
else
Raise_URL_Error (AWS.URL.Parse.URL, "Port is not valid");
end if;
Parse_Path_File (I2);
end if;
end if;
end Parse;
begin
O.Security := False;
-- Checks for parameters
P := Strings.Fixed.Index (L_URL, "?");
if P = 0 then
P := L_URL'Last;
else
O.Params := To_Unbounded_String (L_URL (P .. L_URL'Last));
P := P - 1;
end if;
-- Checks for prefix
if Messages.Match (L_URL, HTTP_Token) then
O.Port := Default_HTTP_Port;
Parse (L_URL (L_URL'First + HTTP_Token'Length .. P), True);
elsif Messages.Match (L_URL, HTTPS_Token) then
O.Port := Default_HTTPS_Port;
Parse (L_URL (L_URL'First + HTTPS_Token'Length .. P), True);
O.Security := True;
elsif L_URL /= "" then
-- Prefix is not recognized, this is either because there is no
-- protocol specified or the protocol is not supported by AWS. For
-- example a javascript reference start with "javascript:". This
-- will be caught on the next parsing level.
--
-- At least we know that it is not a Secure HTTP protocol URL.
O.Security := False;
Parse (L_URL (L_URL'First .. P), False);
end if;
-- Normalize the URL path
O.N_Path := AWS.URL.Normalize (O.Path);
-- Set status
declare
Path_Len : constant Natural := Length (O.N_Path);
begin
if (Path_Len >= 4
and then Slice (O.N_Path, 1, 4) = "/../")
or else
(Path_Len = 3
and then Slice (O.N_Path, 1, 3) = "/..")
then
O.Status := Wrong;
else
O.Status := Valid;
end if;
end;
-- If Normalize is activated, the active URL Path is the normalized one
if Normalize then
O.Path := O.N_Path;
end if;
-- Raise URL_Error is the URL is suspicious
if Check_Validity and then O.Status = Wrong then
Raise_URL_Error
(To_String (O.N_Path),
"Reference Web root parent directory");
end if;
return O;
end Parse;
--------------
-- Password --
--------------
function Password (URL : in Object) return String is
begin
return To_String (URL.Password);
end Password;
----------
-- Path --
----------
function Path
(URL : in Object;
Encode : in Boolean := False)
return String is
begin
if Encode then
return AWS.URL.Encode (To_String (URL.Path));
else
return To_String (URL.Path);
end if;
end Path;
-----------------------------
-- Pathname_And_Parameters --
-----------------------------
function Pathname_And_Parameters
(URL : in Object;
Encode : in Boolean := False)
return String is
begin
return Pathname (URL, Encode) & Parameters (URL, Encode);
end Pathname_And_Parameters;
----------
-- Port --
----------
function Port (URL : in Object) return Positive is
begin
return URL.Port;
end Port;
function Port (URL : in Object) return String is
P_Image : constant String := Positive'Image (URL.Port);
begin
return P_Image (2 .. P_Image'Last);
end Port;
-------------------
-- Protocol_Name --
-------------------
function Protocol_Name (URL : in Object) return String is
begin
if URL.Security then
return "https";
else
return "http";
end if;
end Protocol_Name;
-----------
-- Query --
-----------
function Query
(URL : in Object;
Encode : in Boolean := False)
return String
is
P : constant String := Parameters (URL, Encode);
begin
return P (P'First + 1 .. P'Last);
end Query;
--------------
-- Security --
--------------
function Security (URL : in Object) return Boolean is
begin
return URL.Security;
end Security;
---------
-- URL --
---------
function URL (URL : in Object) return String is
function Port return String;
pragma Inline (Port);
-- Returns the port number if not the standard HTTP or HTTPS Port and
-- the empty string otherwise.
function User_Password return String;
pragma Inline (User_Password);
-- Returns the user:password@ if present and the empty string otherwise
----------
-- Port --
----------
function Port return String is
begin
if URL.Security then
if URL.Port /= Default_HTTPS_Port then
return ':' & Port (URL);
else
return "";
end if;
else
if URL.Port /= Default_HTTP_Port then
return ':' & Port (URL);
else
return "";
end if;
end if;
end Port;
-------------------
-- User_Password --
-------------------
function User_Password return String is
User : constant String := To_String (URL.User);
Password : constant String := To_String (URL.Password);
begin
if User = "" then
if Password = "" then
return "";
else
return ':' & Password & '@';
end if;
else
if Password = "" then
return User & ":@";
else
return User & ':' & Password & '@';
end if;
end if;
end User_Password;
begin
if Host (URL) = "" then
return Pathname_And_Parameters (URL);
else
return Protocol_Name (URL) & "://"
& User_Password
& Host (URL) & Port & Pathname (URL) & Parameters (URL);
end if;
end URL;
----------
-- User --
----------
function User (URL : in Object) return String is
begin
return To_String (URL.User);
end User;
end AWS.URL;