001    // Copyright 2008, 2010 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    // http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry5.internal.services;
016    
017    import java.util.BitSet;
018    
019    import org.apache.tapestry5.services.URLEncoder;
020    
021    public class URLEncoderImpl implements URLEncoder
022    {
023        static final String ENCODED_NULL = "$N";
024        static final String ENCODED_BLANK = "$B";
025    
026        /**
027         * Bit set indicating which character are safe to pass through (when encoding or decoding) as-is. All other
028         * characters are encoded as a kind of unicode escape.
029         */
030        private final BitSet safe = new BitSet(128);
031    
032        {
033            markSafe("abcdefghijklmnopqrstuvwxyz");
034            markSafe("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
035            markSafe("01234567890-_.:");
036        }
037    
038        private void markSafe(String s)
039        {
040            for (char ch : s.toCharArray())
041            {
042                safe.set((int) ch);
043            }
044        }
045    
046        public String encode(String input)
047        {
048            if (input == null)
049                return ENCODED_NULL;
050    
051            if (input.equals(""))
052                return ENCODED_BLANK;
053    
054            boolean dirty = false;
055    
056            int length = input.length();
057    
058            StringBuilder output = new StringBuilder(length * 2);
059    
060            for (int i = 0; i < length; i++)
061            {
062                char ch = input.charAt(i);
063    
064                if (ch == '$')
065                {
066                    output.append("$$");
067                    dirty = true;
068                    continue;
069                }
070    
071                int chAsInt = (int) ch;
072    
073                if (safe.get(chAsInt))
074                {
075                    output.append(ch);
076                    continue;
077                }
078    
079                output.append(String.format("$%04x", chAsInt));
080                dirty = true;
081            }
082    
083            return dirty ? output.toString() : input;
084        }
085    
086        public String decode(String input)
087        {
088            assert input != null;
089    
090            if (input.equals(ENCODED_NULL))
091                return null;
092    
093            if (input.equals(ENCODED_BLANK))
094                return "";
095    
096            boolean dirty = false;
097    
098            int length = input.length();
099    
100            StringBuilder output = new StringBuilder(length * 2);
101    
102            for (int i = 0; i < length; i++)
103            {
104                char ch = input.charAt(i);
105    
106                if (ch == '$')
107                {
108                    dirty = true;
109    
110                    if (i + 1 < length && input.charAt(i + 1) == '$')
111                    {
112                        output.append('$');
113                        i++;
114    
115                        dirty = true;
116                        continue;
117                    }
118    
119                    if (i + 4 < length)
120                    {
121                        String hex = input.substring(i + 1, i + 5);
122    
123                        try
124                        {
125                            int unicode = Integer.parseInt(hex, 16);
126    
127                            output.append((char) unicode);
128                            i += 4;
129                            dirty = true;
130                            continue;
131                        }
132                        catch (NumberFormatException ex)
133                        {
134                            // Ignore.
135                        }
136                    }
137    
138                    throw new IllegalArgumentException(
139                            String.format(
140                                    "Input string '%s' is not valid; the '$' character at position %d should be followed by another '$' or a four digit hex number (a unicode value).",
141                                    input, i + 1));
142                }
143    
144                if (!safe.get((int) ch)) { throw new IllegalArgumentException(
145                        String.format("Input string '%s' is not valid; the character '%s' at position %d is not valid.",
146                                input, ch, i + 1)); }
147    
148                output.append(ch);
149            }
150    
151            return dirty ? output.toString() : input;
152        }
153    }