BrightSide Workbench Full Report + Source Code
HTMLEntities.java
Go to the documentation of this file.
1 /*
2  * Turró i Cutiller Foundation. License notice.
3  * Copyright (C) 2017 Lluis Turró Cutiller <http://www.turro.org/>
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Affero General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Affero General Public License for more details.
14  *
15  * You should have received a copy of the GNU Affero General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 package org.turro.html;
20 
21 import org.apache.commons.text.StringEscapeUtils;
22 import org.turro.elephant.context.ElephantContext;
23 
28 public class HTMLEntities {
29 
30  private static final String Entities_ISO88591[] = {
31  "&euro;",
32  "&sbquo;",
33  "&fnof;",
34  "&bdquo;",
35  "&hellip;",
36  "&dagger;",
37  "&Dagger;",
38  "&circ;",
39  "&permil;",
40  "&Scaron;",
41  "&lsaquo;",
42  "&OElig;",
43  "&Zcaron;",
44  "&lsquo;",
45  "&rsquo;",
46  "&ldquo;",
47  "&rdquo;",
48  "&bull;",
49  "&ndash;",
50  "&mdash;",
51  "&tilde;",
52  "&trade;",
53  "&scaron;",
54  "&rsaquo;",
55  "&oelig;",
56  "&zcaron;",
57  "&Yuml;",
58  "&iexcl;",
59  "&cent;",
60  "&pound;",
61  "&curren;",
62  "&yen;",
63  "&brvbar;",
64  "&sect;",
65  "&uml;",
66  "&copy;",
67  "&ordf;",
68  "&laquo;",
69  "&not;",
70  "&shy;",
71  "&reg;",
72  "&macr;",
73  "&deg;",
74  "&plusmn;",
75  "&sup2;",
76  "&sup3;",
77  "&acute;",
78  "&micro;",
79  "&para;",
80  "&middot;",
81  "&cedil;",
82  "&sup1;",
83  "&ordm;",
84  "&raquo;",
85  "&frac14;",
86  "&frac12;",
87  "&frac34;",
88  "&iquest;"
89  };
90 
91  private static final String SINGLE_QUOTE[] = {
92  "&sbquo;",
93  "&lsquo;",
94  "&rsquo;"
95  };
96 
97  private static final String DOUBLE_QUOTE[] = {
98  "&bdquo;",
99  "&ldquo;",
100  "&rdquo;"
101  };
102 
103  private static final String HELLIP[] = {
104  "&hellip;"
105  };
106 
107  private static final String BULLET[] = {
108  "&bull;"
109  };
110 
111  private static final String DASH[] = {
112  "&ndash;",
113  "&mdash;"
114  };
115 
116  public static String escape(String html) {
117  if(html != null && "ISO-8859-1".equals(ElephantContext.getEncoding())) {
118  for(String entity : Entities_ISO88591) {
119  html = html.replaceAll("\\" + StringEscapeUtils.unescapeHtml4(entity), "\\" + entity);
120  }
121  }
122  return html;
123  }
124 
125  public static String unescape(String html) {
126  if(html != null && "ISO-8859-1".equals(ElephantContext.getEncoding())) {
127  for(String entity : Entities_ISO88591) {
128  html = html.replaceAll("\\" + entity, "\\" + StringEscapeUtils.unescapeHtml4(entity));
129  }
130  }
131  return html;
132  }
133 
134  public static String plainWhenPossible(String html) {
135  if(html != null && "ISO-8859-1".equals(ElephantContext.getEncoding())) {
136  html = html.replaceAll("[\\‘\\’\\’ ]", "'")
137  .replaceAll("[\\“\\”\\“\\”]", "\"")
138  .replaceAll("[\\…]", "\\.\\.\\.")
139  .replaceAll("[\\•]", "\\·")
140  .replaceAll("[\\—]", "\\-");
141  }
142  return html;
143  }
144 
145 }
static String escape(String html)
static String plainWhenPossible(String html)
static String unescape(String html)