Stefan Gruncharov

I think the issue is that, on insert, the HTML content was passed through this function:

  /**
   * Convert an HTML fragment to plain text.
   *
   * Processing order: drop <style>/<script> blocks, turn bold tags into
   * asterisks, expand hyperlinks to "text[url]", replace images with their
   * alt text, compress whitespace, translate block-level tags into newlines,
   * remove the remaining tags and finally decode HTML entities.
   *
   * Tags are removed BEFORE entities are decoded. Decoding first would turn
   * escaped text such as "1 &lt; 2 and 3 &gt; 2" into "1 < 2 and 3 > 2", and
   * the tag-stripping step would then delete "< 2 and 3 >" as if it were markup.
   *
   * NOTE(review): several patterns use the `u` modifier; per the PHP manual
   * preg_replace() returns null on error, which presumably makes this method
   * return an empty string for input that is not valid UTF-8 - confirm whether
   * callers need a fallback.
   *
   * @param string $data HTML to convert; null is treated as an empty string.
   * @param bool $preserveLinks When true (or when $config['PreserveLinks'] is
   *                            true) <a> tags are kept in the output instead
   *                            of being expanded to "text[url]".
   * @param int $wordWrap Column at which to wrap the output; 0 disables wrapping.
   * @param array|null $config Overrides for the defaults: 'PreserveLinks',
   *                           'ReplaceBoldAsterisk', 'CompressWhitespace',
   *                           'ReplaceImagesWithAlt'.
   * @return string The trimmed plain-text result.
   */
  public static function html2raw($data, $preserveLinks = false, $wordWrap = 0, $config = null)
  {
      $defaultConfig = array(
          'PreserveLinks' => false,
          'ReplaceBoldAsterisk' => true,
          'CompressWhitespace' => true,
          'ReplaceImagesWithAlt' => true,
      );
      $config = isset($config) ? array_merge($defaultConfig, $config) : $defaultConfig;

      // Links are expanded (and <a> tags removed) unless either switch asks to keep them.
      $expandLinks = !$preserveLinks && !$config['PreserveLinks'];

      // Normalise null to '' so the string functions below do not raise
      // "passing null" deprecations on PHP 8.1+.
      $data = (string) $data;

      $data = preg_replace("/<style([^A-Za-z0-9>][^>]*)?>.*?<\/style[^>]*>/is", "", $data);
      $data = preg_replace("/<script([^A-Za-z0-9>][^>]*)?>.*?<\/script[^>]*>/is", "", $data);

      if ($config['ReplaceBoldAsterisk']) {
          $data = preg_replace('%<(strong|b)( [^>]*)?>|</(strong|b)>%i', '*', $data);
      }

      // Expand hyperlinks
      if ($expandLinks) {
          $expandLink = function ($matches) use ($config) {
              // Convert the link text with the caller's configuration. The result is
              // already entity-decoded, so escape it again: the outer pass will decode
              // it exactly once and will not mistake decoded "<...>" text for a tag.
              $text = self::html2raw($matches[2], false, 0, $config);
              return htmlspecialchars($text, ENT_QUOTES, 'UTF-8') . "[$matches[1]]";
          };
          // Quoted href first, then the unquoted form.
          $data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/ui', $expandLink, $data);
          $data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/ui', $expandLink, $data);
      }

      // Replace images with their alt tags
      if ($config['ReplaceImagesWithAlt']) {
          $data = preg_replace('/<img[^>]*alt *= *"([^"]*)"[^>]*>/i', ' \\1 ', $data);
          $data = preg_replace('/<img[^>]*alt *= *([^ ]*)[^>]*>/i', ' \\1 ', $data);
      }

      // Compress whitespace
      if ($config['CompressWhitespace']) {
          $data = preg_replace("/\s+/u", " ", $data);
      }

      // Parse newline tags
      $data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
      $data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
      $data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
      $data = preg_replace("/\n\n\n+/", "\n\n", $data);

      $data = preg_replace("/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
      $data = preg_replace("/<[Tt][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
      $data = preg_replace("/<\/[Tt][Dd]([^A-Za-z0-9>][^>]*)?> */", " ", $data);
      $data = preg_replace('/<\/p>/i', "\n\n", $data);

      // Remove all tags (but optionally keep links)

      // strip_tags seemed to be restricting the length of the output
      // arbitrarily. This essentially does the same thing.
      if ($expandLinks) {
          $data = preg_replace('/<\/?[^>]*>/', '', $data);
      } else {
          $data = strip_tags($data, '<a>');
      }

      // Replace HTML entities. This must come after tag removal so that escaped
      // angle brackets in the text are not treated as markup.
      $data = html_entity_decode($data, ENT_QUOTES, 'UTF-8');

      // Wrap
      if ($wordWrap) {
          $data = wordwrap(trim($data), $wordWrap);
      }
      return trim($data);
  }
Stefan Gruncharov

Yep, it's a one-time job. I'll have to write a script to do it for me, since there are thousands of records.

Stefan Gruncharov

Or how does it actually work? Sorry for the stupid question.

Stefan Gruncharov

Is there a function in SilverStripe that does that in the view?

Stefan Gruncharov

And I would like to see how to convert those weird dialects 😄