Re: Unescaping strings
Jeff Massung <massung@gmail.com> writes:
> Is there anything in the CLHS that deals with escaped strings?
>
> For example, in C: "\t\n", while in Lisp I seem forced to (format nil
> "~c~c" #\tab #\newline).
>
> I can obviously write a function to handle escaped strings (more
> importantly un-escaping them), but if the spec already deals with
> this, I'd rather use it... I just haven't found it yet.
This was posted on http://paste.lisp.org once:
;;;; -*- mode:lisp;coding:utf-8 -*-
;;;;**************************************************************************
;;;;FILE: c-string-reader.lisp
;;;;LANGUAGE: Common-Lisp
;;;;SYSTEM: Common-Lisp
;;;;USER-INTERFACE: NONE
;;;;DESCRIPTION
;;;;
;;;; A C string reader, implementing C string back-slash escapes.
;;;; Also includes a writer to print strings with C back-slash escapes.
;;;;
;;;;AUTHORS
;;;; <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
;;;;MODIFICATIONS
;;;; 2011-05-21 <PJB> Updated from http://paste.lisp.org/display/69905
;;;;BUGS
;;;;LEGAL
;;;; GPL
;;;;
;;;; Copyright Pascal J. Bourguignon 2011 - 2011
;;;;
;;;; This program is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU General Public License
;;;; as published by the Free Software Foundation; either version
;;;; 2 of the License, or (at your option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be
;;;; useful, but WITHOUT ANY WARRANTY; without even the implied
;;;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;;; PURPOSE. See the GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public
;;;; License along with this program; if not, write to the Free
;;;; Software Foundation, Inc., 59 Temple Place, Suite 330,
;;;; Boston, MA 02111-1307 USA
;;;;**************************************************************************
(defun write-c-string (string &optional (stream *standard-output*))
  "Print STRING on STREAM as a C string literal, with C escape sequences.

STRING is the sequence of characters to print.  STREAM is an output
stream designator and defaults to *STANDARD-OUTPUT*.

The output is wrapped in double-quotes.  Double-quote and backslash are
backslash-escaped; the control characters that have a C mnemonic are
written as \\a \\b \\t \\n \\v \\f \\r; any other character whose code is
below 32 is written as a three-digit octal escape; every other character
is written unchanged.

Returns NIL."
  (flet ((escape-for (ch)
           ;; Return the escape string for CH, or NIL when CH must be
           ;; written as is.
           (let ((code (char-code ch)))
             (cond
               ((char= ch #\") "\\\"")
               ((char= ch #\\) "\\\\")
               ;; The native newline always maps to \n, whatever its code.
               ((char= ch #\newline) "\\n")
               (t
                ;; Dispatch on codes rather than on character names:
                ;; names such as Bell and VT are neither standard nor
                ;; semi-standard, so they may be unreadable or denote
                ;; another character depending on the implementation.
                ;; This assumes ASCII-compatible character codes.
                (case code
                  (7 "\\a")
                  (8 "\\b")
                  (9 "\\t")
                  (10 "\\n")
                  (11 "\\v")
                  (12 "\\f")
                  (13 "\\r")
                  (otherwise
                   ;; Three digits, so that a following digit in the
                   ;; string cannot be absorbed into the escape.
                   (when (< code 32)
                     (format nil "\\~3,'0o" code)))))))))
    (write-char #\" stream)
    (loop :for ch :across string
          :for escape = (escape-for ch)
          :do (if escape
                  (write-string escape stream)
                  (write-char ch stream)))
    (write-char #\" stream)
    nil))
(defun read-c-string (stream)
  "Read the body of a C string literal from STREAM and return it.

The opening double-quote must have been read already; characters are
consumed up to and including the closing double-quote.

Recognized escapes: \\' \\? \\\\ and the escaped double-quote, the
mnemonics \\a \\b \\f \\n \\r \\t \\v, backslash-newline (removed, as a
line continuation), octal escapes of one to three digits, and \\x
followed by any number of hexadecimal digits.

Returns an adjustable string with a fill pointer.

Signals an ERROR for an unknown escape character, for \\x not followed
by a hexadecimal digit, and for a numeric escape whose code does not
denote a character in this implementation.  READ-CHAR signals
END-OF-FILE when STREAM ends before the closing double-quote."
  (let ((buffer (make-array 80 :element-type 'character
                               :adjustable t :fill-pointer 0))
        (state :in-string)
        ;; Index in BUFFER where the digits of the current numeric
        ;; escape begin.
        (start 0))
    (labels ((finish-numeric-escape (radix)
               ;; The digits of the escape were accumulated in BUFFER from
               ;; START up to the fill pointer; replace them with the one
               ;; character they denote and go back to plain string state.
               (let* ((code (parse-integer buffer :start start :radix radix))
                      (decoded (and (< code char-code-limit)
                                    (code-char code))))
                 ;; CODE-CHAR may return NIL; report that explicitly
                 ;; instead of failing while storing NIL in BUFFER.
                 (unless decoded
                   (error "Escape code ~D at position ~D does not denote a character"
                          code start))
                 (setf (aref buffer start) decoded
                       (fill-pointer buffer) (1+ start)
                       state :in-string)))
             (process-token (ch)
               ;; Advance the state machine with CH.  Returns true when CH
               ;; terminated a numeric escape and must be processed again
               ;; in the new state.
               (ecase state
                 ((:in-string)
                  (setf state (case ch
                                ((#\") :out)
                                ((#\\) :escape)
                                (otherwise (vector-push-extend ch buffer)
                                           :in-string)))
                  nil)
                 ((:escape)
                  (setf state :in-string)
                  (case ch
                    ((#\' #\" #\? #\\) (vector-push-extend ch buffer))
                    ;; Bell and VT are not standard character names, so
                    ;; the characters are built from their codes.
                    ((#\a) (vector-push-extend (code-char 7) buffer))
                    ((#\b) (vector-push-extend #\backspace buffer))
                    ((#\f) (vector-push-extend #\page buffer))
                    ((#\n) (vector-push-extend #\newline buffer))
                    ;; Backslash-newline is a line continuation: drop it.
                    ((#\newline))
                    ((#\r) (vector-push-extend #\return buffer))
                    ((#\t) (vector-push-extend #\tab buffer))
                    ((#\v) (vector-push-extend (code-char 11) buffer))
                    ((#\x)
                     (setf state :in-hexa
                           start (fill-pointer buffer)))
                    ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
                     (setf state :in-octal
                           start (fill-pointer buffer))
                     (vector-push-extend ch buffer))
                    (otherwise
                     (error "Invalid escape character \\~C at position ~D"
                            ch (fill-pointer buffer))))
                  nil)
                 ((:in-octal)
                  (case ch
                    ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
                     (vector-push-extend ch buffer)
                     ;; An octal escape has at most three digits.
                     (when (<= 3 (- (fill-pointer buffer) start))
                       (finish-numeric-escape 8))
                     nil)
                    (otherwise
                     (finish-numeric-escape 8)
                     :again)))
                 ((:in-hexa)
                  (case ch
                    ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
                      #\a #\b #\c #\d #\e #\f
                      #\A #\B #\C #\D #\E #\F)
                     (vector-push-extend ch buffer)
                     nil)
                    (otherwise
                     ;; \x must be followed by at least one digit.
                     (unless (< start (fill-pointer buffer))
                       (error "Invalid hexadecimal digit at position ~A"
                              (fill-pointer buffer)))
                     (finish-numeric-escape 16)
                     :again))))))
      (loop
        :for ch = (read-char stream)
        ;; Re-submit CH for as long as PROCESS-TOKEN asks for it.
        :do (loop :while (process-token ch))
        :until (eq state :out)
        :finally (return buffer)))))
(defun test/read-c-string ()
  "Read a sample C string through a readtable whose double-quote macro
character is handled by READ-C-STRING, and return what READ-FROM-STRING
returns.  The sample exercises the mnemonic escapes, line continuations,
quoting escapes, and hexadecimal and octal escapes."
  (let ((c-readtable (copy-readtable nil)))
    ;; Install the C string reader on double-quote in a fresh copy of the
    ;; standard readtable, as a terminating macro character.
    (set-macro-character #\"
                         (lambda (input quote-char)
                           (declare (ignore quote-char))
                           (read-c-string input))
                         nil
                         c-readtable)
    (let ((*readtable* c-readtable))
      (read-from-string "\"Hello, bell=\\a, backspace=\\b, page=\\f, newline=\\n, return=\\r, tab=\\t, vt=\\v, \\
\\\"double-quotes\\\", \\'single-quotes\\', question\\?, backslash=\\\\, \\
hexa=\\x3BB, octal=\\101, \\7\\77\\107\\3071\""))))
;;;; THE END ;;;;
--
__Pascal Bourguignon__ http://www.informatimago.com/
A bad day in () is better than a good day in {}.
_______________________________________________
Lisp Hug - the mailing list for LispWorks users
lisp-hug@lispworks.com
http://www.lispworks.com/support/lisp-hug.html