C++ and its type system: How to deal with data with multiple types?
"引言"
我对C++是比较新的。我完成了所有的基本工作,并为我的编程语言构建了2-3个简单的解释程序。
第一件曾经让我、并且至今仍让我头疼的事是:在C++中实现我的语言的类型系统。
想想看:ruby、python、php等语言有很多内置类型,这些类型显然是用C实现的。所以,我的第一次尝试是让我的语言中的值可以是以下三种类型之一:int、string和nil。
我想到了这个:
// Tag identifying which kind of value a Value currently represents.
enum ValueType
{
    Int,
    String,
    Nil
};

// A dynamically typed value of the interpreted language.
//
// Both payload members are always present; `type` tells which one is
// meaningful. Because a std::string member is embedded, copying a Value
// also copies the string, whichever type is active.
//
// The default member initializers make a default-constructed Value a
// well-defined Nil instead of leaving `type` and `intVal` indeterminate.
class Value
{
public:
    ValueType type = Nil;   // discriminator for the payload below
    int intVal = 0;         // payload when type == Int
    std::string stringVal;  // payload when type == String
};
是啊,哇,我知道。当必须一直调用字符串分配器时,传递此类是非常慢的。
下次我尝试类似的方法时:
// Tag identifying which kind of value a Value currently represents.
enum ValueType
{
    Int,
    String,
    Nil
};

// Table holding the string data; it is only declared here and must be
// defined in exactly one translation unit.
extern std::string stringTable[255];

// A dynamically typed value that holds no string itself, only an int.
//
// NOTE(review): `index` is presumably a position in stringTable when
// type == String -- confirm how it is meant to be used for Int values.
//
// The default member initializers make a default-constructed Value a
// well-defined Nil instead of leaving both members indeterminate.
class Value
{
public:
    ValueType type = Nil;  // discriminator
    int index = 0;         // see the review note above
};
我将把所有字符串存储在 stringTable 中,并把它们在表中的位置写入 index。
不管怎样,上面的内容也让我头疼。过了一段时间,从这里的表中访问字符串,在那里引用它,然后又在别处复制它,这些都超出了我的掌控——我失去了控制。我不得不放弃这份解释器草稿。
现在,好了,C和C++是静态类型的。
上面提到的语言的主要实现如何处理程序中的不同类型(fixnums、bignums、nums、字符串、数组、资源等)?
我应该怎么做才能在多种可用类型下获得最大速度?
这些解决方案与我上面的简化版本相比如何?
你可以在这里做几件不同的事情。随着时间的推移出现了不同的解决方案,其中大多数都需要动态分配实际数据(boost::variant可以避免为小对象使用动态分配的内存——谢谢@msalters)。
纯C方法:
存储类型信息和指向必须根据类型信息(通常是枚举)解释的内存的空指针:
/* Discriminator telling how the memory behind variable_t.datum must be
 * interpreted. The typedef makes the bare name `type_t` usable in C. */
typedef enum type_t
{
    integer,
    string,
    null
} type_t;

/* A dynamically typed variable: a type tag plus an untyped pointer to
 * heap storage that holds the actual value. */
typedef struct variable
{
    type_t type;
    void * datum;
} variable_t;

/*
 * Initializes *var as an integer variable holding `value`.
 *
 * The value is stored in memory obtained from malloc(); release it with
 * fini_variable(). If the allocation fails, *var is set to the `null`
 * type with a NULL datum, so a later fini_variable() is still safe.
 */
void init_int_variable( variable_t * var, int value )
{
    int * storage = (int *)malloc( sizeof(int) );
    if ( storage == NULL )
    {
        var->type  = null;
        var->datum = NULL;
        return;
    }
    /* Dereference through int*: the original cast the pointer itself to int. */
    *storage   = value;
    var->type  = integer;
    var->datum = storage;
}

/*
 * Releases the storage owned by `var`.
 *
 * `var` is passed by value, so the caller's copy still holds the freed
 * pointer afterwards and must not be used again (optionally pass by
 * pointer instead and reset datum there).
 */
void fini_variable( variable_t var )
{
    free( var.datum ); /* free(NULL) is a no-op */
}
在C++中,可以通过使用类来简化用法,从而改进这种方法;但更重要的是,您可以采用更完善的解决方案,并使用现有的库,例如 boost::any 或 boost::variant,它们为同一问题提供了不同的解决方案。
boost::any和boost::variant都将值存储在动态分配的内存中,通常通过指向层次结构中虚拟类的指针,并使用重新解释(向下强制转换)到具体类型的运算符。
一个明显的解决方案是定义类型层次结构:
// Common base of every value type of the interpreted language.
class Type
{
public:
    // Virtual so that deleting a derived object through a Type* is
    // well defined.
    virtual ~Type() {}
};

// Integer values.
class Int : public Type
{
};

// String values.
class String : public Type
{
};
等等。作为一个完整的例子,让我们为一种小型语言编写一个解释程序。该语言允许声明如下变量:
1 | var a 10 |
这将创建一个 Int 对象,将其值设为 10,并以名称 a 存入变量表。对变量调用操作(例如把两个 Int 相加)的写法如下:
1 | + a b |
以下是解释程序的完整代码:
#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// The base Type object from which all data types are derived.
class Type
{
public:
  typedef std::vector<Type*> TypeVector;

  virtual ~Type () { }

  // Returns the string representation of the object.
  virtual const std::string toString () const = 0;

  // Returns true if other_obj represents the same value as this object.
  virtual bool equals (const Type &other_obj) const = 0;

  // Invokes the operation named opr on this object, with the objects in
  // args as arguments. Returns NULL when the operation cannot be
  // performed. The returned object stays owned by the callee; callers
  // must not delete it.
  virtual Type* invoke (const std::string &opr, const TypeVector &args) = 0;
};

// An implementation of Type to represent an integer. The C++ int is
// used to actually store the value. As a consequence this type is
// machine dependent, which might not be what you want for a real
// high-level language.
class Int : public Type
{
public:
  Int () : value_ (0), ret_ (NULL) { }
  Int (int v) : value_ (v), ret_ (NULL) { }

  // Parses v with atoi, so text that is not a number yields 0.
  Int (const std::string &v) : value_ (std::atoi (v.c_str ())), ret_ (NULL) { }

  virtual ~Int () { delete ret_; }

  virtual const std::string toString () const
  {
    std::ostringstream out;
    out << value_;
    return out.str ();
  }

  virtual bool equals (const Type &other_obj) const
  {
    if (&other_obj == this)
      return true;
    // The pointer form of dynamic_cast yields NULL for a foreign type,
    // so no exception handling (and no <typeinfo>) is needed.
    const Int *other = dynamic_cast<const Int*> (&other_obj);
    return other != NULL && value_ == other->value_;
  }

  // As of now, Int supports only addition, represented by '+'.
  virtual Type* invoke (const std::string &opr, const TypeVector &args)
  {
    if (opr == "+")
      return add (args);
    return NULL;
  }

private:
  // Int owns ret_ through a raw pointer, so copying would lead to a
  // double delete. Copying is therefore disabled (declared, not defined).
  Int (const Int &);
  Int& operator= (const Int &);

  // Adds the first argument to this value. The result lives in a cached
  // object owned by this Int and is overwritten by the next addition.
  // Returns NULL if there is no argument or it is not an Int.
  Type* add (const TypeVector &args)
  {
    if (args.empty ())
      return NULL;
    const Int *arg = dynamic_cast<const Int*> (args[0]);
    if (arg == NULL)
      return NULL;
    if (ret_ == NULL)
      ret_ = new Int;
    ret_->value_ = value_ + arg->value_;
    return ret_;
  }

  int value_;
  Int *ret_;  // lazily created holder for the result of add ()
};

// We use std::map as a symbol (or variable) table. The table owns the
// objects it points to.
typedef std::map<std::string, Type*> VarsTable;
typedef std::vector<std::string> Tokens;

// A simple tokenizer for our language. Takes a line and appends its
// whitespace-separated tokens to tokens.
static void
tokenize (const std::string &line, Tokens &tokens)
{
  std::istringstream in (line);
  std::string token;
  // Testing the extraction itself (rather than eof ()) avoids pushing
  // an empty token for trailing whitespace or an empty line.
  while (in >> token)
    tokens.push_back (token);
}

// Maps varName to a new Int object in the symbol table, replacing and
// freeing any object previously stored under that name. To support
// other Types, we need a more complex interpreter that actually infers
// the type of object by looking at the format of value.
static void
setVar (const std::string &varName, const std::string &value,
        VarsTable &vars)
{
  Type *&slot = vars[varName];  // NULL for a name seen for the first time
  Type *fresh = new Int (value);
  delete slot;
  slot = fresh;
}

// Returns a previously mapped value from the symbol table, or NULL
// (after printing a diagnostic) if varName is unknown.
static Type *
getVar (const std::string &varName, const VarsTable &vars)
{
  VarsTable::const_iterator iter = vars.find (varName);
  if (iter == vars.end ())
    {
      std::cout << "Variable " << varName << " not found." << std::endl;
      return NULL;
    }
  return iter->second;
}

// Deletes every object owned by the symbol table and empties it.
static void
clearVars (VarsTable &vars)
{
  for (VarsTable::iterator iter = vars.begin (); iter != vars.end (); ++iter)
    delete iter->second;
  vars.clear ();
}

// Invokes opr on the object mapped to the name var01.
// opr should represent a binary operation. var02 will
// be pushed to the args vector. The string representation of
// the result is printed to the console.
static void
invoke (const std::string &opr, const std::string &var01,
        const std::string &var02, const VarsTable &vars)
{
  Type *arg01 = getVar (var01, vars);
  if (arg01 == NULL)
    return;
  Type *arg02 = getVar (var02, vars);
  if (arg02 == NULL)
    return;

  Type::TypeVector args;
  args.push_back (arg02);

  Type *ret = arg01->invoke (opr, args);
  if (ret != NULL)
    std::cout << "=>" << ret->toString () << std::endl;
  else
    std::cout << "Failed to invoke " << opr << " on " << var01 << std::endl;
}

// A simple REPL for our language. Type 'quit' to exit
// the loop.
int
main ()
{
  VarsTable vars;
  std::string line;
  while (std::getline (std::cin, line))
    {
      if (line == "quit")
        break;

      Tokens tokens;
      tokenize (line, tokens);
      if (tokens.size () != 3)
        {
          std::cout << "Invalid expression." << std::endl;
          continue;
        }

      if (tokens[0] == "var")
        setVar (tokens[1], tokens[2], vars);
      else
        invoke (tokens[0], tokens[1], tokens[2], vars);
    }
  clearVars (vars);
  return 0;
}
与解释器的示例交互:
1 2 3 4 5 6 7 8 9 | /home/me $ ./mylang var a 10 var b 20 + a b 30 + a c Variable c not found. quit |
C++是一种强类型语言。我看得出你来自一种无类型(动态类型)的语言,并且仍然在用那种方式思考。
如果您真的需要在一个变量中存储几个类型,那么可以查看boost::any。
但是,如果您正在实现一个解释器,那么您应该使用继承和表示特定类型的类。
关于速度,你说:
It was extremely slow to pass this
class around as the string allocator
had to be called all the time.
你知道绝大多数时候应该通过引用来传递对象吗?对于一个简单的解释器,您的解决方案看起来是可行的。
根据Vijay的解决方案,实施将是:
1 2 3 4 5 6 7 | Type* array; // to initialize the array array = new Type(size_of_array); // when you want to add values array[0] = new Int(42); // to add another string value array[1] = new String("fourty two"); |
他的代码中缺少的一点是如何提取那些值……这是我的版本(实际上我是从 Ogre 那里学来的,并根据自己的喜好做了修改)。
用法如下:
1 2 3 4 5 6 7 8 9 10 | Any array[4]; // Automatically understands it's an integer array[0] = Any(1); // But let's say you want the number to be thought of as float array[1] = Any<float>(2); // What about string? array[2] = Any<std::string>("fourty two"); // Note that this gets the compiler thinking it's a char* // instead of std::string array[3] = Any("Sometimes it just turns out to be what you don't want!"); |
好,现在来看一个特定元素是否是字符串:
1 2 3 4 5 6 7 8 9 10 11 | if(array[2].isType<std::string>() { // Extract the string value. std::string val = array[2].cast<std::string>(); // Make the string do your bidding!!!... /evilgrin // WAIT! But what if you want to directly manipulate // the value in the array? std::string& val1 = array[2].cast<std::string>(); // HOHOHO... now any changes to val1 affects the value // in the array ;) } |
下面给出了 Any 类的代码。您可以随意使用它 :)。希望这有帮助!
在头文件中……比如 Any.h
#include <cstddef>
#include <exception>
#include <string>
#include <typeinfo>

/*!
 * \class Any
 * \brief A variant type to hold a value of any copyable type.
 * \details This class can be used to store values whose types are not
 * known beforehand, e.g. to store user data. The value is copied into a
 * heap-allocated place holder that remembers its exact type.
 */
class Any
{
public:
    /*!
     * \class BadCast
     * \brief Exception thrown when the stored value is requested as a
     * type it does not have.
     * \details Derives from std::bad_cast, so handlers written for
     * std::bad_cast keep working, while what() returns a descriptive
     * message. (std::bad_cast has no standard constructor taking a
     * message.)
     */
    class BadCast : public std::bad_cast
    {
    public:
        explicit BadCast(const std::string& message) : m_message(message) {}
        virtual ~BadCast() throw() {}
        virtual const char* what() const throw() { return m_message.c_str(); }

    private:
        std::string m_message;
    };

    /*!
     * \brief Default constructor; creates an empty \c Any.
     */
    Any();

    /*!
     * \brief Constructor that stores a copy of a user-defined value.
     * \details The constructor is explicit so that the compiler never
     * performs this conversion implicitly.
     * \param val const reference to the value to be stored.
     */
    template <typename ValueType>
    explicit Any(const ValueType& val);

    /*!
     * \brief Copy constructor.
     * \param other The \c Any variable to be copied into this.
     */
    Any(const Any& other);

    /*!
     * \brief Destructor; destroys the place holder.
     */
    ~Any();

    /*!
     * \brief Gets the type of the value stored by this class.
     * \remarks It is wise to check whether this is empty by using
     * Any::isEmpty() before relying on the result.
     */
    const std::type_info& getType() const;

    /*!
     * \brief Verifies the type of the stored value.
     * \details Usage:
     * \code
     * int i = 0;
     * Any int_any(i);
     * // Later in your code...
     * if (int_any.isType<int>())
     * {
     *     // Do something with int_any.
     * }
     * \endcode
     * \return \c true if a value is stored and its type is T,
     * \c false otherwise (including when this is empty).
     */
    template <typename T>
    bool isType() const;

    /*!
     * \brief Checks whether the stored value can be obtained as T.
     * \details The place holder only knows the exact type of the stored
     * value, so a conversion between related types (e.g. a stored
     * \c Base* asked for as \c Derived*) cannot be verified. This
     * function therefore reports an exact type match only, like
     * isType().
     * \return \c true if the stored type is exactly T.
     * \deprecated This function will be removed and/or changed in the future.
     */
    template <typename T>
    bool isDynamicType() const;

    /*!
     * \brief Retrieves the stored value as type T.
     * \details T must be the exact type of the stored value.
     * \return A reference to the stored value.
     * \warning Throws Any::BadCast (a std::bad_cast) if this is empty or
     * the stored type is not T.
     */
    template <typename T>
    T& cast();

    /*!
     * \brief Retrieves the stored value as type T (const version).
     * \return A \c const reference to the stored value.
     * \warning Throws Any::BadCast (a std::bad_cast) if this is empty or
     * the stored type is not T.
     */
    template <typename T>
    const T& cast() const;

    /*!
     * \brief Retrieves the stored value as type T.
     * \details Like isDynamicType(), this can only honour an exact type
     * match, so it behaves like cast().
     * \return A reference to the stored value.
     * \warning Throws Any::BadCast (a std::bad_cast) if this is empty or
     * the stored type is not T.
     * \deprecated This function will be removed and/or changed in the future.
     */
    template <typename T>
    T& dynamicCast();

    /*!
     * \brief Retrieves the stored value as type T (const version).
     * \return A \c const reference to the stored value.
     * \warning Throws Any::BadCast (a std::bad_cast) if this is empty or
     * the stored type is not T.
     * \deprecated This function will be removed and/or changed in the future.
     */
    template <typename T>
    const T& dynamicCast() const;

    /*!
     * \brief Swaps the contents with another \c Any variable.
     * \return Reference to this instance.
     */
    Any& swap(Any& other);

    /*!
     * \brief Checks if the place holder is empty.
     * \return \c true if the place holder is empty, \c false otherwise.
     */
    bool isEmpty() const;

    /*!
     * \brief Checks if the place holder is \b not empty.
     * \return \c true if the place holder is not empty, \c false otherwise.
     * \remarks This is just a lazy programmer's attempt to make the code
     * look elegant.
     */
    bool isNotEmpty() const;

    /*!
     * \brief Assigns a 'raw' value to this instance.
     * \return Reference to this instance after assignment.
     */
    template <typename ValueType>
    Any& operator = (const ValueType& rhs);

    /*!
     * \brief Assigns another \c Any to this one.
     * \return Reference to this instance after assignment.
     */
    Any& operator = (const Any& rhs);

    /*!
     * \brief Boolean equality operator.
     */
    bool operator == (const Any& other) const;

    /*!
     * \brief Boolean equality operator that accepts a 'raw' value.
     * \details ValueType must be comparable with operator ==.
     * \return \c true if the stored type is exactly ValueType and the
     * stored value compares equal to \c other.
     */
    template <typename ValueType>
    bool operator == (const ValueType& other) const;

    /*!
     * \brief Boolean inequality operator.
     */
    bool operator != (const Any& other) const;

    /*!
     * \brief Boolean inequality operator that accepts a 'raw' value.
     * \return The negation of the matching operator ==.
     */
    template <typename ValueType>
    bool operator != (const ValueType& other) const;

protected:
    /*!
     * \class PlaceHolder
     * \brief Type-independent base of the class that stores the value.
     */
    class PlaceHolder
    {
    public:
        /*!
         * \brief Virtual destructor.
         */
        virtual ~PlaceHolder() {}

        /*!
         * \brief Gets the \c type_info of the value stored.
         * \return The typeid of the value stored.
         */
        virtual const std::type_info& getType() const = 0;

        /*!
         * \brief Clones this instance.
         * \return A newly allocated copy; the caller owns it.
         */
        virtual PlaceHolder* clone() const = 0;
    };

    /*!
     * \class PlaceHolderImpl
     * \brief The class that ultimately keeps hold of the stored value.
     */
    template <typename ValueType>
    class PlaceHolderImpl : public PlaceHolder
    {
    public:
        /*!
         * \brief The only constructor allowed.
         * \param val The value to store (copied).
         */
        PlaceHolderImpl(const ValueType& val)
            : m_value(val)
        {
        }

        /*!
         * \brief The destructor. Does nothing.
         */
        ~PlaceHolderImpl() {}

        /*!
         * \copydoc PlaceHolder::getType()
         */
        const std::type_info& getType() const
        {
            return typeid(ValueType);
        }

        /*!
         * \copydoc PlaceHolder::clone()
         */
        PlaceHolder* clone() const
        {
            return new PlaceHolderImpl<ValueType>(m_value);
        }

        ValueType m_value;
    };

    PlaceHolder* m_content;

private:
    /*!
     * \brief Builds the "Cannot convert 'X' to 'Y'." part of an error message.
     */
    static std::string describeMismatch(const std::type_info& from,
                                        const std::type_info& to)
    {
        std::string message("Cannot convert '");
        message += from.name();
        message += "' to '";
        message += to.name();
        message += "'.";
        return message;
    }
};

/************************************************************************/
/* Template code implementation section                                 */
/************************************************************************/
template <typename ValueType>
Any::Any(const ValueType& val)
    : m_content(new PlaceHolderImpl<ValueType>(val))
{
}
//---------------------------------------------------------------------
template <typename T>
bool Any::isType() const
{
    return m_content != NULL && m_content->getType() == typeid(T);
}
//---------------------------------------------------------------------
template <typename T>
bool Any::isDynamicType() const
{
    // Reinterpreting the place holder as PlaceHolderImpl<T> is only
    // valid when the stored type is exactly T, so that is all that can
    // be reported.
    return isType<T>();
}
//---------------------------------------------------------------------
template <typename T>
T& Any::cast()
{
    // Reuse the const overload; this object is known to be non-const.
    const Any& self = *this;
    return const_cast<T&>(self.cast<T>());
}
//---------------------------------------------------------------------
template <typename T>
const T& Any::cast() const
{
    // isType() is false for an empty Any, so no separate check is needed.
    if (!isType<T>())
    {
        throw BadCast(describeMismatch(getType(), typeid(T))
            + " Did you mean to use dynamicCast() to cast to a different type?");
    }
    return static_cast<const PlaceHolderImpl<T>*>(m_content)->m_value;
}
//---------------------------------------------------------------------
template <typename T>
T& Any::dynamicCast()
{
    const Any& self = *this;
    return const_cast<T&>(self.dynamicCast<T>());
}
//---------------------------------------------------------------------
template <typename T>
const T& Any::dynamicCast() const
{
    if (!isType<T>())
    {
        throw BadCast(describeMismatch(getType(), typeid(T)));
    }
    return static_cast<const PlaceHolderImpl<T>*>(m_content)->m_value;
}
//---------------------------------------------------------------------
template <typename ValueType>
Any& Any::operator = (const ValueType& rhs)
{
    // Copy-and-swap: the old content is destroyed with the temporary.
    Any(rhs).swap(*this);
    return *this;
}
//---------------------------------------------------------------------
template <typename ValueType>
bool Any::operator == (const ValueType& rhs) const
{
    // Compare the stored value, not the place holder pointer.
    return isType<ValueType>()
        && static_cast<const PlaceHolderImpl<ValueType>*>(m_content)->m_value == rhs;
}
//---------------------------------------------------------------------
template <typename ValueType>
bool Any::operator != (const ValueType& rhs) const
{
    return !(*this == rhs);
}
现在在 cpp 文件中……比如 Any.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | #include"Any.h" static const std::type_info& VOID_TYPE(typeid(void)); Any::Any( void ) :m_content(NULL) { } //--------------------------------------------------------------------- Any::Any( const Any& other ) :m_content(other.m_content?other.m_content->clone():NULL) { } //--------------------------------------------------------------------- Any::~Any( void ) { SafeDelete(m_content); } //--------------------------------------------------------------------- const std::type_info& Any::getType() const { return m_content?m_content->getType():VOID_TYPE; } //--------------------------------------------------------------------- Any& Any::swap( Any& other ) { std::swap(m_content, other.m_content); return *this; } //--------------------------------------------------------------------- Any& Any::operator=( const Any& rhs ) { Any(rhs).swap(*this); return *this; } //--------------------------------------------------------------------- bool Any::isEmpty() const { bool is_empty = m_content == NULL; return is_empty; } //--------------------------------------------------------------------- bool Any::isNotEmpty() const { bool is_not_empty = m_content != NULL; return is_not_empty; } //--------------------------------------------------------------------- bool Any::operator==( const Any& other ) const { bool result = m_content == other.m_content; return result; } //--------------------------------------------------------------------- bool Any::operator!=( const Any& other ) const { bool result = m_content != other.m_content; return result; } |