diff --git a/Zim/.zim/index.db b/Zim/.zim/index.db index d90a84d..a42c5af 100644 Binary files a/Zim/.zim/index.db and b/Zim/.zim/index.db differ diff --git a/Zim/.zim/state.conf b/Zim/.zim/state.conf index 7109f43..cb38217 100644 --- a/Zim/.zim/state.conf +++ b/Zim/.zim/state.conf @@ -1,7 +1,7 @@ [History] -list=[["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",4502,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",187,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",145,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps",13256,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki KEYMAPS",1701,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",187,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",145,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",877,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps",13256,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",145,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",944,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",125,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki KEYMAPS",2508,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",637,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",914,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps",13256,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",914,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps",13256,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki 
xmodmap",914,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps",13256,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",983,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps",13256,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",2995,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps:personal.map",178,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",4928,null]] +list=[["Utils:gdb:gdb debugging",292,null],["Utils:gdb:gdb debugging:gdb pointer",284,null],["Utils:gdb:gdb debugging:gdb demo",340,null],["Research:Error Notes:\u4e0b\u8f7d\u9519\u8bef:chosen node create failed",0,null],["Programme:goagent",43,null],["Linux:accounts",0,null],["Utils:autoconf---automake",0,null],["Utils:autoconf---automake:\u4ee3\u7801",0,null],["Utils:blkid",0,null],["Utils:autoconf---automake",0,null],["Utils:gcc&g++",0,null],["Utils:gcc&g++:C++\u7f16\u8bd1\u521d\u6b65",0,null],["Utils:gdb",0,null],["Utils:gcc&g++:Installing---GCC--Configuration",0,null],["Utils:make",0,null],["Utils:\u5de5\u5177\u94fe",0,null],["Utils:\u5de5\u5177\u94fe:autoconf---automake",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++:C++\u7f16\u8bd1\u521d\u6b65",0,null],["Utils:\u5de5\u5177\u94fe:autotut-Using GNU autoconf-automake-autoheader",0,null],["Utils:\u5de5\u5177\u94fe:automake\u53d8\u91cf",0,null],["Utils:irssi",0,null],["Utils:make",0,null],["Utils:\u5de5\u5177\u94fe:make",0,null],["Utils:script",null,null]] current=24 -recent=[["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04",43,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:\u66f4\u6539\u952e\u76d8\u5e03\u5c40(vitual 
terminal\u548cX\u7a0b\u5e8f)",0,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps:dumpkeys",75,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps:dumpkeys -l",14500,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki KEYMAPS",2508,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps",13256,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:wiki xmodmap",2995,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:keymaps:personal.map",178,null],["Utils:\u952e\u76d8\u5e03\u5c40\u548c\u6309\u952e\u6620\u5c04:xmodmap",4928,null]] +recent=[["Utils:gdb",0,null],["Utils:\u5de5\u5177\u94fe",0,null],["Utils:\u5de5\u5177\u94fe:autoconf---automake",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++:C++\u7f16\u8bd1\u521d\u6b65",0,null],["Utils:\u5de5\u5177\u94fe:autotut-Using GNU autoconf-automake-autoheader",0,null],["Utils:\u5de5\u5177\u94fe:automake\u53d8\u91cf",0,null],["Utils:irssi",0,null],["Utils:\u5de5\u5177\u94fe:make",0,null],["Utils:script",null,null]] [MainWindow] windowsize=[1278,779] @@ -20,8 +20,8 @@ windowpos=[0,19] toolbar_style=None toolbar_size=tiny active_tabs=["Index",null,null,"Attachments"] -toggle_panes=[] -left_pane=[true,222,"Index"] +toggle_panes=["left_pane"] +left_pane=[true,276,"Index"] right_pane=[false,200,null] top_pane=[false,200,null] bottom_pane=[false,200,null] @@ -50,7 +50,7 @@ calendar_expanded=False [InsertImageDialog] windowsize=[1280,749] attach_inserted_images=False -last_image_folder=/home/geekard/Notes/Zim/Research/\u5d4c\u5165\u5f0fPowerPC\u4e0a\u7684\u6241\u5e73\u8bbe\u5907\u6811FDT +last_image_folder=/home/geekard/Notes/Zim/Utils/gdb/gdb_debugging [InsertLinkDialog] windowsize=[328,156] @@ -70,7 +70,7 @@ windowsize=[637,165] windowsize=[500,400] [PreferencesDialog] -windowsize=[537,422] +windowsize=[592,422] [AttachmentBrowserPlugin] active=True @@ -125,5 +125,5 @@ 
output_file=/home/geekard/\u4e3a\u4ec0\u4e48\u8981\u7528\u975e\u5173\u7cfb\u6570 windowsize=[411,157] [FindAndReplaceDialog] -windowsize=[330,298] +windowsize=[332,298] diff --git a/Zim/.zim/state.conf.zim-new~ b/Zim/.zim/state.conf.zim-new~ new file mode 100644 index 0000000..e2682b8 --- /dev/null +++ b/Zim/.zim/state.conf.zim-new~ @@ -0,0 +1,129 @@ +[History] +list=[["Utils:gdb:gdb debugging:gdb demo",340,null],["Research:Error Notes:\u4e0b\u8f7d\u9519\u8bef:chosen node create failed",0,null],["Programme:goagent",43,null],["Linux:accounts",0,null],["Utils:autoconf---automake",0,null],["Utils:autoconf---automake:\u4ee3\u7801",0,null],["Utils:blkid",0,null],["Utils:autoconf---automake",0,null],["Utils:gcc&g++",0,null],["Utils:gcc&g++:C++\u7f16\u8bd1\u521d\u6b65",0,null],["Utils:gdb",0,null],["Utils:gcc&g++:Installing---GCC--Configuration",0,null],["Utils:make",0,null],["Utils:\u5de5\u5177\u94fe",0,null],["Utils:\u5de5\u5177\u94fe:autoconf---automake",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++:C++\u7f16\u8bd1\u521d\u6b65",0,null],["Utils:\u5de5\u5177\u94fe:autotut-Using GNU autoconf-automake-autoheader",0,null],["Utils:\u5de5\u5177\u94fe:automake\u53d8\u91cf",0,null],["Utils:irssi",0,null],["Utils:make",0,null],["Utils:\u5de5\u5177\u94fe:make",0,null],["Utils:script",0,null],["Utils:\u5de5\u5177\u94fe",0,null],["Utils:\u5de5\u5177\u94fe:autoconf---automake",null,null]] +current=24 +recent=[["Utils:gdb",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++",0,null],["Utils:\u5de5\u5177\u94fe:gcc&g++:C++\u7f16\u8bd1\u521d\u6b65",0,null],["Utils:\u5de5\u5177\u94fe:autotut-Using GNU autoconf-automake-autoheader",0,null],["Utils:\u5de5\u5177\u94fe:automake\u53d8\u91cf",0,null],["Utils:irssi",0,null],["Utils:\u5de5\u5177\u94fe:make",0,null],["Utils:script",0,null],["Utils:\u5de5\u5177\u94fe",0,null],["Utils:\u5de5\u5177\u94fe:autoconf---automake",null,null]] + +[MainWindow] +windowsize=[1278,779] +show_sidepane=True +sidepane_pos=316 
+show_menubar=True +show_menubar_fullscreen=True +show_toolbar=True +show_toolbar_fullscreen=False +show_statusbar=True +show_statusbar_fullscreen=False +pathbar_type=recent +pathbar_type_fullscreen=none +readonly=False +windowpos=[0,19] +toolbar_style=None +toolbar_size=tiny +active_tabs=["Index",null,null,"Attachments"] +toggle_panes=["left_pane"] +left_pane=[true,276,"Index"] +right_pane=[false,200,null] +top_pane=[false,200,null] +bottom_pane=[false,200,null] + +[ImportPageDialog] +windowsize=[500,400] + +[NewPageDialog] +windowsize=[362,170] + +[RenamePageDialog] +windowsize=[643,204] + +[DeletePageDialog] +windowsize=[633,438] + +[PropertiesDialog] +windowsize=[395,299] + +[InsertDateDialog] +windowsize=[319,247] +lastusedformat=%A %d/%m/%Y +linkdate=True +calendar_expanded=False + +[InsertImageDialog] +windowsize=[1280,749] +attach_inserted_images=False +last_image_folder=/home/geekard/Notes/Zim/Utils/gdb/gdb_debugging + +[InsertLinkDialog] +windowsize=[328,156] + +[EditImageDialog] +windowsize=[339,268] + +[AttachFileDialog] +windowsize=[500,400] +last_attachment_folder=/home/geekard/Notes/Zim/Research/Error_Notes/\u7f16\u8bd1\u9519\u8bef/crdb +insert_attached_images=False + +[PromptExistingFileDialog] +windowsize=[637,165] + +[InsertTextFromFileDialog] +windowsize=[500,400] + +[PreferencesDialog] +windowsize=[592,422] + +[AttachmentBrowserPlugin] +active=True +bottompane_pos=516 + +[TaskListDialog] +windowsize=[550,400] +hpane_pos=75 + +[InsertSymbolDialog] +windowsize=[350,400] + +[InsertScreenshotDialog] +windowsize=[212,148] + +[WordCountDialog] +windowsize=[316,146] + +[CustomToolManagerDialog] +windowsize=[410,299] + +[OpenPageDialog] +windowsize=[298,118] + +[NotebookDialog] +windowsize=[500,400] + +[PageWindow] +windowsize=[500,400] + +[CalendarDialog] +windowsize=[222,258] + +[TagsPlugin] +treeview=tagged +tagcloud_sorting=score + +[ExportDialog] +windowsize=[400,325] +document_root_url= +selection=page 
+selected_page=NonSQL:\u4e3a\u4ec0\u4e48\u8981\u7528\u975e\u5173\u7cfb\u6570\u636e\u5e93\uff1f +format=HTML +template=Default +template_file=None +document_root=absolute +output_folder=None +index_page= +output_file=/home/geekard/\u4e3a\u4ec0\u4e48\u8981\u7528\u975e\u5173\u7cfb\u6570\u636e\u5e93\uff1f.html + +[MovePageDialog] +windowsize=[411,157] + +[FindAndReplaceDialog] +windowsize=[332,298] + diff --git a/Zim/Linux/accounts.txt b/Zim/Linux/accounts.txt new file mode 100644 index 0000000..d203891 --- /dev/null +++ b/Zim/Linux/accounts.txt @@ -0,0 +1,11 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2011-12-02T14:03:59+08:00 + +====== accounts ====== +Created Friday 02 December 2011 + +kb310小熊 +ip: 192.168.1.113 +port: 222 +username: geekard diff --git a/Zim/Linux/kb318.txt b/Zim/Linux/kb318.txt deleted file mode 100644 index 8510390..0000000 --- a/Zim/Linux/kb318.txt +++ /dev/null @@ -1,14 +0,0 @@ -Content-Type: text/x-zim-wiki -Wiki-Format: zim 0.4 -Creation-Date: 2011-12-02T14:03:59+08:00 - -====== kb318 ====== -Created Friday 02 December 2011 - -常利伟帐号: - ssh -p 1998 clw@192.168.1.1 -密码:000000 - -ssh 192.168.1.85 -root: 000000 -geekard:ho4o44 diff --git a/Zim/Linux/黑客辞典.txt b/Zim/Linux/黑客辞典.txt index 8c7382b..d3711ee 100644 --- a/Zim/Linux/黑客辞典.txt +++ b/Zim/Linux/黑客辞典.txt @@ -8,3 +8,7 @@ Created Friday 02 November 2012 grub: gfx = graphics, 两者的读音差不多。 VESA = Video Electronics Standards Association VBE = VESA BIOS Extensions +evdev = event device: A componet of the linux kernel for handling input(from keyboards, mice, joysticks, etc.) + and a closely related input driver fo the X.Org server.The kernel componet is glue-code which translates input + events from peripheral-specific drivers into a generic structure which the input driver can easily translate into X11 + events. 
diff --git a/Zim/Programme/elf/Introduction_to_PIC.txt b/Zim/Programme/elf/Introduction_to_PIC.txt new file mode 100644 index 0000000..d8d8031 --- /dev/null +++ b/Zim/Programme/elf/Introduction_to_PIC.txt @@ -0,0 +1,38 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-23T16:02:09+08:00 + +====== Introduction to PIC ====== +Created Sunday 23 December 2012 +http://www.gentoo.org/proj/en/hardened/pic-guide.xml + +PIC code radically differs from conventional code in the way it calls functions and operates on data variables. +It will access these functions and data __through an indirection table__, the "Global Offset Table" (GOT), by software convention accessible using the reserved name "**_GLOBAL_OFFSET_TABLE_**". + +The exact mechanism used for this is hardware architecture dependent, but usually __a special machine register__ is reserved for setting up the location of the GOT when entering a function. + +The rationale behind this indirect addressing is to generate code that can be __independently accessed__ of the actual load address. 例如共享库的目标代码在内存中只加载一次,但是可以映射到多个进程中。 + +In a true PIC library **without** relocations in the __text segment__, only the symbols exported in the __"Global Offset Table"__ need updating at run-time depending on the current load address of the various shared libraries in the address space of the running process. 使用PIC技术的共享库,在动态链接(映射)到某个进程的地址空间中时,其text section不需要重定位(更改),只需要对 +GOT表中的符号进行重定位即可。而GOT位于.data section中。 + +Likewise, procedure calls to globally defined functions are redirected through the __"Procedure Linkage Table" (PLT)__ residing in the data segment of the core image. Again, this is done to avoid run-time modifications to the text segment. +其实PLT位于.text section之中,是只读和可执行的,它会使用GOT中的函数符号条目。 + +The __linker-editor__ allocates the Global Offset Table and Procedure Linkage Table when combining PIC object files into an image suitable for mapping into the process address space. 
It also collects all symbols that may be needed by the run-time link-editor and stores these along with the image's text and data bits. Another reserved symbol, **_DYNAMIC** is used to indicate the presence of the run-time linker structures. Whenever _DYNAMIC is relocated to 0, there is no need to invoke the run-time link- editor. If this symbol is non-zero, it points at a data structure from which the location of the necessary relocation- and symbol information can be derived. This is most notably used by the start-up module, **crt0, crt1S** and more recently **Scrt1**. The _DYNAMIC structure is conventionally located at the start of the data segment of the image to which it pertains. + +On most architectures, when you compile source code to object code, you __need to specify__ whether the object code should be position independent or not. There are occasional architectures which don't make the distinction, usually because all object code is position independent by virtue of the __Application Binary Interface (ABI),__ or less often because the load address of the object is fixed at compile time, which implies that shared libraries are not supported by such a platform. If an object is compiled as position independent code (PIC), then the operating system can load the object __at any address__ in preparation for execution. This involves a time overhead, in replacing direct address references with relative addresses at compile time, and a space overhead, in maintaining information to help the runtime loader fill in the unresolved addresses at runtime. + +Consequently, PIC objects are usually slightly larger and slower at runtime than the equivalent non-PIC object. The advantage of sharing library code on disk and in memory outweigh these problems as soon as the PIC object code in shared libraries is reused. + +PIC compilation is exactly what is required for objects which will become __part of__ a shared library. 
Consequently, __libtool__ builds PIC objects for use in shared libraries and non-PIC objects for use in static libraries. Whenever libtool instructs the compiler to generate a PIC object, it also defines the preprocessor symbol, `PIC', so that assembly code can be aware of whether it will reside in a PIC object or not. + +Typically, as libtool is compiling sources, it will generate a `.lo' object, as PIC, and a `.o' object, as non-PIC, and then it will use the appropriate one of the pair when linking executables and libraries of various sorts. On architectures where there is no distinction, the `.lo' file is just a soft link to the `.o' file. + +In practice, you can link PIC objects into a static archive for a small overhead in execution and load speed, and often you can similarly link non-PIC objects into shared archives. + +When you use position-independent code, relocatable references are generated as an indirection that use data in the shared object's data segment. The text segment code remains read-only, and all relocation updates are applied to corresponding entries within the data segment. + +If a shared object is built from code that is not position-independent, the text segment will usually require a large number of relocations to be performed at runtime. Although the runtime linker is equipped to handle this, the system overhead this creates can cause serious performance degradation. + +You can identify a shared object that requires relocations against its text segment using tools such as 'readelf -d foo' and inspect the output for any TEXTREL entry. The value of the TEXTREL entry is irrelevant. Its presence in a shared object indicates that text relocations exist. 
diff --git a/Zim/Programme/elf/elf_重定位.txt b/Zim/Programme/elf/elf_重定位.txt new file mode 100644 index 0000000..2991bc2 --- /dev/null +++ b/Zim/Programme/elf/elf_重定位.txt @@ -0,0 +1,213 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-21T20:33:28+08:00 + +====== elf 重定位 ====== +Created Friday 21 December 2012 + +Relocation is the process of __associating a symbolic reference with its symbolic definition.__ For example, when a program calls a function, the associated call instruction must transfer control to the **proper destination address.** In other words, relocatable files must have information for modifying their section content. + +Relocation table entry structure: +{{./0.gif}} + +* **r_offset:** Holds the location at which the relocation applies. For a relocatable file, the value is the byte offset from the beginning of the section to the storage unit affected by relocation. For an executable file or a shared object file, the value is the virtual address of the storage unit affected by relocation. +* **r_info:** Holds both the __symbol table index__ with respect to which the relocation must be made, and __the type of relocation__. For example, a call instruction's relocation entry would hold the symbol table index of the function. Relocation types are processor-specific. The following code shows how to manipulate the values. + #define ELF32_R_SYM(info) ((info)>>8) + #define ELF32_R_TYPE(info) ((info)&0xff) + #define ELF32_R_INFO(s,t) (((s)<<8) + ((t)&0xff)) + symbol: bits 31–8 + type: bits 7–0 +**r_addend:** Holds a constant addend used to compute the value to be stored into the relocatable field. + +===== Relocation Types:(SYSTEM V Architecture) ===== +The __link editor__ merges one or more relocatable object files to form the output. It first decides how to combine and locate the input files, then updates the symbol values, and finally performs the relocation. Relocations applied to executable or shared object files are similar. 
+ +link editor(ld)首先合并可重定位目标文件,然后解析其中的符号引用,并将符号的最终实际地址写入到符号表中,最后重定位。 + +The relocation types specify which bits to change and how to calculate their values(下表针对的是x86而非x86_64). +{{./1.gif}} + +**R_386_32:** Symbol’s value + addend. In the following Fig, there is a relocation at the **0x7 bytes** offset into the **.text** section. The linker replaces the address of ‘b’ with S+A, where S is symbol b’s new address after reset and A is the addend, which is zero here. +**R_386_32是绝对寻址的重定位。将符号解析后的绝对实际地址填充到关联section的offset处。** +{{./2.gif}} +**R_386_PC32:** Symbol’s **value+Addend-Place**. Because it is a __Relative Near CALL__, the operand is the offset from the “next instruction” (EIP) to the called procedure, more info is here. **VALUE+EIP = Symbol.value, EIP = Place+4. So VALUE = Symbol.value – 4 – Place**. S is ‘Symbol.value’, ‘-4’ is the Addend. P is the new virtual address of the relocation entry computed from r_offset and other factors. +R_386_PC32是相对寻址的重定位。S是符号表中符号解析后的实际地址,Place是调用该符号的指令地址,所以相对偏移量为 **VALUE = Symbol.value – 4 – Place** + +**R_386_GLOB_DAT:** This type is used to __set a global offset table entry__ to the address of the specified symbol. It is used for global or external variables in PIC code. 将解析后的全局或外部符号的实际地址写入到对应的__GOT条目中__。 + +**R_386_JMP_SLOT:** The link editor creates this relocation type for dynamic linking. Its offset specifies __the GOT entry that contains a PLT entry__. The dynamic linker uses it to implement lazy linking.将解析后的外部函数实际入口地址写入到对应的GOT中的PLT条目类型中。 + +R_386_GLOB_DAT and R_386_JMP_SLOT only appear in executable files or shared libraries. 
+ +__上面两种类型的重定位是由动态链接器解析符号后完成的,与代码里是否引用该符号无关(因为代码是间接地利用GOT,PLT来引用外部变量和符号的)。而且是对GOT中的符号value进行填充,与代码段无关。__ + +**R_386_GOTOFF:**引用本文件内使用的static和rodata类型变量时使用的重定位类型。外部static变量和函数内static变量定义在.data section中,对它们的引用不通过GOT条目,而是其符号位置与GOT首地址的偏移量来实现的(同理,字符串字面量由于不能修改,一般保存在.rodata section中,对它们的引用也不是通过GOT条目),即重定位值= S+A-GOT。示例如下: + //ebx事先保存的是GOT的首地址 + movl __globalVarStatic@GOTOFF__(%ebx), %eax __//globalVarStatic@GOTOFF的值为S+A-GOT,再加上GOT的正好为符号globalVarStatic的地址。__ + movl (%eax), 4(%esp) + +**R_386_GOT32:**代码中引用外部变量时,ld生成的重定位类型。动态链接器将G+A-P的值填充到代码中的重定位位置。所以CPU实际寻址时得到的地址为R_386_GOT32+P-A = G。 +**R_386_PLT32:**代码中引用外部函数时,ld生成的重定位类型。动态链接器将L+A-P的值填充到代码中的重定位位置。所以CPU实际寻址时得到的地址为R_386_PLT32+P-A = L。 + +上面的G和L指的是__相应符号GOT条目距GOT首地址的偏移量。注意GOT32和PLT32一般和GOTPC一起使用,后者将GOT的首地址填充到代码段中的引用位置处。G+GOTPC=相应符号在GOT条目中的实际地址。__ + +在代码中引用外部变量时,汇编器一般生成如下代码(x86系统,如果是x86_64则直接具有**rip寄存器**): + call __i686.get_pc_thunk.cx + addl $_GLOBAL_OFFSET_TABLE_, %ecx //_GLOBAL_OFFSET_TABLE符号的值是__GOT表首地址距当前指令的偏移量__,它的重定位类型为R_386_GOTPC。现在ecx寄存器保存的是__GOT表的绝对地址__。 + movl var@GOT(%ecx), %eax //var@GOT是__var符号在GOT表中的偏移量__,所以var@GOT(%ecx)会通过GOT中的var条目取得__var符号的实际地址,并将其保存在eax寄存器中__。var@GOT的值是通过R_386_GOT32重定位的。 + movl (%eax), %eax //取得var符号引用的内存单元值,保存到eax寄存器中。 +__i686.get_pc_thunk.cx: //该函数的目的是获取EIP的值。 + mov (%esp),%ecx //此时的esp指向的内存单元保存的值时__函数返回后执行的指令地址,即紧接着call的addl指令地址__。 + ret + +**R_386_GOTPC:** This type asembles R_386_PC32, except it use __the address of GOT__ in its caculation. The symbol referenced in this relocation normally is **_GLOBAL_OFFSET_TABLE_(见上面的代码示例)** , which additionally instructs linker to build the GOT. It normally used in PIC’s relocable files. See “ELF PIC Dessection“. + +Sample: + d.c + int var = 10; + void fun (void){ + var++; + int a = var; + } + #gcc -S -o __d.s__ -fPIC d.c //生成汇编代码,这样其中包含有__编译器生成的指导汇编器生成重定位条目的具体信息__。而通过objdump -d d.o看到的反汇编代码已经去掉了这些重定位信息。 + #gcc -c -o d.o -fPIC d.c +In d.s, d.c assembled with PIC option, there are instructions to load the GOT address, shown in the following figure. 
+{{./3.gif}} + +There will be a R_386_GOTPC relocation entry in d.o for __updating the value of “$_GLOBAL_OFFSET_TABLE”__ to the offset from “addl” to “GOT”(addl指令的地址与GOT表首地址的差值即为_GLOBAL_OFFSET_TABLE符号的值。__该值的计算方法是由重定位类型决定的__), see the following figure(objdump -d的反汇编代码已经看不出原始的重定位信息). The relocation entry is at 0xd bytes offset from .text section, $_GLOBAL_OFFSET_TABLE resides there. The item’s initial value is ‘0x2’. It is the addend A for calculating the address of ‘addl’. During the relocation, the linker first calculates the relocation entry’s P (position) from r_offset. Thus P-2 is the address of ‘addl’. Why -2? Because the opcode of ‘addl’ is 2 bytes long. So $_GLOBAL_OFFSET_TABLE = GOT-P+A. +{{./4.gif}} + +**R_386_COPY:** The link editor creates this relocation type for dynamic linking. Its offset member refers to a location in a writable segment. The symbol table index specifies a symbol that should exist __both__ in the current object file and in a shared object. During execution, the dynamic linker __copies the data__ associated with the shared object’s symbol to the location specified by the offset. + +Sample: + + [root@www save]# cat 386copy.c + #include <stdio.h> + extern int a; + int main(void) { + printf("%d\n", a); + } + [root@www save]# cat b.c + int a = 10; + + #gcc -fPIC -shared -o b.so b.c + #gcc -o 386copy 386copy.c ./b.so + +{{./5.gif}} +Fig2 shows the value of variable a being copied from the shared object to the executable’s .bss section. + +===== Notation: ===== +S : The value of the symbol whose index resides in the relocation entry’s r_info. +A: The addend used to calculate the value of the relocation field. +P: The place, section offset or address, of the storage unit __being relocated__ (computed using r_offset).__也就是计算后的值所替换的位置。__ +G: The __offset__ into the global offset table at which the address of relocation entry’s symbol will reside during execution. +GOT: The address of the global offset table. +L: The place, section offset or address, of PLT entry for a symbol. 
+B: The __base address__ at which a shared object file has been loaded into the memory during execution. + + +===== Relocation section: ===== +A relocation section(**而不是重定位条目的属性**) reference other two sections: __a symbol table and a section to modify__. The section header’s sh_info and sh_link, specify these relationships. sh_link is the symbol table index, sh_info is the section link. + +Samples Code: + + #include + char a = ‘a’; + int b = 10; + extern char c; + extern void fun(); + void pp (void) { } + + int main(void) { + printf(“%d\n”, __b__); + int bb = __b__; + char cc = __c__; //b和c都是绝对寻址 + **pp**(); //相对寻址 + fun(); + } + +# gcc -c -o test1 test.c //生成的是可重定位的目标对象文件,__没有使用GOT和PLT__,所以和它们相关的重定位类型都没有使用。一般只使用了__R_386_PC32和R_386_32__两种类型。 + [geekard@geekard rel]$ readelf -r rel.o + + Relocation section '.rel.text' at offset 0x478 contains 7 entries: + Offset Info Type Sym.Value Sym. Name + 0000000f 00000a01 __R_386_32__ 00000004 b + 0000001a 00000501 R_386_32 00000000 .rodata + 0000001f 00000d02 R_386_PC32 00000000 printf + 00000024 00000a01 R_386_32 00000004 b + 0000002f 00000e01 R_386_32 00000000 c + 00000038 00000b02 R_386_PC32 00000000 pp + 0000003d 00000f02 __R_386_PC32__ 00000000 fun + + Relocation section '.rel.eh_frame' at offset 0x4b0 contains 2 entries: + Offset Info Type Sym.Value Sym. 
Name + 00000020 00000202 R_386_PC32 00000000 .text + 00000040 00000202 R_386_PC32 00000000 .text + +没有使用PIC技术时,对目标对象文件中全局变量符号引用地址的重定位是直接用实际地址替换(**R_386_32**),对__内部和外部函数__的调用是相对调转(**R_386_PC32**)。 + [geekard@geekard rel]$ readelf -s rel.o + + Symbol table '.symtab' contains 16 entries: + Num: Value Size Type Bind Vis Ndx Name + 0: 00000000 0 NOTYPE LOCAL DEFAULT UND + 1: 00000000 0 FILE LOCAL DEFAULT ABS rel.c + 2: 00000000 0 SECTION LOCAL DEFAULT 1 + 3: 00000000 0 SECTION LOCAL DEFAULT 3 + 4: 00000000 0 SECTION LOCAL DEFAULT 4 + 5: 00000000 0 SECTION LOCAL DEFAULT 5 + 6: 00000000 0 SECTION LOCAL DEFAULT 7 + 7: 00000000 0 SECTION LOCAL DEFAULT 8 + 8: 00000000 0 SECTION LOCAL DEFAULT 6 + **9: 00000000 1 OBJECT GLOBAL DEFAULT 3 a** + ** 10: 00000004 4 OBJECT GLOBAL DEFAULT 3 b** + ** 11: 00000000 5 FUNC GLOBAL DEFAULT 1 pp** + ** 12: 00000005 62 FUNC GLOBAL DEFAULT 1 main** + ** 13: 00000000 0 NOTYPE GLOBAL DEFAULT UND printf** + ** 14: 00000000 0 NOTYPE GLOBAL DEFAULT UND c** + ** 15: 00000000 0 NOTYPE GLOBAL DEFAULT UND fun** + [geekard@geekard rel]$ +[geekard@geekard rel]$ gcc -c __-fPIC__ -o rel.o rel.c #还是可重定位目标类型目标文件,但是符号的引用使用了__位置无关__技术,所以对全局变量和外部函数的引用使用了GOT和PLT。 + [geekard@geekard rel]$ readelf -r rel.o + + Relocation section '.rel.text' at offset 0x594 contains 9 entries: + Offset Info Type Sym.Value Sym. Name + 00000010 00000f02 R_386_PC32 00000000 __x86.get_pc_thunk.bx + 00000016 0000100a __R_386_GOTPC__ 00000000 _GLOBAL_OFFSET_TABLE_ + 0000001c 00000c03 __R_386_GOT32__ 00000004 b + 00000028 00000509 R_386_GOTOFF 00000000 .rodata + 00000030 00001104 R_386_PLT32 00000000 printf + 00000036 00000c03 R_386_GOT32 00000004 b + 00000042 00001203 R_386_GOT32 00000000 c + 0000004e 00000d04 __R_386_PLT32__ 00000000 pp + 00000053 00001304 R_386_PLT32 00000000 fun + + Relocation section '.rel.eh_frame' at offset 0x5dc contains 3 entries: + Offset Info Type Sym.Value Sym. 
Name + 00000020 00000202 R_386_PC32 00000000 .text + 00000040 00000202 R_386_PC32 00000000 .text + 00000064 00000602 R_386_PC32 00000000 .text.__x86.get_pc_thu + [geekard@geekard rel]$ +使用了PIC技术后,所有符号的重定位使用__GOT和PLT。__ + +Here are the details of how a REL section is associated with the symbol table and the section to be relocated. + +1. Show the ELF sections. +{{./7.gif}} + +In fig1, the type of section ‘.rel.text’ is ‘REL’; the sections it is associated with are the first and the 9th sections, ‘.text’ and ‘.symtab’. + +2. Show the relocation section entries: +{{./8.gif}} +In fig2, we can see there are two relocation entries for symbol ‘b’ because ‘b’ is referenced two times and the linker has to relocate it two times. + +3. What is the raw data of relocation table entry? +{{./9.gif}} + +Fig3 shows the content of the first entry of the relocation table. r_offset is 0x10, which means the relocation applies at offset 0x10 in test1. The symbol table index is 0x09. We can see in Fig4 that the 9th entry of the symbol table is ‘b’. + +4. b’s offset is 4 bytes from the start of the data section and its size is 4 bytes. Then the linker calculates the address of b and modifies its address in the .text section through the relocation entry. +{{./10.gif}} + +So, we get a simple flow of how the linker does the relocation: __first, get all relocation entries, then get all symbols associated with the relocation entries, then calculate the addresses and modify the units in the section associated with the relocation entries.__ The real relocation is more complex, but the main flow is like this. 
+ +Sunday, September 12th, 2010 at 16:29 diff --git a/Zim/Programme/elf/elf_重定位/0.gif b/Zim/Programme/elf/elf_重定位/0.gif new file mode 100644 index 0000000..11aeddf Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/0.gif differ diff --git a/Zim/Programme/elf/elf_重定位/1.gif b/Zim/Programme/elf/elf_重定位/1.gif new file mode 100644 index 0000000..50b4d33 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/1.gif differ diff --git a/Zim/Programme/elf/elf_重定位/10.gif b/Zim/Programme/elf/elf_重定位/10.gif new file mode 100644 index 0000000..8db9c5b Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/10.gif differ diff --git a/Zim/Programme/elf/elf_重定位/2.gif b/Zim/Programme/elf/elf_重定位/2.gif new file mode 100644 index 0000000..ddfa1a4 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/2.gif differ diff --git a/Zim/Programme/elf/elf_重定位/3.gif b/Zim/Programme/elf/elf_重定位/3.gif new file mode 100644 index 0000000..9d81263 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/3.gif differ diff --git a/Zim/Programme/elf/elf_重定位/4.gif b/Zim/Programme/elf/elf_重定位/4.gif new file mode 100644 index 0000000..8600643 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/4.gif differ diff --git a/Zim/Programme/elf/elf_重定位/5.gif b/Zim/Programme/elf/elf_重定位/5.gif new file mode 100644 index 0000000..f9a0b00 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/5.gif differ diff --git a/Zim/Programme/elf/elf_重定位/6.gif b/Zim/Programme/elf/elf_重定位/6.gif new file mode 100644 index 0000000..f7595e4 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/6.gif differ diff --git a/Zim/Programme/elf/elf_重定位/7.gif b/Zim/Programme/elf/elf_重定位/7.gif new file mode 100644 index 0000000..7d4e8a3 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/7.gif differ diff --git a/Zim/Programme/elf/elf_重定位/8.gif b/Zim/Programme/elf/elf_重定位/8.gif new file mode 100644 index 0000000..22a78b5 Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/8.gif differ diff --git a/Zim/Programme/elf/elf_重定位/9.gif 
b/Zim/Programme/elf/elf_重定位/9.gif new file mode 100644 index 0000000..be22bfc Binary files /dev/null and b/Zim/Programme/elf/elf_重定位/9.gif differ diff --git a/Zim/Programme/elf/elf_重定位/CFI_for_gas.txt b/Zim/Programme/elf/elf_重定位/CFI_for_gas.txt new file mode 100644 index 0000000..b023af8 --- /dev/null +++ b/Zim/Programme/elf/elf_重定位/CFI_for_gas.txt @@ -0,0 +1,15 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-23T13:11:25+08:00 + +====== CFI for gas ====== +Created Sunday 23 December 2012 + +Modern ABIs don't require frame pointers to be used in functions. +However, missing FPs bring difficulties when doing a backtrace. +One solution is to provide Dwarf-2 CFI(call frame information) data +for each such function. This can be easily done for example by GCC in +its output, but isn't that easy to write by hand for pure assembler functions. + +With the help of these .cfi_* directives one can add appropriate unwind info +into his asm source without too much trouble. 
diff --git a/Zim/Programme/elf/elf_重定位/sample2--可重定位类型.txt b/Zim/Programme/elf/elf_重定位/sample2--可重定位类型.txt new file mode 100644 index 0000000..30a7747 --- /dev/null +++ b/Zim/Programme/elf/elf_重定位/sample2--可重定位类型.txt @@ -0,0 +1,198 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-22T10:50:43+08:00 + +====== sample2--可重定位类型 ====== +Created Saturday 22 December 2012 +[geekard@geekard rel]$ cat -n rel.c **#测试文件** + 1 #include + 2 int globalVar = 1; + 3 int globalVarUninit; + 4 static int globalVarStatic = 3; + 5 extern externVar; + 6 + 7 extern void externFun(void); + 8 void Fun(void) {} + 9 + 10 int main(void) { + 11 int autoVar = globalVar; + 12 static int staticVar = 2; + 13 globalVarUninit = externVar; + 14 printf("%d\n",globalVarStatic); + 15 externFun(); + 16 Fun(); + 17 } +[geekard@geekard rel]$ **gcc -c rel.c #编译,生成可重定位类型的目标对象文件** +[geekard@geekard rel]$ readelf **-r** rel.o **#查看可重定位条目** + +Relocation section '.rel.text' at offset 0x4e0 contains 8 entries: + Offset Info Type Sym.Value Sym. Name +0000000f 00000b01 R_386_32 00000000 globalVar __#对文件中的第11行变量引用进行重定位__ +00000018 00000f01 R_386_32 00000000 externVar **#13** +0000001d 00000c01 R_386_32 00000004 globalVarUninit **#13** +00000022 00000301 R_386_32 00000000 .data +0000002d 00000601 R_386_32 00000000 .rodata +00000032 00001002 R_386_PC32 00000000 printf **#14** +00000037 00001102 R_386_PC32 00000000 externFun **#15** +0000003c 00000d02 R_386_PC32 00000000 Fun **#16** + +Relocation section '.rel.eh_frame' at offset 0x520 contains 2 entries: + Offset Info Type Sym.Value Sym. 
Name +00000020 00000202 R_386_PC32 00000000 .text +00000040 00000202 R_386_PC32 00000000 .text +由于在编译时没有指定PIC,所以重定位条目没有使用GOT或PLT。对全局变量使用的是R_386_32的绝对地址重定位,对函数使用的是 +R_386_PC32相对寻址重定位。 + +[geekard@geekard rel]$ **objdump -t rel.o #查看符号表** + +rel.o: file format elf32-i386 + +SYMBOL TABLE: +00000000 l df *ABS* 00000000 rel.c +00000000 l d .text 00000000 .text +00000000 l d .data 00000000 .data +00000000 l d .bss 00000000 .bss +00000004 l O __.data__ 00000004 globalVarStatic +00000000 l d .rodata 00000000 .rodata +00000008 l O __.data__ 00000004 staticVar.1828 +00000000 l d .note.GNU-stack 00000000 .note.GNU-stack +00000000 l d .eh_frame 00000000 .eh_frame +00000000 l d .comment 00000000 .comment +00000000 g O __.data__ 00000004 globalVar +00000004 O __*COM*__ 00000004 globalVarUninit +00000000 g F .text 00000005 Fun +00000005 g F .text 0000003d main +00000000 *UND* 00000000 externVar +00000000 *UND* 00000000 printf +00000000 *UND* 00000000 externFun +[geekard@geekard rel]$ +全局静态变量、全局已初始化变量、静态自动变量都位于.data section中。但是全局未初始化变量位于COMMON(named after Fortran 77's "common blocks") section中,而且对外不可见。file-scope and local-scope uninitialized global variables 保存在bss("Block Started by Symbol")段中。如果想让globalVarUninit保存在.bss section中,可以在编译时使用-fno-common选项,这是建议的用法。 + geekard@ubuntu:~/Code$ cat bar.c + double globalVar; + int main() {} + geekard@ubuntu:~/Code$ cat bar.c + double globalVar; + int main() {} + geekard@ubuntu:~/Code$ + geekard@ubuntu:~/Code$ gcc foo.c bar.c + 编译并链接上面两个文件时,编译器并没有提示符号重复定义的错误,但是如果启用-fno-common选项,则会提示错误。 + geekard@ubuntu:~/Code$ gcc foo.c bar.c **-fno-common** + /tmp/cceNAIis.o:(.bss+0x0): multiple definition of `globalVar' + /tmp/ccWmFhZG.o:(.bss+0x0): first defined here + /usr/bin/ld: Warning: size of symbol `globalVar' changed from 4 in /tmp/ccWmFhZG.o to 8 in /tmp/cceNAIis.o + collect2: ld 返回 1 + geekard@ubuntu:~/Code$ + 这是由于foo.o和bar.o中的globalVar都放在.bss section中,而且都是global bind,所以会冲突。注意,放在COMMON section中时 + 没有bind属性,默认是外界不可见的: + geekard@ubuntu:~/Code$ 
objdump -t bar.o |grep globalVar //未启用-fno-common,无绑定信息,外界不可见 + 0000000000000008 __O *COM*__ 0000000000000008 globalVar + geekard@ubuntu:~/Code$ objdump -t bar.o |grep globalVar //启用-fno-common后 + 0000000000000000 __g__ O __.bss__ 0000000000000008 globalVar + kkkn +[geekard@geekard rel]$ __gcc -S rel.c #编译__ +[geekard@geekard rel]$ cat rel.s #查看编译生成的汇编代码,代码中含有指示链接器ld生成各section和重定位的指令。 + .file "rel.c" + .globl globalVar **#符号全局可见** + .data **#data section开始** + .align 4 + .type globalVar, @object **#符号类型** + .size globalVar, 4 **#符号对象大小** +globalVar: + .long 1 **#符号的值** + __.comm__ globalVarUninit,4,4 **#COMMON section** + .align 4 + .type globalVarStatic, @object + .size globalVarStatic, 4 +globalVarStatic: + .long 3 + .text **#代码段开始** + .globl Fun + .type Fun, @function +Fun: +.LFB0: + .cfi_startproc + pushl %ebp + .cfi_def_cfa_offset 8 + .cfi_offset 5, -8 + movl %esp, %ebp + .cfi_def_cfa_register 5 + popl %ebp + .cfi_restore 5 + .cfi_def_cfa 4, 4 + ret + .cfi_endproc +.LFE0: + .size Fun, .-Fun #函数对象的大小 + .section .rodata **#rodata section的开始** +.LC0: + .string "%d\n" + .text + .globl main + .type main, @function +main: +.LFB1: + .cfi_startproc + pushl %ebp + .cfi_def_cfa_offset 8 + .cfi_offset 5, -8 + movl %esp, %ebp + .cfi_def_cfa_register 5 + andl $-16, %esp + subl $32, %esp + movl __globalVar,__ %eax __#对全局变量的引用是绝对寻址,没有使用GOT。汇编时as会生成R_386_32类型的重定位条目__ + movl %eax, 28(%esp) + movl __externVar__, %eax + movl %eax, __globalVarUninit__ + movl __globalVarStatic__, %eax + movl %eax, 4(%esp) + movl $.LC0, (%esp) + call __printf #对外部或全局函数的引用,使用的是相对寻址,没有使用PLT。汇编时as会生成R_386_PC32类型的重定位条目__ + call __externFun__ + call __Fun __ + leave + .cfi_restore 5 + .cfi_def_cfa 4, 4 + ret + .cfi_endproc +.LFE1: + .size main, .-main + .data + .align 4 + .type __staticVar.1828__, @object + .size staticVar.1828, 4 +staticVar.1828: + .long 2 + .ident "GCC: (GNU) 4.7.2" + .section .note.GNU-stack,"",@progbits +[geekard@geekard rel]$ +[geekard@geekard rel]$ objdump -d rel.o + +rel.o: file 
format elf32-i386 + + +Disassembly of section .text: + +00000000 : + 0: 55 push %ebp + 1: 89 e5 mov %esp,%ebp + 3: 5d pop %ebp + 4: c3 ret + +00000005
: + 5: 55 push %ebp + 6: 89 e5 mov %esp,%ebp + 8: 83 e4 f0 and $0xfffffff0,%esp + b: 83 ec 20 sub $0x20,%esp + e: a1 00 00 00 00 mov 0x0,%eax + 13: 89 44 24 1c mov %eax,0x1c(%esp) + 17: a1 00 00 00 00 mov 0x0,%eax + 1c: a3 00 00 00 00 mov %eax,0x0 + 21: a1 04 00 00 00 mov 0x4,%eax + 26: 89 44 24 04 mov %eax,0x4(%esp) + 2a: c7 04 24 00 00 00 00 movl $0x0,(%esp) + 31: e8 fc ff ff ff call 32 + 36: e8 fc ff ff ff call 37 + 3b: e8 fc ff ff ff call 3c + 40: c9 leave + 41: c3 ret +[geekard@geekard rel]$ diff --git a/Zim/Programme/elf/elf_重定位/sample3--PIC可重定位类型.txt b/Zim/Programme/elf/elf_重定位/sample3--PIC可重定位类型.txt new file mode 100644 index 0000000..52a9045 --- /dev/null +++ b/Zim/Programme/elf/elf_重定位/sample3--PIC可重定位类型.txt @@ -0,0 +1,236 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-22T22:55:46+08:00 + +====== sample3--PIC可重定位类型 ====== +Created Saturday 22 December 2012 + +[geekard@geekard rel]$ cat rel.c +#include +int globalVar = 1; +int globalVarUninit; +static int globalVarStatic = 3; +extern externVar; + +extern void externFun(int); +void Fun(void) {} + +int main(void) { + int autoVar = globalVar; + static int staticVar = 2; + globalVarUninit = externVar; + printf("%d\n",globalVarStatic); + externFun(staticVar); + Fun(); +} +[geekard@geekard rel]$ gcc -c __-fPIC__ rel.c +[geekard@geekard rel]$ readelf -r rel.o + +Relocation section '**.rel.text**' at offset 0x600 contains 10 entries: **//对text section中的符号引用重定位** + Offset Info Type Sym.Value Sym. 
Name +00000010 00001102 R_386_PC32 00000000 __x86.get_pc_thunk.bx //相对寻址重定位,这里没有使用PLT是因为该函数是文件内部定义的。 +00000016 0000120a R_386___GOTPC__ 00000000 _GLOBAL_OFFSET_TABLE_ //用IP与GOT首地址的偏移量重定位代码中的值。 +0000001c 00000d03 R_386___GOT32__ 00000000 globalVar //用var条目在GOT中的偏移量重定位代码中的值。 +00000028 00001303 R_386_GOT32 00000000 externVar +00000030 00000e03 R_386_GOT32 00000004 globalVarUninit +00000038 00000309 R_386___GOTOFF__ 00000000 .data //用__static符号的地址与GOT的偏移量__来重定位代码段中的值。 +00000042 00000609 R_386_GOTOFF 00000000 .rodata //.rodata section中保存的是**字符串字面量**。 +0000004a 00001404 R_386___PLT32__ 00000000 printf //用printf条目在GOT PLT中的偏移量重定位代码中的值。 +00000050 00000309 R_386_GOTOFF 00000000 .data +0000004f 00001504 R_386_PLT32 00000000 externFun +00000054 00000f04 R_386_PLT32 00000000 Fun + +Relocation section '.rel.eh_frame' at offset 0x650 contains 3 entries: + Offset Info Type Sym.Value Sym. Name +00000020 00000202 R_386_PC32 00000000 .text +00000040 00000202 R_386_PC32 00000000 .text +00000064 00000802 R_386_PC32 00000000 .text.__x86.get_pc_thu +[geekard@geekard rel]$ objdump -t rel.o + +rel.o: file format elf32-i386 + +SYMBOL TABLE: +00000000 l df *ABS* 00000000 rel.c +00000000 l d .text 00000000 .text //.text section定义 +00000000 g F __.text__ 00000005 Fun //.text section中的第一个函数(偏移量为0),本文件内定义 +00000005 g F .text 00000058 main //.text section中的第二个函数 +00000000 g F .text.x86.get_pc_thunk.bx 00000000 .hidden x86.get_pc_thunk.bx +00000000 l d .text.x86.get_pc_thunk.bx 00000000 .text.x86.get_pc_thunk.bx +00000000 l d .data 00000000 .data //.data section定义 +00000000 g O .data 00000004 globalVar +00000004 l O .data 00000004 globalVarStatic +00000008 l O .data 00000004 __staticVar.1828__ +00000000 l d .rodata 00000000 .rodata +00000000 l d .bss 00000000 .bss +00000004 O __*COM*__ 00000004 globalVarUninit +00000000 l d .note.GNU-stack 00000000 .note.GNU-stack +00000000 l d .eh_frame 00000000 .eh_frame +00000000 l d .comment 00000000 .comment +00000000 l d .group 00000000 .group +00000000 
__*UND*__ 00000000 _GLOBAL_OFFSET_TABLE_ +00000000 *UND* 00000000 externVar +00000000 *UND* 00000000 printf +00000000 *UND* 00000000 externFun + +[geekard@geekard rel]$ gcc -S __-fPIC__ rel.c +[geekard@geekard rel]$ cat rel.s + .file "rel.c" + + .globl globalVar + .data + .align 4 + .type globalVar, @object + .size globalVar, 4 +globalVar: + .long 1 + + .comm globalVarUninit,4,4 + .align 4 + + .type globalVarStatic, @object + .size globalVarStatic, 4 +globalVarStatic: + .long 3 + + .text + .globl Fun + .type Fun, @function +Fun: +.LFB0: **//.LFB是Dwarf使用的一个标号,与.LFE相匹配。** + .cfi_startproc //[[../CFI_for_gas.txt|cfi(call frame information)]] + pushl %ebp + .cfi_def_cfa_offset 8 + .cfi_offset 5, -8 + movl %esp, %ebp + .cfi_def_cfa_register 5 + popl %ebp + .cfi_restore 5 + .cfi_def_cfa 4, 4 + ret + .cfi_endproc +.LFE0: + .size Fun, .-Fun + + .section __.rodata__ +__.LC0: //该标号没有使用.globl限定,所以符号表中没有包含。只在本文件内有效。__ + .string "%d\n" + + .text + .globl main + .type main, @function +main: +.LFB1: //前面使用的是.LFB0, 所以这里是.LFB1 + .cfi_startproc + pushl %ebp + .cfi_def_cfa_offset 8 + .cfi_offset 5, -8 + movl %esp, %ebp + .cfi_def_cfa_register 5 + pushl %ebx + andl $-16, %esp + subl $32, %esp + .cfi_offset 3, -12 + call ____x86.get_pc_thunk.bx __**//获得IP的值,保存在ebx寄存器中** + addl $___GLOBAL_OFFSET_TABLE___, %ebx **//通过GOTPC重定位,获得GOT与当前IP的偏移量。最终获得GOT的首地址** + movl __globalVar@GOT__(%ebx), %eax **//通过GOT32重定位,获得var@GOT的值,即var所在的GOT条目相对GOT的偏移量。** + movl (%eax), %eax **//eax寄存器的值为var符号的实际地址,这样间接引用获得其实际值。** + movl %eax, 28(%esp) + + movl externVar@GOT(%ebx), %eax + movl (%eax), %edx + movl globalVarUninit@GOT(%ebx), %eax + movl %edx, (%eax) + + movl __globalVarStatic@GOTOFF__(%ebx), %eax __//GOTOFF类型的重定位__ + movl %eax, 4(%esp) + leal __.LC0@GOTOFF__(%ebx), %eax + movl %eax, (%esp) + call __printf@PLT__ + + movl __staticVar.1828@GOTOFF__(%ebx), %eax + movl %eax, (%esp) + call externFun@PLT + + call Fun@PLT + + movl -4(%ebp), %ebx + leave + .cfi_restore 5 + .cfi_restore 3 + .cfi_def_cfa 4, 4 + 
ret + .cfi_endproc +.LFE1: + .size main, .-main + + + .data + .align 4 + .type staticVar.1828, @object + .size staticVar.1828, 4 +staticVar.1828: + .long 2 + + .section .text.x86.get_pc_thunk.bx,"axG",@progbits,x86.get_pc_thunk.bx,comdat + .globl __x86.get_pc_thunk.bx + .hidden __x86.get_pc_thunk.bx + .type __x86.get_pc_thunk.bx, @function +__x86.get_pc_thunk.bx: +.LFB2: + .cfi_startproc + movl (%esp), %ebx + ret + .cfi_endproc + +.LFE2: + .ident "GCC: (GNU) 4.7.2" + .section .note.GNU-stack,"",@progbits +[geekard@geekard rel]$ +[geekard@geekard rel]$ objdump -d rel.o + +rel.o: file format elf32-i386 + + +Disassembly of section .text: + +00000000 : + 0: 55 push %ebp + 1: 89 e5 mov %esp,%ebp + 3: 5d pop %ebp + 4: c3 ret + +00000005
: + 5: 55 push %ebp + 6: 89 e5 mov %esp,%ebp + 8: 53 push %ebx + 9: 83 e4 f0 and $0xfffffff0,%esp + c: 83 ec 20 sub $0x20,%esp + f: e8 __fc ff ff ff__ call 10 + 14: 81 c3 __02 00 00 00__ add $0x2,%ebx //objdump反汇编后的代码中已经__看不到原始的重定位信息__。所以需要和重定位条目一起查看。 + 1a: 8b 83 __00 00 00 00__ mov 0x0(%ebx),%eax + 20: 8b 00 mov (%eax),%eax + 22: 89 44 24 1c mov %eax,0x1c(%esp) + 26: 8b 83 __00 00 00 00__ mov 0x0(%ebx),%eax + 2c: 8b 10 mov (%eax),%edx + 2e: 8b 83 __00 00 00 00__ mov 0x0(%ebx),%eax + 34: 89 10 mov %edx,(%eax) + 36: 8b 83 __04 00 00 00__ mov 0x4(%ebx),%eax + 3c: 89 44 24 04 mov %eax,0x4(%esp) + 40: 8d 83 __00 00 00 00__ lea 0x0(%ebx),%eax + 46: 89 04 24 mov %eax,(%esp) + 49: e8 __fc ff ff ff__ call 4a + 4e: 8b 83 __08 00 00 00__ mov 0x8(%ebx),%eax + 54: 89 04 24 mov %eax,(%esp) + 57: e8 __fc ff ff ff__ call 58 + 5c: e8 __fc ff ff ff__ call 5d + 61: 8b 5d fc mov -0x4(%ebp),%ebx + 64: c9 leave + 65: c3 ret +#上面黄色标记的位置需要链接器对其重定位。 + +Disassembly of section .text.__x86.get_pc_thunk.bx: + +00000000 <__x86.get_pc_thunk.bx>: + 0: 8b 1c 24 mov (%esp),%ebx + 3: c3 ret +[geekard@geekard rel]$ diff --git a/Zim/Programme/elf/ld-linux调试信息.txt b/Zim/Programme/elf/ld-linux调试信息.txt new file mode 100644 index 0000000..34a393c --- /dev/null +++ b/Zim/Programme/elf/ld-linux调试信息.txt @@ -0,0 +1,125 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-16T17:34:49+08:00 + +====== ld-linux调试信息 ====== +Created Sunday 16 December 2012 +启用动态连接器调试信息输出的方法是定义变量LD_DEBUG=all + +**[geekard@geekard hello]$ cat hello.c** +#include +#include + +int glb_init = 1; +int glb_uninit; + +int main(void) +{ + char *str = "Just a test string!"; + printf("The test string is:\"%s\"\n", str); + printf("glb_init:%d, glb_uninit:%d\n", glb_init, glb_uninit); + pause(); **//暂停进程,这样可以查看其内存映射情况。** + exit(0); +} +[geekard@geekard hello]$ __strace -e trace=mmap2,mprotect,munmap,open,close -ELD_DEBUG=all ./hello &>log__ +^Z +[1]+ Stopped strace -e trace=mmap2,mprotect,munmap,open,close 
-ELD_DEBUG=all ./hello &>log +#上面的log文件中包含有strace的输出和hello的ld-linux.so的DEBUG信息。 + +**#4727为hello的进程号,下面命令从log中提取ld-linux.so的DEBUG信息** +**[geekard@geekard hello]$ cat log|grep 4727>log.ld ** + +#**下面命令从log中提取hello的系统调用信息** +**[geekard@geekard hello]$ cat log |sed '/4727/d' >log.strace** + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77b8000 + +**[geekard@geekard hello]$ readelf -l /lib/libc.so.6** +Elf file type is DYN (Shared object file) +Entry point __0x19760__ +There are __10__ program headers, starting at offset 52 + +Program Headers: + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + PHDR 0x000034 0x00000034 0x00000034 0x00140 0x00140 R E 0x4 + INTERP 0x16b7e8 0x0016b7e8 0x0016b7e8 0x00017 0x00017 R 0x1 + [Requesting program interpreter: [[/usr/lib/ld-linux.so.2]]] +#第一个LOAD为RE,其大小为1718236B(0x1a37dc),需要4KB对齐,所以实际需要空间1720320B,这会传给mmap2函数。 + **LOAD** __0x000000__ 0x00000000 0x00000000 __0x1a37dc__ 0x1a37dc R E 0x1000 + **LOAD** 0x1a41dc 0x001a41dc 0x001a41dc 0x02ce0 __0x058e8__ RW 0x1000 + DYNAMIC __0x1a5d9c__ 0x001a5d9c 0x001a5d9c 0x000f8 0x000f8 RW 0x4 + NOTE 0x000174 0x00000174 0x00000174 0x00044 0x00044 R 0x4 + TLS 0x1a41dc 0x001a41dc 0x001a41dc 0x00008 0x00040 R 0x4 + GNU_EH_FRAME 0x16b800 0x0016b800 0x0016b800 0x07454 0x07454 R 0x4 + GNU_STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 + **GNU_RELRO** 0x1a41dc 0x001a41dc 0x001a41dc 0x01e24 0x01e24 R 0x1 + + Section to Segment mapping: + Segment Sections... 
+ 00 + 01 .interp + 02 .note.gnu.build-id .note.ABI-tag .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_d .gnu.version_r .rel.dyn .rel.plt .plt .text __libc_freeres_fn __libc_thread_freeres_fn .rodata .interp .eh_frame_hdr .eh_frame .gcc_except_table .hash + 03 .tdata .init_array __libc_subfreeres __libc_atexit __libc_thread_subfreeres .data.rel.ro .dynamic .got .got.plt .data .bss + 04 .dynamic + 05 .note.gnu.build-id .note.ABI-tag + 06 .tdata .tbss + 07 .eh_frame_hdr + 08 + 09 .tdata .init_array __libc_subfreeres __libc_atexit __libc_thread_subfreeres .data.rel.ro .dynamic .got +可以看出libc.so.6中的__虚拟地址从0开始__。 + +**[geekard@geekard hello]$ pmap $(pgrep hello) |nl #查看hello进程的地址映射情况** + 1 4727: ./hello + 2 08048000 4K r-x-- /home/geekard/Code/hello/hello + 3 08049000 4K rw--- /home/geekard/Code/hello/hello + 4 __b75eb000__ 4K rw--- [ anon ] //libc的保护区域 +//0xb75ec000为ld-linux.so映射libc到a.out进程地址空间时的__随机base地址(见后文log文件)__。 + 5 __b75ec000__ 1680K r-x-- /usr/lib/libc-2.16.so + 6 __b7790000__ 8K r---- /usr/lib/libc-2.16.so + 7 b7792000 4K rw--- /usr/lib/libc-2.16.so + 8 __b7793000__ 12K rw--- [ anon ] + 9 b77b8000 **8K** rw--- [ anon ] //包含有第一次调用mmap2()分配的匿名内存块。 + 10 b77ba000 4K r-x-- [ anon ] //ld的保护区域 + 11 b77bb000 128K r-x-- /usr/lib/ld-2.16.so + 12 b77db000 4K r---- /usr/lib/ld-2.16.so + 13 b77dc000 4K rw--- /usr/lib/ld-2.16.so + 14 __bff8b000__ 132K rw--- [ stack ] + 15 total 1996K + +**[geekard@geekard hello]$ cat log //查看strace打印出的系统调用和ld-linux.so打印的DEBUG信息。** +//匿名映射,mmap2的第一个参数为NULL,所以内核会随机地选择一个地址,这里为 **0xb77b9000。** +//匿名映射的虚拟地址空间为**0xb77b9000~0xb77ba000。包含在pmap打印的第9行中。** +mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|**MAP_ANONYMOUS**, -1, 0) = **0xb77b9000** +open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 +//文件映射,内核随机选择一个起始地址,这里为**0xb7796000** +mmap2(**NULL**, 139868, PROT_READ, MAP_PRIVATE, 3, 0) = **0xb7796000** + 4727: + 4727: file=libc.so.6 [0]; needed by ./hello [0] + 4727: find library=libc.so.6 [0]; searching + 4727: search 
cache=/etc/ld.so.cache +//关闭了ld.so.cache,所以**其映射的内存区域将删除**。 +close(3) = 0 + 4727: trying file=/usr/lib/libc.so.6 +open("/usr/lib/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 +//文件映射,内核随机选择一个起始地址,这里为 __0xb75ec000。这里映射的是第一个LOAD segment__ +mmap2(**NULL**, 1743556, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = __0xb75ec000__ + +//这里映射的是第二个LOAD segment +mmap2(**0xb7790000,** 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, **0x1a4**) = 0xb7790000 + +mmap2(**0xb7793000**, 10948, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|**MAP_ANONYMOUS**, -1, 0) = 0xb7793000 +close(3) = 0 + 4727: + 4727: file=libc.so.6 [0]; generating link map + + 4727: dynamic: __0xb7791d9c__ base: __0xb75ec000__ size: 0x001a9ac4 + 4727: entry: 0xb7605760 phdr: 0xb75ec034 phnum: 10 + 4727: + 4727: checking for version `GLIBC_2.0' in file /usr/lib/libc.so.6 [0] required by file ./hello [0] + 4727: checking for version `GLIBC_2.3' in file /lib/ld-linux.so.2 [0] required by file /usr/lib/libc.so.6 [0] + 4727: checking for version `GLIBC_PRIVATE' in file /lib/ld-linux.so.2 [0] required by file /usr/lib/libc.so.6 [0] + 4727: checking for version `GLIBC_2.1' in file /lib/ld-linux.so.2 [0] required by file /usr/lib/libc.so.6 [0] +mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = **0xb75eb000** +mprotect(0xb7790000, 8192, PROT_READ) = 0 +mprotect(0xb77db000, 4096, PROT_READ) = 0 +munmap(0xb7796000, 139868) = 0 +mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = **0xb77b8000** diff --git a/Zim/Programme/goagent.txt b/Zim/Programme/goagent.txt new file mode 100644 index 0000000..54acb27 --- /dev/null +++ b/Zim/Programme/goagent.txt @@ -0,0 +1,7 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-29T11:19:39+08:00 + +====== goagent ====== +Created Saturday 29 December 2012 + diff --git a/Zim/Programme/python/The-Python-Standard-Library.txt b/Zim/Programme/python/The-Python-Standard-Library.txt new file mode 100644 
index 0000000..8987130 --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library.txt @@ -0,0 +1,380 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-29T14:30:56+08:00 + +====== The-Python-Standard-Library ====== +Created Thursday 29 November 2012 +http://docs.python.org/2/library/index.html + +While **The Python Language Reference** describes the exact syntax and semantics of the Python language, this library reference manual describes the standard library that is distributed with Python. It also describes some of the optional components that are commonly included in Python distributions. + +Python’s standard library is very extensive, offering a wide range of facilities as indicated by the long table of contents listed below. + +The library contains __built-in modules__ (written in C) that provide access to system functionality such as file I/O that would otherwise be inaccessible to Python programmers, as well as modules written in Python that provide __standardized solutions__ for many problems that occur in everyday programming. Some of these modules are explicitly designed to encourage and enhance the portability of Python programs by abstracting away platform-specifics into platform-neutral APIs. + +The Python installers for the Windows platform usually include the entire standard library and often also include many additional components. For Unix-like operating systems Python is normally provided as **a collection of packages**, so it may be necessary to use the packaging tools provided with the operating system to obtain some or all of the optional components. + +In addition to the standard library, there is a growing collection of several thousand components (from individual programs and modules to packages and entire application development frameworks), available from the Python Package Index. + +1. Introduction +2. Built-in Functions +3. Non-essential Built-in Functions +4. Built-in Constants +4.1. 
Constants added by the site module +5. Built-in Types +5.1. Truth Value Testing +5.2. Boolean Operations — and, or, not +5.3. Comparisons +5.4. Numeric Types — int, float, long, complex +5.5. Iterator Types +5.6. Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange +5.7. Set Types — set, frozenset +5.8. Mapping Types — dict +5.9. File Objects +5.10. memoryview type +5.11. Context Manager Types +5.12. Other Built-in Types +5.13. Special Attributes +6. Built-in Exceptions +6.1. Exception hierarchy +7. String Services +7.1. string — Common string operations +7.2. re — Regular expression operations +7.3. struct — Interpret strings as packed binary data +7.4. difflib — Helpers for computing deltas +7.5. StringIO — Read and write strings as files +7.6. cStringIO — Faster version of StringIO +7.7. textwrap — Text wrapping and filling +7.8. codecs — Codec registry and base classes +7.9. unicodedata — Unicode Database +7.10. stringprep — Internet String Preparation +7.11. fpformat — Floating point conversions +8. Data Types +8.1. datetime — Basic date and time types +8.2. calendar — General calendar-related functions +8.3. collections — High-performance container datatypes +8.4. heapq — Heap queue algorithm +8.5. bisect — Array bisection algorithm +8.6. array — Efficient arrays of numeric values +8.7. sets — Unordered collections of unique elements +8.8. sched — Event scheduler +8.9. mutex — Mutual exclusion support +8.10. Queue — A synchronized queue class +8.11. weakref — Weak references +8.12. UserDict — Class wrapper for dictionary objects +8.13. UserList — Class wrapper for list objects +8.14. UserString — Class wrapper for string objects +8.15. types — Names for built-in types +8.16. new — Creation of runtime internal objects +8.17. copy — Shallow and deep copy operations +8.18. pprint — Data pretty printer +8.19. repr — Alternate repr() implementation +9. Numeric and Mathematical Modules +9.1. numbers — Numeric abstract base classes +9.2. 
math — Mathematical functions +9.3. cmath — Mathematical functions for complex numbers +9.4. decimal — Decimal fixed point and floating point arithmetic +9.5. fractions — Rational numbers +9.6. random — Generate pseudo-random numbers +9.7. itertools — Functions creating iterators for efficient looping +9.8. functools — Higher-order functions and operations on callable objects +9.9. operator — Standard operators as functions +10. File and Directory Access +10.1. os.path — Common pathname manipulations +10.2. fileinput — Iterate over lines from multiple input streams +10.3. stat — Interpreting stat() results +10.4. statvfs — Constants used with os.statvfs() +10.5. filecmp — File and Directory Comparisons +10.6. tempfile — Generate temporary files and directories +10.7. glob — Unix style pathname pattern expansion +10.8. fnmatch — Unix filename pattern matching +10.9. linecache — Random access to text lines +10.10. shutil — High-level file operations +10.11. dircache — Cached directory listings +10.12. macpath — Mac OS 9 path manipulation functions +11. Data Persistence +11.1. pickle — Python object serialization +11.2. cPickle — A faster pickle +11.3. copy_reg — Register pickle support functions +11.4. shelve — Python object persistence +11.5. marshal — Internal Python object serialization +11.6. anydbm — Generic access to DBM-style databases +11.7. whichdb — Guess which DBM module created a database +11.8. dbm — Simple “database” interface +11.9. gdbm — GNU’s reinterpretation of dbm +11.10. dbhash — DBM-style interface to the BSD database library +11.11. bsddb — Interface to Berkeley DB library +11.12. dumbdbm — Portable DBM implementation +11.13. sqlite3 — DB-API 2.0 interface for SQLite databases +12. Data Compression and Archiving +12.1. zlib — Compression compatible with gzip +12.2. gzip — Support for gzip files +12.3. bz2 — Compression compatible with bzip2 +12.4. zipfile — Work with ZIP archives +12.5. tarfile — Read and write tar archive files +13. 
File Formats +13.1. csv — CSV File Reading and Writing +13.2. ConfigParser — Configuration file parser +13.3. robotparser — Parser for robots.txt +13.4. netrc — netrc file processing +13.5. xdrlib — Encode and decode XDR data +13.6. plistlib — Generate and parse Mac OS X .plist files +14. Cryptographic Services +14.1. hashlib — Secure hashes and message digests +14.2. hmac — Keyed-Hashing for Message Authentication +14.3. md5 — MD5 message digest algorithm +14.4. sha — SHA-1 message digest algorithm +15. Generic Operating System Services +15.1. os — Miscellaneous operating system interfaces +15.2. io — Core tools for working with streams +15.3. time — Time access and conversions +15.4. argparse — Parser for command-line options, arguments and sub-commands +15.5. optparse — Parser for command line options +15.6. getopt — C-style parser for command line options +15.7. logging — Logging facility for Python +15.8. logging.config — Logging configuration +15.9. logging.handlers — Logging handlers +15.10. getpass — Portable password input +15.11. curses — Terminal handling for character-cell displays +15.12. curses.textpad — Text input widget for curses programs +15.13. curses.ascii — Utilities for ASCII characters +15.14. curses.panel — A panel stack extension for curses +15.15. platform — Access to underlying platform’s identifying data +15.16. errno — Standard errno system symbols +15.17. ctypes — A foreign function library for Python +16. Optional Operating System Services +16.1. select — Waiting for I/O completion +16.2. threading — Higher-level threading interface +16.3. thread — Multiple threads of control +16.4. dummy_threading — Drop-in replacement for the threading module +16.5. dummy_thread — Drop-in replacement for the thread module +16.6. multiprocessing — Process-based “threading” interface +16.7. mmap — Memory-mapped file support +16.8. readline — GNU readline interface +16.9. rlcompleter — Completion function for GNU readline +17. 
Interprocess Communication and Networking +17.1. subprocess — Subprocess management +17.2. socket — Low-level networking interface +17.3. ssl — TLS/SSL wrapper for socket objects +17.4. signal — Set handlers for asynchronous events +17.5. popen2 — Subprocesses with accessible I/O streams +17.6. asyncore — Asynchronous socket handler +17.7. asynchat — Asynchronous socket command/response handler +18. Internet Data Handling +18.1. email — An email and MIME handling package +18.2. json — JSON encoder and decoder +18.3. mailcap — Mailcap file handling +18.4. mailbox — Manipulate mailboxes in various formats +18.5. mhlib — Access to MH mailboxes +18.6. mimetools — Tools for parsing MIME messages +18.7. mimetypes — Map filenames to MIME types +18.8. MimeWriter — Generic MIME file writer +18.9. mimify — MIME processing of mail messages +18.10. multifile — Support for files containing distinct parts +18.11. rfc822 — Parse RFC 2822 mail headers +18.12. base64 — RFC 3548: Base16, Base32, Base64 Data Encodings +18.13. binhex — Encode and decode binhex4 files +18.14. binascii — Convert between binary and ASCII +18.15. quopri — Encode and decode MIME quoted-printable data +18.16. uu — Encode and decode uuencode files +19. Structured Markup Processing Tools +19.1. HTMLParser — Simple HTML and XHTML parser +19.2. sgmllib — Simple SGML parser +19.3. htmllib — A parser for HTML documents +19.4. htmlentitydefs — Definitions of HTML general entities +19.5. xml.etree.ElementTree — The ElementTree XML API +19.6. xml.dom — The Document Object Model API +19.7. xml.dom.minidom — Lightweight DOM implementation +19.8. xml.dom.pulldom — Support for building partial DOM trees +19.9. xml.sax — Support for SAX2 parsers +19.10. xml.sax.handler — Base classes for SAX handlers +19.11. xml.sax.saxutils — SAX Utilities +19.12. xml.sax.xmlreader — Interface for XML parsers +19.13. xml.parsers.expat — Fast XML parsing using Expat +20. Internet Protocols and Support +20.1. 
webbrowser — Convenient Web-browser controller +20.2. cgi — Common Gateway Interface support +20.3. cgitb — Traceback manager for CGI scripts +20.4. wsgiref — WSGI Utilities and Reference Implementation +20.5. urllib — Open arbitrary resources by URL +20.6. urllib2 — extensible library for opening URLs +20.7. httplib — HTTP protocol client +20.8. ftplib — FTP protocol client +20.9. poplib — POP3 protocol client +20.10. imaplib — IMAP4 protocol client +20.11. nntplib — NNTP protocol client +20.12. smtplib — SMTP protocol client +20.13. smtpd — SMTP Server +20.14. telnetlib — Telnet client +20.15. uuid — UUID objects according to RFC 4122 +20.16. urlparse — Parse URLs into components +20.17. SocketServer — A framework for network servers +20.18. BaseHTTPServer — Basic HTTP server +20.19. SimpleHTTPServer — Simple HTTP request handler +20.20. CGIHTTPServer — CGI-capable HTTP request handler +20.21. cookielib — Cookie handling for HTTP clients +20.22. Cookie — HTTP state management +20.23. xmlrpclib — XML-RPC client access +20.24. SimpleXMLRPCServer — Basic XML-RPC server +20.25. DocXMLRPCServer — Self-documenting XML-RPC server +21. Multimedia Services +21.1. audioop — Manipulate raw audio data +21.2. imageop — Manipulate raw image data +21.3. aifc — Read and write AIFF and AIFC files +21.4. sunau — Read and write Sun AU files +21.5. wave — Read and write WAV files +21.6. chunk — Read IFF chunked data +21.7. colorsys — Conversions between color systems +21.8. imghdr — Determine the type of an image +21.9. sndhdr — Determine type of sound file +21.10. ossaudiodev — Access to OSS-compatible audio devices +22. Internationalization +22.1. gettext — Multilingual internationalization services +22.2. locale — Internationalization services +23. Program Frameworks +23.1. cmd — Support for line-oriented command interpreters +23.2. shlex — Simple lexical analysis +24. Graphical User Interfaces with Tk +24.1. Tkinter — Python interface to Tcl/Tk +24.2. 
ttk — Tk themed widgets +24.3. Tix — Extension widgets for Tk +24.4. ScrolledText — Scrolled Text Widget +24.5. turtle — Turtle graphics for Tk +24.6. IDLE +24.7. Other Graphical User Interface Packages +25. Development Tools +25.1. pydoc — Documentation generator and online help system +25.2. doctest — Test interactive Python examples +25.3. unittest — Unit testing framework +25.4. 2to3 - Automated Python 2 to 3 code translation +25.5. test — Regression tests package for Python +25.6. test.test_support — Utility functions for tests +26. Debugging and Profiling +26.1. bdb — Debugger framework +26.2. pdb — The Python Debugger +26.3. Debugger Commands +26.4. The Python Profilers +26.5. hotshot — High performance logging profiler +26.6. timeit — Measure execution time of small code snippets +26.7. trace — Trace or track Python statement execution +27. Python Runtime Services +27.1. sys — System-specific parameters and functions +27.2. sysconfig — Provide access to Python’s configuration information +27.3. __builtin__ — Built-in objects +27.4. future_builtins — Python 3 builtins +27.5. __main__ — Top-level script environment +27.6. warnings — Warning control +27.7. contextlib — Utilities for with-statement contexts +27.8. abc — Abstract Base Classes +27.9. atexit — Exit handlers +27.10. traceback — Print or retrieve a stack traceback +27.11. __future__ — Future statement definitions +27.12. gc — Garbage Collector interface +27.13. inspect — Inspect live objects +27.14. site — Site-specific configuration hook +27.15. user — User-specific configuration hook +27.16. fpectl — Floating point exception control +27.17. distutils — Building and installing Python modules +28. Custom Python Interpreters +28.1. code — Interpreter base classes +28.2. codeop — Compile Python code +29. Restricted Execution +29.1. rexec — Restricted execution framework +29.2. Bastion — Restricting access to objects +30. Importing Modules +30.1. imp — Access the import internals +30.2. 
importlib – Convenience wrappers for __import__() +30.3. imputil — Import utilities +30.4. zipimport — Import modules from Zip archives +30.5. pkgutil — Package extension utility +30.6. modulefinder — Find modules used by a script +30.7. runpy — Locating and executing Python modules +31. Python Language Services +31.1. parser — Access Python parse trees +31.2. ast — Abstract Syntax Trees +31.3. symtable — Access to the compiler’s symbol tables +31.4. symbol — Constants used with Python parse trees +31.5. token — Constants used with Python parse trees +31.6. keyword — Testing for Python keywords +31.7. tokenize — Tokenizer for Python source +31.8. tabnanny — Detection of ambiguous indentation +31.9. pyclbr — Python class browser support +31.10. py_compile — Compile Python source files +31.11. compileall — Byte-compile Python libraries +31.12. dis — Disassembler for Python bytecode +31.13. pickletools — Tools for pickle developers +32. Python compiler package +32.1. The basic interface +32.2. Limitations +32.3. Python Abstract Syntax +32.4. Using Visitors to Walk ASTs +32.5. Bytecode Generation +33. Miscellaneous Services +33.1. formatter — Generic output formatting +34. MS Windows Specific Services +34.1. msilib — Read and write Microsoft Installer files +34.2. msvcrt – Useful routines from the MS VC++ runtime +34.3. _winreg – Windows registry access +34.4. winsound — Sound-playing interface for Windows +35. Unix Specific Services +35.1. posix — The most common POSIX system calls +35.2. pwd — The password database +35.3. spwd — The shadow password database +35.4. grp — The group database +35.5. crypt — Function to check Unix passwords +35.6. dl — Call C functions in shared objects +35.7. termios — POSIX style tty control +35.8. tty — Terminal control functions +35.9. pty — Pseudo-terminal utilities +35.10. fcntl — The fcntl() and ioctl() system calls +35.11. pipes — Interface to shell pipelines +35.12. posixfile — File-like objects with locking support +35.13. 
resource — Resource usage information +35.14. nis — Interface to Sun’s NIS (Yellow Pages) +35.15. syslog — Unix syslog library routines +35.16. commands — Utilities for running commands +36. Mac OS X specific services +36.1. ic — Access to the Mac OS X Internet Config +36.2. MacOS — Access to Mac OS interpreter features +36.3. macostools — Convenience routines for file manipulation +36.4. findertools — The finder‘s Apple Events interface +36.5. EasyDialogs — Basic Macintosh dialogs +36.6. FrameWork — Interactive application framework +36.7. autoGIL — Global Interpreter Lock handling in event loops +36.8. Mac OS Toolbox Modules +36.9. ColorPicker — Color selection dialog +37. MacPython OSA Modules +37.1. gensuitemodule — Generate OSA stub packages +37.2. aetools — OSA client support +37.3. aepack — Conversion between Python variables and AppleEvent data containers +37.4. aetypes — AppleEvent objects +37.5. MiniAEFrame — Open Scripting Architecture server support +38. SGI IRIX Specific Services +38.1. al — Audio functions on the SGI +38.2. AL — Constants used with the al module +38.3. cd — CD-ROM access on SGI systems +38.4. fl — FORMS library for graphical user interfaces +38.5. FL — Constants used with the fl module +38.6. flp — Functions for loading stored FORMS designs +38.7. fm — Font Manager interface +38.8. gl — Graphics Library interface +38.9. DEVICE — Constants used with the gl module +38.10. GL — Constants used with the gl module +38.11. imgfile — Support for SGI imglib files +38.12. jpeg — Read and write JPEG files +39. SunOS Specific Services +39.1. sunaudiodev — Access to Sun audio hardware +39.2. SUNAUDIODEV — Constants used with sunaudiodev +40. Undocumented Modules +40.1. Miscellaneous useful utilities +40.2. Platform specific modules +40.3. Multimedia +40.4. Undocumented Mac OS modules +40.5. Obsolete +40.6. SGI-specific Extension modules +» +indexmodules |next |previous | Python » Documentation » +© Copyright 1990-2012, Python Software Foundation. 
+The Python Software Foundation is a non-profit corporation. Please donate. +Last updated on Dec 01, 2012. Found a bug? +Created using Sphinx 1.0.7. diff --git a/Zim/Programme/python/The-Python-Standard-Library/1._Introduction.txt b/Zim/Programme/python/The-Python-Standard-Library/1._Introduction.txt new file mode 100644 index 0000000..2fcf718 --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/1._Introduction.txt @@ -0,0 +1,22 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-02T16:34:03+08:00 + +====== 1. Introduction ====== +Created Sunday 02 December 2012 + +===== 1. Introduction ===== +The “Python library” contains several different kinds of components. + +It contains __data types__ that would normally be considered part of the “core” of a language, such as **numbers and lists**. For these types, the Python language core defines the form of **literals** and places some constraints on their semantics, but does **not fully define** the semantics. (On the other hand, the language core does define syntactic properties like the spelling and priorities of operators.) + +The library also contains __built-in functions and exceptions__ — objects that can be used by all Python code //without the need of an import statement//. Some of these are defined by the core language, but many are not essential for the core semantics and are only described here. + +The bulk of the __library__, however, consists of a collection of modules. There are many ways to dissect this collection. Some modules are written in C and __built in__** to the Python interpreter**; others are written in Python and imported in source form. + +Some modules provide interfaces that are highly specific to Python, like printing a stack trace; some provide interfaces that are specific to particular operating systems, such as access to specific hardware; others provide interfaces that are specific to a particular application domain, like the World Wide Web. 
Some modules are available in all versions and ports of Python; others are only available when the underlying system supports or requires them; yet others are available only when a particular configuration option was chosen at the time when Python was compiled and installed. + +This manual is organized **“from the inside out:”** it first describes the built-in data types, then the built-in functions and exceptions, and finally the modules, grouped in chapters of related modules. The ordering of the chapters as well as the ordering of the modules within each chapter is roughly from most relevant to least important. + + +Let the show begin! diff --git a/Zim/Programme/python/The-Python-Standard-Library/2._Built-in_Functions.txt b/Zim/Programme/python/The-Python-Standard-Library/2._Built-in_Functions.txt new file mode 100644 index 0000000..e8ee02e --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/2._Built-in_Functions.txt @@ -0,0 +1,892 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-02T16:37:06+08:00 + +====== 2. Built-in Functions ====== +Created Sunday 02 December 2012 + +The Python interpreter has a number of functions built into it that are __always available__. They are listed here in alphabetical order. 
+ +abs() divmod() input() open() staticmethod() +all() enumerate() int() ord() str() +any() eval() isinstance() pow() sum() +basestring() execfile() issubclass() print() super() +bin() file() iter() property() tuple() +bool() filter() len() range() type() +bytearray() float() list() raw_input() unichr() +callable() format() locals() reduce() unicode() +chr() frozenset() long() reload() vars() +classmethod() getattr() map() repr() xrange() +cmp() globals() max() reversed() zip() +compile() hasattr() memoryview() round() __import__() +complex() hash() min() set() apply() +delattr() help() next() setattr() buffer() +dict() hex() object() slice() coerce() +dir() id() oct() sorted() intern() + +===== abs(x) ===== +Return the absolute value of **a number**. The argument may be a plain or long integer or a floating point number. If the argument is a complex number, its magnitude is returned. + +===== all(iterable) ===== +Return True if **all elements** of the iterable are true (or if the iterable is empty). Equivalent to: + +def all(iterable): + for element in iterable: + if not element: + return False + return True +New in version 2.5. + +===== any(iterable) ===== +Return True if **any element** of the iterable is true. If the iterable is empty, return False. Equivalent to: + +def any(iterable): + for element in iterable: + if element: + return True + return False +New in version 2.5. + +===== basestring() ===== +__This abstract type is the superclass for str and unicode. It cannot be called or instantiated__, but it can be used to test whether an object is an instance of str or unicode. **isinstance(obj, basestring)** is equivalent to isinstance(obj, (str, unicode)). +basestring其实是一个抽象类,它是str和unicode的基类,不能被实例化。 +New in version 2.3. + +===== bin(x) ===== +x需要为int或long类型,否则需要定义__index__()方法。 +Convert an __integer number__ to a binary string. The result is a valid Python expression. 
If x is not a Python int object, it has to define an **__index__()** method that returns an integer. + +=== 实例: === + **>>> bin(123.22)** + **Traceback (most recent call last):** + ** File "<stdin>", line 1, in <module>** + **TypeError: 'float' object cannot be interpreted as an index** + **>>>** + +New in version 2.6. + +===== bool([x]) ===== +Convert a value to a Boolean, using the standard truth testing procedure. If x is false or omitted, this returns **False**; otherwise it returns True. __bool is also a class__, which is a subclass of int. Class bool cannot be subclassed further. Its only instances are False and True. +__False和True是class bool的实例。__ +New in version 2.2.1. + +Changed in version 2.3: If no argument is given, this function returns False. + +===== bytearray([source[, encoding[, errors]]]) ===== +Return a new array of bytes. The bytearray type __is a mutable sequence of integers in the range 0 <= x < 256__. It has most of the usual methods of mutable sequences, described in Mutable Sequence Types, as well as most methods that the str type has, see String Methods. + +The optional source parameter can be used to initialize the array in a few different ways: + +* If it is a **string**, you must also give the encoding (and optionally, errors) parameters; bytearray() then converts the string to bytes using str.encode(). +* If it is an **integer**, __the array will have that size__ and will be initialized with null bytes. +* If it is an **object** conforming to the buffer interface, a read-only buffer of the object will be used to initialize the bytes array. +* If it is an **iterable**, it must be an iterable of integers in the range 0 <= x < 256, which are used as the initial contents of the array. +Without an argument, an array of **size 0** is created. + +New in version 2.6. + +===== callable(object) ===== +Return True if the object argument appears callable, False if not. 
If this returns true, it is still possible that a call fails, but if it is false, calling object will never succeed. Note that **classes are callable** (calling a class returns a new instance); class instances are callable if they have a __call__() method. + +===== chr(i) ===== +Return **a string of one character** whose ASCII code is the integer i. For example, chr(97) returns the string 'a'. This is the inverse of **ord()**. The argument must be in the range [0..255], inclusive; ValueError will be raised if i is outside that range. See also unichr(). + +===== classmethod(function) ===== +Return a class method for function. + +A class method receives the class as __implicit first argument__, just like an instance method receives the instance. To declare a class method, use this idiom: + +class C: + **@classmethod //不可少,否则调用时解释器不会自动将class传给f函数。** + def f(cls, arg1, arg2, ...): ... +The @classmethod form is a function **decorator** – see the description of function definitions in Function definitions for details. + +It can be called either on the **class (such as C.f()) or on an instance (such as C().f())**. The instance is **ignored** except for its class. If a class method is called for a derived class, the derived **class object** is passed as the implied first argument. +__classmethod修饰器,用来修饰一个class method。可以通过class或实例来调用该class method,解释器会将class或instance所属的class默认传给class method。__ + +Class methods are different than C++ or Java static methods. If you want those, see **staticmethod()** in this section. + +For more information on class methods, consult the documentation on the standard type hierarchy in The standard type hierarchy. + +New in version 2.2. + +Changed in version 2.4: Function decorator syntax added. + +===== cmp(x, y) ===== +Compare the two objects x and y and __return an integer__ according to the outcome. The return value is negative if x < y, zero if x == y and strictly positive if x > y. 
+ +===== compile(source, filename, mode[, flags[, dont_inherit]]) ===== +Compile the source into **a code object or AST object**. Code objects can be executed by an __exec__ statement or evaluated by a call to __eval()__. source can either be a string or an AST object. Refer to the ast module documentation for information on how to work with AST objects. + +The filename argument should give the file from which the code was read; pass some recognizable value if it wasn’t read from a file ('<string>' is commonly used). + +The mode argument specifies **what kind of code** must be compiled; it can be __'exec'__ if source consists of a sequence of statements, __'eval'__ if it consists of a single expression, or __'single'__ if it consists of a single interactive statement (in the latter case, expression statements that evaluate to something other than None will be printed). + +The optional arguments flags and dont_inherit control which future statements (see PEP 236) affect the compilation of source. If neither is present (or both are zero) the code is compiled with those future statements that are in effect in the code that is calling compile. If the flags argument is given and dont_inherit is not (or is zero) then the future statements specified by the flags argument are used in addition to those that would be used anyway. If dont_inherit is a non-zero integer then the flags argument is it – the future statements in effect around the call to compile are ignored. + +Future statements are specified by bits which can be bitwise ORed together to specify multiple statements. The bitfield required to specify a given feature can be found as the compiler_flag attribute on the _Feature instance in the __future__ module. + +This function raises SyntaxError if the compiled source is invalid, and TypeError if the source contains null bytes. + +Note When compiling **a string with multi-line code** in 'single' or 'eval' mode, input must be terminated by at least one newline character. 
This is to facilitate detection of incomplete and complete statements in the code module. +Changed in version 2.3: The flags and dont_inherit arguments were added. + +Changed in version 2.6: Support for compiling AST objects. + +Changed in version 2.7: Allowed use of Windows and Mac newlines. Also input in 'exec' mode does not have to end in a newline anymore. + +===== complex([real[, imag]]) ===== +Create a complex number with the value real + imag*j or convert a string or number to a complex number. If the first parameter is a string, it will be interpreted as a complex number and the function must be called without a second parameter. The second parameter can never be a string. Each argument may be any numeric type (including complex). If imag is omitted, it defaults to zero and the function serves as a numeric conversion function like int(), long() and float(). If both arguments are omitted, returns 0j. + +Note When converting from a string, the string must not contain whitespace around the central + or - operator. For example, complex('1+2j') is fine, but complex('1 + 2j') raises ValueError. +The complex type is described in Numeric Types — int, float, long, complex. + +===== delattr(object, name) ===== +This is a relative of setattr(). The arguments are an object and a string. The string must be the name of one of the object’s attributes. The function deletes the named attribute, provided the object allows it. For example, **delattr(x, 'foobar')** is equivalent to **del x.foobar**. + +===== dict(**kwarg) ===== + +===== dict(mapping, **kwarg) ===== + +===== dict(iterable, **kwarg) ===== +Create a new dictionary. The dict object is the dictionary class. See dict and Mapping Types — dict for documentation about this class. + +For other containers see the built-in list, set, and tuple classes, as well as the collections module. + +===== dir([object]) ===== +Without arguments, return the list of names in the current local scope. 
With an argument, attempt to return a list of valid attributes for that object. + +If the object has a method named __dir__(), this method will be called and must return the list of attributes. This allows objects that implement a custom __getattr__() or __getattribute__() function to customize the way dir() reports their attributes. + +If the object does not provide __dir__(), the function tries its best to gather information from the object’s __dict__ attribute, if defined, and from its type object. The resulting list is not necessarily complete, and may be inaccurate when the object has a custom __getattr__(). + +The default dir() mechanism behaves differently with different types of objects, as it attempts to produce the most relevant, rather than complete, information: + +* If the object is a module object, the list contains the names of the module’s attributes. +* If the object is a type or class object, the list contains the names of its attributes, and recursively of the attributes of its bases. +* Otherwise, the list contains the object’s attributes’ names, the names of its class’s attributes, and recursively of the attributes of its class’s base classes. +The resulting list is sorted alphabetically. 
For example: + +>>> +>>> import struct +>>> dir() # show the names in the module namespace +['__builtins__', '__doc__', '__name__', 'struct'] +>>> dir(struct) # show the names in the struct module +['Struct', '__builtins__', '__doc__', '__file__', '__name__', + '__package__', '_clearcache', 'calcsize', 'error', 'pack', 'pack_into', + 'unpack', 'unpack_from'] +>>> class Shape(object): + def __dir__(self): + return ['area', 'perimeter', 'location'] +>>> s = Shape() +>>> dir(s) +['area', 'perimeter', 'location'] +Note Because dir() is supplied primarily as a convenience for use at an interactive prompt, it tries to supply an interesting set of names more than it tries to supply a rigorously or consistently defined set of names, and its detailed behavior may change across releases. For example, metaclass attributes are not in the result list when the argument is a class. + +===== divmod(a, b) ===== +Take two (non complex) numbers as arguments and return a pair of numbers consisting of their quotient and remainder when using long division. With mixed operand types, the rules for binary arithmetic operators apply. For plain and long integers, the result is the same as (a // b, a % b). For floating point numbers the result is (q, a % b), where q is usually math.floor(a / b) but may be 1 less than that. In any case q * b + a % b is very close to a, if a % b is non-zero it has the same sign as b, and 0 <= abs(a % b) < abs(b). + +Changed in version 2.3: Using divmod() with complex numbers is deprecated. + +===== enumerate(sequence, start=0) ===== +Return an enumerate object. sequence must be a sequence, an iterator, or some other object which supports iteration. 
The next() method of the iterator returned by enumerate() returns a tuple containing a count (from start which defaults to 0) and the values obtained from iterating over sequence: + +>>> +>>> seasons = ['Spring', 'Summer', 'Fall', 'Winter'] +>>> list(enumerate(seasons)) +[(0, 'Spring'), (1, 'Summer'), (2, 'Fall'), (3, 'Winter')] +>>> list(enumerate(seasons, start=1)) +[(1, 'Spring'), (2, 'Summer'), (3, 'Fall'), (4, 'Winter')] +Equivalent to: + +def enumerate(sequence, start=0): + n = start + for elem in sequence: + **yield n, elem** + n += 1 +New in version 2.3. + +Changed in version 2.6: The start parameter was added. + +===== eval(expression[, globals[, locals]]) ===== +The arguments are a string and optional globals and locals. If provided, globals must be a dictionary. If provided, locals can be any mapping object. + +Changed in version 2.4: formerly locals was required to be a dictionary. + +The expression argument is parsed and evaluated as a Python expression (technically speaking, a condition list) using the globals and locals dictionaries as global and local namespace. If the globals dictionary is present and lacks ‘__builtins__’, the current globals are copied into globals before expression is parsed. This means that expression normally has full access to the standard __builtin__ module and restricted environments are propagated. If the locals dictionary is omitted it defaults to the globals dictionary. If both dictionaries are omitted, the expression is executed in the environment where eval() is called. The return value is the result of the evaluated expression. Syntax errors are reported as exceptions. Example: + +>>> +>>> x = 1 +>>> print eval('x+1') +2 +This function can also be used to execute arbitrary code objects (such as those created by compile()). In this case pass a code object instead of a string. If the code object has been compiled with 'exec' as the mode argument, eval()‘s return value will be None. 
+ +Hints: dynamic execution of statements is supported by the exec statement. Execution of statements from a file is supported by the execfile() function. The globals() and locals() functions return the current global and local dictionary, respectively, which may be useful to pass around for use by eval() or execfile(). + +See ast.literal_eval() for a function that can safely evaluate strings with expressions containing only literals. + +===== execfile(filename[, globals[, locals]]) ===== +This function is similar to __the exec statement__, but parses a file instead of a string. It is different from the import statement in that it does not use the module administration — it reads the file unconditionally and does not create a new module. [1] + +The arguments are a file name and two optional dictionaries. The file is parsed and evaluated as a sequence of Python statements (similarly to a module) using the globals and locals dictionaries as global and local namespace. If provided, locals can be any mapping object. Remember that **at module level, globals and locals are the same dictionary**. If two separate objects are passed as globals and locals, the code will be executed as if it were embedded in a class definition. + +Changed in version 2.4: formerly locals was required to be a dictionary. + +__If the locals dictionary is omitted it defaults to the globals dictionary. If both dictionaries are omitted, the expression is executed in the environment where execfile() is called.__ The return value is None. + +Note The default locals act as described for function locals() below: modifications to the default locals dictionary should not be attempted. Pass an explicit locals dictionary if you need to see effects of the code on locals after function execfile() returns. execfile() cannot be used reliably to modify a function’s locals. 
+ +===== file(name[, mode[, buffering]]) ===== +file其实是class file的构造函数。 +Constructor function for the **file type**, described further in section File Objects. The constructor’s arguments are the same as those of the open() built-in function described below. + +When opening a file, it’s preferable to use open() instead of invoking this constructor directly. file is more suited to type testing (for example, writing isinstance(f, file)). + +New in version 2.2. + +===== filter(function, iterable) ===== +Construct a list from those elements of iterable __for which function returns true__. iterable may be either a sequence, a container which supports iteration, or an iterator. + +**If iterable is a string or a tuple, the result also has that type; otherwise it is always a list.** +If function is None, the identity function is assumed, that is, all elements of iterable that are false are removed. + +**如果可迭代对象是字符串或tuple,则结果也为字符串或tuple,其它的都为list。** +**如果function为None,则结果为iterable中去掉判断为False的元素。** + +Note that **filter(function, iterable) is equivalent to [item for item in iterable if function(item)]** if function is not None and [item for item in iterable if item] if function is None. + +See **itertools.ifilter() and itertools.ifilterfalse()** for iterator versions of this function, including a variation that filters for elements where the function returns false. + +===== float([x]) ===== +Convert a string or a number to floating point. + +If the argument is a string, it must contain a possibly signed decimal or floating point number, __possibly embedded in whitespace(空格, \t, \r, \n, \v, \f)__. + +__如果x为string,则x只能为十进制整型和浮点型字符串(这是因为float函数不像int函数那样含有可选的base参数,__ +__可以猜测或指定数字的进制类型),同时可以包含空白字符。__ The argument may also be **[+|-]nan or [+|-]inf**. + +Otherwise, the argument may be **a plain** or long integer or a floating point number, and a floating point number with the same value (within Python’s floating point precision) is returned. If no argument is given, returns 0.0. 
+ +Note When passing in a string, values for **NaN** and **Infinity** may be returned, depending on the underlying C library. Float accepts the strings **nan, inf and -inf** for **NaN** and **positive or negative infinity**. The case and a leading + are ignored as well as a leading - is ignored for NaN. Float always represents NaN and infinity as nan, inf or -inf. +The float type is described in Numeric Types — int, float, long, complex. + >>> float(__0xff__) + 255.0 + >>> float(__'0xff'__) + Traceback (most recent call last): + File "", line 1, in + ValueError: invalid literal for float(): 0xff + >>> + >>> float('__+inf'__) + inf + >>> float('+nan') + nan + >>> + >>> float(__'123'__) + 123.0 + >>> float(__'123\n\r\v'__) + 123.0 + >>> + >>> float(__+nan__) + Traceback (most recent call last): + File "", line 1, in + NameError: name **'nan' is not defined** + >>> + + +===== format(value[, format_spec]) ===== +Convert a value to a “formatted” representation, as controlled by **format_spec**. The interpretation of format_spec will depend on the type of the value argument, however there is a standard formatting syntax that is used by most built-in types: Format Specification Mini-Language. + +Note format(value, format_spec) merely calls **value.__format__(format_spec)**. +New in version 2.6. + +===== frozenset([iterable]) ===== +Return a new frozenset object, optionally with elements taken from iterable. frozenset is a built-in class. See frozenset and Set Types — set, frozenset for documentation about this class. + +For other containers see the built-in set, list, tuple, and dict classes, as well as the __collections__ module. + +New in version 2.4. + +===== getattr(object, name[, default]) ===== +Return the value of the named attribute of object. **name must be a string**. If the string is the name of one of the object’s attributes, the result is the value of that attribute. For example, getattr(x, 'foobar') is equivalent to x.foobar. 
If the named attribute does not exist, default is returned if provided, otherwise AttributeError is raised. + +===== globals() ===== +Return a dictionary representing __the current global symbol table__. This is always the dictionary of the current module (inside a function or method, this is the module where it is **defined**, not the module from which it is called). + +===== hasattr(object, name) ===== +The arguments are an object and a string. The result is True if the string is the name of one of the object’s attributes, False if not. (This is implemented by calling getattr(object, name) and seeing whether it raises an exception or not.) + +===== hash(object) ===== +Return the hash value of the object (if it has one). **Hash values are integers**. They are used to quickly compare dictionary keys during a dictionary lookup. Numeric values that compare equal have the same hash value (even if they are of different types, as is the case for 1 and 1.0). + +===== help([object]) ===== +Invoke the built-in help system. (This function is intended for interactive use.) If no argument is given, the interactive help system starts on the interpreter console. If the argument is a string, then the string is looked up as the name of a module, function, class, method, keyword, or documentation topic, and a help page is printed on the console. If the argument is any other kind of object, a help page on the object is generated. + +This function is added to the built-in namespace by the site module. + +New in version 2.2. + +===== hex(x) ===== +Convert an integer number (of any size) to a hexadecimal string. The result is a valid Python expression. + +Note To obtain a hexadecimal string representation for a float, use the __float.hex()__ method. +Changed in version 2.4: Formerly only returned an unsigned literal. + +===== id(object) ===== +Return the “identity” of an object. 
This is an integer (or long integer) which is guaranteed to be unique and constant for this object during its lifetime. Two objects with non-overlapping lifetimes may have the same id() value. + +CPython implementation detail: This is **the address of the object in memory**. + +===== input([prompt]) ===== +Equivalent to **eval(raw_input(prompt))**. + +This function does not catch user errors. If the input is not syntactically valid, a SyntaxError will be raised. Other exceptions may be raised if there is an error during evaluation. + +If the __readline__ module was loaded, then input() will use it to provide elaborate line editing and history features. + +Consider using the **raw_input()** function for general input from users. + +===== int(x=0) ===== + +===== int(x, base=10) ===== +Convert a number or string x to an integer, or return 0 if no arguments are given. If x is a number, it can be a plain integer, a long integer, or a floating point number. If x is floating point, the conversion __truncates towards zero__. If the argument is outside the integer range, the function returns a long object instead. + +__如果x为number,则不需要为其指定base参数,因为int函数会根据x的前缀如0b, 0x等正确判断。如果x为float number则其必须为十进制。__ + +If x is not a number or if base is given, then x must be a string or Unicode object __representing an integer literal in radix base__. + +__如果x为string,则其必须为整型数字字符串,不能为各种进制的小数字符串。__ + + Optionally, the literal can be preceded by + or - (with no space in between) and __surrounded by whitespace__. A base-n literal consists of the digits 0 to n-1, with **a to z (or A to Z)** having values __10 to 35__. The default base is 10. The allowed values are 0 and 2-36. + +Base-2, -8, and -16 literals can be optionally **prefixed with 0b/0B, 0o/0O/0, or 0x/0X**, as with integer literals in code. __Base 0__ means to interpret the string exactly as an integer literal, so that the actual base is 2, 8, 10, or 16. + +The integer type is described in Numeric Types — int, float, long, complex. 
+ >>> int("0xfff") **//默认为Base 10, 所以解码错误。** + Traceback (most recent call last): + File "", line 1, in + ValueError: invalid literal for int() with __base 10__: '0xfff' + >>> int("0xfff", 16) + 4095 + >>> int(0xfff.ff) __//如果x为float number,则其必须为十进制。__ + Traceback (most recent call last): + File "", line 1, in + AttributeError: 'int' object has no attribute 'ff' + >>> int(111.22) + 111 + >>> int(0b111.11) + File "", line 1 + int(0b111.11) + ^ + SyntaxError: invalid syntax + >>> + >>> int("**0xfff.ff"**, 16) + Traceback (most recent call last): + File "", line 1, in + ValueError: invalid literal for int() with base 16: '0xfff.ff' + >>> + + >>> int('22.33') __//如果x为string,则必须为整型数字字符串。__ + Traceback (most recent call last): + File "", line 1, in + ValueError: invalid literal for int() with base 10: '22.33' + +===== isinstance(object, classinfo) ===== +Return true if the object argument is an instance of the classinfo argument, or of a (direct, indirect or virtual) subclass thereof. Also return true if classinfo is a type object (new-style class) and object is an object of that type or of a (direct, indirect or virtual) subclass thereof. If object is not a class instance or an object of the given type, the function always returns false. If classinfo is neither a class object nor a type object, it may be a tuple of class or type objects, or may recursively contain other such tuples (other sequence types are not accepted). If classinfo is not a class, type, or tuple of classes, types, and such tuples, a TypeError exception is raised. + +Changed in version 2.2: Support for a tuple of type information was added. + +===== issubclass(class, classinfo) ===== +Return true if class is a subclass (direct, indirect or virtual) of classinfo. A class is considered a subclass of itself. classinfo may be a tuple of class objects, in which case every entry in classinfo will be checked. In any other case, a TypeError exception is raised. 
+ +Changed in version 2.3: Support for a tuple of type information was added. + +===== iter(o[, sentinel]) ===== +Return an iterator object. The first argument is interpreted very differently depending on the presence of the second argument. Without a second argument, o must be __a collection object__ which supports **the iteration protocol** (the __iter__() method), or it must support **the sequence protocol** (the __getitem__() method with integer arguments starting at 0). If it does not support either of those protocols, TypeError is raised. If the second argument, sentinel, is given, then o must be __a callable object__. The iterator created in this case will **call o with no arguments for each call** to its next() method; if the value returned is __equal to sentinel__, StopIteration will be raised, otherwise the value will be returned. + +如果没有使用第二个参数,则o必须为容器对象。否则,o必须为可调用对象。 + +One useful application of the second form of iter() is to read lines of a file until a certain line is reached. The following example reads a file until the readline() method returns an empty string: + +with open('mydata.txt') as fp: + for line in iter(fp.readline, ''): + process_line(line) +New in version 2.2. + +===== len(s) ===== +Return the length (the number of items) of an object. The argument may be a sequence (string, tuple or list) or a mapping (dictionary). + +===== list([iterable]) ===== +Return a list whose items are the same and in the same order as iterable’s items. iterable may be either a sequence, a container that supports iteration, or an iterator object. If iterable is already a list, **a copy is made** and returned, similar to iterable[:]. For instance, list('abc') returns ['a', 'b', 'c'] and list( (1, 2, 3) ) returns [1, 2, 3]. If no argument is given, returns a new empty list, []. + +list is a mutable sequence type, as documented in Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange. 
For other containers see the built in dict, set, and tuple classes, and the collections module. + +===== locals() ===== +Update and return a dictionary representing the **current local symbol table**. Free variables are returned by locals() when it is called in function blocks, but not in class blocks. + +Note The contents of this dictionary should not be modified; changes may not affect the values of local and free variables used by the interpreter. + +===== long(x=0) ===== + +===== long(x, base=10) ===== +Convert a string or number to a long integer. If the argument is a string, it must contain a possibly signed number of arbitrary size, possibly embedded in whitespace. The base argument is interpreted in the same way as for int(), and may only be given when x is a string. Otherwise, the argument may be a plain or long integer or a floating point number, and a long integer with the same value is returned. Conversion of floating point numbers to integers truncates (towards zero). If no arguments are given, returns 0L. + +The long type is described in Numeric Types — int, float, long, complex. + +===== map(function, iterable, ...) ===== +Apply function to **every item of iterable** and return a list of the results. If additional iterable arguments are passed, function must take that many arguments and is applied to the items from all iterables in parallel. If one iterable is shorter than another it is assumed to be __extended with None items__. If function is None, the identity function is assumed; if there are multiple arguments, map() returns a list consisting of tuples containing the corresponding items from all iterables (a kind of transpose operation). The iterable arguments may be a sequence or any iterable object; the result is __always a list__. + +===== max(iterable[, key]) ===== + +===== max(arg1, arg2, *args[, key]) ===== +Return the largest item in an iterable or the largest of two or more arguments. 
+ +If one positional argument is provided, iterable must be a non-empty iterable (such as a non-empty string, tuple or list). The largest item in the iterable is returned. If two or more positional arguments are provided, the largest of the positional arguments is returned. + +The optional key argument specifies **a one-argument ordering function** like that used for list.sort(). The key argument, if supplied, must be __in keyword form__ (for example, max(a,b,c,key=func)). + +Changed in version 2.5: Added support for the optional key argument. + +===== memoryview(obj) ===== +Return a “memory view” object created from the given argument. See memoryview type for more information. + +===== min(iterable[, key]) ===== + +===== min(arg1, arg2, *args[, key]) ===== +Return the smallest item in an iterable or the smallest of two or more arguments. + +If one positional argument is provided, iterable must be a non-empty iterable (such as a non-empty string, tuple or list). The smallest item in the iterable is returned. If two or more positional arguments are provided, the smallest of the positional arguments is returned. + +The optional key argument specifies a one-argument ordering function like that used for list.sort(). The key argument, if supplied, must be __in keyword form__ (for example, min(a,b,c,key=func)). + +Changed in version 2.5: Added support for the optional key argument. + +===== next(iterator[, default]) ===== +Retrieve the next item from the iterator by calling its next() method. If default is given, it is returned if the iterator is exhausted, otherwise StopIteration is raised. + +New in version 2.6. + +===== object() ===== +Return a new featureless object. object is a base for all new style classes. It has the methods that are common to all instances of new style classes. + +New in version 2.2. + +Changed in version 2.3: This function does not accept any arguments. Formerly, it accepted arguments but ignored them. 
+ +===== oct(x) ===== +Convert an integer number (of any size) to an octal string. The result is a valid Python expression. + +Changed in version 2.4: Formerly only returned an unsigned literal. + +===== open(name[, mode[, buffering]]) ===== +Open a file, returning an object of the **file type** described in section File Objects. If the file cannot be opened, __IOError__ is raised. When opening a file, it’s preferable to use open() instead of invoking the file constructor directly. + +The first two arguments are the same as for stdio‘s fopen(): name is the file name to be opened, and mode is a string indicating how the file is to be opened. + +The most commonly-used values of mode are __'r'__ for reading, __'w'__ for writing (**truncating the file** if it already exists), and __'a'__ for appending (which on some Unix systems means that all writes append to the end of the file regardless of the current seek position). If mode is omitted, __it defaults to 'r'__. The default is to use __text mode__, which may convert '\n' characters to a platform-specific representation on writing and back on reading. Thus, when opening a binary file, you should append __'b'__ to the mode value to open the file in binary mode, which will improve portability. (Appending 'b' is useful even on systems that don’t treat binary and text files differently, where it serves as documentation.) See below for more possible values of mode. + +The optional buffering argument specifies the file’s desired buffer size: __0 means unbuffered, 1 means line buffered, any other positive value means use a buffer of (approximately) that size.__ A negative buffering means to use **the system default**, which is usually line buffered for tty devices and fully buffered for other files. If omitted, the system default is used. [2] + +Modes **'r+', 'w+' and 'a+'** open the file for updating (note that __'w+' truncates the file__). 
Append 'b' to the mode to open the file in binary mode, on systems that differentiate between binary and text files; on systems that don’t have this distinction, adding the 'b' has no effect. + +In addition to the standard **fopen()** values mode may be 'U' or 'rU'. Python is usually built with universal newlines support; supplying 'U' opens the file as a text file, but lines may be terminated by any of the following: the Unix end-of-line convention '\n', the Macintosh convention '\r', or the Windows convention '\r\n'. All of these external representations are seen as '\n' by the Python program. If Python is built without universal newlines support a mode with 'U' is the same as normal text mode. Note that file objects so opened also have an attribute called newlines which has a value of None (if no newlines have yet been seen), '\n', '\r', '\r\n', or a tuple containing all the newline types seen. + +Python enforces that the mode, after stripping 'U', begins with 'r', 'w' or 'a'. + +Python provides many file handling modules including __fileinput, os, os.path, tempfile, and shutil__. + +Changed in version 2.5: Restriction on first letter of mode string introduced. + +===== ord(c) ===== +Given a string of length one, return an integer representing the Unicode code point of the character when the argument is a unicode object, or the value of the byte when the argument is an 8-bit string. For example, ord('a') returns the integer 97, ord(u'\u2020') returns 8224. This is the inverse of chr() for 8-bit strings and of unichr() for unicode objects. If a unicode argument is given and Python was built with UCS2 Unicode, then the character’s code point must be in the range [0..65535] inclusive; otherwise the string length is two, and a TypeError will be raised. + +===== pow(x, y[, z]) ===== +Return x to the power y; if z is present, return **x to the power y, modulo z** (computed more efficiently than pow(x, y) % z). 
The two-argument form pow(x, y) is equivalent to using the power operator: ''x**y''. + +The arguments must have numeric types. With mixed operand types, the coercion rules for binary arithmetic operators apply. For int and long int operands, the result has the same type as the operands (after coercion) unless the second argument is negative; in that case, all arguments are converted to float and a float result is delivered. For example, 10**2 returns 100, but 10**-2 returns 0.01. (This last feature was added in Python 2.2. In Python 2.1 and before, if both arguments were of integer types and the second argument was negative, an exception was raised.) If the second argument is negative, the third argument must be omitted. If z is present, x and y must be of integer types, and y must be non-negative. (This restriction was added in Python 2.2. In Python 2.1 and before, floating 3-argument pow() returned platform-dependent results depending on floating-point rounding accidents.) + +===== print(*objects, sep=' ', end='\n', file=sys.stdout) ===== + +Print objects to the stream file, separated by sep and followed by end. sep, end and file, if present, must be given as keyword arguments. + +All non-keyword arguments are converted to strings like **str()** does and written to the stream, separated by sep and followed by end. Both sep and end must be strings; they can also be None, which means to use the default values. If no objects are given, print() will just write end. + +The file argument must be an object with a write(string) method; if it is not present or None, sys.stdout will be used. Output buffering is determined by file. Use **file.flush()** to ensure, for instance, immediate appearance on a screen. + +Note This function is **not normally available as a built-in** since the name print is recognized as the **print statement**. 
+该函数一般不能直接使用,因为python2中的print是一个statement。 +To disable the statement and use the print() function, use this future statement at the top of your module: +''from __future__ import print_function'' +New in version 2.6. + +===== property([fget[, fset[, fdel[, doc]]]]) ===== +Return a property attribute for new-style classes (classes that derive from object). + +fget is a function for getting an attribute value, likewise fset is a function for setting, and fdel a function for del’ing, an attribute. Typical use is to define a managed attribute x: + +class C(object): + def __init__(self): + self._x = None + + def getx(self): + return self._x + def setx(self, value): + self._x = value + def delx(self): + del self._x + __x = property(getx, setx, delx, "I'm the 'x' property.")__ +If then c is an instance of C, **c.x will invoke the getter, c.x = value will invoke the setter and del c.x the deleter**. + +If given, doc will be the docstring of the property attribute(例如上面的最后一行字符串). Otherwise, the property will copy fget’s docstring (if it exists). This makes it possible to create **read-only properties** easily using property() as a decorator: + +class Parrot(object): + def __init__(self): + self._voltage = 100000 + + __@property //生成一个read-only property。__ + def voltage(self): + """Get the current voltage.""" + return self._voltage +turns the voltage() method into a “getter” for a read-only attribute with the same name. + +A property object has __getter, setter, and deleter__ methods usable as decorators that create a copy of the property with the corresponding accessor function set to the decorated function. 
This is best explained with an example: + +class C(object): + def __init__(self): + self._x = None + + @property //property装饰器使修饰的函数x变为property object,默认生成一个read-only property。 + def x(self): + """I'm the 'x' property.""" + return self._x + + __@x__.setter //调用property object的setter方法。 + __def x__(self, value): + self._x = value + + @x.deleter + def x(self): //注意没有参数,x就暗示了属性的类型。 + del self._x +This code is exactly equivalent to the first example. Be sure to give the additional functions **the same name as the original** property (x in this case.) + +The returned property also has the attributes fget, fset, and fdel corresponding to the constructor arguments. + +New in version 2.2. + +Changed in version 2.5: Use fget’s docstring if no doc given. + +Changed in version 2.6: The getter, setter, and deleter attributes were added. + +===== range(stop) ===== + +===== range(start, stop[, step]) ===== +This is a versatile function to create lists containing arithmetic progressions. It is most often used in __for loops__. The arguments must be plain integers. If the step argument is omitted, it defaults to 1. If the start argument is omitted, it defaults to 0. The full form returns a list of plain integers [start, start + step, start + 2 * step, ...]. If step is positive, the last element is the largest start + i * step less than stop; if step is negative, the last element is the smallest start + i * step greater than stop. step must not be zero (or else ValueError is raised). Example: + +>>> +>>> range(10) +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +>>> range(1, 11) +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +>>> range(0, 30, 5) +[0, 5, 10, 15, 20, 25] +>>> range(0, 10, 3) +[0, 3, 6, 9] +>>> range(0, -10, -1) +[0, -1, -2, -3, -4, -5, -6, -7, -8, -9] +>>> range(0) +[] +>>> range(1, 0) +[] + +===== raw_input([prompt]) ===== +If the prompt argument is present, it is written to standard output __without a trailing newline__. 
The function then reads a line from input, __converts it to a string (stripping a trailing newline)__, and returns that. When EOF is read, EOFError is raised. Example: + +>>> +>>> s = raw_input('--> ') +--> Monty Python's Flying Circus +>>> s +"Monty Python's Flying Circus" +If the readline module was loaded, then raw_input() will use it to provide elaborate line editing and history features. + +===== reduce(function, iterable[, initializer]) ===== +Apply function of __two arguments__ cumulatively to the items of iterable, from left to right, so as to reduce the iterable to a single value. For example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates ((((1+2)+3)+4)+5). **The left argument, x, is the **__accumulated value__** and the right argument, y, is the update value from the iterable**. If the optional initializer is present, it is placed before the items of the iterable in the calculation, and serves as a default when the iterable is empty. If initializer is not given and iterable contains only one item, the first item is returned. Roughly equivalent to: + +def reduce(function, iterable, initializer=None): + it = iter(iterable) + if initializer is None: + try: + initializer = next(it) + except StopIteration: + raise TypeError('reduce() of empty sequence with no initial value') + **accum_value** = initializer + for x in it: + accum_value = function(accum_value, x) + return accum_value + +===== reload(module) ===== +Reload a previously imported module. The argument must be **a module object**, so it must have been successfully imported before. This is useful if you have **edited the module source file** using an external editor and want to try out the new version without leaving the Python interpreter. The return value is the module object (the same as the module argument). 
+ +When reload(module) is executed: + +Python modules’ code is recompiled and the module-level code reexecuted, defining a new set of objects which are bound to names in the module’s dictionary. The init function of extension modules is not called a second time. +As with all other objects in Python the old objects are only reclaimed after their reference counts drop to zero. +The names in the module namespace are updated to point to any new or changed objects. +Other references to the old objects (such as names external to the module) are not rebound to refer to the new objects and must be updated in each namespace where they occur if that is desired. + +There are a number of other caveats: + +If a module is syntactically correct but its initialization fails, the first import statement for it does not bind its name locally, but does store a (partially initialized) module object in sys.modules. To reload the module you must first import it again (this will bind the name to the partially initialized module object) before you can reload() it. + +When a module is reloaded, its dictionary (containing the module’s global variables) is retained. Redefinitions of names will override the old definitions, so this is generally not a problem. If the new version of a module does not define a name that was defined by the old version, the old definition remains. This feature can be used to the module’s advantage if it maintains a global table or cache of objects — with a try statement it can test for the table’s presence and skip its initialization if desired: + +try: + cache +except NameError: + cache = {} +It is legal though generally not very useful to reload built-in or dynamically loaded modules, except for sys, __main__ and __builtin__. In many cases, however, extension modules are not designed to be initialized more than once, and may fail in arbitrary ways when reloaded. + +If a module imports objects from another module using from ... 
import ..., calling reload() for the other module does not redefine the objects imported from it — one way around this is to re-execute the from statement, another is to use import and qualified names (module.*name*) instead. + +If a module instantiates instances of a class, reloading the module that defines the class does not affect the method definitions of the instances — they continue to use the old class definition. The same is true for derived classes. + +===== repr(object) ===== +Return a string containing **a printable representation of an object**. This is the same value yielded by conversions (reverse quotes). It is sometimes useful to be able to access this operation as an ordinary function. For many types, this function makes an attempt to return a string that would yield an object with the same value when passed to eval(), otherwise the representation is a string enclosed in angle brackets that contains the name of the type of the object together with additional information often including the name and address of the object. A class can control what this function returns for its instances by defining a __repr__() method. + +===== reversed(seq) ===== +__Return a reverse iterator.__ seq must be an object which has a __reversed__() method or supports **the sequence protocol** (the __len__() method and the __getitem__() method with integer arguments starting at 0). + +New in version 2.4. + +Changed in version 2.6: Added the possibility to write a custom __reversed__() method. + +===== round(number[, ndigits]) ===== +Return the floating point value number rounded to **ndigits digits after the decimal point.** If ndigits is omitted, it defaults to zero. The result is a floating point number. Values are rounded to the closest multiple of 10 to the power minus ndigits; if two multiples are equally close, rounding is done away from 0 (so, for example, round(0.5) is 1.0 and round(-0.5) is -1.0). 
+ +Note The behavior of round() for floats can be surprising: for example, round(2.675, 2) gives 2.67 instead of the expected 2.68. This is not a bug: it’s a result of the fact that most decimal fractions can’t be represented exactly as a float. See Floating Point Arithmetic: Issues and Limitations for more information. + +===== set([iterable]) ===== +Return a new set object, optionally with elements taken from iterable. set is a built-in class. See set and Set Types — set, frozenset for documentation about this class. + +For other containers see the built-in frozenset, list, tuple, and dict classes, as well as the collections module. + +New in version 2.4. + +===== setattr(object, name, value) ===== +This is the counterpart of getattr(). The arguments are an object, a string and an arbitrary value. The string may name an existing attribute or a new attribute. The function assigns the value to the attribute, provided the object allows it. For example, setattr(x, 'foobar', 123) is equivalent to x.foobar = 123. + +===== slice(stop) ===== + +===== slice(start, stop[, step]) ===== +Return a slice object representing the set of indices specified by **range(start, stop, step)**. The start and step arguments default to None. Slice objects have read-only data attributes start, stop and step which merely return the argument values (or their default). They have no other explicit functionality; however they are used by Numerical Python and other third party extensions. Slice objects are also generated when extended indexing syntax is used. For example: a[start:stop:step] or a[start:stop, i]. See itertools.islice() for an alternate version that returns an iterator. + +===== sorted(iterable[, cmp[, key[, reverse]]]) ===== +Return __a new sorted list__ from the items in iterable. + +The optional arguments **cmp, key, and reverse** have the same meaning as those for the list.sort() method (described in section Mutable Sequence Types). 
+ +cmp specifies __a custom comparison function of two arguments__ (iterable elements) which should __return a negative, zero or positive number__ depending on whether the first argument is considered smaller than, equal to, or larger than the second argument: cmp=lambda x,y: cmp(x.lower(), y.lower()). The default value is None. + +key specifies a function of one argument that is used to **extract a comparison key from each list element**: key=str.lower. The default value is None (compare the elements directly). + +reverse is a boolean value. If set to True, then the list elements are sorted as if each comparison were reversed. + +In general, the key and reverse conversion processes are much faster than specifying an equivalent cmp function. This is because cmp is called multiple times for each list element while key and reverse touch each element only once. Use **functools.cmp_to_key()** to convert an old-style cmp function to a key function. + +For sorting examples and a brief sorting tutorial, see Sorting HowTo. + +New in version 2.4. + +===== staticmethod(function) ===== +Return a static method for function. + +A static method does not receive an implicit first argument. To declare a static method, use this idiom: + +class C: + @staticmethod + def f(arg1, arg2, ...): ... +The @staticmethod form is a function decorator – see the description of function definitions in Function definitions for details. + +It can be called either on the class (such as C.f()) or on an instance (such as C().f()). The instance is ignored except for its class. + +Static methods in Python are similar to those found in Java or C++. Also see classmethod() for a variant that is useful for creating alternate class constructors. + +For more information on static methods, consult the documentation on the standard type hierarchy in The standard type hierarchy. + +New in version 2.2. + +Changed in version 2.4: Function decorator syntax added. 
+ +===== str(object='') ===== +Return a string containing a nicely printable representation of an object. For strings, this returns the string itself. The difference with repr(object) is that str(object) does not always attempt to return a string that is acceptable to eval(); its goal is to return a printable string. If no argument is given, returns the empty string, ''. + +For more information on strings see Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange which describes sequence functionality (strings are sequences), and also the string-specific methods described in the String Methods section. To output formatted strings use template strings or the % operator described in the String Formatting Operations section. In addition see the String Services section. See also unicode(). + +===== sum(iterable[, start]) ===== +Sums start and the items of an iterable from left to right and returns the total. start defaults to 0. The iterable‘s items are normally numbers, and the start value is not allowed to be a string. + +For some use cases, there are good alternatives to sum(). The preferred, fast way to concatenate a sequence of strings is by calling __''.join(sequence)__. To add floating point values with extended precision, see math.fsum(). To concatenate a series of iterables, consider using itertools.chain(). + +New in version 2.3. + +===== super(type[, object-or-type]) ===== +Return a proxy object that delegates method calls to a parent or sibling class of type. This is useful for accessing inherited methods that have been overridden in a class. The search order is same as that used by getattr() except that the type itself is skipped. + +The ____mro____ attribute of the type lists **the method resolution search order** used by both getattr() and super(). The attribute is dynamic and can change whenever the inheritance hierarchy is updated. + +If the second argument is omitted, the super object returned is unbound. 
If the second argument is an object, isinstance(obj, type) must be true. If the second argument is a type, issubclass(type2, type) must be true (this is useful for classmethods). + +Note super() only works for new-style classes. +There are two typical use cases for super. In a class hierarchy with single inheritance, **super can be used to refer to parent classes **__without naming them explicitly__, thus making the code more maintainable. This use closely parallels the use of super in other programming languages. + +The second use case is to support __cooperative multiple inheritance in a dynamic execution environment__. This use case is unique to Python and is not found in statically compiled languages or languages that only support single inheritance. This makes it possible to implement “diamond diagrams” where multiple base classes implement the same method. Good design dictates that this method have the same calling signature in every case (because the order of calls is determined at runtime, because that order adapts to changes in the class hierarchy, and because that order can include sibling classes that are unknown prior to runtime). + +For both use cases, a typical superclass call looks like this: + +class C(B): + def method(self, arg): + super(C, self).method(arg) +Note that super() is implemented as part of the binding process for explicit dotted attribute lookups such as super().__getitem__(name). It does so by implementing its own __getattribute__() method for searching classes in a predictable order that supports cooperative multiple inheritance. Accordingly, super() is undefined for implicit lookups using statements or operators such as super()[name]. + +Also note that super() is not limited to use inside methods. The two argument form specifies the arguments exactly and makes the appropriate references. + +For practical suggestions on how to design cooperative classes using super(), see guide to using super(). + +New in version 2.2. 
+ +===== tuple([iterable]) ===== +Return a tuple whose items are the same and in the same order as iterable‘s items. iterable may be a sequence, a container that supports iteration, or an iterator object. If iterable is already a tuple, it is returned unchanged. For instance, tuple('abc') returns ('a', 'b', 'c') and tuple([1, 2, 3]) returns (1, 2, 3). If no argument is given, returns a new empty tuple, (). + +tuple is an immutable sequence type, as documented in Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange. For other containers see the built in dict, list, and set classes, and the collections module. + +===== type(object) ===== + +===== type(name, bases, dict) ===== +With one argument, return the type of an object. The return value is a type object. The isinstance() built-in function is recommended for testing the type of an object. + +With three arguments, return a new type object. This is essentially a dynamic form of the class statement. The name string is the class name and becomes the __name__ attribute; the bases tuple itemizes the base classes and becomes the __bases__ attribute; and the dict dictionary is the namespace containing definitions for class body and becomes the __dict__ attribute. For example, the following two statements create identical type objects: + +>>> +>>> class X(object): +... a = 1 +... +>>> X = type('X', (object,), dict(a=1)) +New in version 2.2. + +===== unichr(i) ===== +Return the Unicode string of one character whose Unicode code is the integer i. For example, unichr(97) returns the string u'a'. This is the inverse of ord() for Unicode strings. The valid range for the argument depends how Python was configured – it may be either UCS2 [0..0xFFFF] or UCS4 [0..0x10FFFF]. ValueError is raised otherwise. For ASCII and 8-bit strings see chr(). + +New in version 2.0. 
+ +===== unicode(object='') ===== + +===== unicode(object[, encoding[, errors]]) ===== +Return the Unicode string version of object using one of the following modes: + +If encoding and/or errors are given, unicode() will decode the object which can either be an 8-bit string or a character buffer using the codec for encoding. The encoding parameter is a string giving the name of an encoding; if the encoding is not known, LookupError is raised. Error handling is done according to errors; this specifies the treatment of characters which are invalid in the input encoding. If errors is 'strict' (the default), a ValueError is raised on errors, while a value of 'ignore' causes errors to be silently ignored, and a value of 'replace' causes the official Unicode replacement character, U+FFFD, to be used to replace input characters which cannot be decoded. See also the codecs module. + +If no optional parameters are given, unicode() will mimic the behaviour of str() except that it returns Unicode strings instead of 8-bit strings. More precisely, if object is a Unicode string or subclass it will return that Unicode string without any additional decoding applied. + +For objects which provide a __unicode__() method, it will call this method without arguments to create a Unicode string. For all other objects, the 8-bit string version or representation is requested and then converted to a Unicode string using the codec for the default encoding in 'strict' mode. + +For more information on Unicode strings see Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange which describes sequence functionality (Unicode strings are sequences), and also the string-specific methods described in the String Methods section. To output formatted strings use template strings or the % operator described in the String Formatting Operations section. In addition see the String Services section. See also str(). + +New in version 2.0. 
+ +Changed in version 2.2: Support for __unicode__() added. + +===== vars([object]) ===== +Return the __dict__ attribute for a module, class, instance, or any other object with a __dict__ attribute. + +Objects such as modules and instances have an updateable __dict__ attribute; however, other objects may have write restrictions on their __dict__ attributes (for example, new-style classes use a dictproxy to prevent direct dictionary updates). + +Without an argument, vars() acts like locals(). Note, the locals dictionary is only useful for reads since updates to the locals dictionary are ignored. + +===== xrange(stop) ===== + +===== xrange(start, stop[, step]) ===== +This function is very similar to range(), but returns an xrange object instead of a list. This is an opaque sequence type which yields the same values as the corresponding list, without actually storing them all simultaneously. The advantage of xrange() over range() is minimal (since xrange() still has to create the values when asked for them) except when a very large range is used on a memory-starved machine or when all of the range’s elements are never used (such as when the loop is usually terminated with break). For more information on xrange objects, see XRange Type and Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange. + +CPython implementation detail: xrange() is intended to be simple and fast. Implementations may impose restrictions to achieve this. The C implementation of Python restricts all arguments to native C longs (“short” Python integers), and also requires that the number of elements fit in a native C long. If a larger range is needed, an alternate version can be crafted using the itertools module: islice(count(start, step), (stop-start+step-1+2*(step<0))//step). + +===== zip([iterable, ...]) ===== +This function returns a list of tuples, where the i-th tuple contains the i-th element from each of the argument sequences or iterables. 
The returned list is truncated in length to the length of the shortest argument sequence. When there are multiple arguments which are all of the same length, zip() is similar to map() with an initial argument of None. With a single sequence argument, it returns a list of 1-tuples. With no arguments, it returns an empty list. + +The left-to-right evaluation order of the iterables is guaranteed. This makes possible an idiom for clustering a data series into n-length groups using zip(*[iter(s)]*n). + +zip() in conjunction with the * operator can be used to unzip a list: + +>>> +>>> x = [1, 2, 3] +>>> y = [4, 5, 6] +>>> zipped = zip(x, y) +>>> zipped +[(1, 4), (2, 5), (3, 6)] +>>> x2, y2 = zip(*zipped) +>>> x == list(x2) and y == list(y2) +True +New in version 2.0. + +Changed in version 2.4: Formerly, zip() required at least one argument and zip() raised a TypeError instead of returning an empty list. + +===== __import__(name[, globals[, locals[, fromlist[, level]]]]) ===== +Note This is an advanced function that is not needed in everyday Python programming, unlike importlib.import_module(). +This function is invoked by the import statement. It can be replaced (by importing the __builtin__ module and assigning to __builtin__.__import__) in order to change semantics of the import statement, but nowadays it is usually simpler to use import hooks (see PEP 302). Direct use of __import__() is rare, except in cases where you want to import a module whose name is only known at runtime. + +The function imports the module name, potentially using the given globals and locals to determine how to interpret the name in a package context. The fromlist gives the names of objects or submodules that should be imported from the module given by name. The standard implementation does not use its locals argument at all, and uses its globals only to determine the package context of the import statement. + +level specifies whether to use absolute or relative imports. 
The default is -1 which indicates both absolute and relative imports will be attempted. 0 means only perform absolute imports. Positive values for level indicate the number of parent directories to search relative to the directory of the module calling __import__(). + +When the name variable is of the form package.module, normally, the top-level package (the name up till the first dot) is returned, not the module named by name. However, when a non-empty fromlist argument is given, the module named by name is returned. + +For example, the statement import spam results in bytecode resembling the following code: + +spam = __import__('spam', globals(), locals(), [], -1) +The statement import spam.ham results in this call: + +spam = __import__('spam.ham', globals(), locals(), [], -1) +Note how __import__() returns the toplevel module here because this is the object that is bound to a name by the import statement. + +On the other hand, the statement from spam.ham import eggs, sausage as saus results in + +_temp = __import__('spam.ham', globals(), locals(), ['eggs', 'sausage'], -1) +eggs = _temp.eggs +saus = _temp.sausage +Here, the spam.ham module is returned from __import__(). From this object, the names to import are retrieved and assigned to their respective names. + +If you simply want to import a module (potentially within a package) by name, use importlib.import_module(). + +Changed in version 2.5: The level parameter was added. + +Changed in version 2.5: Keyword support for parameters was added. + +===== 3. Non-essential Built-in Functions ===== +There are several built-in functions that are no longer essential to learn, know or use in modern Python programming. They have been kept here to maintain backwards compatibility with programs written for older versions of Python. + +Python programmers, trainers, students and book writers should feel free to bypass these functions without concerns about missing something important. 
+ +apply(function, args[, keywords]) +The function argument must be a callable object (a user-defined or built-in function or method, or a class object) and the args argument must be a sequence. The function is called with args as the argument list; the number of arguments is the length of the tuple. If the optional keywords argument is present, it must be a dictionary whose keys are strings. It specifies keyword arguments to be added to the end of the argument list. Calling apply() is different from just calling function(args), since in that case there is always exactly one argument. The use of apply() is equivalent to function(*args, **keywords). + +Deprecated since version 2.3: Use function(*args, **keywords) instead of apply(function, args, keywords) (see Unpacking Argument Lists). + +buffer(object[, offset[, size]]) +The object argument must be an object that supports the buffer call interface (such as strings, arrays, and buffers). A new buffer object will be created which references the object argument. The buffer object will be a slice from the beginning of object (or from the specified offset). The slice will extend to the end of object (or will have a length given by the size argument). + +coerce(x, y) +Return a tuple consisting of the two numeric arguments converted to a common type, using the same rules as used by arithmetic operations. If coercion is not possible, raise TypeError. + +intern(string) +Enter string in the table of “interned” strings and return the interned string – which is string itself or a copy. Interning strings is useful to gain a little performance on dictionary lookup – if the keys in a dictionary are interned, and the lookup key is interned, the key comparisons (after hashing) can be done by a pointer compare instead of a string compare. Normally, the names used in Python programs are automatically interned, and the dictionaries used to hold module, class or instance attributes have interned keys. 
+ +Changed in version 2.3: Interned strings are not immortal (like they used to be in Python 2.2 and before); you must keep a reference to the return value of intern() around to benefit from it. + +Footnotes + +[1] It is used relatively rarely so does not warrant being made into a statement. +[2] Specifying a buffer size currently has no effect on systems that don’t have setvbuf(). The interface to specify the buffer size is not done using a method that calls setvbuf(), because that may dump core when called after any I/O has been performed, and there’s no reliable way to determine whether this is the case. +[3] In the current implementation, local variable bindings cannot normally be affected this way, but variables retrieved from other scopes (such as modules) can be. This may change. +» +indexmodules |next |previous | Python » Documentation » The Python Standard Library » +© Copyright 1990-2012, Python Software Foundation. +The Python Software Foundation is a non-profit corporation. Please donate. +Last updated on Dec 01, 2012. Found a bug? +Created using Sphinx 1.0.7. diff --git a/Zim/Programme/python/The-Python-Standard-Library/4._Built-in_Constants.txt b/Zim/Programme/python/The-Python-Standard-Library/4._Built-in_Constants.txt new file mode 100644 index 0000000..b1b45c9 --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/4._Built-in_Constants.txt @@ -0,0 +1,57 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-02T20:49:44+08:00 + +====== 4. Built-in Constants ====== +Created Sunday 02 December 2012 + +A small number of constants live in the built-in namespace. They are: + +===== False ===== +The false value of the bool type. + +New in version 2.3. + +===== True ===== +The true value of the bool type. + +New in version 2.3. + +===== None ===== +The sole value of types.NoneType. None is frequently used to represent the absence of a value, as when default arguments are not passed to a function. 
+ +Changed in version 2.4: Assignments to None are illegal and raise a SyntaxError. + +===== NotImplemented ===== +Special value which can be returned by the “rich comparison” special methods (__eq__(), __lt__(), and friends), to indicate that the comparison is not implemented with respect to the other type. + +===== Ellipsis ===== +Special value used in conjunction with extended slicing syntax. + +===== __debug__ ===== +This constant is true if Python was __not started with an -O option__. See also the assert statement. + +Note The names None and __debug__ cannot be reassigned (assignments to them, even as an attribute name, raise SyntaxError), so they can be considered “true” constants. +Changed in version 2.7: Assignments to __debug__ as an attribute became illegal. + +===== 4.1. Constants added by the site module ===== +The site module (which is __imported automatically during startup__, except if the -S command-line option is given) adds several constants to the built-in namespace. They are useful for the interactive interpreter shell and should not be used in programs. + +===== quit([code=None]) ===== + +===== exit([code=None]) ===== +Objects that when printed, print a message like “Use quit() or Ctrl-D (i.e. EOF) to exit”, and when called, raise SystemExit with the specified exit code. + +==== copyright ==== + +==== license ==== + +==== credits ==== +Objects that when printed, print a message like “Type license() to see the full license text”, and when called, display the corresponding text in a pager-like fashion (one screen at a time). + +» +indexmodules |next |previous | Python » Documentation » The Python Standard Library » +© Copyright 1990-2012, Python Software Foundation. +The Python Software Foundation is a non-profit corporation. Please donate. +Last updated on Dec 02, 2012. Found a bug? +Created using Sphinx 1.0.7. 
diff --git a/Zim/Programme/python/The-Python-Standard-Library/5._Built-in_Types.txt b/Zim/Programme/python/The-Python-Standard-Library/5._Built-in_Types.txt new file mode 100644 index 0000000..0b7f517 --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/5._Built-in_Types.txt @@ -0,0 +1,1403 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-02T20:53:23+08:00 + +====== 5. Built-in Types ====== +Created Sunday 02 December 2012 + +The following sections describe the standard types that are built into the interpreter. + +Note Historically (until release 2.2), Python’s built-in types have differed from user-defined types because it was not possible to use the built-in types as the basis for object-oriented inheritance. This limitation no longer exists. +The principal built-in types are __numerics, sequences, mappings, files, classes, instances and exceptions__. + +Some operations are supported by several object types; in particular, practically all objects can be compared, tested for truth value, and converted to a string (with the repr() function or the slightly different str() function). The latter function is implicitly used when an object is written by the print() function. + +===== 5.1. Truth Value Testing ===== +Any object can be tested for truth value, for use in an if or while condition or as operand of the Boolean operations below. The following values are considered false: + +* None +* False +* zero of any numeric type, for example, 0, 0L, 0.0, 0j. +* any empty sequence, for example, '', (), []. +* any empty mapping, for example, {}. +* instances of user-defined classes, if the class defines a __nonzero__() or __len__() method, when that method returns the integer zero or bool value False. [1] + +All other values are considered true — so objects of many types are always true. 
+ +Operations and built-in functions that have a Boolean result always return 0 or False for false and 1 or True for true, unless otherwise stated. (Important exception: the Boolean operations or and and always return one of their operands.) + +===== 5.2. Boolean Operations — and, or, not ===== +These are the Boolean operations, ordered by ascending priority: + +Operation Result Notes +x or y if x is false, then y, else x (1) +x and y if x is false, then x, else y (2) +not x if x is false, then True, else False (3) +Notes: + +* This is a short-circuit operator, so it only evaluates the second argument if the first one is False. +* This is a short-circuit operator, so it only evaluates the second argument if the first one is True. +* not has a lower priority than non-Boolean operators, so not a == b is interpreted as not (a == b), and a == not b is a syntax error. + +===== 5.3. Comparisons ===== +Comparison operations are supported by all objects. They __all have the same priority__ (which is higher than that of the Boolean operations). Comparisons can be chained arbitrarily; for example, __x < y <= z is equivalent to x < y and y <= z__, except that y is evaluated only once (but in both cases z is not evaluated at all when x < y is found to be false). + +This table summarizes the comparison operations: + +Operation Meaning Notes +* < strictly less than +* <= less than or equal +* > strictly greater than +* >= greater than or equal +* == equal +* != not equal (1) +* is object identity +is not negated object identity +Notes: + +!= can also be written **<>**, but this is an obsolete usage kept for backwards compatibility only. New code should always use !=. + +Objects of different types, except different numeric types and different string types, never compare equal; such objects are ordered consistently but arbitrarily (so that sorting a heterogeneous array yields a consistent result). 
Furthermore, some types (for example, file objects) support only a degenerate notion of comparison where any two objects of that type are unequal. Again, such objects are ordered arbitrarily but consistently. The <, <=, > and >= operators will raise a TypeError exception when any operand is a complex number. + +Instances of a class normally compare as non-equal unless the class defines the **__cmp__()** method. Refer to Basic customization for information on the use of this method to effect object comparisons. + +CPython implementation detail: Objects of different types except numbers are ordered by their type names; objects of the same types that don’t support proper comparison are ordered by their address. + +Two more operations with the same syntactic priority, in and not in, are supported only by sequence types (below). + +===== 5.4. Numeric Types — int, float, long, complex ===== +There are four distinct numeric types: **plain integers, long integers, floating point numbers, and complex numbers**. In addition, **Booleans are a subtype of plain integers**. Plain integers (also just called integers) are implemented using long in C, which gives them at least 32 bits of precision (__sys.maxint__ is always set to the maximum plain integer value for the current platform, the minimum value is __-sys.maxint - 1__). Long integers have unlimited precision. Floating point numbers are usually implemented using double in C; information about the precision and internal representation of floating point numbers for the machine on which your program is running is available in __sys.float_info__. Complex numbers have a real and imaginary part, which are each a floating point number. To extract these parts from a complex number z, use z.real and z.imag. (The standard library includes additional numeric types, fractions that hold rationals, and decimal that hold floating-point numbers with user-definable precision.) 
+ +Numbers are created by __numeric literals__ or as the result of built-in functions and operators. Unadorned integer literals (including **binary, hex, and octal numbers**) yield plain integers unless the value they denote is too large to be represented as a plain integer, in which case they yield a long integer. Integer literals with an 'L' or 'l' suffix yield long integers ('L' is preferred because 1l looks too much like eleven!). Numeric literals containing a decimal point or an exponent sign yield floating point numbers. Appending 'j' or 'J' to a numeric literal yields a complex number with a zero real part. A complex numeric literal is the sum of a real and an imaginary part. + +Python fully supports mixed arithmetic: when a binary arithmetic operator has operands of different numeric types, the operand with the “narrower” type is widened to that of the other, where plain integer is narrower than long integer is narrower than floating point is narrower than complex. Comparisons between numbers of mixed type use the same rule. [2] The constructors int(), long(), float(), and complex() can be used to produce numbers of a specific type. + +All built-in numeric types support the following operations. See The power operator and later sections for the operators’ priorities. + +Operation Result Notes +x + y sum of x and y +x - y difference of x and y +x * y product of x and y +x / y quotient of x and y (1) +x // y (floored) quotient of x and y (4)(5) +x % y remainder of x / y (4) +-x x negated ++x x unchanged +abs(x) absolute value or magnitude of x (3) +int(x) x converted to integer (2) +long(x) x converted to long integer (2) +float(x) x converted to floating point (6) +complex(re,im) a complex number with real part re, imaginary part im. im defaults to zero. +c.conjugate() conjugate of the complex number c. 
(Identity on real numbers) +divmod(x, y) the pair (x // y, x % y) (3)(4) +pow(x, y) x to the power y (3)(7) +**x ** y** x to the power y (7) +Notes: + +* For (plain or long) integer division, the result is an integer. __The result is always rounded towards minus infinity(结果为小于真实值的最大整数)__: 1/2 is 0, (-1)/2 is -1, 1/(-2) is -1, and (-1)/(-2) is 0. Note that the result is a long integer if either operand is a long integer, regardless of the numeric value. + +* Conversion from floats using int() or long() truncates toward zero like the related function, __math.trunc()__. Use the function __math.floor()__ to round downward and __math.ceil()__ to round upward. + +See Built-in Functions for a full description. + +Deprecated since version 2.3: The floor division operator, the modulo operator, and the divmod() function are no longer defined for complex numbers. Instead, convert to a floating point number using the abs() function if appropriate. + +Also referred to as integer division. The resultant value is a whole integer, though the result’s type is not necessarily int. + +float also accepts the strings “nan” and “inf” with an optional prefix “+” or “-” for Not a Number (NaN) and positive or negative infinity. + +New in version 2.6. + +Python defines pow(0, 0) and 0 ** 0 to be 1, as is common for programming languages. + +All numbers.Real types (int, long, and float) also include the following operations: + +Operation Result Notes +math.trunc(x) x truncated to Integral +round(x[, n]) x rounded to n digits, rounding half to even. If n is omitted, it defaults to 0. +math.floor(x) the greatest integral float <= x +math.ceil(x) the least integral float >= x + + +===== 5.4.1. Bitwise Operations on Integer Types ===== +Bitwise operations only make sense for integers. Negative numbers are treated as their 2’s complement value (this assumes a sufficiently large number of bits that no overflow occurs during the operation). 
+ +The priorities of the binary bitwise operations are all lower than the numeric operations and higher than the comparisons; the unary operation ~ has the same priority as the other unary numeric operations (+ and -). + +This table lists the bitwise operations sorted in ascending priority (operations in the same box have the same priority): + +Operation Result Notes +x | y bitwise or of x and y +x ^ y bitwise exclusive or of x and y +x & y bitwise and of x and y +x << n x shifted left by n bits (1)(2) +x >> n x shifted right by n bits (1)(3) +~x the bits of x inverted +Notes: + +Negative shift counts are illegal and cause a ValueError to be raised. +A left shift by n bits is equivalent to multiplication by pow(2, n). A long integer is returned if the result exceeds the range of plain integers. +A right shift by n bits is equivalent to division by pow(2, n). + +===== 5.4.2. Additional Methods on Integer Types ===== +The integer types implement the numbers.Integral abstract base class. In addition, they provide one more method: + +int.bit_length() +long.bit_length() +Return the number of bits necessary to represent an integer in binary, excluding the sign and leading zeros: + +>>> +>>> n = -37 +>>> bin(n) +'-0b100101' +>>> n.bit_length() +6 +More precisely, if x is nonzero, then x.bit_length() is the unique positive integer k such that 2**(k-1) <= abs(x) < 2**k. Equivalently, when abs(x) is small enough to have a correctly rounded logarithm, then k = 1 + int(log(abs(x), 2)). If x is zero, then x.bit_length() returns 0. + +Equivalent to: + +def bit_length(self): + s = bin(self) # binary representation: bin(-37) --> '-0b100101' + s = s.lstrip('-0b') # remove leading zeros and minus sign + return len(s) # len('100101') --> 6 +New in version 2.7. + +===== 5.4.3. Additional Methods on Float ===== +The float type implements the numbers.Real abstract base class. float also has the following additional methods. 
+ +float.as_integer_ratio() +Return a pair of integers whose ratio is exactly equal to the original float and with a positive denominator. Raises OverflowError on infinities and a ValueError on NaNs. + +New in version 2.6. + +float.is_integer() +Return True if the float instance is finite with integral value, and False otherwise: + +>>> +>>> (-2.0).is_integer() +True +>>> (3.2).is_integer() +False +New in version 2.6. + +Two methods support conversion to and from hexadecimal strings. Since Python’s floats are stored internally as binary numbers, converting a float to or from a decimal string usually involves a small rounding error. In contrast, hexadecimal strings allow exact representation and specification of floating-point numbers. This can be useful when debugging, and in numerical work. + +float.hex() +Return a representation of a floating-point number as a hexadecimal string. For finite floating-point numbers, this representation will always include a leading 0x and a trailing p and exponent. + +New in version 2.6. + +float.fromhex(s) +Class method to return the float represented by a hexadecimal string s. The string s may have leading and trailing whitespace. + +New in version 2.6. + +Note that float.hex() is an instance method, while float.fromhex() is a class method. + +A hexadecimal string takes the form: + +[sign] ['0x'] integer ['.' fraction] ['p' exponent] +where the optional sign may be either + or -, integer and fraction are strings of hexadecimal digits, and exponent is a decimal integer with an optional leading sign. Case is not significant, and there must be at least one hexadecimal digit in either the integer or the fraction. This syntax is similar to the syntax specified in section 6.4.4.2 of the C99 standard, and also to the syntax used in Java 1.5 onwards. 
In particular, the output of float.hex() is usable as a hexadecimal floating-point literal in C or Java code, and hexadecimal strings produced by C’s %a format character or Java’s Double.toHexString are accepted by float.fromhex(). + +Note that the exponent is written in decimal rather than hexadecimal, and that it gives the power of 2 by which to multiply the coefficient. For example, the hexadecimal string 0x3.a7p10 represents the floating-point number (3 + 10./16 + 7./16**2) * 2.0**10, or 3740.0: + +>>> +>>> float.fromhex('0x3.a7p10') +3740.0 +Applying the reverse conversion to 3740.0 gives a different hexadecimal string representing the same number: + +>>> +>>> float.hex(3740.0) +'0x1.d380000000000p+11' + +===== 5.5. Iterator Types ===== +New in version 2.2. + +Python supports a concept of iteration over containers. This is implemented using two distinct methods; these are used to allow user-defined classes to support iteration. Sequences, described below in more detail, always support the iteration methods. + +One method needs to be defined for container objects to provide iteration support: + +container.____iter__()__ +**Return an iterator object**. The object is required to support __the iterator protocol__ described below. If a container supports different types of iteration, additional methods can be provided to specifically request iterators for those iteration types. (An example of an object supporting multiple forms of iteration would be a tree structure which supports both breadth-first and depth-first traversal.) This method corresponds to the tp_iter slot of the type structure for Python objects in the Python/C API. + +The iterator objects themselves are required to support the following two methods, which together form the iterator protocol: + +iterator.__iter__() +Return the iterator object itself. This is required to allow both containers and iterators to be used with the for and in statements. 
This method corresponds to the tp_iter slot of the type structure for Python objects in the Python/C API. + +iterator.next() +Return the next item from the container. If there are no further items, raise the StopIteration exception. This method corresponds to the tp_iternext slot of the type structure for Python objects in the Python/C API. + +Python defines several iterator objects to support iteration over general and specific sequence types, dictionaries, and other more specialized forms. The specific types are not important beyond their implementation of the iterator protocol. + +The intention of the protocol is that once an iterator’s next() method raises StopIteration, it will continue to do so on subsequent calls. Implementations that do not obey this property are deemed broken. (This constraint was added in Python 2.3; in Python 2.2, various iterators are broken according to this rule.) + +===== 5.5.1. Generator Types ===== +Python’s generators provide a convenient way to implement the iterator protocol. If a container object’s __iter__() method is implemented as a generator, it will automatically return an iterator object (technically, a generator object) supplying the __iter__() and next() methods. More information about generators can be found in the documentation for the __yield__ expression. + +===== 5.6. Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange ===== +There are seven sequence types: strings, Unicode strings, lists, tuples, bytearrays, buffers, and xrange objects. + +For other containers see the built in dict and set classes, and the collections module. + +String literals are written in single or double quotes: 'xyzzy', "frobozz". See String literals for more about string literals. Unicode strings are much like strings, but are specified in the syntax using a preceding 'u' character: u'abc', u"def". 
In addition to the functionality described here, there are also string-specific methods described in the String Methods section. Lists are constructed with square brackets, separating items with commas: [a, b, c]. Tuples are constructed by the comma operator (not within square brackets), with or without enclosing parentheses, but an empty tuple must have the enclosing parentheses, such as a, b, c or (). A single item tuple must have a trailing comma, such as (d,). + +Bytearray objects are created with the built-in function bytearray(). + +Buffer objects are not directly supported by Python syntax, but can be created by calling the built-in function buffer(). They don’t support concatenation or repetition. + +Objects of type xrange are similar to buffers in that there is no specific syntax to create them, but they are created using the xrange() function. They don’t support slicing, concatenation or repetition, and using in, not in, min() or max() on them is inefficient. + +Most sequence types support the following operations. The in and not in operations have the same priorities as the comparison operations. The + and * operations have the same priority as the corresponding numeric operations. [3] Additional methods are provided for Mutable Sequence Types. + +This table lists the sequence operations sorted in ascending priority (operations in the same box have the same priority). 
In the table, s and t are sequences of the same type; n, i and j are integers: + +Operation Result Notes +__x in s__ True if an item of s is equal to x, else False (1) +x not in s False if an item of s is equal to x, else True (1) +s + t the concatenation of s and t (6) +s * n, n * s n shallow copies of s concatenated (2) +s[i] ith item of s, origin 0 (3) +s[i:j] slice of s from i to j (3)(4) +s[i:j:k] slice of s from i to j with step k (3)(5) +len(s) length of s +min(s) smallest item of s +max(s) largest item of s +s.index(i) index of the first occurrence of i in s +s.count(i) total number of occurrences of i in s + +Sequence types also support comparisons. In particular, tuples and lists are compared lexicographically by comparing corresponding elements. This means that to compare equal, every element must compare equal and the two sequences must be of the same type and have the same length. (For full details see Comparisons in the language reference.) + +Notes: + +When s is a string or Unicode string object the in and not in operations act like a substring test. In Python versions before 2.3, x had to be a string of length 1. In Python 2.3 and beyond, x may be a string of any length. + +Values of n less than 0 are treated as 0 (which yields an empty sequence of the same type as s). Note also that the copies are shallow; nested structures are not copied. This often haunts new Python programmers; consider: + +>>> +>>> lists = [[]] * 3 +>>> lists +[[], [], []] +>>> lists[0].append(3) +>>> lists +[[3], [3], [3]] +What has happened is that [[]] is a one-element list containing an empty list, so all three elements of [[]] * 3 are (pointers to) this single empty list. Modifying any of the elements of lists modifies this single list. 
You can create a list of different lists this way: + +>>> +>>> lists = [[] for i in range(3)] +>>> lists[0].append(3) +>>> lists[1].append(5) +>>> lists[2].append(7) +>>> lists +[[3], [5], [7]] +If i or j is negative, the index is relative to the end of the string: len(s) + i or len(s) + j is substituted. But note that -0 is still 0. + +The slice of s from i to j is defined as the sequence of items with index k such that i <= k < j. If i or j is greater than len(s), use len(s). If i is omitted or None, use 0. If j is omitted or None, use len(s). If i is greater than or equal to j, the slice is empty. + +The slice of s from i to j with step k is defined as the sequence of items with index x = i + n*k such that 0 <= n < (j-i)/k. In other words, the indices are i, i+k, i+2*k, i+3*k and so on, stopping when j is reached (but never including j). If i or j is greater than len(s), use len(s). If i or j are omitted or None, they become “end” values (which end depends on the sign of k). Note, k cannot be zero. If k is None, it is treated like 1. + +CPython implementation detail: If s and t are both strings, some Python implementations such as CPython can usually perform an in-place optimization for assignments of the form s = s + t or s += t. When applicable, this optimization makes quadratic run-time much less likely. This optimization is both version and implementation dependent. For performance sensitive code, it is preferable to use the str.join() method which assures consistent linear concatenation performance across versions and implementations. + +Changed in version 2.4: Formerly, string concatenation never occurred in-place. + +===== 5.6.1. String Methods ===== +Below are listed the string methods which both 8-bit strings and Unicode objects support. Some of them are also available on bytearray objects. + +In addition, Python’s strings support the sequence type methods described in the Sequence Types — str, unicode, list, tuple, bytearray, buffer, xrange section. 
To output formatted strings use template strings or the % operator described in the String Formatting Operations section. Also, see the re module for string functions based on regular expressions. + +=== str.capitalize() === +Return a copy of the string with its first character capitalized and the rest lowercased. + +For 8-bit strings, this method is locale-dependent. + +=== str.center(width[, fillchar]) === +Return the string centered in a string of length width. Padding is done using the specified fillchar (default is a space). + +Changed in version 2.4: Support for the fillchar argument. + +=== str.count(sub[, start[, end]]) === +Return the number of non-overlapping occurrences of substring sub in the range [start, end]. Optional arguments start and end are interpreted as in slice notation. + +=== str.decode([encoding[, errors]]) === +Decodes the string using the codec registered for encoding. encoding defaults to the default string encoding. errors may be given to set a different error handling scheme. The default is 'strict', meaning that encoding errors raise UnicodeError. Other possible values are 'ignore', 'replace' and any other name registered via codecs.register_error(), see section Codec Base Classes. + +New in version 2.2. + +Changed in version 2.3: Support for other error handling schemes added. + +Changed in version 2.7: Support for keyword arguments added. + +=== str.encode([encoding[, errors]]) === +Return an encoded version of the string. Default encoding is the current default string encoding. errors may be given to set a different error handling scheme. The default for errors is 'strict', meaning that encoding errors raise a UnicodeError. Other possible values are 'ignore', 'replace', 'xmlcharrefreplace', 'backslashreplace' and any other name registered via codecs.register_error(), see section Codec Base Classes. For a list of possible encodings, see section Standard Encodings. + +New in version 2.0.
+ +Changed in version 2.3: Support for 'xmlcharrefreplace' and 'backslashreplace' and other error handling schemes added. + +Changed in version 2.7: Support for keyword arguments added. + +=== str.endswith(suffix[, start[, end]]) === +Return True if the string ends with the specified suffix, otherwise return False. suffix can also be a tuple of suffixes to look for. With optional start, test beginning at that position. With optional end, stop comparing at that position. + +Changed in version 2.5: Accept tuples as suffix. + +=== str.expandtabs([tabsize]) === +Return a copy of the string where all tab characters are replaced by one or more spaces, depending on the current column and the given tab size. The column number is reset to zero after each newline occurring in the string. If tabsize is not given, a tab size of 8 characters is assumed. This doesn’t understand other non-printing characters or escape sequences. + +=== str.find(sub[, start[, end]]) === +Return the lowest index in the string where substring sub is found, such that sub is contained in the slice s[start:end]. Optional arguments start and end are interpreted as in slice notation. Return -1 if sub is not found. + +Note The find() method should be used only if you need to know the position of sub. To check if sub is a substring or not, use the in operator: +>>> +>>> 'Py' in 'Python' +True + +=== str.format(*args, **kwargs) === +Perform a string formatting operation. The string on which this method is called can contain literal text or replacement fields delimited by braces {}. Each replacement field contains either the numeric index of a positional argument, or the name of a keyword argument. Returns a copy of the string where each replacement field is replaced with the string value of the corresponding argument. 
+ +>>> +>>> "The sum of 1 + 2 is {0}".format(1+2) +'The sum of 1 + 2 is 3' +See Format String Syntax for a description of the various formatting options that can be specified in format strings. + +This method of string formatting is the new standard in Python 3, and should be preferred to the % formatting described in String Formatting Operations in new code. + +New in version 2.6. + +=== str.index(sub[, start[, end]]) === +Like find(), but raise ValueError when the substring is not found. + +=== str.isalnum() === +Return true if all characters in the string are alphanumeric and there is at least one character, false otherwise. + +For 8-bit strings, this method is locale-dependent. + +=== str.isalpha() === +Return true if all characters in the string are alphabetic and there is at least one character, false otherwise. + +For 8-bit strings, this method is locale-dependent. + +=== str.isdigit() === +Return true if all characters in the string are digits and there is at least one character, false otherwise. + +For 8-bit strings, this method is locale-dependent. + +=== str.islower() === +Return true if all cased characters [4] in the string are lowercase and there is at least one cased character, false otherwise. + +For 8-bit strings, this method is locale-dependent. + +=== str.isspace() === +Return true if there are only whitespace characters in the string and there is at least one character, false otherwise. + +For 8-bit strings, this method is locale-dependent. + +=== str.istitle() === +Return true if the string is a titlecased string and there is at least one character, for example uppercase characters may only follow uncased characters and lowercase characters only cased ones. Return false otherwise. + +For 8-bit strings, this method is locale-dependent. + +=== str.isupper() === +Return true if all cased characters [4] in the string are uppercase and there is at least one cased character, false otherwise. + +For 8-bit strings, this method is locale-dependent. 
+ +=== str.join(iterable) === +Return a string which is the concatenation of the strings in the iterable iterable. The separator between elements is the string providing this method. + +=== str.ljust(width[, fillchar]) === +Return the string left justified in a string of length width. Padding is done using the specified fillchar (default is a space). The original string is returned if width is less than or equal to len(s). + +Changed in version 2.4: Support for the fillchar argument. + +=== str.lower() === +Return a copy of the string with all the cased characters [4] converted to lowercase. + +For 8-bit strings, this method is locale-dependent. + +=== str.lstrip([chars]) === +Return a copy of the string with leading characters removed. The chars argument is a string specifying the set of characters to be removed. If omitted or None, the chars argument defaults to removing whitespace. The chars argument is not a prefix; rather, all combinations of its values are stripped: + +>>> +>>> ' spacious '.lstrip() +'spacious ' +>>> 'www.example.com'.lstrip('cmowz.') +'example.com' +Changed in version 2.2.2: Support for the chars argument. + +=== str.partition(sep) === +Split the string at the first occurrence of sep, and return a 3-tuple containing the part before the separator, the separator itself, and the part after the separator. If the separator is not found, return a 3-tuple containing the string itself, followed by two empty strings. + +New in version 2.5. + +=== str.replace(old, new[, count]) === +Return a copy of the string with all occurrences of substring old replaced by new. If the optional argument count is given, only the first count occurrences are replaced. + +=== str.rfind(sub[, start[, end]]) === +Return the highest index in the string where substring sub is found, such that sub is contained within s[start:end]. Optional arguments start and end are interpreted as in slice notation. Return -1 on failure. 
+ +=== str.rindex(sub[, start[, end]]) === +Like rfind() but raises ValueError when the substring sub is not found. + +=== str.rjust(width[, fillchar]) === +Return the string right justified in a string of length width. Padding is done using the specified fillchar (default is a space). The original string is returned if width is less than or equal to len(s). + +Changed in version 2.4: Support for the fillchar argument. + +=== str.rpartition(sep) === +Split the string at the last occurrence of sep, and return a 3-tuple containing the part before the separator, the separator itself, and the part after the separator. If the separator is not found, return a 3-tuple containing two empty strings, followed by the string itself. + +New in version 2.5. + +=== str.rsplit([sep[, maxsplit]]) === +Return a list of the words in the string, using sep as the delimiter string. If maxsplit is given, at most maxsplit splits are done, the rightmost ones. If sep is not specified or None, any whitespace string is a separator. Except for splitting from the right, rsplit() behaves like split() which is described in detail below. + +New in version 2.4. + +=== str.rstrip([chars]) === +Return a copy of the string with trailing characters removed. The chars argument is a string specifying the set of characters to be removed. If omitted or None, the chars argument defaults to removing whitespace. The chars argument is not a suffix; rather, all combinations of its values are stripped: + +>>> +>>> ' spacious '.rstrip() +' spacious' +>>> 'mississippi'.rstrip('ipz') +'mississ' +Changed in version 2.2.2: Support for the chars argument. + +=== str.split([sep[, maxsplit]]) === +Return a list of the words in the string, using sep as the delimiter string. If maxsplit is given, at most maxsplit splits are done (thus, the list will have at most maxsplit+1 elements). If maxsplit is not specified or -1, then there is no limit on the number of splits (all possible splits are made). 
+ +If sep is given, consecutive delimiters are not grouped together and are deemed to delimit empty strings (for example, '1,,2'.split(',') returns ['1', '', '2']). The sep argument may consist of multiple characters (for example, '1<>2<>3'.split('<>') returns ['1', '2', '3']). Splitting an empty string with a specified separator returns ['']. + +If sep is not specified or is None, a different splitting algorithm is applied: runs of consecutive whitespace are regarded as a single separator, and the result will contain no empty strings at the start or end if the string has leading or trailing whitespace. Consequently, splitting an empty string or a string consisting of just whitespace with a None separator returns []. + +For example, ' 1 2 3 '.split() returns ['1', '2', '3'], and ' 1 2 3 '.split(None, 1) returns ['1', '2 3 ']. + +=== str.splitlines([keepends]) === +Return a list of the lines in the string, breaking at line boundaries. This method uses the universal newlines approach to splitting lines. Line breaks are not included in the resulting list unless keepends is given and true. + +For example, 'ab c\n\nde fg\rkl\r\n'.splitlines() returns ['ab c', '', 'de fg', 'kl'], while the same call with splitlines(True) returns ['ab c\n', '\n', 'de fg\r', 'kl\r\n']. + +Unlike split() when a delimiter string sep is given, this method returns an empty list for the empty string, and a terminal line break does not result in an extra line. + +=== str.startswith(prefix[, start[, end]]) === +Return True if string starts with the prefix, otherwise return False. prefix can also be a tuple of prefixes to look for. With optional start, test string beginning at that position. With optional end, stop comparing string at that position. + +Changed in version 2.5: Accept tuples as prefix. + +=== str.strip([chars]) === +Return a copy of the string with the leading and trailing characters removed. The chars argument is a string specifying the set of characters to be removed. 
If omitted or None, the chars argument defaults to removing whitespace. The chars argument is not a prefix or suffix; rather, all combinations of its values are stripped: + +>>> +>>> ' spacious '.strip() +'spacious' +>>> 'www.example.com'.strip('cmowz.') +'example' +Changed in version 2.2.2: Support for the chars argument. + +=== str.swapcase() === +Return a copy of the string with uppercase characters converted to lowercase and vice versa. + +For 8-bit strings, this method is locale-dependent. + +=== str.title() === +Return a titlecased version of the string where words start with an uppercase character and the remaining characters are lowercase. + +The algorithm uses a simple language-independent definition of a word as groups of consecutive letters. The definition works in many contexts but it means that apostrophes in contractions and possessives form word boundaries, which may not be the desired result: + +>>> +>>> "they're bill's friends from the UK".title() +"They'Re Bill'S Friends From The Uk" +A workaround for apostrophes can be constructed using regular expressions: + +>>> +>>> import re +>>> def titlecase(s): +... return re.sub(r"[A-Za-z]+('[A-Za-z]+)?", +... lambda mo: mo.group(0)[0].upper() + +... mo.group(0)[1:].lower(), +... s) +... +>>> titlecase("they're bill's friends.") +"They're Bill's Friends." +For 8-bit strings, this method is locale-dependent. + +=== str.translate(table[, deletechars]) === +Return a copy of the string where all characters occurring in the optional argument deletechars are removed, and the remaining characters have been mapped through the given translation table, which must be a string of length 256. + +You can use the maketrans() helper function in the string module to create a translation table. For string objects, set the table argument to None for translations that only delete characters: + +>>> +>>> 'read this short text'.translate(None, 'aeiou') +'rd ths shrt txt' +New in version 2.6: Support for a None table argument. 
+ +For Unicode objects, the translate() method does not accept the optional deletechars argument. Instead, it returns a copy of the string where all characters have been mapped through the given translation table which must be a mapping of Unicode ordinals to Unicode ordinals, Unicode strings or None. Unmapped characters are left untouched. Characters mapped to None are deleted. Note, a more flexible approach is to create a custom character mapping codec using the codecs module (see encodings.cp1251 for an example). + +=== str.upper() === +Return a copy of the string with all the cased characters [4] converted to uppercase. Note that str.upper().isupper() might be False if s contains uncased characters or if the Unicode category of the resulting character(s) is not “Lu” (Letter, uppercase), but e.g. “Lt” (Letter, titlecase). + +For 8-bit strings, this method is locale-dependent. + +=== str.zfill(width) === +Return the numeric string left filled with zeros in a string of length width. A sign prefix is handled correctly. The original string is returned if width is less than or equal to len(s). + +New in version 2.2.2. + +The following methods are present only on unicode objects: + +=== unicode.isnumeric() === +Return True if there are only numeric characters in S, False otherwise. Numeric characters include digit characters, and all characters that have the Unicode numeric value property, e.g. U+2155, VULGAR FRACTION ONE FIFTH. + +=== unicode.isdecimal() === +Return True if there are only decimal characters in S, False otherwise. Decimal characters include digit characters, and all characters that can be used to form decimal-radix numbers, e.g. U+0660, ARABIC-INDIC DIGIT ZERO. + +===== 5.6.2. String Formatting Operations ===== +String and Unicode objects have one unique built-in operation: the % operator (modulo). This is also known as the string formatting or interpolation operator.
Given format % values (where format is a string or Unicode object), % conversion specifications in format are replaced with zero or more elements of values. The effect is similar to using sprintf() in the C language. If format is a Unicode object, or if any of the objects being converted using the %s conversion are Unicode objects, the result will also be a Unicode object. + +If format requires a single argument, values may be a single non-tuple object. [5] Otherwise, values must be a tuple with exactly the number of items specified by the format string, or a single mapping object (for example, a dictionary). + +A conversion specifier contains two or more characters and has the following components, which must occur in this order: + +The '%' character, which marks the start of the specifier. +Mapping key (optional), consisting of a parenthesised sequence of characters (for example, (somename)). +Conversion flags (optional), which affect the result of some conversion types. +Minimum field width (optional). If specified as an '*' (asterisk), the actual width is read from the next element of the tuple in values, and the object to convert comes after the minimum field width and optional precision. +Precision (optional), given as a '.' (dot) followed by the precision. If specified as '*' (an asterisk), the actual precision is read from the next element of the tuple in values, and the value to convert comes after the precision. +Length modifier (optional). +Conversion type. +When the right argument is a dictionary (or other mapping type), then the formats in the string must include a parenthesised mapping key into that dictionary inserted immediately after the '%' character. The mapping key selects the value to be formatted from the mapping. For example: + +>>> +>>> print '%(language)s has %(number)03d quote types.' % \ +... {"language": "Python", "number": 2} +Python has 002 quote types.
+In this case no * specifiers may occur in a format (since they require a sequential parameter list). + +The conversion flag characters are: + +Flag Meaning +'#' The value conversion will use the “alternate form” (where defined below). +'0' The conversion will be zero padded for numeric values. +'-' The converted value is left adjusted (overrides the '0' conversion if both are given). +' ' (a space) A blank should be left before a positive number (or empty string) produced by a signed conversion. +'+' A sign character ('+' or '-') will precede the conversion (overrides a “space” flag). +A length modifier (h, l, or L) may be present, but is ignored as it is not necessary for Python – so e.g. %ld is identical to %d. + +The conversion types are: + +Conversion Meaning Notes +'d' Signed integer decimal. +'i' Signed integer decimal. +'o' Signed octal value. (1) +'u' Obsolete type – it is identical to 'd'. (7) +'x' Signed hexadecimal (lowercase). (2) +'X' Signed hexadecimal (uppercase). (2) +'e' Floating point exponential format (lowercase). (3) +'E' Floating point exponential format (uppercase). (3) +'f' Floating point decimal format. (3) +'F' Floating point decimal format. (3) +'g' Floating point format. Uses lowercase exponential format if exponent is less than -4 or not less than precision, decimal format otherwise. (4) +'G' Floating point format. Uses uppercase exponential format if exponent is less than -4 or not less than precision, decimal format otherwise. (4) +'c' Single character (accepts integer or single character string). +'r' String (converts any Python object using repr()). (5) +'s' String (converts any Python object using str()). (6) +'%' No argument is converted, results in a '%' character in the result. +Notes: + +The alternate form causes a leading zero ('0') to be inserted between left-hand padding and the formatting of the number if the leading character of the result is not already a zero. 
+ +The alternate form causes a leading '0x' or '0X' (depending on whether the 'x' or 'X' format was used) to be inserted between left-hand padding and the formatting of the number if the leading character of the result is not already a zero. + +The alternate form causes the result to always contain a decimal point, even if no digits follow it. + +The precision determines the number of digits after the decimal point and defaults to 6. + +The alternate form causes the result to always contain a decimal point, and trailing zeroes are not removed as they would otherwise be. + +The precision determines the number of significant digits before and after the decimal point and defaults to 6. + +The %r conversion was added in Python 2.0. + +The precision determines the maximal number of characters used. + +If the object or format provided is a unicode string, the resulting string will also be unicode. + +The precision determines the maximal number of characters used. + +See PEP 237. + +Since Python strings have an explicit length, %s conversions do not assume that '\0' is the end of the string. + +Changed in version 2.7: %f conversions for numbers whose absolute value is over 1e50 are no longer replaced by %g conversions. + +Additional string operations are defined in standard modules string and re. + +===== 5.6.3. XRange Type ===== +The xrange type is an immutable sequence which is commonly used for looping. The advantage of the xrange type is that an xrange object will always take the same amount of memory, no matter the size of the range it represents. There are no consistent performance advantages. + +XRange objects have very little behavior: they only support indexing, iteration, and the len() function. + +===== 5.6.4. Mutable Sequence Types ===== +List and bytearray objects support additional operations that allow in-place modification of the object. Other mutable sequence types (when added to the language) should also support these operations. 
Strings and tuples are immutable sequence types: such objects cannot be modified once created. The following operations are defined on mutable sequence types (where x is an arbitrary object): + +Operation Result Notes +s[i] = x item i of s is replaced by x +s[i:j] = t slice of s from i to j is replaced by the contents of the iterable t +**del s[i:j]** same as **s[i:j] = []** +s[i:j:k] = t the elements of s[i:j:k] are replaced by those of t (1) +del s[i:j:k] removes the elements of s[i:j:k] from the list +s.append(x) same as s[len(s):len(s)] = __[x]__ (2) +s.extend(x) same as s[len(s):len(s)] = __x__ (3) +s.count(x) return number of i‘s for which s[i] == x +s.index(x[, i[, j]]) return smallest k such that s[k] == x and i <= k < j (4) +s.insert(i, x) same as s[i:i] = [x] (5) +s.pop([i]) same as x = s[i]; del s[i]; return x (6) +s.remove(x) same as del s[s.index(x)] (4) +s.reverse() reverses the items of s **in place** (7) +s.sort([cmp[, key[, reverse]]]) sort the items of s in place (7)(8)(9)(10) +Notes: + +t must have the same length as the slice it is replacing. + +The C implementation of Python has historically accepted multiple parameters and implicitly joined them into a tuple; this no longer works in Python 2.0. Use of this misfeature has been deprecated since Python 1.4. + +x can be any iterable object. + +Raises ValueError when x is not found in s. When a negative index is passed as the second or third parameter to the index() method, the list length is added, as for slice indices. If it is still negative, it is truncated to zero, as for slice indices. + +Changed in version 2.3: Previously, index() didn’t have arguments for specifying start and stop positions. + +When a negative index is passed as the first parameter to the insert() method, the list length is added, as for slice indices. If it is still negative, it is truncated to zero, as for slice indices. + +Changed in version 2.3: Previously, all negative indices were truncated to zero. 
+ +The pop() method is only supported by the list and array types. The optional argument i defaults to -1, so that by default the last item is removed and returned. + +The sort() and reverse() methods modify the list in place for economy of space when sorting or reversing a large list. To remind you that they operate by side effect, they don’t return the sorted or reversed list. + +The sort() method takes optional arguments for controlling the comparisons. + +cmp specifies a custom comparison function of two arguments (list items) which should return a negative, zero or positive number depending on whether the first argument is considered smaller than, equal to, or larger than the second argument: cmp=lambda x,y: cmp(x.lower(), y.lower()). The default value is None. + +key specifies a function of one argument that is used to extract a comparison key from each list element: key=str.lower. The default value is None. + +reverse is a boolean value. If set to True, then the list elements are sorted as if each comparison were reversed. + +In general, the key and reverse conversion processes are much faster than specifying an equivalent cmp function. This is because cmp is called multiple times for each list element while key and reverse touch each element only once. Use functools.cmp_to_key() to convert an old-style cmp function to a key function. + +Changed in version 2.3: Support for None as an equivalent to omitting cmp was added. + +Changed in version 2.4: Support for key and reverse was added. + +Starting with Python 2.3, the sort() method is guaranteed to be stable. A sort is stable if it guarantees not to change the relative order of elements that compare equal — this is helpful for sorting in multiple passes (for example, sort by department, then by salary grade). + +CPython implementation detail: While a list is being sorted, the effect of attempting to mutate, or even inspect, the list is undefined. 
The C implementation of Python 2.3 and newer makes the list appear empty for the duration, and raises ValueError if it can detect that the list has been mutated during a sort. + +===== 5.7. Set Types — set, frozenset ===== +A set object is an unordered collection of distinct hashable objects. Common uses include membership testing, removing duplicates from a sequence, and computing mathematical operations such as intersection, union, difference, and symmetric difference. (For other containers see the built in dict, list, and tuple classes, and the collections module.) + +New in version 2.4. + +Like other collections, sets support x in set, len(set), and for x in set. Being an unordered collection, sets do not record element position or order of insertion. Accordingly, sets do not support indexing, slicing, or other sequence-like behavior. + +There are currently two built-in set types, set and frozenset. The set type is mutable — the contents can be changed using methods like add() and remove(). Since it is mutable, it has no hash value and cannot be used as either a dictionary key or as an element of another set. The frozenset type is immutable and hashable — its contents cannot be altered after it is created; it can therefore be used as a dictionary key or as an element of another set. + +As of Python 2.7, non-empty sets (not frozensets) can be created by placing a comma-separated list of elements within braces, for example: {'jack', 'sjoerd'}, in addition to the set constructor. + +The constructors for both classes work the same: + +==== class set([iterable]) ==== + +==== class frozenset([iterable]) ==== +Return a new set or frozenset object whose elements are taken from iterable. The elements of a set must be hashable. To represent sets of sets, the inner sets must be frozenset objects. If iterable is not specified, a new empty set is returned. 
+ +Instances of set and frozenset provide the following operations: + +==== len(s) ==== +Return the cardinality of set s. + +==== x in s ==== +Test x for membership in s. + +==== x not in s ==== +Test x for non-membership in s. + +==== isdisjoint(other) ==== +Return True if the set has no elements in common with other. Sets are disjoint if and only if their intersection is the empty set. + +New in version 2.6. + +==== issubset(other) ==== + +==== set <= other ==== +Test whether every element in the set is in other. + +==== set < other ==== +Test whether the set is **a proper subset of other**, that is, set <= other and set != other. + +==== issuperset(other) ==== +set >= other +Test whether every element in other is in the set. + +==== set > other ==== +Test whether the set is a proper superset of other, that is, set >= other and set != other. + +==== union(other, ...) ==== +set | other | ... +Return a new set with elements from the set and all others. + +Changed in version 2.6: Accepts multiple input iterables. + +==== intersection(other, ...) ==== +set & other & ... +Return a new set with elements common to the set and all others. + +Changed in version 2.6: Accepts multiple input iterables. + +==== difference(other, ...) ==== +set - other - ... +Return a new set with elements in the set that are not in the others. + +Changed in version 2.6: Accepts multiple input iterables. + +==== symmetric_difference(other) ==== + +==== set ^ other ==== +Return a new set with elements in either the set or other but not both. + +==== copy() ==== +Return a new set with a shallow copy of s. + +Note, the non-operator versions of union(), intersection(), difference(), and symmetric_difference(), issubset(), and issuperset() methods will accept any iterable as an argument. In contrast, their operator based counterparts require their arguments to be sets. This precludes error-prone constructions like set('abc') & 'cbs' in favor of the more readable set('abc').intersection('cbs'). 
+ +Both set and frozenset support set to set comparisons. Two sets are equal if and only if every element of each set is contained in the other (each is a subset of the other). A set is less than another set if and only if the first set is a proper subset of the second set (is a subset, but is not equal). A set is greater than another set if and only if the first set is a proper superset of the second set (is a superset, but is not equal). + +Instances of set are compared to instances of frozenset based on their members. For example, set('abc') == frozenset('abc') returns True and so does set('abc') in set([frozenset('abc')]). + +The subset and equality comparisons do not generalize to a complete ordering function. For example, any two disjoint sets are not equal and are not subsets of each other, so all of the following return False: a<b, a==b, or a>b. Accordingly, sets do not implement the __cmp__() method. + +Since sets only define partial ordering (subset relationships), the output of the list.sort() method is undefined for lists of sets. + +Set elements, like dictionary keys, must be hashable. + +Binary operations that mix set instances with frozenset return the type of the first operand. For example: frozenset('ab') | set('bc') returns an instance of frozenset. + +The following table lists operations available for set that do not apply to immutable instances of frozenset: + +==== update(other, ...) ==== + +==== set |= other | ... ==== +Update the set, adding elements from all others. + +Changed in version 2.6: Accepts multiple input iterables. + +==== intersection_update(other, ...) ==== + +==== set &= other & ... ==== +Update the set, keeping only elements found in it and all others. + +Changed in version 2.6: Accepts multiple input iterables. + +==== difference_update(other, ...) ==== + +==== set -= other | ... ==== +Update the set, removing elements found in others. + +Changed in version 2.6: Accepts multiple input iterables.
+ +==== symmetric_difference_update(other) ==== + +==== set ^= other ==== +Update the set, keeping only elements found in either set, but not in both. + +==== add(elem) ==== +Add element elem to the set. + +==== remove(elem) ==== +Remove element elem from the set. Raises KeyError if elem is not contained in the set. + +==== discard(elem) ==== +Remove element elem from the set if it is present. + +==== pop() ==== +Remove and return an arbitrary element from the set. Raises KeyError if the set is empty. + +==== clear() ==== +Remove all elements from the set. + +Note, the non-operator versions of the update(), intersection_update(), difference_update(), and symmetric_difference_update() methods will accept any iterable as an argument. + +Note, the elem argument to the __contains__(), remove(), and discard() methods may be a set. To support searching for an equivalent frozenset, the elem set is temporarily mutated during the search and then restored. During the search, the elem set should not be read or mutated since it does not have a meaningful value. + +See also +Comparison to the built-in set types +Differences between the sets module and the built-in set types. + +===== 5.8. Mapping Types — dict ===== +A mapping object maps hashable values to arbitrary objects. Mappings are mutable objects. There is currently only one standard mapping type, the dictionary. (For other containers see the built in list, set, and tuple classes, and the collections module.) + +A dictionary’s keys are almost arbitrary values. Values that are not hashable, that is, values containing lists, dictionaries or other mutable types (that are compared by value rather than by object identity) may not be used as keys. Numeric types used for keys obey the normal rules for numeric comparison: if two numbers compare equal (such as 1 and 1.0) then they can be used interchangeably to index the same dictionary entry. 
(Note however, that since computers store floating-point numbers as approximations it is usually unwise to use them as dictionary keys.) + +Dictionaries can be created by placing a comma-separated list of key: value pairs within braces, for example: {'jack': 4098, 'sjoerd': 4127} or {4098: 'jack', 4127: 'sjoerd'}, or by the dict constructor. + +class dict(**kwarg) +class dict(mapping, **kwarg) +class dict(iterable, **kwarg) +Return a new dictionary initialized from an optional positional argument and a possibly empty set of keyword arguments. + +If no positional argument is given, an empty dictionary is created. If a positional argument is given and it is a mapping object, a dictionary is created with the same key-value pairs as the mapping object. Otherwise, the positional argument must be an iterable object. Each item in the iterable must itself be an iterable with exactly two objects. The first object of each item becomes a key in the new dictionary, and the second object the corresponding value. If a key occurs more than once, the last value for that key becomes the corresponding value in the new dictionary. + +If keyword arguments are given, the keyword arguments and their values are added to the dictionary created from the positional argument. If a key being added is already present, the value from the keyword argument replaces the value from the positional argument. + +To illustrate, the following examples all return a dictionary equal to {"one": 1, "two": 2, "three": 3}: + +>>> +>>> a = dict(one=1, two=2, three=3) +>>> b = {'one': 1, 'two': 2, 'three': 3} +>>> c = dict(zip(['one', 'two', 'three'], [1, 2, 3])) +>>> d = dict([('two', 2), ('one', 1), ('three', 3)]) +>>> e = dict({'three': 3, 'one': 1, 'two': 2}) +>>> a == b == c == d == e +True +Providing keyword arguments as in the first example only works for keys that are valid Python identifiers. Otherwise, any valid keys can be used. + +New in version 2.2.
+ +Changed in version 2.3: Support for building a dictionary from keyword arguments added. + +These are the operations that dictionaries support (and therefore, custom mapping types should support too): + +len(d) +Return the number of items in the dictionary d. + +d[key] +Return the item of d with key key. Raises a KeyError if key is not in the map. + +New in version 2.5: If a subclass of dict defines a method __missing__(), if the key key is not present, the d[key] operation calls that method with the key key as argument. The d[key] operation then returns or raises whatever is returned or raised by the __missing__(key) call if the key is not present. No other operations or methods invoke __missing__(). If __missing__() is not defined, KeyError is raised. __missing__() must be a method; it cannot be an instance variable. For an example, see collections.defaultdict. + +d[key] = value +Set d[key] to value. + +del d[key] +Remove d[key] from d. Raises a KeyError if key is not in the map. + +key in d +Return True if d has a key key, else False. + +New in version 2.2. + +key not in d +Equivalent to not key in d. + +New in version 2.2. + +iter(d) +Return an iterator over the keys of the dictionary. This is a shortcut for iterkeys(). + +clear() +Remove all items from the dictionary. + +copy() +Return a shallow copy of the dictionary. + +fromkeys(seq[, value]) +Create a new dictionary with keys from seq and values set to value. + +fromkeys() is a class method that returns a new dictionary. value defaults to None. + +New in version 2.3. + +get(key[, default]) +Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError. + +has_key(key) +Test for the presence of key in the dictionary. has_key() is deprecated in favor of key in d. + +items() +Return a copy of the dictionary’s list of (key, value) pairs. 
+ +CPython implementation detail: Keys and values are listed in an arbitrary order which is non-random, varies across Python implementations, and depends on the dictionary’s history of insertions and deletions. + +If items(), keys(), values(), iteritems(), iterkeys(), and itervalues() are called with no intervening modifications to the dictionary, the lists will directly correspond. This allows the creation of (value, key) pairs using zip(): pairs = zip(d.values(), d.keys()). The same relationship holds for the iterkeys() and itervalues() methods: pairs = zip(d.itervalues(), d.iterkeys()) provides the same value for pairs. Another way to create the same list is pairs = [(v, k) for (k, v) in d.iteritems()]. + +iteritems() +Return an iterator over the dictionary’s (key, value) pairs. See the note for dict.items(). + +Using iteritems() while adding or deleting entries in the dictionary may raise a RuntimeError or fail to iterate over all entries. + +New in version 2.2. + +iterkeys() +Return an iterator over the dictionary’s keys. See the note for dict.items(). + +Using iterkeys() while adding or deleting entries in the dictionary may raise a RuntimeError or fail to iterate over all entries. + +New in version 2.2. + +itervalues() +Return an iterator over the dictionary’s values. See the note for dict.items(). + +Using itervalues() while adding or deleting entries in the dictionary may raise a RuntimeError or fail to iterate over all entries. + +New in version 2.2. + +keys() +Return a copy of the dictionary’s list of keys. See the note for dict.items(). + +pop(key[, default]) +If key is in the dictionary, remove it and return its value, else return default. If default is not given and key is not in the dictionary, a KeyError is raised. + +New in version 2.3. + +popitem() +Remove and return an arbitrary (key, value) pair from the dictionary. + +popitem() is useful to destructively iterate over a dictionary, as often used in set algorithms. 
If the dictionary is empty, calling popitem() raises a KeyError. + +setdefault(key[, default]) +If key is in the dictionary, return its value. If not, insert key with a value of default and return default. default defaults to None. + +update([other]) +Update the dictionary with the key/value pairs from other, overwriting existing keys. Return None. + +update() accepts either another dictionary object or an iterable of key/value pairs (as tuples or other iterables of length two). If keyword arguments are specified, the dictionary is then updated with those key/value pairs: d.update(red=1, blue=2). + +Changed in version 2.4: Allowed the argument to be an iterable of key/value pairs and allowed keyword arguments. + +values() +Return a copy of the dictionary’s list of values. See the note for dict.items(). + +viewitems() +Return a new view of the dictionary’s items ((key, value) pairs). See below for documentation of view objects. + +New in version 2.7. + +viewkeys() +Return a new view of the dictionary’s keys. See below for documentation of view objects. + +New in version 2.7. + +viewvalues() +Return a new view of the dictionary’s values. See below for documentation of view objects. + +New in version 2.7. + +===== 5.8.1. Dictionary view objects ===== +The objects returned by dict.viewkeys(), dict.viewvalues() and dict.viewitems() are view objects. They provide a dynamic view on the dictionary’s entries, which means that when the dictionary changes, the view reflects these changes. + +Dictionary views can be iterated over to yield their respective data, and support membership tests: + +len(dictview) +Return the number of entries in the dictionary. + +iter(dictview) +Return an iterator over the keys, values or items (represented as tuples of (key, value)) in the dictionary. + +Keys and values are iterated over in an arbitrary order which is non-random, varies across Python implementations, and depends on the dictionary’s history of insertions and deletions. 
If keys, values and items views are iterated over with no intervening modifications to the dictionary, the order of items will directly correspond. This allows the creation of (value, key) pairs using zip(): pairs = zip(d.values(), d.keys()). Another way to create the same list is pairs = [(v, k) for (k, v) in d.items()]. + +Iterating views while adding or deleting entries in the dictionary may raise a RuntimeError or fail to iterate over all entries. + +x in dictview +Return True if x is in the underlying dictionary’s keys, values or items (in the latter case, x should be a (key, value) tuple). + +Keys views are set-like since their entries are unique and hashable. If all values are hashable, so that (key, value) pairs are unique and hashable, then the items view is also set-like. (Values views are not treated as set-like since the entries are generally not unique.) Then these set operations are available (“other” refers either to another view or a set): + +dictview & other +Return the intersection of the dictview and the other object as a new set. + +dictview | other +Return the union of the dictview and the other object as a new set. + +dictview - other +Return the difference between the dictview and the other object (all elements in dictview that aren’t in other) as a new set. + +dictview ^ other +Return the symmetric difference (all elements either in dictview or other, but not in both) of the dictview and the other object as a new set. + +An example of dictionary view usage: + +>>> +>>> dishes = {'eggs': 2, 'sausage': 1, 'bacon': 1, 'spam': 500} +>>> keys = dishes.viewkeys() +>>> values = dishes.viewvalues() + +>>> # iteration +>>> n = 0 +>>> for val in values: +... 
n += val +>>> print(n) +504 + +>>> # keys and values are iterated over in the same order +>>> list(keys) +['eggs', 'bacon', 'sausage', 'spam'] +>>> list(values) +[2, 1, 1, 500] + +>>> # view objects are dynamic and reflect dict changes +>>> del dishes['eggs'] +>>> del dishes['sausage'] +>>> list(keys) +['spam', 'bacon'] + +>>> # set operations +>>> keys & {'eggs', 'bacon', 'salad'} +{'bacon'} + +===== 5.9. File Objects ===== +File objects are implemented using C’s stdio package and can be created with the built-in open() function. File objects are also returned by some other built-in functions and methods, such as os.popen() and os.fdopen() and the makefile() method of socket objects. Temporary files can be created using the tempfile module, and high-level file operations such as copying, moving, and deleting files and directories can be achieved with the shutil module. + +When a file operation fails for an I/O-related reason, the exception IOError is raised. This includes situations where the operation is not defined for some reason, like seek() on a tty device or writing a file opened for reading. + +Files have the following methods: + +==== file.close() ==== +Close the file. A closed file cannot be read or written any more. Any operation which requires that the file be open will raise a ValueError after the file has been closed. Calling close() more than once is allowed. + +As of Python 2.5, you can avoid having to call this method explicitly if you use the with statement. For example, the following code will automatically close f when the with block is exited: + +from __future__ import with_statement # This isn't required in Python 2.6 + +with open("hello.txt") as f: + for line in f: + print line +In older versions of Python, you would have needed to do this to get the same effect: + +f = open("hello.txt") +try: + for line in f: + print line +finally: + f.close() +Note Not all “file-like” types in Python support use as a context manager for the with statement. 
If your code is intended to work with any file-like object, you can use the function contextlib.closing() instead of using the object directly. + +==== file.flush() ==== +Flush the internal buffer, like stdio‘s fflush(). This may be a no-op on some file-like objects. + +Note flush() does not necessarily write the file’s data to disk. Use flush() followed by os.fsync() to ensure this behavior. + +==== file.fileno() ==== +Return the integer “file descriptor” that is used by the underlying implementation to request I/O operations from the operating system. This can be useful for other, lower level interfaces that use file descriptors, such as the fcntl module or os.read() and friends. + +Note File-like objects which do not have a real file descriptor should not provide this method! + +==== file.isatty() ==== +Return True if the file is connected to a tty(-like) device, else False. + +Note If a file-like object is not associated with a real file, this method should not be implemented. + +==== file.next() ==== +A file object is its own iterator, for example iter(f) returns f (unless f is closed). When a file is used as an iterator, typically in a for loop (for example, for line in f: print line), the next() method is called repeatedly. This method returns the next input line, or raises StopIteration when EOF is hit when the file is open for reading (behavior is undefined when the file is open for writing). In order to make a for loop the most efficient way of looping over the lines of a file (a very common operation), the next() method uses a hidden read-ahead buffer. As a consequence of using a read-ahead buffer, combining next() with other file methods (like readline()) does not work right. However, using seek() to reposition the file to an absolute position will flush the read-ahead buffer. + +New in version 2.3. + +==== file.read([size]) ==== +Read at most size bytes from the file (less if the read hits EOF before obtaining size bytes). 
If the size argument is negative or omitted, **read all data until EOF is reached**. The bytes are returned as a string object. An empty string is returned when EOF is encountered immediately. (For certain files, like ttys, it makes sense to continue reading after an EOF is hit.) Note that this method may call the underlying C function fread() more than once in an effort to acquire as close to size bytes as possible. Also note that when in non-blocking mode, less data than was requested may be returned, even if no size parameter was given. + +Note This function is simply a wrapper for the underlying fread() C function, and will behave the same in corner cases, such as whether the EOF value is cached. + +==== file.readline([size]) ==== +Read one entire line from the file. __A trailing newline character is kept__ in the string (but may be absent when a file ends with an incomplete line). [6] If the size argument is present and non-negative, it is a maximum byte count (including the trailing newline) and an incomplete line may be returned. When size is not 0, an empty string is returned only when EOF is encountered immediately. + +Note Unlike stdio‘s fgets(), the returned string contains null characters ('\0') if they occurred in the input. + +==== file.readlines([sizehint]) ==== +Read until EOF using readline() and return a list containing the lines thus read. If the optional sizehint argument is present, instead of reading up to EOF, whole lines totalling approximately **sizehint bytes** (possibly after rounding up to an internal buffer size) are read. Objects implementing a file-like interface may choose to ignore sizehint if it cannot be implemented, or cannot be implemented efficiently. + +file.xreadlines() +This method returns the same thing as iter(f). + +New in version 2.1. + +Deprecated since version 2.3: Use for line in file instead. + +==== file.seek(offset[, whence]) ==== +Set the file’s current position, like stdio‘s fseek(). 
The whence argument is optional and defaults to os.SEEK_SET or 0 (absolute file positioning); other values are os.SEEK_CUR or 1 (seek relative to the current position) and os.SEEK_END or 2 (seek relative to the file’s end). There is no return value. + +For example, f.seek(2, os.SEEK_CUR) advances the position by two and f.seek(-3, os.SEEK_END) sets the position to the third to last. + +Note that if the file is opened for appending (mode 'a' or 'a+'), any seek() operations will be undone at the next write. If the file is only opened for writing in append mode (mode 'a'), this method is essentially a no-op, but it remains useful for files opened in append mode with reading enabled (mode 'a+'). If the file is opened in text mode (without 'b'), only offsets returned by tell() are legal. Use of other offsets causes undefined behavior. + +Note that not all file objects are seekable. + +Changed in version 2.6: Passing float values as offset has been deprecated. + +==== file.tell() ==== +Return the file’s current position, like stdio‘s ftell(). + +Note On Windows, tell() can return illegal values (after an fgets()) when reading files with Unix-style line-endings. Use binary mode ('rb') to circumvent this problem. + +==== file.truncate([size]) ==== +Truncate the file’s size. If the optional size argument is present, the file is truncated to (at most) that size. The size defaults to the current position. The current file position is not changed. Note that if a specified size exceeds the file’s current size, the result is platform-dependent: possibilities include that the file may remain unchanged, increase to the specified size as if zero-filled, or increase to the specified size with undefined new content. Availability: Windows, many Unix variants. + +==== file.write(str) ==== +Write a string to the file. There is no return value. Due to buffering, the string may not actually show up in the file until the flush() or close() method is called. 
+ +==== file.writelines(sequence) ==== +Write a sequence of strings to the file. The sequence can be any iterable object producing strings, typically a list of strings. There is no return value. (The name is intended to match readlines(); __writelines() does not add line separators.__) + +Files support the iterator protocol. Each iteration returns the same result as readline(), and iteration ends when the readline() method returns an empty string. + +File objects also offer a number of other interesting attributes. These are not required for file-like objects, but should be implemented if they make sense for the particular object. + +==== file.closed ==== +bool indicating the current state of the file object. This is a read-only attribute; the close() method changes the value. It may not be available on all file-like objects. + +==== file.encoding ==== +The encoding that this file uses. When Unicode strings are written to a file, they will be converted to byte strings using this encoding. In addition, when the file is connected to a terminal, the attribute gives the encoding that the terminal is likely to use (that information might be incorrect if the user has misconfigured the terminal). The attribute is read-only and may not be present on all file-like objects. It may also be None, in which case the file uses the system default encoding for converting Unicode strings. + +New in version 2.3. + +==== file.errors ==== +The Unicode error handler used along with the encoding. + +New in version 2.6. + +==== file.mode ==== +The I/O mode for the file. If the file was created using the open() built-in function, this will be the value of the mode parameter. This is a read-only attribute and may not be present on all file-like objects. + +==== file.name ==== +If the file object was created using open(), the name of the file. Otherwise, some string that indicates the source of the file object, of the form <...>. 
This is a read-only attribute and may not be present on all file-like objects. + +==== file.newlines ==== +If Python was built with universal newlines enabled (the default) this read-only attribute exists, and for files opened in universal newline read mode it keeps track of the types of newlines encountered while reading the file. The values it can take are '\r', '\n', '\r\n', None (unknown, no newlines read yet) or a tuple containing all the newline types seen, to indicate that multiple newline conventions were encountered. For files not opened in universal newlines read mode the value of this attribute will be None. + +==== file.softspace ==== +Boolean that indicates whether a space character needs to be printed before another value when using the print statement. Classes that are trying to simulate a file object should also have a writable softspace attribute, which should be initialized to zero. This will be automatic for most classes implemented in Python (care may be needed for objects that override attribute access); types implemented in C will have to provide a writable softspace attribute. + +Note This attribute is not used to control the print statement, but to allow the implementation of print to keep track of its internal state. + +===== 5.10. memoryview type ===== +New in version 2.7. + +memoryview objects allow Python code to access the internal data of an object that supports the buffer protocol without copying. Memory is generally interpreted as simple bytes. + +class memoryview(obj) +Create a memoryview that references obj. obj must support the buffer protocol. Built-in objects that support the buffer protocol include str and bytearray (but not unicode). + +A memoryview has the notion of an element, which is the atomic memory unit handled by the originating object obj. For many simple types such as str and bytearray, an element is a single byte, but other third-party types may expose larger elements. 
+ +len(view) returns the total number of elements in the memoryview, view. The itemsize attribute will give you the number of bytes in a single element. + +A memoryview supports slicing to expose its data. Taking a single index will return a single element as a str object. Full slicing will result in a subview: + +>>> +>>> v = memoryview('abcefg') +>>> v[1] +'b' +>>> v[-1] +'g' +>>> v[1:4] +<memory at 0x77ab28> +>>> v[1:4].tobytes() +'bce' +If the object the memoryview is over supports changing its data, the memoryview supports slice assignment: + +>>> +>>> data = bytearray('abcefg') +>>> v = memoryview(data) +>>> v.readonly +False +>>> v[0] = 'z' +>>> data +bytearray(b'zbcefg') +>>> v[1:4] = '123' +>>> data +bytearray(b'z123fg') +>>> v[2] = 'spam' +Traceback (most recent call last): + File "<stdin>", line 1, in <module> +ValueError: cannot modify size of memoryview object +Notice how the size of the memoryview object cannot be changed. + +memoryview has two methods: + +tobytes() +Return the data in the buffer as a bytestring (an object of class str). + +>>> +>>> m = memoryview("abc") +>>> m.tobytes() +'abc' +tolist() +Return the data in the buffer as a list of integers. + +>>> +>>> memoryview("abc").tolist() +[97, 98, 99] +There are also several readonly attributes available: + +format +A string containing the format (in struct module style) for each element in the view. This defaults to 'B', a simple bytestring. + +itemsize +The size in bytes of each element of the memoryview. + +shape +A tuple of integers the length of ndim giving the shape of the memory as a N-dimensional array. + +ndim +An integer indicating how many dimensions of a multi-dimensional array the memory represents. + +strides +A tuple of integers the length of ndim giving the size in bytes to access each element for each dimension of the array. + +readonly +A bool indicating whether the memory is read only. + +===== 5.11. Context Manager Types ===== +New in version 2.5.
+ +Python’s __with__ statement supports the concept of __a runtime context__ defined by a context manager. This is implemented using two separate methods that allow user-defined classes to define a runtime context that is entered before the statement body is executed and exited when the statement ends. + +The context management protocol consists of a pair of methods that need to be provided for a context manager object to define a runtime context: + +contextmanager.__enter__() +Enter the runtime context and return either this object or another object related to the runtime context. The value returned by this method is bound to the identifier in the __as__ clause of with statements using this context manager. + +An example of a context manager that returns itself is a file object. File objects return themselves from __enter__() to allow open() to be used as the context expression in a with statement. + +An example of a context manager that returns a related object is the one returned by decimal.localcontext(). These managers set the active decimal context to a copy of the original decimal context and then return the copy. This allows changes to be made to the current decimal context in the body of the with statement without affecting code outside the with statement. + +contextmanager.__exit__(exc_type, exc_val, exc_tb) +Exit the runtime context and return a Boolean flag indicating if any exception that occurred should be suppressed. If an exception occurred while executing the body of the with statement, the arguments contain **the exception type, value and traceback information**. Otherwise, all three arguments are None. + +Returning a true value from this method will cause the with statement to suppress the exception and continue execution with the statement immediately following the with statement. Otherwise the exception continues propagating after this method has finished executing. 
Exceptions that occur during execution of this method will replace any exception that occurred in the body of the with statement. + +The exception passed in should never be reraised explicitly - instead, this method should return a false value to indicate that the method completed successfully and does not want to suppress the raised exception. This allows context management code (such as contextlib.nested) to easily detect whether or not an __exit__() method has actually failed. + +Python defines several context managers to support easy thread synchronisation, prompt closure of files or other objects, and simpler manipulation of the active decimal arithmetic context. The specific types are not treated specially beyond their implementation of the context management protocol. See the contextlib module for some examples. + +Python’s generators and the contextlib.contextmanager decorator provide a convenient way to implement these protocols. If a generator function is decorated with the contextlib.contextmanager decorator, it will return a context manager implementing the necessary __enter__() and __exit__() methods, rather than the iterator produced by an undecorated generator function. + +Note that there is no specific slot for any of these methods in the type structure for Python objects in the Python/C API. Extension types wanting to define these methods must provide them as a normal Python accessible method. Compared to the overhead of setting up the runtime context, the overhead of a single class dictionary lookup is negligible. + +===== 5.12. Other Built-in Types ===== +The interpreter supports several other kinds of objects. Most of these support only one or two operations. + +==== 5.12.1. Modules ==== +The only special operation on a module is __attribute access__: m.name, where m is a module and name accesses a name defined in m‘s symbol table. Module attributes can be assigned to. 
(Note that the import statement is not, strictly speaking, an operation on a module object; import foo does not require a module object named foo to exist, rather it requires an (external) definition for a module named foo somewhere.) + +A special attribute of every module is __dict__. This is the dictionary containing the module’s symbol table. Modifying this dictionary will actually change the module’s symbol table, but direct assignment to the __dict__ attribute is not possible (you can write m.__dict__['a'] = 1, which defines m.a to be 1, but you can’t write m.__dict__ = {}). Modifying __dict__ directly is not recommended. + +Modules built into the interpreter are written like this: <module 'sys' (built-in)>. If loaded from a file, they are written as <module 'os' from '/usr/local/lib/pythonX.Y/os.pyc'>. + +==== 5.12.2. Classes and Class Instances ==== +See Objects, values and types and Class definitions for these. + +==== 5.12.3. Functions ==== +Function objects are created by function definitions. The only operation on a function object is to call it: func(argument-list). + +There are really two flavors of function objects: **built-in functions and user-defined functions**. Both support the same operation (to call the function), but the implementation is different, hence the different object types. + +See Function definitions for more information. + +==== 5.12.4. Methods ==== +Methods are functions that are called using the attribute notation. There are two flavors: **built-in methods** (such as append() on lists) and **class instance methods**. Built-in methods are described with the types that support them. + +The implementation adds two special read-only attributes to class instance methods: __m.im_self__ is the object on which the method operates, and __m.im_func__ is the function implementing the method. Calling m(arg-1, arg-2, ..., arg-n) is completely equivalent to calling m.im_func(m.im_self, arg-1, arg-2, ..., arg-n).
+ +Class instance methods are either __bound or unbound__, referring to whether the method was accessed through an instance or a class, respectively. When a method is unbound, its im_self attribute will be None and if called, an explicit self object must be passed as the first argument. In this case, self must be an instance of the unbound method’s class (or a subclass of that class), otherwise a TypeError is raised. + +Like function objects, method objects support getting arbitrary attributes. However, since __method attributes are actually stored on the underlying function object (meth.im_func)__, setting method attributes on either bound or unbound methods is disallowed. Attempting to set an attribute on a method results in an AttributeError being raised. In order to set a method attribute, you need to explicitly __set it on the underlying function object__: + +>>> +>>> class C: +... def method(self): +... pass +... +>>> c = C() +>>> c.method.whoami = 'my name is method' # can't set on the method +Traceback (most recent call last): + File "<stdin>", line 1, in <module> +AttributeError: 'instancemethod' object has no attribute 'whoami' +>>> __c.method.im_func.whoami__ = 'my name is method' +>>> c.method.whoami +'my name is method' +See The standard type hierarchy for more information. + +==== 5.12.5. Code Objects ==== +Code objects are used by the implementation to represent **“pseudo-compiled”** executable Python code such as a function body. They differ from function objects because **they don’t contain a reference to their global execution environment**. Code objects are returned by the built-in __compile()__ function and can be extracted from function objects through their **func_code** attribute. See also the code module. + +A code object can be executed or evaluated by passing it (instead of a source string) to the __exec__ statement or the built-in __eval()__ function. + +See The standard type hierarchy for more information. + +==== 5.12.6. 
Type Objects ==== +Type objects represent the various object types. An object’s type is accessed by the built-in function __type()__. There are no special operations on types. The standard module types defines names for all standard built-in types. + +Types are written like this: <type 'int'>. + +==== 5.12.7. The Null Object ==== +This object is returned by functions that **don’t explicitly return a value**. It supports no special operations. There is exactly one null object, named __None__ (a built-in name). + +It is written as None. + +==== 5.12.8. The Ellipsis Object ==== +This object is used by **extended slice notation** (see Slicings). It supports no special operations. There is exactly one ellipsis object, named __Ellipsis__ (a built-in name). + +It is written as Ellipsis. When in a subscript, it can also be written as **...**, for example seq[...]. + +==== 5.12.9. The NotImplemented Object ==== +This object is returned from **comparisons and binary operations** when they are asked to operate on types they don’t support. See Comparisons for more information. + +It is written as __NotImplemented__. + +==== 5.12.10. Boolean Values ==== +Boolean values are the **two constant objects False and True**. They are used to represent truth values (although other values can also be considered false or true). In numeric contexts (for example when used as the argument to an arithmetic operator), they behave like the integers 0 and 1, respectively. The built-in function __bool()__ can be used to convert any value to a Boolean, if the value can be interpreted as a truth value (see section Truth Value Testing above). + +They are written as False and True, respectively. + +==== 5.12.11. Internal Objects ==== +See The standard type hierarchy for this information. It describes **stack frame objects, traceback objects, and slice objects**. + +===== 5.13. Special Attributes ===== +The implementation adds a few special read-only attributes to several object types, where they are relevant. 
Some of these are not reported by the dir() built-in function. + +object.__dict__ +A dictionary or other mapping object used to store an object’s (writable) attributes. + +object.__methods__ +Deprecated since version 2.2: Use the built-in function dir() to get a list of an object’s attributes. This attribute is no longer available. + +object.__members__ +Deprecated since version 2.2: Use the built-in function dir() to get a list of an object’s attributes. This attribute is no longer available. + +instance.__class__ +The class to which a class instance belongs. + +class.__bases__ +**The tuple** of base classes of a class object. + +class.__name__ +The name of the class or type. + +The following attributes are only supported by new-style classes. + +class.__mro__ +This attribute is **a tuple of classes** that are considered when looking for base classes during method resolution. + +class.__mro()__ +This method can be overridden by a metaclass to customize the method resolution order for its instances. It is called at class instantiation, and its result is stored in __mro__. + +class.__subclasses__() +Each new-style class keeps a list of weak references to its immediate subclasses. This method returns a list of all those references still alive. Example: + +>>> +>>> int.__subclasses__() +[] +Footnotes + +[1] Additional information on these special methods may be found in the Python Reference Manual (Basic customization). +[2] As a consequence, the list [1, 2] is considered equal to [1.0, 2.0], and similarly for tuples. +[3] They must have since the parser can’t tell the type of the operands. +[4] (1, 2, 3, 4) Cased characters are those with general category property being one of “Lu” (Letter, uppercase), “Ll” (Letter, lowercase), or “Lt” (Letter, titlecase). +[5] To format only a tuple you should therefore provide a singleton tuple whose only element is the tuple to be formatted. 
+[6] The advantage of leaving the newline on is that returning an empty string is then an unambiguous EOF indication. It is also possible (in cases where it might matter, for example, if you want to make an exact copy of a file while scanning its lines) to tell whether the last line of a file ended in a newline or not (yes this happens!). +» +indexmodules |next |previous | Python » Documentation » The Python Standard Library » +© Copyright 1990-2012, Python Software Foundation. +The Python Software Foundation is a non-profit corporation. Please donate. +Last updated on Dec 02, 2012. Found a bug? +Created using Sphinx 1.0.7. diff --git a/Zim/Programme/python/The-Python-Standard-Library/int.txt b/Zim/Programme/python/The-Python-Standard-Library/int.txt new file mode 100644 index 0000000..c8dc751 --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/int.txt @@ -0,0 +1,38 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-02T10:31:10+08:00 + +====== int ====== +Created Sunday 02 December 2012 + + +In [12]: lst = '123\n' //如果字符串中包含__空字符(如空格, \t, \v, \f, \n, \r)等__,int(), float()等函数仍然有效。 +In [13]: lst +Out[13]: '123\n' + +__In [14]: int(lst)__ +__Out[14]: 123__ + +In [15]: lst = '123\n\t' + +In [16]: lst +Out[16]: '123\n\t' + +In [18]: int(lst) +Out[18]: 123 + + +In [19]: lst = '123\nstr' //字符串中包含除了数字外的非空字符,转换时提示错误。 + +In [20]: lst +Out[20]: '123\nstr' + +In [21]: int(lst) +--------------------------------------------------------------------------- +ValueError Traceback (most recent call last) + in () +----> 1 int(lst) + +__ValueError:__ invalid literal for int() with **base 10**: '123\nstr' + +In [22]: diff --git a/Zim/Programme/python/The-Python-Standard-Library/list.txt b/Zim/Programme/python/The-Python-Standard-Library/list.txt new file mode 100644 index 0000000..1329f80 --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/list.txt @@ -0,0 +1,74 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 
+Creation-Date: 2012-12-02T09:40:06+08:00 + +====== list ====== +Created Sunday 02 December 2012 + +~ $ ipython2 +Python 2.7.3 (default, Apr 24 2012, 00:06:13) +Type "copyright", "credits" or "license" for more information. + +In [1]: lst = ['sdfds', 'dsfsd', 1, 3, [123, 22, 33, 'dsfds']] + +__In [2]: lst[0]=[1, 2, 3] //为列表的某个成员赋值时,python不会对右边的值进行迭代。__ + +In [3]: lst +Out[3]: 1, 2, 3], 'dsfsd', 1, 3, [123, 22, 33, 'dsfds' + +__In [8]: lst[0:0] = 'dffds' //为列表的成员列表赋值时,python会对右边的值进行迭代。__ + +In [9]: lst +Out[9]: [__'d', 'f', 'f', 'd', 's',__ 'dffds', 2, 3, 'dsfsd', 1, 3, [123, 22, 33, 'dsfds']] //可见,python对等式右边序列进行了迭代。 + +In [4]: lst[0:1] = [1, 2, 3] + +In [5]: lst +Out[5]: [1, 2, 3, 'dsfsd', 1, 3, [123, 22, 33, 'dsfds']] //同上 + +In [10]: + +__In [10]: lst[0:0] = ['dffds'] //将字符序列外加[和]就可以阻止迭代(因为这时字符串时列表的唯一成员)__ + +In [11]: lst +Out[11]: +['dffds', + 'd', + 'f', + 'f', + 'd', + 's', + 'dffds', + 2, + 3, + 'dsfsd', + 1, + 3, + [123, 22, 33, 'dsfds']] + +In [12]: + +In [24]: lst +Out[24]: '123\nstr' + +In [25]: lst = [1, 2, 3] + +In [26]: lst = lst + [4, 5, 6] //列表相+时,python会对第二个列表进行__迭代__。 + +In [27]: lst +Out[27]: [1, 2, 3, **4, 5, 6]** + +__In [28]: lst = lst + 'fdf' //只能list间相加__ +--------------------------------------------------------------------------- +TypeError Traceback (most recent call last) + in () +----> 1 lst = lst + 'fdf' + +TypeError: **can only concatenate list** (not "str") to list + +In [29]: lst = lst + list('fdf') + +In [30]: lst +Out[30]: [1, 2, 3, 4, 5, 6, 'f', 'd', 'f'] + +In [31]: diff --git a/Zim/Programme/python/library/pexpect.txt b/Zim/Programme/python/The-Python-Standard-Library/pexpect.txt similarity index 94% rename from Zim/Programme/python/library/pexpect.txt rename to Zim/Programme/python/The-Python-Standard-Library/pexpect.txt index b79e739..bd2c66a 100644 --- a/Zim/Programme/python/library/pexpect.txt +++ b/Zim/Programme/python/The-Python-Standard-Library/pexpect.txt @@ -6,10 +6,6 @@ Creation-Date: 2012-01-05T15:16:44+08:00 Created 
Thursday 05 January 2012 http://www.noah.org/wiki/Pexpect -pexpect -(Redirected from Pexpect) - - Contents 1 Pexpect version 2.3 @@ -59,22 +55,12 @@ Download the current version here from the SourceForge site here: pexpect curren ===== Description of Pexpect ===== Pexpect is a **pure Python module** that makes Python a better tool for__ controlling and automating other programs__. Pexpect is similar to the Don Libes `Expect` system, but Pexpect as a different interface that is easier to understand. + Pexpect is__ basically a pattern matching system__. It runs programs and watches output. When output matches a given pattern Pexpect can **respond** as if a human were typing responses. Pexpect can be used for** automation, testing, and screen scraping**. -Pexpect can be used for__ automating interactive console applications __such as ssh, ftp, passwd, telnet, etc. -It can also be used to__ control web applications__ via `lynx`, `w3m`, or some other **text-based **web browser. Pexpect is pure Python. Unlike other Expect-like modules for Python Pexpect does not require TCL or Expect nor does it require C extensions to be compiled. It should work on any platform that supports the standard Python pty module. + +Pexpect can be used for__ automating interactive console applications __such as ssh, ftp, passwd, telnet, etc. It can also be used to__ control web applications__ via `lynx`, `w3m`, or some other **text-based **web browser. Pexpect is pure Python. Unlike other Expect-like modules for Python Pexpect does not require TCL or Expect nor does it require C extensions to be compiled. It should work on any platform that supports the standard Python __pty__ module. Send questions to: noah@noah.org Put 'pexpect' in the subject. -License - -MIT style -- Free, open source, and all that good stuff. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Pexpect Copyright (c) 2010 Noah Spurrier ===== Download and Installation ===== diff --git a/Zim/Programme/python/The-Python-Standard-Library/re.txt b/Zim/Programme/python/The-Python-Standard-Library/re.txt new file mode 100644 index 0000000..e25286a --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/re.txt @@ -0,0 +1,1045 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-29T14:31:19+08:00 + +====== re ====== +Created Thursday 29 November 2012 +http://docs.python.org/2/library/re.html + +7.2. re — Regular expression operations + +This module provides regular expression matching operations similar to those found in Perl. Both patterns and strings to be searched can be **Unicode strings(这意味着re可以匹配中文字符串)** as well as **8-bit strings**. + +Regular expressions use the backslash character **('\')** to indicate special forms or to allow special characters to be used without invoking their special meaning. 
This __collides__ with Python’s usage of the same character for the same purpose in __string literals__; for example, to match a literal backslash, one might have to write '\\\\' as the pattern string, because the regular expression must be \\, and each backslash must be expressed as \\ inside a regular Python string literal. +python会对字符串字面量(string literal)进行处理,使其中的转义字符具有相应的特殊含义,然后__将处理的结果__交给相应的后续函数。 +In [11]: print **"ab\bc"** +ac //可见python会将\b解释为退格符,处理的**结果'ac'**会传给print语句。 +In [12]: print **r"ab\bc" ** +ab\bc //pyton不会对raw string作任何处理 +In [13]: print r"ab\\c" +ab\\c +In [14]: print "ab\\c" +ab\c +In [15]: print "ab\\\\c" +ab\\c +In [16]: + +The solution is to use __Python’s raw string__ notation for regular expression patterns; backslashes are not handled in any special way in a string literal prefixed with 'r'. So r"\n" is **a two-character string** containing '\' and 'n', while "\n" is a one-character string containing a newline. Usually patterns will be expressed in Python code using this raw string notation. + +It is important to note that most regular expression operations are available as __module-level functions__ and __RegexObject methods__. The functions are shortcuts that don’t require you to compile a regex object first, but miss some fine-tuning parameters. +大多数的正则表达式操作既存在与module-level functions,也存在于RegexObject对象的方法中,后者是调用re.compile()函数对pattern编译后的结果。 + +===== 7.2.1. Regular Expression Syntax ===== + +A regular expression (or RE) specifies a set of strings that matches it; the functions in this module let you check if a particular string matches a given regular expression (or if a given regular expression matches a particular string, which comes down to the same thing). + +Regular expressions can be __concatenated__ to form new regular expressions; if A and B are both regular expressions, then AB is also a regular expression. In general, if a string p matches A and another string q matches B, the string pq will match AB. 
+最简单的re是单个字符,通过将多个字符结合(concatenated)起来,可以形成新的更复杂的正则表达式。 + +This holds unless A or B contain low precedence(优先权) operations; boundary conditions between A and B; or have numbered group references. Thus, complex expressions can easily be constructed from simpler primitive expressions like the ones described here. For details of the theory and implementation of regular expressions, consult the Friedl book referenced above, or almost any textbook about compiler construction. + +A brief explanation of the format of regular expressions follows. For further information and a gentler presentation, consult the Regular Expression HOWTO. + +Regular expressions can contain both __special and ordinary__ characters. Most ordinary characters, like 'A', 'a', or '0', are the simplest regular expressions; they simply match themselves. You can concatenate ordinary characters, so last matches the string 'last'. (In the rest of this section, we’ll write RE’s in this special style, usually without quotes, and strings to be matched 'in single quotes'.) + +Some characters, like __'|' or '('__, are special. Special characters either stand for classes of ordinary characters, or affect how the regular expressions around them are interpreted. Regular expression pattern strings may not contain null bytes, but can specify the null byte using the **\number** notation, e.g., '\x00'. + +The special characters are: + +'.' + (Dot.) In the default mode, this matches any character __except a newline__. If the __DOTALL flag__ has been specified, this matches any character including a newline. +'^' + (Caret.) Matches the start of the __string__, and in __MULTILINE mode__ also matches immediately after each newline. +'$' + Matches the end of the string or just before the newline at the end of the string, and in MULTILINE mode also matches before a newline. foo matches both ‘foo’ and ‘foobar’, while the regular expression foo$ matches only ‘foo’. 
More interestingly, searching for foo.$ in 'foo1\nfoo2\n' matches ‘foo2’ normally, but ‘foo1’ in MULTILINE mode; searching for a single $ in 'foo\n' will find two (empty) matches: one just before the newline, and one at the end of the string. +Python中的^和$默认只匹配字符串的开始和结束,如果启用了MULTILINE标志,则含义为匹配行首和行尾。 + +下面的元字符表示前面的RE重复的次数,注意RE和重复元字符和起来形成一个pattern。 +'*' + Causes the resulting RE to match __0 or more__ repetitions of the **preceding RE**, as many repetitions as are possible. ab* will match ‘a’, ‘ab’, or ‘a’ followed by any number of ‘b’s. +这里的preceding RE可以是一个很简单的正则表达式,如单个字符。注意a*可以与__空字符__相匹配,如果想至少出现一个a,应该使用aa* + +'+' + Causes the resulting RE to match 1 or more repetitions of the preceding RE. ab+ will match ‘a’ followed by any non-zero number of ‘b’s; it will not match just ‘a’. + +'?' + Causes the resulting RE to match 0 or 1 repetitions of the preceding RE. ab? will match either ‘a’ or ‘ab’. + +***?, +?, ?? //非贪婪版本** +例如:msg = '

<H1>dsfsdf</H1><H1>dksjfskldf</H1>' +__r'<H1>.*</H1>'将匹配整个字符串,而r'<H1>.*?</H1>'
将只匹配到第一个.__ + The '*', '+', and '?' qualifiers are __all greedy__**; they match as much text as possible.** +__注意:贪婪也是有尺度的,也就是说在尽可能地长的前提是后面的模式能够有机会匹配。python试着在后面的模式能匹配的情况下尽可能地长。非贪婪也类似,尽可能短的前提是后面的模式也能够匹配。__ +Sometimes this behaviour isn’t desired; if the RE <.*> is matched against '

<H1>title</H1>', it will match the entire string, and not just '<H1>'. Adding '?' after the qualifier makes it perform the match in **non-greedy or minimal fashion**; as few characters as possible will be matched. Using .*? in the previous expression will match only '<H1>
'. + +{m} + Specifies that __exactly m copies__ __of__ the previous RE should be matched; **fewer** matches cause the entire RE not to match(但是待匹配的字符串中包含的重复字符次数比m多,则可以匹配成功。). For example, a{6} will match **exactly** six 'a' characters, but not five. + +{m,n} + Causes the resulting RE to match **from m to n** repetitions of the preceding RE, attempting to match as many repetitions as possible. For example, a{3,5} will match from 3 to 5 'a' characters. Omitting m specifies a lower bound of **zero**, and omitting n specifies an **infinite upper** bound. As an example, a{4,}b will match aaaab or a thousand 'a' characters followed by a b, but not aaab. The comma may not be omitted or the modifier would be confused with the previously described form. +__m,n之一可以省,省了m默认从0开始,省了n表示无穷大。该重复模式总是尽可能地匹配多,但是最多为n。__ + +{m,n}? + Causes the resulting RE to match from m to n repetitions of the preceding RE, attempting to match as few repetitions as possible. This is the non-greedy version of the previous qualifier. For example, on the 6-character string 'aaaaaa', a{3,5} will match 5 'a' characters, while a{3,5}? will only match 3 characters. + +__注意:__上面的重复模式 * ? + {m}等都是__对其前面最简单的RE进行重复__,例如ab*只是对字符b而不是ab进行重复。如果前面是group,则对整个group进行重复。 + +'\' + Either escapes special characters (permitting you to match characters like '*', '?', and so forth), or signals **a special sequence**; special sequences are discussed below. +对特殊字符进行转义,使其具有字面含义;或者表示一个__转义序列__。 + + If you’re not using a raw string to express the pattern, remember that Python also uses the backslash as an escape sequence in string literals; if the escape sequence isn’t recognized by Python’s parser, the backslash and subsequent character are included in the resulting string. However, if Python would recognize the resulting sequence, the backslash should be repeated twice. This is complicated and hard to understand, so it’s highly recommended that you use raw strings for all but the simplest expressions. 
+建议对所有的复杂正则表达式使用raw string。 + +[] + + Used to indicate __a set of characters__. In a set: + Characters can be listed individually, e.g. [amk] will match 'a', 'm', or 'k'. + Ranges of characters can be indicated by giving two characters and separating them by a '-', for example [a-z] will match any lowercase ASCII letter, [0-5][0-9] will match all the two-digits numbers from 00 to 59, and [0-9A-Fa-f] will match any hexadecimal digit. If - is escaped (e.g. [a\-z]) or if its placed as the first or last character (e.g. [a-]), it will match a literal '-'. + Special characters lose their special meaning inside sets. For example, [(+*)] will match any of the literal characters '(', '+', '*', or ')'. +**特殊字符位于[]中时,具有其字面含义,但是]除外。** + __Character classes__ such as \w or \S (defined below) are also accepted inside a set, although the characters they match depends on whether LOCALE or UNICODE mode is in force. + Characters that are not within a range can be matched by complementing the set. If the first character of the set is '^', all the characters that are not in the set will be matched. For example, [^5] will match any character except '5', and [^^] will match any character except '^'. ^ has no special meaning if it’s not the first character in the set. + To match a literal ']' inside a set, precede it with a backslash, or place it __at the beginning__ of the set. For example, both [()[\]{}] and []()[{}] will both match a parenthesis. + +'|' + A|B, where A and B can be arbitrary REs(**A和B长度没有限制,直到RE边界或遇到下一个|,但是groups除外。**), creates a regular expression that will match either A or B. An arbitrary number of REs can be separated by the '|' in this way. This can be used inside groups (see below) as well. As the target string is scanned, REs separated by '|' are tried from left to right. When one pattern completely matches, that branch is accepted. This means that once A matches, B will not be tested further, even if it would produce a longer overall match. 
In other words, the '|' operator is never greedy. To match a literal '|', use \|, or enclose it inside a character class, as in [|]. + +(...) + Matches whatever regular expression is inside the parentheses, and indicates the start and end of a group; the contents of a group can be retrieved after a match has been performed, and can be matched later in the string with the __\number__ special sequence, described below. To match the literals '(' or ')', use \( or \), or enclose them inside a character class: [(] [)]. +模式组(pattern group),__整体作为一个正则表达式。后面跟重复模式时,表示模式组里的模式(不包含括号)重复多次。例如:__ + (a[0-9]c){2}(d[0-9]f)表示__模式组的模式__a[0-9]c重复三次,即等效为(a[0-9]c)a[0-9]c(d[0-9]f),所以实际上__只有2个而不是3个__模式组。 + + In [49]: re.search('a(b[0-9]c){2}(d[0-9]e)', 'ab1cb2cd1e').group() //相当于group(0),只返回__模式匹配的字符串__而不一定是完整的字符串。 + Out[49]: 'ab1cb2cd1e' + + In [51]: re.search('a(b[0-9]c){2}(d[0-9]e)', 'ab1cb2cd1e**eee**').group(0,1,2) + Out[51]: ('ab1cb2cd1e', 'b2c', 'd1e') //待匹配字符串的后三个字符**并没有**被匹配。 + + In [48]: re.search('a(b[0-9]c){2}(d[0-9]e)', 'ab1cb2cd1e').groups() + Out[48]: ('b2c', 'd1e') //返回两个元素的tuple,说明模式中只有两个pattern group。其中__重复的模式组其最终值为最后一次匹配的内容(所以上面结果为'b2c'而不是'b1c')。__ + + In [50]: re.search('a(b[0-9]c){2}(d[0-9]e)', 'ab1cb2cd1e').group(0,1,2) + Out[50]: ('ab1cb2cd1e', 'b2c', 'd1e') //返回模式匹配的字符串,pattern group1 和pattern group2匹配的字符串。注意,pattern group__从1开始编号__。 + + In [46]: re.search('**(a(b[0-9]c){2}(d[0-9]e))**', 'ab1cb2cd1e').group() //注意模式组最外面的括号。 + Out[46]: 'ab1cb2cd1e' + + In [45]: re.search('(a(b[0-9]c){2}(d[0-9]e))', 'ab1cb2cd1e').groups() //返回所有pattern group匹配的内容。 + Out[45]: ('ab1cb2cd1e', 'b2c', 'd1e') //可见模式中有三个pattern group,__模式编号从第一个左括号开始为1__。 + + In [47]: re.search('(a(b[0-9]c){2}(d[0-9]e))', 'ab1cb2cd1e').group(0,1,2,3) + Out[47]: ('ab1cb2cd1e', 'ab1cb2cd1e', 'b2c', 'd1e') + + In [56]: re.search('(a(b[0-9]c**(b[0-9]c){2}**)(d[0-9]e))', 'ab1cb2cb3cd1e').groups() //注意:第二个pattern group**嵌套了**另一个group。 + Out[56]: ('ab1cb2cb3cd1e', 'b1cb2cb3c', 'b3c', 'd1e') + + In [58]: 
re.search('(a(b[0-9]c(b[0-9]c)__\3__)(d[0-9]e))', 'ab1cb2cb2cd1e').groups() //错误,对第三个pattern group匹配内容的__引用不能位于第三个pattern group中__。 + --------------------------------------------------------------------------- + AttributeError Traceback (most recent call last) + + In [59]: re.search('(a(b[0-9]c(b[0-9]c))\3(d[0-9]e))', 'ab1cb2cb2cd1e').groups() //错误,pattern使用的string literal而且其中含有转义字符\3,因此__应用\\3或raw string__. + --------------------------------------------------------------------------- + AttributeError Traceback (most recent call last) + + In [61]: re.search(r"(a(b[0-9]c(b[0-9]c)**\3**)(d[0-9]e))", 'ab1cb2cb2cd1e').groups() //正确,\3引用的是第三个group__匹配的内容__。 + Out[61]: ('ab1cb2cb2cd1e', 'b1cb2cb2c', 'b2c', 'd1e') + +(?...) + This is an extension notation (a '?' following a '(' is not meaningful otherwise). +__单独的左括号后跟问号是没有特殊含义的,除非后面还有右括号。__ +The first character after the '?' determines what the meaning and further syntax of the construct is. Extensions usually do not create a new group; (?P...) is the only exception to this rule. Following are the currently supported extensions. +__(?...)是(...)的扩展形式,其实际含义取决于?后的第一个字符__。具体情况如下所示: + +(?iLmsux) + + (**One or more** letters from the set 'i', 'L', 'm', 's', 'u', 'x'.) The group matches the empty string; the letters set the corresponding flags: **re.I (ignore case), re.L (locale dependent), re.M (multi-line), re.S (dot matches all), re.U (Unicode dependent), and re.X (verbose)**, for the entire regular expression. (The flags are described in Module Contents.) This is useful if you wish to include the flags as part of the regular expression, instead of passing a flag argument to the re.compile() function. +这种形式的好处是在正则表达式的pattern中指定flags,这样就不用在相关函数中设置。 + Note that the (?x) flag changes how the expression is parsed. It should be used first in the expression string, or after one or more whitespace characters. If there are non-whitespace characters before the flag, the results are undefined. + +(?:...) 
+ A non-capturing version of regular parentheses(这里的non-capturing指的是不能不占用pattern group 编号的模式组). Matches whatever regular expression is inside the parentheses, but the substring matched by the group __cannot be retrieved__ after performing a match or referenced later in the pattern. +括号中的pattern还是用于匹配,但是__匹配后的内容不能在以后引用__(例如通过\number的形式,或MatchObject中的group id方式)。 +这种匹配形式是为以后作准备的。 + +(?P...) +__为模式组定义另外一个名称__,以后可以通过该名称应用该模式组匹配的内容。同时模式组编号(从1开始)仍可以使用。 + + Similar to regular parentheses, but the substring matched by the group is accessible within the rest of the regular expression __via the symbolic group name name__. Group names must be valid Python identifiers, and each group name must be defined only once within a regular expression. A symbolic group is __also a numbered group__, just as if the group were not named. So the **group named id** in the example below can also be referenced as the numbered group 1. + + For example, if the pattern is (?P[a-zA-Z_]\w*), the group can be referenced by its name in arguments to methods of __match objects__, such as **m.group('id') or m.end('id')**, and also by name in the regular expression itself (using **(?P=id)**) and replacement text given to **.sub() (using \g)**. + +(?P=name) //匹配前面的模式组名称为name的内容,__与\n的功能类似__,本身不占用模式组编号。 + Matches whatever text was matched by the earlier group named name. + +(?#...) + A comment; the contents of the parentheses are simply ignored. + +(?=...) +__只有当字符串匹配括号中的模式时,才考虑括号前面的模式是否匹配。也就是说,先在带匹配字符串中查找符合括号中的模式字符串,然后才看其前面的内容是否和括号前的模式匹配。如果匹配则返回括号前面的内容。__ + Matches if ... matches next**(if ...matches next是前提,如果成立则matches,即match ...前面的模式)**, but doesn’t consume any of the string(__...匹配的内容并不在结果之中__). This is called **a lookahead assertion**. For example, Isaac (?=Asimov) will match 'Isaac ' only if it’s followed by 'Asimov'. 
+ +实际上Isaac(?=Asimov)先在Isaac Asimov中查找符合Asimov模式的子串,然后看其前面的字符是否是c,如果是再看其前面的字符是否是a...,最后返回的是Isaac。例如: +>>>msg = "zhang jun" +>>>import re +>>>m = re.search(r"zhang (?=jun)", msg) __//先匹配jun,然后look ahead是否匹配,若是则返回前面匹配的内容。__ +>>>print m.group() //返回匹配到的字符串,可见**(?=jun)模式匹配的内容并没有返回**,即上面所说的doesn't consume any of the string. +>>>zhang +>>>m = re.search(r"(?<=zhang )jun", msg) __//先匹配zhang,然后look behind 是否匹配,若是则返回后面匹配的内容。__ +>>>print m.group() +>>>jun + +(?!...) //(?=...)的相反情形 + Matches if ... doesn’t match next. This is a negative lookahead assertion. For example, Isaac (?!Asimov) will match 'Isaac ' only if it’s not followed by 'Asimov'. + +(?<=...) + + Matches if the current position in the string is preceded by a match for ... that ends at the current position. This is called a **positive lookbehind assertion**. (?<=abc)def will find a match in abcdef, since the lookbehind will back up 3 characters and check if the contained pattern matches. The contained pattern must only match strings of some __fixed length__, meaning that abc or a|b are allowed, but a* and a{3,4} are not. Note that patterns which start with positive lookbehind assertions will not match at the beginning of the string being searched; you will most likely want to use the search() function rather than the match() function: + >>> + + >>> import re + >>> m = re.search('(?<=abc)def', 'abcdef') + >>> m.group(0) + 'def' + + This example looks for a word following a hyphen: + >>> + + >>> m = re.search('(?<=-)\w+', 'spam-egg') + >>> m.group(0) + 'egg' + +(?) is a poor email matching pattern, which will match with '' as well as 'user@host.com', but not with ') 注意:模式中共有三个模式组,(?:\.\w+)并不占用组编号。 + New in version 2.4. + +The special sequences consist of '\' and a character from the list below. If the ordinary character is not on the list, then the resulting RE will match the second character. For example, __\$ matches the character '$'__. 
+__也就是说对于\c的形式,如果c不是下面列出的类型,则\c和c等价。__ + +\number + Matches __the contents__ of the group of the same number. Groups are numbered __starting from 1__. For example, (.+) \1 matches 'the the' or '55 55', but not 'the end' (note the space after the group). This special sequence can only be used to match one of the first 99 groups. If the first digit of number is 0, or number is __3 octal digits long__, it will not be interpreted as a group match, but as the character with octal value number. Inside the '[' and ']' of a character class, all numeric escapes are treated as characters. +作为pattern group匹配内容的引用含义时,number的值为__1-99__.。如果number第一个数字为0或为3位八进制数,则python认为number是一个__代表字符__的八进制字符序列编码。在[...]中的所有\number形式的意义都代表字符。 + +\A #与/Z向对应。 + Matches only at the start of the string. +\b #这里的b理解为__boundary__ + Matches the empty string, but only at the __beginning or end of a word__. A word is defined as a sequence of alphanumeric or underscore characters, so the end of a word is indicated by whitespace or a non-alphanumeric, non-underscore character. + +Note that formally, \b is defined as the boundary between a \w and a \W character (or vice versa), or between \w and the beginning/end of the string, so the precise set of characters deemed to be alphanumeric depends on the values of the UNICODE and LOCALE flags. For example, __r'\bfoo\b'__ matches __'foo'__, 'foo.', '(foo)', 'bar foo baz' but not 'foobar' or 'foo3'. Inside a character range(即[...]形式), \b represents the backspace character, for compatibility with Python’s string literals. + +__python中的单词word是由字母、数字、下划线组成的。注意,\b代表word的边界字符(该字符可以为空),所以\b只能用于word的两边如r"\bfoo\b",注意由于\b代表的字符可以为空,所以r'\bfoo\b'与'foo'匹配。__ + +\B **#not boundary** + Matches the empty string, but only when it is __not at the beginning or end of a word.__ This means that r'py\B' matches 'python', 'py3', 'py2', but not 'py', 'py.', or 'py!'. \B is just the opposite of \b, so is also subject to the settings of LOCALE and UNICODE. 
+ +__\B匹配不在word边界上的empty string,这里的empty string指的是零宽度的位置(不消耗任何字符)。也就是说\B匹配的位置不能是word的开始或结尾,例如r'py\B'匹配'python'中的py,但不匹配单独的'py'。__ + +\d + When the UNICODE flag is not specified, matches any decimal digit; this is equivalent to __the set [0-9]__. With UNICODE, it will match whatever is classified as a decimal digit in the Unicode character properties database. + +\D + When the UNICODE flag is not specified, matches any non-digit character; this is __equivalent to the set [^0-9]__. With UNICODE, it will match anything other than character marked as digits in the Unicode character properties database. + +\s + When the UNICODE flag is not specified, it matches __any whitespace character__, this is equivalent to __the set [ \t\n\r\f\v]__. The LOCALE flag has no extra effect on matching of the space. If UNICODE is set, this will match the characters [ \t\n\r\f\v] plus whatever is classified as space in the Unicode character properties database. + +\S + When the UNICODE flags is not specified, matches __any non-whitespace character__; this is equivalent to the __set [^ \t\n\r\f\v]__ The LOCALE flag has no extra effect on non-whitespace match. If UNICODE is set, then any character not marked as space in the Unicode character properties database is matched. + +\w + When the LOCALE and UNICODE flags are not specified, matches __any alphanumeric character and the underscore__; this is equivalent to __the set [a-zA-Z0-9_]__. With LOCALE, it will match the set [0-9_] plus whatever characters are defined as alphanumeric for the current locale. If UNICODE is set, this will match the characters [0-9_] plus whatever is classified as alphanumeric in the Unicode character properties database. + +\W + When the LOCALE and UNICODE flags are not specified, matches any __non-alphanumeric__ character; this is equivalent to the set __[^a-zA-Z0-9_]__. With LOCALE, it will match any character not in the set [0-9_], and not defined as alphanumeric for the current locale.
If UNICODE is set, this will match anything other than [0-9_] plus characters classied as not alphanumeric in the Unicode character properties database. + +\Z + Matches only **at the end** of the string. + +If both LOCALE and UNICODE flags are included for a particular sequence, then LOCALE flag takes effect first followed by the UNICODE. + +Most of the standard escapes supported by Python string literals are also accepted by the regular expression parser: + +\a \b \f \n +\r \t \v \x +\\ + +(Note that \b is used to represent word boundaries, and means “backspace” __only inside__ character classes.) + +Octal escapes are included in a limited form: If the first digit is a 0, or if there are three octal digits, it is considered an octal escape. Otherwise, it is a group reference. As for string literals, octal escapes are always at most three digits in length. + +===== 7.2.2. Module Contents ===== +The module defines several functions, constants, and an exception. Some of the functions are simplified versions of the full featured methods for compiled regular expressions. Most non-trivial applications __always use__ the compiled form. +__module级函数同时也可以用于编译后生成的正则表达式对象,而且后者更有效率。__ + +**re.compile(pattern, flags=0) //返回一个RE Object,注意pattern最好使用raw string形式。** + + Compile a regular expression pattern into a **regular expression object,** which can be used for matching using its match() and search() methods, described below. + + The expression’s behaviour can be modified by specifying a flags value. Values can be any of the following variables, combined using __bitwise OR__ (the | operator). + + The sequence + + prog = re.compile(pattern) //prog是一个正则表达式对象。 + result = prog.match(string) + + is equivalent to + + result = re.match(pattern, string) + + but using re.compile() and saving the resulting regular expression object for reuse is **more efficient** when the expression will be used several times in a single program. 
+ + Note + + The compiled versions of the most recent patterns passed to re.match(), re.search() or re.compile() are cached, so programs that use only a few regular expressions at a time needn’t worry about compiling regular expressions. + +**re.DEBUG** + Display debug information about compiled expression. + +**re.I** +**re.IGNORECASE** + Perform __case-insensitive matching__; expressions like [A-Z] will match lowercase letters, too. This is not affected by the current locale. + +re.L +re.LOCALE + Make \w, \W, \b, \B, \s and \S dependent on the current locale. + +re.M +re.MULTILINE + When specified, the pattern character '^' matches at the beginning of the string __and__ at the beginning of each line (immediately following each newline); and the pattern character '$' matches at the end of the string and at the end of each line (immediately preceding each newline). By default, '^' matches only at the beginning of the string, and '$' only at the end of the string and immediately before the newline (if any) at the end of the string. +^和$默认只匹配字符串的开始和结束。在编译pattern如果使用了这个flag,则它们__还可以__匹配行首和行尾。 + +re.S +re.DOTALL + Make the '.' special character match any character at all, __including a newline__; without this flag, '.' will match anything except a newline. + +re.U +re.UNICODE + Make \w, \W, \b, \B, \d, \D, \s and \S dependent on the Unicode character properties database. + + New in version 2.0. + +re.X +**re.VERBOSE** + This flag allows you to write regular expressions that __look nicer__. __Whitespace within the pattern is ignored__, except when in a character class or preceded by an unescaped backslash, and, when a line contains a '#' neither in a character class or preceded by an unescaped backslash, all characters from the leftmost such '#' through the end of the line are ignored. + + That means that the two following regular expression objects that match a decimal number are functionally equal: + + a = re.compile(r"""\d + # the integral part + \. 
# the decimal point + \d * # some fractional digits""", **re.X**) **//pattern中的空格都被忽视,除非前面带有未转义的反斜杠。** + b = re.compile(r"\d+\.\d*") + +==== re.search(pattern, string, flags=0) //如果有匹配,返回一个MatchObject实例;否则返回None ==== +__注意:search会在string中搜索匹配pattern的字符串,该字符串可以位于任何位置,而且只返回第一个匹配的__。 +Scan through string looking for a location where the regular expression pattern produces a match, and return a corresponding __MatchObject__ instance. Return **None** if no position in the string matches the pattern; note that this is different from finding a zero-length match at some point in the string. + +==== re.match(pattern, string, flags=0) ==== +If zero or more characters __at the beginning of string __match the regular expression pattern, return a corresponding **MatchObject** instance. Return **None** if the string does not match the pattern; note that this is different from a zero-length match. + +Note that even in MULTILINE mode, re.match() will **only match** at the beginning of the string and not at the beginning of each line.(string中可能包含多行,但是还是从string的开始匹配) +注意:re.match()__只会匹配字符串的开头,而不是行首。__ + +If you want to locate a **match anywhere** in string, use search() instead (see also search() vs. match()). + +===== 实例: ===== +In [4]: msg = "This dog is a yellow dog, it is 5 years old." 
+In [5]: import re +In [6]: reobj = re.compile(__r"^(This).*(?P<dog>dog).*(yellow).*(?P=dog).*(old)"__) +In [7]: mobj = reobj.search(msg) +In [8]: mobj.group() //mobj.group()如果无参数,则返回pattern匹配的**整个字符串** +Out[8]: 'This dog is a yellow dog, it is 5 years old' + +In [9]: mobj.group(0) //mobj.group()与mobj.group(0)__等价__ +Out[9]: 'This dog is a yellow dog, it is 5 years old' + +In [10]: mobj.group(1) //mobj.group()如果只有一个参数,则返回一个字符串 +Out[10]: 'This' + +In [11]: mobj.group(2) +Out[11]: 'dog' + +In [13]: mobj.group(1,2,3,4) //mobj.group()如果有多个参数,则__返回一个tuple__,其中每个值与指定的参数对应。 +Out[13]: ('This', 'dog', 'yellow', 'old') + +In [14]: mobj.groups() __//mobj.groups()则返回一个包含有所有匹配模式组的字符串tuple。其实现其实是group(1-99).__ +Out[14]: ('This', 'dog', 'yellow', 'old') + +In [17]: mobj.group(**"dog"**) //mobj.group()的参数可以为模式组的编号,也可以为模式组的名称。名称为__字符串__ +Out[17]: 'dog' + +In [12]: mobj.group(1,2,3,4,5) __//注意(?P=name)并不算一个命名的模式组,也不占有一个模式组编号。__ +--------------------------------------------------------------------------- +IndexError Traceback (most recent call last) + in () +----> 1 mobj.group(1,2,3,4,5) + +IndexError: no such group + +In [15]: mobj.groupdict() //返回模式串中匹配的命名的subgroup。key为subgroup名称,值为匹配的内容。 +Out[15]: {'dog': 'dog'} + +In [16]: mobj.start("dog") //返回某一subgroup匹配的字符串首地址。 +Out[16]: 5 + +In [19]: mobj.span("dog") //返回某一subgroup匹配的字符串内容的跨距。 +Out[19]: (5, 8) + +In [20]: mobj.lastindex //返回最后一个subgroup的数字编号。注意(?P=name)和(?:...)模式不占用编号。 +Out[20]: 4 + +In [21]: mobj.lastgroup + +In [22]: mobj.string +Out[22]: 'This dog is a yellow dog, it is 5 years old.' + +In [23]: mobj.re +Out[23]: re.compile(r'^(This).*(?P<dog>dog).*(yellow).*(?P=dog).*(old)', re.UNICODE) + +In [24]: msg = 'This dog is a yellow dog, it is 5 years old.' 
+In [25]: reobj = re.compile(r'^(This).*(?Pdog).*__(?:yellow)__.*(?P=dog).*(old)', re.UNICODE) +In [26]: mobj = reobj.search(msg) + +In [27]: mobj.groups() __//可见(?:yellow) subgroup并不占用编号,也不能在以后引用。__ +Out[27]: ('This', 'dog', 'old') + +In [28]: mobj.group() **//但是(?:yello)匹配的内容还是会被打印出来。** +Out[28]: 'This dog is a yellow dog, it is 5 years old' + + +In [30]: msg = "123456" +In [31]: reobj = re.compile(__r"(..)+"__) //模式串__表面上只有一个__subgroup,因此(..)+匹配的内容__只能通过一个__subgroup number引用。 +In [32]: mobj = reobj.search(msg) + +In [33]: mobj.group() +Out[33]: '123456' + +In [34]: mobj.groups() //可见只有一个subgroup。 +Out[34]: **('56',)** + +In [35]: mobj.group(1) +Out[35]: '56' + +In [36]: mobj.group(2) +--------------------------------------------------------------------------- +IndexError Traceback (most recent call last) + in () +----> 1 mobj.group(2) + +__IndexError__: no such group + +In [37]: + +==== re.split(pattern, string, maxsplit=0, flags=0) //pattern定义了分割字符(串)的模式,结果为分割后的字符串列表。如果pattern中含有pattern group,在pattern匹配的前提下,匹配group的字符串也将出现在结果列表中。 ==== + +Split string by the occurrences of pattern. If capturing parentheses are used in pattern, then the text of all groups in the pattern are also returned as part of the resulting list. + +If maxsplit is nonzero, at most maxsplit splits occur, and the remainder of the string is returned as the final element of the list. (Incompatibility note: in the original Python 1.5 release, maxsplit was ignored. This has been fixed in later releases.) + +If there are **capturing groups** in the separator and it matches at the start of the string, the result will start with an empty string. The same holds for the end of the string: + >>> + + >>> re.split('(\W+)', '...words, words...') + ['''', '...', 'words', ', ', 'words', '...', ''''] + + That way, separator components are always found **at the same relative indices** within the result list (e.g., if there’s one capturing group in the separator, the 0th, the 2nd and so forth). 
+* __也就是说,如果pattern中没有括号,则会用pattern匹配的字符分割string,返回分割后的字符串列表。__ +* __如果pattern中包含括号,则结果列表中还包含pattern匹配的分割字符串。__ + + Note that split will never split a string on **an empty pattern match**. For example: + >>> re.split('x*', 'foo') + ['foo'] + >>> re.split("(?m)^$", "foo\n\nbar\n") + ['foo\n\nbar\n'] + +==== 实例 ==== + >>> re.split('\W+', 'Words, words, words.') //pattern中不包含括号,结果列表为分割后的字符串。 + ['Words', 'words', 'words', __''__] //注意最后面的空字符串,注意__分割后子字符串的个数为偶数。__ + + >>> re.split('(\W+)', 'Words, words, words.') //pattern中包含括号,所以结果列表中还包含pattern匹配的分割字符串。 + ['Words', __', '__, 'words', __', '__, 'words', __'.'__, **''**] //黄色的即为pattern中匹配的分割字符串。 + + >>> re.split('\W+', 'Words, words, words.', 1) //maxsplit指定分割的次数,剩余的字符串作为结果返回。 + ['Words', **'words, words.'**] //最后一个元素为未分割的字符串 + + >>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE) + ['0', '3', '9'] + + In [37]: ms = 'sdfsdkfgeekarddjfksdjfkgeekarddskfjksdfgeekardxdjfksdfj' + In [38]: res = re.split(r'**(geekard)**', ms) **//注意geekard是一个subgroup会出现在结果列表中,但是**__整个模式__**才是一个分割类型。** + In [39]: res + Out[39]: ['sdfsdkf', __'geekard'__, 'djfksdjfk', 'geekard', 'dskfjksdfgeekardxdjfksdfj'] + + In [75]: re.split(r"(a)([0-9])(c)", 'ab1cb2cb2cd1e') //分割字符串模式匹配失败,**待匹配字符串没有被分割。** + Out[75]: ['ab1cb2cb2cd1e'] + + In [77]: re.split(r"b[0-9]c", 'ab1cb2cb2cd1e') + Out[77]: ['a', '', '', 'd1e'] //注意,分割字符串连续后的结果列表中有两个连续的空字符。 + + In [76]: re.split(r"(b)([0-9])(c)", 'ab1cb2cb2cd1e') + Out[76]: ['a', **'b', '1', 'c'**, __''__, **'b', '2', 'c'**, __''__, 'b', '2', 'c', 'd1e'] //结果列表中空字符的由来。 + In [78]: + +Changed in version 2.7: Added the optional flags argument. + +没有re.find()函数 + +===== re.findall(pattern, string, flags=0) //结果是一个列表 ===== +**string中的某部分字符串**__先要完整地匹配pattern__**,但是根据pattern中包含的subgroup多少,返回结果列表的内容有所不同:** +1. **如果pattern中无subgroup(即没有括号),则返回的是一个完整匹配pattern的字符串列表。** +2. **如果pattern中有一个subgroup,则返回的是每次匹配subgroup的字符串列表(subgroup外的pattern匹配的内容不返回)。** +3. 
**如果pattern中有多个subgroup,则返回的是一个tuple list,每个tuple中的元素为每次subgroup匹配到的字符串。** + +Return **all non-overlapping matches** of pattern in string, as __a list of strings__. The string is scanned left-to-right, and matches are returned in the order found. If one or more **groups** are present in the pattern, return **a** list of groups; this will be **a list of tuples** if the pattern has more than one group. Empty matches are included in the result unless they touch the beginning of another match. + + New in version 1.5.2. + + Changed in version 2.4: Added the optional flags argument. + +===== 实例: ===== + +In [46]: ms = 'sdfsdkfgeekarddjfksdjfkgeekarddskfjksdfgeekardxdjfksdfj' +In [48]: res = re.findall(r'geekard', ms) //pattern中没有subgroup,返回的是**一个每次完整匹配的字符串列表**。 + +In [49]: res +Out[49]: ['geekard', 'geekard'] //完整匹配两次 + +In [50]: res = re.findall(r'**(geekard)**', ms) __//先要完整匹配__,然后每次返回的时subgroup匹配到的内容。 + +In [51]: res +Out[51]: ['geekard', 'geekard'] + +In [52]: res = re.findall(r'**()(geekard)**', ms) //先要完整匹配,由于pattern中有两个subgroup,所以__每次返回一个tuple__,其中的元素为每个subgroup匹配到的字符串 + +In [53]: res +Out[53]: [('', 'geekard'), ('', 'geekard')] + +In [54]: + +==== re.finditer(pattern, string, flags=0) //返回一可迭代对象,每次迭代返回一个MatchObject实例(只有当pattern中含有pattern group时,MatchObject的groups()才返回内容)。 ==== + + Return an iterator yielding __MatchObject__ instances over all non-overlapping matches for the RE pattern in string. The string is scanned left-to-right, and matches are returned in the order found. Empty matches are included in the result unless they touch the beginning of another match. + + New in version 2.2. + + Changed in version 2.4: Added the optional flags argument. + +===== 实例: ===== +>>> import re +>>> ms = 'sdfsdkfgeekarddjfksdjfkgeekarddskfjksdfgeekardxdjfksdfj' +>>> res = re.finditer(r'**geekard**', ms) //pattern中**没有subgroup** +>>> for mobj in res: +... mobj.groups() //mobj.groups()相当于mobj.group(1-99)。__由于pattern中没有subgroup,所以所有对组编号的引用返回为空__。 +... 
+() +() +>>> res = re.finditer(r'geekard', ms) //与findall类似,但是返回的是一个迭代器对象,每次迭代时返回一个MatchObject对象。 +>>> for mobj in res: //mobj为MatchObject对象。 +... mobj.group() //mobj.group()返回**完整的匹配字符串** +... +'geekard' +'geekard' +>>> +>>> res = re.finditer(r'**(geekard)()**', ms) //pattern中__有两个subgroups__,所以mobj.groups()和mobj.group(1-2)可以使用。 +>>> for mobj in res: +... mobj.groups() +... +('geekard', '') //**返回的是一个tuple** +('geekard', '') +>>> for mobj in res: +... mobj.group() +... +>>> res = re.finditer(r'(geekard)()', ms) +>>> for mobj in res: +... mobj.group() +... +'geekard' +'geekard' +>>> + +===== re.sub(pattern, repl, string, count=0, flags=0) ===== +pattern可以是一个字符串或RE对象,如果count为0 ,则默认替换string中**所有**匹配pattern的字符串。 +Return the string obtained by replacing the **leftmost non-overlapping** occurrences of pattern in string by the replacement repl. __用repl的内容替换string中匹配pattern的内容,返回的结果是替换后的string字符串。如果pattern中包含有subgroup,则可以在repl中通过\number的形式引用匹配到的内容,这样就可以有选择性的保留原string中的内容。__ + If the pattern isn’t found, string is returned unchanged. repl can be a string or a function; if it is a string, any backslash escapes in it are processed. That is, \n is converted to a single newline character, \r is converted to a carriage return, and so forth. Unknown escapes such as \j are left alone. **Backreferences**, such as \6, are replaced with **the substring matched by group 6** in the pattern. For example: + >>> + + >>> re.sub(r'def\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*\(\s*\):', //pattern匹配整个string,所以整个string被rel替换。 + ... r'static PyObject*\npy_\1(void)\n{', + ... 
'def myfunc():') + 'static PyObject*\npy_myfunc(void)\n{' + +===== 实例: ===== +>>> ms = 'sdfsdkfgeekarddjfksdjfkgeekarddskfjksdfgeekardxdjfksdfj' +>>> res = re.sub(r'geekard', '11111', ms) **//将ms字符串中所有geekard替换为11111** +>>> res +'sdfsdkf11111djfksdjfk11111dskfjksdf11111xdjfksdfj' +>>> res = re.sub(r'(geekard)', '11111', ms) **//同上** +>>> res +'sdfsdkf11111djfksdjfk11111dskfjksdf11111xdjfksdfj' +>>> res = re.sub(r'()(geekard)', '11111', ms) **//将ms字符串中所有geekard替换为11111** +>>> res +'sdfsdkf11111djfksdjfk11111dskfjksdfgeekardxdjfksdfj' +>>> res = re.sub(r'()(geekard)', '\111111', ms) **//python会将rel看作一个字符串,先对其中的转义字符进行解释。所以\111111被解释为\111和111,前者为大写字母I的八进制值。** +>>> res +'sdfsdkf__I111__djfksdjfkI111dskfjksdfgeekardxdjfksdfj' + +>>> res = re.sub(r'()(geekard)', '\1abc', ms) //**由于python先对rel字符串中的转义字符处理,所以这里的\1被解释为ASCII中的值为1的符号。** +>>> res +'sdfsdkf__\x01__abcdjfksdjfk\x01abcdskfjksdfgeekardxdjfksdfj' + +>>> res = re.sub(r'()(geekard)', __r'\1abc'__, ms) //通过在rel字符串前加r字符,使其其中的转义字符不被python预先解释,而是传给正则表达式处理。 +>>> res +'sdfsdkfabcdjfksdjfkabcdskfjksdfgeekardxdjfksdfj' +>>> res = re.sub(r'()(geekard)', __r'\2abc'__, ms) //这里的\2代表**pattern中的第二个subgroup匹配的内容**。 +>>> res +'sdfsdkfgeekardabcdjfksdjfkgeekardabcdskfjksdfgeekardxdjfksdfj' +>>> + +If repl is a function, it is called for every non-overlapping occurrence of pattern. The function takes a single __match object__ argument, and returns the replacement **string**. For example: + >>> + + >>> def dashrepl(matchobj): **//参数是MatchObject对象** + ... if matchobj.**group(0)** == '-': return ' ' //**由于sub的pattern中不包含括号,group(i)也为空,所以groups()也为空(groups()等效于,group(1:99))。** + ... else: return '-' + >>> re.sub('-{1,2}', dashrepl, 'pro----gram-files') //{m,n}是**尽可能多**地重复,所以被sub了两次,每次都是--被替换为- + 'pro--gram files' + >>> re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE) + 'Baked Beans & Spam' + + The pattern may be a string or an RE object. 
+ +The optional argument count is the maximum number of pattern occurrences to be replaced; count must be a non-negative integer. If omitted or zero, __all occurrences__ will be replaced. Empty matches for the pattern are replaced only when not adjacent to a previous match, so sub('x*', '-', 'abc') returns '-a-b-c-'. + +In addition to character escapes and backreferences as described above, __\g<name>__ will use the substring matched by the group named name, as defined by the **(?P<name>...)** syntax. \g<number> uses the corresponding group number; __\g<2> is therefore equivalent to \2__, but isn’t ambiguous in a replacement such as \g<2>0. \20 would be interpreted as a reference to group 20, not a reference to group 2 followed by the literal character '0'. The backreference __\g<0>__ substitutes in the entire substring matched by the RE. + + Changed in version 2.7: Added the optional flags argument. + +==== re.subn(pattern, repl, string, count=0, flags=0) ==== + + Perform the same operation as sub(), but return a tuple **(new_string, number_of_subs_made)**. + + Changed in version 2.7: Added the optional flags argument. + +==== re.escape(string) ==== + + Return string with __all non-alphanumerics backslashed__; this is useful if you want to match an arbitrary literal string that may have regular expression metacharacters in it. +escape()函数将对string中的所有非字母数字字符转义,这样结果字符串为字符串字面量 + In [5]: re.escape(r"122jds\fd") + Out[5]: '122jds**\\\\f**d' + + In [6]: re.escape("122jds\fd") + Out[6]: '122jds**\\\x0**cd' + +==== re.purge() ==== + + Clear the regular expression cache. + +==== exception re.error ==== + + Exception raised when a string passed to one of the functions here is **not a valid regular expression** (for example, it might contain unmatched parentheses) or when some other error occurs during compilation or matching. It is never an error if a string contains no match for a pattern. + +===== 7.2.3. 
Regular Expression Objects ===== + +__class re.RegexObject__ + + The RegexObject class supports the following methods and attributes: + +==== search(string[, pos[, endpos]]) ==== + + Scan through string looking for a location where this regular expression produces a match, and return a corresponding **MatchObject** instance. Return **None** if no position in the string matches the pattern; note that this is different from finding a zero-length match at some point in the string. + + The optional second parameter pos gives **an index in the string** where the search is to start; it defaults to 0. This is not completely equivalent to slicing the string; the '^' pattern character matches at the real beginning of the string and at positions just after a newline, but not necessarily at the index where the search is to start. + + The optional parameter endpos limits how far the string will be searched; it will be as if the string is endpos characters long, so only the characters from pos to endpos - 1 will be searched for a match. If endpos is less than pos, no match will be found, otherwise, if rx is a compiled regular expression object, rx.search(string, 0, 50) is equivalent to rx.search(string[:50], 0). + >>> + + >>> pattern = re.compile("d") + >>> pattern.search("dog") # Match at index 0 + <_sre.SRE_Match object at ...> + >>> pattern.search("dog", 1) # No match; search doesn't include the "d" + +==== match(string[, pos[, endpos]]) ==== + + If **zero or more** characters at the beginning of string match this regular expression, return a corresponding **MatchObject** instance. Return **None** if the string does not match the pattern; note that this is different from a zero-length match. + + The optional pos and endpos parameters have the same meaning as for the search() method. + >>> + + >>> pattern = re.compile("o") + >>> pattern.match("dog") # No match as "o" is not at the start of "dog". + >>> pattern.match("dog", 1) # Match as "o" is the 2nd character of "dog". 
+ <_sre.SRE_Match object at ...> + + If you want to locate a match anywhere in string, use search() instead (see also search() vs. match()). + +==== split(string, maxsplit=0) ==== + + Identical to the split() function, using the compiled pattern. + +==== findall(string[, pos[, endpos]]) ==== + + Similar to the findall() function, using the compiled pattern, but also accepts optional pos and endpos parameters that limit the search region like for match(). + +==== finditer(string[, pos[, endpos]]) ==== + + Similar to the finditer() function, using the compiled pattern, but also accepts optional pos and endpos parameters that limit the search region like for match(). + +==== sub(repl, string, count=0) ==== + + Identical to the sub() function, using the compiled pattern. + +==== subn(repl, string, count=0) ==== + + Identical to the subn() function, using the compiled pattern. + +==== flags ==== + + The regex matching flags. This is a combination of the flags given to compile() and any (?...) inline flags in the pattern. + +==== groups ==== + + **The number of **__capturing groups__ in the pattern. + +==== groupindex ==== + + A dictionary mapping any symbolic **group names defined by (?P) to group numbers**. The dictionary is empty if no symbolic groups were used in the pattern. + +==== pattern ==== + + The pattern string from which the RE object was compiled. + +===== 7.2.4. Match Objects ===== + +__class re.MatchObject__ + + Match objects always have **a boolean value of True**. Since match() and search() return __None__ when there is no match, you can test whether there was a match with a simple if statement: + + match = re.search(pattern, string) + if match: + process(match) + + Match objects support the following methods and attributes: + +==== expand(template) ==== + + Return the string obtained by doing __backslash substitution on the template string template__, as done by the sub() method. 
Escapes such as \n are converted to the appropriate characters, and numeric backreferences (\1, \2) and named backreferences (\g<1>, __\g__) are replaced by the contents of the corresponding group. + +==== group([group1, ...]) ==== + + Returns one or more subgroups of the match. If there is a single argument, the result is a single string; if there are multiple arguments, the result is a tuple with one item per argument. Without arguments, group1 defaults to zero (the whole match is returned). If a groupN argument is zero, the corresponding return value is the entire matching string; if it is in the inclusive range [1..99], it is the string matching the corresponding parenthesized group. If a group number is negative or larger than the number of groups defined in the pattern, an IndexError exception is raised. If a group is contained in a part of the pattern that did not match, the corresponding result is None. If a group is contained in a part of the pattern that matched multiple times, the last match is returned. + >>> + + >>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m.group(0) # The entire match + 'Isaac Newton' + >>> m.group(1) # The first parenthesized subgroup. + 'Isaac' + >>> m.group(2) # The second parenthesized subgroup. + 'Newton' + >>> m.group(1, 2) # Multiple arguments give us a tuple. + ('Isaac', 'Newton') + + If the regular expression uses the (?P...) syntax, the groupN arguments may also be strings identifying groups by their group name. If a string argument is not used as a group name in the pattern, an IndexError exception is raised. 
+ + A moderately complicated example: + >>> + + >>> m = re.match(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") + >>> m.group('first_name') + 'Malcolm' + >>> m.group('last_name') + 'Reynolds' + + Named groups can also be referred to by their index: + >>> + + >>> m.group(1) + 'Malcolm' + >>> m.group(2) + 'Reynolds' + + If a group matches multiple times, only the last match is accessible: + >>> + + >>> m = re.match(r"(..)+", "a1b2c3") # Matches 3 times. + >>> m.group(1) # Returns only the last match. + 'c3' + +==== groups([default]) ==== + + Return a tuple containing all the subgroups of the match, from 1 up to however many groups are in the pattern. The default argument is used for groups that did not participate in the match; it defaults to None. (Incompatibility note: in the original Python 1.5 release, if the tuple was one element long, a string would be returned instead. In later versions (from 1.5.1 on), a singleton tuple is returned in such cases.) + + For example: + >>> + + >>> m = re.match(r"(\d+)\.(\d+)", "24.1632") + >>> m.groups() + ('24', '1632') + + If we make the decimal place and everything after it optional, not all groups might participate in the match. These groups will default to None unless the default argument is given: + >>> + + >>> m = re.match(r"(\d+)\.?(\d+)?", "24") + >>> m.groups() # Second group defaults to None. + ('24', None) + >>> m.groups('0') # Now, the second group defaults to '0'. + ('24', '0') + +==== groupdict([default]) ==== + + Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name. The default argument is used for groups that did not participate in the match; it defaults to None. 
For example: + >>> + + >>> m = re.match(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") + >>> m.groupdict() + {'first_name': 'Malcolm', 'last_name': 'Reynolds'} + +==== start([group]) ==== + +==== end([group]) ==== + + Return the indices of the start and end of the substring matched by group; group defaults to zero (meaning the whole matched substring). Return -1 if group exists but did not contribute to the match. For a match object m, and a group g that did contribute to the match, the substring matched by group g (equivalent to m.group(g)) is + + m.string[m.start(g):m.end(g)] + + Note that m.start(group) will equal m.end(group) if group matched a null string. For example, after m = re.search('b(c?)', 'cba'), m.start(0) is 1, m.end(0) is 2, m.start(1) and m.end(1) are both 2, and m.start(2) raises an IndexError exception. + + An example that will remove remove_this from email addresses: + >>> + + >>> email = "tony@tiremove_thisger.net" + >>> m = re.search("remove_this", email) + >>> email[:m.start()] + email[m.end():] + 'tony@tiger.net' + +==== span([group]) ==== + + For MatchObject m, return the 2-tuple (m.start(group), m.end(group)). Note that if group did not contribute to the match, this is (-1, -1). group defaults to zero, the entire match. + +==== pos ==== + + The value of pos which was passed to the search() or match() method of the RegexObject. This is the index into the string at which the RE engine started looking for a match. + +==== endpos ==== + + The value of endpos which was passed to the search() or match() method of the RegexObject. This is the index into the string beyond which the RE engine will not go. + +==== lastindex ==== + + The integer index of the __last matched__** capturing group**, or **None** if no group was matched at all. For example, the expressions **(a)b, ((a)(b)), and ((ab)) will have lastindex == 1** if applied to the string 'ab', while the expression (a)(b) will have lastindex == 2, if applied to the same string. 
+ +==== lastgroup ==== + + The name of the **last matched capturing group**, or None if the group didn’t have a name, or if no group was matched at all. +如果pattern中__最后一个subgroup有名字而且该pattern匹配字符串__,则lastgroup为其名字字符串。 + +==== re ==== + + The regular expression object whose match() or search() method produced this MatchObject instance. + +==== string ==== + + The string passed to match() or search(). + +===== 7.2.5. Examples ===== + +==== 7.2.5.1. Checking For a Pair ==== + +In this example, we’ll use the following helper function to display match objects a little more gracefully: + +def displaymatch(match): + if match is None: + return None + return '<Match: %r, groups=%r>' % (match.**group()**, match.groups()) +**match.group()返回匹配的整个字符串,而match.groups()返回的是subgroup的tuple。** + +Suppose you are writing a poker program where a player’s hand is represented as a 5-character string with each character representing a card, “a” for ace, “k” for king, “q” for queen, “j” for jack, “t” for 10, and “2” through “9” representing the card with that value. + +To see if a given string is a valid hand, one could do the following: +>>> + +>>> valid = re.compile(r"^[a2-9tjqk]{5}$") +>>> displaymatch(valid.match("akt5q")) # Valid. +"<Match: 'akt5q', groups=()>" +>>> displaymatch(valid.match("akt5e")) # Invalid. +>>> displaymatch(valid.match("akt")) # Invalid. +>>> displaymatch(valid.match("727ak")) # Valid. +"<Match: '727ak', groups=()>" + +That last hand, "727ak", contained a pair, or two of the same valued cards. To match this with a regular expression, one could use backreferences as such: +>>> + +>>> pair = re.compile(r".*(.).*\1") +>>> displaymatch(pair.match("717ak")) # Pair of 7s. +"<Match: '717', groups=('7',)>" +>>> displaymatch(pair.match("718ak")) # No pairs. +>>> displaymatch(pair.match("354aa")) # Pair of aces. 
+"" + +To find out what card the pair consists of, one could use the group() method of MatchObject in the following manner: +>>> + +>>> pair.match("717ak").group(1) +'7' + +# Error because re.match() returns None, which doesn't have a group() method: +>>> pair.match("718ak").group(1) +Traceback (most recent call last): + File "", line 1, in + re.match(r".*(.).*\1", "718ak").group(1) +AttributeError: 'NoneType' object has no attribute 'group' + +>>> pair.match("354aa").group(1) +'a' + +7.2.5.2. Simulating scanf() + +Python does not currently have an equivalent to scanf(). Regular expressions are generally more powerful, though also more verbose, than scanf() format strings. The table below offers some more-or-less equivalent mappings between scanf() format tokens and regular expressions. +scanf() Token Regular Expression +%c . +%5c .{5} +%d [-+]?\d+ +%e, %E, %f, %g [-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)? +%i [-+]?(0[xX][\dA-Fa-f]+|0[0-7]*|\d+) +%o [-+]?[0-7]+ +%s \S+ +%u \d+ +%x, %X [-+]?(0[xX])?[\dA-Fa-f]+ + +To extract the filename and numbers from a string like + +/usr/sbin/sendmail - 0 errors, 4 warnings + +you would use a scanf() format like + +%s - %d errors, %d warnings + +The equivalent regular expression would be + +(\S+) - (\d+) errors, (\d+) warnings + +7.2.5.3. search() vs. match() + +Python offers two different primitive operations based on regular expressions: re.match() checks for a match only at the beginning of the string, while re.search() checks for a match anywhere in the string (this is what Perl does by default). 
+ +For example: +>>> + +>>> re.match("c", "abcdef") # No match +>>> re.search("c", "abcdef") # Match +<_sre.SRE_Match object at ...> + +Regular expressions beginning with '^' can be used with search() to restrict the match at the beginning of the string: +>>> + +>>> re.match("c", "abcdef") # No match +>>> re.search("^c", "abcdef") # No match +>>> re.search("^a", "abcdef") # Match +<_sre.SRE_Match object at ...> + +Note however that in MULTILINE mode match() only matches at the beginning of the string, whereas using search() with a regular expression beginning with '^' will match at the beginning of each line. +>>> + +>>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match +>>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match +<_sre.SRE_Match object at ...> + +7.2.5.4. Making a Phonebook + +split() splits a string into a list delimited by the passed pattern. The method is invaluable for converting textual data into data structures that can be easily read and modified by Python as demonstrated in the following example that creates a phonebook. + +First, here is the input. Normally it may come from a file, here we are using triple-quoted string syntax: +>>> + +>>> text = """Ross McFluff: 834.345.1254 155 Elm Street +... +... Ronald Heathmore: 892.345.3428 436 Finley Avenue +... Frank Burger: 925.541.7625 662 South Dogwood Way +... +... +... Heather Albrecht: 548.326.4584 919 Park Place""" + +The entries are separated by one or more newlines. Now we convert the string into a list with each nonempty line having its own entry: +>>> + +>>> entries = re.split("\n+", text) +>>> entries +['Ross McFluff: 834.345.1254 155 Elm Street', +'Ronald Heathmore: 892.345.3428 436 Finley Avenue', +'Frank Burger: 925.541.7625 662 South Dogwood Way', +'Heather Albrecht: 548.326.4584 919 Park Place'] + +Finally, split each entry into a list with first name, last name, telephone number, and address. 
We use the maxsplit parameter of split() because the address has spaces, our splitting pattern, in it: +>>> + +>>> [re.split(":? ", entry, 3) for entry in entries] +[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'], +['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'], +['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'], +['Heather', 'Albrecht', '548.326.4584', '919 Park Place']] + +The :? pattern matches the colon after the last name, so that it does not occur in the result list. With a maxsplit of 4, we could separate the house number from the street name: +>>> + +>>> [re.split(":? ", entry, 4) for entry in entries] +[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'], +['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'], +['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'], +['Heather', 'Albrecht', '548.326.4584', '919', 'Park Place']] + +7.2.5.5. Text Munging + +sub() replaces every occurrence of a pattern with a string or the result of a function. This example demonstrates using sub() with a function to “munge” text, or randomize the order of all the characters in each word of a sentence except for the first and last characters: +>>> + +>>> def repl(m): +... inner_word = list(m.group(2)) +... random.shuffle(inner_word) +... return m.group(1) + "".join(inner_word) + m.group(3) +>>> text = "Professor Abdolmalek, please report your absences promptly." +>>> re.sub(r"(\w)(\w+)(\w)", repl, text) +'Poefsrosr Aealmlobdk, pslaee reorpt your abnseces plmrptoy.' +>>> re.sub(r"(\w)(\w+)(\w)", repl, text) +'Pofsroser Aodlambelk, plasee reoprt yuor asnebces potlmrpy.' + +7.2.5.6. Finding all Adverbs + +findall() matches all occurrences of a pattern, not just the first one as search() does. For example, if one was a writer and wanted to find all of the adverbs in some text, he or she might use findall() in the following manner: +>>> + +>>> text = "He was carefully disguised but captured quickly by police." 
+>>> re.findall(r"\w+ly", text) +['carefully', 'quickly'] + +7.2.5.7. Finding all Adverbs and their Positions + +If one wants more information about all matches of a pattern than the matched text, finditer() is useful as it provides instances of MatchObject instead of strings. Continuing with the previous example, if one was a writer who wanted to find all of the adverbs and their positions in some text, he or she would use finditer() in the following manner: +>>> + +>>> text = "He was carefully disguised but captured quickly by police." +>>> for m in re.finditer(r"\w+ly", text): +... print '%02d-%02d: %s' % (m.start(), m.end(), m.group(0)) +07-16: carefully +40-47: quickly + +7.2.5.8. Raw String Notation + +Raw string notation (r"text") keeps regular expressions sane. Without it, every backslash ('\') in a regular expression would have to be prefixed with another one to escape it. For example, the two following lines of code are functionally identical: +>>> + +>>> re.match(r"\W(.)\1\W", " ff ") +<_sre.SRE_Match object at ...> +>>> re.match("\\W(.)\\1\\W", " ff ") +<_sre.SRE_Match object at ...> + +When one wants to match a literal backslash, it must be escaped in the regular expression. With raw string notation, this means r"\\". 
Without raw string notation, one must use "\\\\", making the following lines of code functionally identical: +>>> + +>>> re.match(r"\\", r"\\") +<_sre.SRE_Match object at ...> +>>> re.match("\\\\", r"\\") +<_sre.SRE_Match object at ...> + diff --git a/Zim/Programme/python/The-Python-Standard-Library/string的操作函数.txt b/Zim/Programme/python/The-Python-Standard-Library/string的操作函数.txt new file mode 100644 index 0000000..3d87f2e --- /dev/null +++ b/Zim/Programme/python/The-Python-Standard-Library/string的操作函数.txt @@ -0,0 +1,92 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-30T20:55:38+08:00 + +====== string的操作函数 ====== +Created Friday 30 November 2012 + +转自:http://www.91linux.com/html/article/program/python/20090804/17759.htm + +在python有各种各样的string操作函数。在历史上string类在python中经历了一段轮回的历史。在最开始的时候,python有**一个专门的string的module**,要使用string的方法要先import,但后来由于众多的python使用者的建议,从python2.0开始,string方法改为用S.method()的形式调用,只要S是一个字符串对象就可以这样使用,而不用import。同时__为了保持向后兼容__,现在的python中仍然保留了一个string的module,其中定义的方法与S.method()是相同的,这些方法都**最后都指向了**用S.method()调用的函数。要注意,S.method()能调用的方法比string的module中的多,比如isdigit()、istitle()等就只能用S.method()的方式调用。 + +对一个字符串对象,首先想到的操作可能就是计算它有多少个字符组成,很容易想到用S.len(),但这是错的,应该是__len(S)__。因为len()是内置函数,包括在__builtin__模块中。python不把len()包含在string类型中,乍看起来好像有点不可理解,其实一切有其合理的逻辑在里头。len()不仅可以计算字符串中的字符数,还可以计算list的成员数,tuple的成员数等等,因此**单单把len()算在string里是不合适**,因此一是可以__把len()作为通用函数__,用重载实现对不同类型的操作,还有就是可以在每种有len()运算的类型中都要包含一个len()函数。python选择的是第一种解决办法。类似的还有str(arg)函数,它把arg用string类型表示出来。 + +===== 字符串中字符大小写的变换: ===== +S.lower() #小写 +S.upper() #大写 +S.swapcase() #大小写互换 +S.capitalize() #首字母大写 +String.capwords(S) +#这是模块中的方法。它把S用split()函数分开,然后用capitalize()把首字母变成大写,最后用join()合并到一起 +S.title() #只有首字母大写,其余为小写,模块中没有这个方法 + +===== 字符串在输出时的对齐: ===== +S.ljust(width,[fillchar]) +#输出width个字符,S左对齐,不足部分用fillchar填充,默认的为空格。 +S.rjust(width,[fillchar]) #右对齐 +S.center(width, [fillchar]) #中间对齐 +S.zfill(width) #把S变成width长,并在右对齐,不足部分用0补足 + +===== 字符串中的搜索和替换: ===== +S.find(substr, [start, [end]]) 
+#返回S中出现substr的第一个字母的标号,如果S中没有substr则返回-1。start和end作用就相当于在S[start:end]中搜索 +S.index(substr, [start, [end]]) +#与find()相同,只是在S中没有substr时,会引发一个运行时错误(ValueError) +S.rfind(substr, [start, [end]]) +#返回S中最后出现的substr的第一个字母的标号,如果S中没有substr则返回-1,也就是说从右边算起的第一次出现的substr的首字母标号 +S.rindex(substr, [start, [end]]) +S.count(substr, [start, [end]]) #计算substr在S中出现的次数 +S.replace(oldstr, newstr, [count]) +#把S中的oldstr替换为newstr,count为替换次数。这是替换的通用形式,还有一些函数进行特殊字符的替换 +S.strip([chars]) +#把S中前后chars中有的字符全部去掉,可以理解为把S前后chars替换为None +S.lstrip([chars]) +S.rstrip([chars]) +S.expandtabs([tabsize]) +#把S中的tab字符替换为空格,每个tab替换为tabsize个空格,默认是8个 + +===== 字符串的分割和组合: ===== +S.split([sep, [maxsplit]]) +#以sep为分隔符,把S分成一个list。maxsplit表示分割的次数。默认的分割符为空白字符 +S.rsplit([sep, [maxsplit]]) +S.splitlines([keepends]) +#把S按照行分割符分为一个list,keepends是一个bool值,如果为真每行后面会保留行分割符。 +S.join(seq) #把seq代表的序列──字符串序列,用S连接起来 + + + +===== 字符串的mapping,这一功能包含两个函数: ===== +string.maketrans(from, to) +#返回一个256个字符组成的翻译表,其中from中的字符被一一对应地转换成to,所以from和to必须是等长的。 +S.translate(table[,deletechars]) +#使用上面的函数产生的翻译表,把S进行翻译,并把deletechars中有的字符删掉。需要注意的是,如果S为unicode字符串,那么就不支持deletechars参数,可以使用把某个字符翻译为None的方式实现相同的功能。此外还可以使用codecs模块的功能来创建更加功能强大的翻译表。 + +===== 字符串还有一对编码和解码的函数: ===== +S.encode([encoding,[errors]]) +#其中encoding可以有多种值,比如gb2312 gbk gb18030 bz2 zlib big5 base64等都支持。errors默认值为"strict",意思是出错时引发UnicodeError。可能的值还有'ignore', 'replace', 'xmlcharrefreplace', 'backslashreplace' 和所有的通过codecs.register_error注册的值。这一部分内容涉及codecs模块,不是特明白 + +S.decode([encoding,[errors]]) + +===== 字符串的测试函数,这一类函数在string模块中没有,这些函数返回的都是bool值: ===== + +S.startswith(prefix[,start[,end]]) +#是否以prefix开头 +S.endswith(suffix[,start[,end]]) +#以suffix结尾 +S.isalnum() +#是否全是字母和数字,并至少有一个字符 +S.isalpha() #是否全是字母,并至少有一个字符 +S.isdigit() #是否全是数字,并至少有一个字符 +S.isspace() #是否全是空白字符,并至少有一个字符 +S.islower() #S中的字母是否全是小写 +S.isupper() #S中的字母是否全是大写 +S.istitle() #S是否是首字母大写的 + +===== 字符串类型转换函数,这几个函数只在string模块中有: ===== + +**string.atoi(s[,base])** 
+#base默认为10,如果为0,那么s就可以是012或0x23这种形式的字符串,如果是16那么s就只能是0x23或0X12这种形式的字符串
+string.atol(s[,base]) #转成long +string.atof(s) #转成float + +这里再强调一次,字符串对象是不可改变的,也就是说在python创建一个字符串后,你不能把这个字符串中的某一部分改变。任何上面的函数改变了字符串后,都会返回一个__新的字符串__,原字串并没有变。其实这也是有变通的办法的,__可以用S=list(S)这个函数把S变为由单个字符为成员的list,这样的话就可以使用S[3]='a'的方式改变值,然后再使用S="".join(S)还原成字符串__ diff --git a/Zim/Programme/python/library/subprocess.txt b/Zim/Programme/python/The-Python-Standard-Library/subprocess.txt similarity index 81% rename from Zim/Programme/python/library/subprocess.txt rename to Zim/Programme/python/The-Python-Standard-Library/subprocess.txt index e7615be..df7f86c 100644 --- a/Zim/Programme/python/library/subprocess.txt +++ b/Zim/Programme/python/The-Python-Standard-Library/subprocess.txt @@ -39,13 +39,13 @@ __subprocess.STDOUT__ [*] subprocess.__call__(**args**, *, stdin=None, stdout=None, stderr=None, shell=False) Run the command described by __args__. __Wait __for command to complete, then return the **returncode** attribute. - 适合非交互式进程,执行args指定的程序直到其结束,返回退出码,即使命令执行失败,也不产生异常。stdin,stdout,stderr一般不使用PIPE,因为调用进程一般不读这种非交互式进程的输出。stdin为None表示子进程的stdin将__继承__调用进程的描述符。 +适合非交互式进程,执行args指定的程序直到其结束,返回退出码,即使命令执行失败,也不产生异常。stdin,stdout,stderr一般不使用PIPE,因为调用进程一般不读这种非交互式进程的输出。stdin为None表示子进程的stdin将__继承__调用进程的描述符。 The arguments shown above are merely the most common ones, described below in Frequently Used Arguments (hence the slightly odd notation in the abbreviated signature). The full function signature is the same as that of the __Popen__ constructor - this functions passes all supplied arguments directly through to that interface.
Examples: >>> -__#如果shell=False(默认),则python调用os.execlp()来执行args,因此,如果args是字符串,则只能包含命令名;如果是序列,则可以包含命令参数(参数不支持shell的特性如文件名扩展等。),而且每个元素一般包括一个参数(否则调用程序可能会产生解析错误)。__ +__#如果shell=False(默认),则python调用os.execvp()来执行args,因此,如果args是字符串,则只能包含命令名;如果是序列,则可以包含命令参数(参数不支持shell的特性如文件名扩展等。)。__ >>> subprocess.call(__["ls", "-l"]__) 0 __#如果shell=True,则python调用system shell来执行args,调用形式为: execlp("/bin/sh", '-c', 'arg1', 'arg2', ....)因此,如果args是字符串,则其中可以包含命令参数,而且支持各种shell特性如文件名扩展、命令扩展等;如果args是序列,则其第一个元素为命令名,其它元素为被当作shell本身的参数。__ @@ -53,14 +53,14 @@ __#如果shell=True,则python调用system shell来执行args,调用形式为 >>> subprocess.call("exit 1", shell=True) 1 -__总的来说__**:**如果shell=False, python使用os.execvp(...)来执行args,因此,如果args是一个string,则该string只能是命令名称, 如果要为命令附加参数,则只能使用序列类型; 如果shell=True,则python使用shell来执行行args,因此,args最好是一个string,该string可以包含命令名及其参数,如果使用序列,则从第二个元素开始作为shell本身的参数(而非命令的参数。)。 +__总的来说__**:**如果shell=False, python使用os.execvp(...)来执行args,因此,如果args是一个string,则该string__只能是__命令名称, 如果要为命令附加参数,则只能使用序列类型; 如果shell=True,则python使用shell来执行args,因此,args最好是一个string,该string可以包含命令名及其参数,如果使用序列,则从第二个元素开始作为shell本身的参数(而非命令的参数。)。 Invoking the system shell with shell=True can be **a security hazard** if combined with untrusted input. See the warning under Frequently Used Arguments for details. __Do not use stdout=PIPE or stderr=PIPE with this function__. As the pipes are not being read in the current process, the child process may block if it generates enough output to a pipe to fill up the OS pipe buffer. 当使用stdout=PIPE时,如果子进程填满了管道而调用进程没有读该管道时__子进程就会被阻塞__。因此使用上面三个函数时一般不使用这个参数。 -The standard input and output channels for the process started by call() are__ bound to the parent’s input and output__. That means the calling programm cannot capture the output of the command. Use check_output() to capture the output for later processing. +The standard input and output channels for the process started by call() are__ bound to the parent’s input and output__.
That means the calling programm __cannot capture the output__ of the command. Use check_output() to capture the output for later processing. [*] subprocess.**check_call**(args, *, stdin=None, stdout=None, stderr=None, shell=False) 和call类似,但是对返回值进行检查,如果非0则引发异常。 @@ -152,9 +152,10 @@ __class subprocess.Popen__(args, bufsize=0, **executable**=None, stdin=None, std Arguments are: -* args should be a string, or a sequence of program arguments. The program to execute is normally the **first item** in the args sequence or the string if a string is given, but can be **explicitly set **by using the __executable__ argument. When executable is given, the first item in the args sequence is still treated by most programs as the** command name**, which can then be different from the actual executable name. On Unix, it becomes the** display name** for the executing program in utilities such as ps. +* args should be a string, or a sequence of program arguments. The program to execute is normally the **first item** in the args sequence or the string(string作为命令名,不能有参数) if a string is given, but can be **explicitly set **by using the __executable__ argument. When executable is given, the first item in the args sequence is still treated by most programs as the** command name**, which can then be different from the actual executable name. On Unix, it becomes the** display name** for the executing program in utilities such as ps. 如果ecutable被设置,则其值将作为__实际执行__的命令名。args的第一个元素将作为命令的显示名称。 On Unix, with shell=False (default): In this case, the __Popen class uses os.execvp()__ to execute the child program. args should **normally be a sequence.** If a string is specified for args, it will be used as the **name or path of the program **to execute; this will only work if the program is being given __no__ arguments. 
+默认情况下shell=False,这样args通常为sequence,其第一个元素为待执行的命令,其它元素为命令参数。如果args为string,则其**只能为命令名或命令路径,不能带任何参数**。这是因为python使用os.execvp()来执行这个string。 Note @@ -172,22 +173,22 @@ __class subprocess.Popen__(args, bufsize=0, **executable**=None, stdin=None, std Note in particular that options (such as -input) and arguments (such as eggs.txt) that are separated by whitespace in the shell go in **separate list elements**, while arguments that need quoting or backslash escaping when used in the shell (such as filenames containing spaces or the echo command shown above) are single list elements. On Unix, with shell=True: __If args is a string, it specifies the command string to execute through the shell.__ This means that the string must be formatted exactly as it would be when typed at the shell prompt. This includes, for example, quoting or backslash escaping filenames with spaces in them. If args is a sequence, the first item specifies the command string, and any additional items will be treated as additional arguments __to the shell itself__. That is to say, Popen does the equivalent of: -在shell=True的情况下,args最好是一个字符串。如果是序列,则从第二个元素开始是__作为shell本身的参数__,而非命令的参数。 +在shell=True的情况下,args**最好是一个字符串**。如果是序列,则从第二个元素开始是__作为shell本身的参数__,而非命令的参数。 __ Popen(['/bin/sh', '-c', args[0], args[1], ...])__ On Windows: the Popen class uses CreateProcess() to execute the child child program, which __operates on strings.__ If args is a sequence, it will be converted to a string in a manner described in Converting an argument sequence to a string on Windows.
**总的来说:如果shell=False, python使用os.execvp(...)来执行args,因此,如果args是一个string,则该string只能是命令名称, 如果要为命令执行参数,则只能使用序列类型; 如果shell=True,则python使用shell来执行行args,因此,args最好是一个string,该string可以包含命令名及其参数,如果使用序列,则从第二个元素开始作为**__shell本身__**的参数(而非命令的参数。)。** -* bufsize, if given, has the same meaning as the corresponding argument to the built-in open() function: **0 means **__unbuffered__**, 1 means**__ line buffered__**, any other positive value means use a buffer of (approximately) that size**. A negative bufsize means to use the system **default**, which usually means__ fully__ buffered. The** default value for bufsize is 0** (unbuffered). -控制打开文件的缓冲方式。 +* bufsize, if given, has the same meaning as the corresponding argument to the built-in open() function: **0 means **__unbuffered__**, 1 means**__ line buffered__**, any other positive value means use a buffer of (approximately) that size**. A negative bufsize means to use the system **default**, which usually means__ fully__ buffered. The** default value for bufsize is 0** __(unbuffered)__. + 控制打开文件的缓冲方式,默认为unbuffered。 Note If you experience performance issues, it is recommended that you try to enable buffering by setting bufsize to either -1 or a large enough positive value (such as 4096). -* The executable argument **specifies the program** to execute. It is very seldom needed: Usually, the program to execute is defined by the **args** argument. If shell=True, the executable argument specifies __which shell to use__. On Unix, the default shell is /bin/sh. On Windows, the default shell is specified by the COMSPEC environment variable. The only reason you would need to specify shell=True on Windows is where the command you wish to execute is actually built in to the shell, eg dir, copy. You don’t need shell=True to run a batch file, nor to run a console-based executable. +* The executable argument **specifies the program** to execute. It is very seldom needed: Usually, the program to execute is defined by the **args** argument. 
If shell=True, the executable argument specifies __which shell to use__. On Unix, the default shell is **/bin/sh**. On Windows, the default shell is specified by the COMSPEC environment variable. The only reason you would need to specify shell=True on Windows is where the command you wish to execute is actually built in to the shell, eg dir, copy. You don’t need shell=True to run a batch file, nor to run a console-based executable. -* stdin, stdout and stderr specify the __executed program’s __standard input, standard output and standard error file handles, respectively. Valid values are __PIPE, an existing file descriptor (a positive integer), an existing file object, and None__. PIPE indicates that a new pipe to the child should be created. With the **default settings of None**, no redirection will occur; the child’s file handles will be inherited from the parent. Additionally, stderr can be STDOUT, which indicates that the stderr data from the child process should be captured into the same file handle as for stdout. +* stdin, stdout and stderr specify the __executed program’s __standard input, standard output and standard error file handles, respectively. Valid values are __PIPE, an existing file descriptor (a positive integer), an existing file object, and None__. PIPE indicates that a new pipe to the child should be created. With the **default settings of None**, no redirection will occur; the child’s file handles will be **inherited from the parent**. Additionally, stderr can be STDOUT, which indicates that the stderr data from the child process should be captured into the same file handle as for stdout. * If__ preexec_fn__ is set to a callable object, this object will be called in the child process just **before** the child is executed. (Unix only) @@ -210,7 +211,8 @@ __class subprocess.Popen__(args, bufsize=0, **executable**=None, stdin=None, std Note This feature is only available if Python is built with universal newline support (the default). 
Also, the newlines attribute of the file objects stdout, stdin and stderr are not updated by the communicate() method. -* If given, startupinfo will be a STARTUPINFO object, which is passed to the underlying __CreateProcess __function. creationflags, if given, can be CREATE_NEW_CONSOLE or CREATE_NEW_PROCESS_GROUP. (Windows only) +* If given, startupinfo will be a STARTUPINFO object, which is passed to the underlying __CreateProcess __function. +* creationflags, if given, can be CREATE_NEW_CONSOLE or CREATE_NEW_PROCESS_GROUP. (Windows only)? ==== 17.1.1.3. Exceptions ==== @@ -232,11 +234,11 @@ Popen()函数返回一个__Popen对象__。 Instances of the Popen class have the following methods: **Popen.poll()** - Check if child process __has terminated__. Set and return returncode attribute. + Check if child process __has terminated__. Set and return **returncode** attribute. **Popen.wait()** - Wait for child process to terminate. Set and return returncode attribute. + Wait for child process to terminate. Set and return **returncode** attribute. Warning This will deadlock when using stdout=PIPE and/or stderr=PIPE and the child process generates enough output to a pipe such that it blocks waiting for the OS pipe buffer to accept more data. Use communicate() to avoid that. @@ -247,7 +249,7 @@ __Popen.communicate__(input=None) communicate() returns __a tuple__ (stdoutdata, stderrdata). - Note that if you want to send data to the process’s stdin, you need to create the Popen object with__ stdin=PIPE__. Similarly, to get anything other than None in the result tuple, you need to give__ stdout=PIPE__ and/or stderr=PIPE too. + Note that if you want to send data to the process’s stdin, you need to create the Popen object with__ stdin=PIPE__. Similarly, to get anything other than None in the result tuple, you need to give__ stdout=PIPE__ and/or __stderr=PIPE__ too. Note The data read is buffered in memory, so do not use this method if the data size is large or unlimited. 
@@ -262,7 +264,7 @@ __Popen.communicate__(input=None) **Popen.terminate()** - Stop the child. On Posix OSs the method sends SIGTERM to the child. On Windows the Win32 API function TerminateProcess() is called to stop the child. + Stop the child. On Posix OSs the method sends __SIGTERM__ to the child. On Windows the Win32 API function TerminateProcess() is called to stop the child. **Popen.kill()** @@ -271,7 +273,7 @@ __Popen.communicate__(input=None) ===== The following attributes are also available: ===== **Popen对象**具有下列属性: Warning - Use communicate() rather than .stdin.write, .stdout.read or .stderr.read to avoid deadlocks due to any of the other OS pipe buffers filling up and blocking the child process. + Use communicate() rather than //.stdin.write, .stdout.read or .stderr.read// to avoid deadlocks due to any of the other OS pipe buffers filling up and blocking the child process. Popen.stdin #对Popen调用stdin属性,将返回一个**写打开的文件对象**。 If the stdin argument was PIPE, this attribute is **a file object** that provides input to the child process. Otherwise, it is__ None__. @@ -287,7 +289,7 @@ Popen.__pid__ Popen.__returncode__ - The child return code, **set by poll() and wait()** (and indirectly by communicate()). A __None__ value indicates that the process hasn’t terminated yet. A negative value -N indicates that the child was **terminated by signal N** (Unix only). + The child return code, **set by poll() and wait()** (and indirectly by communicate()). A __None__ value indicates that the process hasn’t terminated yet. A negative value __-N__ indicates that the child was **terminated by signal N** (Unix only). ===== 17.1.3.1. Constants ===== @@ -314,7 +316,7 @@ subprocess.STARTF_USESHOWWINDOW subprocess.CREATE_NEW_CONSOLE - The new process ha**s a new console**, instead of inheriting its parent’s console (the default). + The new process ha**s a new console**, instead of inheriting its parent’s console __(the default)__. 
This flag is always set when Popen is created with shell=True. subprocess.CREATE_NEW_PROCESS_GROUP @@ -345,12 +347,20 @@ output=`dmesg | grep hda` # becomes p1 = Popen(["dmesg"], __stdout=PIPE__) p2 = Popen(["grep", "hda"], stdin=__p1.stdout__, stdout=PIPE) #p1是Popen对象,其stdout返回一个读打开的文件对象(因为p1的stdout为PIPE)。 -__p1.stdout__.close() # Allow p1 to receive a SIGPIPE if p2 exits. +__p1.stdout__.close() # Allow p1 to receive a SIGPIPE if p2 exits,**关键!!!** output = __p2.communicate()__[0] #communicate()返回一个__元组__,这里只取出第一个元素即子进程的标准输出。 The p1.stdout.close() call after starting the p2 is __important in order__ for p1 to receive a SIGPIPE if p2 exits before p1. +具体解释如下:stackoverflow.com/q/7391689 + From Wikipedia, SIGPIPE is the signal sent to a process when it attempts to **write** to a pipe without a process connected to the other end. + + When you first create p1 using **stdout=PIPE**, there is one process connected to the pipe, which is __your Python process__, and you can read the output using p1.stdout. + 最开始Popen()时,python解释器会连接到p1.stdout管道,这样在p2没有创建前,p1的cmd就可以执行了,输出将临时地放入管道中。 + When you create p2 using stdin=p1.stdout there are now __two processes__ connected to the pipe p1.stdout. + + Generally when you are running processes in a pipeline you want **all processes** to end when any of the processes end. For this to happen automatically __you need to close p1.stdout so p2.stdin is the only process attached to that pipe__, this way if p2 ends and p1 writes additional data to stdout, it will receive a SIGPIPE since there are no longer any processes attached to that pipe. 
-Alternatively, for trusted input, the shell’s own pipeline support may still be used directly: +Alternatively, for **trusted input**, the shell’s own pipeline support may still be used directly: output=`dmesg | grep hda` # becomes output=__check_output__(“dmesg | grep hda”,__ shell=True__) @@ -432,7 +442,7 @@ p = Popen("cmd", shell=True, bufsize=bufsize, stdin=PIPE, stdout=PIPE, __stderr=STDOUT__, close_fds=True) #STDOUT标示,子进程的标准出错和__它的__标准输出__重定向到同一个__文件。 (child_stdin, child_stdout_and_stderr) = (p.stdin, p.stdout) -On Unix, os.popen2, os.popen3 and os.popen4 also accept __a sequence__ as the command to execute, in which case arguments will be passed directly to the program without shell intervention. This usage can be replaced as follows: +On Unix, os.popen2, os.popen3 and os.popen4 also accept __a sequence__ as the command to execute, in which case arguments will be passed **directly to the program** without shell intervention. This usage can be replaced as follows: (child_stdin, child_stdout) = os.popen2(["/bin/ls", "-l"], mode, bufsize) @@ -444,13 +454,13 @@ Return code handling translates as follows: pipe = os.popen("cmd", 'w') ... -rc = pipe.close() -if rc is not None and rc >> 8: +rc = pipe.close() //"cmd"进程读管道时收到EOF,程序一般将终止。 +if rc is not None and rc **>> 8**: print "There were some errors" ==> process = Popen("cmd", 'w', shell=True, stdin=PIPE) ... -process.stdin.close() +process.stdin.close() //**同样使'cmd'读到EOF**,而自动终止。 if **process.wait()** != 0: print "There were some errors" @@ -473,19 +483,19 @@ p = Popen(["mycmd", "myarg"], bufsize=bufsize, popen2.Popen3 and popen2.Popen4 basically work as subprocess.Popen, except that: - Popen raises an exception if the execution fails. - the capturestderr argument is replaced with the stderr argument. - stdin=PIPE and stdout=PIPE must be specified. - popen2 closes all file descriptors by default, but you have to specify close_fds=True with Popen. +* Popen raises an exception if the execution fails. 
+* the capturestderr argument is replaced with the stderr argument. +* stdin=PIPE and stdout=PIPE must be specified. +* popen2 closes all file descriptors by default, but you have to specify close_fds=True with Popen. ===== 17.1.5. Notes ===== 17.1.5.1. Converting an argument sequence to a string on Windows On Windows, an args sequence is converted to a string that can be parsed using the following rules (which correspond to the rules used by the MS C runtime): - Arguments are delimited by white space, which is either a space or a tab. - A string surrounded by double quotation marks is interpreted as a single argument, regardless of white space contained within. A quoted string can be embedded in an argument. - A double quotation mark preceded by a backslash is interpreted as a literal double quotation mark. - Backslashes are interpreted literally, unless they immediately precede a double quotation mark. - If backslashes immediately precede a double quotation mark, every pair of backslashes is interpreted as a literal backslash. If the number of backslashes is odd, the last backslash escapes the next double quotation mark as described in rule 3. +* Arguments are delimited by white space, which is either a space or a tab. +* A string surrounded by double quotation marks is interpreted as a single argument, regardless of white space contained within. A quoted string can be embedded in an argument. +* A double quotation mark preceded by a backslash is interpreted as a literal double quotation mark. +* Backslashes are interpreted literally, unless they immediately precede a double quotation mark. +* If backslashes immediately precede a double quotation mark, every pair of backslashes is interpreted as a literal backslash. If the number of backslashes is odd, the last backslash escapes the next double quotation mark as described in rule 3. 
diff --git a/Zim/Programme/python/library/Python_Subprocess_Module_Examples.txt b/Zim/Programme/python/The-Python-Standard-Library/subprocess/Python_Subprocess_Module_Examples.txt similarity index 100% rename from Zim/Programme/python/library/Python_Subprocess_Module_Examples.txt rename to Zim/Programme/python/The-Python-Standard-Library/subprocess/Python_Subprocess_Module_Examples.txt diff --git a/Zim/Programme/python/library/subprocess/subprocess_-_New_process_module.txt b/Zim/Programme/python/The-Python-Standard-Library/subprocess/subprocess_-_New_process_module.txt similarity index 100% rename from Zim/Programme/python/library/subprocess/subprocess_-_New_process_module.txt rename to Zim/Programme/python/The-Python-Standard-Library/subprocess/subprocess_-_New_process_module.txt diff --git a/Zim/Programme/python/library/subprocess_–_Work_with_additional_processes.txt b/Zim/Programme/python/The-Python-Standard-Library/subprocess/subprocess_–_Work_with_additional_processes.txt similarity index 100% rename from Zim/Programme/python/library/subprocess_–_Work_with_additional_processes.txt rename to Zim/Programme/python/The-Python-Standard-Library/subprocess/subprocess_–_Work_with_additional_processes.txt diff --git a/Zim/Programme/python/library/virtualenvwrapper.txt b/Zim/Programme/python/The-Python-Standard-Library/virtualenvwrapper.txt similarity index 100% rename from Zim/Programme/python/library/virtualenvwrapper.txt rename to Zim/Programme/python/The-Python-Standard-Library/virtualenvwrapper.txt diff --git a/Zim/Programme/python/library/virtualenvwrapper/1.jpg b/Zim/Programme/python/The-Python-Standard-Library/virtualenvwrapper/1.jpg similarity index 100% rename from Zim/Programme/python/library/virtualenvwrapper/1.jpg rename to Zim/Programme/python/The-Python-Standard-Library/virtualenvwrapper/1.jpg diff --git a/Zim/Programme/python/library/virtualenvwrapper/2.jpg b/Zim/Programme/python/The-Python-Standard-Library/virtualenvwrapper/2.jpg similarity index 100% 
rename from Zim/Programme/python/library/virtualenvwrapper/2.jpg rename to Zim/Programme/python/The-Python-Standard-Library/virtualenvwrapper/2.jpg diff --git a/Zim/Programme/python/The_Python_Tutorial/3._An_Informal_Introduction_to_Python.txt b/Zim/Programme/python/The_Python_Tutorial/3._An_Informal_Introduction_to_Python.txt index 86458b5..71042f0 100644 --- a/Zim/Programme/python/The_Python_Tutorial/3._An_Informal_Introduction_to_Python.txt +++ b/Zim/Programme/python/The_Python_Tutorial/3._An_Informal_Introduction_to_Python.txt @@ -38,7 +38,7 @@ The interpreter acts as a simple calculator: you can type an expression at it an >>> #__ Integer division returns the floor__: ... 7/3 2 ->>> 7/-3 +>>> 7/-3 //对于整型除法,结果也是整数,而且结果总是rounded towards minus infinity。也就是说__结果总是取不大于精确商的最大整数值。__例如7/-3= -2.33...,而不大于-2.33的最大整数为-3. -3 The equal sign ('=') is used to assign a value to a variable. Afterwards, no result is displayed before the next interactive prompt: diff --git a/Zim/Programme/python/library.txt b/Zim/Programme/python/library.txt deleted file mode 100644 index 370834f..0000000 --- a/Zim/Programme/python/library.txt +++ /dev/null @@ -1,7 +0,0 @@ -Content-Type: text/x-zim-wiki -Wiki-Format: zim 0.4 -Creation-Date: 2012-01-05T15:16:40+08:00 - -====== library ====== -Created Thursday 05 January 2012 - diff --git a/Zim/Programme/python/python笔记/dict.txt b/Zim/Programme/python/python笔记/dict.txt index f622428..7422bb7 100644 --- a/Zim/Programme/python/python笔记/dict.txt +++ b/Zim/Programme/python/python笔记/dict.txt @@ -13,7 +13,7 @@ class dict(object) | (key, value) pairs | dict(iterable) -> new dictionary initialized as if via: | d = {} - | for **k, v** in iterable: #迭代器对象每次返回的元素必须是一个容器类型,__容器中元素的个数为2__.**如[a,b], "ab",(a,b)** + | for **k, v** in iterable: #迭代器对象每次返回的元素必须是一个容器类型,__容器中元素的个数为2__.**如[a,b], **~~"ab"~~**,(a,b)** | d[k] = v | dict(__**kwargs)__ -> new dictionary initialized with the name=value pairs | in the keyword argument list.
For example: dict(one=1, two=2) diff --git a/Zim/Programme/python/python笔记/unpack.txt b/Zim/Programme/python/python笔记/unpack.txt index bb1f1b1..f2955ef 100644 --- a/Zim/Programme/python/python笔记/unpack.txt +++ b/Zim/Programme/python/python笔记/unpack.txt @@ -36,3 +36,39 @@ unpack一个顺序容器类型时,左边变量的数目必须要与容器中 >>> print k,v d f >>> + +In [9]: for k,v in 'dfdf': //对于字符串的迭代,迭代其每次只返回__一个字符__,所以赋值给k,v时出错 + ...: print k,v + ...: +--------------------------------------------------------------------------- +ValueError Traceback (most recent call last) + in () +----> 1 for k,v in 'dfdf': + 2 print k,v + 3 + +ValueError: need more than 1 value to unpack + +In [10]: k,v='ff' __//对于可迭代对象,展开后的元素个数必须与等式左边的相等。__ + +In [11]: print k,v +**f f** +In [13]: k,v='fff' +--------------------------------------------------------------------------- +ValueError Traceback (most recent call last) + in () +----> 1 k,v='fff' + +ValueError: too many values to unpack + +In [14]: +In [12]: + +In [12]: for k,v in ['df',[1,2],(3,4)]: __//每次迭代器返回列表中的一个元素,每个元素都可以展开为2个变量。__ + print k,v + ....: +d f +1 2 +3 4 + +In [13]: diff --git a/Zim/Utils/gdb/gdb_debugging.txt b/Zim/Utils/gdb/gdb_debugging.txt new file mode 100644 index 0000000..b6e9080 --- /dev/null +++ b/Zim/Utils/gdb/gdb_debugging.txt @@ -0,0 +1,130 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-25T16:49:56+08:00 + +====== gdb debugging ====== +Created Tuesday 25 December 2012 + +[geekard@geekard elf]$ **gdb demo** +GNU gdb (GDB) 7.5.1 +Copyright (C) 2012 Free Software Foundation, Inc. +**(gdb) info source **//查看当前使用的source file name + + +**(gdb) info args ** +name = 0x80486a3 "geekard" +age = 23 +friends = 0xbffff920 +**(gdb) info locals** +a = -1209680176 +nm = 0xbffff914 "\027" +fr = {0xbffff948 "", + 0xb7e5beff "\203\304\030[\303f\220f\220f\220f\220f\220f\220S\203\354\030\215D$,\211D$\f\213D$(讷\r", + 0xb7fb5a00 <_IO_2_1_stdout_> "\204*\255", , 0x80486b4 "\tThe globalVarStatic is:%d\n"} +//从上面两次的反汇编结果可以得出如下结论: +//1. 
break function-name 设置的断点并不位于function的入口处,而break *function-name才是。 +//2. break function-name设置的断点位于function的prologue指令之后,auto variable初始化指令之前。 +//3. 当执行到前面设置的断点时,函数的args已经由caller通过stack传入,但是函数的auto variable还没有被初始化 + +**(gdb) bt **//查看函数调用信息,即backtrap。callee的编号小于其caller。 +#0 greeting (name=0x80486a3 "geekard", age=23, friends=0xbffff920) at demo.c:13 +#1 0x08048578 in main (argc=5, argv=0xbffff9e4) at demo.c:50 +**(gdb) info f 1 **//查看stack frame编号为1的函数帧信息 +Stack frame at 0xbffff950: + eip = 0x8048578 in main (demo.c:50); saved eip 0xb7e28605 + caller of frame at 0xbffff910 + source language c. + Arglist at 0xbffff948, args: argc=5, argv=0xbffff9e4 + Locals at 0xbffff948, Previous frame's sp is 0xbffff950 + Saved registers: + ebp at 0xbffff948, eip at 0xbffff94c +**(gdb) info f 0** +Stack frame at 0xbffff910: + eip = 0x804842c in greeting (demo.c:13); saved eip 0x8048578 + called by frame at 0xbffff950 + source language c. + Arglist at 0xbffff908, args: name=0x80486a3 "geekard", age=23, friends=0xbffff920 + Locals at 0xbffff908, Previous frame's sp is 0xbffff910 + Saved registers: + eip at 0xbffff90c +**(gdb) disassemble main **//反汇编main函数 +Dump of assembler code for function main: +//从info f 1的输出可知,call main前esp的值为 0xbffff950。 + 0x080484f0 <+0>: push %ebp + 0x080484f1 <+1>: mov %esp,%ebp //main函数的prologue指令,break main时停止在下一条指令处。 + 0x080484f3 <+3>: and $0xfffffff0,%esp //SystemV i386 ABI规定esp必须word对齐。 + 0x080484f6 <+6>: sub $0x30,%esp //为main的auto variables预留空间,此后esp的值为0xbffff10。 + + 0x080484f9 <+9>: mov 0x8049918,%eax + 0x080484fe <+14>: mov %eax,0x2c(%esp) //int autoVar = globVar; + + 0x08048502 <+18>: mov 0x8049924,%eax + 0x08048507 <+23>: mov %eax,0x28(%esp) //i = externVar; + + 0x0804850b <+27>: movl $0x80486a3,0x24(%esp) // char *name = "geekard" + + 0x08048513 <+35>: movl $0x17,0x20(%esp) // int age = 23 + + 0x0804851b <+43>: movl $0x8048639,0x10(%esp) //"Tom" + 0x08048523 <+51>: movl $0x804863d,0x14(%esp) //"John" + 0x0804852b <+59>: movl 
$0x8048642,0x18(%esp) //"Pi" + 0x08048533 <+67>: movl $0x0,0x1c(%esp) //NULL +//char **friends = {x, x, NULL},friends是一个指向字符指针的指针__变量(占有内存单元)__,其指向的数组有4个元素。 +//变量friends的地址为0xbffff918,即0x8($esp)处,可以用print &friends命令打印出。 +//变量friends的值为0xbffff920,指向栈上"Tom"所在的内存单元地址。对于指针数组变量friends,有以下调试信息: + **(gdb) p &friends ** + $21 = (char ***) 0xbffff918 + **(gdb) p friends** + $17 = (char **) 0xbffff920 + **(gdb) p friends@4** + $18 = {0xbffff920, 0x80485f2 <__libc_csu_init+82>, 0x8048639, 0x804863d} + **(gdb) p *friends@4** + $19 = {0x8048639 "Tom", 0x804863d "John", 0x8048642 "Pi", 0x0} + **(gdb) x /4wx friends@4** + 0xbffff918: 0xbffff920 0x080485f2 0x08048639 0x0804863d + **(gdb) x /4wx *friends@4** + 0xbffff920: 0x08048639 0x0804863d 0x08048642 0x00000000 + **(gdb) p *friends@4** + $20 = {0x8048639 "Tom", 0x804863d "John", 0x8048642 "Pi", 0x0} +//从main函数的auto variable初始化指令可以得出如下结论: +//1. main函数的auto variable是保存在stack中,其layout的顺序与定义的顺序一致,即先定义的变量先入栈。 +//2.static variable定义在全局数据段(.data section)中,而不是stack中(int static staticVar并没有在stack中初始化)。 +//3.数组的各元素入栈顺序与定义顺序相反,即索引越大越先入栈; 指针数组变量最后入栈。 + +//auto variable初始化完成后的main栈分配如下: +{{./stack.png?height=544}} + + + 0x0804853b <+75>: movl $0x80486ab,(%esp) + 0x08048542 <+82>: call 0x8048300 + 0x08048547 <+87>: mov 0x804991c,%eax + 0x0804854c <+92>: mov %eax,0x4(%esp) + 0x08048550 <+96>: movl $0x80486b4,(%esp) + 0x08048557 <+103>: call 0x80482f0 + 0x0804855c <+108>: lea 0x10(%esp),%eax + 0x08048560 <+112>: mov %eax,0x8(%esp) + 0x08048564 <+116>: mov 0x20(%esp),%eax + 0x08048568 <+120>: mov %eax,0x4(%esp) + 0x0804856c <+124>: mov 0x24(%esp),%eax + 0x08048570 <+128>: mov %eax,(%esp) + 0x08048573 <+131>: call 0x804842c + __0x08048578__ <+136>: movl $0x3,0x4(%esp) + 0x08048580 <+144>: movl $0x2,(%esp) + 0x08048587 <+151>: call 0x8048590 + 0x0804858c <+156>: leave + 0x0804858d <+157>: ret +End of assembler dump. 
+ +//greeting函数的arguments是由main函数通过栈传入的。下面代码节选自main函数的反汇编输出。 + //函数的auto variable是保存在其stack中的。下面是main函数的auto variable初始化代码。 + 0x0804850b <+27>: movl $0x80486a3,0x24(%esp) //name + 0x08048513 <+35>: movl $0x17,0x20(%esp) //age + 0x0804851b <+43>: movl $0x8048639,0x10(%esp) //friends + //将greeting函数的friends参数压入栈中,注意friends参数的值是main的auto variable,所以它保存在main的stack frame中。 + 0x0804855c <+108>: lea 0x10(%esp),%eax + 0x08048560 <+112>: mov %eax,0x8(%esp) //friends + 0x08048564 <+116>: mov 0x20(%esp),%eax + 0x08048568 <+120>: mov %eax,0x4(%esp) //通过栈传入age参数 + 0x0804856c <+124>: mov 0x24(%esp),%eax + 0x08048570 <+128>: mov %eax,(%esp) //通过栈传入name参数 + 0x08048573 <+131>: call 0x804842c diff --git a/Zim/Utils/gdb/gdb_debugging/gdb_demo.txt b/Zim/Utils/gdb/gdb_debugging/gdb_demo.txt new file mode 100644 index 0000000..9e2e655 --- /dev/null +++ b/Zim/Utils/gdb/gdb_debugging/gdb_demo.txt @@ -0,0 +1,174 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-25T22:10:21+08:00 + +====== gdb demo ====== +Created Tuesday 25 December 2012 + +[geekard@geekard elf]$ **cat demo.c** +#include +#include + +int globalVar = 1; +int globalVarUninit; + +static int globalVarStatic = 3; + +extern int externVar; +extern int add(int, int); + +void greeting(char *name, int age, char** friends) +{ + + int a = 23; + char *nm = "zhangjun"; + char *fr[] = {"Tom", "John", "Pi", NULL}; //fr为__数组名称__,它指向一块内存单元,但是本身不占用内存单元。 + char **fr = {"Tom", "John", "Pi", NULL}; //fr为__指针变量,指向一个保存有4个字符指针的内存单元(位于greeting的栈中)。__ + + if (name == NULL) + name = nm; + if (age == 0) + age = a; + if (friends == NULL) + friends = fr; + + printf("In greeting:\n"); + printf("\tHello, %s:\n", name); + printf("\tYourt age is:%d.\n", age); + printf("\tYourt friends are:\n"); + while (*friends != NULL) { + printf("\t\t%s\n", *friends); + friends += 1; + } + + printf("Goodbye from greeting.\n"); +} + +int main(int argc, char *argv[]) +{ + int autoVar = globalVar; + static int staticVar = 2; + int i = externVar; 
+ + char *name = "geekard"; + int age = 23; + char *friends[] = {"Tom", "John", "Pi", NULL}; + + printf("In main:\n"); + printf("\tThe globalVarStatic is:%d\n",globalVarStatic); + greeting(name, age, friends); + add(2, 3); +} +[geekard@geekard elf]$ **cat foo.c** +int externVar = 1; +int static staticVarStatic = 2; + +int add(int a, int b) +{ + return a + b; +} +[geekard@geekard elf]$ **gcc -g demo.c foo.c -o demo** +demo.c: In function ‘greeting’: +demo.c:18:3: warning: initialization from incompatible pointer type [enabled by default] +demo.c:18:3: warning: (near initialization for ‘fr’) [enabled by default] +demo.c:18:3: warning: excess elements in scalar initializer [enabled by default] +demo.c:18:3: warning: (near initialization for ‘fr’) [enabled by default] +demo.c:18:3: warning: excess elements in scalar initializer [enabled by default] +demo.c:18:3: warning: (near initialization for ‘fr’) [enabled by default] +demo.c:18:3: warning: excess elements in scalar initializer [enabled by default] +demo.c:18:3: warning: (near initialization for ‘fr’) [enabled by default] +[geekard@geekard elf]$ **gdb demo** +GNU gdb (GDB) 7.5.1 +Copyright (C) 2012 Free Software Foundation, Inc. +。。。。。。 +**(gdb) info source** +Current source file is demo.c +Compilation directory is /home/geekard/Code/elf +Located in /home/geekard/Code/elf/demo.c +Contains 53 lines. +Source language is c. +Compiled with DWARF 2 debugging format. +Does not include preprocessor macro info. +**(gdb) info sources** +Source files for which symbols have been read in: + +/home/geekard/Code/elf/demo.c + +Source files for which symbols will be read in on demand: + +/home/geekard/Code/elf/foo.c + +**(gdb) set args -a aa --bb=bbb ccc ** //设置dmo程序的默认命令行参数,执行run命令时会传入该参数。 +**(gdb) show args **//查看demo程序的命令行参数 +Argument list to give program being debugged when it is started is "-a aa --bb=bbb ccc". +**(gdb) info args ** //查看当前stack frame对应函数的参数列表信息。 +No frame selected. 
+(gdb) show env +XDG_VTNR=1 +。。。。。 +**(gdb) shell clear ** //执行shell命令,这里为clear。 +**(gdb) info address greeting** //在symbol table中查找greeting函数的入口地址 +Symbol "greeting" is a function at address 0x804842c. +**(gdb) b 0x804842c //在某个地址处设置断点时,地址前应该加星号,否则gdb认为它为函数名称。** +Function "0x804842c" not defined. +Make breakpoint pending on future shared library load? (y or [n]) n +**(gdb) b *0x804842c ** +Breakpoint 1 at 0x804842c: file demo.c, line 13. +**(gdb) b *greeting** +Note: breakpoint 1 also set at pc 0x804842c. +Breakpoint 2 at 0x804842c: file demo.c, line 13. +**(gdb) b greeting** //在greeting函数入口处设一个断点 +Breakpoint 3 at 0x8048432: file demo.c, line 15. +**(gdb) info b** +Num Type Disp Enb Address What +1 breakpoint keep y 0x0804842c in greeting at demo.c:13 +2 breakpoint keep y 0x0804842c in greeting at demo.c:13 +3 breakpoint keep y 0x08048432 in greeting at demo.c:15 +**(gdb) delete 2 //删除编号为2的断点** +**(gdb) list 12,16 //查看当前文件的12至16行。** +12 void greeting(char *name, int age, char** friends) +13 { +14 +15 int a = 23; +16 char *nm = "zhangjun"; +**(gdb) r** +Starting program: /home/geekard/Code/elf/demo -a aa --bb=bbb ccc //gdb将set args命令的参数传给demo。 +Breakpoint 1, greeting (name=0x804868b "geekard", age=23, friends=0xbffff950) at demo.c:13 +13 { +**(gdb) disassemble greeting** //反汇编greeting函数。如果disassmble没有参数,则默认反汇编当前所执行的函数。 +Dump of assembler code for function greeting: +=> 0x0804842c <+0>: push %ebp //demo停止在greeting函数的入口处,函数的prologue指令还没有执行。 + 0x0804842d <+1>: mov %esp,%ebp + 0x0804842f <+3>: sub $0x28,%esp +(gdb) c //继续执行,直到下一个断点处。 +Continuing. 
+ +Breakpoint 3, greeting (name=0x804868b "geekard", age=23, friends=0xbffff950) at demo.c:15 +15 int a = 23; +**(gdb) disassemble** //反汇编当前执行的函数 +Dump of assembler code for function greeting: + 0x0804842c <+0>: push %ebp + 0x0804842d <+1>: mov %esp,%ebp + 0x0804842f <+3>: sub $0x28,%esp +=> 0x08048432 <+6>: movl $0x17,-0xc(%ebp) //指令前的"=>"表示下一条**待执行**的指令位置。 +//可见greeting的prologue指令已经执行,这意味着greeting的栈帧已经建立。但是greeting的auto variable初始化指令还没有执行。 + 0x08048439 <+13>: movl $0x8048620,-0x10(%ebp) + 0x08048440 <+20>: movl $0x8048629,-0x14(%ebp) + 0x08048447 <+27>: cmpl $0x0,0x8(%ebp) + 0x0804844b <+31>: jne 0x8048453 + 0x0804844d <+33>: mov -0x10(%ebp),%eax + 0x08048450 <+36>: mov %eax,0x8(%ebp) +。。。。。 +**(gdb) info args //查看当前stack frame对应函数的参数列表。** +name = 0x804868b "geekard" +age = 23 +friends = 0xbffff950 //传经来的时指针 +**(gdb) info locals** //greeting函数的auto variables还没有初始化,所以下列为无效值 +a = -1209680176 +nm = 0xbffff944 "\027" +fr = 0x80486a4 // +(gdb) + + + + diff --git a/Zim/Utils/gdb/gdb_debugging/gdb_pointer.txt b/Zim/Utils/gdb/gdb_debugging/gdb_pointer.txt new file mode 100644 index 0000000..c7701d5 --- /dev/null +++ b/Zim/Utils/gdb/gdb_debugging/gdb_pointer.txt @@ -0,0 +1,82 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-25T21:45:29+08:00 + +====== gdb pointer ====== +Created Tuesday 25 December 2012 +[geekard@geekard elf]$ **cat array.c** +#include +#include + +int main(void) +{ + int ia[3] = {1,2,3}; + char *cpa[3] = {"Tom", "John", NULL}; + char *cpa2[] = {"Tom", "John", NULL}; + char **cpa3 = {"Tom", "John", NULL}; +} +[geekard@geekard elf]$ **gcc -g array.c -o array** +array.c: In function ‘main’: +array.c:9:4: warning: initialization from incompatible pointer type [enabled by default] +array.c:9:4: warning: (near initialization for ‘cpa3’) [enabled by default] +array.c:9:4: warning: excess elements in scalar initializer [enabled by default] +array.c:9:4: warning: (near initialization for ‘cpa3’) [enabled by default] +array.c:9:4: 
warning: excess elements in scalar initializer [enabled by default] +array.c:9:4: warning: (near initialization for ‘cpa3’) [enabled by default] +[geekard@geekard elf]$ **gdb array** +GNU gdb (GDB) 7.5.1 +。。。。。 +**(gdb) list main** +1 #include +2 #include +3 +4 int main(void) +5 { +6 int ia[3] = {1,2,3}; +7 char *cpa[3] = {"Tom", "John", NULL}; +8 char *cpa2[] = {"Tom", "John", NULL}; //ia, cpa, cpa2为数组首地址的引用名称,而非变量,不占用 +9 char **cpa3 = {"Tom", "John", NULL}; +10 } +**(gdb) b 10** +Breakpoint 1 at 0x8048418: file array.c, line 10. +**(gdb) r** +Starting program: /home/geekard/Code/elf/array +warning: Could not load shared library symbols for linux-gate.so.1. +Do you need "set solib-search-path" or "set sysroot"? + +Breakpoint 1, main () at array.c:10 +10 } +**(gdb) disassemble main** +Dump of assembler code for function main: + 0x080483cc <+0>: push %ebp + 0x080483cd <+1>: mov %esp,%ebp + 0x080483cf <+3>: sub $0x30,%esp + 0x080483d2 <+6>: movl $0x1,-0x10(%ebp) + 0x080483d9 <+13>: movl $0x2,-0xc(%ebp) + 0x080483e0 <+20>: movl $0x3,-0x8(%ebp) + 0x080483e7 <+27>: movl $0x80484b0,-0x1c(%ebp) + 0x080483ee <+34>: movl $0x80484b4,-0x18(%ebp) + 0x080483f5 <+41>: movl $0x0,-0x14(%ebp) + 0x080483fc <+48>: movl $0x80484b0,-0x28(%ebp) + 0x08048403 <+55>: movl $0x80484b4,-0x24(%ebp) + 0x0804840a <+62>: movl $0x0,-0x20(%ebp) + 0x08048411 <+69>: movl $0x80484b0,-0x4(%ebp) +=> 0x08048418 <+76>: leave + 0x08048419 <+77>: ret +End of assembler dump. +**(gdb) bt** +#0 main () at array.c:10 +**(gdb) info f 0** +Stack frame at 0xbffff980: + eip = 0x8048418 in main (array.c:10); saved eip 0xb7e28605 + source language c. 
+ Arglist at 0xbffff978, args: + Locals at 0xbffff978, Previous frame's sp is 0xbffff980 + Saved registers: + ebp at 0xbffff978, eip at 0xbffff97c +**(gdb) info locals** +ia = {1, 2, 3} +cpa = {0x80484b0 "Tom", 0x80484b4 "John", 0x0} +cpa2 = {0x80484b0 "Tom", 0x80484b4 "John", 0x0} +__cpa3 = 0x80484b0__ +//可见ia, cpa, cpa2为数组名称,而cpa3为一指针变量,其值为0x80484b0。 diff --git a/Zim/Utils/gdb/gdb_debugging/stack.png b/Zim/Utils/gdb/gdb_debugging/stack.png new file mode 100644 index 0000000..6da1b0b Binary files /dev/null and b/Zim/Utils/gdb/gdb_debugging/stack.png differ diff --git a/Zim/Utils/gdb/gdb_frame.txt b/Zim/Utils/gdb/gdb_frame.txt new file mode 100644 index 0000000..4c76288 --- /dev/null +++ b/Zim/Utils/gdb/gdb_frame.txt @@ -0,0 +1,250 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-12-25T10:31:03+08:00 + +====== gdb frame ====== +Created Tuesday 25 December 2012 + +char **friends = {"Tom", "John", "Pi", \0}; +等价于 char *friends[ ] = {"Tom", "John", "Pi", \0}; +等价于 char *friends[4] = {"Tom", "John", "Pi", \0}; + +[geekard@geekard rel]$ gdb rel2 +GNU gdb (GDB) 7.5.1 +**(gdb) b greeting** +Breakpoint 1 at 0x8048432: file rel2.c, line 8. +**(gdb) r** +Starting program: /home/geekard/Code/elf/rel/rel2 +warning: Could not load shared library symbols for linux-gate.so.1. +Do you need "set solib-search-path" or "set sysroot"? 
+3 +Breakpoint 1, greeting (name=0x80485c9 "geekard", age=23, friends=0xbffff944) at rel2.c:8 +8 printf("Hello, %s:\n", name); +**(gdb) disassemble** +Dump of assembler code for function greeting: + 0x0804842c <+0>: push %ebp + 0x0804842d <+1>: mov %esp,%ebp + 0x0804842f <+3>: sub $0x18,%esp + 上面这三条指令称为prologue,由gcc自动添加。break fuction会在执行完function的prologue后停止,这样 +=> 0x08048432 <+6>: mov 0x8(%ebp),%eax + 0x08048435 <+9>: mov %eax,0x4(%esp) + 0x08048439 <+13>: movl $0x8048590,(%esp) + 0x08048440 <+20>: call 0x80482f0 +**(gdb) si //多次执行si,直到输出如下字符串** +0x080482f0 in printf@plt () +**(gdb) bt** +#0 0x080482f0 in printf@plt () +#1 0x08048445 in greeting (name=0x80485c9 "geekard", age=23, friends=0xbffff944) at rel2.c:8 +#2 0x080484fd in main () at [[rel2.c:28]] +**(gdb) info frame 2** +Stack frame at 0xbffff970: + eip = 0x80484fd in main (rel2.c:28); saved eip 0xb7e28605 + caller of frame at 0xbffff930 + source language c. + Arglist at 0xbffff968, args: + Locals at 0xbffff968, Previous frame's sp is 0xbffff970 + Saved registers: + ebp at 0xbffff968, eip at 0xbffff96c +**(gdb) info frame 1** +Stack frame at 0xbffff930: + eip = 0x8048445 in greeting (rel2.c:8); saved eip 0x80484fd + called by frame at 0xbffff970, caller of frame at 0xbffff910 + source language c. 
+ Arglist at 0xbffff928, args: name=0x80485c9 "geekard", age=23, friends=0xbffff944 + Locals at 0xbffff928, Previous frame's sp is 0xbffff930 + Saved registers: + ebp at 0xbffff928, eip at 0xbffff92c +**(gdb) info frame 0** +Stack frame at 0xbffff910: + eip = 0x80482f0 in printf@plt; saved eip 0x8048445 + called by frame at 0xbffff930 + Arglist at 0xbffff908, args: + Locals at 0xbffff908, Previous frame's sp is 0xbffff910 + Saved registers: //没有保存ebp, esi, edi, ebx寄存器的值。 + eip at 0xbffff90c + +**(gdb) disassemble main** +Dump of assembler code for function main: + 0x0804848a <+0>: push %ebp + 0x0804848b <+1>: mov %esp,%ebp + 0x0804848d <+3>: and $0xfffffff0,%esp + 0x08048490 <+6>: sub $0x30,%esp + 0x08048493 <+9>: mov 0x8049804,%eax + 0x08048498 <+14>: mov %eax,0x2c(%esp) + 0x0804849c <+18>: movl $0x80485c9,0x28(%esp) + 0x080484a4 <+26>: movl $0x17,0x24(%esp) + 0x080484ac <+34>: movl $0x80485d1,0x14(%esp) + 0x080484b4 <+42>: movl $0x80485d5,0x18(%esp) + 0x080484bc <+50>: movl $0x80485da,0x1c(%esp) + 0x080484c4 <+58>: movl $0x0,0x20(%esp) + 0x080484cc <+66>: mov 0x8049808,%eax + 0x080484d1 <+71>: mov %eax,0x4(%esp) + 0x080484d5 <+75>: movl $0x80485dd,(%esp) + 0x080484dc <+82>: call 0x80482f0 + 0x080484e1 <+87>: lea 0x14(%esp),%eax + 0x080484e5 <+91>: mov %eax,0x8(%esp) + 0x080484e9 <+95>: mov 0x24(%esp),%eax + 0x080484ed <+99>: mov %eax,0x4(%esp) + 0x080484f1 <+103>: mov 0x28(%esp),%eax + 0x080484f5 <+107>: mov %eax,(%esp) + 0x080484f8 <+110>: call __0x804842c__ + __0x080484fd__ <+115>: leave + 0x080484fe <+116>: ret +End of assembler dump. 
+**(gdb) disassemble greeting** +Dump of assembler code for function greeting: + 0x0804842c <+0>: push %ebp + 0x0804842d <+1>: mov %esp,%ebp + 0x0804842f <+3>: sub $0x18,%esp + 0x08048432 <+6>: mov 0x8(%ebp),%eax + 0x08048435 <+9>: mov %eax,0x4(%esp) + 0x08048439 <+13>: movl $0x8048590,(%esp) + 0x08048440 <+20>: call __0x80482f0__ + __0x08048445__ <+25>: mov 0xc(%ebp),%eax + 0x08048448 <+28>: mov %eax,0x4(%esp) + 0x0804844c <+32>: movl $0x804859c,(%esp) + 0x08048453 <+39>: call 0x80482f0 + 0x08048458 <+44>: movl $0x80485af,(%esp) + 0x0804845f <+51>: call 0x8048300 + 0x08048464 <+56>: jmp 0x804847f + 0x08048466 <+58>: mov 0x10(%ebp),%eax + 0x08048469 <+61>: mov (%eax),%eax + 0x0804846b <+63>: mov %eax,0x4(%esp) + 0x0804846f <+67>: movl $0x80485c3,(%esp) + 0x08048476 <+74>: call 0x80482f0 + 0x0804847b <+79>: addl $0x4,0x10(%ebp) + 0x0804847f <+83>: mov 0x10(%ebp),%eax + 0x08048482 <+86>: mov (%eax),%eax + 0x08048484 <+88>: test %eax,%eax + 0x08048486 <+90>: jne 0x8048466 + 0x08048488 <+92>: leave + 0x08048489 <+93>: ret +End of assembler dump. 
+**(gdb) x /8wx 0x80482f0** +0x80482f0 : 0x97ec25ff 0x00680804 0xe9000000 0xffffffe0 +0x8048300 : 0x97f025ff 0x08680804 0xe9000000 0xffffffd0 +**(gdb) x /8wi 0x80482f0** + __0x80482f0__ : jmp *__0x80497ec__ + 0x80482f6 : push $0x0 + 0x80482fb : jmp 0x80482e0 + 0x8048300 : jmp *0x80497f0 + 0x8048306 : push $0x8 + 0x804830b : jmp 0x80482e0 + 0x8048310 <__gmon_start__@plt>: jmp *0x80497f4 + 0x8048316 <__gmon_start__@plt+6>: push $0x10 +**(gdb) x /wx 0x80497ec** +0x80497ec : 0xb7e5bed0 +**(gdb) si** +0xb7e5bed0 in printf () from [[/usr/lib/libc.so.6]] +**(gdb) disassemble *0x80497ec** +Dump of assembler code for function __printf__: +=> __0xb7e5bed0__ <+0>: push %ebx + 0xb7e5bed1 <+1>: sub $0x18,%esp + 0xb7e5bed4 <+4>: call 0xb7f376d3 <__x86.get_pc_thunk.bx> + 0xb7e5bed9 <+9>: add $0x159127,%ebx + 0xb7e5bedf <+15>: lea 0x24(%esp),%eax + 0xb7e5bee3 <+19>: mov %eax,0x8(%esp) + 0xb7e5bee7 <+23>: mov 0x20(%esp),%eax + 0xb7e5beeb <+27>: mov %eax,0x4(%esp) + 0xb7e5beef <+31>: mov -0x68(%ebx),%eax + 0xb7e5bef5 <+37>: mov (%eax),%eax + 0xb7e5bef7 <+39>: mov %eax,(%esp) + 0xb7e5befa <+42>: call 0xb7e520f0 + 0xb7e5beff <+47>: add $0x18,%esp + 0xb7e5bf02 <+50>: pop %ebx + 0xb7e5bf03 <+51>: ret +End of assembler dump. 
+**(gdb) bt** +#0 0xb7e5bed0 in printf () from /usr/lib/libc.so.6 +#1 0x08048445 in greeting (name=0x80485c9 "geekard", age=23, friends=0xbffff944) at rel2.c:8 +#2 0x080484fd in main () at rel2.c:28 +**(gdb) info f 0** +Stack frame at 0xbffff910: + eip = 0xb7e5bed0 in printf; saved eip 0x8048445 + called by frame at 0xbffff930 + Arglist at 0xbffff908, args: + Locals at 0xbffff908, Previous frame's sp is 0xbffff910 + Saved registers: + eip at 0xbffff90c +没有保存ebp, esi, edi, ebx等寄存器的值,所以它们的值与frame 1中的相等。在编译时如果指定-fomit-frame-pointer选项,则 +编译器为函数调用生成栈帧时不会圧入ebp寄存器的值,而且对arguments和auto variables的引用是通过esp进行的。这样可以将 +ebp寄存器节省出来,用作其它用途。但缺点是无法对函数调用进行frame backtrace(如果目标文件中有.eh_frame section则gdb可以 +从中提取出frame backtrace信息)。 + +**(gdb) info registers** +eax 0x80485c9 134514121 +ecx 0x0 0 +edx 0x0 0 +ebx 0xb7fb5000 -1208266752 //继续保存的是frame 1中的值 +esp 0xbffff90c 0xbffff90c +ebp 0xbffff928 0xbffff928 //继续保存的是frame 1中的值 +esi 0x0 0 +edi 0x0 0 +eip 0xb7e5bed0 0xb7e5bed0 +eflags 0x200282 [ SF IF ID ] +cs 0x73 115 +ss 0x7b 123 +ds 0x7b 123 +es 0x7b 123 +fs 0x0 0 +gs 0x33 51 +**(gdb) f 1 //切换frame到#1, 这样可以查看greeting函数的参数和自动变量值。** +#1 0x08048445 in greeting (name=0x80485c9 "geekard", age=23, friends=0xbffff944) at rel2.c:8 +8 printf("Hello, %s:\n", name); +**(gdb) info registers** //打印frame 1的寄存器 +eax 0x80485c9 134514121 +ecx 0x0 0 +edx 0x0 0 +ebx **0xb7fb5000** -1208266752 +esp 0xbffff910 0xbffff910 +ebp **0xbffff928** 0xbffff928 +esi 0x0 0 +edi 0x0 0 +eip 0x8048445 0x8048445 +eflags 0x200282 [ SF IF ID ] +cs 0x73 115 +ss 0x7b 123 +ds 0x7b 123 +es 0x7b 123 +fs 0x0 0 +gs 0x33 51 +(gdb) + + +(gdb) +**(gdb) info args** +name = 0x80485c9 "geekard" +age = 23 +friends = 0xbffff944 +**(gdb) ptype friends** +type = char ** + +**(gdb) p friends** +$1 = (char **) 0xbffff944 +**(gdb) p *friends** +$6 = 0x80485d1 "Tom" +**(gdb) p friends+1** +$2 = (char **) 0xbffff948 +**(gdb) x /4wx friends** +0xbffff944: 0x080485d1 0x080485d5 0x080485da 0x00000000 +**(gdb) p *friends@4** +$4 = {0x80485d1 "Tom", 
0x80485d5 "John", 0x80485da "Pi", 0x0} + +**(gdb) p &friends** +$5 = (char ***) 0xbffff938 + +**(gdb) x /4wx friends+1** +0xbffff948: 0x080485d5 0x080485da 0x00000000 0x00000017 +**(gdb) x /4wx &friends** +0xbffff938: 0xbffff944 0x08048552 0x00000001 0x080485d1 +**(gdb) p friends@4** +$3 = {0xbffff944, 0x8048552 <__libc_csu_init+82>, 0x1, 0x80485d1} + +**(gdb) x /wx friends** +0xbffff944: 0x080485d1 +**(gdb) x /wx friends+1** +0xbffff948: 0x080485d5 +**(gdb) x /wx &friends** +0xbffff938: 0xbffff944 +**(gdb) x /wx *friends** +0x80485d1: 0x006d6f54 diff --git a/Zim/Utils/ip/ip_link.txt b/Zim/Utils/ip/ip_link.txt index cf38c56..554a693 100644 --- a/Zim/Utils/ip/ip_link.txt +++ b/Zim/Utils/ip/ip_link.txt @@ -49,7 +49,7 @@ TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | can | __bridge__ } link/ether c8:60:00:8a:db:e7 brd ff:ff:ff:ff:ff:ff [geekard@kb310 man]$ __sudo ip link add link eth0 name demo-bridge type bridge; ip link show__ -**#命令行中的link eth0其实不用加。** + 1: lo: mtu 16436 qdisc noqueue state UNKNOWN mode DEFAULT link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 2: eth0: mtu 576 qdisc pfifo_fast state __UP__ mode DEFAULT qlen 1000 @@ -148,6 +148,11 @@ rtt min/avg/max/mdev = 0.385/0.546/0.959/0.239 ms Password: RTNETLINK answers: No such file or directory [geekard@kb310 ~]$ __sudo ip route add default via 192.168.1.1 dev demo-bridge__ +注意,添加路由的格式为:ip route add PREFIX via ADDRESS dev DEV +其中PREFIX表示目标网络,如果为缺省路由则应为default。 +via ADDRESS是可选的,表示到达PREFIX目标网络需要经过ADDRESS表示的路由器,__ADDRESS不用前缀形式__。 + [geekard@geekard rel]$ sudo ip route add default via 192.168.1.1/24 dev demo + Error: an inet address is expected rather than "192.168.1.1/24". 
[geekard@kb310 ~]$ __ip route show table all__ **default via 192.168.1.1 dev demo-bridge ** **default via 192.168.1.1 dev eth0 metric 202** diff --git a/Zim/Utils/systemd.txt b/Zim/Utils/systemd.txt new file mode 100644 index 0000000..ac651ab --- /dev/null +++ b/Zim/Utils/systemd.txt @@ -0,0 +1,2628 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-20T16:29:03+08:00 + +====== systemd ====== +Created Tuesday 20 November 2012 +http://www.0pointer.de/blog/projects/systemd.html + +Fri, 30 Apr 2010 + +===== Rethinking PID 1 ===== +If you are well connected or good at reading between the lines you might already know what this blog post is about. But even then you may find this story interesting. So grab a cup of coffee, sit down, and read what's coming. + +This blog story is long, so even though I can only recommend reading the long story, here's the one sentence summary: we are experimenting with a new init system and it is fun. + +Here's the code. And here's the story: + +===== Process Identifier 1 ===== +On every Unix system there is one process with the special process identifier 1. It is __started by the kernel__ before all other processes and is the parent process for all those other processes that have nobody else to be child of. Due to that it can do a lot of stuff that other processes cannot do. And it is also responsible for some things that other processes are not responsible for, such as __bringing up and maintaining userspace__ during boot.(用户空间的启动程序、服务和会话都是由init进程间接或直接启动起来的。) + +Historically on Linux the software acting as PID 1 was the venerable **sysvinit** package, though it had been showing its age for quite a while. Many replacements have been suggested, only one of them really took off: **Upstart**, which has by now found its way into all major distributions. + +As mentioned, the central responsibility of an init system is to __bring up userspace__. And a good init system does that fast. 
Unfortunately, the traditional SysV init system was not particularly fast. + +For a fast and efficient boot-up two things are crucial: +* To start less. +* And to start more in parallel. + +What does that mean? Starting less means __starting fewer services or deferring the starting of services__ until they are actually needed. There are some services where we know that they will be required sooner or later (syslog, D-Bus system bus, etc.), but for many others this isn't the case. For example, **bluetoothd** does not need to be running unless a bluetooth dongle is actually plugged in or an application wants to talk to its D-Bus interfaces. Same for a printing system: unless the machine physically is connected to a printer, or an application wants to print something, there is no need to run a printing daemon such as **CUPS**. **Avahi**: if the machine is not connected to a network, there is no need to run Avahi, unless some application wants to use its APIs. And even SSH: as long as nobody wants to contact your machine there is no need to run it, as long as it is then started on the first connection. (And admit it, on most machines where **sshd** might be listening somebody connects to it only every other month or so.) + +Starting more in parallel means that if we have to run something, we should not serialize its start-up (as sysvinit does), but run it all at the same time, so that the available CPU and disk IO bandwidth is maxed out, and hence the overall start-up time minimized. + +===== Hardware and Software Change Dynamically ===== +Modern systems (especially general purpose OS) are highly dynamic in their configuration and use: they are mobile, different applications are started and stopped, different hardware added and removed again. 
__An init system that is responsible for maintaining services needs to listen to hardware and software changes.__ It needs to __dynamically__ start (and sometimes stop) services as they are needed to run a program or enable some hardware. + +Most current systems that try to parallelize boot-up still synchronize the start-up of the various daemons involved: since Avahi needs D-Bus, D-Bus is started first, and only when D-Bus signals that it is ready, Avahi is started too. Similar for other services: libvirtd and X11 need HAL (well, I am considering the Fedora 13 services here, ignore that HAL is obsolete), hence HAL is started first, before libvirtd and X11 are started. And libvirtd also needs Avahi, so it waits for Avahi too. And all of them require syslog, so they all wait until Syslog is fully started up and initialized. And so on. + +===== Parallelizing Socket Services ===== +This kind of __start-up synchronization__ results in the serialization of a significant part of the boot process. Wouldn't it be great if we could get rid of the synchronization and serialization cost? Well, we can, actually. For that, we need to understand what exactly the daemons require from each other, and why their start-up is delayed. For traditional Unix daemons, there's one answer to it: __they wait until the socket the other daemon offers its services on is ready for connections.__ Usually that is an AF_UNIX socket in the file-system, but it could be AF_INET[6], too. For example, clients of D-Bus wait that **/var/run/dbus/system_bus_socket** can be connected to, clients of syslog wait for **/dev/log**, clients of CUPS wait for **/var/run/cups/cups.sock** and NFS mounts wait for **/var/run/rpcbind.sock** and the portmapper IP port, and so on. And think about it, this is actually the only thing they wait for! 
+ +Now, if that's all they are waiting for, if we manage to make those sockets available for connection __earlier__ and only actually wait for that instead of the full daemon start-up, then we can speed up the entire boot and start more processes in parallel. So, how can we do that? Actually quite easily in Unix-like systems: __we can create the listening sockets before we actually start the daemon, and then just pass the socket during exec() to it.__ That way, we can create all sockets for all daemons in one step in the init system, and then in a second step run all daemons at once. If a service needs another, and it is not fully started up, that's completely OK: what will happen is that the connection is queued in the providing service and the client will **potentially block** on that single request. But only that one client will block and only on that one request. Also, dependencies between services will __no longer__ necessarily have to be configured to allow proper parallelized start-up: if we start all sockets at once and a service needs another it can be sure that it can connect to its socket. + +Because this is at the core of what is following, let me say this again, with different words and by example: if you start syslog and various syslog clients at the same time, what will happen in the scheme pointed out above is that the messages of the clients will be added to the **/dev/log** socket buffer. As long as that buffer doesn't run full, the clients will not have to wait in any way and can immediately proceed with their start-up. As soon as syslog itself finished start-up, it will dequeue all messages and process them. Another example: we start D-Bus and several clients at the same time. If a synchronous bus request is sent and hence a reply expected, what will happen is that the client will have to **block**, however only that one client and only until D-Bus managed to catch up and process it. 
+ +Basically, __the kernel socket buffers help us to maximize parallelization, and the ordering and synchronization is done by the kernel,__ without any further management from userspace! And if all the sockets are available before the daemons actually start-up, dependency management also becomes redundant (or at least secondary): if a daemon needs another daemon, it will just connect to it. If the other daemon is already started, this will immediately succeed. If it isn't started but in the process of being started, the first daemon will not even have to wait for it, unless it issues **a synchronous request.** And even if the other daemon is not running at all, it can be **auto-spawned**. From the first daemon's perspective there is no difference, hence dependency management becomes mostly unnecessary or at least secondary, and all of this in optimal parallelization and optionally with __on-demand loading__. On top of this, this is also more robust, because the sockets stay available regardless whether the actual daemons might temporarily become unavailable (maybe due to crashing). In fact, you can easily write a daemon with this that can run, and exit (or crash), and run again and exit again (and so on), and all of that without the clients noticing or losing any request. + +It's a good time for a pause, go and refill your coffee mug, and be assured, there is more interesting stuff following. + +But first, let's clear a few things up: is this kind of logic new? No, it certainly is not. The most prominent system that works like this is Apple's launchd system: on MacOS the listening of the sockets is pulled out of all daemons and done by launchd. The services themselves hence can all start up in parallel and dependencies need not to be configured for them. And that is actually a really ingenious design, and the primary reason why MacOS manages to provide the fantastic boot-up times it provides. 
I can highly recommend this video where the launchd folks explain what they are doing. Unfortunately this idea never really took on outside of the Apple camp. + +The idea is actually even older than launchd. Prior to launchd the venerable **inetd** worked much like this: sockets were centrally created in a daemon that would start the actual service daemons passing the socket file descriptors during exec(). However the focus of inetd certainly wasn't local services, but Internet services (although later reimplementations supported AF_UNIX sockets, too). It also wasn't a tool to parallelize boot-up or even useful for getting implicit dependencies right. + +For TCP sockets inetd was primarily used in a way that for every incoming connection a new daemon instance was spawned. That meant that for each connection a new process was spawned and initialized, which is __not__ a recipe for high-performance servers. However, right from the beginning inetd also supported another mode, where a single daemon was spawned on the first connection, and that single instance would then __go on__ and also accept the follow-up connections (that's what the wait/nowait option in inetd.conf was for, a particularly badly documented option, unfortunately.) Per-connection daemon starts probably gave inetd its bad reputation for being slow. But that's not entirely fair. + +===== Parallelizing Bus Services ===== +__Modern daemons on Linux tend to provide services via D-Bus instead of plain AF_UNIX sockets__. Now, the question is, for those services, can we apply the same parallelizing boot logic as for traditional socket services? Yes, we can, D-Bus already has all the right hooks for it: **using bus activation a service can be started the first time it is accessed**. 
Bus activation also gives us the **minimal per-request synchronisation** we need for starting up the providers and the consumers of D-Bus services at the same time: if we want to start Avahi at the same time as CUPS (side note: CUPS uses Avahi to browse for mDNS/DNS-SD printers), then we can simply run them at the same time, and if CUPS is quicker than Avahi via the bus activation logic we can get D-Bus to queue the request until Avahi manages to establish its service name. + +So, in summary: the __socket-based service activation and the bus-based service activation together enable us to start all daemons in parallel__, without any further synchronization. Activation also allows us to do lazy-loading of services: if a service is rarely used, we can just load it the first time somebody accesses the socket or bus name, instead of starting it during boot. +__这里的activation指的是开启或激活某个服务,而不需要等待该服务的提供者或被依赖的服务启动完成。这是通过事先建立相关的socket 文件实现的。同时只有当某个进程(服务)读写该事先建立的socket file时,另外一个进程(服务)才会被启动,从而既达到了并行启动服务(不需要关心服务间的依赖),又可以延迟加载服务的目的。__ + +And if that's not great, then I don't know what is great! + +除了提供服务的daemon间的同步外,文件系统相关的jobs也需要同步。 + +===== Parallelizing File System Jobs ===== +If you look at the serialization graphs of the boot process of current distributions, there are more synchronisation points than just daemon start-ups: most prominently there are **file-system related jobs**: mounting, fscking, quota(磁盘配额). Right now, on boot-up a lot of time is spent idling to wait until all devices that are listed in **/etc/fstab** show up in the device tree and are then fsck'ed, mounted, quota checked (if enabled). Only after that is fully finished we go on and boot the actual services. 对磁盘的fsck发生在mount它之前。 + +Can we improve this? It turns out we can. 
Harald Hoyer came up with the idea of using the venerable **autofs** system for this: + +Just like a connect() call shows that a service is interested in another service, an open() (or a similar call) shows that a service is interested in a specific file or file-system. So, in order to improve how much we can parallelize we can make those apps wait only if a file-system they are looking for is not yet mounted and readily available: __we set up an autofs mount point, and then when our file-system finished fsck and quota due to normal boot-up we replace it by the real mount__. While the file-system is not ready yet, the access will be queued by the kernel and the accessing process will block, but only that one daemon and only that one access. And this way we can begin starting our daemons even before all file systems have been fully made available -- without them missing any files, and maximizing parallelization. + +Parallelizing file system jobs and service jobs __does not make sense for /__, after all that's where the service binaries are usually stored. However, for file-systems such as /home, that usually are bigger, even encrypted, possibly remote and seldom accessed by the usual **boot-up daemons**, this can improve boot time considerably. It is probably not necessary to mention this, but __virtual file systems, such as procfs or sysfs should never be mounted via autofs(因为它们是kernel动态生产的,没有fsck的开销。)__. + +I wouldn't be surprised if some readers might find integrating autofs in an init system a bit fragile and even weird, and maybe more on the "crackish" side of things. However, having played around with this extensively I can tell you that this actually feels quite right. __Using autofs here simply means that we can create a mount point without having to provide the backing file system right-away.__ In effect it hence only **delays accesses**. 
If an application tries to access an autofs file-system and we take very long to replace it with the real file-system, it will hang in an interruptible sleep, meaning that you can safely cancel it, for example via C-c. Also note that at any point, if the mount point should not be mountable in the end (maybe because fsck failed), we can just tell autofs to return a clean error code (like ENOENT). So, I guess what I want to say is that even though integrating autofs into an init system might appear adventurous at first, our experimental code has shown that this idea works surprisingly well in practice -- if it is done for the right reasons and the right way. + +Also note that these should be direct autofs mounts, meaning that from an application perspective there's little effective difference between a classic mount point and one based on autofs. + +===== Keeping the First User PID Small ===== +Another thing we can learn from the MacOS boot-up logic is that __shell scripts are evil__. Shell is fast and shell is slow. It is fast to hack, but slow in execution. The classic sysvinit boot logic is modelled around shell scripts. Whether it is /bin/bash or any other shell (that was written to make shell scripts faster), in the end the approach is doomed to be slow. On my system the scripts in /etc/init.d call grep at least 77 times. awk is called 92 times, cut 23 and sed 74. Every time those commands (and others) are called, a process is spawned, the libraries searched, some start-up stuff like i18n and so on set up and more. And then after seldom doing more than a trivial string operation the process is terminated again. Of course, that has to be incredibly slow. No other language but shell would do something like that. On top of that, shell scripts are also very fragile, and change their behaviour drastically based on environment variables and suchlike, stuff that is hard to oversee and control. + +So, let's __get rid of shell scripts__ in the boot process! 
Before we can do that we need to figure out what they are currently actually used for: well, the big picture is that most of the time, what they do is actually quite boring. Most of the scripting is spent on trivial **setup and tear-down of services**, and should be rewritten in C, either in separate executables, or moved into the daemons themselves, or simply be done in the init system. + +It is not likely that we can get rid of shell scripts during system boot-up entirely anytime soon. Rewriting them in C takes time, in a few cases does not really make sense, and sometimes shell scripts are just too handy to do without. But we can certainly make them less prominent. + +A good metric for measuring shell script infestation of the boot process is the PID number of the first process you can start after the system is fully booted up. Boot up, log in, open a terminal, and type echo $$. Try that on your Linux system, and then compare the result with MacOS! (Hint, it's something like this: Linux PID 1823; MacOS PID 154, measured on test systems we own.) + +===== Keeping Track of Processes ===== +A central part of a system that starts up and maintains services should be process babysitting: it should __watch services__. Restart them if they shut down. If they crash it should collect information about them, and keep it around for the administrator, and cross-link that information with what is available from crash dump systems such as abrt, and in logging systems like syslog or the audit system. + +It should also be capable of shutting down a service completely. That might sound easy, but is harder than you think. __Traditionally on Unix a process that does double-forking can escape the supervision of its parent__, and the old parent will not learn about the relation of the new process to the one it actually started. An example: currently, a misbehaving CGI script that has double-forked is not terminated when you shut down Apache. 
Furthermore, you will not even be able to figure out its relation to Apache, unless you know it by name and purpose. **double-forking可以使子进程的父进程变为init进程。** + +So, how can we keep track of processes, so that they cannot escape the babysitter, and that we can control them as one unit even if they fork a gazillion times? + +Different people came up with different solutions for this. I am not going into much detail here, but let's at least say that approaches based on **ptrace** or the **netlink connector** (a kernel interface which allows you to get a netlink message each time any process on the system fork()s or exit()s) that some people have investigated and implemented, have been criticised as ugly and not very scalable. + +So what can we do about this? Well, since quite a while the kernel knows __Control Groups (aka "cgroups")__. Basically they allow the creation of a hierarchy of groups of processes. The hierarchy is __directly exposed in a virtual file-system__, and hence easily accessible. The group names are basically directory names in that file-system. If a process belonging to a specific cgroup fork()s, its child will become a member of the same group. Unless it is privileged and has access to the cgroup file system it cannot escape its group. Originally, cgroups have been introduced into the kernel for the purpose of containers: certain kernel subsystems can enforce limits on resources of certain groups, such as limiting CPU or memory usage. __Traditional resource limits (as implemented by setrlimit()) are (mostly) per-process. cgroups on the other hand let you enforce limits on entire groups of processes.__ cgroups are also useful to enforce limits outside of the immediate container use case. You can use it for example to limit the total amount of memory or CPU Apache and all its children may use. Then, a misbehaving CGI script can no longer escape your setrlimit() resource control by simply forking away. + +控制组的功能: +1. 进程层次结构的容器 +2. 进程组资源限制 +3. 
daemon追踪。 + +In addition to container and resource limit enforcement cgroups are very useful to __keep track of daemons__: cgroup membership is securely inherited by child processes, they cannot escape. There's a notification system available so that a supervisor process can be notified when a cgroup __runs empty__. You can find the cgroups of a process by reading /proc/$PID/cgroup. cgroups hence make a very good choice to keep track of processes for babysitting purposes. + +===== Controlling the Process Execution Environment ===== +A good babysitter should not only oversee and control when a daemon starts, ends or crashes, but also set up a good, minimal, and secure __working environment__ for it. + +That means setting obvious process parameters such as the setrlimit() resource limits, user/group IDs or the environment block, but does not end there. The Linux kernel gives users and administrators a lot of control over processes (some of it is rarely used, currently). For each process you can set CPU and IO scheduler controls, the capability bounding set, CPU affinity or of course cgroup environments with additional limits, and more. + +As an example, ioprio_set() with IOPRIO_CLASS_IDLE is a great way to minimize the effect of locate's updatedb on system interactivity.在系统进行交互时,可以使用ioprio_set()函数降低updatedb命令的优先级。 + +On top of that certain high-level controls can be very useful, such as setting up read-only file system overlays based on __read-only bind__ mounts. That way one can run certain daemons so that all (or some) file systems appear read-only to them, so that EROFS is returned on every write request. As such this can be used to lock down what daemons can do similar in fashion to a poor man's SELinux policy system (but this certainly doesn't replace SELinux, don't get any bad ideas, please). + +Finally logging is an important part of executing services: ideally every bit of output a service generates should be logged away. 
An init system should hence provide logging to daemons it spawns right from the beginning, and connect stdout and stderr to syslog or in some cases even **/dev/kmsg** which in many cases makes a very useful replacement for syslog (embedded folks, listen up!), especially in times where the kernel log buffer is configured ridiculously large out-of-the-box. + +===== On Upstart ===== +To begin with, let me emphasize that I actually like the code of Upstart, it is very well commented and easy to follow. It's certainly something other projects should learn from (including my own). + +That being said, I can't say I agree with the general approach of Upstart. But first, a bit more about the project: + +Upstart __does not share code with sysvinit__, and its functionality is a super-set of it, and provides compatibility to some degree with the well known SysV init scripts. Its main feature is its __event-based approach__: starting and stopping of processes is bound to "events" happening in the system, where an "event" can be a lot of different things, such as: a network interface becomes available or some other software has been started. + +Upstart **does service serialization via these events**: if the syslog-started event is triggered this is used as an indication to start D-Bus since it can now make use of Syslog. And then, when dbus-started is triggered, NetworkManager is started, since it may now use D-Bus, and so on. + +One could say that this way the actual logical dependency tree that exists and is understood by the admin or developer is translated and encoded into event and action rules: every logical "a needs b" rule that the administrator/developer is aware of becomes a "start a when b is started" plus "stop a when b is stopped". In some way this certainly is a simplification: especially for the code in Upstart itself. However I would argue that this simplification is actually detrimental. 
First of all, the logical dependency system does not go away, the person who is writing Upstart files must now __translate the dependencies manually into these event/action rules__ (actually, two rules for each dependency). So, instead of letting the computer figure out what to do based on the dependencies, the user has to manually translate the dependencies into simple event/action rules. Also, because the dependency information has never been encoded it is not available at runtime, effectively meaning that an administrator who tries to figure out why something happened, i.e. why a is started when b is started, has no chance of finding that out.(依赖信息不能在运行时获取) + +Furthermore, the event logic **turns around all dependencies**, from the feet onto their head. Instead of minimizing the amount of work (which is something that a good init system should focus on, as pointed out in the beginning of this blog story), it actually **maximizes the amount of work** to do during operations. Or in other words, instead of having a clear goal and only doing the things it really needs to do to reach the goal, it does one step, and then after finishing it, it does all steps that possibly could follow it. + +Or to put it simpler: the fact that the user just started D-Bus is in no way an indication that NetworkManager should be started too (but this is what Upstart would do). It's right the other way round: when the user asks for NetworkManager, that is definitely an indication that D-Bus should be started too (which is certainly what most users would expect, right?). + +__A good init system should start only what is needed, and that on-demand__. Either lazily or parallelized and in advance. However it should not start more than necessary, particularly not everything installed that could use that service. + +Finally, I fail to see the actual usefulness of the event logic. 
It appears to me that most events that are exposed in Upstart actually are not punctual(准时的,正点的) in nature, but have duration: a service starts, is running, and stops. A device is plugged in, is available, and is plugged out again. A mount point is in the process of being mounted, is fully mounted, or is being unmounted. A power plug is plugged in, the system runs on AC, and the power plug is pulled. Only a minority of the events __an init system or process supervisor__ should handle are actually punctual, most of them are tuples of start, condition, and stop. This information is again not available in Upstart, because it focuses on singular events, and ignores durable dependencies. + +Now, I am aware that some of the issues I pointed out above are in some way mitigated by certain more recent changes in Upstart, particularly condition based syntaxes such as start on (local-filesystems and net-device-up IFACE=lo) in Upstart rule files. However, to me this appears mostly as an attempt to fix a system whose __core design is flawed__. + +Besides that Upstart does OK for babysitting daemons, even though some choices might be questionable (see above), and there are certainly a lot of missed opportunities (see above, too). + +There are other init systems besides sysvinit, **Upstart and launchd**. Most of them offer little substantially more than Upstart or sysvinit. The most interesting other contender is Solaris SMF, which supports proper dependencies between services. However, in many ways it is overly complex and, let's say, a bit academic with its excessive use of XML and new terminology for known things. It is also closely bound to Solaris specific features such as the contract system. + +===== Putting it All Together: systemd ===== +Well, this is another good time for a little pause, because after I have hopefully explained above what I think a good PID 1 should be doing and what the current most used system does, we'll now come to where the beef is. 
So, go and refill your coffee mug again. It's going to be worth it. + +You probably guessed it: what I suggested above as requirements and features for an ideal init system is actually available now, in a (still experimental) init system called __systemd__, and which I hereby want to announce. Again, here's the code. And here's a quick rundown of its features, and the rationale behind them: + +systemd __starts up and supervises__ the entire system (hence the name...). It implements all of the features pointed out above and a few more. It is based around the notion of **units.** Units have a name and a type. Since their configuration is usually loaded directly from the file system, these **unit names are actually file names**. Example: a unit **avahi.service** is read from a configuration file by the same name, and of course could be a unit //encapsulating// the Avahi daemon. There are several kinds of units: + +* **service**: these are the most obvious kind of unit: daemons that can be started, stopped, restarted, reloaded. For compatibility with SysV we not only support our own configuration files for services, but also are __able to read classic SysV init scripts__, in particular we parse the LSB header, if it exists. /etc/init.d is hence not much more than just **another source of configuration**. +* **socket**: this unit encapsulates a socket in the file-system or on the Internet. We currently support AF_INET, AF_INET6, AF_UNIX sockets of the types stream, datagram, and sequential packet. We also support **classic FIFOs** as transport. __Each socket unit has a matching service unit__, **that is started if the first connection comes in on the socket or FIFO**. Example: nscd.socket starts nscd.service on an incoming connection. __当socket unit被连接时,它会启动相应的service unit。__ +* device: this unit encapsulates a device in the Linux device tree. If a device is marked for this __via udev rules__, it will be exposed as a device unit in systemd. 
Properties set with udev can be used as configuration source to set dependencies for device units. +* mount: this unit encapsulates a mount point in the file system hierarchy. systemd monitors all mount points how they come and go, and can also be used to mount or unmount mount-points. **/etc/fstab** is used here as an additional configuration source for these mount points, similar to how SysV init scripts can be used as additional configuration source for service units. +* automount: this unit type encapsulates an automount point in the file system hierarchy. __Each automount unit has a matching mount unit__, which is started (i.e. mounted) as soon as the automount directory is accessed. +* target: this unit type is used for **logical grouping of units**: instead of actually doing anything by itself it simply **references other units**, which thereby can be controlled together. Examples for this are: multi-user.target, which is a target that basically plays the role of run-level 5 on classic SysV system, or bluetooth.target which is requested as soon as a bluetooth dongle becomes available and which simply __pulls in__ bluetooth related services that otherwise would not need to be started: bluetoothd and obexd and suchlike. **按需启动。** +* snapshot: similar to target units snapshots do not actually do anything themselves and their only purpose is to **reference other units**. Snapshots can be used to __save/rollback the state of all services and units__ of the init system. Primarily it has two intended use cases: to allow the user to temporarily enter a specific state such as "Emergency Shell", terminating current services, and provide an easy way to return to the state before, pulling up all services again that got temporarily pulled down. And to ease support for system suspending: still many services cannot correctly deal with system suspend, and it is often a better idea to shut them down before suspend, and restore them afterwards. 
+ +All these units can have dependencies between each other (both positive and negative, i.e. 'Requires' and 'Conflicts'): a device can have a dependency on a service, meaning that as soon as a device becomes available a certain service is started. Mounts get an implicit dependency on the device they are mounted from. Mounts also get implicit dependencies to mounts that are their prefixes (i.e. a mount /home/lennart implicitly gets a dependency added to the mount for /home) and so on. + +===== A short list of other features: ===== +1. For each process that is spawned, you may __control__: the environment, resource limits, working and root directory, umask, OOM killer adjustment, nice level, IO class and priority, CPU policy and priority, CPU affinity, timer slack, user id, group id, supplementary group ids, readable/writable/inaccessible directories, shared/private/slave mount flags, capabilities/bounding set, secure bits, CPU scheduler reset of fork, private /tmp name-space, cgroup control for various subsystems. Also, you can easily connect stdin/stdout/stderr of services to syslog, /dev/kmsg, arbitrary TTYs. If connected to a TTY for input systemd will make sure a process gets exclusive access, optionally waiting or enforcing it. +2. Every executed process gets __its own cgroup__ (currently by default in the debug subsystem, since that subsystem is not otherwise used and does not much more than the most basic process grouping), and it is very easy to configure systemd to place services in cgroups that have been configured externally, for example via the libcgroups utilities. +3. The native configuration files use a syntax that closely follows the well-known __.desktop files__. It is a simple syntax for which parsers exist already in many software frameworks. Also, this allows us to rely on existing tools for i18n for service descriptions, and similar. Administrators and developers don't need to learn a new syntax. +4. 
As mentioned, we __provide compatibility with SysV init scripts__. We take advantage of **LSB and Red Hat chkconfig headers** if they are available. If they aren't we try to make the best of the otherwise available information, such as the start priorities in /etc/rc.d. These init scripts are simply **considered a different source of configuration**, hence an easy upgrade path to proper systemd services is available. Optionally we can read classic PID files for services to identify the main pid of a daemon. **Note that we make use of the dependency information from the LSB init script headers, and translate those into native systemd dependencies.** Side note: Upstart is unable to harvest and make use of that information. Boot-up on a plain Upstart system with mostly LSB SysV init scripts will hence not be parallelized, a similar system running systemd however will. In fact, for Upstart all SysV scripts together make one job that is executed, they are not treated individually, again in contrast to systemd where SysV init scripts are just another source of configuration and are all treated and controlled individually, much like any other native systemd service. +5. Similarly, we read the existing __/etc/fstab__ configuration file, and consider it just another source of configuration. Using the comment= fstab option you can even mark /etc/fstab entries to become systemd controlled automount points. +6. If the same unit is configured in multiple configuration sources (e.g. /etc/systemd/system/avahi.service exists, and /etc/init.d/avahi too), then the native configuration will always take precedence, the legacy format is ignored, allowing an easy upgrade path and packages to carry both a SysV init script and a systemd service file for a while. +7. We support a simple __templating/instance mechanism__. 
Example: instead of having six configuration files for six gettys, we only have one **getty@.service** file which gets instantiated to **getty@tty2.service** and suchlike. The interface part can even __be inherited by dependency expressions__, i.e. it is easy to encode that a service dhcpcd@eth0.service pulls in avahi-autoipd@eth0.service, while leaving the eth0 string wild-carded. +8. For socket activation we support full __compatibility with the traditional inetd modes__, as well as a very simple mode that tries to mimic launchd socket activation and is recommended for new services. The inetd mode only allows passing one socket to the started daemon, while the native mode supports passing arbitrary numbers of file descriptors. We also support one instance per connection, as well as one instance for all connections modes. In the former mode we **name the cgroup the daemon will be started in after the connection parameters, and utilize the templating logic mentioned** above for this. Example: sshd.socket might spawn services __(先建立socket,然后该socket按需spawn services daemon)__**sshd@192.168.0.1-4711-192.168.0.2-22.service** with a cgroup of **sshd@.service/192.168.0.1-4711-192.168.0.2-22** (i.e. the IP address and port numbers are used in the instance names. For AF_UNIX sockets we use PID and user id of the connecting client). This provides a nice way for the administrator to identify the various instances of a daemon and control their runtime individually. The native socket passing mode is very easily implementable in applications: if $LISTEN_FDS is set it contains the number of sockets passed and the daemon will find them sorted as listed in the .service file, starting from file descriptor 3 (a nicely written daemon could also use fstat() and getsockname() to identify the sockets in case it receives more than one). 
In addition we set $LISTEN_PID to the PID of the daemon that shall receive the fds, because environment variables are normally inherited by sub-processes and hence could confuse processes further down the chain. Even though this socket passing logic is very simple to implement in daemons, we will provide a BSD-licensed reference implementation that shows how to do this. We have ported a couple of existing daemons to this new scheme. +9. We provide compatibility with __/dev/initctl__ to a certain extent. This compatibility is in fact implemented with a FIFO-activated service, which simply translates these legacy requests to D-Bus requests. Effectively this means the old shutdown, poweroff and similar commands from Upstart and sysvinit continue to work with systemd. +10. We also provide compatibility with __utmp and wtmp__. Possibly even to an extent that is far more than healthy, given how crufty utmp and wtmp are. +11. systemd supports several kinds of dependencies between units. **After/Before** can be used to fix the ordering how units are activated. It is completely orthogonal (直角的,正交的) to **Requires and Wants**, which express a positive requirement dependency, either mandatory, or optional(After/Before只决定启动顺序,Requires/Wants只表达依赖关系,两者相互独立). Then, there is **Conflicts** which expresses a negative requirement dependency. Finally, there are three further, less used dependency types. +12. systemd has a minimal __transaction system(事务系统)__. Meaning: if a unit is requested to start up or shut down we will add **it and all its dependencies** to a temporary transaction. Then, we will verify if the transaction is consistent (i.e. whether the ordering via After/Before of all units is cycle-free). If it is not, systemd will try to fix it up, and removes non-essential jobs from the transaction that might remove the loop. Also, systemd tries to suppress non-essential jobs in the transaction that would stop a running service. 
Non-essential jobs are those which the original request did not directly include but which were pulled in by Wants type of dependencies. Finally we check whether the jobs of the transaction contradict jobs that have already been queued, and optionally the transaction is aborted then. If all worked out and the transaction is consistent and minimized in its impact it is merged with all already outstanding jobs and added to the run queue. Effectively this means that before executing a requested operation, we will verify that it makes sense, fixing it if possible, and only failing if it really cannot work. +13. We record start/exit time as well as the PID and exit status of every process we spawn and supervise. This data can be used to **cross-link** daemons with their data in abrtd, auditd and syslog. Think of a UI that will highlight crashed daemons for you, and allows you to easily navigate to the respective UIs for syslog, abrt, and auditd that will show the data generated from and for this daemon on a specific run. +14. We support __reexecution of the init process itself__ at any time. The daemon state is serialized before the reexecution and deserialized afterwards. That way we provide a simple way to facilitate init system upgrades as well as handover from an initrd daemon to the final daemon. Open sockets and autofs mounts are properly serialized away, so that they stay connectible all the time, in a way that clients will not even notice that the init system reexecuted itself. Also, the fact that a big part of the service state is encoded anyway in the cgroup virtual file system would even allow us to resume execution without access to the serialization data. The reexecution code paths are actually mostly the same as the init system configuration reloading code paths, which guarantees that reexecution (which is probably more seldom triggered) gets similar testing as reloading (which is probably more common). +15. 
Starting the work of __removing shell scripts from the boot process__ we have recoded part of the basic system setup in C and moved it directly into systemd. Among that is mounting of the API file systems (i.e. virtual file systems such as /proc, /sys and /dev.) and setting of the host-name. +16. Server state is introspectable and controllable via __D-Bus__. This is not complete yet but quite extensive. +17. While we want to emphasize __socket-based and bus-name-based activation__, and we hence support dependencies between sockets and services, we also support traditional inter-service dependencies. We support multiple ways how such a service can signal its readiness: by forking and having the start process exit (i.e. traditional daemonize() behaviour), as well as by watching the bus until a configured service name appears. +18. There's an __interactive mode__ which asks for confirmation each time a process is spawned by systemd. You may enable it by passing systemd.confirm_spawn=1 on the kernel command line. +19. With the **systemd.default=** kernel command line parameter you can specify which unit systemd should start on boot-up. Normally you'd specify something like multi-user.target here, but another choice could even be a single service instead of a target, for example out-of-the-box we ship a service //emergency.service// that is similar in its usefulness as init=/bin/bash, however has the advantage of actually running the init system, hence offering the option to boot up the full system from the emergency shell. +20. There's a minimal UI that allows you to start/stop/introspect services. It's far from complete but useful as a debugging tool. It's written in Vala (yay!) and goes by the name of **systemadm**. + +It should be noted that systemd uses many Linux-specific features, and does not limit itself to POSIX. That unlocks a lot of functionality a system that is designed for portability to other operating systems cannot provide. 
+ +===== Status ===== +All the features listed above are already implemented. Right now systemd can already be used as a drop-in replacement for Upstart and sysvinit (at least as long as there aren't too many native upstart services yet. Thankfully most distributions don't carry too many native Upstart services yet.) + +However, testing has been minimal, our version number is currently at an impressive 0. Expect breakage if you run this in its current state. That said, overall it should be quite stable and some of us already boot their normal development systems with systemd (in contrast to VMs only). YMMV, especially if you try this on distributions we developers don't use. + +===== Where is This Going? ===== +The feature set described above is certainly already comprehensive. However, we have a few more things on our plate. I don't really like speaking too much about big plans but here's a short overview in which direction we will be pushing this: + +We want to add at least two more unit types: __swap__ shall be used to control swap devices the same way we already control mounts, i.e. with automatic dependencies on the device tree devices they are activated from, and suchlike. __timer__ shall provide functionality similar to cron, i.e. starts services based on time events, the focus being both monotonic clock and wall-clock/calendar events. (i.e. "start this 5h after it last ran" as well as "start this every monday 5 am") + +More importantly however, it is also our plan to experiment with systemd not only for optimizing boot times, but also to make it the ideal __session manager__, to replace (or possibly just augment) **gnome-session, kdeinit** and similar daemons. The problem set of a session manager and an init system are very similar: quick start-up is essential and babysitting processes the focus. Using the same code for both uses hence suggests itself. Apple recognized that and does just that with launchd. 
And so should we: socket and bus based activation and parallelization is something session services and system services can benefit from equally. + +I should probably note that all three of these features are already partially available in the current code base, but not complete yet. For example, already, you can run systemd just fine as a normal user, and it will detect that it is run that way, and support for this mode has been available since the very beginning, and is in the very core. (It is also exceptionally useful for debugging! This works fine even without having the system otherwise converted to systemd for booting.) + +However, there are some things we probably should //fix in the kernel// and elsewhere before finishing work on this: we need swap status change notifications from the kernel similar to how we can already subscribe to mount changes; we want a notification when CLOCK_REALTIME jumps relative to CLOCK_MONOTONIC; we want to allow normal processes to get some init-like powers; we need a well-defined place where we can put user sockets. None of these issues are really essential for systemd, but they'd certainly improve things. + +===== You Want to See This in Action? ===== +Currently, there are no tarball releases, but it should be straightforward to check out the code from our repository. In addition, to have something to start with, here's a tarball with unit configuration files that allows an otherwise unmodified Fedora 13 system to work with systemd. We have no RPMs to offer you for now. + +An easier way is to download this Fedora 13 qemu image, which has been prepared for systemd. In the grub menu you can select whether you want to boot the system with Upstart or systemd. Note that this system is minimally modified only. Service information is read exclusively from the existing SysV init scripts. 
Hence it will not take advantage of the full socket and bus-based parallelization pointed out above, however it will interpret the parallelization hints from the LSB headers, and hence boots faster than the Upstart system, which in Fedora does not employ any parallelization at the moment. The image is configured to output debug information on the serial console, as well as writing it to the kernel log buffer (which you may access with dmesg). You might want to run qemu configured with a virtual serial terminal. All passwords are set to systemd. + +Even simpler than downloading and booting the qemu image is looking at pretty screen-shots. Since an init system usually is well hidden beneath the user interface, some shots of systemadm and ps must do: + +That's systemadm showing all loaded units, with more detailed information on one of the getty instances. + +That's an excerpt of the output of ps xaf -eo pid,user,args,cgroup showing how neatly the processes are sorted into the cgroup of their service. (The fourth column is the cgroup, the debug: prefix is shown because we use the debug cgroup controller for systemd, as mentioned earlier. This is only temporary.) + +Note that both of these screenshots show an only minimally modified Fedora 13 Live CD installation, where services are exclusively loaded from the existing SysV init scripts. Hence, this does not use socket or bus activation for any existing service. + +Sorry, no bootcharts or hard data on start-up times for the moment. We'll publish that as soon as we have fully parallelized all services from the default Fedora install. Then, we'll welcome you to benchmark the systemd approach, and provide our own benchmark data as well. + +Well, presumably everybody will keep bugging me about this, so here are two numbers I'll tell you. However, they are completely unscientific as they are measured for a VM (single CPU) and by using the stop timer in my watch. 
Fedora 13 booting up with Upstart takes 27s, with systemd we reach 24s (from grub to gdm, same system, same settings, shorter value of two bootups, one immediately following the other). Note however that this shows nothing more than the speedup effect reached by using the LSB dependency information parsed from the init script headers for parallelization. Socket or bus based activation was not utilized for this, and hence these numbers are unsuitable to assess the ideas pointed out above. Also, systemd was set to debug verbosity levels on a serial console. So again, this benchmark data has barely any value. + +===== Writing Daemons ===== +An ideal daemon for use with systemd does a few things differently than things were traditionally done. Later on, we will publish a longer guide explaining and suggesting how to write a daemon for use with this systemd. Basically, things get simpler for daemon developers: + +* We ask daemon writers not to fork or even double fork in their processes, but run their event loop from the initial process systemd starts for you. Also, don't call setsid(). +* Don't drop user privileges in the daemon itself, leave this to systemd and configure it in systemd service configuration files. (There are exceptions here. For example, for some daemons there are good reasons to drop privileges inside the daemon code, after an initialization phase that requires elevated privileges.) +* Don't write PID files +* Grab a name on the bus +* You may rely on systemd for logging, you are welcome to log whatever you need to log to stderr. +* __Let systemd create and watch sockets for you__, so that socket activation works. Hence, interpret $LISTEN_FDS and $LISTEN_PID as described above. +* Use SIGTERM for requesting shut downs from your daemon. + +The list above is very similar to what Apple recommends for daemons compatible with launchd. It should be easy to extend daemons that already support launchd activation to support systemd activation as well. 
+ +Note that systemd supports daemons not written in this style perfectly as well, already for compatibility reasons (launchd has only limited support for that). As mentioned, this even extends to existing inetd capable daemons which can be used unmodified for socket activation by systemd. + +So, yes, should systemd prove itself in our experiments and get adopted by the distributions it would make sense to port at least those services that are started by default to use socket or bus-based activation. We have written proof-of-concept patches, and the porting turned out to be very easy. Also, we can leverage the work that has already been done for launchd, to a certain extent. Moreover, adding support for socket-based activation does not make the service incompatible with non-systemd systems. + +===== FAQs ===== +Who's behind this? + Well, the current code-base is mostly my work, Lennart Poettering (Red Hat). However the design in all its details is result of close cooperation between Kay Sievers (Novell) and me. Other people involved are Harald Hoyer (Red Hat), Dhaval Giani (Formerly IBM), and a few others from various companies such as Intel, SUSE and Nokia. +Is this a Red Hat project? + No, this is my personal side project. Also, let me emphasize this: the opinions reflected here are my own. They are not the views of my employer, or Ronald McDonald, or anyone else. +Will this come to Fedora? + If our experiments prove that this approach works out, and discussions in the Fedora community show support for this, then yes, we'll certainly try to get this into Fedora. +Will this come to OpenSUSE? + Kay's pursuing that, so something similar as for Fedora applies here, too. +Will this come to Debian/Gentoo/Mandriva/MeeGo/Ubuntu/[insert your favourite distro here]? + That's up to them. We'd certainly welcome their interest, and help with the integration. +Why didn't you just add this to Upstart, why did you invent something new? 
+ Well, the point of the part about Upstart above was to show that the core design of Upstart is flawed, in our opinion. Starting completely from scratch suggests itself if the existing solution appears flawed in its core. However, note that we took a lot of inspiration from Upstart's code-base otherwise. +If you love Apple launchd so much, why not adopt that? + launchd is a great invention, but I am not convinced that it would fit well into Linux, nor that it is suitable for a system like Linux with its immense scalability and flexibility to numerous purposes and uses. +Is this an NIH project? + Well, I hope that I managed to explain in the text above why we came up with something new, instead of building on Upstart or launchd. We came up with systemd due to technical reasons, not political reasons. + Don't forget that it is Upstart that includes a library called NIH (which is kind of a reimplementation of glib) -- not systemd! +Will this run on [insert non-Linux OS here]? + Unlikely. As pointed out, systemd uses many Linux specific APIs (such as epoll, signalfd, libudev, cgroups, and numerous more), a port to other operating systems appears to us as not making a lot of sense. Also, we, the people involved are unlikely to be interested in merging possible ports to other platforms and work with the constraints this introduces. That said, git supports branches and rebasing quite well, in case people really want to do a port. + Actually portability is even more limited than just to other OSes: we require a very recent Linux kernel, glibc, libcgroup and libudev. No support for less-than-current Linux systems, sorry. + If folks want to implement something similar for other operating systems, the preferred mode of cooperation is probably that we help you identify which interfaces can be shared with your system, to make life easier for daemon writers to support both systemd and your systemd counterpart. Probably, the focus should be to share interfaces, not code. 
+I hear [fill one in here: the Gentoo boot system, initng, Solaris SMF, runit, uxlaunch, ...] is an awesome init system and also does parallel boot-up, so why not adopt that? + Well, before we started this we actually had a very close look at the various systems, and none of them did what we had in mind for systemd (with the exception of launchd, of course). If you cannot see that, then please read again what I wrote above. + +===== Contributions ===== + +We are very interested in patches and help. It should be common sense that every Free Software project can only benefit from the widest possible external contributions. That is particularly true for a core part of the OS, such as an init system. We value your contributions and hence do not require copyright assignment (Very much unlike Canonical/Upstart!). And also, we use git, everybody's favourite VCS, yay! + +We are particularly interested in help getting systemd to work on other distributions, besides Fedora and OpenSUSE. (Hey, anybody from Debian, Gentoo, Mandriva, MeeGo looking for something to do?) But even beyond that we are keen to attract contributors on every level: we welcome C hackers, packagers, as well as folks who are interested to write documentation, or contribute a logo. + +===== Community ===== + +At this time we only have a source code repository and an IRC channel (#systemd on Freenode). There's no mailing list, web site or bug tracking system. We'll probably set something up on freedesktop.org soon. If you have any questions or want to contact us otherwise we invite you to join us on IRC! + +Update: our GIT repository has moved. + +posted at: 10:46 | path: /projects | permanent link to this entry | 336 comments +Posted by Raphael (esarbee) at Fri Apr 30 13:12:10 2010 +Woah, quite a read! While I somewhat dislike the idea of yet another services management solution, I like it coming from you. You keep rocking the boat - as you did with PA, which I like very much - and that's a good thing. 
+ +I admit that I didn't read it with the attention it desired and promise to do so later the day. I do hope, however, that creating custom services will remain at least as easy as it is now. ;) + +Posted by Alex Murray at Fri Apr 30 13:27:26 2010 +This sounds like the kind of innovation Linux needs - a clearly well thought out solution to a problem, not just someone scratching an itch. Great work as always Lennart. The simplicity of the design (using implicit dependencies rather than hard-coding them) is awesome. Sounds perfect for the embedded space as well. + +Would be awesome to see this get picked up by the big players (Ubuntu, Fedora, OpenSUSE, Debian etc). + +Posted by Marco Barisione at Fri Apr 30 13:46:42 2010 +First you broke networking then sound and now booting? :P + +Posted by Kay Sievers at Fri Apr 30 14:02:24 2010 +Sounds great, nice announcement. It runs well here on my box. Still a looong way to go ... + +Happy so far, and good to know that all the many hours we spent on the phone lead to something that matters - however it will look like in the end. :) + +Posted by Luiz Augusto von Dentz at Fri Apr 30 14:09:12 2010 +Pretty amazing I must say, I just wonder now if systemd would incorporate things like powertop, monitoring processes/detecting process responsiveness and things like that. + +Posted by Michael Scherer at Fri Apr 30 14:29:07 2010 +This look nice. But maybe you should have splitted the article in smaller piece, and published them one by one. + +About swap, do you think systemd could be extended to dynamically create swap files on the fly, as done on os x, as part as the babysitting ? This would allow distribution to have a simpler partitionnement step, since user will no longer care about this. Of course, some barriers should be added to avoid filling the harddrive with swap file. ( and of course, we should be sure that swap files are as fast as swap partitions ). 
+ +Posted by Joshua Pritikin at Fri Apr 30 15:15:04 2010 +It sounds like you are fixing real design flaws in upstart. I hope you are aware of runit and bcron. http://smarden.org/runit/ http://untroubled.org/bcron/bcron.html + +These are not complete solutions like you are proposing with systemd, but you ought to be familiar with the design of these two tools. I find them exceptionally well engineered. + +Posted by John Drinkwater at Fri Apr 30 16:19:07 2010 +What was the reason for naming services as /etc/systemd/system/avahi.service rather than /etc/systemd/system/service/avahi (same goes for all units) +Would be more readable (in ps, etc), and get rid of file name extension creep… + +Posted by Grahame Bowland at Fri Apr 30 16:21:44 2010 +A major advantage of the startup sequence being in shell is that an administrator can easily insert bits of code to track down problems. It sounds like your design will make it quite a bit more difficult to track down odd things. + +For example, I had a RHEL5.5 machine the other day with a dodgy autofs setup; whenever 'autofs' started it remounted '/' readonly. Easy to track down at the moment, but it sounds like it might be trickier with systemd. + +While there's a bit of a performance hit, I think on servers where you're booting very infrequently bootup speed is worth trading for determinism and transparency, plus the ability to modify and debug the system easily. + +So, to be positive: how would you approach figuring out which startup script / service is causing a nasty problem under systemd? + +Posted by Damien Thebault at Fri Apr 30 16:22:24 2010 +This looks really nice and it removes a lot of problem from the daemon writers. + +In addition, since it encourages a certain design of daemons (no fork, error messages on stderr), I think it's then even easier to use those daemons from any init system. + +I really think that something like this should be used in many distributions and become the standard init on linux. 
+ +Posted by PJ at Fri Apr 30 17:05:41 2010 +re: history + +This reminds me a little bit of djb's daemontools thing. And also of Richard Gooch's Bootscripts ca. 2002 ( http://www.safe-mbox.com/~rgooch/linux/boot-scripts/index.html ). + +You seem to have taken the next step, however, and got services essentially autoconfiguring their own dependencies, which is awesome. + +re: 'shell scripts are slow' +As Grahame Bowland points out above, the advantage of shell scripts is ease-of-debugging/modification. I see a few options: +* move them to some dynamic language like python or groovy or something where they can be compiled and so run faster +* provide stripped-down versions of the common shell utils (awk, sed, etc) as builtins to the shell that fall back to calling the full version in complicated cases. So the simple "sed 's/foo/bar/;'" case could be optimized into a shell builtin. thereby saving a process-spawn. + +Also re: startup tools, have you looked at start-stop-daemon ? + +Posted by nine at Fri Apr 30 17:36:11 2010 +It's not an issue for SSD drives which will surely replace disk drives sooner or later, but: doesn't starting a whole bunch of daemons at once end up spawning a lot of IO requests, causing your disk to spend a whole lot of time on seeking overhead rather than actually reading/writing data? + +Posted by Davide Repetto at Fri Apr 30 17:38:19 2010 +Very interesting stuff indeed. As usual you rock Lennart! + +I understand there may be an option to automatically shutdown seldom used services, do you envision a simple time-out or are you going for a self adjusting timeout? + +Posted by sjansen at Fri Apr 30 17:50:08 2010 +@nine That's the kernel's responsibility. Perhaps it was a valid concern a couple decades ago, but today it makes sense to design a system that takes advantage of Linux's high quality IO schedulers. 
+ +Posted by James Mansion at Fri Apr 30 18:30:01 2010 +I don't think Solaris SMF is really the only other major system that handles dependencies and on-demand startup. + +You want a system where you can say 'always start A' and 'start B,C on demand' and 'C depends on B' so starting C will start B? Windows does this. + +Posted by Paul Jakma at Fri Apr 30 18:53:02 2010 +The process group stuff sounds very close to the contracts stuff put in place in Solaris for SMF. Just in case you're interested in looking over there. + +Posted by Robert Szalai at Fri Apr 30 19:18:32 2010 +As a personal opinion I very much like this idea. Am I right thinking that to use this to full potential one would need to modify the daemons? Also would it imply that properly written daemons won't need any init scripts? I pretty much dislike the idea of having scripts altogether, the daemons could just read their configuration files. Just wondering why would this be unfeasible, could someone enlighten me, in case I'm too hopeful? + +Posted by Anonymous at Fri Apr 30 20:29:48 2010 +Very impressive architecture. + +I agree with your complaint about shell scripts, and at the same time I want to preserve the configurability they provide. One crazy notion that might work: what if you use a compiled language like Vala to write the startup scripts, keep the Vala source as the canonical location, compile a binary from the source, and use make-style logic to decide if you need to recompile from the source? With sufficient library support from systemd, vala should prove nearly as comfortable as shell, but you end up with a fast compiled binary to run, and in-process handling of things like string operations. + +(If you want to avoid process startup times entirely, you could compile all the Vala configuration files into a single binary with various modules/functions/etc.) + +Posted by Peter Lister at Fri Apr 30 20:31:25 2010 +Damn good thinking. 
As a sys admin I have hated sysvinit (and the crap that app authors and distributions put in it) for years. + +Can you expand on what you think should happen at suspend / hibernate? And what happens for hot-swap hardware? + +It seems to me that power-up, suspend/resume and discovery/insertion/removal of hardware are all general events that should be reacted to correctly. The discovery of a storage medium and the filesystem(s) on it, the subsequent mounting and the starting of appropriate services are essentially the same whether it's /home on a SCSI disk detected at start-up and mounted so that logins can happen or just my inserting my MP3 player to have its podcasts updated. + +Posted by Eric Moret at Fri Apr 30 21:50:08 2010 +I love everything I read to far. There is one thing I ought to mention though! In the same vein as Polyp Audio (later renamed to Pulse Audio), you should be aware that System D has a somewhat negative meaning in french. See wikipedia entry on System_D. + +Posted by Colin Guthrie at Fri Apr 30 22:01:07 2010 +Awesome work. I now forgive you for spending time away from #pulseaudio :p + +My two major problems with this article: +1. It's very biased towards coffee. I am a tea drinker you insensitive clod! + +2. PID 1 is a silly name. You should have called it PID v2.0 like all the cool kids do on the web! + +I can't think of any real/sensible criticism so I'll shut up now. + +KUTGW as always :) + +Col + +Posted by Anon at Fri Apr 30 22:15:19 2010 +Just when the last init replacement fell the init replacement war starts back up again! + +I'd just like to see people standardise on one ideally but any idea if ChromeOS or MeeGo would benefit from this? + +Posted by Ahmed Kamal at Fri Apr 30 22:34:00 2010 +Wow, quiet the read. Extremely impressive design and analysis. Please keep the informative posts Lenart. And please keep pushing Linux forward :) + +Posted by Dieter_be at Fri Apr 30 22:51:33 2010 +very interesting read. 
+I don't think shell scripts are bad though. Sure they are slower and cause bumps in your pids, but they are so easy to hack on. I think that's the most important. + +Posted by Colin McCabe at Fri Apr 30 22:56:29 2010 +Looks good so far! + +Is the /sbin/service and chkconfig interface going to change with systemd? + +Posted by Richi Plana at Fri Apr 30 23:14:09 2010 +OMG! Finally!! Several people in the past (myself included) have opened up the idea of implementing system startup in a smarter way (only starting services that are needed and dynamically start things a'la xinetd), but would always get shot down with all sorts of excuses or the infamous "code it yourself" remark. + +Thanks for starting this! Hope things go far. + +Posted by anonymous at Sat May 1 00:39:14 2010 +I've not read through all of this yet, but want to suggest haskell as a possible shell script replacement. haskell is a language with precise semantics - that translates to very tight control of state and could enable very succinct specification of shell script behaviour. you can really use it, it is very easy to understand at it's core (lambda calculus). you could connect with that community, they are very clever i suppose and the code is easy to read if what it describes is "boring" or easy. it might just be perfect. speed is in the same league as C, I think it will use LLVM very soon as well. just have a peek and look at some (easy) code examples! + +And there are already replacements for grep, regular expressions and stuff like that to be found in the package repository at hackage.haskell.org , albeit maybe not perfectly structured. + +this is just a suggestion :=) take it for what it's worth... + +Posted by sztanpet at Sat May 1 00:44:40 2010 +I was wondering if it would add any value to have Lua as the configuration format, it might be overkill but having a full fledged scripting language might come in handy + +Posted by Claes at Sat May 1 01:27:57 2010 +Very interesting. 
When you eventually start to design the scheduling functionality (cron "replacement"), please consider applying iCalendar semantics (RFC2445) to scheduling rules. + +Posted by Richard at Sat May 1 06:14:16 2010 +You make a good point about shell script inefficiency (repeated calls to grep,awk,cut etc). + +Why not have a slightly larger bash (let's call it "busybash" in reference to busybox) that has some of these built in? + +Bash already provides builtins such as echo, kill and test - why not expand the range to include grep,sed,ls,mv and a few others. + +(Bash does have support for loading extensions, but that's not really the point here) + +Posted by codebeard at Sat May 1 06:16:20 2010 +@ People suggesting replacing shell scripts with python/vala/haskell/whatever + +If the goal is to retain easy debugging as /bin/sh provides, then replacing the scripts with another language is not going to achieve that. Part of the reason that shell scripts are so easy to debug and understand is that they are written in a very simple language that 90% of unix administrators can read and write. Replacing them with scripts written in your favourite scripting language, no matter how easy it seems to you personally, is bound to reduce the ease of debugging. + +Actually, I am confident that most of the shell scripts can be removed without losing easy debugging with systemd. Here's why: + +A quick survey of the init scripts on my system show six main functions (here ordered from most common to least common): +1) Process control (writing/checking PID files, signalling daemons) +2) Setting environment variables and daemon arguments +3) Checking to make sure certain requirements are met (kernel modules, other services, file existence etc) +4) Setting up a working environment (creating special files, setting SELinux contexts and file permissions) +5) Waiting and checking to see if something has completed or is running correctly. 
handling timeouts etc +6) Saving/loading states on shutdown/startup + +Now, the reason that many scripts can be done away with is that much of this can be handled better by systemd. + +Process control (1), the most common function of init scripts, is handled by systemd. And using the systemd utilities, instead of having to hack around with PID files, we will be able to see exactly what's running and what's not and manage all of this in a consistent manner. + +As for setting environment variables and daemon arguments (2), I think this one needs to be thought about more. I think it should be possible to handle it for 90% of shell scripts, but I will post another comment about that in a bit. + +Checking requirements (3) can be handled in most cases by simply setting the correct dependencies for the service. For dependencies on kernel modules, this should be a defined unit in systemd. + +Sometimes a script will check requirements such as certain paths for the service data etc, but I would say a lot of this is simply in the init scripts to be distro-independent and that actually many of these checks can simply be removed. That is, if I am using a modern distro with package management, the data files will always be at location X, which is also more or less guaranteed to exist if the package hasn't been messed with (otherwise all bets are off anyway). If people have moved things to non-standard places or removed config files or something, then they should be responsible for making the appropriate changes to the service definitions. + +Many checks are a little unnecessary anyway, in the sense that if something is wrong, the service should die gracefully and give the appropriate error messages, instead of duplicating these checks in the init script. 
Where checking config file syntax or file permissions may be useful is where you want to restart/reload a service; it's better to get a message that you made a typo in the config file than for the service to shutdown and then fail to start up again. So perhaps this case can be handled by having a PreReload/PreRestart parameter in a service definition for running a program/script to check things in this case. + +Setting up working environments (4) should really be handled by either the service itself or by post-install scripts of the distro's package. The remainder of cases can still be handled by scripts. Setting the right file permissions and security also falls into the category of should-be-managed-properly-by-distro. + +Waiting for things and handling timeouts (5) should ideally be handled by systemd. If configured to, it should try to restart a service that dies, possibly retrying a few times before giving up and putting the service in a maintenance state (like in Solaris). + +Saving and loading states (6) should be handled by the service itself. + +The scripts that are the real culprits for being inefficient are the ones that don't actually manage daemons but instead set up whole subsystems such as networking and file systems, with all the hacky config file grepping. It is nice to see that at least file systems will be handled natively by systemd. Maybe networks could also be handled, or perhaps systemd can be integrated with networkmanager or whatever. 
+ +Posted by codebeard at Sat May 1 06:18:50 2010 +As for the exact way of debugging things with systemd, I don't know how it currently works, but I assume that the following will be possible: +- A log of services that started, commands that were run, etc and what chain or dependencies, events or other relationships caused them to be started +- Trace what happened to the sockets that systemd made for a service (did the service ever take control of it, etc) +- Force the serialisation of starting some or all of the services for tracking down race conditions +- Set an arbitrary script or command to run before or after a certain service or every service. This should satisfy those people wanting to be able to hack shell scripts. +- It would be completely awesome if you could "connect to" a service which hasn't forked itself into the background, with the ability to read the program's stdout and stderr in real-time as well as possibly interact with the service through its stdin. It would be even more awesome if you could set an option in the service definition which would start the service with its own pts so that you could connect to it and interact with it using a screen-like program -- some services give you a nice debug console when you run them in a tty so this would be great. + +Posted by codebeard at Sat May 1 06:20:33 2010 +I think it is important that systemd have some understanding of providing things in a timely fashion. For example, if it sets up an AF_INET socket unit but the service never manages to start properly (for example, if it hangs somewhere during startup), then eventually the buffers for a UDP socket are going to fill up and incoming connections will time out for a TCP socket. On slow systems this may actually mean that trying to start every service at once will lead to intermittent failures with services trying to connect to another daemon but timing out. 
+ +For example, let's say I have a web application (in apache) that needs to connect to a mysql socket (AF_UNIX). So, systemd creates the AF_INET socket for apache, and the AF_UNIX socket for mysql, then starts both services simultaneously. Let's say my database is pretty hefty, and mysql takes 40 seconds to get everything started (keep in mind that maybe another 10 or 20 services are also trying to start at the same time, so this isn't unreasonable). In the meantime, apache has only taken 3 seconds to start, and it takes control of the AF_INET socket that systemd made for it and users are now able to connect. However, a user that connects to it just after this will get a messed up webpage with errors about a MySQL timeout since the timeout was set in PHP to 30 seconds. + +Can anything be done to avoid these issues? + +Posted by anonymous at Sat May 1 06:27:48 2010 +Seconding lua. + +It's the ideal scripting language for booting utils: + +1. Fast and portable. Less startup times than shell with more functionality, with less worries about bashisms/kshisms/sticking to POSIX. + +2. Easily augmented via it's C API... more close to the metal than Python and Vala. + +It'd be great to run this from a stripped down initrd with only lua, glibc+libudev+etc. and a fallback dash shell. + +At the very least it should be seriously considered as the config language, instead of plain text freedesktopish files that aren't as easily augmented. + +Posted by codebeard at Sat May 1 06:36:14 2010 +Okay, to add one final comment for now, I wanted to ask this: +Instead of rewriting daemons to use some extra file descriptors given to it, wouldn't it be possible to create the socket and then transparently hand it over to the daemon when the daemon tries to create it? It may require a kernel patch, but wouldn't that be a lot more elegant? Even legacy or closed source daemons (or open source daemons with uncooperative developers) could be made to use a socket from systemd this way. 
+ +For example, let's say that modifying service foo to use a socket from systemd is not practical. So, systemd sees that the kernel supports this feature, and creates a socket /var/run/foo/foo.sock before starting the foo service and informs the kernel about it. As the foo service initialises, it makes the syscall to create /var/run/foo/foo.sock, and instead of receiving an "already exists" error, it will transparently be given the socket already made for it by systemd. As far as the foo service is concerned, it had just made the socket, when really it was made by systemd. They all live happily ever after. + +Is there some reason that this couldn't work? Surely something like this would greatly reduce the amount of work that needs to be done to get systemd doing useful things on current systems. + +Posted by codebeard at Sat May 1 06:52:05 2010 +Looks like I missed posting one of the comments I had written earlier. Oops. Really this is my last comment for now. + +There needs to be an easy way to set environment variables and daemon arguments that avoids having to run any kind of script, in any language, if possible. + +In the current init script system, there is usually a file like /etc/sysconfig/blah for the blah service which is included in the init script. It will define environment variables for the service and may also be used to set certain parameters on the command line for the daemon. It would be great if systemd could understand some of this. Setting these things in the service definition is not really enough, or at least it needs to be possible to override options in a file made for end-user modification. Users should not need to modify the service definitions for routine configuration. In other words, there needs to be a place that users can look in to change options for a program (e.g. which port a program runs on) without having to mess with the service definitions. 
+ +To facilitate this, perhaps systemd needs to understand a basic kind of variable, so that it can be used in a service definition. + +That is, you might have foo.service: +[Service] +ExecStart=/usr/sbin/foo ${domain!} -n ${connections:10} ${debug?-d:-f} ${extra_args} + +Where systemd would consult some /etc/sysdconfig/foo or something and read in any values before parsing the foo.service file. + +It might look something like: +# comments blah +domain=example.org +connections=5 +extra_args=--cores ${connections} + +Above I use some possible syntax for these, such as ! for saying that the domain variable is not optional, the : for giving a default value if it is not set, the ? for treating the variable as a boolean (yes/1/true and no/0/false/unset) and inserting one value or another. + +Of course the exact details of this would need to be considered carefully to try and cover a large range of cases (the goal might be to be able to supersede 80% of the init scripts in the default installation of some distro). + +Posted by Christoph at Sat May 1 10:26:31 2010 +Lennart, once again you have proven to be a genius! This is a pretty long article, but everything is well explained, logical and easy to read. + +I am surprised how far systemd has come by now and I think it has great potential. Looking forward to read more about it! + +Posted by Peter Lister at Sat May 1 12:36:34 2010 +@codebeard + +Rewriting the daemons is a good thing! + +Too many daemons still have stupid amounts of command line config, or require coddling with bash. + +Daemons should just start, find their configs and get on with things without holding everyone else up. I certainly do NOT want a kernel hack just because software authors can't be bothered to improve... + +Posted by Richard Brooks at Sat May 1 21:02:52 2010 +Excellent work. I hope the Upstart developers see the superiority of your solution and will help adopting systemd as the new standard PID 1. 
+ +Posted by codebeard at Sat May 1 21:15:27 2010 +@ Peter Lister + +How is having to rewrite daemons a good thing? Most of them are perfectly fine as they are, as well as being written well for portability. Most do not require stupid amounts of command line config or huge bash scripts. If we can have all of the benefits and keep the simplicity of the system, without having to patch every daemon, then systemd can be adopted much more easily. + +My proposal to have the kernel copy the already created socket when a daemon bind()'s is really no different from mounting a file system when a program does an open(). So, I wouldn't call it any more of a hack. + +Here's what I envision: +systemd: +fork(); +sock = socket(); [e.g. 3] +bind(sock, addr); +fcntl(sock, F_INHERIT); +exec(); + +daemon: +... +sock = socket(); [e.g. 4] +bind(sock, addr); +... + + +Now, if addr matches the addr from a previously defined socket of the same type and with F_INHERIT, then the kernel copies the appropriate data structures (including any connections already made to the socket) from the socket 3 into the socket 4, and removes socket 3. This process of searching for a matching socket is only done for processes which are marked with having at least one F_INHERIT file descriptor. + +Posted by Adam York at Sun May 2 00:50:08 2010 +Sounds great. My only worry is that this will take you away from Pulseaudio development. My worries justified? + +Posted by sam at Sun May 2 01:25:58 2010 +To the commenter who suggested systemd was a bad name: + +From the wikipedia entry: + +System D [in French, Système D] is a shorthand term that refers back to the French word débrouillard[1]. The verb se débrouiller means "to untangle." The basic theory of System D is that it is a manner of responding to challenges that requires one to have the ability to think fast, to adapt, and to improvise when getting the job done. + +That sounds just about perfect to be honest "untangling the boot process" - yes please. 
+ +Posted by horse at Sun May 2 06:20:48 2010 +"Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." --Brian Kernighan + +Posted by Dude at Sun May 2 07:39:37 2010 +WOW! Lets get this baby mainstream. Hopefully it will be easy to adopt to any distro. + +Posted by Charles at Sun May 2 17:11:24 2010 +@codebeard + +Such a thing could be accomplished without resorting to a kernel hack, by using an optionally enabled LD_PRELOAD hook instead. Interesting idea... + +Posted by David Björkevik at Mon May 3 01:06:26 2010 +If the session manager would start using cgroups to kill off all the users' processes on session end, will this not break screen(1)? + +Posted by Diego Calleja at Mon May 3 01:50:35 2010 +Shell scripts are slow yes, but nobody has been able to prove they are a big bottleneck when booting. Maybe systemd will be so fast that bash will become the bottleneck, who knows. But until then, this "shell scripts are bad" attitude doesn't really have a lot of sense. There are more important things to do than rewritting bash scripts in C, IMHO + +Posted by Peter Götz at Mon May 3 02:49:43 2010 +Lennart, +this looks quite interesting! I downloaded, built and installed your code on Ubuntu 10.04. It starts systemd as I can see, but I get the following error: + +Failed to mount /cgroup/debug: No such file directory. + +I'm new to control groups. Any obvious hints for what I'm doing wrong? Thanks in advance! + +Posted by codebeard at Mon May 3 03:40:25 2010 +@ Charles + +Actually, an LD_PRELOAD hook wouldn't work because bind()/etc are system calls, not part of a library. + +Posted by deitarion/SSokolow at Mon May 3 04:23:53 2010 +@Dude: Careful. Aside from the whole "fix a broken part by wrapping it rather than replacing it" aspect, PulseAudio's biggest problem was distros adopting it before it was mature enough. 
+ +Posted by vitaly at Mon May 3 10:14:06 2010 +For lean, clean, portable and reliable service initialization, see perp, "the perpetrator": + + http://b0llix.net/perp/ + +Posted by Sherman T Potter at Mon May 3 13:45:39 2010 +I used to think PID 1 was always init. Things change. We have new Solaris servers at work. I found out the init process on Solaris virtual zones could be ANY PID number. + +Posted by Chirs Carpenter at Mon May 3 15:13:24 2010 +Aren't we basically heading toward a microkernel here? We're abstracting it to where all the services are controlled by one central process (kernel?) that watches everything and reboots anything that crashes, etc. It definitely sounds an awful lot like a microkernel (Not saying this is a bad thing). However, maybe we should start taking another look at GNU hurd? + +Posted by owczi at Mon May 3 15:14:35 2010 +This is definitely the way forward. You've once helped to sort out the mess the Linux sound servers were, now it's time to clean up another area - way to go! Of course - as long as the final solution is well balanced (running services vs. on demand services) and we don't get into the situation Windows has been in for years now: you log on and you see your desktop - which gives you the impression that it's ready to work, while it will be loading services for the next minute or so before you can actually use the system. This is GUI I'm talking about, but it does rely on system services that need to be running. + +Posted by Tel at Mon May 3 15:45:41 2010 +You are perfectly correct about upstart's event driven system being the exact opposite approach to what it should be. One of the big problems with upstart is that if something is wrong with the system (e.g. some important component in the chain is missing) then upstart can't tell you anything useful. + +For example, you expect that FOO should be running so you type: + +# initctl status FOO + +And all it can tell you is that FOO is stopped or waiting. 
Won't tell you WHY, which is what you really need to know. This gets even worse because some of the events can be given arbitrary names that have nothing to do with the package that provides them, and nothing to do with the program that might be running. As a consequence, if the thing is waiting for one of these type of events, even when you know exactly what event you need, you still don't know how to make that event happen, or which extra package you might need to install in order to make it happen. + +Even when the upstart system is working correctly, you still cannot ask for status information about events that have been emitted in the past. That's because events are not actually jobs, you can only ask for status about jobs. + +---- + +Finally, on a completely different issue, if you get a script-heavy system starting all sorts of daemons and you boot that system on VirtualBox a few times, you will see that it boots amazingly fast. To me, this suggests that CPU is not even slightly the bottleneck at the boot process, so you can forget about looking for optimizations in running grep fewer times. I'm 80% sure that the reason VirtualBox can reboot a machine so fast is disk cache, implying that what really matters for fast boots is how many disk files you touch -- doing stuff in parallel is pretty much a waste of time when disk is throttling you. + +Shell scripts are very bad (especially nested scripts) for forcing a lot of seeks because the shell just reads a line at a time (presuming your system is not smart enough to actively readahead). + +Posted by mario at Mon May 3 18:48:42 2010 +Sounds serious. Upstart is a nice start, but not really easy to understand. Dependencies are never visualized, and if it's broken (mostly dbus and udev), then a typical user is lost. + +With systemd standardizing a lot more features, this might be less of a problem. But still there seems to be much wiggle room for even more fragility of the Linux boot process. 
Automatisms are welcome, but not at the price of complexity and transparency. If there is too much self-righteous "intelligence" in any system service and hence superimposed user control restrictions, this is detrimental to usefulness. + +So, I look forward to give this a shot once available. However I have this bad feeling it will come at a price as well. (At least if Ubuntu developers package it.) +So give us somewhat more helpful man pages, and proper control to work around builtin tool "intelligence" when necessary. Don't hide more inner system workings in the opaque dbus realm. Don't want to debug boot failures anymore. TNX + +Posted by Enrico Tassi at Tue May 4 00:12:06 2010 +You should definitively call your software "pid1". + +Posted by Anonymous at Tue May 4 16:03:22 2010 +This is a sick trend. These developers with the "innovations" not only trying to ruin stuff that works, but also to stole my precious time. + +They think I have nothing to do but to learn their heroin trips every year. + +There is a stable knowledge, the stable tool. Such a big gain! There are literature, courses, a great amount of materials about this stable tools. And now another moron tries to cancel it all. What he expect from us? A "thank you"? + +Posted by alex jumba at Tue May 4 16:59:18 2010 +great work @lennart, systemd seems poised to solve some fundamental problems. + +Just an observation/comment. By your admission, hardware and software are dynamic and changing constantly during runtime. Does it make sense to add to that list usage patterns?? Given that, does it seem appropriate to allocate resources (e.g. I/O nice) to processes during spawning alone and let them maintain those resource levels during their lifetime, even though usage patterns, just like the hardware and software, change? If the overall goal is to let the machine determine stuff by itself without much manual intervention (e.g. 
as with what systemd does with dependency graphs), wouldnt it be appropriate for named not only to monitor hardware and daemons as it does currently (ingeniously BTW), but also dynamic resource management (resource = I/O/CPU nice etc)?? This is what I understancd what an INIT 1 system to be, a process manager (which the kernel has "outsourced" to), much like pulseaudio/jackd is for sound and X/kwin for display. + +What am I getting at?? There has been discussion lately about interactivity of (recent) linux kernels, stirred mostly by BFS/BFQ. Your explanation about wanting other apps to get init-like powers brought an idea. Since it is these "servers" (pulseaudio/X etc) which know which apps are in focus and thus are actively being used by the user, wouldn't it be great if some of these "powers" e.g. renicing processes, be done by these servers (either directly or through giving advice/hints to systemd). This way, even resource allocation becomes dynamic (e.g. determined by user's usage patterns) and thus solve the problems with latency and such; (I remember windows has a setting for giving priority to foreground/bachground processes). This way, you don't have to always set slocates i/o priority to be lowest, you dont even have to set it AT ALL, and the system will automagically adjust itself for the workload. + +Posted by Nazo at Wed May 5 00:24:26 2010 +IMHO all major daemons will be in kernel space in future for wishing throughputs, latencies and low powers. I believe userspace daemon are completely replacable by kernelspace daemon. + +IIRC kernel can use simd (RAID driver uses it) and floating point (kernel_fpu_begin/end) with a bit taskswitch slow down like userspace application. Also kernel can use userspace memory, preemptable threads and executing userspace application. + +IIRC bypassing MMU is about 1.5x faster on x86. Some minor architectures may have problem because it has different operation set between kernel and user space. 
I don't know this is critical or not. But there are already some daemons like knfsd. + +I want to see in-kernel fastest implementations for init, udevd, modprobe, mount, fsck, dbus and... anything! + +References: +Unleashing SSL Acceleration and Reverse-Proxying with Kernel SSL (KSSL) +ttp://www.coresecuritypatterns.com/blogs/?p=1389 +Kernel D-Bus +ttp://www.mnementh.co.uk/home/projects/collabora/kdbus +TUX web server +ttp://en.wikipedia.org/wiki/TUX_web_server +[RFC] Unify KVM kernel-space and user-space code into a single project +ttp://www.gossamer-threads.com/lists/linux/kernel/1202521 +Kernel APIs, Part 1: Invoking user-space applications from the kernel +ttp://www.ibm.com/developerworks/linux/library/l-user-space-apps/index.html +Re: ABI change for device drivers using future AVX instruction set +ttp://kerneltrap.org/mailarchive/linux-kernel/2008/6/28/2285574/thread + +Posted by PaulWay at Wed May 5 05:42:22 2010 +This looks great! I really like the idea of deferred start-up of services, combined with an xinetd-style socket holder. + +One thing that would be interesting to look into further would be to then shut down services after they go through some period of inactivity, or when pressure from other services for resources goes over a threshold. If we've got a machine that only gets SSH connections once a week or so, why not shut down SSH after an hour or a day and give that memory to the database or web server. + +Obviously this is a different use case from the initial purpose of systemd as you've stated - which is to speed up startup by not starting things until we really need them. But I see it as an equally valid purpose for systemd, and it already takes care of some of this stuff for suspend and resume. + +Alternately, maybe when the system is idle after starting up we could start those services who had been deferred? If we want to be able to SSH into the machine with little delay, we could actually start up SSH so that it's ready to go. 
Then swapping can handle the memory pressure problem above, as it currently does. We've allowed critical things to start up as fast as possible, but we've still got the current level of responsiveness after the whole process is done. + +(Which reminds me unpleasantly of Windows' way of allowing the user to log in while the system processes are still starting up, providing the illusion of quick start-up with the pain of discovering that it's a terrible lie for those users that don't leave their computer to boot for five minutes. But I think the tactics above are better than that.) + +Have fun, + +Paul + +Posted by Aaron at Wed May 5 08:32:30 2010 +Awesome work! I've been envisioning something like this for a long time... ever since I first sat down with Red Hat 6, actually. + +I think a project like this is one more step toward the unified Linux Desktop that was supposed to happen so many times in the recent years. + +In the coming weeks I plan to get this working on funtoo, and maybe gentoo also. I'll provide updates for anyone else who is interested in trying this as well. + +-Aaron + +Posted by Tim Waugh at Wed May 5 11:15:37 2010 +Sounds really great. One minor point: + +"unless the machine physically is connected to a printer, or an application wants to print something, there is no need to run a printing daemon such as CUPS" + +This isn't completely true. You might well have a CUPS server on the local network providing discoverable queues and PostScript/PDF drivers for a network printer. + +But really I suppose this falls into the 'physically connected to a printer' category, so would be adjusted somehow by whoever configures the system? + +Posted by Anonymous at Wed May 5 16:07:58 2010 +at first I really liked the idea of an inetd-like startup system. but then I got really suspicious, when reading the following points: + +- udev-support in pid 1? really? can't you just make a socket with simple text-io for control and introspection? 
+ +- different unit types, templates, extended dependency and ordering support: sounds all overly complex to me + +- include mounting and setting hostname. so it's not a startup system, it's a startup-mount-hostname system. what else do you want to include? what happened to the unix philosophy? the beauty of shell scripts is, that they are generic. you don't need one central monolithic system to support everything. and if you claim, this would speed up booting, proof it or it's not true. it sounds more like you have a hammer looking for a nail. + +- someone already mentioned HURD. that's how do it in a clean and consistent way: whenever a resource is requested, a translator is started to provide it. as linux doesn't support this, everything you can do is create a huge and complex hack. in that case i think i stick with sysv. it has it's weaknesses, but at least it's simple. + +Posted by Lennart at Wed May 5 17:53:37 2010 +Luiz: I don't see how powertop could be of any use in this system. + +Michael: I am not sure such a swap logic really belongs in systemd. There already is an external daemon for this (http://sourceforge.net/projects/swapd/) and I am not convinced that there is reason enough to do that inside the init daemon. + +Joshua: yes, we had a closer look on most other init systems, see our comments about them in the text. + +John: I think that is a matter of taste. We think it it is nicer to use file name suffixes for this, as then an "ls" can give you a better overview about the units defined. + +Grahame: Keeping shell in the boot process just because it can be used for debugging doesn't strike me a good idea. Shell is not a debugging tool, it's a scripting language. We should provide proper debugging tools for the boot process instead. Example: the interactive boot systemd already provides (look for confirm_spawn= in the article above) is a very useful debugging tool since it allows you to single step through the entire boot process. 
+ +Posted by Lennart at Wed May 5 22:59:47 2010 +PJ: If we move the startup logic that currently exists in the various init scripts into the init daemon or the service daemons themselves, then this will actually remove a lot of the fragility of the boot process completely. Hence I see little need to replace shell by any other language. And even with systemd it is still easy to hook some shell script into one of the services being run, should you really need to (Just add ExecStartPre=/foo/bar/waldo.sh to the .service file and this script will be run before the main daemon. You can have as many of those scripts as you wish). So summarizing this: there will be less to debug if we have this in robust C code, we should provide actual init debugging tools instead of just a shell for the purpose of debugging (and we are already doing that), and finally, even with systemd you can still hook in a shell script should you feel the need to. + +Posted by Lennart at Wed May 5 23:12:41 2010 +nine: systems like Fedora's "readahead" already linearize the disk seeks during startup. That is a problem orthogonal (though certainly not unrelated) to systemd. + +Davide: automatic shut downs should only be done when the service itself thinks it is idle, and that is kinda hard to properly deduce from the outside. That said I tend to believe that we should not do work we don't really know is necessary. And that means that we don't do the work of shutting down something unless we have a really good reason for it. And that something is "idle" is usually not a good reason. In the end a correctly written daemon that is not being used should have a minimal impact on the system: it would be swapped out and sleep in a poll(), hence not influence the system measurably. So in summary: when doing stop-on-idle, then the daemons must do that themselves, and in many cases I'd not even bother. + +James: indeed SMF is not the only system that does proper dependency management. A few systems do that. 
However, neither Upstart nor sysvinit do, and that's why I mentioned this. + +Paul: yes, the contract stuff is very much like cgroups, however cgroups are in many ways nicer, since you can name them in an fs and so on. (But I guess Solaris people disagree with that...) + +Robert: yes, we'd need to patch daemons. That is explicitly mentioned in the text (look for Writing Daemons) + +Anonymous: recompiling things whenever you change a bit of configuration is slow and cumbersome and requires a lot of dependencies installed. To build a vala program you need glibc, a lot of the gnome stack, gcc and more installed, something you certainly don't want to have around on a small system, just because you want to patch one configuration line. I mean, I like Vala (in fact systemd includes client tools written in Vala), but I don't think it has any place in an init system, sorry. + +Posted by Lennart at Wed May 5 23:19:19 2010 +Peter: the plan for system suspend is to create a snapshot, activate the unit "suspend.target" which shuts down some services via "Conflicts" dependencies and then afterwards we activate the saved snapshot again. + +And hot-swap hardware should be handled like any other hardware being plugged in or pulled out: .device units are activated and deactivated for them. + +Eric: Well, I am sure that everything has some meaning in some language of this world. Also, reading the Wikipedia article I got the idea that the term wasn't negative at all? + +Colin: I'll keep that in mind for my next init system ;-) + +Anon: see the FAQ section: we welcome every distribution that is interested in this. + +Dieter_be: as pointed out above getting rid of shell scripts by no means means loss of debugging capabilities or that we make it impossible to hook in shell scripts when the admin wants to. + +Posted by Lennart at Wed May 5 23:25:57 2010 +Colin: we'll probably provide similar calls in systemd. 
+ +anonymous: I am not convinced that Haskell would be good in the boot process. Also see my recent comments here that we don't need a replacement for the shell in the boot process. Having good debugging tools and most of the code in the daemons themselves or the init system is a much better choice. + +sztanpet: The same applies for Lua. + +Claes: thanks for the pointer, I'll keep that in mind and investigate that. + +Richard: bash is already slow enough, adding even more stuff into it won't make things any better. The whole approach of shell is just slow. Whatever you do, the focus of shell is always to defer operations to subprocesses spawned off frequently. And that's just wrong. Also see my notes above regarding replacements for shells. + +Posted by Lennart at Wed May 5 23:55:11 2010 +codebeard: very good ideas and I agree with most of them. A few comments: + +1), 2), 4), 5) are already covered by systemd. + +The idea regarding exposing kernel modules as units is interesting, I need to think about that a little more. The first thing that comes to my mind though is that I am a bit afraid of creating the illusion we'd know the same dependencies between modules that modprobe itself knows. I am also not really interested in duplicating that dependency tree in any way. But yepp, I need to think about this more. + +We have most of the debugging functionality in place already. There are logs, and we store away a lot of information what happened. We also offer a serialized, single-stepping, interactive boot, to track down issues. And you can hook your own shell scripts into services if you want to (see my comments above). + +Your ideas regarding that screen-like pty handling would probably mean that we'd have to implement our own virtual terminal (i.e. parsing of VT100 terminal sequences and such). I am not convinced I want to have that in an init system. I think interactive services like that are the wrong approach. 
+ +What we however already support is that you can connect a service to an existing tty, such as a virtual console terminal. + +Regarding the Apache/MySQL issue: If people want to avoid that they should probably just add a dependency. i.e. instead of having apache.service just depend on mysql.socket, it could be changed to depend on mysql.service, if you understand what i mean. But they should do that only locally, of course. + +Regarding your suggestions to fix the kernels so that we don't have to patch the daemons: we actually investigated that in much detail, however this turns out to be really hairy to do. Because at the time of the socket() call in the daemon we don't know that a listening socket is already existing in systemd. We figure that out only at the time of the bind(), and that complicates things considerably, since we'd have two sockets existing by then which would have to become one, in all their properties, i.e. sockopts and suchlike. And that is far from easy. That said, this is certainly something we'd be happy to have in the kernel, even if we don't see that we ourselves will hack that up any time soon. + +Regarding your comments about command line parameters read from /etc/sysconfig: I think daemons that rely on cmdline configuration like that are broken, and should probably be fixed to have a proper configuration. That said should it turn out that many daemons work like that we could probably add something similar to what you suggest. We'll have to see. + +Posted by Lennart at Thu May 6 00:11:01 2010 +anonymous: see my other comments on lua/haskell/vala in the boot process. + +Richard: unfortunately they haven't yet... But I hope this too ;-) + +Adam: RH pays me primarily for PA, not systemd. + +Charles: the right place for that is the kernel. LD_PRELOAD hacks will always be just that ... hacks. + +David: Yes, screen is an interesting point. 
It probably would have to be patched to get its own session which is then treated separately from the session it was created from. + +Peter: You need a newer kernel probably, that enables the "debug" cgroup controller. Building systemd is not easy probably. + +deitarion/SSokolow: I disagree with your assessment on PA, see my other recent blog post about that. + +Chris: this has nothing to do with a micro kernel. We just pull a few things together that have previously been done at various separate places, such as init, the init scripts, inetd, mount(8) or even cron. + +Anonymous: well, you are welcome to continue using a systemd-less system if you are this conservative and think this approach is so wrong. + +alex: changing process properties like that from the outside at runtime is always racy. If something like this is desirable then it should probably be done in the kernel or in the daemons themselves. + +Posted by Lennart at Thu May 6 00:33:54 2010 +Nazo: well, I am pretty sure not many people would agree with your thoughts. + +Paul: as mentioned above I don't think that shutting down sshd in the case you describe really is advisable. We should minimize the work we do, and that includes not shutting down anything we don't have to shut down. A properly written daemon that is swapped out and otherwise just hangs in poll() is not measurable in the system otherwise, and certainly doesn't take away much RAM from other processes. (and sshd is a properly written daemon like this) + +And regarding your suggestions about delaying some daemon startups until the CPU is idle: that would basically mean that we'd add another CPU scheduler on top of the kernel scheduler, which I don't think is a wise idea. Hence: what you want to do we should do with the existing kernel CPU scheduler: by using nice levels and scheduling modes like SCHED_IDLE/SCHED_BATCH we can tell the kernel that some job should be delayed as long as there is something to do. 
It might make sense to utilize that to prioritize things when we start things in parallel. We'll have to investigate that further. + +Tim: I know that at least for the mDNS case if we browse for a printer the replies should be available in less than a second. Also, I believe the gnome printing dialog has a live view on the printers found, right? If that is the case and all other browsing protocols are as quick as mDNS then it should be OK to start cups only when the printing dialog is opened: it might show no printer in the beginning, but after a second it should be populated fully. I'd argue that this user experience would be acceptable to the user, if he even would notice at all. + +Anonymous: hmm? udev uses a very simple protocol that is mostly text-based. Or did you mean "dbus" when you typed "udev"? Well, I don't buy into dbus hatred. D-Bus is just an IPC, it is much better using a well-known, well-analyzed, standardized and introspectable IPC like D-Bus than have each and every single service come up with its own homegrown IPC. Also, Upstart relies on D-Bus the same way systemd does. + +And systemd is called "systemd". We want to manage the system, that's why we called it that way. And setting the hostname and mounting file systems is a core part of the system, and hence we integrate it into systemd. + +I don't buy into Unix philosophy. Unix is broken. It might be one of the better system designs of all those existing, but that doesn't mean it wasn't broken too. We need to fix it and improve it where this is necessary. Strict Unix traditions or POSIX compliance hold us back, and are conservatism where progress is needed. Unix can inspire, but it is unsuitable as a dogma for system design 30 years after its inception. + +Posted by Anonymous at Thu May 6 03:19:04 2010 +Lennart: yes I meant d-bus and i don't use upstart. and yes, unix is broken, but the philosophy is right: make the tools simple and use plain text.
whenever something adheres to this, it is a pleasure to work with. it is sometimes amazing how you can use these tools for things that no one has thought about, when they were created. and they still allow you to do something in emergency situations when everything else fails. and this is still true after 30 years and will still be true in the next 30 years to come. as soon as things like d-bus, or xml for that matter, come into play, it becomes a real PITA. I could give many example from my own experience, but then that post would become very long. + +Posted by Lennart at Thu May 6 03:33:28 2010 +Anonymous: Good for you that you don't use Upstart. However, all distros have now switched. All big distros now use D-Bus from the beginning of the boot process on. And introducing systemd does not change that fact in any way. + +Anyway, I don't believe in the Unix philosophy. Sorry for that. The discussion about Unix philosophy is mostly off-topic however and hence should not be continued here. + +Posted by Tim Waugh at Thu May 6 10:58:30 2010 +Re: CUPS + +What I'm trying to say is that there may not be a local client. cupsd is a network service as well as serving local clients, and so its socket may never be connected to. Network clients are other cupsd instances (which yes, systemd may start when the user sees the Print dialog), which will just wait to hear UDP browse packets from the cupsd running on the server. These packets are only sent once every minute or so. + +I really like the system, I am just struggling to see exactly how cupsd fits it and can benefit. + +Here is the system I'm worrying about: + +PrinterA } +PrinterB }- server (running cupsd) +PrinterC } + +cupsd on this machine has been configured to know about these three network printers, and has been told to advertised them on the local network. This is a common situation because network printers by themselves are not always easily or consistently discoverable across the whole group. 
Some may support mDNS, some may only support SNMP, etc. + +On this server machine, no-one ever logs in. When someone wants to print, they do so on their own client machine: + +clientA } +clientB }- server +clientC } + +All of the machines above are running cupsd. The client cupsd instances discover the queues advertised by the server cupsd instance by listening out for UDP browse packets, which it sends periodically, about once a minute. (Yes, ideally this would be mDNS, but right now it isn't.) + +So now imagine they all switch to using systemd, with no other changes. Someone on clientA is looking at the File->Print dialog, meaning GTK+ has just connected to the local cups UNIX domain socket and started the client cupsd instance. That will sit there waiting to hear about any network CUPS queues that are being advertised. But nothing will start the cupsd instance on the server. CUPS queue discovery is passive. + +Even if the user in charge configured systemd to always start cupsd on the server (can that be done?), the clients will still have to wait up to a minute the first time they ever use the print dialog. + +Of course, CUPS caches information about network CUPS queues so that it doesn't have to wait at all after starting if has it seen those UDP browse packets before, so subsequent File->Print dialogs won't see the same delay. + +So it comes down to: + +1. Can systemd be configured to always start a particular service for which it cannot know whether there are clients, such as cupsd when used in this way? + +2. Even better, can it be configured to automatically discover whether a particular service needs to be "force started" like this? For example, I can imagine a small program to read the CUPS configuration file and see if it is configured this way, and tell systemd to act accordingly. + +3. As things currently stand, there will be up to a minute's delay on each client the first time they use the Print dialog. 
This will only be gone once CUPS switches over to using mDNS as its primary discovery/advertisement mechanism (which is planned). + +Posted by codebeard at Thu May 6 16:37:27 2010 +@ Lennart + +Thanks for taking the time to reply to everyone! It looks like the correspondence generated by your blog post has been considerable. + +Regarding patching the kernel to copy the socket on bind(), you say that it is really hairy to do all the copying and stuff, but perhaps I am missing something. Correct me if I'm wrong, but doesn't the kernel have this functionality already, in the form of dup2()? + +Here's a small test: +parent.c +#include +#include +#include +#include + +int main(void) { + int sock; + struct sockaddr_un addr = {AF_UNIX, "./socket"}; + unlink("./socket"); + sock = socket(AF_UNIX, SOCK_STREAM, 0); + bind(sock, (struct sockaddr *) &addr, sizeof(addr)); + /* pretend that we just did something like: + * fcntl(sock, F_SETINHERIT, 1); + */ + listen(sock, 10); + execl("./child", "child", NULL); +} + +child.c +#include +#include +#include +#include + +int main(void) { + int sock; + struct sockaddr_un addr; + int i = sizeof(addr); + sleep(10); /* simluate startup time */ + sock = socket(AF_UNIX, SOCK_STREAM, 0); + /* pretend that we just did: + * bind(sock, &addr, sizeof(addr)); + * and that the kernel checked the addr structure for + * matches with F_SETINHERIT fds, then basically just + * returned the following ( but with F_SETINHERIT off, + * so that future calls to bind() will know they don't + * have to search for things ): + */ + dup2(sock-1, sock); + listen(sock, 20); + accept(sock, (struct sockaddr *) &addr, &i); +} + +When I compiled and tested the above code with a sample client, it worked perfectly. dup2() must already do all the necessary locking and stuff that needs to be done to copy the file descriptor, so it's really easy. 
The only side-effect is that we waste a file descriptor (two fds will end up referring to the same socket), but that's really a minor issue and could probably be fixed if anyone cared. + +Posted by antrik at Thu May 6 19:15:32 2010 +There are some good ideas in here, and it's definitely a step in the right direction. (Especially compared to upstart.) The most important ones in particular: + +- Creating sockets before launching the daemons: sounds like a very nice and useful idea :-) + +- Starting services on demand: very important property missing from most other init systems. Very much like passive translators in the Hurd :-) + +- Using cgroups for managing resources allocations etc. in a hierachical manner: again, a very good approach -- very similar to the one proposed by Neal Walfield (of Hurd fame) in his Viengoos papers, see http://walfield.org + +- The observation that init and session management are closely related is good too -- though it's only mentioned as an afterthought... I believe the whole init system should be built on the idea that it's really just a case of hierarchical session management. + +On the negative side, there is a major contradiction between "we don't want portability" and "we'd like all major distributions to adapt it": Debian also has Hurd and FreeBSD ports -- so without portability, it's pretty much out of the question there... + +I'm not saying it's bad to use system-specific features, if they really help -- on the contrary, I believe this should be done more often. (The Hurd's unique features for example are pointless, if nobody ever uses them...) However, I don't see why you wouldn't want to accept alternative implementations of various functions upstream. 
+ +Of course other systems ideally would use solutions tailored to their specific functionality (I already mentioned passive translators in the Hurd) -- but often the resources for reinventing everything are simply not there; and thus adapting existing solutions can be important. (Also, reducing transition costs.) + +The ability to mix and match various components is one of the major strengths of the free software world IMHO: it facilitates competition and innovation. It is what allows the best solutions in any particular area to rise to the top. Tying solutions to particular environments prevents this. + +There are some other good ideas and caveats, which I will skip here, as it would be too much. (I should blog about this, but I don't think I'll get around to it any time soon... :-( ) + +The real showstopper however is, "I don't buy into Unix philosophy." Ouch. Just ouch. + +(Well, obviously the showstopper not being the fact that you mentioned it in a followup comment -- but rather the fact that it shows in various places in the article, and your comment confirming that this is by design...) + +It's a pity to see something implementing so many good ideas, disqualified in such a manner :-( + +Posted by Lennart at Thu May 6 21:02:29 2010 +Tim: yes, for cases like that it is possible to start CUPS regardless whether a local client or local hardware actually are around. It's a matter of simply adding a symlink to the .service file to some directory (instead of or in addition to the symlink to the .socket file). We probably should decide later on whether CUPS really is a candidate for on-demand loading like originally pointed out, or whether we can leave it to the user to fix the link, or whether we can teach CUPS itself to create that symlink. + +Posted by Lennart at Thu May 6 21:09:17 2010 +Codebeard: Well, after the bind() we'd have to return to the application a socket that is the merged version of both our systemd socket AND the socket the daemon created itself.
We need to have the queued connections from the systemd socket, but all the various sockopts/fd flags/SIGIO handling/yadda yadda and so on that might have been set between the socket() and the bind() in the daemon itself. That basically means we need some non-trivial code in the kernel that can merge the fd and copy all settings over; it's more than just a simple dup(). I do believe that having something like this in the kernel would be great, but it's nothing we can hack in a couple of hours, unfortunately. + +Posted by Lennart at Thu May 6 21:20:47 2010 +antrik: if some distros care about portability to non-Linux systems then they can deal with the problems that creates, I see no reason to make that my problem. If we cannot make use of the unique features Linux provides we cannot do much what we are doing now in systemd. One example: cgroups is at the heart of what we do. If we want to provide compatibility with other systems we would not be able to use cgroups. And that would be a big loss. Also, if you try to keep compatibility with other systems, you need to abstract the system-specific behaviour. And that adds code you need to maintain. And before you can add support for some OS specific feature you always have to abstract it. It costs a lot of time. One can certainly do that for normal applications easily, since they use only very few OS-dependent functionality. However, that is different for something as low-level and fundamental as the init system. + +And I guess we have to agree to disagree on our belief in the holy grail that is Unix philosophy. If you reject everything coming from folks who didn't drink the Unix cool-aid, then I guess I am sorry for you. + +Posted by Claes at Thu May 6 21:55:43 2010 +Regarding iCalendar semantics as I mentioned above, I think not so much of the file format and the various mostly human based "event types" it discusses. I think of the way it defines scheduling in time, especially recurrence. 
+ +If systemd "understood" recurrence the same way as calendar apps do, it would theoretically be possible to plan, schedule and visualize events with existing calendaring applications. + +cron applies a different system for recurrence and I can't say which is better or worse, but recurrence rules can be confusing and difficult to define. There are probably more tools that uses iCalendar principles regarding this. A good design could implement both. + +Posted by Walther at Fri May 7 10:45:16 2010 +You started out talking about socket/dbus-activation a lot but later you talk a lot about explicit dependencies in configuration files. Do all dependencies have to be defined explicitly? Or is the intent to use mainly socket/dbus-activation and config files for the rest? + +It would be really cool if systemd would detect dependencies on the first boot and would use them to start services in parallel before they are needed on the consecutive boots. (Maybe this is exactly what you are doing but I didn't get that :) + +For instance: systemd starts gdm. gdm starts X through socket activation. After X has started, gdm starts LDAP through socket-activation. Which means that LDAP is started after X has completed (which is not optimal). systemd logs the activations and so on the next boot systemd starts gdm, X and LDAP in parallel before they are activated. + +Posted by Lennart at Fri May 7 20:21:37 2010 +Walther: yes we thought about something like that, and would be relatively easy to do that. We'll play around with that and add it if it really turns out to have a positive effect on boot time. + +Posted by Luca Bruno at Sun May 9 12:23:29 2010 +Not writing scripts in vala (would be overkill with a compiler), but what about systemd itself in Vala? It's not as you said needing lot of gnome stack, you can use it with Posix profile (i.e. no glib). +It has a great support for dbus servers, except you need dbus-glib there. +Btw good work. 
+ +Posted by Lennart at Sun May 9 14:53:40 2010 +Luca: Vala is not OOM-safe (because GLib isn't). However the init daemon is one of the few pieces of userspace code that should be able to deal with OOM. + +Posted by Luca Bruno at Sun May 9 23:26:35 2010 +@Lennart: as I said, you can use Vala without glib + +Posted by Luca Bruno at Sun May 9 23:32:51 2010 +@Lennart also, now that I remember, in Glib you can change the vtable of memory setting your own allocation functions, including malloc: http://library.gnome.org/devel/glib/unstable/glib-Memory-Allocation.html#GMemVTable + +Posted by Lennart at Mon May 10 00:00:35 2010 +Luca: No, the code Vala generates uses GLib and GObject heavily. In fact, the Vala object model is the GObject object model. Vala is unable to generate code without GLib and there is really no reason for supporting Glib-less binaries for them. + +GLib code assumes that malloc() aborts on OOM. You cannot just sneak in a non-aborting malloc() and assume all the right OOM code paths magically appear, because they don't. + +Posted by Luca Bruno at Mon May 10 00:13:21 2010 +Lennart, Vala 0.8.1 (and since many other releases before that), is able to emit code without using glib with --profile posix. + +What does it mean that glib code assumes that malloc() aborts? Glib code uses g_malloc, which calls a vtable.malloc() and does no assumptions on that. So if you create a malloc() function that does not abort, yes, it works. + +Posted by alteclanding at Mon May 10 16:24:07 2010 +Why do people in the open source world keep reinventing the wheel is something I'd never understand. There's fefe's minit, it works great and I have absolutely no idea why no one uses it. + +Posted by Lennart at Tue May 11 01:05:19 2010 +alteclanding: Why do commenters in the open source world keep posting comments even though they obviously haven't read the story or even understood it is something I'd never understand. 
There's alteclanding's comments, they are nonsense and I have absolutely no idea why he's posting them nonetheless. + +Luca: interesting, didn't know that. What object model are they using when compiling without gobject? + +Simply making malloc() non-abortive doesn't change the fact that nothing that internally calls g_malloc() in glib actually checks for it to return NULL. An example: http://git.gnome.org/browse/glib/tree/glib/glist.c#n283 -- That's one of the most basic data structure operations in GLib, and what you can see there is that memory is allocated and that is assumed to succeed. Right after allocating the data structure is accessed. Would a malloc() implementation return NULL sometimes there this access would immediately cause segfault. And that is why glib is inherently not OOM safe, and it is completely +irrelevant what allocator you plug in there: the OOM handling codepaths are simply not existing. And that is actually a good thing, as I have pointed out here: http://0pointer.de/blog/projects/on-oom.html + +Posted by Luca Bruno at Tue May 11 10:50:57 2010 +Lennart: it simply create structs without using gobject, of course there's no reference counting... it will free an owned object as soon as it's not used. Methods and variables are glibish, i.e. my_struct_method (MyStruct* s); +It doesn't support inheritance. + +For the OOM thing I've got what you mean, I thought you could have done recovery inside the custom malloc itself, but still abort if recovery fails. Clearly it's not your case reading the code. + +Posted by Tobu at Mon May 17 15:42:17 2010 +Here is some interesting feedback: + +http://etbe.coker.com.au/2010/05/16/systemd-init/ +http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=580814 +http://groups.google.com/a/chromium.org/group/chromium-os-dev/browse_thread/thread/d146c73e42fc0e7b + +Posted by Martin Sivak at Wed May 26 12:41:56 2010 +Sorry Lennard but I can't disagree more with your stance towards unix philosophy. 
+ +Having multiple tools (bash is ineffective, but that doesn't mean we have to merge half of the base system into one daemon) where each does only one task and does it right has lots of benefits. + +I like the reporting and control stuff in the idea of systemd when it comes to replacing init. + +But I will stop right there. Having "xinetd like" configuration of system stuff.. why not. Pid files are piece of crap I agree with that. But why on earth are you trying to replace autofs, cron, mounting, xinetd? + +Especially when you are reimplementing the "verified" functionality inetd and xinetd have had for ages now.. (the same applies for cron, autofs, ...) + +What we should do is to improve and enhance these tools not write one new monolithic piece of code, which will be hard to maintain, hard to review hard to verify and hard to analyze from security stand point. + +I would agree that starting hundreds of shell scripts is not perfect, but your solution is the opposite extreme. Starting couple of main daemons instead of the shell scripts won't affect the performance and it will still conform to the unix philosophy. + +What is wrong on the following process structure? + +init process (starting the main daemon set, taking care of respawning them and setting the initial environment) +|- improved xinetd for daemons +|- autofs daemon for automounting +|- cron enhanced of proposed reporting +|- udevd taking care of module loading and device dependent spawning + +They can all even use some kind of common library to simplify the common tasks... if you want extreme solution, improve xinetd with dependency stuff and make it pid1 process. + +You know, there are some of us who use linux on server machines. And we want to be sure that the machine is secure and that we can disable any particular piece.. kind of hard to do when we suddenly have only one piece which does everything. Especially when you find a security bug in that one monolithic piece of code..
+ +Posted by Lennart at Mon May 31 21:35:28 2010 +Martin: I am not trying to replace the existing automount daemon. It does a lot of stuff that systemd doesn't do and will never do (i.e. read automount maps from NIS or LDAP!). + +And I tried to explain why I want some automount/mount/inetd functionality in systemd. If you cannot see that, then please read the blog story again. + +And arguing against reimplementation of "verified" functionality means you eventually come to a complete standstill of development. + +Also, I think you are overestimating the complexity of inetd and cron a little. + +Also, by no means we want to get rid of udevd. Upstart has weird plans like that. Not us. + +Posted by Eero Tamminen at Tue Jun 1 21:20:28 2010 +@sjansen Even if the IO side would be handled by the kernel, one still has the problem that the processes generally spend quite a lot of CPU at their startup instead of idling and this means causes kernel to do a lot of scheduling which adds overhead. + +I think it would be better to interleave the startup a bit instead of starting potentially hundred(s) of processes at once. (after profiling the impact of course, preferably also on a single core netbook system) + + +cgroups: I have some doubts about putting every started process into a separate cgroup group. That's fine as default, but it should be possible to put multiple processes to a same group so that their resources are handled as a unit. Otherwise one can more easily run into issues on resource restricted systems due to resource waste when one has set e.g. separate memory usage limits on the groups. + + +setsid: If one gets rid of setsid(), how one can then make sure that started processes can safely kill their whole process group (to get rid of all started children, engine etc processes) without killing the parent (like systemd...)? 
+ +Posted by Eero Tamminen at Tue Jun 1 21:22:08 2010 +D-BUS: The reasons why I personally "hate" dbus isn't its API, but dbus daemon implementation and usage. Programs do all kinds of idiotic things through it; sending data on the whole session bus instead of just control information, send data in XML, subscribe to too many device status messages so that you get client "herd" wakeups etc. + +And the daemon implementation is pretty awful. D-BUS buffers messages without a limit instead of blocking message spammers until the sent messages are consumed. What makes it worse is that D-BUS memory handling is at the same time incredibly inefficient at releasing the memory it has allocated (it fragments it) and too complicated to make much sense of it from Valgrind reports. + +Posted by Lennart at Tue Jun 1 22:26:54 2010 +Eero: you are overestimating the price of switching tasks today... + +regarding cgroups: we now put each service into its own cgroup in a private systemd-specific hierarchy (/cgroup/systemd). With a very simple config option you can optionally add the process to arbitrary other groups in other hierarchies. So what you ask for is already covered. + +regarding setsid: the point of what i wrote is that systemd calls setsid() for you anyway, so you don't have to anymore, and your call will fail with EPERM if you do call it nonetheless. + +And uh, your dbus accusations are bogus. + +Posted by Joe Nall at Wed Jun 2 22:11:42 2010 +What is the plan for managing socket and process selinux contexts? + +Posted by Lennart at Wed Jun 2 22:45:26 2010 +Joe, systemd is not the first daemon managing sockets and processes. Which means we'll do it the same way as it has done previously for xinetd and other babysitters... + +Posted by Anon at Thu Jun 3 21:56:44 2010 +OK, I've read bits and pieces from all over the place about systemd so I apologise if you've answered these questions over and over. + +1. 
systemd replaces the need for portreserve simply by design in a rather more robust fashion. +2. systemd can support dependencies but where possible dependencies should be avoided. +3. systemd has a (sysv?) mode where it starts all jobs at sysv levels? This can be used on servers or very conservative environments. It is presumably not possible to mix "implicit" mode with sysv mode? +4. Virtual dependencies are to be strenuously avoided. There will not be support on waiting on ntpdate/forced system time (this is considered to be a non-problem). There will not be support for waiting on all normal jobs finished/GUI idle after boot/start cupsd now. +5. udev events can be turned into dependencies. bluetoothd depends on the kernel having sent a bluetooth udev event at somepoint in the past? What about when the dongle is removed? +6. The "screen killed on GUI logout" is an unrealistic problem or will be manually solved by modifying screen? +7. Circular dependencies (A waits on B waits on A) are non-problem or would be a problem anyway. + +I'm curious as to when things like Xorg are started - do things like gdm enough sockets so it basically handled implicitly? + +Posted by Lennart at Fri Jun 4 02:21:29 2010 +Anon: 1, 2, 7 are not really questions but yes, you are right on those. + +Regarding 2: for normal daemons dependencies are not really necessary. For stuff involved in early boot or late shutdown they are more likely to be needed though. The result of that is that OS vendors are probably the only ones having to deal with deps in the systemd scheme, and packagers and 3rd party vendors won't. + +On 3: There is no separate mode for SysV scripts. We simply consider the SysV dirs an additional configuration source. You can mix SysV services with native services as you wish, and distros are expected to do just that during their transition period from sysvinit/upstart to systemd. + +On 4: you can use dependencies if you want. 
We don't suggest you to use them for normal services though. But there's nothing that would stop you from ignoring us. + +On 5: systemd won't ever shoot down daemons due to idleness, simply because it is very hard to figure out what "idleness" means from the outside of a daemon. We also believe that we should minimize the work done, and hence think that a correctly written daemon that is nominally running but effectively just swapped out and hanging in a poll() is nicer than constantly stopping and restarting services. + +On 6: It's an option under the control of the administrator, whether he wants to allow stuff like screen to work, or not. In a university workstation environment he might choose to kill all the user's processes if the user logs out. On private systems he might want to allow that. We support both schemes, and leave it to the admin to choose. The default will be to allow screen however. + +Posted by Lennart at Fri Jun 4 02:22:46 2010 +Anon: X11 is actually difficult, since its port numbers are dynamic. That said, MacOS actually starts its X server as soon as a connection is done on port 6000. We could do the same scheme. + +Posted by Will at Sat Jun 26 20:03:20 2010 +This looks like it would obviate the need for in-house proprietary unix job management daemons like AOL's venerable "samon". Also, I like the idea of having a uniform method for stopping and restarting services. PID 1 is the perfect place to put this effort. Thank you. + +Posted by pada at Sun Jun 27 03:59:30 2010 +In order to calculate the dependencies of kernel modules, I'd suggest to make use of modprobe's intelligence by executing +modprobe --list +modprobe --show-depends +and use the output as an additional configuration source, as systemd already does with LSB headers from init scripts.
+ +That way, systemd won't need to know about any modprobe configuration files, but will be able to figure out the right moment to load a kernel module and whether a module needs to be loaded at all. + +One problem I see here is the time required to execute modprobe. Module dependency information should be cached and not determined on every single boot, but only on "depmod -a" events. + +A different approach would be to use /lib/modules/`uname -r`/modules.* directly as an additional configuration source, but then systemd would be required to parse these files. Is there some standard for the syntax of these files? + +Posted by Mark J at Wed Aug 4 08:07:42 2010 +The majority of the details of this are a few college courses over my head. But listening to your explanation of it on the Linux Outlaws podcast it was fairly easy to understand and generally sounded like an awesome idea. So I just wanted to applaud your hard work! + +Posted by bochecha at Mon Aug 23 11:20:20 2010 +Thanks, this serie of article will no doubt be very interesting. :) + +About this one, I don't really get the LOAD, ACTIVE and SUB columns. + +As I understood it, the first one indicates whether a unit configuration was loaded or not into systemd. But if it wasn't loaded, then it would not appear in the output of systemctl, right? + +You say that ACTIVE is a high-level generalization of SUB. In this case, why is that necessary? Isn't SUB already enough information? + +Maybe if you could give the list of the possible values for each columns then that would help me understand the differences. :) + +Or maybe just point to the appropriate documentation if that is all already documented somewhere, I must admit I haven't had the time yet to look at Systemd as closely as I wanted. 
+ +Posted by Lennart at Mon Aug 23 11:35:34 2010 +bochecha: well, there are many reasons why a service might show up as failed to load in the systemctl output: for example, it was referenced as required dependency of another service, but we could find neither a native service definition file nor a SysV init script for it. Or, there was a parsing failure while reading it. Or, because the file was incomplete. And that might even happen while a service is active, for example, because the user requested a configuration file reload from systemd after changing a service file, and a service that is already running suddenly has an invalid configuration file. That effectively means that the LOAD and the ACTIVE state are mostly orthogonal: you may have a running service where configuration loaded fine, you may have a stopped service where it loaded fine, but you may also have a running service where configuration failed to load. + +And yes, ACTIVE and SUB show you the same information, though ACTIVE in a more generalized form. While SUB has states that are specific to each unit type (e.g. "running", "exited", "dead" for services; "plugged" and "dead" for devices; or "mounted" and "dead" for mount points), ACTIVE exposes the same high-level states for all units. + +We only distinguish 6 ACTIVE states (to list them: active, reloading, inactive, maintenance, activating, deactivating), which are mapped from the lower-level states, which might be many more. For example services have 15 low-level states: dead, start-pre, start, start-post, running, exited, reload, stop, stop-sigterm, stop-sigkill, stop-post, final-sigterm, final-sigkill, maintenance, auto-restart. + +Posted by John Drinkwater at Mon Aug 23 12:23:36 2010 +Why ‘systemctl status ntpd.service’ and not ‘systemctl status ntpd’? +Why does systemctl display names like ‘getty@tty2.service’ and not as ‘getty@tty2’ ? + +Do we really need to have .mount, .service, etc on all our config files now?
+IMO, horrible to have file extensions, equally to have them as long as the file name. + +Posted by Lennart at Mon Aug 23 13:36:52 2010 +John, we support different kinds of units. We manage sockets, mount points, services, devices, automount points, timers, paths, targets, swap files/devices and snapshots with the same tools, with the same commands. For example "dbus.service" and "dbus.socket" are both used by the D-Bus system, but can be controlled and introspected independently. To distinguish them, we hence write their full name everywhere, so that you explicitly state that you mean the D-Bus socket instead of the D-Bus service, or vice versa. + +Also, I actually find this one of the pretty things in this design: the unit names are actually identical to the file names they are configured in. + +Posted by Shane Falco at Mon Aug 23 14:19:27 2010 +I'm with Mr. Drinkwater on this. Extensions (especially long extensions) are one symptom of a bad design. All this feels very rushed and hacked together. + +It looks like this core systemctl function won't display cleanly in a standard 80 character wide terminal? Are we trying to change linux so much that we no longer care about those sorts of things? It may be different for gnome developers, but unix admins I know have lots of windows open and usually they're 80 characters wide. + +Finally, why choose a name so close to another common utility? systemctl? Seriously? When another core system utility called sysctl already exists? + +Posted by Lennart at Mon Aug 23 14:26:44 2010 +Shane, I am sorry but I guess we just have to agree to disagree on this. The points you raise are in the category "matter of taste" or even "bike shedding", and so I guess we should leave it at that. + +systemctl shortens the output depending on the terminal size. If you use a tiny terminal, the description string might even be suppressed entirely. The bigger your terminal/screen is, the more output we can stick on it.
That should not surprise anybody. Or to put it in other words: we support 80ch terminals just fine, but if you use bigger terminals we'll make use of them. + +Posted by Shane Falco at Mon Aug 23 14:49:26 2010 +Sounds reasonable and I appreciate the response. It looks like you are taking your own personal experience (which is all anyone can ask) and creating something that you think is appropriate. But I fear that you don't really see the bigger picture of unix admins out there...there are a lot of guys I work with who are junior/middle guys who just work for a paycheck. They're not linux geeks. I dare say they're the majority. They could be doing AIX or Solaris or linux for all they care. I think they're going to have trouble with systemd. It just does too much and it's too baroque. Too confusing. + +I finally, finally got them going with services/chkconfig and now this... + +Posted by Michael at Mon Aug 23 15:00:08 2010 +Just a quick question, can the description be translated ? +I assume that this is not planned, as they are config files, not software, but as we are able to translate .desktop files, it would be great to have some way of doing it cleanly. + +Posted by Patryk "patrys" Zawadzki at Mon Aug 23 15:07:40 2010 +Any idea on when the systemd dependencies get released? Currently it requires unreleased stuff such as dbus-1.3.2. + +Posted by Lennart at Mon Aug 23 15:10:54 2010 +Shane, well, what makes you think that we haven't looked around ourselves? Also, we managed to get systemd accepted by Fedora, in particular FESCO. We managed to convince this technical committee that systemd is a good thing. Do you really want to say that Fedora as a whole is incapable of "seeing the big picture", but you are the only one who is? Maybe things are the other way round? Ever thought about that? + +Also, note that systemd actually brings Linux administration much closer to how many of these things are done on Solaris.
Much of what we added is inspired by SMF, and other init systems. That means the administrators should enjoy how we make things on Linux work much more like the other big server operating systems. + +Posted by Lennart at Mon Aug 23 15:13:46 2010 +Michael: it currently isn't translated, but the plan is to copy very closely the mechanism how .desktop files are translated (our unit definition files also use an .ini inspired format), so that we can reuse existing tools for this. This hasn't been implemented yet however. + +Posted by Lennart at Mon Aug 23 15:20:51 2010 +Patryk: I plan to roll D-Bus 1.4.0 by the end of this week. However I also plan to add a dependency on very new kernels to systemd, to make sure we can move the cgroup fs mount point to /sys. This means you have to either run an unreleased kernel or backport one patch to your older kernels, as we did in Fedora. So, basically by the end of this week the dependency on one unreleased package will go away, but we'll add another one instead. Sorry for that, but I don't think it would be wise to support the old cgroupfs mount point for longer, to make sure users don't get confused by that unnecessarily. + +Posted by Paul Wise at Mon Aug 23 15:40:41 2010 +Its a shame you missed the LCA2011 CFP deadline, I would have liked to attend a talk on systemd: + +http://lca2011.linux.org.au/ + +Perhaps the organisers would consider a late submission. + +Posted by lirqa at Mon Aug 23 15:51:43 2010 +How fast will it be? How fast is the boot on your system? + +Posted by Simon at Mon Aug 23 16:07:24 2010 +Shane Falco, you are being dishonest. + +Your concern is that this change would require you to learn new things and have to teach new things. + +The way you should rephrase your questions is: + +“Sorry for being off-topic; I am posting this on the For Admins post while my concern is really about "Does systemd offer so many nice things that justifies the change?". 
I would like to see the question answered: "What are the advantages of systemd that justify this big change? I did not search your previous posts on this subject."” + +Posted by Michal at Mon Aug 23 16:18:50 2010 +"systemd has been accepted as Feature for Fedora 14" + +Probably will also be in the new Ubuntu 11.04 ;) + +Thanks for your work! + +Posted by Diego at Mon Aug 23 16:21:50 2010 +What about gettext support? + +Posted by Lennart at Mon Aug 23 16:42:09 2010 +Diego: it's unlikely we'll use the gettext APIs inside of PID 1, simply because i18n data tends to be stored in /usr, and we try to avoid accesses to that, since some folks still have that on a separate partition (even though it is crazy and misses the point). However, for the client tools this is different and we'll certainly reuse the frameworks currently used by other projects, be it gettext or intltool, or the hacks to make .desktop files translatable. + +Posted by Lennart at Mon Aug 23 16:47:53 2010 +Paul, I actually submitted something to LCA, but speaking from experience I won't get funding for the flight. But at least I will be able to say "I have tried"... + +Posted by Lennart at Mon Aug 23 16:50:32 2010 +lirqa: see my comments regarding "speed" on http://lwn.net/Articles/401441/. + +Posted by Lennart at Mon Aug 23 16:51:48 2010 +Michal, it is unlikely that Ubuntu will acknowledge that systemd is the future and Upstart is not any time soon. Note that Upstart is a Canonical-funded project. + +Posted by Michal at Mon Aug 23 17:21:50 2010 +Lennart, Upstart was announced four years ago. Even main developer isn't satisfied with v0.6. I don't see any progress in their repo. I would not be surprised if they in the next year just switched to systemd. Canonical doesn't have enough people to develop something else than a new gnome desktop theme. + +Posted by Matthew Jones at Mon Aug 23 17:37:33 2010 +Lennart, I just watched the Debconf video about Debian looking to adopt Upstart.
+ +The main issue that was stated for Debian not adopting Systemd, was their BSD kernel support. Will Systemd work with the BSD kernel? How backwards compatible is it for other Unix-like systems that are stuck with init.d scripts? + +Posted by Lennart at Mon Aug 23 17:42:45 2010 +Michal, after having talked to Keybuk a couple of times in the last months and acknowledging the fact he very recently still did talks on Upstart at Debconf and LinuxCon I fear that's wishful thinking, even if I too hope I am wrong on that. + +Posted by Lennart at Mon Aug 23 17:46:50 2010 +Matthew, systemd is Linux-only. We have no plans to support niche kernels. That would severely limit our technical options and hold Linux back unnecessarily. If Debian cares about those kernels, it's on them to provide support for it. Note however, that Upstart doesn't work on those other kernels either and similar to us has little interest in supporting it. Note that nothing stops Debian from shipping systemd on Linux by default and providing SysV compatibility scripts for the other OSes. + +Posted by Omer Akram at Mon Aug 23 17:53:30 2010 +Its my personal thinking but Upstart-1.0 is coming so tighten your seat belts. + +Posted by Michal at Mon Aug 23 17:57:03 2010 +Lennart, Wait until the Canonical bosses will read the sites with positive reviews of new Fedora/SuSe/etc versions. Phoronix probably soon begin to do some benchmarks. + +When they see that people see systemd as a breath of fresh air and the upstart as a failure to meet promises - they will throw it away. + +SJR can write his code for Debian for free ;) + +Posted by Michal at Mon Aug 23 17:58:29 2010 +Omer Akram, "Its my personal thinking but Upstart-1.0 is coming so tighten your seat belts.". + +Where? + +I don't see anything here +http://bazaar.launchpad.net/~scott/upstart/trunk/changes + +Posted by Simon at Mon Aug 23 18:20:36 2010 +Michal, could you please stop the trolling re: upstart?
+ +Posted by Diego at Mon Aug 23 18:23:55 2010 +Ouch...however, doesn't this help in some way? http://www.gnu.org/software/gettext/manual/gettext.html#Locating-Catalogs + +Posted by Lennart at Mon Aug 23 18:30:11 2010 +Diego, well I am pretty sure people would hate me if i'd start moving i18n data to /lib... + +Posted by Omer Akram at Mon Aug 23 18:30:37 2010 +>I don't see anything here +>http://bazaar.launchpad.net/~scott/upstart/trunk/changes + +thats for a surprise + +Posted by oiaohm at Mon Aug 23 18:38:48 2010 +I think you over looked something in the PAM module/possible future feature. + +Session disconnects and reconnects support. This would be a great step forwards particularly if text based vt can be moved to X11 terminals and reverse. + +Also a great feature for X11 servers in future. + +Question currently I read systemd as starting system wide services. Could it not be extended in future to also start and manage per user services like pulseaudio and jackaudio? + +So spiting these services away from normal user processes and making it simpler for users to restart them and 100 percent clean them up in failure. Service is a Service no matter where it running would be a good policy. Also allow sandboxing of these services in a far more controlled way. cgroups do process tracking to sandbox very well. + +I guess systemd is fairly moduler. Could hooks be added for smack LSM as well as SElinux? Those are the two mainline LSM's that used labels. Rest of the LSM's don't. So really for full support of a Mainline kernel a user might load up supporting both is required. + +I hope one day to see systemd with GTK and QT front ends. Start of serous real graphical management distribution independent. + +Posted by Diego at Mon Aug 23 19:02:27 2010 +Why would Ubuntu switch so suddenly? Remember that systemd hasn't been deployed in any mainstream distro. They'll probably do it in the future, but...right now? Why would they even interested? 
+ +As for Debian...well...it's not like the rest of the Linux world is going to wait for them. If they want to continue pushing for GNU/kfreeBSD while ubuntu dominates the linux desktop and centos the free server market share, that's fine for them. + +Posted by Nagilum at Mon Aug 23 20:45:09 2010 +If ntpd.service would have emitted some error message while starting up, how would I display that using systemd? + +Posted by Lennart at Mon Aug 23 20:49:05 2010 +Nagilum: by checking the logs. The long term plan is to hook up "systemctl status" to the logs, so that you'll see the most recent log messages generated by a service next to the service. But until that happened we need to beef up syslog considerable, i.e. make it indexable and stuff like that. + +Posted by Rainer Weikusat at Mon Aug 23 21:35:14 2010 +The reason to separate /usr from / is that it +contains architecture dependent, shareable data. +And that's still relevant today because of +the possibility to have 'Linux containers' which +share everything shareable with the host +installation they run on. Of course, this also +needs the ability to easily customize system +startup, say, by deleting scripts which are not +needed for a container instance (root-fs of that +having started out and remove parts of existing +scripts which serve no purpose in a container +instance. + +And no, I'm not 'crazy' because I happen to have +some experience with the servers I operate you +are quite obviously lacking. + +Posted by Lennart at Mon Aug 23 21:38:11 2010 +Rainer, I am sorry. But you are completely misunderstanding the /usr vs. / split. Also note that most commercial Unixes already got rid of the distinction and symlink one to the other. Please read up on things before calling me a noob. Thanks. + +Posted by Simon at Mon Aug 23 23:06:05 2010 +How does pam_systemd relate to ConsoleKit? There seems to be some overlap with regard to maintaining info about current user sessions... 
+ +Posted by Lennart at Mon Aug 23 23:11:36 2010 +Simon, yes, there's a non-trivial amount of duplication between CK and systemd. Note that Jon passed on half of the maintainership of CK to me and there's something like a consensus of the people involved to fully merge CK (or something equivalent) into systemd, in the long run at least. + +Posted by Rahul Sundaram at Mon Aug 23 23:11:54 2010 +Simon, + +My understanding is that ConsoleKit will be obsoleted by Systemd in the near future. Lennart is a maintainer of ConsoleKit as well for the time being. Other distros not using systemd can continue to use ConsoleKit I guess + +Posted by William Lovaton at Mon Aug 23 23:30:46 2010 +I'm really impressed Lennart!. Congratulations for your hard work, I can't wait for Fedora 15. + +Thanks. + +Posted by Claes at Tue Aug 24 00:23:52 2010 +I am excited to see so much progress. I don't have much to bring to the table, a few reflections only about the terminology. + +Having a kind of status called ACTIVE, and one of its states called active as well feels weird. And to see a string like "Active: maintenance" feels confusing. Likewise would "Active: active". I think something like "Status: failed" would communicate the situation better. + +Posted by Lennart at Tue Aug 24 00:42:21 2010 +Claes, well, status is too generic, because we have the high-level and the low-level state, which we need to distinguish somehow in the interface. One we called the "active" state, the other the "sub" state. + +Also note that the word "status" (in contrast to state) is already used in the output of the exit status of the program. + +Posted by Denice at Tue Aug 24 00:43:45 2010 +I'm a little worried that anyone thinks Solaris' SMF is something worthy of copying. I find it horribly over-engineered. These days it is common to run virtual servers which do really only one thing (web server, or a mysql slave, or an ldap server).
I have a number of xen guests that list perhaps 15 'chkconfig-ed on' services: +chkconfig --list|grep :on + +So from a system administrator's point of view, speaking of managing targeted servers and not multimedia desktops, I don't need anything complicated to manage runtime services. + +You might want to seriously think about writing a tutorial for a typical small server (apache only, for example - no graphics, no bluetooth, no atd, no iscsi, etc.), and then convince us that systemd provides any value. + +cheers, etc. + +Posted by Shane at Tue Aug 24 01:49:13 2010 +Denice said it better than I ever could. As someone stuck with over a hundred Solaris 10 servers, I agree completely with her assessment. + +Here's a nice little commentary on Apple's launchd which I feel is just as appropriate for systemd: + +http://lowendmac.com/ed/winston/10kw/launchd.html + +It's monolithic, it's "over engineered", and it does too many things. In a nutshell, it's anti-unix. + +Posted by Cameron Hutchison at Tue Aug 24 02:35:25 2010 +"thus ensuring that everything ever logged on the system will properly end up in the log files" + +Does this include timestamps being properly captured? When trying to debug delays with suspend/resume, the logs weren't much help since all the suspend and resume log messages had the same timestamp in the system logs. + +Posted by Stan at Tue Aug 24 06:49:31 2010 +A new init system is a great opportunity for distros to eliminate the minor (yet damaging) differences, so that a service written for one distro will be 100% compatible in another distro. A single code base also has the advantage of heavy testing and extermination of bugs. + +By including special code for non-standard stuff like "SUSE extensions", systemd is just putting a bandaid on the problem instead of fixing it. + +Posted by Anonymous at Tue Aug 24 06:57:59 2010 +Would you consider writing more about the C-based init scripts? 
I've had the general feeling for a long time that all distributions need to do the same small amount of work to bootstrap the early boot process, and I'd love to hear more about the common core you distilled it down to. Obviously I can (and will) go read the C source, but I'd love to hear the higher-level view you've obtained by reviewing distributions. + +Thanks! + +Posted by Tomasz at Tue Aug 24 08:42:17 2010 +oiaohm: user session support is in current systemd. For graphical insight look at "systemadm" (in fedora: systemd-gtk package). + +Posted by Alexandr Kara at Tue Aug 24 10:28:37 2010 +I must say I am impressed by the progress on systemd so far, but I am a little worried about one thing. You say that systemd requires a very recent kernel. Does that mean that when booted with an older kernel, it will just refuse to start? Or will it have some "compatibility" mode when it starts services in parallel and without using cgroups? Or maybe drop to old init (if still installed)? + +Posted by Tshepang Lekhonkhobe at Tue Aug 24 11:54:20 2010 +Lennart, rock on! + +Posted by Karellen at Tue Aug 24 14:02:45 2010 +@Shane: + + [systemd] does too many things + + + +It manages the startup and lifetime of system processes. That's it. + +From the article you linked: + + Merging periodically run jobs into the main system process doesn't make sense. + + + +Why not? "cron" and "at" manage the startup of periodic system processes. The only thing they do different from "init" is that they start the processes at a time other than bootup. Everything else is common between them. So why not de-duplicate the effort involved in starting, tracking and logging, and just allow "init" to start other processes at times other than boot? + + Replacing a simple /etc/crontab text file with multiple, awkwardly named XML plist files scattered among no less than four different directories is taking two big steps toward complexity. 
+ + + +There's no reason that systemd would be implemented that badly. In fact, I'm pretty sure that systemd reads existing "crontab" files just fine. So systemd doesn't require any changes there. + + Starting infrequently used on-demand socket-based daemons from launchd seems like it could open the main system process to a potential denial of service attack. I have not explored this idea or researched to see if it has already been tried, + + + +Well, I haven't researched it, that looks like nothing more than FUD and making-shit-up to me. + + One of the core principles of Unix programing is do one thing and do it well. + + + +Like having one and only one place to consistently manage the startup and monitoring of system processes? Oh yeah, that's totally anti-Unix-philosophy. + +Posted by Lennart at Tue Aug 24 14:23:31 2010 +Cameron: the kernel log buffer only includes timestamps when this is enabled on the kernel command line. A good syslog implementation could read those timestamps and handle them properly. However, I think the current implementations unfortunately don't do that. + +Stan, we only support OpenSUSE extension for the LSB/SysV stuff which in the long run is legacy anyway. + +Anonymous: there's no such thing as a C-based init script. That's a misconception. + +Alexandr: yes, we require a very new kernel. Which is a safe requirement to make for something that needs to be integrated by the distributor anyway. + +Posted by Anonymous at Tue Aug 24 15:08:39 2010 +Lennart: You said in your post that "We reimplemented almost all boot-up and shutdown scripts of the standard Fedora install in much smaller, simpler and faster C utilities, or in systemd itself." "C-based init scripts" seemed like a fair paraphrase of that sentence; would you prefer "C replacements for init scripts"? Either way, I think my original question still applies; I'd love to hear more about them in the future, if you'd consider writing more about them. 
+ +Posted by Aleks at Tue Aug 24 15:58:55 2010 +Great work Lennart! I'm very impressed by the progress of systemd and excited about trying it out. + +Posted by Marius Gedminas at Tue Aug 24 16:59:14 2010 +Could you post an example of a pretty process tree produced by systemd-cgls? + +How does the systemd distinguish user processes that should be killed on logout from processes that should be left running (e.g. screen, nohup, wget -b)? + +(Why does this form keep rejecting my comments? Try #3.) + +Posted by Lennart at Tue Aug 24 19:21:04 2010 +Anonymous: well, what happens with the boot scripts depends on the case. One example: part of the job of the boot and shutdown scripts is to restore and save the random seed of /dev/random. This was previously done via some shell hackery. In systemd, we replaced that by a simple C program, i.e. this one: http://cgit.freedesktop.org/systemd/tree/src/random-seed.c -- which can easily be called from a simple .service unit in systemd, i.e. this one: http://cgit.freedesktop.org/systemd/tree/units/systemd-random-seed-load.service.in -- and that's all there is to it. + +Marius, check http://www.freedesktop.org/wiki/Software/systemd/TipsAndTricks at the end. systemd doesn't distinguish user processes that should be killed or not. This is about security, and it's a decision of the administrator if he wants to allow the user to keep processes around after logout or not, regardless if that process is called "screen" or "foobar" or whatever. However, privileged processes can escape this, and make themselves a member of an arbitrary cgroup of the system and thus avoid being killed when the user logs out. This could even be done via PAM, by invoking the PAM session hooks, which will create a new session cgroup and move the calling process into it.
For example, if it is desirable that the user may keep processes around after logout via screen and only screen, then screen should be patched to call into PAM (which I think it might actually already do in some cases). But again, just calling a process "screen" should never be something magic that allows you to keep a process around. This must be possible only via privileged code and not otherwise. + +Posted by Lennart at Tue Aug 24 19:37:50 2010 +Denice, Linux is a scalable operating system. It is used on everything from big iron to the tiniest devices. With systemd we try to cover the whole bandwidth, and please understand that your specific use case is not the only one we need to cover. + +Shane, you are right, systemd is nothing like traditional Unix. And that is a good thing. Unix was designed 41 years ago. You honestly believe that its design is perfect and flawless and 41 years after it was designed still should be followed in all detail? No, computers changed, and Unix never was perfect. It probably was a better design than most other operating systems, but this does not mean it is perfect and we should never depart from it. systemd is inspired by Unix, but also from what has been done on MacOS and even on the Windows world, and on Solaris. We didn't copy any of the existing services 1:1, we just let ourselves be inspired by their best features and translated them to Linux and added quite a bit of new stuff on top. And that's how it should be done. Unix is an inspiration, it is not the holy grail. Not 41y after it was designed. + +The fact that on traditional Unix the init system was separate from cron, from at, from inetd, from the dbus service activator and from everything else meant that all of them reimplemented a big chunk of their code, i.e. what was involved with spawning processes. It was a useless code duplication, and all implementations sucked at it in one way or another.
Also, you could not run the same thing from more than one of these systems without manually ensuring that things would happen race-freely and properly ordered. In systemd we unified all of this. We use the same codepaths for spawning processes, regardless if they are started via timers, via sockets, via busses, at boot-up, via devices and so on. This allows us to reduce the amount of code duplication, and provide the same awesome process babysitting to all triggers. And that is a big big advantage. If you look at the systemd source code you will notice that the remaining amount of code, for example for doing timer-based spawning is actually very very short, less than 500 lines (including comments and whitespace!). So overall, we simplify things drastically, we get rid of immense code duplication, and we still are a lot more powerful than what came before. + +So, in summary: just because we do things differently doesn't mean we do it worse. + +And if you tell me that systemd is not Unixy, then I can only agree, and I don't feel ashamed at all of that. Because my horizon is much further than just Unix. + +Posted by Denice at Wed Aug 25 02:12:31 2010 +Lennart, my 'specific use case', as you put it, is pretty standard actually. I'm managing 300+ Linux servers (and a few handfuls of Solaris boxen), and we simply don't run lots of services on any of them. Linux system administrators don't let the plethora of services run that you have in your example above. What I am looking at above seems to be a desktop. How about an example like I mention in one of your posts - just a typical targeted server... + +Posted by Riku at Wed Aug 25 13:03:57 2010 +That quite a bit of progress. I salute your "Get Things Done" attitude :) + +Stupid question: What does systemd taking care of d-bus activation mean? eg. Why is current d-bus activation insufficient and how does systemd change that? + +The timer part is exciting. 
But it doesn't replace atd and crond yet ;) According to the manpage you seemingly can't set a timer to fire at a specific time/day/daily. + +Posted by Giovanni at Thu Aug 26 02:14:32 2010 +I find Solaris SMF one of the most amazing features that we as sysadmins have to aid us in managing hundreds of servers and it's great that something similar is making its way into Linux. Way to go! + +Posted by Bryan Horstmann-Allen at Thu Aug 26 09:45:29 2010 +Denice: What happens when the Linux OOM killer freaks out and kills a bunch of your services? What ensures they get restarted? Or that they're even running at all? (I guess if you aren't running "a lot" of services, you aren't doing much at all anyway.) + +If you aren't using some form of daemon management (runit, daemontools, etc), in addition to your monitoring, you have failed. + +Lennart: Nice to see the trend to more mature service management in the Linux space, but further fragmentation is annoying... Is Upstart horribly broken, or simply not extensive enough? + +The addition of an API to manage services (and everything else systemd appears to manage) is completely awesome. Can't wait to see a Puppet/Chef provider. :) + +Posted by Bryan Horstmann-Allen at Thu Aug 26 09:48:19 2010 +Ah, I see your post on Upstart. Nevermind. :-) + +Posted by Karel at Thu Aug 26 13:25:34 2010 +I really love basic Unix principles and I think that good software should be based on KISS rules. And from my point of view systemd is not a bad thing. (frankly, it looks better than PA:-) + +It would be really nice to have one place where we manage system processes in userspace. The management should be integrated to Linux -- Linux means cgroups, udev, shared mount subtrees (namespaces), selinux, inotify, etc. It does not make any sense to ignore the modern technologies that are implemented in the kernel or use the technologies separately.
+ +Posted by dissent at Thu Aug 26 16:14:06 2010 +you must love to reimplement perfectly working stuff in a very "futuristic" way... and the talk about not caring for compatibility with "irrelevant" systems/distros make you look so adventurous and sexy... + +Posted by hreidmarr at Thu Aug 26 18:36:07 2010 +I smell problems. Tons of them. And, as always, Fedora will be the catalyst. + +Anyway, let the world burn! + +Posted by fran at Fri Aug 27 16:18:46 2010 +Hey dissent, yes we still love our commodore 64s too. + +Stick to CentOS if you can't stand change. + +Posted by Andy Jackson at Tue Sep 7 21:33:18 2010 +I'm fascinated with your random-seed example & research including Debian. + +If using C programs is beneficial & systemd independent (upstart could do similar calls), then can this be a separate project so that others (me) can integrate its gains into other distros? + +Posted by Lennart at Wed Sep 8 00:22:59 2010 +Riku: there are mainly two reasons for hooking up D-Bus activation to systemd: 1) this way you have a single maintenance interface for all daemons that run on the machine, which covers stuff previously started via SysV stuff as well as stuff previously started exclusively via bus activation. You also can use the same configuration options for the services, i.e. all the logging, execution environment fancinesses and whatever else systemd offers to limit what processes and daemons can do, which is substantially more than what the minimal D-Bus process spawning code can do. 2) this allows us to race-freely start services based on different triggers. Example: avahi shall be started as soon as a network iface shows up, or somebody uses its socket interface, or somebody uses the bus interface. Regardless which trigger came first, we are now able to start only one instance, and do that race-freely. + +Posted by Lennart at Wed Sep 8 01:12:21 2010 +Andy, well, I am working on systemd, and I have little interest in improving other init systems. 
People are welcome to steal code from systemd (after all its Free Software) but writing the code in a style that it would be useful outside of systemd would be very limiting since we couldn't use systemd's rich set of utility functions for implementing these little utilities. + +Posted by bharatt at Wed Sep 8 04:47:03 2010 +Hi Lennart, +Have a query, (which could have been addressed earlier). +How about switching between runlevels dynamically? +We use "init ". +Is this still possible, or any equivalent command is there in "systemd"? +"chkconfig" exists or it is replaced by systemd? + +Posted by liam at Wed Sep 8 04:51:28 2010 +Thanks for these posts. +I'm a bit uncertain as to how far cgroups can be pushed for administrative purposes. Can you have nested cgroups? For instance, a Gnome/X/whatever group that one could kill? Can the end user create alias' for cgroups which could then aggregate them into more manageable units? + +thanks + +Posted by nim@fedoraproject.org at Wed Sep 8 08:07:29 2010 +Yurk + +We are in 2010 now, can't you use the correct unicode glyphs to make a tree that is not cobbled from unrelated characters? This hurts my eyes + +(see the box drawings at U+2500...) + +Posted by Lennart at Wed Sep 8 11:36:25 2010 +Liam, cgroups are fully recursive, you may split every cgroup into sub-cgroups. And as soon as systemd is used for session management the same way it is used for system management session services will be arranged the same way in subgroups of the group the session manager happened to be executed under. + +nim, the tool actually uses unicode glyphs. But when I copied this into the blog story I noticed that not a single browser I tried on not a single OS I tried could show them properly and hence I replaced them in this blog story by this 7bit ASCII graphics. 
+ +Posted by Karellen at Wed Sep 8 14:09:30 2010 +"But when I copied this into the blog story I noticed that not a single browser I tried on not a single OS I tried could show [unicode box characters] properly" + +How were you sending the characters? As HTML numeric character references (e.g. &#1234;) or as plain inline unicode text? + +If inline, were you telling the browser which character encoding you were using? As far as I can tell, your web server simply claims "Content-Type: text/html", and there is no HTML "meta" tag in the page to specify a character encoding. + +Note that the HTML 4 spec, section 5.2.2 , says: + +"The HTTP protocol ([RFC2616], section 3.7.1) mentions ISO-8859-1 as a default character encoding when the "charset" parameter is absent from the "Content-Type" header field. In practice, this recommendation has proved useless because some servers don't allow a "charset" parameter to be sent, and others may not be configured to send the parameter. Therefore, user agents must not assume any default value for the "charset" parameter." + +Note that neither ASCII nor ISO-8859-1 contains any box-drawing characters. + +Yes, browsers probably should assume UTF-8 (IETF std 63) by default, but there's no standard that says they should, and they don't. :-( In the meantime, it's worth specifying it yourself. + +Posted by cesarb at Wed Sep 8 15:25:17 2010 +Box drawing characters seem to show fine for me on Firefox: http://en.wikipedia.org/wiki/Box-drawing_characters has several examples. + +Posted by Lennart at Thu Sep 9 00:10:19 2010 +Well, I didn't want to become a HTML5 hacker for this blog story. I just wanted to get the story out. So I did the simplemost thing, I "photoshopped" the output and replaced the graphical chars with 7bit ASCII. + +Posted by nine at Thu Sep 9 12:04:49 2010 +UNACCEPTABLE MODIFICATION!!!! + +No seriously, this looks great. These features look like they will add real value for administrators. 
+ +I have been getting used to upstart lately with Ubuntu 10.04. It seems like a complex restructure of init.d and the only benefit is faster boot. Did you know to restart a service it uses 'restart foo'??? + +Posted by Anonymous at Thu Sep 9 14:13:19 2010 +"Well, I didn't want to become a HTML5 hacker for this blog story. I just wanted to get the story out. So I did the simplemost thing, I "photoshopped" the output and replaced the graphical chars with 7bit ASCII." + +- "charset" is part of HTML 4, published in 1997 +- UTF-8 was published 1993 + +If you really can't manage to put the real output into your blog post, use a terminal screenshot. + +Posted by Nehemiah at Thu Sep 9 16:47:21 2010 +can't you read from /etc/issue for welcome text? + +Posted by ChrisM at Thu Sep 9 23:09:25 2010 +Just a little comment - the ability to disable services is something that was missing from upstart for a long time and is important to many people. See this feature request: + +https://bugs.launchpad.net/upstart/+bug/94065 + +Will systemd include this functionality? + +Posted by bronson at Fri Sep 10 00:47:26 2010 +Apparently anonymous thought he was commenting on an article about HTML, not systemd. Very strange. + +Posted by Lennart at Fri Sep 10 01:53:45 2010 +ChrisM: yes, systemd has that with "systemctl enable" and "systemctl disable" and had it for quite a while. + +Posted by liam at Fri Sep 10 05:08:07 2010 +Sounds fantastic. You said exactly what I wanted to hear. +Thanks. + +Posted by Perry Lorier at Sun Sep 12 19:57:12 2010 +So, you've reinvented process groups? + +Posted by Lennart at Sun Sep 12 20:21:43 2010 +Perry, no, not at all. process groups you can escape. They aren't hierarchical, they cannot be labelled. Process groups are very very different from cgroups, and useful for little more than pipeline building in shells. + +Posted by Mirko at Wed Sep 15 12:29:04 2010 +I like the theory of it very much. 
How about an additional binary interface to implement services which could add additional controls, meta data etc. not easily and efficiently done by signals and other IPC mechanisms. the like. Also, services could be linked into shared objects more than one at a time, and thus further speed up system start up by requiring the load of a single binary image possibly containing more than one service entry point? + +Posted by Mirko at Wed Sep 15 12:36:50 2010 +I also like the explanation you give on the "upside down" logic of upstart. If, for example, you plug in a USB scanner and you want to open your image scan software as a reaction on that, there is really no need to load a daemon or driver software as a pure result of the "plug" event. Actually, the plug event should solely be processed by higher-level software, like the GUI, which in turn will open the image scanning software, which in turn will request access to a service, which in turn will access a device node, which in turn will load appropriate driver modules, which in turn will initialize the hardware. Upstart messes this clean request-response queue up by unsolicitedly assuming that the mere fact of plugging in a device means that it is about to be used by a specific driver or a specific daemon, no matter what the user actually wants to do... + +Posted by Palatinux at Sat Sep 25 02:51:14 2010 +I've read an article about systemd in a English Linux magazine which made me very curious. Just by looking at all the information on this page, I can say that this is quite a breakthrough. + +As the lead developer of Fortress Linux I was looking for a faster and secure manner to manage and start processes under Linux and Systemd looks like a good implementation to work together or next to secured containers under Fortress Linux. + +The ondemand loading and other features of Systemd may also result into less possible vulnerabilities within the FL OS. 
+ +I am going to test it first within Small Fortress Linux which is a minimized/Live Linux OS. + +If it really turns out to be a good init system, then you can bet it will be the default init system of Fortress Linux. + + +Sincerely, + +Palatinux + +http://www.fortresslinux.org + +Posted by 0x1b at Mon Sep 27 23:37:26 2010 +A couple related questions - could be in the plan, could be OT: +1) Does systemd do any orientation - for example, laptops go from the home net, to wifi on the bus, to work net, to coffee shop wifi, to presentation lan etc etc. Can systemd figure out which nets it has access to, to drive daemon launches? +2) Along the lines of the iCalendar request, is systemd going to participate in work flow schemes? +3) Are you developing systemd-to-systemd API? for example, if a service is conflicted on a system, could it ask a neighbor to run the conflict to satisfy a dependency? + +Also, congrats "Lightning Rod" Lennart (tip of the hat to Ben). I was expecting EvilDead2 and I'm, I'm not that scared anymore... just please wait for f15 to make it the default. + +Posted by Anonymous at Fri Oct 1 06:58:25 2010 +Ideally, couldn't you configure ABRT to only run when core files show up in a given directory, or when something requests its dbus service? + +Posted by drago01 at Fri Oct 1 10:52:20 2010 +CPUSchedulingPolicy=idle ... is there the same thing for IO i.e. IOSchedulingPolicy=idle ? + +In most cases I couldn't care less about CPU on today's multicore machines but IO is still a very limited resource (when not running an SSD). + +The kernel actually allows setting IO priorities (when using the CFQ scheduler). + +Posted by Lennart at Fri Oct 1 13:04:51 2010 +Anonymous: While this would definitely be desirable AFAICS abrt doesn't support this scheme, since it needs to be running when the first crash dump is collected. + +drago01: There's IOSchedulingClass=idle for you. 
+ +Posted by John Drinkwater at Fri Oct 1 13:34:29 2010 +Restart=restart-always +Again, why have this redundancy if you are starting a design from scratch? +Restart=always|once|on-success + +CPUSchedulingPolicy=idle +IOSchedulingClass=idle +Why is one a class, and another a policy? People will mistype these. + +This is not bikeshedding, this is a request to stop making everything long-winded when it does not need to be so. If systemd is to be around for the next few decades, and you have time to refine it before the next Fedora release, please do so. + +Posted by Lennart at Fri Oct 1 13:55:07 2010 +John, regarding Restart= you have a point. And I fixed that now. + +Regarding the Class vs. Policy thing: that's how the kernel calls these things, blame the kernel folks for that. I think it would be a very bad idea to introduce deviating terminology here where the kernel fucked up. + +Posted by Milan Bouchet-Valat at Fri Oct 1 15:00:38 2010 +Glad to see you have an easy to parse Description field! But while you're at it, could you consider providing translated descriptions for configuration tools? + +Recently, Ubuntu had a GSoC about writing a new config tool for Upstart. One of the issues was that there's no way to get a localized translation from Upstart jobs or SysV scripts, let alone an icon! It would be great if you tackled this issue in Systemd, e.g. with a standard .desktop-like file that services should ship. + + +The other part of the work Jacob Peddicord did in his GSoC is more remote from Systemd, but might be interesting. He has a whole project of describing configuration files associated with a service: +http://jacob.peddicord.net/gsoc2010/ +http://people.ubuntu.com/~jpeddicord/SLS/0.8/sls-format-0.8.html + +I guess it can be good you know it exists... + +Posted by Lennart at Fri Oct 1 15:11:55 2010 +Milan: the longer term plan is to support translations for the descriptions the same way as .desktop files have them. 
Right now we don't do this, but this is definitely the plan. I am also open to adding an Icon setting, though I am a bit concerned that if we add and Icon, then the next thing asked for is a Vendor ID and so on and so on. + +Posted by j at Fri Oct 1 18:35:14 2010 +verbosity is redundant (and confusing) for a unix system tool. Since Io scheduling classes are linux-specific, it can be written like that: + +CPUSchedulingPolicy -> SchedLevel +IOSchedulingClass -> IOSchedLevel + +BTW is systemd portable to all Unix or it needs linux kernel for some reason? + +Posted by Lennart at Fri Oct 1 18:47:31 2010 +j, calling the same stuff in userspace differently than in kernelspace, and calling the same stuff in the chrt tool differently than in systemd is a very bad idea. + +systemd is strictly Linux specific. It is not portable to other Unixes and we do not care about portability to them. This allows us to make use of Linux features and is one of the reasons why systemd is so much more powerful than any other init system around. + +Posted by Grahame at Fri Oct 1 19:03:30 2010 +At the moment if I'm having a problem with a daemon failing to start I might just hack the init script, chuck strace in, and restart it. It'd be great if you could show how you might shim a failing daemon, particularly when debugging 'fails on reboot' issues (eg. starts fine later.) + +Posted by Anonymous at Fri Oct 1 22:10:11 2010 +I'm wondering about a services that get autostarted via D-Bus. D-Bus starts them itself, so unless I'm wrong they'll end up in the D-Bus service cgroup, not in their own cgroups. Yet I want them to be controllable as services itself. Is this possible to achieve? 
+ +Posted by Michael at Fri Oct 1 23:21:36 2010 +@Anonymous: + +This is one of systemd's great features: +Starting with dbus 1.4.0, dbus-daemon can hand over starting of system services to systemd, where you have all those possibilities to monitor and confine the service (in its own cgroup) + +All you need to do is to add a +SystemdService=foo.service +line to the D-Bus service file, create a foo.service file for systemd and systemd will automatically start the service defined in foo.service. + +Posted by Andreas at Sat Oct 2 00:49:51 2010 +I agree with those complaining about names like CPUSchedulingPolicy but as Lennart said that is hardly the fault of systemd. Not really much that can be done about it. + +This post covers the part I like the most about systemd. No more boilerplate bash and no horrible XML like the launchd plists or overly verbose XML like SMF. Now there might be other good init systems but this is the first one I have seen where it is easy to just read the job configurations. + +Like the use of sections too in the files so when I read them I can mostly ignore sections like [Install]. + +Posted by codebeard at Sat Oct 2 04:36:12 2010 +@Grahame + +I assume that you can do something like: +ExecStart=/usr/bin/strace -f -o /root/abrtd.strace /usr/sbin/abrtd -d -s + +But perhaps Lennart has another way in mind to do this? + +Posted by John Drinkwater at Sat Oct 2 14:24:51 2010 +Lennart, thanks. Apologies if my comment came over a little stronger than I intended. +I notice some variables for scheduling have different ranges, is this again a kernel issue? Maybe I should go bang some heads there.. + +Posted by Baybal at Sun Oct 24 07:15:55 2010 +Bash is of course slow, but do you know about zsh, dash and finally perl? + +Posted by Aaron Seigo at Fri Nov 19 05:13:28 2010 +"Yupp, KDE folks, you can add an agent for this, too" + +where is the documentation for the relevant API used to accomplish this? 
+ +Posted by alex at Fri Nov 19 06:22:18 2010 +Lennart, what % of a boot time systemd is reducing in compare to a easy to read/manage sysV boot system? + +I were trying to find the measurements, but never found any. + +Posted by Anonymous at Fri Nov 19 07:36:44 2010 +How easily could I disable the automatic cleaning of /tmp? I lost useful bits one too many times before I turned off cleaning of /tmp on all my systems. Plus, this seems like a good opportunity to find out how easily the built-in equivalents to init scripts allow configuration. + +Posted by Michael at Fri Nov 19 08:20:46 2010 +@Anoymous: +/etc/tmpfiles.d/systemd.conf contains (among others) those two lines: + +d /tmp 1777 root root 10d +d /var/tmp 1777 root root 30d + +Just comment them out and you're done. +For more info see http://0pointer.de/public/systemd-man/tmpfiles.d.html + +Posted by Michael at Fri Nov 19 09:05:44 2010 +>> In fact, shell scripts during early boot +>> are only used in exceptional cases + +Why is LVM an "exceptional case"? It's the default to install Fedora on LVM after all. Would you say it is better to not use LVM or will there be better support for it in the future? + +Posted by Dave Airlie at Fri Nov 19 09:49:06 2010 +Hey Lennart, a lot of people use reboot -f when their system won't let them umount filesystems, like for when they've oopsed the kernel and want to remote reboot, so I hope you haven't actually removed proper forced reboot in favour of calling umounts which will just hang the system in the kernel if something has gone wrong in the storage subsystem. + +Posted by bkor at Fri Nov 19 10:43:11 2010 +Always nice to see the updates & new features of systemd. + +Posted by Anon2 at Fri Nov 19 10:57:40 2010 +Always nice to see how an originally nice idea morphs into a Swiss Army Knife software. Next step to shave off a few more milliseconds of a boot time is to move the systemd code into the kernel. 
+ +Posted by Jaroslav Reznik at Fri Nov 19 11:07:11 2010 +From one of Fedora's KDE folks - is it really so difficult to ping us and ask for help supporting yours technologies that come to Fedora? Same as with Polkit... It's not easy to catch it then if we don't know what to support. + +Thanks Aaron for comment. /me is going to look for documentation... + +Posted by Vasilis Vasaitis at Fri Nov 19 13:09:24 2010 +Great stuff! The only thing that comes to mind is, you guys should really make sure to provide detailed documentation for the user/administrator, in man/texinfo format (ideally both). One really good thing about the traditional shell-based boot system is that it's extremely self-documenting: even if I don't know anything about how a distribution has its boot system set up I can start reading inittab and take it from there. With systemd inevitably a lot of the boot process becomes much more opaque, so there should be plenty of documentation about what it does, in what order, how everything is configured/modified/disabled, etc etc. + +Posted by Lennart at Fri Nov 19 16:05:57 2010 +Aaron, systemd is actually documented very well, in fact much better than most projects, however this interface isn't so far. Feel free to ping me if you need details. I don't bite. If KDE hackers want to be involved, then involve yourself, don't always wait for us to ping you. + +alex: I am pretty sure systemd is much easier to manage than sysv. I am booting in 14s now a fully equipped F15 with crypto and everything. With sysv it used to be something like 26s or so. But on purpose I don't give out numbers like this since they are not necessarily reproducible. The speed-up is bigger if you have a system which starts more stuff anyway. And the measurements are highly dependent on your hardware. + +Anonymous: you can configure that easily in the files Michael suggested. Instead of disabling those lines I however recommend simply replacing the last word in those lines. 
If you write "-" instead of 10d then the automatic cleanup is disabled. Note however that you are most likely doing something wrong if you store files you don't want to lose in /tmp. + +Michael: yes, I believe we should no longer install LVM by default. It slows down boot considerably and is still not updated to today's hotpluggable dynamic world. And for the majority of all folks (especially laptop people) it offers zero benefit. In fact, Fedora is the only distribution enabling LVM by default, and I believe we should stop doing that. With the advent of btrfs volume management will become much nicer and future-proof anyway. + +Posted by Lennart at Fri Nov 19 16:11:58 2010 +Dave: traditional 'reboot -f' continues to exist. + +Jaroslav: see my comment to Aaron regarding KDE involvement. Please consider this blog story my ping to you. I am happy to provide you with any information you need and reference implementations. I'd even be willing to review any code you guys might come up with to check if it does what it is supposed to do. + +Vasilis: systemd documentation for admins is actually pretty good, much better than what most projects have. Just check out the man pages: http://0pointer.de/public/systemd-man/ + +Posted by Lennart at Fri Nov 19 17:09:12 2010 +Jaroslav, Aaron: wrote some documentation of the algorithm now for you: http://www.freedesktop.org/wiki/Software/systemd/PasswordAgents -- Happy? + +Posted by nate-m at Fri Nov 19 18:29:30 2010 +"""Michael: yes, I believe we should no longer install LVM by default.""" + + +When Btrfs gets up and going well then LVM will be redundant and inferior for most purposes. Hopefully it won't take long. + +Then they can figure out how to integrate support for btrfs snapshots and volume management into systemd. :P + +For example one of the more useful ways to use Btrfs is to create a new volume for each user. Then users can enable features like compression and encryption for their user. 
Also makes it useful for snapshotting users and applications. + +And there is the btrfs plugin for yum for rolling back updates and such things. + +Fun stuff. Lets hope it does not suck as much as LVM. :) + +Posted by j at Fri Nov 19 19:16:18 2010 +Is systemd only for sysv folks? + +Posted by Lennart at Fri Nov 19 19:22:08 2010 +j, uh? what do you mean by that? We (optionally) support SysV scripts as an alternative source of configuration. You can disable that at compile time even though most distributions will probably leave it enabled by default. + +Some distros (Gentoo) have chosen to disable SysV support in systemd by default, since they historically actually did not use SysV scripts for bootup. + +Posted by alex at Fri Nov 19 20:07:52 2010 +> This will ensure that SIGTERM is delivered to all processes of the crond service, not just the main process + +boy, you know that u dont need to restart cron? its rereading config files right away once it was changed. And even if its needed, you need send HUP to a parent is enought. Anyway. cron, bind, apache... anyway. + +Lennart, with all the respect, your knowledges about daemons are so screwed up. You should read the book of Evi Nemet before touching this things. + +Posted by j at Fri Nov 19 20:09:48 2010 +Sorry, I was under the impression it somehow relies on sysv. Read up on it in the meanwhile (the announcement post), please disregard. + +Posted by bochecha at Fri Nov 19 20:32:50 2010 +@alex: I lost count of the times I had a stalled cron daemon that kept spawning children that would never complete, bringing the host to its knees. + +Stopping such a cron daemon is not enough usually, and when killing it, all children processes remain alive and attach to init, so you have to « kill -9 » them all individually. + +I for one welcome « systemctl kill » heartily. :) + +Posted by Michael at Fri Nov 19 20:35:50 2010 +@alex: +There are different cron implementations. 
The one on Debian (vixie-cron based) indeed does pick up changes to configuration files automatically. + +Fedora uses a different cron implementation from what I could find out which does not automatically reload on configuration changes. + +Please also note that Lennart used --kill-who=main for the SIGHUP example. Exactly for the reason you mentioned that only the main process (what you called parent) needs this signal + +Posted by Jeffrey W. Baker at Fri Nov 19 22:31:40 2010 +Might I suggest abbreviating the syntax of these two things to 'systemctl kill' and 'systemctl killall'? That will be a bit nicer than --kill-who=whatever. + +I agree with @bochecha that the ability to kill all user's children of crond is a miracle. It is quite difficult to write a proper cron job that will never launch in parallel with itself and most users will screw it up. + +Posted by Saint DanBert at Fri Nov 19 23:47:20 2010 +I use *-buntu systems of various sorts. I would love to work with you to see if we can make it work there. I did not find a link to a listserv or similar where one might volunteer. + +Also, I've been a code slinger for almost 40 years. Lately I do technical writing, requirements docs and similar. Again, where do I go to volunteer. + +HERE I AM -- A VOLUNTEER. Someone make contact so that I might help with this effort. + +Cheers, +~~~ 8d;-Dan + +Posted by Jakub Narębski at Sat Nov 20 00:04:36 2010 ++1 for 'systemctl kill' and 'systemctl killall'. + +The --kill-who doesn't make for nice API. + +Posted by Lennart at Sat Nov 20 01:31:42 2010 +alex: you seem a little bit confused, killing SIGTERM triggers a shutdown of a process, not a restart. Also note that HUP in most daemons actually triggers a reload, not a restart, which is quite a distinction. Finally, different cron implementations work differently. 
With the advent of inotify more and more daemons now automatically reload their configuration files if they change (although for the cron case you don't even really need that), but that's a more recent development. I won't comment on who of us has the screwed up knowledge here... + +Jeffrey: definitely an interesting idea, however I am not 100% convinced we really want this. After all I want to be able to read the command line as if it was a sentence, and "kill foo.service" kinda tells me that this will kill this service, but "killall" would suggest there were more than one service by the same name? The killall command we all know and love works like that: it iterates through the process tree and kills everything that matches the name. If we reuse this verb in this context here, then I believe this would be slightly misleading. + +Posted by Lennart at Sat Nov 20 01:38:41 2010 +Saint: see the systemd website, it includes links to IRC and mailing lists and everything. http://www.freedesktop.org/wiki/Software/systemd + +Posted by Horst H. von Brand at Sat Nov 20 01:52:50 2010 +Why is one SIGxxx and the other plain HUP? + +Posted by Lennart at Sat Nov 20 02:05:54 2010 +Horst: just to make the point that you may write the full name including the SIGxxx prefix or leave it out. Since I myself never can remember which tools want the full name and which tools take the unprefixed name I just made all systemd tools take both. While I generally believe too much redundancy in the configuration languages is a bad idea I thought that in this case it's fine. (Note that I actually wrote pretty much this in the blog story, in the second to last paragraph) + +Posted by someone at Sat Nov 20 02:57:47 2010 +Do these commands work? + +systemctl kill -9 crond.service +systemctl kill -s 9 crond.service +systemctl kill -s 0 crond.service +systemctl kill -SIGKILL crond.service +systemctl kill -KILL crond.service + +i.e. 
it would be great if the syntax was exactly the same as kill (except drop the -l case). + +Posted by Lennart at Sat Nov 20 03:29:09 2010 +someone: you have to specify the '-s', but yes, otherwise all three possible syntaxes are accepted (with and without the SIG prefix, and numeric) + +Posted by nona at Sat Nov 20 05:31:56 2010 +Can we use those password agents in early (initrd) boot? + +I'm thinking cryptoroot. AFAICT, systemd isn't supposed to go into the initrd, and these new agents depend on systemd, so how is that going to work? + +Posted by oiaohm at Sat Nov 20 09:39:02 2010 +I am sorry but btrfs is not a replacement to Linux LVM. Yes the LVM support should be fixed up. http://en.wikipedia.org/wiki/Logical_volume_management + +There are cases where LVM can come into its own when you have multi distributions on the same drive. + +LVM can contain all types of partitions. btrfs downfall it's solution can only contain btrfs and cannot snapshot other partition types. Really LVM support in Linux kernel extended to handle windows LVM would be handy. So yes there could be a need for Multi OS installs for LVM support to work correctly. Not something that can be just turned off by them for speed. + +The NFS read only one is also critical this is handy for secure diskless remote boot terminals where you want reset to return to a clean state. + +Lennart you are making the same mistake as some of the design selections with pulseaudio and alsa. Simple fact NFS read only, LVM, RAID and so on exist in the old system so the new system need to support them or have a replacement that is better for the tasks they do. + +If you want to deprecate LVM, NFS read only, RAID support please explain what there proper replacements is matching there function. BTRFS is not a proper replacement to LVM or RAID due to its limited Filesystem type support. 
+ +Posted by Diego at Sat Nov 20 10:43:44 2010 +oiaohm: Eventually Btrfs should be able to export a btrfs subvolume as a block device, so you will be able to put an Ext4 filesystem on top of it - but anyway, systemd is not unsupporting LVM, Lennart only said it will not allow to have boots without scripts. + +That said, deprecating LVM is just not going to happen, LVM is really powerful and provides features (like extending a filesystem to a new disk) that users can't live without. And the installer allows to install Fedora without LVM. + +Posted by David Weinehall at Sat Nov 20 12:02:36 2010 +@Lennart: only the "-s " syntax is POSIX-compliant though; omitting "-s" or including "SIG" is implementation specific and is not guaranteed to be supported. + +Posted by Grahame Bowland at Sat Nov 20 12:44:52 2010 +A bit of a minor thing, but why do all the systemd commands require you to type 'crond.service' rather than just 'crond'? It's a bit cumbersome and seems unnecessary. + +Posted by Lennart at Sat Nov 20 16:43:50 2010 +nona, dracut handles passwords for crypt root already quite well. I see no need to replace that by our agent logic. + +oiaohm: calm down. I am not suggesting to deprecate LVM. Just remove it from the default install. + +Posted by Lennart at Sat Nov 20 16:47:33 2010 +David, uh? systemctl is my brainchild, it's definitely not POSIX compliant, since it was defined by me, not POSIX. + +Grahame: since we maintain not only services, but also sockets, devices, mount points, automount points, timers, inotify triggers, and more. The suffix encodes what kind of object it is you deal with. + +Posted by strcmp at Sun Nov 21 13:49:39 2010 +systemctl kill ssh.service looks dangerous on remote systems, in this case --kill-who=main should be the default... Because of that i vote for something like kill/killall. + +Posted by Jon at Sun Nov 21 14:54:25 2010 +foo.service seems backwards, in that case. 
It would seem more logical to me (at least) that you start with the widest scope and narrow down, e.g. service.ssh (or socket.ssh or whatever). + +See also: heirarchical include mechanisms in 4GLs, e.g. Java; Don't see also: The domain name system. + +Posted by Lennart at Sun Nov 21 14:59:50 2010 +strcmp: note that user sessions are moved into their own cgroups anyway, and ssh sessions would hence not be killed by killing the sshd daemon itself. + +Jon: well, file names tend to have the type at the end, and since our unit names are actually identical to the names of the files their configuration is stored in we chose to do . instead of the reversed order. + +Posted by Holger at Sun Nov 21 21:40:44 2010 +So I guess it would be possible to use it to sent STOP/CONT to a service incl. all its childs, assuming CONT doing the "wakeup" in the revers order of STOP, right? + +Posted by Lennart at Sun Nov 21 21:46:28 2010 +Holger: yes, you can send STOP/CONT, but the order of the delivery is actually undefined. + +Posted by Andreas at Mon Nov 22 00:30:28 2010 +Typing "kill --kill-who=XXX" feels a bit redundant. Could it not be shorter like just" kill --who=XXX"? + +Posted by yhdezalvarez at Mon Nov 22 14:08:15 2010 +@Andreas + +> Could it not be shorter like just" kill --who=XXX"? + +why not "kill --target=XXX"? "who" sounds a little odd. + +Posted by Andreas at Mon Nov 22 14:33:36 2010 +Yeah, --who= was just the first thing which I thought of. + +Posted by Dag Wieers at Mon Nov 22 17:58:46 2010 +Lennart, + +Since the moment I read your first systemd announcement I am excited about this new development. It's one of those things you wonder why that wasn't done decades ago ;-) + +However, I see one thing that I liked about the sysv scripts, that is not possible. The importance of the original sysv scripts is that they are written in bash and so offers a lot of flexibility to system administrators. 
Flexibility comes with responsibility :-) However, where in the past sysv scripts did more than simply start/stop/restart/reload, some scripts allowed to check configuration syntax (eg. apache), initialize something (eg. sshd), etc... + +There is an advantage in keeping those actions as part of the systemd tools in my opinion. Even if they are simply passing the action through to a daemon-specific configuration tool (eg. apachectl), which could become a standard. This is exactly why I liked the design of "op" so much (compared to sudo). It provided system administrators (and users) with a single interface to actions using a clean syntax. + +While reading the post and the documentation I couldn't find whether "custom actions" would be retained in your design. If not, what would be the recommended alternative? + +Posted by Lennart at Mon Nov 22 19:13:05 2010 +Grahame: codebeard is right. You can easily prefix binary paths with strace. Just copy the service file from /lib/systemd/system to /etc/systemd/system and edit the ExecStart= line, and done. + +John: yes, we mostly expose the kernel stuff 1:1. + +Dag: we do not support custom actions, since their set of parameters and what they return is completely free-form it would be a bit weird to pass that through D-Bus. For example, if something is interactive, how would you pass that through D-Bus? If people want additional control interfaces for their tools, then they should create them outside of systemd, for example by creating a separate ctl tool, such as apachectl. I mean, I think it makes sense to expose new "verbs" in systemd iff these verbs make sense for everybody the same way as "start" and "stop" and similar apply for every service the same way. However, something like "apachectl graceful" is in all its meaning highly specific to Apache, and hence trying to abstract that in systemd must fail, since it's nothing that really could be abstracted nicely. 
SMF allows definition of additional verbs for each service, but I am not convinced this is really a good idea. + +Posted by Will P at Thu Nov 25 13:34:43 2010 +It would be really nice to have a way of shutting down a service by specifying a time-delay after TERM before sending KILL, so you can give the service time to gracefully shut down, but then forcibly kill it if it hasn't shut down on its own. Having the logic present in the systemctl command would let it wait out the full duration or exit early if the service completed its shutdown before the time expired. + +maybe: +systemctl killwait -w 15 nfsd +systemctl stopwait -w 15 nfsd + +The killwait would use SIGTERM then SIGKILL... The stopwait would use the ExecStop method, followed by SIGKILL. + +This is a feature that the init script 'killproc' function provides primitive support for. (It's in /etc/init.d/functions on my fedora14 system). Having the more exact knowledge from systemd about the actual state of the service processes would make this a much more robust method than what 'killproc' tries to do. + +For historical perspective, there are internal process management tools at some companies that provide this same functionality, which use process groups to implement the same kind of "service" management, with this 'killproc' delay behavior between TERM and KILL. + +If this feature already exists, then bravo! If not, then what do you think of adding it? + +Posted by sysitos at Thu Nov 25 13:40:02 2010 +@Lennart, yes you are right, the killall would lead to confusion, but if you kill the whole tree, than the right name would be killtree ;) + +So my suggestion: +systemctl kill crond.service -> would only kill the single crond service +systemctl killtree crond.service -> would kill the crond and all of the childs + +And so the even the systemctl kill command couldn't lead to confusion with the well know kill command. + +CU sysitos + +Posted by sysitos at Thu Nov 25 13:56:27 2010 +@me and Lennart, some addition. 
+ +You could than even add the +systemctl killall service1.service -> would kill all instances of service1 +systemctl killalltree service1.service -> would kill all instancen and childs of service1 + +But a question, what happens when a service was started multiple times and now is running multiple times? Which service is than killed by the first kill? The first one, the last one? + +Thanks. +CU sysitos + +Posted by Berniyh at Fri Nov 26 01:02:09 2010 +I would propose these two commands: +systemctl killmain foo.service # Kill the main service (see --kill-who=main) +systemctl killcgroup foo.service # Kill the service and all of its childs. could be killcg, too. + +If I understand the above text correct, what is actually killed in a "complete" kill is everything in the cgroup of the daemon. So this would actually make the command more intuitive. + +Posted by kriss at Sun Dec 5 17:45:51 2010 +Wow, great idea. Hope the project is going to be used especially on mobile Linux distributions. + +Small typo: livirtd v.s. libvirtd (was actually googling for "livirtd" ;-) ) + +Posted by Ralph Corderoy at Sun Dec 5 18:15:58 2010 +--kill-who? I think that should be --kill-whom! Would --victim be more fun and avoid the confusion for those that do or don't speak English natively? :-) + +Posted by Bjoern Michaelsen at Mon Jan 3 00:43:58 2011 +I have not looked at all at systemd yet, so I wonder if it is possible to just use it to query for PIDs? + +systemctl kill -s HUP --kill-who=main crond.service + +looks very un-unixy to me as it needlessly mixes multiple tasks. How about something like: + +systemctl getpid --main crond.service | xargs kill -S HUP + +"one tool, one job" and all that jazz ... + +(I just found the "systemctl status" in the first installment of the series, but its output does not seem to be easily parse- or pipeable. It would be a shame having to revert to mayor shell or perl voodoo to do some basic scenario not covered by systemctls "convenience functions".) 
+ +Posted by ck at Mon Jan 3 08:28:45 2011 +Um, nobody uses pkill? + +$ pkill rsyslogd +$ pkill -u bob rsyslogd +$ pkill -1 rsyslogd +... + +What does "systemd" know what "pkill" doesn't know? + +Posted by Lennart at Mon Jan 3 21:02:26 2011 +ck: you didn't even make it to the second paragraph of "Killing Services", have you? killall and pkill do the same thing, and that paragraph tells you why it is ugly. And it doesn't cover the CGI usecase anyway... + +Posted by Lennart at Mon Jan 3 21:07:12 2011 +Bjoern: there is "systemctl show" which you can use to query particular properties of a service, which is easily parsable and pipable. + +Posted by ck at Tue Jan 4 00:23:00 2011 +Lennart: I /did/ make it to the 2nd part, thank you very much. It went on to explain how process spawn child process and that "killall" cannot cope with that. I'm not using "killall" any more (hint: try "killall" on a Solaris box :)). But I fail to see what "systemctl" knows that "pkill" or "killall" do not see. If process xy or one of its child processes are not "registered" to systemd, "systemctl" won't see them either? How is "systemctl" superior to pkill? I don't see it. I guess what I fail to understand from your article is how the magic is done. +Thanks. + +Posted by Lennart at Tue Jan 4 00:36:44 2011 +ck: systemd creates a kernel cgroup for each service. processes do not have to register with systemd, they will be members of the cgroup and their children automatically too, regardless if the fork, rename themselves or try anything else to escape supervision. + +Posted by Bjoern Michaelsen at Tue Jan 4 02:40:01 2011 +Lennart: Thanks for the reply, sounds great! + +systemctl show -p MainPID crond.service | xargs kill -S HUP + +(just guessing by some man page found somewhere on the interwebz) + +Posted by ck at Tue Jan 4 12:14:24 2011 +cgroups, OK. This might make sense after all. Thanks for the response, Lennart. 
+ +Posted by pal at Sun Jan 9 04:11:59 2011 +Bjoern Michaelsen: +your pipe example has a race condition, that's why it's so unixy + +Posted by Jeremy Kajikawa at Tue Mar 29 17:06:16 2011 +This is a great read! and I was considering writing my own init ahahahaha + +No need to write my own with this as an installable + +I am converting my SourceMage Gnu/Linux system +over to using this right now + +Brilliant, Insanely Brilliant! + +Posted by Soliko Man at Sat Apr 9 18:50:05 2011 +When running command "sudo systemctl" I get this error: +"Failed to get D-Bus connection: Failed to connect to socket /org/freedesktop/systemd1/private: Connection refused" +Can anyone tell me what is wrong with my system? + +Posted by Adam Pribyl at Mon Apr 11 18:09:41 2011 +Soliko: this is a known problem of sudo and su. You have to use "su - " to set complete root environment. + +Posted by Lennart at Mon Apr 11 18:16:38 2011 +Soliko: you are probably not running systemd if this happens. Or you are running a systemd userspace that is older than what you are running as PID 1. + +Adam: No, this has nothing to do with su or sudo. + +Posted by Mark Sobell at Fri Apr 22 02:36:48 2011 +Terminology (old runlevels). + +Most of the documentation I have read says that is the correct term, but keeps referring back to runlevel. For example, there is the graphical.target. But I guess it is uncomfortable to say . Maybe not. Thoughts? + +Also, when used by itself, covers a lot of ground. As in Was How to ask this question using the new terminology? + +Maybe no one has gone there yet. If that is the case I will come up with something I feel is appropriate. But if someone has figured this out I would like to hear about it. + +Thanks! + +Posted by Angie at Fri Apr 22 21:43:30 2011 +Honestly, I don't see any need to replace SystemV either by upstart or systemd. + +Start your system with bare minimal daemons. As the need may be start / stop others. Simplicity rules not promoting lazy unaware Linux users. 
+ +Posted by spuk at Tue May 10 08:56:46 2011 +I can't say I like systemd so far, but it looks like a decent new process manager.. + +re 'cron.service' X 'crond', systemctl could infer what you're referring to, when the name given is univocally mappable to an existing unit name... usually 'cron' would referr only to 'cron.service', not anything else. + +Also.. I assume the standard GNU utils should be patched without much trouble to act on cgroups (i.e. systemd managed things)? Like making kill and killall work on cgroups the same as systemctl kill etc., no? Would DBUS be required for that? + +Posted by Roman at Tue May 10 20:34:51 2011 +Sorry, but this is not appropriate not for 6"-inch devices, not for evince (ugly formatting). + +Posted by Sankar at Tue May 10 21:29:53 2011 +@Roman: If you use Calibre or some such software you should be able to convert to an e-book format easily. I wanted a PDF with pagebreaks for each chapter so that it is easier to carry around and read from a laptop. + +@Lennart: Thanks. + +Posted by Roland Taylor at Tue May 10 21:46:12 2011 +Sankarasivasubramanian Pasupathilingam - that's a lot of name O.o! + +Posted by Thomas at Wed May 18 14:39:13 2011 +Following a link on LWN I read all linked systemd articles and I must say I'm really impressed with the thought that has been put into it. I still disagree on some minor points, but all in all it seems like a very good move. + +My first thought on systemd: Reimplemented in c? No more shell scripts? Why? WHY? Who cares about 7 seconds longer boot time. + +But now I'm seriously hooked. cgroups, recording daemon status apart from 'scroll back the line buffer', shorter and easier readable config without duplicating trivial stuff all over the place .... 
and a sane configuration file layout (even providing a mechanism for overriding maintainer startup config and a unique system id not depending on hostname/ip) convinced me this is a Good Move(TM) + + +It's a bit late for a reply to Shane Falco but here it is: Get better admins. Seriously. Learning a new tool that is properly designed and fixes a lot of issues is always a good thing. + +Posted by Thelin at Tue May 24 17:48:09 2011 +I'm just curious what the "Außergewöhnlicher Migrationsdruck" is doing as a heading for systemd? + +Posted by Tshepang Lekhonkhobe at Tue May 24 18:24:01 2011 +You guy, you rock so much. This systemd thing is huuuge marketing for Fedora. Thanks a lot. + +Posted by Arno at Tue May 24 18:45:15 2011 +>probably the biggest distribution release of all time + +You'd think so, would you? From what I can tell, this release involves 10,000+ packages and 2 architectures. I see the Fedora mirrorlist mentions 5 architectures, but there does not appear to be any way to get there from the download page. I don't think this qualifies as a big release. + +You must have missed the 29,000 packages, 9 architectures release that happened just about four months ago... + +Posted by Rahul Sundaram at Tue May 24 19:44:13 2011 +Packages and architecture aren't the only way to count as biggest and anyway, both are wrong claims. + +sudo yum repolist -C +Loaded plugins: presto, refresh-packagekit, security, yum-fast-downloader +.... + +repolist: 27,474 + +Posted by Simon at Tue May 24 23:53:13 2011 +All that documentation is good and useful - though it would be nice if you could write a short piece on how to set up systemd from scratch. + +For example, if I take an LFS system without any existing sysvinit, and install systemd from source, what configuration do I have to do to get 1) a logger daemon running, 2) local filesystems mounted, 3) networking running (assuming NetworkManager), and 4) a working command prompt? 
+ +I'm speaking from the perspective of an LFS user, but I imagine some basic "migration" document would be useful for the regular distros too. + +Posted by Christian Kellner at Wed May 25 00:51:10 2011 +I quote Alex: "Sweeeet!" ;-) + +Posted by Aissen at Wed May 25 10:44:26 2011 +You didn't mention directly the awesome systemd-analyze tool (maybe because it lacks a manpage?), although it's mentioned in the #7 blog story. + +Posted by Freissen at Wed Jun 15 12:10:54 2011 +"Außergewöhnlicher Migrationsdruck" ?! + +Posted by Rob at Sun Jul 10 02:37:24 2011 +what surprises me is that you haven't written a howto on porting sysVinit scripts to systemd units and linked to it in the "documentation" section. + +That would seem of paramount importance for helping people to move to systemd. + +In particular, it would be extremely useful to have a document that show exactly how to port several sysVinit scripts, and not just the damned trivial cases -- something complex too. + +cheers, + +Rob + +Posted by Lennart at Mon Jul 11 15:22:01 2011 +Rob, this would surprise me too if it was true. But... I actually wrote such a blog article: + +http://0pointer.de/blog/projects/systemd-for-admins-3.html + +Posted by Thomas at Tue Jul 12 17:53:49 2011 +Could you please have the man pages on a location that google can read? + +Either by moving it or removing /public/ from http://0pointer.de/robots.txt ? + +It sucks not to be able to search them :) + +Posted by dashesy at Wed Jul 13 00:33:20 2011 +This is the best article I have found about systemd (what it stands for, not what it does). +I am using systemd in my F15 desktop with great pleasure (my embedded system kernel does not have functional cgroups). +From technical point of view it is perfect, thanks for the great job, to be frank the only thing I do not like about systemd is the excessive number of mount points it adds to mtab. 
This is also the argument I have against excessive kernel command line options for example dracut adds to grub, it is not visually beautiful. + +Posted by anonymous at Thu Jul 14 06:26:29 2011 +bit confuse with: + +To test it we copy it to /etc/systemd/system/ + +in my f15 installation can not find *.service file but i find in /lib/systemd/system + +where .service file actually resident? + +Posted by Rob at Sat Jul 23 15:30:02 2011 +indeed, you have in fact written an answer to this question. I don't know how I could possibly have missed it, considering the obvious link name ("systemd for admins #3"). That kind of linking is what Vincen Flanders on webpagesthatsuck.com calls "mystery meat navigation": + + http://www.webpagesthatsuck.com/mysterymeatnavigation.html + +Unfortunately, this is exactly the trivial example that I was hoping not to find. + +How about taking a complex example and porting it? + +I have a candidate for you -- the hylafax script. You can find it here (with a few changes by me): + + http://www.spielwiese.de/rob/hylafax-init.sh + +It does the following: +- tests for the existence of a file as proof that hylafax is properly configured +- if the file exists, it slurps it in using "." +- sets some shell vars to default values +- figures out how to call "echo" +- depending on the value of various variables possibly start several processes: "faxq", "hfaxd", and in my case also start a process required for faxing via ISDN, "c3faxrecv". + +Things I don't understand: +- how to start multiple processes (do I need multiple systemd services?) +- how to start processes according to application configuration rules (i.e., according to settings in some config file). + +Obviously, systemd is a paradigm-change from sysVinit. What I miss is a document describing how all the sysVinit methods/techniques used get translated into systemd. 
+ +thanks, + +Rob + +Posted by Andreas at Thu Aug 4 15:34:23 2011 +@Rob: See + +http://fedoraproject.org/wiki/SysVinit_to_Systemd_Cheatsheet + +Regards, +Andreas + +Posted by David at Fri Aug 26 11:28:32 2011 +Hello, + +http://i.imgur.com/usftZ.png + +Posted by BMG at Fri Oct 28 11:14:52 2011 +Hello, + +I am using systemd 35 on x86 architecture, and I have a problem with the reboot and halt services, don't work very well, only after I type reboot twice the system is rebooted. I think that is a process or something hang and it takes to long to restart + +reboot.service +[Unit] +Description=Reboot +DefaultDependencies=no +Requires=shutdown.target umount.target final.target +After=shutdown.target umount.target final.target + +[Service] +Type=oneshot +ExecStart=//bin/systemctl --force reboot + +halt.service +[Unit] +Description=Halt +DefaultDependencies=no +Requires=shutdown.target umount.target final.target +After=shutdown.target umount.target final.target + +[Service] +Type=oneshot +ExecStart=//bin/systemctl --force halt + +The option with --force works but I need reboot and halt to work properly. + +Can anybody help me please? + +P.S. systemctl restart reboot.service + systemctl restart halt.service + +are rebooting and halting the system but I need reboot and halt. + +Thank you + +Posted by BMG at Fri Oct 28 11:27:08 2011 +On serial I have the following output either : + +[ 533.721589] <29>systemd[1]: connman.service: main process exited, code=exited, status=1 +[ 533.817543] <29>systemd[1]: Unit connman.service entered failed state. +[ 549.440883] <30>systemd[1]: Reloading. +[ 550.849766] <29>systemd[1]: dev.mount mount process exited, code=exited status=1 +[ 550.938737] <27>systemd[1]: Socket service systemd-kmsg-syslogd.service already active, refusing. +[ 551.045294] <29>systemd[1]: Job systemd-stdout-syslog-bridge.service/start failed with result 'dependency'. 
+ +[ 630.301180] Process mthemedaemon (pid: 113, ti=f45d2000 task=f4914170 task.ti=f45d2000) +[ 630.301186] +[ 630.301191] Call Trace: +Sending SIGKILL to remaining processes... +Unmounting file systems. +Could not unmount /dev: Device or resource busy + +Posted by Transmitters at Thu Nov 10 03:40:06 2011 +Thank you for your analysis and sharing, from your article I learned more. + +Posted by Pawel at Mon Dec 12 16:18:43 2011 +"I don't bite. If KDE hackers want to be involved, then involve yourself, don't always wait for us to ping you." + +It seems you represent different standards towards gnome. There's also planet KDE if you're not aware. + +Posted by dave at Mon Feb 27 23:49:34 2012 +Hi + +I was having problems with my shutdown services not running as expected. I traced the issue to the use of the '--force' flag in the halt, poweroff and reboot services. Here's more about the issue and how to resolve: - + +http://www.practicalclouds.com/content/blog/1/dave-mccormick/2012-02-27/why-do-my-systemd-shutdown-scripts-not-run + +regards + + +Dave + +Posted by Anonymous at Sat Apr 21 01:22:23 2012 +"systemctl can now execute all its operations remotely too (-H switch)." + +$ systemctl -H root@localhost +Failed to get D-Bus connection: In D-Bus address, character '@' should have been escaped + +Posted by Lennart at Sat Apr 21 02:35:44 2012 +Anonymous: you need a newer D-Bus for that. + +Posted by ? at Sat Apr 21 12:22:28 2012 +Why did you change the license to a weaker one? You should have changed it to AGPL3+ + +Posted by Anonymous at Sat Apr 21 15:03:20 2012 +@Lennart: I have to re-check the machine in question, but I'm pretty sure this was D-Bus 1.5.12 (Debian unstable) + +Posted by Jamie at Sat Apr 21 15:24:31 2012 +Any chance of writing in a "chkconfig --list" equivalent? + +Posted by Lennart at Sat Apr 21 15:39:23 2012 +Jamie: see item 42. 
+ +Posted by Lennart at Sat Apr 21 15:52:56 2012 +?: we include libraries non-GPL code should be able to link to, and we wanted the same license for all of systemd, hence we chose to change the license to LGPL for all of it. + +Posted by JR at Sat Apr 21 16:58:19 2012 +Keep up the good work! + +Incidentally, any news about whether Debian will adopt systemd or not? Now with their kFreeBSD spin in the mix I imagine they're not very keen to move to a linux-only init. Maintenance reasons and all that. + +I realize it's packaged in the repos, but I'm not sure if it ever left the "highly experimental here be dragons donut use outside of VMs" stage? + +Posted by Lennart at Sun Apr 22 01:32:07 2012 +JR: I can't speak for Debian. But knowing Debian if they have the choice between A, B or C they usually choose A, B and C. More specifically that means, that they include sysvinit, Upstart and systemd in their distribution (they already do that for quite a while). Also, they are very conservative, and avoid decisions as much as they can, so I am pretty sure they'll stick with sysvinit for a long long time to come. Then, there's quite a big influence from Canonical in Debian, and Canonical is confused enough to continue pushing Upstart, so I doubt the decision for systemd would be an easy and obvious one for them. And as long as there is contention about the topic no decision will take place. + +Debian does include systemd in their testing/unstable distributions now, but if you run that you still end up with a tonload of glue scripts that are quite frankly quite unnecessary on a systemd system. So even if you run systemd on Debian you get an experience that is much more like sysvinit than the real thing. (the real thing is a system that boots up with zero shell scripts) + +Short: if you want a modern system, then there are better choices than Debian (or Gentoo, which is in a very similar spot, sharing the inability to make choices and strong conservatism). 
Try Fedora, or OpenSUSE or Mageia, or similar. + +Posted by Mark at Sun Apr 22 12:34:13 2012 +In the interests of clarity, I would like to propose some changes to your status update. + +"We introduced /run and made it a hard dependency of systemd. This directory is now widely accepted and implemented on all relevant [sic] Linux distributions." + +This should read: "If any distribution does not support systemd requirements, then it is to be deemed irrelevant and not worthy of consideration." + +Also, from the comments; "if you want a modern system, then there are better choices than Debian" + +This should read: "Any distribution which fails to embrace systemd openly and fully must be considered a legacy operating system." + +And while we are on the subject of inferior distributions; "Also, [Debian] are very conservative, and avoid decisions as much as they can, so I am pretty sure they'll stick with sysvinit for a long long time to come." + +This should read: "Any decision not to adopt systemd is not actually a decision at all, and must either be attributed to incompetence, indecisiveness or irrational conservatism." + +I'm sure this comment will be kindly moderated out of existence, but at least be frank with your readers in future - otherwise, how are we going to know which distributions to avoid like the plague, and which ones to embrace? + +Posted by Anonymous at Sun Apr 22 15:19:55 2012 +@Mark: if you want to troll, at least try harder! + +/run transition: name a distribution that you deem relevant that hasn't switched to /run. + +"Any decision not to adopt systemd": there is no decision not to adopt systemd in Debian (beware double negatives). Just because you said so on the debian-devel mailing list, it's not a Debian decision. + +Posted by Simon at Mon Apr 23 00:11:26 2012 +Agree, that's just trolling. It's not a criticism of Debian to say they're conservative - it's just a statement of fact. 
They don't jump on the latest fad - they just support it as an option, and wait to see what other do. It's their main selling point as a distro, really... + +Posted by Josh at Mon Apr 23 04:12:16 2012 +@Lennart: I'm currently working with the maintainer of sysvinit in Debian to make it possible to run systemd without invoking any of the legacy init script infrastructure. As of right now, the Debian systemd package already masks almost all of the legacy init scripts in the "initscripts" package; with the next version of initscripts it'll become possible for systemd to mask off the rest and stop depending on initscripts entirely. + +That just leaves the init scripts shipped in individual daemon packages, and many such packages have already started to ship systemd services that mask their legacy init scripts. + +So, while the situation hasn't gotten much better for packagers (who still have to ship legacy init scripts in their daemon packages), from a user perspective the prospect of a shell-free boot has gotten much closer. + +Posted by Matthew Miller at Mon Apr 23 15:24:18 2012 +I'm really glad to see this post, and I've been glad to see all of the effort put into making this a comprehensive system. Some of my earlier concerns were due to developer comments about how the whole project was basically trivial and kind of a weekend's hack-work. Clearly, that's not the case, but I'm glad to see that my fears haven't borne out. The rough edges are getting the effort they need to really be smooth and shiny. + +Posted by Abe at Mon Apr 23 16:40:42 2012 +Out of curiosity - what are the chances for patches implementing user sessions with systemd to land in upstream? +The idea to use systemd to start not just system-wide but also user-specific services seems pretty natural to me. Any chance to see systemd replacing gdm, kdm and whatever in forseeable future? 
+ +Posted by nate at Mon Apr 23 18:22:36 2012 +Is there any thought to supporting a network heartbeat mechanism with Systemd ? + +It seems to me that there is a significant overlap between a init system like systemd and a clustering solution like Linux-HA. + +All that would be needed is the ability to integrate a existing heartbeat mechanism and then have systemd be able to mount/check file systems, bring up a IP address on the desired interface, and then start up the corresponding services. + +Posted by Adam Williamson at Mon Apr 23 18:49:19 2012 +Yes, yes, yes, but does it make tea yet? + +Posted by Antony Williams at Mon Apr 23 18:49:34 2012 +@Lennart +I think you're missing the point with your 'debian is not modern' quote. +The purpose of debian isn't to be modern, it's to be stable. +Debian is to Red Hat as Ubuntu is to Fedora. +Fedora and Ubuntu take risks and switch to new technology +Debian/Rhel switch when it's safe + +Posted by nate at Mon Apr 23 19:21:48 2012 +@Antony Williams + +Not really. Ubuntu is created by taking a snapshot of Debian unstable and making their Ubuntu-specific desktop changes on top of it. + +The closest you can get for Fedora vs Redhat on Debian is 'Debian unstable' vs 'Debian stable'. + +The biggest problem is that Debian can't make a decision. They just choose to try to support everything so you end up with 3 different INIT systems. + +You have upstart from Ubuntu that Debian tries to support. Then you have SystemD that Debian tries to support. And then you have people fighting to keep the old script-based 'SystemV' style init because that is the only one that can run on the non-Linux Debian versions. (GNU/Hurd and GNU/kfreebsd) + +So instead of putting the work into transition to one of newer systems or sticking with the old they just require package maintainers to do 600% more work for 'service up' and 'service down' functionality then any other operating system. 
+ +It's just quite insane and a very bad situation for Debian right now, init-wise. + +Posted by Antony Williams at Mon Apr 23 20:38:54 2012 +@nate +please double check your facts. +While Ubuntu LTS is based on Debian testing, +but ALL other Ubuntu releases are based on Debian Unstable. + +Secondly, Ubuntu vs Debian is a much better comparison than Debian stable vs unstable, because both unstable and testing are ROLLING releases. +You can't compare them. + +Posted by Anonymous at Tue Apr 24 11:21:33 2012 +Hot off the press, Ubuntu will stick with upstart: + +http://www.markshuttleworth.com/archives/1121 + +Rumours and allegations of a move from Upstart to SystemD are unfounded: Upstart has a huge battery of tests, the competition has virtually none. + +Quality comes from focus and clarity of purpose, it comes from careful design and rigorous practices. After a review by the Ubuntu Foundations team our course is clear: we’re committed to Upstart, it’s the better choice for a modern init, innit. For our future on cloud and client, Upstart is crisp, clean and correct. + +Posted by nate at Tue Apr 24 14:56:07 2012 +@Antony Williams + +How can a person engage in a discussion when you don't even read what was written? + +> please double check your facts. + +Yay. Insult. + +What I said: +>> Ubuntu is created by taking a snapshot of Debian unstable and making their Ubuntu-specific desktop changes on top of it. + +What you said: +> but ALL other Ubuntu releases are based on Debian Unstable. + + +wut? + +> Secondly, Ubuntu vs Debian is a much better comparison than Debian stable vs unstable, because both unstable and testing are ROLLING releases. + +And your point is what? +Nothing. That's right. + +> You can't compare them. + +I absolutely can. + +See? Comparison: + +1) Purpose: +* Fedora is created for the purpose of testing out technologies for Redhat. +* Debian unstable is created for the purpose of testing out technologies for Debian stable. 
+* Ubuntu is NOT created for the purpose of testing out technologies for Debian stable. Ubuntu is created for the purpose of making money by having a 'stable' and 'enterprise' ready OS. + +2) Usage: +* Fedora is used by enthusiasts as a cutting edge Desktop that is RPM based and similar to Redhat, which many use professionally. Occasionally used by people that need something newer then latest Redhat release. +* Debian Unstable is used by enthusiasts as a cutting Desktop that is Deb based and similar to Debian, which many use professionally. Occasionally used by people that need something newer then latest Debian stable release. + + +etc etc. + +See? I was actually able to compare them quite easily. + +Posted by Ole Laursen at Tue Apr 24 16:07:51 2012 +Interesting message from Mark Shuttleworth. Unfortunately, Upstart still appears not be ready for server daemons: + +https://bugs.launchpad.net/upstart/+bug/406397 + +It's easy to be "crisp, clean and correct" if you're not actually solving the problem. ;-) + +Posted by Jon at Mon Apr 30 16:28:02 2012 +Debian have not made a decision yet, but it would be a decision for two releases time (there is not enough time to implement systemd for the next release even if there was unanimous agreement to do so). Thus there's no reason to rush the decision. The ongoing flamewars on -devel are a red herring, really: Debian is a do-ocracy and the discussion is a diversion from the fact that the people who do the work will make the decision (and the vast majority of participants in the flamewars will do no work, just argue) + +Posted by oiaohm at Wed May 2 00:24:24 2012 +Lennart "Debian does include systemd in their testing/unstable distributions now, but if you run that you still end up with a tonload of glue scripts that are quite frankly quite unnecessary on a systemd system." + +I can understand this. Remember debian testing is not running latest versions you have done Lennart. 
Some of those scripts address faults in older versions of systemd. So faults you have fixed debian don't have fixed yet. Debian is a little pragmatic particularly with init systems. + +Posted by Bob Gustafson at Fri May 4 16:30:02 2012 +I'm wondering if there is a list of error codes and their explanation. I have googled and clicked, but haven't yet seen a list. + +I have some vncserver problems and see code 125 and code 29 in the logs. + +Posted by Rosalba at Thu May 31 13:04:46 2012 +Just so you know, I've been using Gentoo since 2003, and I really love it. It's on all my maeihncs (laptop, desktop, media center, servers, you name it), and I don't think I will ever change unless something cooler appears in the future. But that doesn't seem probable in the near future.I also have been using Linux/Unix since 1996. I have used (and programmed in) Solaris, HP/UX, AIX, and BSD.And I also use the GNOME 3 overlay, together with the systemd overlay. I also love them, BTW. And while I don't use the GNOME 3 overlay in all my boxes (I don't need GNOME 3 in my media center nor in my servers), I do use systemd in all of them.I really respect your opinion as a Gentoo Developer (and thanks, BTW, for helping to make my favorite distribution even better), but I respectfully disagree with what you are saying here. Not only because I think systemd is great (and pulseaudio and avahi too, BTW), but also because I think you are misinterpreting some things.If systemd is integrated into GNOME, it still will be able to compile and be used in all the other Unixes, and it will not be mandatory*. But if it's available, it will give us some really cool features.GNOME will implement some interfaces (via dbus, probably): if the underlying OS can fulfill them all, great: If not, it will be marked so and not use those features. That's all.With the use of USE flags, people in Gentoo who (for whatever reason) doesn't like systemd would not have to use it. 
And people in BSD or Solaris, who actually can't* use it, would not need to worry about. They will lose some features, that's all.So nobody is shovelling stuff in other people's mouths. This is Open Source: people write what they think is cool code, and we get to use it if so we desire it. If not, we can always keep using Linux 1.0, and GNOME 1.2. That's our choice. Or even better: we can take the code and change so we don't need to use things that (for whatever reason) we don't want to use.I really like the way the future is looking: GNOME 3 and systemd and the new kernel features and everything managed in my favorite distribution looks awesome, and with the things coming in the future it would look even awesomer.Just my 2 ${CURRENCY/100}. And again, thanks for helping to mantain my favorite distribution. + +Posted by Brian at Thu Aug 9 21:02:16 2012 +"In fact, you can easily write a daemon with this that can run, and exit (or crash), and run again and exit again (and so on), and all of that without the clients noticing or loosing any request." + +This made me think of erlang. I assume systemd is written in C++, but just curious if you considered other languages? + +Not that I think it's a good idea to make the most core service of a linux system dependent on the erlang compiler... but an erlang mock-up would be interesting as a prototyping tool for systemd. + +Posted by Bassu at Wed Oct 31 00:30:47 2012 +Lennart, I just wanted to say a personal thank you for all of your awesomeness and the hard work you have put in to this! + +$ sudo systemd-analyze +Startup finished in 2751ms (kernel) + 1888ms (userspace) = 4639ms + +Posted by Ken Stailey at Sat Nov 10 20:45:39 2012 +The ps "ax" and "-e" options both enable displaying "everything", i.e. all processes. It is not necessary to use both. 
+ +Compare: +ps xawf -eo pid,user,cgroup,args +ps wf -eo pid,user,cgroup,args + +Leave a Comment: + +Your Name: + + +Your E-mail (optional): + + +Comment: + + +As a protection against comment spam, please type the following number into the field on the right: +Secret Number Image + +Please note that this is neither a support forum nor a bug tracker! Support questions or bug reports posted here will be ignored and not responded to! + +It should be obvious but in case it isn't: the opinions reflected here are my own. They are not the views of my employer, or Ronald McDonald, or anyone else. + +Please note that I take the liberty to delete any comments posted here that I deem inappropriate, off-topic, or insulting. And I exercise this liberty quite aggressively. So yes, if you comment here, I might censor you. If you don't want to be censored you are welcome to comment on your own blog instead. +Lennart's Blog | Lennart's Homepage | Lennart's Photos | Impressum/Imprint +Lennart Poettering +Syndicated on Planet GNOME, Planet Fedora, planet.freedesktop.org, Planet Debian Upstream. feed RSS 0.91, RSS 2.0 +Archives: 2005, 2006, 2007, 2008, 2009, 2010, 2011 + +Valid XHTML 1.0 Strict! Valid CSS! 
diff --git a/Zim/Utils/systemd/Booting_up-Tools_and_tips_for_systemd_a_Linux_init_tool.txt b/Zim/Utils/systemd/Booting_up-Tools_and_tips_for_systemd_a_Linux_init_tool.txt new file mode 100644 index 0000000..ac1fdd7 --- /dev/null +++ b/Zim/Utils/systemd/Booting_up-Tools_and_tips_for_systemd_a_Linux_init_tool.txt @@ -0,0 +1,167 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-20T20:10:35+08:00 + +====== Booting up-Tools and tips for systemd a Linux init tool ====== +Created Tuesday 20 November 2012 +http://www.h-online.com/open/features/Booting-up-Tools-and-tips-for-systemd-1570630.html + + 15 May 2012, 10:00 + +Booting up: Tools and tips for systemd, a Linux init tool +by Lennart Poettering, Kay Sievers, Thorsten Leemhuis + +These days, Fedora, openSUSE, Mandriva and a few other distributions use the **systemd init tool** for system starts. It includes its own tools for configuration and diagnosis, and the tricks it needs when the system doesn't start are different from sysvinit's. + +The following article about systemd was first published in the German magazine c't 13/11 and has been updated in various places before appearing in The H Open. An article from the same issue of c't on systemd's ideas, approach and functionality was also recently published on The H Open. + +The systemd init tool, just over two years old, is already the default in some distributions; a few more include it as an alternative to **upstart** and the outdated **sysvinit**. Thanks to __compatibility features__, some of the commands and tricks familiar from distributions with sysvinit and upstart work with systemd too. To really take advantage of the much newer init system's capabilities, however, administrators should get to know systemd's tools and parameters as well. + +The main tool for interacting with systemd is **systemctl**, a command-line program. 
The tool requires root privileges to make changes to the configuration or to restart background services, but even non-root users can use some diagnostic requests. If you launch the program without any parameters, you will see a list of the "units" that execute tasks when the system is started, including mounting and checking disks, starting background services and configuring hardware. + +For a standard Fedora 15 installation, systemctl lists around 160 active units, divided into ten types. __Service units__ are one of the most important types of units as they take care of background services, which a sysvinit distribution typically starts using init scripts. __Mount and automount units__ mount filesystems. __Socket units__ create sockets and, as soon as a socket is accessed, indirectly start another unit using dependencies. You can use a parameter to tell systemctl to only list certain types of unit, for example all service units: + + **systemctl --type=service** + +Systemctl automatically forwards its task to less; you can use the arrow keys to scroll not just up and down but also to the right, since more information is occasionally "hidden" there. + +To boot a system, systemd uses units, which are divided into types that systemctl can list separately. The first column in the list tells you the name of the unit; the second column, whether systemd was able to **load the unit definition**. The third column says whether the unit is active. If you include the -a parameter, the program also displays **inactive units** – that is, units that are installed but not used during boot-up; the same applies to unit files that the init system wasn't able to load, most likely because of an error in the unit file. + +The fourth column gives the current status: "exited" means that the process **completed without any errors**. 
This is the case for, say, services that don't continue in the background after they've been launched – for example, the service unit that, for compatibility reasons, executes the /etc/rc.d/rc.local file, familiar from sysvinit, at system start. "Running" is for services that are running in the background, such as cron, dbus, sshd and udev. + +The sixth column describes the unit. Units labelled "LSB" or "SYSV" have been automatically created by systemd to take care of traditional init scripts. + +Services that could not be started or that crashed later are marked as "failed" in the fifth column, in red, if the console can display colours. You can find out when the crash happened and which error code the program provided when it ended using commands such as: + + systemctl status ntpd.service + +Systemctl's status command provides a time and error code for crashed services Zoom For a newly installed Fedora 15, systemctl lists about 60 service units, including the login processes for the text consoles (agetty), since – unlike sysvinit – systemd uses service units to manage these like a normal background service. + +Next: Unit files and targets + +===== Working with units ===== + +The system configuration files for creating units are in **/lib/systemd/system/**, but a file with the same name in **/etc/systemd/system** will take priority. + +Unit definitions are usually much shorter than the classic sysvinit scripts. For example, a unit file for the service for synchronising network time via NTP is just a few lines long: + +[Unit] +Description=Network Time Service + +[Service] +ExecStart=/usr/bin/ntpd -n -u ntp:ntp -g + +[Install] +WantedBy=multi-user.target + +All unit files include a section, starting with __[Unit]__, with general settings and a short description. The __[Service]__ section has service-specific tasks – for NTP, just the command-line to launch the service. If a specific command is required to end the program, you can set it using **ExecStop=**. 
This step is unnecessary for the NTP daemon, since, following Unix tradition, it can be ended with a simple "SIGTERM" signal, which systemd sends to end a service if no other command has been specified. + +The **[Install]** section contains instructions for systemd to interpret during (de-)installation; the entry in the NTP example means that the time should be synchronised when the "Multi-User" target is activated. + +===== Targets ===== +**The concept of "target" units is similar to that of sysvinit's runlevels;** indeed, for compatibility, systemd even understands runlevel names for equivalent targets. In Fedora 15, you can therefore enter the familiar single as a parameter in the kernel's boot-loader; systemd then activates **rescue.target**, which provides a minimal interface comparable to single-user mode. + +Along the same lines, 3 can be used to activate a multi-user mode – that is, to fully start the system without using the graphical login interface. In systemd, this mode is represented by the **multi-user.target** unit, which can be set as the standard with this link: + +ln -sf /lib/systemd/system/multi-user.target /etc/systemd/system/default.target + +If, at a later point, you do want the graphical login interface to be a standard part of boot-up, you can set **graphical.target** as the standard target in the same way; this is the equivalent of runlevel 5 in older versions of Fedora and openSUSE. As an alternative to the old runlevels, you can also give the kernel the names of the target units to be started: + + __systemd.unit=multi-user.target__ + +**To activate a different target unit during operation**, you can use systemctl's isolate command, which requires root privileges: + +systemctl isolate rescue.target + +The change to the rescue target is interesting for administration tasks, since systemd now ends all user logins and background services so that only system services run, such as the one monitoring logical volumes (lvm2-monitor). 
Sometimes, even these services need to be ended for rebuilds; you can then use **emergency.target** to go to emergency mode, where only the process for the input prompt runs, along with the kernel threads. + +===== Wants and needs ===== +Systemctl's show command delivers some internal information on running units and the tasks they execute, as well as information about which units systemd calls on to enable the multi-user target: + + **systemctl show -p Wants multi-user.target** + +Other targets can also be in the output, such as **basic.target** in multi-user.target. In turn, the former depends on **sysinit.target**, which requires **local-fs.target**. These three targets take care of the system's basic configuration, including mounting filesystems and starting udev. To specify dependency on the basic target, the unit configuration file multi-user.target contains the following statements: + +Requires=basic.target +After=basic.target + +With After in addition to Requires, systemd knows that it must __not only enable the target but also wait for that target__ to completely start. + +__Wants is a weaker alternative to Requires__. Systemd enables these units as well, but continues starting the system even if one of them doesn't start. 
This kind of dependency can also be specified with links to unit files in directories consisting of the unit file's path and name and a .wants in order to determine which units are retrieved when a target is accessed.__(也就是说,这种依赖关系可以通过在unit name +.wants目录中建立被依赖unit的link方式创建。)__ You can use ls or systemctl's show command: + +ls /*/systemd/system/multi-user.target.wants/ +systemctl show -p Wants multi-user.target + +===== Shutting down ===== + +If you want to deactivate the NTPD service unit so the system time is not synchronised via NTP upon boot-up, you can use the following style of command: + + systemctl disable ntpd.service + +Here, systemctl is simply removing the link to the service unit file **in the Wants directories**; it creates a link when a service is activated with **enable**. Both steps can also be done manually in order to (de)activate units without using systemctl. + +__也就是说,systemctl disable ntpd.service会将该unit file 的链接从/etc/systemd/system/multi-user.target.wants目录中删除。__ +__systemctl enable ntpd.service会将该unit file的链接建立在上面的目录中。__ + +If a service is started by a traditional init script rather than a unit, __systemctl forwards the activation request to the chkconfig program__. With Fedora 15, for example, this can occur when you install Apache and activate it using systemctl. In turn, chkconfig can also delegate tasks to systemctl in Fedora 15 – but only some, so you're better off not using it at all or only with caution. + +The (de)activation of a service takes effect the next time it is started or when the system is shut down; the following command starts a service immediately: + + systemctl start ntpd.service + +For sysvinit distributions, the equivalent to this command is **service ntpd start**. A systemctl command with the **stop** parameter instead of start ends the service. With the **status** command, systemctl delivers information about the unit, including its current status and the name of the file that specifies it. 
The program also says whether the service is currently running and, if so, for how long it has been running as well as which processes belong to it, with the main process explicitly displayed. + +Group affiliation can be used to determine which service a process belongs to. It is quite easy to find out which service started which processes by looking at the control groups created by systemd. The command __systemd-cgls__ displays the cgroup hierarchy created by systemd; alternatively, ps shows group affiliation: + +ps xaw -eo pid,args,cgroup + +Next: Fixing problems + +===== Stopped at the starting line ===== + +If there are problems during boot-up that systemd seems to be directly or indirectly involved in, start the kernel with the following parameters: + + **systemd.log_target=kmsg systemd.log_level=debug** + +Systemd recognises the parameters and provides extensive troubleshooting information on the console. At the same time, the information is saved for later analysis in the kernel notification buffer created by dmesg. + +The "systemd-cgls" tool can display the control groups and the processes belonging to them. + +The command line programs __poweroff, halt and reboot are part of systemd,__ but the system can also be shut down or restarted using systemctl commands, which are the same. The system can also be restarted this way: + + systemctl kexec + +After all services have been stopped, systemd tells the running kernel to directly start a previously configured Linux kernel, which allows for fast restarts, since it bypasses BIOS and boot-loader. If no kexec kernel is configured, systemd executes a normal restart. + +===== Deep down ===== +For standard administration tasks, you will usually only come into contact with **service and target units**; the others are primarily important for deeper systemd functions or, during boot-up, take care of everything that __distribution-specific scripts__ took care of in sysvinit and upstart distributions. 
These tasks include mounting the filesystems specified in /etc/fstab, activating the swap space and occasionally cleaning up directories for temporary files. + +For some of these tasks, systemd has an **automount** function that can create pseudo mount points for filesystems configured in /etc/fstab; they are not really mounted until they are first accessed. Adding **comment=systemd.automount** in /etc/fstab changes any mount point into an automount point, which can speed up the boot-up process and be interesting for access to network shares, since the WLAN connection is not created until the user uses NetworkManager. + +===== Looking for answers ===== +Systemctl can be used to tell systemd to **send a signal without knowing the service's process ID**. For example, the following command puts rsyslogd in debug mode, which is ended when you enter the command a second time. + +**systemctl kill** --signal=USR1 rsyslogd.service + +If you don't specify which signal to send, systemctl sends a normal term signal, which ends all processes belonging to a service. + +Systemd includes a program that __visualises the boot processes__; the dark red areas signal services' start phases .The command systemd-analyze tells you how long the system took to boot and how much of that time was due to the kernel, initramfs and the systemd-controlled configuring of the userland. If you want to look more closely into the latter factor, use **systemd-analyze blame** to get individual units' start times. For more detailed information on the boot process, the program can create an SVG file that visualises the units' starts: + +__systemd-analyze plot > plot.svg__ + +Sometimes, this can be used to track down units that excessively prolong boot-up. The seventh part of the "Systemd for Administrators" blog series has a few tips on correctly interpreting these results. 
The series, with twelve sections at the moment, also includes many other tips and notes on using systemd: + +* Verifying Bootup +* Which Service Owns Which Processes? +* How Do I Convert A SysV Init Script Into A systemd Service File? +* Killing Services +* The Three Levels of "Off" +* Changing Roots +* The Blame Game +* The New Configuration Files +* On /etc/sysconfig and /etc/default +* Instantiated Services +* Converting inetd Services +* Securing Your Services + +Lennart Poettering's homepage also has many other articles with more background information on the init system. In addition, Poettering recently listed some of the changes made to systemd in the last year and a half in his third "Systemd Status Update". diff --git a/Zim/Utils/systemd/systemd_for_Administrators,_Part_1.txt b/Zim/Utils/systemd/systemd_for_Administrators,_Part_1.txt new file mode 100644 index 0000000..a484b7b --- /dev/null +++ b/Zim/Utils/systemd/systemd_for_Administrators,_Part_1.txt @@ -0,0 +1,273 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-21T19:33:39+08:00 + +====== systemd for Administrators, Part 1 ====== +Created Wednesday 21 November 2012 +http://www.0pointer.de/blog/projects/systemd-for-admins-1.html + +レナート Wunschkonzert, Ponyhof und Abenteuerspielplatz ﻟﻴﻨﺎﺭﺕ + +Mon, 23 Aug 2010 +systemd for Administrators, Part 1 + +As many of you know, systemd is the new Fedora init system, starting with F14, and it is also on its way to being adopted in a number of other distributions as well (for example, OpenSUSE). For administrators systemd provides a variety of new features and changes and enhances the administrative process substantially. This blog story is the first part of a series of articles I plan to post roughly every week for the next months. In every post I will try to explain one new feature of systemd. Many of these features are small and simple, so these stories should be interesting to a broader audience. 
However, from time to time we'll dive a little bit deeper into the great new features systemd provides you with. + +===== Verifying Bootup ===== +Traditionally, when booting up a Linux system, you see a lot of little messages passing by on your screen. As we work on speeding up and parallelizing the boot process these messages are becoming visible for a shorter and shorter time only and be less and less readable -- if they are shown at all, given we use __graphical boot splash__ technology like **Plymouth** these days. Nonetheless the information of the boot screens was and still is very relevant, because it shows you for each service that is being started as part of bootup, wether it managed to start up successfully or failed (with those green or red [ OK ] or [ FAILED ] indicators). To improve the situation for machines that boot up fast and parallelized and to make this information more nicely available during runtime, we added a feature to systemd that tracks and remembers for each service whether it started up successfully, whether it exited with a non-zero exit code, whether it timed out, or whether it terminated abnormally (by segfaulting or similar), both during start-up and runtime. By simply typing systemctl in your shell you can query the state of all services, both **systemd native and SysV/LSB services**: + +[root@lambda] ~# systemctl +UNIT LOAD ACTIVE SUB JOB DESCRIPTION +dev-hugepages.automount loaded active running Huge Pages File System Automount Point +dev-mqueue.automount loaded active running POSIX Message Queue File System Automount Point +proc-sys-fs-binfmt_misc.automount loaded active waiting Arbitrary Executable File Formats File System Automount Point +sys-kernel-debug.automount loaded active waiting Debug File System Automount Point +sys-kernel-security.automount loaded active waiting Security File System Automount Point +sys-devices-pc...0000:02:00.0-net-eth0.device loaded active plugged 82573L Gigabit Ethernet Controller +[...] 
+sys-devices-virtual-tty-tty9.device loaded active plugged /sys/devices/virtual/tty/tty9 +-.mount loaded active mounted / +boot.mount loaded active mounted /boot +dev-hugepages.mount loaded active mounted Huge Pages File System +dev-mqueue.mount loaded active mounted POSIX Message Queue File System +home.mount loaded active mounted /home +proc-sys-fs-binfmt_misc.mount loaded active mounted Arbitrary Executable File Formats File System +abrtd.service loaded active running ABRT Automated Bug Reporting Tool +accounts-daemon.service loaded active running Accounts Service +acpid.service loaded active running ACPI Event Daemon +atd.service loaded active running Execution Queue Daemon +auditd.service loaded active running Security Auditing Service +avahi-daemon.service loaded active running Avahi mDNS/DNS-SD Stack +bluetooth.service loaded active running Bluetooth Manager +console-kit-daemon.service loaded active running Console Manager +cpuspeed.service loaded active exited LSB: processor frequency scaling support +crond.service loaded active running Command Scheduler +cups.service loaded active running CUPS Printing Service +dbus.service loaded active running D-Bus System Message Bus +getty@tty2.service loaded active running Getty on tty2 +getty@tty3.service loaded active running Getty on tty3 +getty@tty4.service loaded active running Getty on tty4 +getty@tty5.service loaded active running Getty on tty5 +getty@tty6.service loaded active running Getty on tty6 +haldaemon.service loaded active running Hardware Manager +hdapsd@sda.service loaded active running sda shock protection daemon +irqbalance.service loaded active running LSB: start and stop irqbalance daemon +iscsi.service loaded active exited LSB: Starts and stops login and scanning of iSCSI devices. +iscsid.service loaded active exited LSB: Starts and stops login iSCSI daemon. +livesys-late.service loaded active exited LSB: Late init script for live image. 
+livesys.service loaded active exited LSB: Init script for live image. +lvm2-monitor.service loaded active exited LSB: Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling +mdmonitor.service loaded active running LSB: Start and stop the MD software RAID monitor +modem-manager.service loaded active running Modem Manager +netfs.service loaded active exited LSB: Mount and unmount network filesystems. +NetworkManager.service loaded active running Network Manager +ntpd.service loaded maintenance maintenance Network Time Service +polkitd.service loaded active running Policy Manager +prefdm.service loaded active running Display Manager +rc-local.service loaded active exited /etc/rc.local Compatibility +rpcbind.service loaded active running RPC Portmapper Service +rsyslog.service loaded active running System Logging Service +rtkit-daemon.service loaded active running RealtimeKit Scheduling Policy Service +sendmail.service loaded active running LSB: start and stop sendmail +sshd@172.31.0.53:22-172.31.0.4:36368.service loaded active running SSH Per-Connection Server +sysinit.service loaded active running System Initialization +systemd-logger.service loaded active running systemd Logging Daemon +udev-post.service loaded active exited LSB: Moves the generated persistent udev rules to /etc/udev/rules.d +udisks.service loaded active running Disk Manager +upowerd.service loaded active running Power Manager +wpa_supplicant.service loaded active running Wi-Fi Security Service +avahi-daemon.socket loaded active listening Avahi mDNS/DNS-SD Stack Activation Socket +cups.socket loaded active listening CUPS Printing Service Sockets +dbus.socket loaded active running dbus.socket +rpcbind.socket loaded active listening RPC Portmapper Socket +sshd.socket loaded active listening sshd.socket +systemd-initctl.socket loaded active listening systemd /dev/initctl Compatibility Socket +systemd-logger.socket loaded active running systemd Logging Socket 
+systemd-shutdownd.socket loaded active listening systemd Delayed Shutdown Socket +dev-disk-by\x1...x1db22a\x1d870f1adf2732.swap loaded active active /dev/disk/by-uuid/fd626ef7-34a4-4958-b22a-870f1adf2732 +basic.target loaded active active Basic System +bluetooth.target loaded active active Bluetooth +dbus.target loaded active active D-Bus +getty.target loaded active active Login Prompts +graphical.target loaded active active Graphical Interface +local-fs.target loaded active active Local File Systems +multi-user.target loaded active active Multi-User +network.target loaded active active Network +remote-fs.target loaded active active Remote File Systems +sockets.target loaded active active Sockets +swap.target loaded active active Swap +sysinit.target loaded active active System Initialization + +LOAD = Reflects whether the unit definition was properly loaded. +ACTIVE = The high-level unit activation state, i.e. generalization of SUB. +SUB = The low-level unit activation state, values depend on unit type. +JOB = Pending job for the unit. + +221 units listed. Pass --all to see inactive units, too. +[root@lambda] ~# + +(I have shortened the output above a little, and removed a few lines not relevant for this blog post.) + +Look at the **ACTIVE** column, which shows you the high-level state of a service (or in fact of any kind of unit systemd maintains, which can be more than just services, but we'll have a look on this in a later blog posting), whether it is active (i.e. running), inactive (i.e. not running) or in any other state. If you look closely you'll see one item in the list that is marked maintenance and highlighted in red. This informs you about a service that failed to run or otherwise encountered a problem. In this case this is ntpd. 
Now, let's find out what actually happened to ntpd, with the systemctl status command: + +[root@lambda] ~# **systemctl status ntpd**__.service__ +ntpd.service - Network Time Service + Loaded: loaded (/etc/systemd/system/ntpd.service) + **Active: maintenance** + Main: 953 (code=exited, status=255) + CGroup: name=systemd:/systemd-1/ntpd.service +[root@lambda] ~# + +This shows us that NTP terminated during runtime (when it ran as **PID 953**), and tells us exactly the error condition: the process exited with an exit status of 255. + +In a later systemd version, we plan to hook this up to ABRT, as soon as this enhancement request is fixed. Then, if systemctl status shows you information about a service that crashed it will direct you right-away to the appropriate crash dump in ABRT. + +Summary: use systemctl and systemctl status as modern, more complete replacements for the traditional boot-up status messages of SysV services. systemctl status not only captures in more detail the error condition but also shows runtime errors in addition to start-up errors. + +That's it for this week, make sure to come back next week, for the next posting about systemd for administrators! + +posted at: 10:22 | path: /projects | permanent link to this entry | 40 comments +Posted by bochecha at Mon Aug 23 11:20:20 2010 +Thanks, this serie of article will no doubt be very interesting. :) + +About this one, I don't really get the **LOAD, ACTIVE and SUB** columns. + +As I understood it, the first one indicates whether a unit **configuration** was loaded or not into systemd. But if it wasn't loaded, then it would not appear in the output of systemctl, right? + +You say that ACTIVE is a high-level generalization of SUB. In this case, why is that necessary? Isn't SUB already enough information? + +Maybe if you could give the list of the possible values for each columns then that would help me understand the differences. 
:) + +Or maybe just point to the appropriate documentation if that is all already documented somewhere, I must admit I haven't had the time yet to look at Systemd as closely as I wanted. + +Posted by Lennart at Mon Aug 23 11:35:34 2010 +bochecha: well, there are many reasons why a service might **show up as failed** to load in the systemctl output: for example, it was referenced as required dependency of another service, but we couldn't find neither a native service definition file nor a SysV init script for it. Or, there was a parsing failure while reading it. Or, because the file was incomplete. And that might even happen while a service is active, for example, because the user requested **a configuration file reload** from systemd after changing a service file, and a service that is already running suddenly has an invalid configuration file. That effectively means that the LOAD and the ACTIVE state are mostly orthogonal(正交、互相垂直): you may have a running service where configuration loaded fine, you may have a stopped service where it loaded fine, but you may also have a running service where configuration failed to load. + +**LOAD和ACTIVE具有不同的功能,LOAD表明该service的configure file是否正常读取,而ACTIVE表明该serive是否在运行。一般来说LOAD失败时service也是failed,但是有可能正常运行的service在reload configration file时出错。** + +And yes, ACTIVE and SUB show you the same information, though ACTIVE in a more generalized form. While SUB has states that are __specific to each unit type__ (e.g. **"running", "exited", "dead" for services; "plugged" and "dead" for devices; or "mounted" and "dead" for mount points**), ACTIVE exposes the same high-level states for all units. + +ACTIVE较SUB表示的是一个更一般、抽象的状态,而SUB与具体的unit类型相关,表达的是更具体的信息。例如ACTIVE的状态是active但是SUB的内容可能是running或plugged或exited。 + +We only distuingish __6 ACTIVE states__ (to list them: **active, reloading, inactive, maintenance, activating, deactivating**), which are mapped from the lower-level states, which might be many more. 
For example __services have 15 low-level states__: **dead, start-pre, start, start-post, running, exited, reload, stop, stop-sigterm, stop-sigkill, stop-post, final-sigterm, final-sigkill, maintenance, auto-restart.** + +Posted by John Drinkwater at Mon Aug 23 12:23:36 2010 +Why systemctl status ntpd.service and not systemctl status ntpd? +Why does systemctl display names like getty@tty2.service and not as getty@tty2 ? + +Do we really need to have .mount, .service, etc on all our config files now? +IMO, horrible to have file extensions, equally to have them as long as the file name. + +Posted by Lennart at Mon Aug 23 13:36:52 2010 +John, we support **different kinds of units**. We manage __sockets, mount points, services, devices, automount points, timers, paths, targets, swap files/devices and snapshots__ with the same tools, with the same commands. For example "dbus.service" and "dbus.socket" are both used by the D-Bus system, but can be controlled and introspected __independently__. To distuingish them, we hence write their full name everywhere, so that you explicitly state that you mean the D-Bus socket instead of the D-Bus service, or vice versa. + +Also, I actually find this one of the pretty things in this design: the unit names are actually __identical__ to the file names they are configured in. + +Posted by Shane Falco at Mon Aug 23 14:19:27 2010 +I'm with Mr. Drinkwater on this. Extensions (especially long extensions) are one symptom of a bad design. All this feels very rushed and hacked together. + +It looks like this core systemctl function won't display cleanly in a standard 80 character wide terminal? Are we trying to change linux so much that we no longer care about those sorts of things? It may be different for gnome developers, but unix admins I know have lots of windows open and usually they're 80 characters wide. + +Finally, why choose a name so close to another common utility? systemctl? Seriously? 
When another core system utility called sysctl already exists? + +Posted by Lennart at Mon Aug 23 14:26:44 2010 +Shane, I am sorry but I guess we just have to agree to disagree to this. The points you raise are in the category "matter of taste" or even "bike shedding", and so I guess we should leave it as that. + +systemctl shortens the output depending on the terminal size. If you use a tiny terminal, the description string might even be suppressed entirely. The bigger your terminal/screen is, the more output we can stick on it. That should not surprise anybody. Or to put it in other words: we support 80ch terminals just fine, but if you use bigger terminals we'll make use of it. + +Posted by Shane Falco at Mon Aug 23 14:49:26 2010 +Sounds reasonable and I appreciate the response. It looks like you are taking your own personal experience (which is all anyone can ask) and creating something that you think is appropriate. But I fear that you don't really see the bigger picture of unix admins out there...there are a lot of guys I work with who are junior/middle guys who just work for a paycheck. They're __not linux geeks__. I dare say they're the majority. They could be doing AIX or Solaris or linux for all they care. I think they're going to have trouble with systemd. It just does too much and it's too baroque. Too confusing. + +I finally, finally got them going with services/chkconfig and now this... + +Posted by Michael at Mon Aug 23 15:00:08 2010 +Just a quick question, can the description be translated ? +I assume that this is not planned, as they are config file, not software, but as we are able to translate .desktop, it would be great to have some way of doing it cleanly. + +Posted by Lennart at Mon Aug 23 15:10:54 2010 +Shane, well, what makes you think that we haven't looked around ourselves? Also, we managed to get systemd accepted by Fedora, in particular FESCO. We managed to convince this technical committee that systemd is a good thing. 
Do you really want to say that Fedora as a whole is incapable of "seeing the big picture", but you are the only one who is? Maybe things are the other way round? Ever thought about that? + +Also, note that systemd actually brings Linux administration much closer to how many of these things are done on Solaris. Much of what we added is inspired by SMF, and other init systems. That means the administrators should enjoy how we make things on Linux work much more like the other big server operating systems. + +Posted by Lennart at Mon Aug 23 15:13:46 2010 +Michael: it currently isn't translated, but the plan is to copy very closely the mechanism how .desktop files are translated (our unit definition files also use an .ini inspired format), so that we can reuse existing tools for this. This hasn't been implemented yet however. + +Posted by Simon at Mon Aug 23 16:07:24 2010 +Shane Falco, you are being dishonest. + +Your concern is that this change would require you to learn new things and have to teach new things. + +The way you should rephrase your questions is: + +“Sorry for being off-topic; I am posting this on the For Admins post while my concern is really about "Does systemd offer so many nice things that justifies the change?". I would like to see the question answered: "What are the advantages of systemd that justify this big change? I did not search your previous posts on this subject."” + +Posted by Diego at Mon Aug 23 16:21:50 2010 +What about gettext support? + +Posted by Lennart at Mon Aug 23 16:42:09 2010 +Diego: it's unlikely we'll use the gettext APIs inside of PID 1, simply because i18n data tends to be stored in /usr, and we try to avoid accesses to that, since some folks still have that on a separate partition (even though it is crazy and misses the point). 
However, for the client tools this is different and we'll certainly reuse the frameworks currently used by other projects, be it gettext or intltool, or the hacks to make .desktop files translatable. +I'm sure people would hate me if I'd start moving i18n data to /lib... + +Posted by Nagilum at Mon Aug 23 20:45:09 2010 +If ntpd.service would have emitted some error message while starting up, how would I display that using systemd? + +Posted by Lennart at Mon Aug 23 20:49:05 2010 +Nagilum: by checking the logs. The long term plan is to hook up "systemctl status" to the logs, so that you'll see the most recent log messages generated by a service next to the service. But until that happens we need to beef up syslog considerably, i.e. make it indexable and stuff like that. + +Posted by Denice at Tue Aug 24 00:43:45 2010 +I'm a little worried that anyone thinks Solaris' SMF is something worthy of copying. I find it horribly over-engineered. These days it is common to run virtual servers which do really only one thing (web server, or a mysql slave, or an ldap server). I have a number of xen guests that list perhaps 15 'chkconfig-ed on' services: +chkconfig --list|grep :on + +So from a system administrator's point of view, speaking of managing targeted servers and not multimedia desktops, I don't need anything complicated to manage runtime services. + +You might want to seriously think about writing a tutorial for a typical small server (apache only, for example - no graphics, no bluetooth, no atd, no iscsi, etc.), and then convince us that systemd provides any value. + +cheers, etc. + +Posted by Shane at Tue Aug 24 01:49:13 2010 +Denice said it better than I ever could. As someone stuck with over a hundred Solaris 10 servers, I agree completely with her assessment. 
+ +Here's a nice little commentary on Apple's launchd which I feel is just as appropriate for systemd: + +http://lowendmac.com/ed/winston/10kw/launchd.html + +It's monolithic, it's "over engineered", and it does too many things. In a nutshell, it's anti-unix. + +Posted by Karellen at Tue Aug 24 14:02:45 2010 +@Shane: + + [systemd] does too many things +It manages the startup and lifetime of system processes. That's it. + +From the article you linked: + Merging periodically run jobs into the main system process doesn't make sense. + +Why not? "cron" and "at" manage the startup of periodic system processes. The only thing they do different from "init" is that they start the processes at a time other than bootup. Everything else is common between them. So why not de-duplicate the effort involved in starting, tracking and logging, and just allow "init" to start other processes at times other than boot? + + Replacing a simple /etc/crontab text file with multiple, awkwardly named XML plist files scattered among no less than four different directories is taking two big steps toward complexity. + +There's no reason that systemd would be implemented that badly. In fact, I'm pretty sure that systemd reads existing "crontab" files just fine. So systemd doesn't require any changes there. + + Starting infrequently used on-demand socket-based daemons from launchd seems like it could open the main system process to a potential denial of service attack. I have not explored this idea or researched to see if it has already been tried, + +Well, I haven't researched it, that looks like nothing more than FUD and making-shit-up to me. + + One of the core principles of Unix programing is do one thing and do it well. + +Like having one and only one place to consistently manage the startup and monitoring of system processes? Oh yeah, that's totally anti-Unix-philosophy. + +Posted by Lennart at Tue Aug 24 19:37:50 2010 +Denice, __Linux is a scalabale operating system__. 
It is used on everything from big iron to the tiniest devices. With systemd we try to cover the whole bandwidth, and please understand that your specific use case is not the only one we need to cover. + +Shane, you are right, systemd is nothing like traditional Unix. And that is a good thing. Unix was designed 41 years ago. You honestly believe that its design is perfect and flawless and 41 years after it was designed still should be followed in all detail? No, computers changed, and Unix never was perfect. It probably was a better design than most other operating systems, but this does not mean it is perfect and we should never depart from it. systemd is inspired by Unix, but also from what has been done on MacOS and even on the Windows world, and on Solaris. We didn't copy any of the existing services 1:1, we just let ourselves be inspired by their best features and translated them to Linux and added quite a bit of new stuff on top. And that's how it should be done. Unix is an inspiration, it is not the holy grail. Not 41y after it was designed. + +The fact that on traditional Unix the init system was separate from cron, from at, from inetd, from the dbus service activator and from everything else meant that all of them reimplemented a big chunk of their code, i.e. what was involved with spawning processes. It was a useless code duplication, and all implementations sucked at it in one way or another. Also, you could not run the same thing from more than one of these systems without manually ensuring that things would happen race-freely and properly ordered. In systemd we unified all of this. We use the same codepaths for spawning processes, regardless if they are started via timers, via sockets, via busses, at boot-up, via devices and so on. This allows us to reduce the amount of code duplication, and provide the same awesome process babysitting to all triggers. And that is a big big advantage. 
If you look at the systemd source code you will notice that the remaining amount of code, for example for doing timer-based spawning is actually very very short, less than 500 lines (including comments and whitespace!). So overall, we simplify things drastically, we get rid of immense code duplication, and we still are a lot more powerful than what came before. + +So, in summary: just because we do things differently doesn't mean we do it worse. + +And if you tell me that systemd is not Unixy, then I can only agree, and I don't feel ashamed at all of that. Because my horizon is much further than just Unix. + +Posted by Lennart at Mon Jul 11 15:22:01 2011 +Rob, this would surprise me too if it was true. But... I actually wrote such a blog article: + +http://0pointer.de/blog/projects/systemd-for-admins-3.html diff --git a/Zim/Utils/systemd/systemd_for_Administrators,_Part_II.txt b/Zim/Utils/systemd/systemd_for_Administrators,_Part_II.txt new file mode 100644 index 0000000..5bf9ee8 --- /dev/null +++ b/Zim/Utils/systemd/systemd_for_Administrators,_Part_II.txt @@ -0,0 +1,333 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-21T20:12:53+08:00 + +====== systemd for Administrators, Part II ====== +Created Wednesday 21 November 2012 + +レナート Wunschkonzert, Ponyhof und Abenteuerspielplatz ﻟﻴﻨﺎﺭﺕ + +Wed, 08 Sep 2010 +systemd for Administrators, Part II + +Here's the second installment of my ongoing series about systemd for administrators. + +===== Which Service Owns Which Processes? ===== + +On most Linux systems the number of processes that are running by default is substantial. Knowing __which process does what and where it belongs to__ becomes increasingly difficult. Some services even maintain a couple of worker processes which clutter the "ps" output with many additional processes that are often not easy to recognize. 
This is further complicated if daemons spawn arbitrary 3rd-party processes, as Apache does with CGI processes, or cron does with user jobs. + +A slight remedy for this is often the __process inheritance tree__, as shown by "ps xaf". However this is usually not reliable, as processes whose parents die get reparented to PID 1, and hence all information about inheritance gets lost. If a process "double forks" it hence loses its relationships to the processes that started it. (This actually is supposed to be a feature and is relied on for the traditional Unix daemonizing logic.) Furthermore processes can freely change their names with PR_SETNAME or by patching argv[0], thus making it harder to recognize them. In fact they can play hide-and-seek with the administrator pretty nicely this way. + +In systemd we __place every process that is spawned in a control group named after its service__. Control groups (or cgroups) at their most basic are simply groups of processes that can be arranged in a hierarchy and labelled individually. When processes spawn other processes these children are automatically made members of the parents cgroup. Leaving a cgroup is not possible for unprivileged processes. Thus, cgroups can be used as **an effective way to label processes** after the service they belong to and be sure that the service cannot escape from the label, regardless how often it forks or renames itself. Furthermore this can be used to safely __kill a service and all processes it created__, again with no chance of escaping. + +In today's installment I want to introduce you to two commands you may use to relate systemd services and processes. The first one, is the well known ps command which has been updated to show cgroup information along the other process details. And this is how it looks: + +**$ ps xawf -eo pid,user,cgroup,args** + PID USER CGROUP COMMAND + 2 root - [kthreadd] + 3 root - \_ [ksoftirqd/0] +[...] 
+ 4281 root - \_ [flush-8:0] + 1 root name=systemd:/systemd-1 /sbin/init + 455 root name=systemd:/systemd-1/sysinit.service /sbin/udevd -d +28188 root name=systemd:/systemd-1/sysinit.service \_ /sbin/udevd -d +28191 root name=systemd:/systemd-1/sysinit.service \_ /sbin/udevd -d + 1096 dbus name=systemd:/systemd-1/dbus.service /bin/dbus-daemon --system --address=systemd: --nofork --systemd-activation + 1131 root name=systemd:/systemd-1/auditd.service auditd + 1133 root name=systemd:/systemd-1/auditd.service \_ /sbin/audispd + 1135 root name=systemd:/systemd-1/auditd.service \_ /usr/sbin/sedispatch + 1171 root name=systemd:/systemd-1/NetworkManager.service /usr/sbin/NetworkManager --no-daemon + 4028 root name=systemd:/systemd-1/NetworkManager.service \_ /sbin/dhclient -d -4 -sf /usr/libexec/nm-dhcp-client.action -pf /var/run/dhclient-wlan0.pid -lf /var/lib/dhclient/dhclient-7d32a784-ede9-4cf6-9ee3-60edc0bce5ff-wlan0.lease - + 1175 avahi name=systemd:/systemd-1/avahi-daemon.service avahi-daemon: running [epsilon.local] + 1194 avahi name=systemd:/systemd-1/avahi-daemon.service \_ avahi-daemon: chroot helper + 1193 root name=systemd:/systemd-1/rsyslog.service /sbin/rsyslogd -c 4 + 1195 root name=systemd:/systemd-1/cups.service cupsd -C /etc/cups/cupsd.conf + 1207 root name=systemd:/systemd-1/mdmonitor.service mdadm --monitor --scan -f --pid-file=/var/run/mdadm/mdadm.pid + 1210 root name=systemd:/systemd-1/irqbalance.service irqbalance + 1216 root name=systemd:/systemd-1/dbus.service /usr/sbin/modem-manager + 1219 root name=systemd:/systemd-1/dbus.service /usr/libexec/polkit-1/polkitd + 1242 root name=systemd:/systemd-1/dbus.service /usr/sbin/wpa_supplicant -c /etc/wpa_supplicant/wpa_supplicant.conf -B -u -f /var/log/wpa_supplicant.log -P /var/run/wpa_supplicant.pid + 1249 68 name=systemd:/systemd-1/haldaemon.service hald + 1250 root name=systemd:/systemd-1/haldaemon.service \_ hald-runner + 1273 root name=systemd:/systemd-1/haldaemon.service \_ hald-addon-input: 
Listening on /dev/input/event3 /dev/input/event9 /dev/input/event1 /dev/input/event7 /dev/input/event2 /dev/input/event0 /dev/input/event8 + 1275 root name=systemd:/systemd-1/haldaemon.service \_ /usr/libexec/hald-addon-rfkill-killswitch + 1284 root name=systemd:/systemd-1/haldaemon.service \_ /usr/libexec/hald-addon-leds + 1285 root name=systemd:/systemd-1/haldaemon.service \_ /usr/libexec/hald-addon-generic-backlight + 1287 68 name=systemd:/systemd-1/haldaemon.service \_ /usr/libexec/hald-addon-acpi + 1317 root name=systemd:/systemd-1/abrtd.service /usr/sbin/abrtd -d -s + 1332 root name=systemd:/systemd-1/getty@.service/tty2 /sbin/mingetty tty2 + 1339 root name=systemd:/systemd-1/getty@.service/tty3 /sbin/mingetty tty3 + 1342 root name=systemd:/systemd-1/getty@.service/tty5 /sbin/mingetty tty5 + 1343 root name=systemd:/systemd-1/getty@.service/tty4 /sbin/mingetty tty4 + 1344 root name=systemd:/systemd-1/crond.service crond + 1346 root name=systemd:/systemd-1/getty@.service/tty6 /sbin/mingetty tty6 + 1362 root name=systemd:/systemd-1/sshd.service /usr/sbin/sshd + 1376 root name=systemd:/systemd-1/prefdm.service /usr/sbin/gdm-binary -nodaemon + 1391 root name=systemd:/systemd-1/prefdm.service \_ /usr/libexec/gdm-simple-slave --display-id /org/gnome/DisplayManager/Display1 --force-active-vt + 1394 root name=systemd:/systemd-1/prefdm.service \_ /usr/bin/Xorg :0 -nr -verbose -auth /var/run/gdm/auth-for-gdm-f2KUOh/database -nolisten tcp vt1 + 1495 root name=systemd:/user/lennart/1 \_ pam: gdm-password + 1521 lennart name=systemd:/user/lennart/1 \_ gnome-session + 1621 lennart name=systemd:/user/lennart/1 \_ metacity + 1635 lennart name=systemd:/user/lennart/1 \_ gnome-panel + 1638 lennart name=systemd:/user/lennart/1 \_ nautilus + 1640 lennart name=systemd:/user/lennart/1 \_ /usr/libexec/polkit-gnome-authentication-agent-1 + 1641 lennart name=systemd:/user/lennart/1 \_ /usr/bin/seapplet + 1644 lennart name=systemd:/user/lennart/1 \_ gnome-volume-control-applet + 1646 
lennart name=systemd:/user/lennart/1 \_ /usr/sbin/restorecond -u + 1652 lennart name=systemd:/user/lennart/1 \_ /usr/bin/devilspie + 1662 lennart name=systemd:/user/lennart/1 \_ nm-applet --sm-disable + 1664 lennart name=systemd:/user/lennart/1 \_ gnome-power-manager + 1665 lennart name=systemd:/user/lennart/1 \_ /usr/libexec/gdu-notification-daemon + 1670 lennart name=systemd:/user/lennart/1 \_ /usr/libexec/evolution/2.32/evolution-alarm-notify + 1672 lennart name=systemd:/user/lennart/1 \_ /usr/bin/python /usr/share/system-config-printer/applet.py + 1674 lennart name=systemd:/user/lennart/1 \_ /usr/lib64/deja-dup/deja-dup-monitor + 1675 lennart name=systemd:/user/lennart/1 \_ abrt-applet + 1677 lennart name=systemd:/user/lennart/1 \_ bluetooth-applet + 1678 lennart name=systemd:/user/lennart/1 \_ gpk-update-icon + 1408 root name=systemd:/systemd-1/console-kit-daemon.service /usr/sbin/console-kit-daemon --no-daemon + 1419 gdm name=systemd:/systemd-1/prefdm.service /usr/bin/dbus-launch --exit-with-session + 1453 root name=systemd:/systemd-1/dbus.service /usr/libexec/upowerd + 1473 rtkit name=systemd:/systemd-1/rtkit-daemon.service /usr/libexec/rtkit-daemon + 1496 root name=systemd:/systemd-1/accounts-daemon.service /usr/libexec/accounts-daemon + 1499 root name=systemd:/systemd-1/systemd-logger.service /lib/systemd/systemd-logger + 1511 lennart name=systemd:/systemd-1/prefdm.service /usr/bin/gnome-keyring-daemon --daemonize --login + 1534 lennart name=systemd:/user/lennart/1 dbus-launch --sh-syntax --exit-with-session + 1535 lennart name=systemd:/user/lennart/1 /bin/dbus-daemon --fork --print-pid 5 --print-address 7 --session + 1603 lennart name=systemd:/user/lennart/1 /usr/libexec/gconfd-2 + 1612 lennart name=systemd:/user/lennart/1 /usr/libexec/gnome-settings-daemon + 1615 lennart name=systemd:/user/lennart/1 /usr/libexec/gvfsd + 1626 lennart name=systemd:/user/lennart/1 /usr/libexec//gvfs-fuse-daemon /home/lennart/.gvfs + 1634 lennart name=systemd:/user/lennart/1 
/usr/bin/pulseaudio --start --log-target=syslog + 1649 lennart name=systemd:/user/lennart/1 \_ /usr/libexec/pulse/gconf-helper + 1645 lennart name=systemd:/user/lennart/1 /usr/libexec/bonobo-activation-server --ac-activate --ior-output-fd=24 + 1668 lennart name=systemd:/user/lennart/1 /usr/libexec/im-settings-daemon + 1701 lennart name=systemd:/user/lennart/1 /usr/libexec/gvfs-gdu-volume-monitor + 1707 lennart name=systemd:/user/lennart/1 /usr/bin/gnote --panel-applet --oaf-activate-iid=OAFIID:GnoteApplet_Factory --oaf-ior-fd=22 + 1725 lennart name=systemd:/user/lennart/1 /usr/libexec/clock-applet + 1727 lennart name=systemd:/user/lennart/1 /usr/libexec/wnck-applet + 1729 lennart name=systemd:/user/lennart/1 /usr/libexec/notification-area-applet + 1733 root name=systemd:/systemd-1/dbus.service /usr/libexec/udisks-daemon + 1747 root name=systemd:/systemd-1/dbus.service \_ udisks-daemon: polling /dev/sr0 + 1759 lennart name=systemd:/user/lennart/1 gnome-screensaver + 1780 lennart name=systemd:/user/lennart/1 /usr/libexec/gvfsd-trash --spawner :1.9 /org/gtk/gvfs/exec_spaw/0 + 1864 lennart name=systemd:/user/lennart/1 /usr/libexec/gvfs-afc-volume-monitor + 1874 lennart name=systemd:/user/lennart/1 /usr/libexec/gconf-im-settings-daemon + 1903 lennart name=systemd:/user/lennart/1 /usr/libexec/gvfsd-burn --spawner :1.9 /org/gtk/gvfs/exec_spaw/1 + 1909 lennart name=systemd:/user/lennart/1 gnome-terminal + 1913 lennart name=systemd:/user/lennart/1 \_ gnome-pty-helper + 1914 lennart name=systemd:/user/lennart/1 \_ bash +29231 lennart name=systemd:/user/lennart/1 | \_ ssh tango + 2221 lennart name=systemd:/user/lennart/1 \_ bash + 4193 lennart name=systemd:/user/lennart/1 | \_ ssh tango + 2461 lennart name=systemd:/user/lennart/1 \_ bash +29219 lennart name=systemd:/user/lennart/1 | \_ emacs systemd-for-admins-1.txt +15113 lennart name=systemd:/user/lennart/1 \_ bash +27251 lennart name=systemd:/user/lennart/1 \_ empathy +29504 lennart name=systemd:/user/lennart/1 \_ ps xawf 
-eo pid,user,cgroup,args + 1968 lennart name=systemd:/user/lennart/1 ssh-agent + 1994 lennart name=systemd:/user/lennart/1 gpg-agent --daemon --write-env-file +18679 lennart name=systemd:/user/lennart/1 /bin/sh /usr/lib64/firefox-3.6/run-mozilla.sh /usr/lib64/firefox-3.6/firefox +18741 lennart name=systemd:/user/lennart/1 \_ /usr/lib64/firefox-3.6/firefox +28900 lennart name=systemd:/user/lennart/1 \_ /usr/lib64/nspluginwrapper/npviewer.bin --plugin /usr/lib64/mozilla/plugins/libflashplayer.so --connection /org/wrapper/NSPlugins/libflashplayer.so/18741-6 + 4016 root name=systemd:/systemd-1/sysinit.service /usr/sbin/bluetoothd --udev + 4094 smmsp name=systemd:/systemd-1/sendmail.service sendmail: Queue runner@01:00:00 for /var/spool/clientmqueue + 4096 root name=systemd:/systemd-1/sendmail.service sendmail: accepting connections + 4112 ntp name=systemd:/systemd-1/ntpd.service /usr/sbin/ntpd -n -u ntp:ntp -g +27262 lennart name=systemd:/user/lennart/1 /usr/libexec/mission-control-5 +27265 lennart name=systemd:/user/lennart/1 /usr/libexec/telepathy-haze +27268 lennart name=systemd:/user/lennart/1 /usr/libexec/telepathy-logger +27270 lennart name=systemd:/user/lennart/1 /usr/libexec/dconf-service +27280 lennart name=systemd:/user/lennart/1 /usr/libexec/notification-daemon +27284 lennart name=systemd:/user/lennart/1 /usr/libexec/telepathy-gabble +27285 lennart name=systemd:/user/lennart/1 /usr/libexec/telepathy-salut +27297 lennart name=systemd:/user/lennart/1 /usr/libexec/geoclue-yahoo + +(Note that this output is shortened, I have removed most of the kernel threads here, since they are not relevant in the context of this blog story) + +In the third column you see the cgroup systemd assigned to each process. You'll find that the udev processes are in the **name=systemd:/systemd-1/sysinit.service** cgroup, which is where systemd places **all processes started by the sysinit.service** service, which covers early boot. 
+ +My personal recommendation is to set the shell alias psc to the ps command line shown above: + +**alias psc='ps xawf -eo pid,user,cgroup,args'** + +With this service information of processes is just four keypresses away! + +A different way to present the same information is the __systemd-cgls__ tool we ship with systemd. It shows the cgroup hierarchy in a pretty tree. Its output looks like this: + +$ systemd-cgls ++ 2 [kthreadd] +[...] ++ 4281 [flush-8:0] ++ user +| \ lennart +| \ 1 +| + 1495 pam: gdm-password +| + 1521 gnome-session +| + 1534 dbus-launch --sh-syntax --exit-with-session +| + 1535 /bin/dbus-daemon --fork --print-pid 5 --print-address 7 --session +| + 1603 /usr/libexec/gconfd-2 +| + 1612 /usr/libexec/gnome-settings-daemon +| + 1615 /ushr/libexec/gvfsd +| + 1621 metacity +| + 1626 /usr/libexec//gvfs-fuse-daemon /home/lennart/.gvfs +| + 1634 /usr/bin/pulseaudio --start --log-target=syslog +| + 1635 gnome-panel +| + 1638 nautilus +| + 1640 /usr/libexec/polkit-gnome-authentication-agent-1 +| + 1641 /usr/bin/seapplet +| + 1644 gnome-volume-control-applet +| + 1645 /usr/libexec/bonobo-activation-server --ac-activate --ior-output-fd=24 +| + 1646 /usr/sbin/restorecond -u +| + 1649 /usr/libexec/pulse/gconf-helper +| + 1652 /usr/bin/devilspie +| + 1662 nm-applet --sm-disable +| + 1664 gnome-power-manager +| + 1665 /usr/libexec/gdu-notification-daemon +| + 1668 /usr/libexec/im-settings-daemon +| + 1670 /usr/libexec/evolution/2.32/evolution-alarm-notify +| + 1672 /usr/bin/python /usr/share/system-config-printer/applet.py +| + 1674 /usr/lib64/deja-dup/deja-dup-monitor +| + 1675 abrt-applet +| + 1677 bluetooth-applet +| + 1678 gpk-update-icon +| + 1701 /usr/libexec/gvfs-gdu-volume-monitor +| + 1707 /usr/bin/gnote --panel-applet --oaf-activate-iid=OAFIID:GnoteApplet_Factory --oaf-ior-fd=22 +| + 1725 /usr/libexec/clock-applet +| + 1727 /usr/libexec/wnck-applet +| + 1729 /usr/libexec/notification-area-applet +| + 1759 gnome-screensaver +| + 1780 
/usr/libexec/gvfsd-trash --spawner :1.9 /org/gtk/gvfs/exec_spaw/0 +| + 1864 /usr/libexec/gvfs-afc-volume-monitor +| + 1874 /usr/libexec/gconf-im-settings-daemon +| + 1882 /usr/libexec/gvfs-gphoto2-volume-monitor +| + 1903 /usr/libexec/gvfsd-burn --spawner :1.9 /org/gtk/gvfs/exec_spaw/1 +| + 1909 gnome-terminal +| + 1913 gnome-pty-helper +| + 1914 bash +| + 1968 ssh-agent +| + 1994 gpg-agent --daemon --write-env-file +| + 2221 bash +| + 2461 bash +| + 4193 ssh tango +| + 15113 bash +| + 18679 /bin/sh /usr/lib64/firefox-3.6/run-mozilla.sh /usr/lib64/firefox-3.6/firefox +| + 18741 /usr/lib64/firefox-3.6/firefox +| + 27251 empathy +| + 27262 /usr/libexec/mission-control-5 +| + 27265 /usr/libexec/telepathy-haze +| + 27268 /usr/libexec/telepathy-logger +| + 27270 /usr/libexec/dconf-service +| + 27280 /usr/libexec/notification-daemon +| + 27284 /usr/libexec/telepathy-gabble +| + 27285 /usr/libexec/telepathy-salut +| + 27297 /usr/libexec/geoclue-yahoo +| + 28900 /usr/lib64/nspluginwrapper/npviewer.bin --plugin /usr/lib64/mozilla/plugins/libflashplayer.so --connection /org/wrapper/NSPlugins/libflashplayer.so/18741-6 +| + 29219 emacs systemd-for-admins-1.txt +| + 29231 ssh tango +| \ 29519 systemd-cgls +\ systemd-1 + + 1 /sbin/init + + ntpd.service + | \ 4112 /usr/sbin/ntpd -n -u ntp:ntp -g + + systemd-logger.service + | \ 1499 /lib/systemd/systemd-logger + + accounts-daemon.service + | \ 1496 /usr/libexec/accounts-daemon + + rtkit-daemon.service + | \ 1473 /usr/libexec/rtkit-daemon + + console-kit-daemon.service + | \ 1408 /usr/sbin/console-kit-daemon --no-daemon + + prefdm.service + | + 1376 /usr/sbin/gdm-binary -nodaemon + | + 1391 /usr/libexec/gdm-simple-slave --display-id /org/gnome/DisplayManager/Display1 --force-active-vt + | + 1394 /usr/bin/Xorg :0 -nr -verbose -auth /var/run/gdm/auth-for-gdm-f2KUOh/database -nolisten tcp vt1 + | + 1419 /usr/bin/dbus-launch --exit-with-session + | \ 1511 /usr/bin/gnome-keyring-daemon --daemonize --login + + getty@.service + | + tty6 
+ | | \ 1346 /sbin/mingetty tty6 + | + tty4 + | | \ 1343 /sbin/mingetty tty4 + | + tty5 + | | \ 1342 /sbin/mingetty tty5 + | + tty3 + | | \ 1339 /sbin/mingetty tty3 + | \ tty2 + | \ 1332 /sbin/mingetty tty2 + + abrtd.service + | \ 1317 /usr/sbin/abrtd -d -s + + crond.service + | \ 1344 crond + + sshd.service + | \ 1362 /usr/sbin/sshd + + sendmail.service + | + 4094 sendmail: Queue runner@01:00:00 for /var/spool/clientmqueue + | \ 4096 sendmail: accepting connections + + haldaemon.service + | + 1249 hald + | + 1250 hald-runner + | + 1273 hald-addon-input: Listening on /dev/input/event3 /dev/input/event9 /dev/input/event1 /dev/input/event7 /dev/input/event2 /dev/input/event0 /dev/input/event8 + | + 1275 /usr/libexec/hald-addon-rfkill-killswitch + | + 1284 /usr/libexec/hald-addon-leds + | + 1285 /usr/libexec/hald-addon-generic-backlight + | \ 1287 /usr/libexec/hald-addon-acpi + + irqbalance.service + | \ 1210 irqbalance + + avahi-daemon.service + | + 1175 avahi-daemon: running [epsilon.local] + + NetworkManager.service + | + 1171 /usr/sbin/NetworkManager --no-daemon + | \ 4028 /sbin/dhclient -d -4 -sf /usr/libexec/nm-dhcp-client.action -pf /var/run/dhclient-wlan0.pid -lf /var/lib/dhclient/dhclient-7d32a784-ede9-4cf6-9ee3-60edc0bce5ff-wlan0.lease -cf /var/run/nm-dhclient-wlan0.conf wlan0 + + rsyslog.service + | \ 1193 /sbin/rsyslogd -c 4 + + mdmonitor.service + | \ 1207 mdadm --monitor --scan -f --pid-file=/var/run/mdadm/mdadm.pid + + cups.service + | \ 1195 cupsd -C /etc/cups/cupsd.conf + + auditd.service + | + 1131 auditd + | + 1133 /sbin/audispd + | \ 1135 /usr/sbin/sedispatch + + dbus.service + | + 1096 /bin/dbus-daemon --system --address=systemd: --nofork --systemd-activation + | + 1216 /usr/sbin/modem-manager + | + 1219 /usr/libexec/polkit-1/polkitd + | + 1242 /usr/sbin/wpa_supplicant -c /etc/wpa_supplicant/wpa_supplicant.conf -B -u -f /var/log/wpa_supplicant.log -P /var/run/wpa_supplicant.pid + | + 1453 /usr/libexec/upowerd + | + 1733 /usr/libexec/udisks-daemon 
+ | + 1747 udisks-daemon: polling /dev/sr0 + | \ 29509 /usr/libexec/packagekitd + + dev-mqueue.mount + + dev-hugepages.mount + \ sysinit.service + + 455 /sbin/udevd -d + + 4016 /usr/sbin/bluetoothd --udev + + 28188 /sbin/udevd -d + \ 28191 /sbin/udevd -d + +(This too is shortened, the same way) + +As you can see, this command shows the processes by their cgroup and hence service, as systemd labels the cgroups after the services. For example, you can easily see that the auditing service auditd.service spawns three individual processes, auditd, audisp and sedispatch. + +If you look closely you will notice that a number of processes have been assigned to the cgroup **/user/1**. At this point let's simply leave it at that __systemd not only maintains services in cgroups, but user session processes as well__. In a later installment we'll discuss in more detail what this about. + +So much for now, come back soon for the next installment! + +posted at: 00:52 | path: /projects | permanent link to this entry | 13 comments +Posted by liam at Wed Sep 8 04:51:28 2010 +Thanks for these posts. +I'm a bit uncertain as to how far cgroups can be pushed for administrative purposes. Can you have nested cgroups? For instance, a Gnome/X/whatever group that one could kill? Can the end user create alias' for cgroups which could then aggregate them into more manageable units? + +thanks + +Posted by Lennart at Wed Sep 8 11:36:25 2010 +Liam, cgroups are fully recursive, you may split every cgroup into sub-cgroups. And as soon as __systemd is used for session management__ the same way it is used for system management session services will be arranged the same way in subgroups of the group the session manager happened to be executed under. + +Posted by Perry Lorier at Sun Sep 12 19:57:12 2010 +So, you've reinvented process groups? + +Posted by Lennart at Sun Sep 12 20:21:43 2010 +Perry, no, not at all. process groups you can escape. They aren't hierarchical, they cannot be labelled. 
Process groups are very very different from cgroups, and useful for little more than pipeline building in shells. + +Posted by Ken Stailey at Sat Nov 10 20:45:39 2012 +The ps "ax" and "-e" options both enable displaying "everything", i.e. all processes. It is not necessary to use both. diff --git a/Zim/Utils/systemd/systemd_for_Administrators,_Part_III.txt b/Zim/Utils/systemd/systemd_for_Administrators,_Part_III.txt new file mode 100644 index 0000000..bc9838d --- /dev/null +++ b/Zim/Utils/systemd/systemd_for_Administrators,_Part_III.txt @@ -0,0 +1,250 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-21T20:36:45+08:00 + +====== systemd for Administrators, Part III ====== +Created Wednesday 21 November 2012 + + +レナート Wunschkonzert, Ponyhof und Abenteuerspielplatz ﻟﻴﻨﺎﺭﺕ + +Fri, 01 Oct 2010 +systemd for Administrators, Part III + +Here's the third installment of my ongoing series about systemd for administrators. + +===== How Do I Convert A SysV Init Script Into A systemd Service File? ===== + +Traditionally, Unix and Linux services (daemons) are started via __SysV init scripts__. These are Bourne Shell scripts, usually residing in a directory such as __/etc/rc.d/init.d/__ which when called with one of a few standardized arguments (verbs) such as __start, stop or restart__ controls, i.e. starts, stops or restarts the service in question. For starts this usually involves invoking the daemon binary, which then forks a background process (more precisely daemonizes). + +Shell scripts tend to be slow, needlessly hard to read, very verbose and fragile. Although they are __immensly flexible__ (after all, they are just code) some things are very hard to do properly with shell scripts, such as ordering parallized execution, correctly supervising processes or just **configuring execution contexts** in all detail. 
systemd provides __compatibility__ with these shell scripts, but due to the shortcomings pointed out it is recommended to install native systemd service files for all daemons installed. Also, in contrast to SysV init scripts which have to be adjusted to the distribution systemd service files are compatible with any kind of distribution running systemd (which become more and more these days...). + +What follows is a terse guide how to take a SysV init script and translate it into a native systemd service file. Ideally, upstream projects should ship and install systemd service files in their tarballs. If you have successfully converted a SysV script according to the guidelines it might hence be a good idea to submit the file as patch to upstream. How to prepare a patch like that will be discussed in a later installment, suffice to say at this point that the daemon(7) manual page shipping with systemd contains a lot of useful information regarding this. + +So, let's jump right in. As an example we'll convert the init script of the **ABRT** daemon into a systemd service file. ABRT is a standard component of every Fedora install, and is an acronym for Automatic Bug Reporting Tool, which pretty much describes what it does, i.e. it is a service for collecting crash dumps. Its SysV script I have uploaded here. + +The first step when converting such a script is to read it (surprise surprise!) and distill the useful information from the usually pretty long script. In almost all cases the script consists of mostly boilerplate code that is identical or at least very similar in all init scripts, and usually copied and pasted from one to the other. So, let's extract the interesting information from the script linked above: + +* A description string for the service is "Daemon to detect crashing apps". As it turns out, the header comments include a redundant number of description strings, some of them describing less the actual service but the init script to start it. 
systemd services include a description too, and it should describe the service and not the service file. +* The __LSB header__[1] contains dependency information. systemd due to its design around socket-based activation usually needs __no__ (or very little) manually configured dependencies. (For details regarding socket activation see the original announcement blog post.) In this case the dependency on $syslog (which encodes that abrtd requires a syslog daemon), is the only valuable information. While the header lists another dependency ($local_fs) this one is redundant with systemd as normal system services are always started with all local file systems available. +* The LSB header suggests that this service should be started in runlevels 3 (multi-user) and 5 (graphical). +* The daemon binary is /usr/sbin/abrtd + +And that's already it. The entire remaining content of this 115-line shell script is simply boilerplate or otherwise redundant code: code that deals with synchronizing and serializing startup (i.e. the code regarding lock files) or that outputs status messages (i.e. the code calling echo), or simply parsing of the verbs (i.e. the big case block). + +From the information extracted above we can now write our systemd service file: + +**[Unit]** +**Description=Daemon to detect crashing apps** +**After=syslog.target** + +**[Service]** +**ExecStart=/usr/sbin/abrtd** +**Type=forking** + +**[Install]** +**WantedBy=multi-user.target** + +A little explanation of the contents of this file: The [Unit] section contains generic information about the service. systemd not only manages system services, but also devices, mount points, timer, and other components of the system. The generic term for all these objects in systemd is a unit, and the [Unit] section encodes information about it that might be applicable not only to services but also in to the other unit types systemd maintains. 
In this case we set the following unit settings: we set the description string and configure that the daemon shall be **started after** Syslog[2], similar to what is encoded in the LSB header of the original init script. For this Syslog dependency we create a dependency of type __After=__ on a systemd unit syslog.target. The latter is **a special target unit** in systemd and is the standardized name to pull in a syslog implementation. For more information about these standardized names see the systemd.special(7). Note that a dependency of type After= only encodes the suggested ordering, but does not actually cause syslog to be started when abrtd is -- and this is exactly what we want, since abrtd actually works fine even without syslog being around. However, if both are started (and usually they are) then the order in which they are is controlled with this dependency. + +The next section is [Service] which encodes information about the service itself. It contains all those settings that apply only to services, and not the other kinds of units systemd maintains (mount points, devices, timers, ...). Two settings are used here: __ExecStart=__ takes the path to the binary to execute when the service shall be started up. And with __Type=__ we configure **how the service notifies the init system that it finished starting up.** Since traditional Unix daemons do this by returning to the parent process after having forked off and initialized the background daemon we set the type to forking here. That tells systemd to wait until the start-up binary returns and then consider the processes still running afterwards the daemon processes. + +The final section is [Install]. It encodes information about how the suggested installation should look like, i.e. under which circumstances and by which triggers the service shall be started. In this case we simply say that __this service shall be started when the multi-user.target unit is activated__. 
This is a special unit (see above) that basically takes the role of the classic SysV Runlevel 3[3]. The setting WantedBy= has little effect on the daemon during runtime. It is only read by the systemctl enable command, which is the recommended way to enable a service in systemd. This command will simply ensure that our little service gets automatically activated as soon as multi-user.target is requested, which it is on all normal boots[4]. + +And that's it. Now we already have a minimal working systemd service file. To test it we copy it to /etc/systemd/system/abrtd.service and invoke systemctl daemon-reload. This will make systemd take notice of it, and now we can start the service with it: systemctl start abrtd.service. We can verify the status via systemctl status abrtd.service. And we can stop it again via systemctl stop abrtd.service. Finally, we can enable it, so that it is activated by default on future boots with systemctl enable abrtd.service. + +The service file above, while sufficient and basically a 1:1 translation (feature- and otherwise) of the SysV init script still has room for improvement. Here it is a little bit updated: + +[Unit] +Description=ABRT Automated Bug Reporting Tool +After=syslog.target + +[Service] +Type=dbus +BusName=com.redhat.abrt +ExecStart=/usr/sbin/abrtd -d -s + +[Install] +WantedBy=multi-user.target + +So, what did we change? Two things: we improved the description string a bit. More importantly however, we changed the type of the service to dbus and configured the D-Bus bus name of the service. Why did we do this? As mentioned classic SysV services daemonize after startup, which usually involves double forking and detaching from any terminal. While this is useful and necessary when daemons are invoked via a script, this is unnecessary (and slow) as well as counterproductive when a proper process babysitter such as systemd is used. 
The reason for that is that the forked off daemon process usually has little relation to the original process started by systemd (after all the daemonizing scheme's whole idea is to remove this relation), and hence it is difficult for systemd to figure out after the fork is finished which process belonging to the service is actually the main process and which processes might just be auxiliary. But that information is crucial to implement advanced babysitting, i.e. supervising the process, automatic respawning on abnormal termination, collecting crash and exit code information and suchlike. In order to make it easier for systemd to figure out the main process of the daemon we changed the service type to dbus. The semantics of this service type are appropriate for all services that take a name on the D-Bus system bus as last step of their initialization[5]. ABRT is one of those. With this setting systemd will spawn the ABRT process, which will no longer fork (this is configured via the -d -s switches to the daemon), and systemd will consider the service fully started up as soon as com.redhat.abrt appears on the bus. This way the process spawned by systemd is the main process of the daemon, systemd has a reliable way to figure out when the daemon is fully started up and systemd can easily supervise it. + +And that's all there is to it. We have a simple systemd service file now that encodes in 10 lines more information than the original SysV init script encoded in 115. And even now there's a lot of room left for further improvement utilizing more features systemd offers. For example, we could set Restart=restart-always to tell systemd to automatically restart this service when it dies. Or, we could use OOMScoreAdjust=-500 to ask the kernel to please leave this process around when the OOM killer wreaks havoc.
Or, we could use CPUSchedulingPolicy=idle to ensure that abrtd processes crash dumps in background only, always allowing the kernel to give preference to whatever else might be running and needing CPU time. + +For more information about the configuration options mentioned here, see the respective man pages systemd.unit(5), systemd.service(5), systemd.exec(5). Or, browse all of systemd's man pages. + +Of course, not all SysV scripts are as easy to convert as this one. But gladly, as it turns out the vast majority actually are. + +That's it for today, come back soon for the next installment in our series. + +Footnotes + +[1] The LSB header of init scripts is a convention of including meta data about the service in comment blocks at the top of SysV init scripts and is defined by the Linux Standard Base. This was intended to standardize init scripts between distributions. While most distributions have adopted this scheme, the handling of the headers varies greatly between the distributions, and in fact still makes it necessary to adjust init scripts for every distribution. As such the LSB spec never kept the promise it made. + +[2] Strictly speaking, this dependency does not even have to be encoded here, as it is redundant in a system where the Syslog daemon is socket activatable. Modern syslog systems (for example rsyslog v5) have been patched upstream to be socket-activatable. If such a init system is used configuration of the After=syslog.target dependency is redundant and implicit. However, to maintain compatibility with syslog services that have not been updated we include this dependency here. + +[3] At least how it used to be defined on Fedora. + +[4] Note that in systemd the graphical bootup (graphical.target, taking the role of SysV runlevel 5) is an implicit superset of the console-only bootup (multi-user.target, i.e. like runlevel 3). That means hooking a service into the latter will also hook it into the former. 
+ +[5] Actually the majority of services of the default Fedora install now take a name on the bus after startup. + +posted at: 04:42 | path: /projects | permanent link to this entry | 19 comments +Posted by Anonymous at Fri Oct 1 06:58:25 2010 +Ideally, couldn't you configure ABRT to only run when core files show up in a given directory, or when something requests its dbus service? + +Posted by drago01 at Fri Oct 1 10:52:20 2010 +CPUSchedulingPolicy=idle ... is there the same thing for IO i.e IOSchedulingPolicy=idle ? + +In most cases I couldn't care less about CPU on todays multicore machines but IO is still a very limited resource (when not running an SSD). + +The kernel actually allows setting IO priorities (when using the CFQ scheduler). + +Posted by Lennart at Fri Oct 1 13:04:51 2010 +Anonymous: While this would definitely be desirable AFAICS abrt doesn't support this scheme, since it needs to be running when the first crash dump is collected. + +drag01: There's IOSchedulingClass=idle for you. + +Posted by John Drinkwater at Fri Oct 1 13:34:29 2010 +Restart=restart-always +Again, why have this redundancy if you are starting a design from scratch? +Restart=always|once|on-success + +CPUSchedulingPolicy=idle +IOSchedulingClass=idle +Why is one a class, and another a policy? People will mistype these. + +This is not bikeshedding, this is a request to stop making everything long-winded when it does not need to be so. If systemd is to be around for the next few decades, and you have time to refine it before the next Fedora release, please do so. + +Posted by Lennart at Fri Oct 1 13:55:07 2010 +John, regarding Restart= you have a point. And I fixed that now. + +Regarding the Class vs. Policy thing: that's how the kernel calls these things, blame the kernel folks for that. I think it would be a very bad idea to introduce deviating terminology here where the kernel fucked up. 
+ +Posted by Milan Bouchet-Valat at Fri Oct 1 15:00:38 2010 +Glad to see you have an easy to parse Description field! But while you're at it, could you consider providing translated descriptions for configuration tools? + +Recently, Ubuntu had a GSoC about writing a new config tool for Upstart. One of the issues was that there's no way to get a localized translation from Upstart jobs or SysV scripts, let alone an icon! It would be great if you tackled this issue in Systemd, e.g. with a standard .desktop-like file that services should ship. + + +The other part of the work Jacob Peddicord did in his GSoC is more remote from Systemd, but might be interesting. He has a whole project of describing configuration files associated with a service: +http://jacob.peddicord.net/gsoc2010/ +http://people.ubuntu.com/~jpeddicord/SLS/0.8/sls-format-0.8.html + +I guess it can be good you know it exists... + +Posted by Lennart at Fri Oct 1 15:11:55 2010 +Milan: the longer term plan is to support translations for the descriptions the same way as .desktop files have them. Right now we don't do this, but this is definitely the plan. I am also open to adding an Icon setting, though I am a bit concerned that if we add and Icon, then the next thing asked for is a Vendor ID and so on and so on. + +Posted by j at Fri Oct 1 18:35:14 2010 +verbosity is redundant (and confusing) for a unix system tool. Since Io scheduling classes are linux-specific, it can be written like that: + +CPUSchedulingPolicy -> SchedLevel +IOSchedulingClass -> IOSchedLevel + +BTW is systemd portable to all Unix or it needs linux kernel for some reason? + +Posted by Lennart at Fri Oct 1 18:47:31 2010 +j, calling the same stuff in userspace differently than in kernelspace, and calling the same stuff in the chrt tool differently than in systemd is a very bad idea. + +systemd is strictly Linux specific. It is not portable to other Unixes and we do not care about portability to them. 
This allows us to make use of Linux features and is one of the reasons why systemd is so much more powerful than any other init system around. + +Posted by Grahame at Fri Oct 1 19:03:30 2010 +At the moment if I'm having a problem with a daemon failing to start I might just hack the init script, chuck strace in, and restart it. It'd be great if you could show how you might shim a failing daemon, particularly when debugging 'fails on reboot' issues (eg. starts fine later.) + +Posted by Anonymous at Fri Oct 1 22:10:11 2010 +I'm wondering about a services that get autostarted via D-Bus. D-Bus starts them itself, so unless I'm wrong they'll end up in the D-Bus service cgroup, not in their own cgroups. Yet I want them to be controllable as services itself. Is this possible to achieve? + +Posted by Michael at Fri Oct 1 23:21:36 2010 +@Anonymous: + +This is one of systemd's great features: +Starting with dbus 1.4.0, dbus-daemon can hand over starting of system services to systemd, where you have all those possibilites to monitor and confine the service (in it's own cgroup) + +All you need to do is to add a +SystemdService=foo.service +line to the D-Bus service file, create a foo.service file for systemd and systemd will automatically start the service defined in foo.service. + +Posted by Andreas at Sat Oct 2 00:49:51 2010 +I agree with those complaining about names like CPUSchedulingPolicy but as Lennart said that is hardly the fault of systemd. Not really much that can be done about it. + +This is post is the part I like the most about systemd. No more boilerplate bash and no horrible XML like the launchd plists or overly verbose XML like SMF. Now there might be other good init systems but this is the first one I have seen where it is easy to just read the job configurations. + +Like the use of sections too in the files so when I read them I can mostly ignore sections like [Install]. 
+ +Posted by codebeard at Sat Oct 2 04:36:12 2010 +@Grahame + +I assume that you can do something like: +ExecStart=/usr/bin/strace -f -o /root/abrtd.strace /usr/sbin/abrtd -d -s + +But perhaps Lennart has another way in mind to do this? + +Posted by John Drinkwater at Sat Oct 2 14:24:51 2010 +Lennart, thanks. Apologies if my comment came over a little stronger than I intended. +I notice some variables for scheduling have different ranges, is this again a kernel issue? Maybe I should go bang some heads there.. + +Posted by Dag Wieers at Mon Nov 22 17:58:46 2010 +Lennart, + +Since the moment I read your first systemd announcement I am excited about this new development. It's one of those things you wonder why that wasn't done decades ago ;-) + +However, I see one thing that I liked about the sysv scripts, that is not possible. The importance of the original sysv scripts is that they are written in bash and so offers a lot of flexibility to system administrators. Flexibility comes with responsibility :-) However, where in the past sysv scripts did more than simply start/stop/restart/reload, some scripts allowed to check configuration syntax (eg. apache), initialize something (eg. sshd), etc... + +There is an advantage in keeping those actions as part of the systemd tools in my opinion. Even if they are simply passing the action through to a daemon-specific configuration tool (eg. apachectl), which could become a standard. This is exactly why I liked the design of "op" so much (compared to sudo). It provided system administrators (and users) with a single interface to actions using a clean syntax. + +While reading the post and the documentation I couldn't find whether "custom actions" would be retained in your design. If not, what would be the recommended alternative ? + +Posted by Lennart at Mon Nov 22 19:13:05 2010 +Grahame: codebears is right. You can easily prefix binary paths with strace. 
Just copy the service file from /lib/systemd/system to /etc/systemd/system and edit the ExecStart= line, and done. + +John: yes, we mostly expose the kernel stuff 1:1. + +Dag: we do not support custom actions, since their set of parameters and what they return is completely free-form it would be a bit weird to pass that through D-Bus. For example, if something is interactive, how would you pass that through D-Bus? If people want additional control interfaces for their tools, then they should create them outside of systemd, for example by creating a separate ctl tool, such as apachectl. I mean, I think it makes sense to expose new "verbs" in systemd iff these verbs make sense for everybody the same way as "start" and "stop" and similar apply for every service the same way. However, something like "apachectl graceful" is in all its meaning highly specific to Apache, and hence trying to abstract that in systemd must fail, since it's nothing that really could be abstracted nicely. SMF allows definition of additional verbs for each service, but I am not convinced this is really a good idea. + +Posted by anonymous at Thu Jul 14 06:26:29 2011 +bit confuse with: + +To test it we copy it to /etc/systemd/system/ + +in my f15 installation can not find *.service file but i find in /lib/systemd/system + +where .service file actually resident? + +Posted by dave at Mon Feb 27 23:49:34 2012 +Hi + +I was having problems with my shutdown services not running as expected. I traced the issue to the use of the '--force' flag in the halt, poweroff and reboot services.
Here's more about the issue and how to resolve: - + +http://www.practicalclouds.com/content/blog/1/dave-mccormick/2012-02-27/why-do-my-systemd-shutdown-scripts-not-run + +regards + + +Dave + +Leave a Comment: + +Your Name: + + +Your E-mail (optional): + + +Comment: + + +As a protection against comment spam, please type the following number into the field on the right: +Secret Number Image + +Please note that this is neither a support forum nor a bug tracker! Support questions or bug reports posted here will be ignored and not responded to! + +It should be obvious but in case it isn't: the opinions reflected here are my own. They are not the views of my employer, or Ronald McDonald, or anyone else. + +Please note that I take the liberty to delete any comments posted here that I deem inappropriate, off-topic, or insulting. And I excercise this liberty quite agressively. So yes, if you comment here, I might censor you. If you don't want to be censored you are welcome to comment on your own blog instead. +Lennart's Blog | Lennart's Homepage | Lennart's Photos | Impressum/Imprint +Lennart Poettering +Syndicated on Planet GNOME, Planet Fedora, planet.freedesktop.org, Planet Debian Upstream. feed RSS 0.91, RSS 2.0 +Archives: 2005, 2006, 2007, 2008, 2009, 2010, 2011 + +Valid XHTML 1.0 Strict! Valid CSS! diff --git a/Zim/内核开发/linux_input_ecosystem.txt b/Zim/内核开发/linux_input_ecosystem.txt new file mode 100644 index 0000000..e7c8ebc --- /dev/null +++ b/Zim/内核开发/linux_input_ecosystem.txt @@ -0,0 +1,106 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-20T11:48:13+08:00 + +====== linux input ecosystem ====== +Created Tuesday 20 November 2012 +http://joeshaw.org/2010/10/01/681/ + +1 October 2010 + +Over the past couple of days, I’ve been trying to figure out how input in Linux works on modern systems. There are lots of small pieces at various levels, and it’s hard to understand how they all interact. 
Things are not helped by the fact that things have changed quite a bit over the past couple of years as **HAL** — which I helped write — has been giving way to **udev**, and existing literature is largely out of date. This is my attempt at understanding how things work today, in the Ubuntu Lucid release. + +===== kernel ===== +In the Linux kernel’s **input system**, there are two pieces: __the device driver and the event driver__. The device driver talks to the hardware, obviously. Today, for most USB devices this is handled by **the usbhid driver**. The event drivers handle how to __expose the events generated by the device driver to userspace.__ Today this is primarily done through **evdev**, which creates character devices (typically named **/dev/input/eventN**) and communicates with them through **struct input_event** messages. See include/linux/input.h for its definition. + +A great tool to use for getting information about evdev devices and events is **evtest**. + +A somewhat outdated but still relevant description of the kernel input system can be found in the kernel’s Documentation/input/input.txt file. + +===== udev ===== +When a device is connected, the kernel creates an entry in **sysfs** for it and generates **a hotplug event**. __That hotplug event is processed by udev__, which applies some policy, attaches additional properties to the device, and ultimately creates a device node for you somewhere in /dev. + +For input devices, the rules in **/lib/udev/rules.d/60-persistent-input.rules** are executed. Among the things it does is run a **/lib/udev/input_id** tool which queries the capabilities of the device from its sysfs node and sets environment variables like **ID_INPUT_KEYBOARD, ID_INPUT_TOUCHPAD, etc.** in the udev database. + +For more information on input_id see the original announcement email to the hotplug list.
+ +例如:udevadm info 的输出如下 +P: /devices/platform/i8042/serio4 +E: DEVPATH=/devices/platform/i8042/serio4 +E: DRIVER=psmouse +E: MODALIAS=serio:ty01pr00id00ex00 +E: SERIO_EXTRA=00 +E: SERIO_ID=00 +E: SERIO_PROTO=00 +E: SERIO_TYPE=01 +E: SUBSYSTEM=serio + +P: /devices/platform/i8042/serio4/**input/input6** +E: ABS=11000003 +E: DEVPATH=/devices/platform/i8042/serio4/input/input6 + -- +E: NAME="SynPS/2 Synaptics TouchPad" +E: PHYS="isa0060/serio4/input0" +E: PRODUCT=11/2/7/1b1 +E: PROP=1 +E: __SUBSYSTEM=input__ +E: TAGS=:seat: +E: USEC_INITIALIZED=13139 + +P: /devices/platform/i8042/serio4/input/input6/**event6 //设备路径path** +N: input/event6 **//kernel打印出的设备名称name** +__S: input/by-path/platform-i8042-serio-4-event-mouse //设备文件的符号链接symlink__ +__E: DEVLINKS=/dev/input/by-path/platform-i8042-serio-4-event-mouse //udev导出的环境变量Env__ +__E: DEVNAME=/dev/input/event6__ +E: DEVPATH=/devices/platform/i8042/serio4/input/input6/event6 +E: ID_INPUT=1 +E: __ID_INPUT_TOUCHPAD=1__ +E: __ID_PATH=platform-i8042-serio-4__ +E: ID_PATH_TAG=platform-i8042-serio-4 +E: ID_SERIAL=noserial +E: MAJOR=13 +E: MINOR=70 +E: SUBSYSTEM=input +E: USEC_INITIALIZED=13541 + +P: /devices/platform/i8042/serio4/input/input6/mouse0 +N: input/mouse0 +S: input/by-path/platform-i8042-serio-4-mouse +E: DEVLINKS=/dev/input/by-path/platform-i8042-serio-4-mouse +E: DEVNAME=/dev/input/mouse0 +E: DEVPATH=/devices/platform/i8042/serio4/input/input6/mouse0 +E: ID_INPUT=1 +E: ID_INPUT_TOUCHPAD=1 +E: ID_PATH=platform-i8042-serio-4 +E: ID_PATH_TAG=platform-i8042-serio-4 +E: ID_SERIAL=noserial +E: MAJOR=13 +E: MINOR=32 +E: SUBSYSTEM=input +E: USEC_INITIALIZED=13486 + +===== X ===== +X has __a udev config backend__ which queries udev for the various input devices. It does this at startup and also watches for hotplugged devices. X looks at the different **ID_INPUT_*** properties to determine whether it’s a keyboard, a mouse, a touchpad, a joystick, or some other device. 
This information can be used in **/etc/X11/xorg.conf.d** files in the form of MatchIsPointer, MatchIsTouchpad, MatchIsJoystick, etc. in __InputClass__ sections to see whether to apply configuration to a given device. + +Xorg has a handful of its own drivers to handle input devices, including **evdev, synaptics, and joystick**. And here is where things start to get confusing. + +Linux has this great __generic event interface in evdev__, which means that very few drivers are needed to interact with hardware, since they’re not speaking device-specific protocols. Of the few needed on Linux nearly all of them speak evdev, including the three I listed above. + +evdev即事件驱动程序,为输入子系统提供了一个默认的事件处理方法。它接收来自硬件底层驱动程序的大多数事件,并使用相应的逻辑对其进行处理。evdev输入事件驱动 +程序从底层驱动程序接收事件信息,将其通过/dev/input/eventN字符设备暴露给用户空间,用户程序通过读取这些设备文件,就能够达到处理事件的目的。 + +The evdev driver provides basic __keyboard and mouse__ functionality, speaking — obviously — evdev through the **/dev/input/eventN** devices. It also handles things like the **lid (laptop lid switch) and power switches**. This is the basic, generic input driver for Xorg on Linux. + +The synaptics driver is the most confusing of all. It also speaks evdev to the kernel. On Linux it does not talk to the hardware directly, and is __in no way Synaptics™ hardware-specific__. The synaptics driver is simply a separate driver from evdev which adds a lot of features expected of touchpad hardware, for example two-finger scrolling. It should probably be renamed the “touchpad” module, except that on non-Linux OSes it can still speak the Synaptics protocol. + +The joystick driver similarly handles joysticky things, but speaks evdev to the kernel rather than some device-specific protocol. + +synaptics和joystick驱动程序其实都不是直接控制硬件的驱动,相反,它们通过evdev协议从kernel接收由硬件驱动程序产生的事件,然后进行处理。 + +X only has concepts of __keyboards and pointers, the latter of which includes mice, touchpads, joysticks, wacom tablets, etc__.
+ +X also has the concept of the __core keyboard and pointer__, which is how events are most often delivered to applications. By default all devices send core events, but certain setups might want to make devices non-core. + +If you want to receive events for non-core devices, you need to use the **XInput or XInput2 extensions** for that. XInput exposes core-like events (like DeviceMotionNotify and DeviceButtonPress), so it is not a major difficulty to use, although its setup is annoyingly different than most other X extensions. I have not used XInput2. + +Peter Hutterer’s blog is an excellent resource for all things input related in X. diff --git a/Zim/观点/一些有趣的编程名言.txt b/Zim/观点/一些有趣的编程名言.txt new file mode 100644 index 0000000..99e5235 --- /dev/null +++ b/Zim/观点/一些有趣的编程名言.txt @@ -0,0 +1,40 @@ +Content-Type: text/x-zim-wiki +Wiki-Format: zim 0.4 +Creation-Date: 2012-11-30T20:20:03+08:00 + +====== 一些有趣的编程名言 ====== +Created Friday 30 November 2012 +原文:http://ihower.tw/blog/archives/7046/ + +以下内容收集自一些优秀的技术书籍章节开头的名言佳句,非常有意思,直接或间接反映了软件开发中的一些事情。 + + 生命太短暂,不要去做一些根本__没有人想要__的东西。——Ash Maurya,Running Lean 作者 + 如果你交给某人一个程序,你将折磨他一整天;如果你教某人如何编写程序,你将折磨他一辈子。——David Leinweber + 软件设计有两种方式:一种方式是,使软件过于简单,明显没有缺陷;另一种方式是,使软件过于复杂,没有明显的缺陷。——C.A.R. Hoare + 其实,我尝试着使Ruby更自然,而不是简单。Ruby看起来很简单,但内部是非常复杂的,就像我们的身体一样。——松本行弘,Ruby之父 + 大部分情况下,**构建程序的过程本质上是对规范调试的过程**。——Fred Brooks,《人月神话》作者 + 软件开发往往是这样:最开始的90%代码占用了开始的90%的开发时间;剩下10%代码同样需要90%的开发时间。——Tom Cargill + 当你试图解决一个__你不理解的问题__时,复杂化就产生了。——Andy Boothe + 用几个小时来制定计划,可以节省几周的编程时间。—— 匿名 + __控制复杂性__是计算机编程的本质。—— Brian Kernighan + 计算机科学领域的所有问题都可以通过增加一个间接层来解决。——David Wheeler + 编程是两队人马在竞争:软件工程师努力设计出最大最好的连白痴都会使用的程序;而宇宙在拼命制造最大最好的白痴。到目前为止,宇宙是胜利者。—— Rick Cook + 调试一个初次见到的代码比重写代码要困难两倍。因此,按照定义,如果你写代码非常巧妙,那么没有人足够聪明来调试它。—— Brian W.
Kernighan + 我不是一个伟大的程序员,我只是一个具有__良好习惯的优秀程序员__。——Kent Beck + 你们中大多数人都熟悉程序员的美德,有三种:那就是懒惰、急躁和傲慢。——Larry Wall,Perl语言发明人 + 任何一个傻瓜都会写能够让机器理解的代码,只有好的程序员才能写出人类可以理解的代码。——Martin Fowler + 靠代码行数来衡量开发进度,就像是凭重量来衡量飞机制造的进度。——比尔•盖茨 + 这不是一个bug,这只是一个未列出来的特性。——匿名 + 作为一个程序员,郁闷的事情是,面对一个代码块,却不敢去修改。更糟糕的是,这个代码块还是自己写的。—— Peyton Jones + 它在我的机器上可以很好运行!——大部分程序员 + **能说算不上什么,有本事就把你的代码给我看看。**——Linus Torvalds,Linux之父 + 我认为对象就像是生物学里的细胞,或者网络中的一台计算机,只能够通过消息来通信。——Alan Kay,Smalltalk的发明人,面向对象之父 + 当你选择了一种语言,意味着你还选择了__一组技术、一个社区__。——Joshua Bloch + 质量、速度、廉价,选择其中两个。——匿名 + 过早的优化是罪恶之源。——Donald Knuth + 没有什么代码的执行速度比空代码更快。——Merb核心原则 + 如果你是房间里最聪明的人,那么你走错房间了。——匿名 + 如果只需更改一个单一的代码行,你的部门需要花费多长时间?——Mary Poppendieck + 九个人不能让一个孩子在一个月内出生。——Fred Brooks,《人月神话》作者 + 好代码本身就是最好的文档。当你需要添加一个注释时,你应该考虑如何修改代码才能不需要注释。——Steve McConnell,Code Complete 作者 + 一个人在教会电脑之前,别说他真正理解这个东西了。——Donald Knuth